DNA SVN: r1187 - in trunk: extensions/dna-connector-store-jpa and 5 other directories.
by dna-commits@lists.jboss.org
Author: rhauch
Date: 2009-09-02 14:58:39 -0400 (Wed, 02 Sep 2009)
New Revision: 1187
Removed:
trunk/extensions/dna-web-jcr-rest-war/.settings/org.eclipse.jdt.core.prefs
trunk/extensions/dna-web-jcr-rest/.settings/org.eclipse.jdt.core.prefs
Modified:
trunk/dna-graph/src/test/java/org/jboss/dna/graph/connector/test/AbstractConnectorTest.java
trunk/dna-graph/src/test/java/org/jboss/dna/graph/connector/test/WritableConnectorTest.java
trunk/extensions/dna-connector-store-jpa/pom.xml
trunk/extensions/dna-connector-store-jpa/src/main/java/org/jboss/dna/connector/store/jpa/JpaSource.java
trunk/extensions/dna-web-jcr-rest-war/.classpath
trunk/extensions/dna-web-jcr-rest/.classpath
Log:
DNA-509 JPA Connector does not close all EntityManager instances that it creates
Made temporary fix for the JpaSource class to properly close the EntityManagers, as well as the connector framework's AbstractConnectorTest and WritableConnectorTest so that they properly close the RepositorySource instances when finished with each test. This needs to be cleaned up a little more before this issue can be resolved.
Modified: trunk/dna-graph/src/test/java/org/jboss/dna/graph/connector/test/AbstractConnectorTest.java
===================================================================
--- trunk/dna-graph/src/test/java/org/jboss/dna/graph/connector/test/AbstractConnectorTest.java 2009-09-02 18:07:00 UTC (rev 1186)
+++ trunk/dna-graph/src/test/java/org/jboss/dna/graph/connector/test/AbstractConnectorTest.java 2009-09-02 18:58:39 UTC (rev 1187)
@@ -28,6 +28,7 @@
import static org.junit.Assert.assertThat;
import java.io.PrintStream;
import java.io.PrintWriter;
+import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -38,6 +39,7 @@
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import org.jboss.dna.common.statistic.Stopwatch;
+import org.jboss.dna.common.util.Reflection;
import org.jboss.dna.graph.DnaLexicon;
import org.jboss.dna.graph.ExecutionContext;
import org.jboss.dna.graph.Graph;
@@ -161,11 +163,28 @@
connection.close();
}
} finally {
+ openConnections = null;
running = false;
rootLocation = null;
rootUuid = null;
}
}
+ if (source != null) {
+ try {
+ Reflection reflection = new Reflection(source.getClass());
+ reflection.invokeBestMethodOnTarget(new String[] {"close"}, source);
+ } catch (NoSuchMethodException err) {
+ // do nothing (method did not exist)
+ } catch (IllegalAccessException err) {
+ } catch (InvocationTargetException err) {
+ } finally {
+ source = null;
+ }
+ }
+ graph = null;
+ context = null;
+ configSource = null;
+ connectionFactory = null;
}
/**
Modified: trunk/dna-graph/src/test/java/org/jboss/dna/graph/connector/test/WritableConnectorTest.java
===================================================================
--- trunk/dna-graph/src/test/java/org/jboss/dna/graph/connector/test/WritableConnectorTest.java 2009-09-02 18:07:00 UTC (rev 1186)
+++ trunk/dna-graph/src/test/java/org/jboss/dna/graph/connector/test/WritableConnectorTest.java 2009-09-02 18:58:39 UTC (rev 1187)
@@ -47,6 +47,7 @@
import org.jboss.dna.graph.property.PathNotFoundException;
import org.jboss.dna.graph.property.PropertyFactory;
import org.jboss.dna.graph.property.Reference;
+import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -76,6 +77,12 @@
IoUtil.read(getClass().getClassLoader().getResourceAsStream("LoremIpsum3.txt"))};
}
+ @Override
+ @After
+ public void afterEach() throws Exception {
+ super.afterEach();
+ }
+
/**
* These tests require that the source supports updates, since all of the tests do some form of updates.
*/
Modified: trunk/extensions/dna-connector-store-jpa/pom.xml
===================================================================
--- trunk/extensions/dna-connector-store-jpa/pom.xml 2009-09-02 18:07:00 UTC (rev 1186)
+++ trunk/extensions/dna-connector-store-jpa/pom.xml 2009-09-02 18:58:39 UTC (rev 1187)
@@ -36,7 +36,7 @@
<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-c3p0</artifactId>
- <version>3.3.1.GA</version>
+ <version>3.3.2.GA</version>
</dependency>
<!--
HSQLDB
Modified: trunk/extensions/dna-connector-store-jpa/src/main/java/org/jboss/dna/connector/store/jpa/JpaSource.java
===================================================================
--- trunk/extensions/dna-connector-store-jpa/src/main/java/org/jboss/dna/connector/store/jpa/JpaSource.java 2009-09-02 18:07:00 UTC (rev 1186)
+++ trunk/extensions/dna-connector-store-jpa/src/main/java/org/jboss/dna/connector/store/jpa/JpaSource.java 2009-09-02 18:58:39 UTC (rev 1187)
@@ -880,7 +880,8 @@
Context context = new InitialContext();
dataSource = (DataSource)context.lookup(this.dataSourceJndiName);
} catch (Throwable t) {
- Logger.getLogger(getClass()).error(t, JpaConnectorI18n.errorFindingDataSourceInJndi, name, dataSourceJndiName);
+ Logger.getLogger(getClass())
+ .error(t, JpaConnectorI18n.errorFindingDataSourceInJndi, name, dataSourceJndiName);
}
}
@@ -938,11 +939,13 @@
setModel(actualModelName);
} catch (Throwable e) {
// The actual model name doesn't match what's available in the software ...
+ entityManager.close();
entityManagerFactory.close();
String msg = JpaConnectorI18n.existingStoreSpecifiesUnknownModel.text(name, actualModelName);
throw new RepositorySourceException(msg);
}
}
+ entityManager.close();
entityManagerFactory.close();
// Now, create another entity manager with the classes from the correct model
Modified: trunk/extensions/dna-web-jcr-rest/.classpath
===================================================================
--- trunk/extensions/dna-web-jcr-rest/.classpath 2009-09-02 18:07:00 UTC (rev 1186)
+++ trunk/extensions/dna-web-jcr-rest/.classpath 2009-09-02 18:58:39 UTC (rev 1187)
@@ -1,8 +1,8 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<classpath>
- <classpathentry kind="src" output="target/classes" path="src/main/java"/>
- <classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER"/>
- <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
- <classpathentry kind="con" path="org.eclipse.jst.j2ee.internal.module.container"/>
- <classpathentry kind="output" path="target/classes"/>
-</classpath>
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" output="target/classes" path="src/main/java"/>
+ <classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
+ <classpathentry kind="con" path="org.eclipse.jst.j2ee.internal.module.container"/>
+ <classpathentry kind="output" path="target/classes"/>
+</classpath>
Deleted: trunk/extensions/dna-web-jcr-rest/.settings/org.eclipse.jdt.core.prefs
===================================================================
--- trunk/extensions/dna-web-jcr-rest/.settings/org.eclipse.jdt.core.prefs 2009-09-02 18:07:00 UTC (rev 1186)
+++ trunk/extensions/dna-web-jcr-rest/.settings/org.eclipse.jdt.core.prefs 2009-09-02 18:58:39 UTC (rev 1187)
@@ -1,7 +0,0 @@
-#Wed May 27 16:41:15 EDT 2009
-eclipse.preferences.version=1
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
-org.eclipse.jdt.core.compiler.compliance=1.5
-org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
-org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
-org.eclipse.jdt.core.compiler.source=1.5
Modified: trunk/extensions/dna-web-jcr-rest-war/.classpath
===================================================================
--- trunk/extensions/dna-web-jcr-rest-war/.classpath 2009-09-02 18:07:00 UTC (rev 1186)
+++ trunk/extensions/dna-web-jcr-rest-war/.classpath 2009-09-02 18:58:39 UTC (rev 1187)
@@ -2,7 +2,7 @@
<classpath>
<classpathentry kind="src" output="target/test-classes" path="src/test/java"/>
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources"/>
- <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="org.eclipse.jst.component.dependency" value="/WEB-INF/lib"/>
Deleted: trunk/extensions/dna-web-jcr-rest-war/.settings/org.eclipse.jdt.core.prefs
===================================================================
--- trunk/extensions/dna-web-jcr-rest-war/.settings/org.eclipse.jdt.core.prefs 2009-09-02 18:07:00 UTC (rev 1186)
+++ trunk/extensions/dna-web-jcr-rest-war/.settings/org.eclipse.jdt.core.prefs 2009-09-02 18:58:39 UTC (rev 1187)
@@ -1,7 +0,0 @@
-#Wed May 27 16:41:12 EDT 2009
-eclipse.preferences.version=1
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
-org.eclipse.jdt.core.compiler.compliance=1.5
-org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
-org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
-org.eclipse.jdt.core.compiler.source=1.5
14 years, 8 months
DNA SVN: r1186 - in trunk/dna-common/src: test/java/org/jboss/dna/common/text and 1 other directory.
by dna-commits@lists.jboss.org
Author: rhauch
Date: 2009-09-02 14:07:00 -0400 (Wed, 02 Sep 2009)
New Revision: 1186
Modified:
trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java
trunk/dna-common/src/test/java/org/jboss/dna/common/text/SampleSqlParser.java
trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamTest.java
Log:
Added ability to use a wildcard for the token value (in some of the methods), and also fixed the examples that didn't call start().
Modified: trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java
===================================================================
--- trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java 2009-09-02 18:06:33 UTC (rev 1185)
+++ trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java 2009-09-02 18:07:00 UTC (rev 1186)
@@ -127,6 +127,7 @@
* public List<Statement> parse( String ddl ) {
* TokenStream tokens = new TokenStream(ddl, new SqlTokenizer(), false);
* List<Statement> statements = new LinkedList<Statement>();
+ * tokens.start();
* while (tokens.hasNext()) {
* if (tokens.matches("SELECT")) {
* statements.add(parseSelect(tokens));
@@ -361,6 +362,21 @@
@NotThreadSafe
public class TokenStream {
+ /**
+ * A constant that can be used with the {@link #matches(String)}, {@link #matches(String, String...)},
+ * {@link #consume(String)}, {@link #consume(String, String...)}, {@link #canConsume(String)} and
+ * {@link #canConsume(String, String...)} methods to signal that any value is allowed to be matched.
+ * <p>
+ * Note that this exact instance must be used; an equivalent string will not work.
+ * </p>
+ */
+ public static final String ANY_VALUE = "any value";
+ /**
+ * A constant that can be used with the {@link #matches(int)}, {@link #matches(int, int...)}, {@link #consume(int)}, and
+ * {@link #canConsume(int)} methods to signal that any token type is allowed to be matched.
+ */
+ public static final int ANY_TYPE = Integer.MIN_VALUE;
+
protected final String inputString;
protected final String inputUppercased;
private final char[] inputContent;
@@ -425,6 +441,9 @@
/**
* Attempt to consume this current token as long as it matches the expected value, or throw an exception if the token does not
* match.
+ * <p>
+ * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
+ * </p>
*
* @param expected the expected value of the current token
* @throws ParsingException if the current token doesn't match the supplied value
@@ -436,7 +455,7 @@
throw new ParsingException(tokens.get(tokens.size() - 1).position(), msg);
}
// Get the value from the current token ...
- if (!currentToken().matches(expected)) {
+ if (expected != ANY_VALUE && !currentToken().matches(expected)) {
String found = currentToken().value();
Position pos = currentToken().position();
String fragment = generateFragment();
@@ -473,6 +492,9 @@
/**
* Attempt to consume this current token as long as it matches the expected token type, or throw an exception if the token does
* not match.
+ * <p>
+ * The {@link #ANY_TYPE ANY_TYPE} constant can be used in the expected values as a wildcard.
+ * </p>
*
* @param expectedType the expected token type of the current token
* @throws ParsingException if the current token doesn't match the supplied value
@@ -484,7 +506,7 @@
throw new ParsingException(tokens.get(tokens.size() - 1).position(), msg);
}
// Get the value from the current token ...
- if (currentToken().type() != expectedType) {
+ if (expectedType != ANY_TYPE && currentToken().type() != expectedType) {
String found = currentToken().value();
Position pos = currentToken().position();
String fragment = generateFragment();
@@ -497,6 +519,9 @@
/**
* Attempt to consume this current token and the next tokens as long as they match the expected values, or throw an exception
* if the token does not match.
+ * <p>
+ * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
+ * </p>
*
* @param expected the expected value of the current token
* @param expectedForNextTokens the expected values of the following tokens
@@ -505,10 +530,6 @@
*/
public void consume( String expected,
String... expectedForNextTokens ) throws ParsingException, IllegalStateException {
- if (completed) {
- String msg = CommonI18n.noMoreContentButWasExpectingToken.text(expected);
- throw new ParsingException(tokens.get(tokens.size() - 1).position(), msg);
- }
consume(expected);
for (String nextExpected : expectedForNextTokens) {
consume(nextExpected);
@@ -518,6 +539,9 @@
/**
* Attempt to consume this current token if it matches the expected value, and return whether this method was indeed able to
* consume the token.
+ * <p>
+ * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected value as a wildcard.
+ * </p>
*
* @param expected the expected value of the current token
* @return true if the current token did match and was consumed, or false if the current token did not match and therefore was
@@ -525,7 +549,7 @@
* @throws IllegalStateException if this method was called before the stream was {@link #start() started}
*/
public boolean canConsume( String expected ) throws IllegalStateException {
- if (completed || !matches(expected)) return false;
+ if (!matches(expected)) return false;
moveToNextToken();
return true;
}
@@ -540,7 +564,7 @@
* @throws IllegalStateException if this method was called before the stream was {@link #start() started}
*/
public boolean canConsume( char expected ) throws IllegalStateException {
- if (completed || !matches(expected)) return false;
+ if (!matches(expected)) return false;
moveToNextToken();
return true;
}
@@ -548,6 +572,9 @@
/**
* Attempt to consume this current token if it matches the expected token type, and return whether this method was indeed able
* to consume the token.
+ * <p>
+ * The {@link #ANY_TYPE ANY_TYPE} constant can be used in the expected type as a wildcard.
+ * </p>
*
* @param expectedType the expected token type of the current token
* @return true if the current token did match and was consumed, or false if the current token did not match and therefore was
@@ -555,7 +582,7 @@
* @throws IllegalStateException if this method was called before the stream was {@link #start() started}
*/
public boolean canConsume( int expectedType ) throws IllegalStateException {
- if (completed || !matches(expectedType)) return false;
+ if (!matches(expectedType)) return false;
moveToNextToken();
return true;
}
@@ -576,6 +603,9 @@
*
* </pre>
* </p>
+ * <p>
+ * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
+ * </p>
*
* @param currentExpected the expected value of the current token
* @param expectedForNextTokens the expected values of the following tokens
@@ -589,10 +619,11 @@
ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
if (!iter.hasNext()) return false;
Token token = iter.next();
- if (!token.matches(currentExpected)) return false;
+ if (currentExpected != ANY_VALUE && !token.matches(currentExpected)) return false;
for (String nextExpected : expectedForNextTokens) {
if (!iter.hasNext()) return false;
token = iter.next();
+ if (nextExpected == ANY_VALUE) continue;
if (!token.matches(nextExpected)) return false;
}
this.tokenIterator = iter;
@@ -602,13 +633,16 @@
/**
* Determine if the current token matches the expected value.
+ * <p>
+ * The {@link #ANY_VALUE ANY_VALUE} constant can be used as a wildcard.
+ * </p>
*
* @param expected the expected value of the current token
* @return true if the current token did match, or false if the current token did not match
* @throws IllegalStateException if this method was called before the stream was {@link #start() started}
*/
public boolean matches( String expected ) throws IllegalStateException {
- return !completed && currentToken().matches(expected);
+ return !completed && (expected == ANY_VALUE || currentToken().matches(expected));
}
/**
@@ -630,11 +664,14 @@
* @throws IllegalStateException if this method was called before the stream was {@link #start() started}
*/
public boolean matches( int expectedType ) throws IllegalStateException {
- return !completed && currentToken().type() == expectedType;
+ return !completed && (expectedType == ANY_TYPE || currentToken().type() == expectedType);
}
/**
* Determine if the next few tokens match the expected values.
+ * <p>
+ * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
+ * </p>
*
* @param currentExpected the expected value of the current token
* @param expectedForNextTokens the expected values for the following tokens
@@ -647,10 +684,11 @@
ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
if (!iter.hasNext()) return false;
Token token = iter.next();
- if (!token.matches(currentExpected)) return false;
+ if (currentExpected != ANY_VALUE && !token.matches(currentExpected)) return false;
for (String nextExpected : expectedForNextTokens) {
if (!iter.hasNext()) return false;
token = iter.next();
+ if (nextExpected == ANY_VALUE) continue;
if (!token.matches(nextExpected)) return false;
}
return true;
@@ -658,6 +696,9 @@
/**
* Determine if the next few tokens have the supplied types.
+ * <p>
+ * The {@link #ANY_TYPE ANY_TYPE} constant can be used in the expected values as a wildcard.
+ * </p>
*
* @param currentExpectedType the expected type of the current token
* @param expectedTypeForNextTokens the expected type for the following tokens
@@ -670,10 +711,11 @@
ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
if (!iter.hasNext()) return false;
Token token = iter.next();
- if (currentToken().type() != currentExpectedType) return false;
+ if (currentExpectedType != ANY_TYPE && currentToken().type() != currentExpectedType) return false;
for (int nextExpectedType : expectedTypeForNextTokens) {
if (!iter.hasNext()) return false;
token = iter.next();
+ if (nextExpectedType == ANY_TYPE) continue;
if (token.type() != nextExpectedType) return false;
}
return true;
Modified: trunk/dna-common/src/test/java/org/jboss/dna/common/text/SampleSqlParser.java
===================================================================
--- trunk/dna-common/src/test/java/org/jboss/dna/common/text/SampleSqlParser.java 2009-09-02 18:06:33 UTC (rev 1185)
+++ trunk/dna-common/src/test/java/org/jboss/dna/common/text/SampleSqlParser.java 2009-09-02 18:07:00 UTC (rev 1186)
@@ -36,6 +36,7 @@
public List<Statement> parse( String ddl ) {
TokenStream tokens = new TokenStream(ddl, TokenStream.basicTokenizer(false), false);
List<Statement> statements = new LinkedList<Statement>();
+ tokens.start();
while (tokens.hasNext()) {
if (tokens.matches("SELECT")) {
Modified: trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamTest.java
===================================================================
--- trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamTest.java 2009-09-02 18:06:33 UTC (rev 1185)
+++ trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamTest.java 2009-09-02 18:07:00 UTC (rev 1186)
@@ -319,4 +319,32 @@
assertThat(tokens.canConsume("FROM", "THIS", "TABLE"), is(true));
assertThat(tokens.hasNext(), is(false));
}
+
+ @Test
+ public void shouldConsumeMultipleTokensWithAnyValueConstant() {
+ makeCaseInsensitive();
+ // Unable to consume unless they all match ...
+ tokens.consume("SELECT", "ALL", TokenStream.ANY_VALUE);
+ tokens.consume("FROM", "THIS", "TABLE");
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test
+ public void shouldConsumeTokenWithAnyValueConstant() {
+ makeCaseInsensitive();
+ // Unable to consume unless they all match ...
+ tokens.consume("SELECT", "ALL");
+ tokens.consume(TokenStream.ANY_VALUE);
+ tokens.consume("FROM", "THIS", "TABLE");
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test
+ public void shouldReturnTrueFromCanConsumeMultipleTokensWithAnyValueConstant() {
+ makeCaseInsensitive();
+ // Unable to consume unless they all match ...
+ assertThat(tokens.canConsume("SELECT", "ALL", TokenStream.ANY_VALUE, "FRM", "THIS", "TABLE"), is(false));
+ assertThat(tokens.canConsume("SELECT", "ALL", "COLUMNS", "FROM", TokenStream.ANY_VALUE, "TABLE"), is(true));
+ assertThat(tokens.hasNext(), is(false));
+ }
}
14 years, 8 months
DNA SVN: r1185 - in trunk/dna-common/src: test/java/org/jboss/dna/common/text and 1 other directory.
by dna-commits@lists.jboss.org
Author: rhauch
Date: 2009-09-02 14:06:33 -0400 (Wed, 02 Sep 2009)
New Revision: 1185
Modified:
trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java
trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamBasicTokenizerTest.java
Log:
Corrected the NPE for the position of each token.
Modified: trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java
===================================================================
--- trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java 2009-09-02 18:06:06 UTC (rev 1184)
+++ trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java 2009-09-02 18:06:33 UTC (rev 1185)
@@ -923,9 +923,11 @@
* Create a single-character token at the supplied index in the character stream. The token type is set to 0, meaning this
* is equivalent to calling <code>addToken(index,index+1)</code> or <code>addToken(index,index+1,0)</code>.
*
+ * @param position the position (line and column numbers) of this new token; may not be null
* @param index the index of the character to appear in the token; must be a valid index in the stream
*/
- void addToken( int index );
+ void addToken( Position position,
+ int index );
/**
* Create a single- or multi-character token with the characters in the range given by the starting and ending index in
@@ -933,10 +935,12 @@
* practice when using 0-based indexes). The token type is set to 0, meaning this is equivalent to calling <code>
* addToken(startIndex,endIndex,0)</code> .
*
+ * @param position the position (line and column numbers) of this new token; may not be null
* @param startIndex the index of the first character to appear in the token; must be a valid index in the stream
* @param endIndex the index just past the last character to appear in the token; must be a valid index in the stream
*/
- void addToken( int startIndex,
+ void addToken( Position position,
+ int startIndex,
int endIndex );
/**
@@ -944,11 +948,13 @@
* starting and ending index in the character stream. The character at the ending index is <i>not</i> included in the
* token (as this is standard practice when using 0-based indexes).
*
+ * @param position the position (line and column numbers) of this new token; may not be null
* @param startIndex the index of the first character to appear in the token; must be a valid index in the stream
* @param endIndex the index just past the last character to appear in the token; must be a valid index in the stream
* @param type the type of the token
*/
- void addToken( int startIndex,
+ void addToken( Position position,
+ int startIndex,
int endIndex,
int type );
}
@@ -980,13 +986,16 @@
private final int startIndex;
private final int endIndex;
private final int type;
+ private final Position position;
public CaseSensitiveToken( int startIndex,
int endIndex,
- int type ) {
+ int type,
+ Position position ) {
this.startIndex = startIndex;
this.endIndex = endIndex;
this.type = type;
+ this.position = position;
}
/**
@@ -1065,7 +1074,7 @@
*/
@Override
public Position position() {
- return null;
+ return position;
}
protected String matchString() {
@@ -1087,8 +1096,9 @@
protected class CaseInsensitiveToken extends CaseSensitiveToken {
public CaseInsensitiveToken( int startIndex,
int endIndex,
- int type ) {
- super(startIndex, endIndex, type);
+ int type,
+ Position position ) {
+ super(startIndex, endIndex, type, position);
}
/**
@@ -1108,22 +1118,24 @@
/**
* {@inheritDoc}
*
- * @see org.jboss.dna.common.text.TokenStream.Tokens#addToken(int)
+ * @see org.jboss.dna.common.text.TokenStream.Tokens#addToken(org.jboss.dna.common.text.TokenStream.Position, int)
*/
@Override
- public final void addToken( int index ) {
- addToken(index, index + 1, 0);
+ public void addToken( Position position,
+ int index ) {
+ addToken(position, index, index + 1, 0);
}
/**
* {@inheritDoc}
*
- * @see org.jboss.dna.common.text.TokenStream.Tokens#addToken(int, int)
+ * @see org.jboss.dna.common.text.TokenStream.Tokens#addToken(Position, int, int)
*/
@Override
- public final void addToken( int startIndex,
+ public final void addToken( Position position,
+ int startIndex,
int endIndex ) {
- addToken(startIndex, endIndex, 0);
+ addToken(position, startIndex, endIndex, 0);
}
/**
@@ -1138,13 +1150,14 @@
/**
* {@inheritDoc}
*
- * @see org.jboss.dna.common.text.TokenStream.Tokens#addToken(int, int, int)
+ * @see org.jboss.dna.common.text.TokenStream.Tokens#addToken(Position,int, int, int)
*/
@Override
- public void addToken( int startIndex,
+ public void addToken( Position position,
+ int startIndex,
int endIndex,
int type ) {
- tokens.add(new CaseSensitiveToken(startIndex, endIndex, type));
+ tokens.add(new CaseSensitiveToken(startIndex, endIndex, type, position));
}
}
@@ -1152,13 +1165,14 @@
/**
* {@inheritDoc}
*
- * @see org.jboss.dna.common.text.TokenStream.Tokens#addToken(int, int, int)
+ * @see org.jboss.dna.common.text.TokenStream.Tokens#addToken(Position,int, int, int)
*/
@Override
- public void addToken( int startIndex,
+ public void addToken( Position position,
+ int startIndex,
int endIndex,
int type ) {
- tokens.add(new CaseInsensitiveToken(startIndex, endIndex, type));
+ tokens.add(new CaseInsensitiveToken(startIndex, endIndex, type, position));
}
}
@@ -1500,10 +1514,10 @@
case '|':
case '=':
case ':':
- tokens.addToken(input.index(), input.index() + 1, SYMBOL);
+ tokens.addToken(input.position(), input.index(), input.index() + 1, SYMBOL);
break;
case '.':
- tokens.addToken(input.index(), input.index() + 1, DECIMAL);
+ tokens.addToken(input.position(), input.index(), input.index() + 1, DECIMAL);
break;
case '\"':
int startIndex = input.index();
@@ -1524,7 +1538,7 @@
throw new ParsingException(startingPosition, msg);
}
int endIndex = input.index() + 1; // beyond last character read
- tokens.addToken(startIndex, endIndex, DOUBLE_QUOTED_STRING);
+ tokens.addToken(input.position(), startIndex, endIndex, DOUBLE_QUOTED_STRING);
break;
case '\'':
startIndex = input.index();
@@ -1545,7 +1559,7 @@
throw new ParsingException(startingPosition, msg);
}
endIndex = input.index() + 1; // beyond last character read
- tokens.addToken(startIndex, endIndex, SINGLE_QUOTED_STRING);
+ tokens.addToken(input.position(), startIndex, endIndex, SINGLE_QUOTED_STRING);
break;
case '/':
startIndex = input.index();
@@ -1563,7 +1577,7 @@
if (!foundLineTerminator) ++endIndex; // must point beyond last char
if (c == '\r' && input.isNext('\n')) input.next();
if (useComments) {
- tokens.addToken(startIndex, endIndex, COMMENT);
+ tokens.addToken(input.position(), startIndex, endIndex, COMMENT);
}
} else if (input.isNext('*')) {
// Multi-line comment ...
@@ -1574,11 +1588,11 @@
if (input.hasNext()) input.next(); // consume the '/'
if (useComments) {
endIndex = input.index() + 1; // the token will include the '/' and '*' characters
- tokens.addToken(startIndex, endIndex, COMMENT);
+ tokens.addToken(input.position(), startIndex, endIndex, COMMENT);
}
} else {
// just a regular slash ...
- tokens.addToken(startIndex, startIndex + 1, SYMBOL);
+ tokens.addToken(input.position(), startIndex, startIndex + 1, SYMBOL);
}
break;
default:
@@ -1588,7 +1602,7 @@
c = input.next();
}
endIndex = input.index() + 1; // beyond last character that was included
- tokens.addToken(startIndex, endIndex, WORD);
+ tokens.addToken(input.position(), startIndex, endIndex, WORD);
}
}
}
Modified: trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamBasicTokenizerTest.java
===================================================================
--- trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamBasicTokenizerTest.java 2009-09-02 18:06:06 UTC (rev 1184)
+++ trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamBasicTokenizerTest.java 2009-09-02 18:06:33 UTC (rev 1185)
@@ -29,6 +29,7 @@
import org.jboss.dna.common.text.TokenStream.BasicTokenizer;
import org.jboss.dna.common.text.TokenStream.CharacterArrayStream;
import org.jboss.dna.common.text.TokenStream.ParsingException;
+import org.jboss.dna.common.text.TokenStream.Position;
import org.jboss.dna.common.text.TokenStream.Tokens;
import org.junit.Before;
import org.junit.Test;
@@ -48,20 +49,23 @@
final LinkedList<int[]> tokenValues = new LinkedList<int[]>();
tokenFactory = new Tokens() {
@Override
- public void addToken( int index ) {
+ public void addToken( Position position,
+ int index ) {
int[] token = new int[] {index, index + 1, 0};
tokenValues.add(token);
}
@Override
- public void addToken( int startIndex,
+ public void addToken( Position position,
+ int startIndex,
int endIndex ) {
int[] token = new int[] {startIndex, endIndex, 0};
tokenValues.add(token);
}
@Override
- public void addToken( int startIndex,
+ public void addToken( Position position,
+ int startIndex,
int endIndex,
int type ) {
int[] token = new int[] {startIndex, endIndex, type};
14 years, 8 months
DNA SVN: r1184 - trunk/dna-common/src/main/java/org/jboss/dna/common/text.
by dna-commits@lists.jboss.org
Author: rhauch
Date: 2009-09-02 14:06:06 -0400 (Wed, 02 Sep 2009)
New Revision: 1184
Modified:
trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java
Log:
Added a toString() method to the Token implementations to make debugging easier. Also changed a LinkedList to an ArrayList for the same reason.
Modified: trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java
===================================================================
--- trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java 2009-09-02 18:05:46 UTC (rev 1183)
+++ trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java 2009-09-02 18:06:06 UTC (rev 1184)
@@ -23,8 +23,8 @@
*/
package org.jboss.dna.common.text;
+import java.util.ArrayList;
import java.util.Iterator;
-import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;
@@ -1071,6 +1071,16 @@
protected String matchString() {
return inputString;
}
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString() {
+ return value();
+ }
}
@Immutable
@@ -1093,7 +1103,7 @@
}
protected abstract class TokenFactory implements Tokens {
- protected final List<Token> tokens = new LinkedList<Token>();
+ protected final List<Token> tokens = new ArrayList<Token>();
/**
* {@inheritDoc}
14 years, 8 months
DNA SVN: r1183 - in trunk/dna-common/src: main/java/org/jboss/dna/common/text and 2 other directories.
by dna-commits@lists.jboss.org
Author: rhauch
Date: 2009-09-02 14:05:46 -0400 (Wed, 02 Sep 2009)
New Revision: 1183
Added:
trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java
trunk/dna-common/src/test/java/org/jboss/dna/common/text/SampleSqlParser.java
trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamBasicTokenizerTest.java
trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamTest.java
Modified:
trunk/dna-common/src/main/java/org/jboss/dna/common/CommonI18n.java
trunk/dna-common/src/main/resources/org/jboss/dna/common/CommonI18n.properties
Log:
Added very simple parser framework that provides a very effective and lightweight system for building a parser with code. It is not as capable as a parser-generator (like ANTLR), but it is very easy to do even moderately-complex grammars, and very easy to debug.
Modified: trunk/dna-common/src/main/java/org/jboss/dna/common/CommonI18n.java
===================================================================
--- trunk/dna-common/src/main/java/org/jboss/dna/common/CommonI18n.java 2009-08-31 13:46:22 UTC (rev 1182)
+++ trunk/dna-common/src/main/java/org/jboss/dna/common/CommonI18n.java 2009-09-02 18:05:46 UTC (rev 1183)
@@ -90,6 +90,19 @@
public static I18n requiredToSuppliedParameterMismatch;
public static I18n unableToAccessResourceFileFromClassLoader;
+ // TokenStream
+ public static I18n noMoreContent;
+ public static I18n noMoreContentButWasExpectingToken;
+ public static I18n unexpectedToken;
+ public static I18n noMoreContentButWasExpectingCharacter;
+ public static I18n unexpectedCharacter;
+ public static I18n noMoreContentButWasExpectingTokenType;
+ public static I18n unexpectedTokenType;
+ public static I18n startMethodMustBeCalledBeforeNext;
+ public static I18n startMethodMustBeCalledBeforeConsumingOrMatching;
+ public static I18n noMatchingDoubleQuoteFound;
+ public static I18n noMatchingSingleQuoteFound;
+
static {
try {
I18n.initialize(CommonI18n.class);
Added: trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java
===================================================================
--- trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java (rev 0)
+++ trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java 2009-09-02 18:05:46 UTC (rev 1183)
@@ -0,0 +1,1586 @@
+/*
+ * JBoss DNA (http://www.jboss.org/dna)
+ * See the COPYRIGHT.txt file distributed with this work for information
+ * regarding copyright ownership. Some portions may be licensed
+ * to Red Hat, Inc. under one or more contributor license agreements.
+ * See the AUTHORS.txt file in the distribution for a full listing of
+ * individual contributors.
+ *
+ * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
+ * is licensed to you under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * JBoss DNA is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
+ */
+package org.jboss.dna.common.text;
+
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.NoSuchElementException;
+import net.jcip.annotations.Immutable;
+import net.jcip.annotations.NotThreadSafe;
+import org.jboss.dna.common.CommonI18n;
+import org.jboss.dna.common.util.CheckArg;
+
+/**
+ * A foundation for basic parsers that tokenizes input content and allows parsers to easily access and use those tokens. A
+ * {@link TokenStream} object literally represents the stream of {@link Token} objects that each represent a word, symbol, comment
+ * or other lexically-relevant piece of information. This simple framework makes it very easy to create a parser that walks
+ * through (or "consumes") the tokens in the order they appear and do something useful with that content (usually creating another
+ * representation of the content, such as some domain-specific Abstract Syntax Tree or object model).
+ * <p>
+ * </p>
+ * <h3>The parts</h3>
+ * <p>
+ * This simple framework consists of a couple of pieces that fit together to do the whole job of parsing input content.
+ * </p>
+ * <p>
+ * The {@link Tokenizer} is responsible for consuming the character-level input content and constructing {@link Token} objects for
+ * the different words, symbols, or other meaningful elements contained in the content. Each Token object is a simple object that
+ * records the character(s) that make up the token's value, but it does this in a very lightweight and efficient way by pointing
+ * to the original character stream. Each token can be assigned a parser-specific integral <i>token type</i> that may make it
+ * easier to quickly figure out later in the process what kind of information each token represents. The general idea is to
+ * keep the Tokenizer logic very simple, and very often Tokenizers will merely look for the different kinds of characters (e.g.,
+ * symbols, letters, digits, etc.) as well as things like quoted strings and comments. However, Tokenizers are never called by the
+ * parser, but instead are always given to the TokenStream that then calls the Tokenizer at the appropriate time.
+ * </p>
+ * <p>
+ * The {@link TokenStream} is supplied the input content, a Tokenizer implementation, and a few options. Its job is to prepare the
+ * content for processing, call the Tokenizer implementation to create the series of Token objects, and then provide an interface
+ * for walking through and consuming the tokens. This interface makes it possible to discover the value and type of the current
+ * token, and consume the current token and move to the next token. Plus, the interface has been designed to make the code that
+ * works with the tokens to be as readable as possible.
+ * </p>
+ * <p>
+ * The final component in this framework is the <b>Parser</b>. The parser is really any class that takes as input the content to
+ * be parsed and that outputs some meaningful information. The parser will do this by defining the Tokenizer, constructing a
+ * TokenStream object, and then using the TokenStream to walk through the sequence of Tokens and produce some meaningful
+ * representation of the content. Parsers can create instances of some object model, or they can create a domain-specific Abstract
+ * Syntax Tree representation.
+ * </p>
+ * <p>
+ * The benefit of breaking the responsibility along these lines is that the TokenStream implementation is able to encapsulate
+ * quite a bit of very tedious and very useful functionality, while still allowing a lot of flexibility as to what makes up the
+ * different tokens. It also makes the parser very easy to write and read (and thus maintain), without placing very many
+ * restrictions on how that logic is to be defined. Plus, because the TokenStream takes responsibility for tracking the positions
+ * of every token (including line and column numbers), it can automatically produce meaningful errors.
+ * </p>
+ * <h3>Consuming tokens</h3>
+ * <p>
+ * A parser works with the tokens on the TokenStream using a variety of methods:
+ * <ul>
+ * <li>The {@link #start()} method must be called before any of the other methods. It performs initialization and tokenizing, and
+ * prepares the internal state by finding the first token and setting an internal <i>current token</i> reference.</li>
+ * <li>The {@link #hasNext()} method can be called repeatedly to determine if there is another token after the <i>current
+ * token</i>. This is often useful when an unknown number of tokens is to be processed, and behaves very similarly to the
+ * {@link Iterator#hasNext()} method.</li>
+ * <li>The {@link #consume()} method returns the {@link Token#value() value} of the <i>current token</i> and moves the <i>current
+ * token</i> pointer to the next available token.</li>
+ * <li>The {@link #consume(String)} and {@link #consume(char)} methods look at the <i>current token</i> and ensure the token's
+ * {@link Token#value() value} matches the value supplied as a method parameter, or they throw a {@link ParsingException} if the
+ * values don't match. The {@link #consume(int)} method works similarly, except that it attempts to match the token's
+ * {@link Token#type() type}. And, the {@link #consume(String, String...)} is a convenience method that is equivalent to calling
+ * {@link #consume(String)} for each of the arguments.</li>
+ * <li>The {@link #canConsume(String)} and {@link #canConsume(char)} methods look at the <i>current token</i> and check whether
+ * the token's {@link Token#value() value} matches the value supplied as a method parameter. If there is a match, the method
+ * advances the <i>current token</i> reference and returns true. Otherwise, the <i>current token</i> does not match and the method
+ * returns false without advancing the <i>current token</i> reference or throwing a ParsingException. Similarly, the
+ * {@link #canConsume(int)} method checks the token's {@link Token#type() type} rather than the value, consuming the token and
+ * returning true if there is a match, or just returning false if there is no match. The {@link #canConsume(String, String...)}
+ * method determines whether all of the supplied values can be consumed in the given order.</li>
+ * <li>The {@link #matches(String)} and {@link #matches(char)} methods look at the <i>current token</i> and check whether the
+ * token's {@link Token#value() value} matches the value supplied as a method parameter. The method then returns whether there was
+ * a match, but does <i>not</i> advance the <i>current token</i> pointer. Similarly, the {@link #matches(int)} method checks the
+ * token's {@link Token#type() type} rather than the value. The {@link #matches(String, String...)} method is a convenience method
+ * that is equivalent to calling {@link #matches(String)} for each of the arguments, and the {@link #matches(int, int...)} method
+ * is a convenience method that is equivalent to calling {@link #matches(int)} for each of the arguments.</li>
+ * <li>The {@link #matchesAnyOf(String, String...)} methods look at the <i>current token</i> and check whether the token's
+ * {@link Token#value() value} matches at least one of the values supplied as method parameters. The method then returns whether
+ * there was a match, but does <i>not</i> advance the <i>current token</i> pointer. Similarly, the
+ * {@link #matchesAnyOf(int, int...)} method checks the token's {@link Token#type() type} rather than the value.</li>
+ * </ul>
+ * </p>
+ * <p>
+ * With these methods, it's very easy to create a parser that looks at the current token to decide what to do, and then consume
+ * that token, and repeat this process.
+ * </p>
+ * <h3>Example parser</h3>
+ * <p>
+ * Here is an example of a very simple parser that parses very simple and limited SQL <code>SELECT</code> and <code>DELETE</code>
+ * statements, such as <code>SELECT * FROM Customers</code> or
+ * <code>SELECT Name, StreetAddress AS Address, City, Zip FROM Customers</code> or
+ * <code>DELETE FROM Customers WHERE Zip=12345</code>:
+ *
+ * <pre>
+ * public class SampleSqlSelectParser {
+ * public List<Statement> parse( String ddl ) {
+ * TokenStream tokens = new TokenStream(ddl, new SqlTokenizer(), false);
+ * List<Statement> statements = new LinkedList<Statement>();
+ * while (tokens.hasNext()) {
+ * if (tokens.matches("SELECT")) {
+ * statements.add(parseSelect(tokens));
+ * } else {
+ * statements.add(parseDelete(tokens));
+ * }
+ * }
+ * return statements;
+ * }
+ *
+ * protected Select parseSelect( TokenStream tokens ) throws ParsingException {
+ * tokens.consume("SELECT");
+ * List<Column> columns = parseColumns(tokens);
+ * tokens.consume("FROM");
+ * String tableName = tokens.consume();
+ * return new Select(tableName, columns);
+ * }
+ *
+ * protected List<Column> parseColumns( TokenStream tokens ) throws ParsingException {
+ * List<Column> columns = new LinkedList<Column>();
+ * if (tokens.matches('*')) {
+ * tokens.consume(); // leave the columns empty to signal wildcard
+ * } else {
+ * // Read names until we see a ','
+ * do {
+ * String columnName = tokens.consume();
+ * if (tokens.canConsume("AS")) {
+ * String columnAlias = tokens.consume();
+ * columns.add(new Column(columnName, columnAlias));
+ * } else {
+ * columns.add(new Column(columnName, null));
+ * }
+ * } while (tokens.canConsume(','));
+ * }
+ * return columns;
+ * }
+ *
+ * protected Delete parseDelete( TokenStream tokens ) throws ParsingException {
+ * tokens.consume("DELETE", "FROM");
+ * String tableName = tokens.consume();
+ * tokens.consume("WHERE");
+ * String lhs = tokens.consume();
+ * tokens.consume('=');
+ * String rhs = tokens.consume();
+ * return new Delete(tableName, new Criteria(lhs, rhs));
+ * }
+ * }
+ * public abstract class Statement { ... }
+ * public class Query extends Statement { ... }
+ * public class Delete extends Statement { ... }
+ * public class Column { ... }
+ * </pre>
+ *
+ * This example shows an idiomatic way of writing a parser that is stateless and thread-safe. The <code>parse(...)</code> method
+ * takes the input as a parameter, and returns the domain-specific representation that resulted from the parsing. All other
+ * methods are utility methods that simply encapsulate common logic or make the code more readable.
+ * </p>
+ * <p>
+ * In the example, the <code>parse(...)</code> first creates a TokenStream object (using a Tokenizer implementation that is not
+ * shown), and then loops as long as there are more tokens to read. As it loops, if the next token is "SELECT", the parser calls
+ * the <code>parseSelect(...)</code> method which immediately consumes a "SELECT" token, the names of the columns separated by
+ * commas (or a '*' if all columns are to be selected), a "FROM" token, and the name of the table being queried. The
+ * <code>parseSelect(...)</code> method returns a <code>Select</code> object, which is then added to the list of statements in the
+ * <code>parse(...)</code> method. The parser handles the "DELETE" statements in a similar manner.
+ * </p>
+ * <h3>Case sensitivity</h3>
+ * <p>
+ * Very often grammars do not require the case of keywords to match. This can make parsing a challenge, because all combinations
+ * of case need to be used. The TokenStream framework provides a very simple solution that requires no more effort than providing
+ * a boolean parameter to the constructor.
+ * </p>
+ * <p>
+ * When a <code>false</code> value is provided for the <code>caseSensitive</code> parameter, the TokenStream performs all
+ * matching operations as if each token's value were in uppercase only. This means that the arguments supplied to the
+ * <code>match(...)</code>, <code>canConsume(...)</code>, and <code>consume(...)</code> methods should be upper-cased. Note that
+ * the <i>actual value</i> of each token remains the <i>actual</i> case as it appears in the input.
+ * </p>
+ * <p>
+ * Of course, when the TokenStream is created with a <code>true</code> value for the <code>caseSensitive</code> parameter, the
+ * matching is performed using the <i>actual</i> value as it appears in the input content.
+ * </p>
+ * <h3>Whitespace</h3>
+ * <p>
+ * Many grammars are independent of line breaks or whitespace, allowing a lot of flexibility when writing the content. The
+ * TokenStream framework makes it very easy to ignore line breaks and whitespace. To do so, the Tokenizer implementation must
+ * simply not include the line break character sequences and whitespace in the token ranges. Since none of the tokens contain
+ * whitespace, the parser never has to deal with them.
+ * </p>
+ * <p>
+ * Of course, many parsers will require that some whitespace be included. For example, whitespace within a quoted string may be
+ * needed by the parser. In this case, the Tokenizer should simply include the whitespace characters in the tokens.
+ * </p>
+ * <h3>Writing a Tokenizer</h3>
+ * <p>
+ * Each parser will likely have its own {@link Tokenizer} implementation that contains the parser-specific logic about how to
+ * break the content into token objects. Generally, the easiest way to do this is to simply iterate through the character sequence
+ * passed into the {@link Tokenizer#tokenize(CharacterStream, Tokens) tokenize(...)} method, and use a switch statement to decide
+ * what to do.
+ * </p>
+ * <p>
+ * Here is the code for a very basic Tokenizer implementation that ignores whitespace, line breaks and Java-style (multi-line and
+ * end-of-line) comments, while constructing single tokens for each quoted string.
+ *
+ * <pre>
+ * public class BasicTokenizer implements Tokenizer {
+ * public void tokenize( CharacterStream input,
+ * Tokens tokens ) throws ParsingException {
+ * while (input.hasNext()) {
+ * char c = input.next();
+ * switch (c) {
+ * case ' ':
+ * case '\t':
+ * case '\n':
+ * case '\r':
+ * // Just skip these whitespace characters ...
+ * break;
+ * case '-':
+ * case '(':
+ * case ')':
+ * case '{':
+ * case '}':
+ * case '*':
+ * case ',':
+ * case ';':
+ * case '+':
+ * case '%':
+ * case '?':
+ * case '$':
+ * case '[':
+ * case ']':
+ * case '!':
+ * case '<':
+ * case '>':
+ * case '|':
+ * case '=':
+ * case ':':
+ * tokens.addToken(input.index(), input.index() + 1, SYMBOL);
+ * break;
+ * case '.':
+ * tokens.addToken(input.index(), input.index() + 1, DECIMAL);
+ * break;
+ * case '\"':
+ * int startIndex = input.index();
+ * Position startingPosition = input.position();
+ * boolean foundClosingQuote = false;
+ * while (input.hasNext()) {
+ * c = input.next();
+ * if (c == '\\' && input.isNext('"')) {
+ * c = input.next(); // consume the ' character since it is escaped
+ * } else if (c == '"') {
+ * foundClosingQuote = true;
+ * break;
+ * }
+ * }
+ * if (!foundClosingQuote) {
+ * throw new ParsingException(startingPosition, "No matching closing double quote found");
+ * }
+ * int endIndex = input.index() + 1; // beyond last character read
+ * tokens.addToken(startIndex, endIndex, DOUBLE_QUOTED_STRING);
+ * break;
+ * case '\'':
+ * startIndex = input.index();
+ * startingPosition = input.position();
+ * foundClosingQuote = false;
+ * while (input.hasNext()) {
+ * c = input.next();
+ * if (c == '\\' && input.isNext('\'')) {
+ * c = input.next(); // consume the ' character since it is escaped
+ * } else if (c == '\'') {
+ * foundClosingQuote = true;
+ * break;
+ * }
+ * }
+ * if (!foundClosingQuote) {
+ * throw new ParsingException(startingPosition, "No matching closing single quote found");
+ * }
+ * endIndex = input.index() + 1; // beyond last character read
+ * tokens.addToken(startIndex, endIndex, SINGLE_QUOTED_STRING);
+ * break;
+ * case '/':
+ * startIndex = input.index();
+ * if (input.isNext('/')) {
+ * // End-of-line comment ...
+ * boolean foundLineTerminator = false;
+ * while (input.hasNext()) {
+ * c = input.next();
+ * if (c == '\n' || c == '\r') {
+ * foundLineTerminator = true;
+ * break;
+ * }
+ * }
+ * endIndex = input.index(); // the token won't include the '\n' or '\r' character(s)
+ * if (!foundLineTerminator) ++endIndex; // must point beyond last char
+ * if (c == '\r' && input.isNext('\n')) input.next();
+ * if (useComments) {
+ * tokens.addToken(startIndex, endIndex, COMMENT);
+ * }
+ * } else if (input.isNext('*')) {
+ * // Multi-line comment ...
+ * while (input.hasNext() && !input.isNext('*', '/')) {
+ * c = input.next();
+ * }
+ * if (input.hasNext()) input.next(); // consume the '*'
+ * if (input.hasNext()) input.next(); // consume the '/'
+ * if (useComments) {
+ * endIndex = input.index() + 1; // the token will include the '/' and '*' characters
+ * tokens.addToken(startIndex, endIndex, COMMENT);
+ * }
+ * } else {
+ * // just a regular slash ...
+ * tokens.addToken(startIndex, startIndex + 1, SYMBOL);
+ * }
+ * break;
+ * default:
+ * startIndex = input.index();
+ * // Read until another whitespace/symbol/decimal/slash is found
+ * while (input.hasNext() && !(input.isNextWhitespace() || input.isNextAnyOf("/.-(){}*,;+%?$[]!<>|=:"))) {
+ * c = input.next();
+ * }
+ * endIndex = input.index() + 1; // beyond last character that was included
+ * tokens.addToken(startIndex, endIndex, WORD);
+ * }
+ * }
+ * }
+ * }
+ * </pre>
+ * Tokenizers with exactly this behavior can actually be created using the {@link #basicTokenizer(boolean)} method. So while this very
+ * basic implementation is not meant to be used in all situations, it may be useful in some situations.
+ * </p>
+ */
+@NotThreadSafe
+public class TokenStream {
+
+ protected final String inputString;
+ protected final String inputUppercased;
+ private final char[] inputContent;
+ private final boolean caseSensitive;
+ private final Tokenizer tokenizer;
+ private List<Token> tokens;
+ private ListIterator<Token> tokenIterator;
+ private Token currentToken;
+ private boolean completed;
+
+ public TokenStream( String content,
+ Tokenizer tokenizer,
+ boolean caseSensitive ) {
+ CheckArg.isNotNull(content, "content");
+ CheckArg.isNotNull(tokenizer, "tokenizer");
+ this.inputString = content;
+ this.inputContent = content.toCharArray();
+ this.caseSensitive = caseSensitive;
+ this.inputUppercased = caseSensitive ? inputString : content.toUpperCase();
+ this.tokenizer = tokenizer;
+ }
+
+ /**
+ * Begin the token stream, including (if required) the tokenization of the input content.
+ *
+ * @return this object for easy method chaining; never null
+ * @throws ParsingException if an error occurs during tokenization of the content
+ */
+ public TokenStream start() throws ParsingException {
+ // Create the tokens ...
+ if (tokens == null) {
+ TokenFactory tokenFactory = caseSensitive ? new CaseSensitiveTokenFactory() : new CaseInsensitiveTokenFactory();
+ CharacterStream characterStream = new CharacterArrayStream(inputContent);
+ tokenizer.tokenize(characterStream, tokenFactory);
+ this.tokens = tokenFactory.getTokens();
+ }
+
+ // Create the iterator ...
+ tokenIterator = this.tokens.listIterator();
+ moveToNextToken();
+ return this;
+ }
+
+ /**
+ * Return the value of this token and move to the next token.
+ *
+ * @return the value of the current token
+ * @throws ParsingException if there is no such token to consume
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public String consume() throws ParsingException, IllegalStateException {
+ if (completed) {
+ String msg = CommonI18n.noMoreContent.text();
+ throw new ParsingException(tokens.get(tokens.size() - 1).position(), msg);
+ }
+ // Get the value from the current token ...
+ String result = currentToken().value();
+ moveToNextToken();
+ return result;
+ }
+
+ /**
+ * Attempt to consume this current token as long as it matches the expected value, or throw an exception if the token does not
+ * match.
+ *
+ * @param expected the expected value of the current token
+ * @throws ParsingException if the current token doesn't match the supplied value
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public void consume( String expected ) throws ParsingException, IllegalStateException {
+ if (completed) {
+ String msg = CommonI18n.noMoreContentButWasExpectingToken.text(expected);
+ throw new ParsingException(tokens.get(tokens.size() - 1).position(), msg);
+ }
+ // Get the value from the current token ...
+ if (!currentToken().matches(expected)) {
+ String found = currentToken().value();
+ Position pos = currentToken().position();
+ String fragment = generateFragment();
+ String msg = CommonI18n.unexpectedToken.text(expected, found, pos.getLine(), pos.getColumn(), fragment);
+ throw new ParsingException(pos, msg);
+ }
+ moveToNextToken();
+ }
+
+ /**
+ * Attempt to consume this current token as long as it matches the expected character, or throw an exception if the token does
+ * not match.
+ *
+ * @param expected the expected character of the current token
+ * @throws ParsingException if the current token doesn't match the supplied value
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public void consume( char expected ) throws ParsingException, IllegalStateException {
+ if (completed) {
+ String msg = CommonI18n.noMoreContentButWasExpectingCharacter.text(expected);
+ throw new ParsingException(tokens.get(tokens.size() - 1).position(), msg);
+ }
+ // Get the value from the current token ...
+ if (!currentToken().matches(expected)) {
+ String found = currentToken().value();
+ Position pos = currentToken().position();
+ String fragment = generateFragment();
+ String msg = CommonI18n.unexpectedCharacter.text(expected, found, pos.getLine(), pos.getColumn(), fragment);
+ throw new ParsingException(pos, msg);
+ }
+ moveToNextToken();
+ }
+
+ /**
+ * Attempt to consume this current token as long as it matches the expected token type, or throw an exception if the token does
+ * not match.
+ *
+ * @param expectedType the expected token type of the current token
+ * @throws ParsingException if the current token doesn't match the supplied value
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public void consume( int expectedType ) throws ParsingException, IllegalStateException {
+ if (completed) {
+ String msg = CommonI18n.noMoreContentButWasExpectingTokenType.text(expectedType);
+ throw new ParsingException(tokens.get(tokens.size() - 1).position(), msg);
+ }
+ // Get the value from the current token ...
+ if (currentToken().type() != expectedType) {
+ String found = currentToken().value();
+ Position pos = currentToken().position();
+ String fragment = generateFragment();
+ String msg = CommonI18n.unexpectedTokenType.text(expectedType, found, pos.getLine(), pos.getColumn(), fragment);
+ throw new ParsingException(pos, msg);
+ }
+ moveToNextToken();
+ }
+
+ /**
+ * Attempt to consume this current token and the next tokens as long as they match the expected values, or throw an exception
+ * if the token does not match.
+ *
+ * @param expected the expected value of the current token
+ * @param expectedForNextTokens the expected values of the following tokens
+ * @throws ParsingException if the current token doesn't match the supplied value
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public void consume( String expected,
+ String... expectedForNextTokens ) throws ParsingException, IllegalStateException {
+ if (completed) {
+ String msg = CommonI18n.noMoreContentButWasExpectingToken.text(expected);
+ throw new ParsingException(tokens.get(tokens.size() - 1).position(), msg);
+ }
+ consume(expected);
+ for (String nextExpected : expectedForNextTokens) {
+ consume(nextExpected);
+ }
+ }
+
+ /**
+ * Attempt to consume this current token if it matches the expected value, and return whether this method was indeed able to
+ * consume the token.
+ *
+ * @param expected the expected value of the current token
+ * @return true if the current token did match and was consumed, or false if the current token did not match and therefore was
+ * not consumed
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public boolean canConsume( String expected ) throws IllegalStateException {
+ if (completed || !matches(expected)) return false;
+ moveToNextToken();
+ return true;
+ }
+
+ /**
+ * Attempt to consume this current token if it matches the expected value, and return whether this method was indeed able to
+ * consume the token.
+ *
+ * @param expected the expected value of the current token
+ * @return true if the current token did match and was consumed, or false if the current token did not match and therefore was
+ * not consumed
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public boolean canConsume( char expected ) throws IllegalStateException {
+ if (completed || !matches(expected)) return false;
+ moveToNextToken();
+ return true;
+ }
+
+ /**
+ * Attempt to consume this current token if it matches the expected token type, and return whether this method was indeed able
+ * to consume the token.
+ *
+ * @param expectedType the expected token type of the current token
+ * @return true if the current token did match and was consumed, or false if the current token did not match and therefore was
+ * not consumed
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public boolean canConsume( int expectedType ) throws IllegalStateException {
+ if (completed || !matches(expectedType)) return false;
+ moveToNextToken();
+ return true;
+ }
+
+ /**
+ * Attempt to consume this current token and the next tokens if and only if they match the expected values, and return whether
+ * this method was indeed able to consume all of the supplied tokens.
+ * <p>
+ * This is <i>not</i> the same as calling {@link #canConsume(String)} for each of the supplied arguments, since this method
+ * ensures that <i>all</i> of the supplied values can be consumed.
+ * </p>
+ * <p>
+ * This method <i>is</i> equivalent to calling the following:
+ *
+ * <pre>
+ *
+ * if ( tokens.matches(currentExpected,expectedForNextTokens) ) { tokens.consume(currentExpected,expectedForNextTokens); }
+ *
+ * </pre>
+ * </p>
+ *
+ * @param currentExpected the expected value of the current token
+ * @param expectedForNextTokens the expected values of the following tokens
+ * @return true if the current token did match and was consumed, or false if the current token did not match and therefore was
+ * not consumed
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public boolean canConsume( String currentExpected,
+ String... expectedForNextTokens ) throws IllegalStateException {
+ if (completed) return false;
+ ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
+ if (!iter.hasNext()) return false;
+ Token token = iter.next();
+ if (!token.matches(currentExpected)) return false;
+ for (String nextExpected : expectedForNextTokens) {
+ if (!iter.hasNext()) return false;
+ token = iter.next();
+ if (!token.matches(nextExpected)) return false;
+ }
+ this.tokenIterator = iter;
+ this.currentToken = tokenIterator.hasNext() ? tokenIterator.next() : null;
+ return true;
+ }
+
+ /**
+ * Determine if the current token matches the expected value.
+ *
+ * @param expected the expected value of the current token
+ * @return true if the current token did match, or false if the current token did not match
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public boolean matches( String expected ) throws IllegalStateException {
+ return !completed && currentToken().matches(expected);
+ }
+
+ /**
+ * Determine if the current token matches the expected value.
+ *
+ * @param expected the expected value of the current token
+ * @return true if the current token did match, or false if the current token did not match
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public boolean matches( char expected ) throws IllegalStateException {
+ return !completed && currentToken().matches(expected);
+ }
+
+ /**
+ * Determine whether the current token has the expected token type.
+ *
+ * @param expectedType the expected token type of the current token
+ * @return true if the current token's type matched, or false otherwise
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public boolean matches( int expectedType ) throws IllegalStateException {
+ if (completed) return false;
+ return expectedType == currentToken().type();
+ }
+
+ /**
+ * Determine whether the current token and the tokens that follow it match the supplied sequence of values. The stream is not
+ * advanced by this method.
+ *
+ * @param currentExpected the expected value of the current token
+ * @param expectedForNextTokens the expected values for the following tokens
+ * @return true if all of the tokens matched, or false otherwise
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public boolean matches( String currentExpected,
+ String... expectedForNextTokens ) throws IllegalStateException {
+ if (completed) return false;
+ // Use a look-ahead iterator starting at the current token, leaving the stream's own iterator untouched ...
+ ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
+ if (!iter.hasNext() || !iter.next().matches(currentExpected)) return false;
+ for (String expected : expectedForNextTokens) {
+ if (!iter.hasNext() || !iter.next().matches(expected)) return false;
+ }
+ return true;
+ }
+
+ /**
+ * Determine if the next few tokens have the supplied types. The stream is not advanced by this method.
+ *
+ * @param currentExpectedType the expected type of the current token
+ * @param expectedTypeForNextTokens the expected types for the following tokens
+ * @return true if the tokens did match, or false otherwise
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public boolean matches( int currentExpectedType,
+ int... expectedTypeForNextTokens ) throws IllegalStateException {
+ if (completed) return false;
+ ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
+ if (!iter.hasNext()) return false;
+ Token token = iter.next();
+ // Fix: compare the look-ahead token's type. The previous code called currentToken().type(), which
+ // bypassed the look-ahead iterator (leaving the local 'token' unused) and was inconsistent with the
+ // String-based overload of this method ...
+ if (token.type() != currentExpectedType) return false;
+ for (int nextExpectedType : expectedTypeForNextTokens) {
+ if (!iter.hasNext()) return false;
+ token = iter.next();
+ if (token.type() != nextExpectedType) return false;
+ }
+ return true;
+ }
+
+ /**
+ * Determine whether the current token's value matches any one of the supplied options.
+ *
+ * @param firstOption the first option for the value of the current token
+ * @param additionalOptions the additional options for the value of the current token
+ * @return true if the current token's value matched one of the supplied options, or false otherwise
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public boolean matchesAnyOf( String firstOption,
+ String... additionalOptions ) throws IllegalStateException {
+ if (completed) return false;
+ Token current = currentToken();
+ if (current.matches(firstOption)) {
+ return true;
+ }
+ for (String option : additionalOptions) {
+ if (current.matches(option)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Determine whether the current token's type matches any one of the supplied type options.
+ *
+ * @param firstTypeOption the first option for the type of the current token
+ * @param additionalTypeOptions the additional options for the type of the current token
+ * @return true if the current token's type matched one of the supplied options, or false otherwise
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public boolean matchesAnyOf( int firstTypeOption,
+ int... additionalTypeOptions ) throws IllegalStateException {
+ if (completed) return false;
+ int actualType = currentToken().type();
+ if (actualType == firstTypeOption) {
+ return true;
+ }
+ for (int typeOption : additionalTypeOptions) {
+ if (actualType == typeOption) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Determine whether this stream has another token beyond the current position.
+ *
+ * @return true if there is another token, or false otherwise
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ */
+ public boolean hasNext() {
+ ListIterator<Token> iter = tokenIterator;
+ if (iter == null) {
+ // The stream was never started ...
+ throw new IllegalStateException(CommonI18n.startMethodMustBeCalledBeforeNext.text());
+ }
+ return iter.hasNext();
+ }
+
+ // public Position currentPosition() {
+ // return currentToken().position();
+ // }
+ //
+ // public int currentType() {
+ // return currentToken().type();
+ // }
+
+ private void moveToNextToken() {
+ // Advance the current token, or mark the stream as completed when the tokens are exhausted ...
+ if (tokenIterator.hasNext()) {
+ currentToken = tokenIterator.next();
+ } else {
+ completed = true;
+ currentToken = null;
+ }
+ }
+
+ /**
+ * Get the current token.
+ *
+ * @return the current token; never null
+ * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
+ * @throws NoSuchElementException if there are no more tokens
+ */
+ final Token currentToken() throws IllegalStateException, NoSuchElementException {
+ Token token = currentToken;
+ if (token != null) {
+ return token;
+ }
+ // No current token: either the stream is exhausted, or it was never started ...
+ if (completed) {
+ throw new NoSuchElementException(CommonI18n.noMoreContent.text());
+ }
+ throw new IllegalStateException(CommonI18n.startMethodMustBeCalledBeforeConsumingOrMatching.text());
+ }
+
+ String generateFragment() {
+ // Highlight the location of the current token within the input ...
+ assert currentToken != null;
+ return generateFragment(inputString, currentToken.startIndex(), 20, " ===>> ");
+ }
+
+ /**
+ * Utility method to generate a highlighted fragment of a particular point in the stream.
+ *
+ * @param content the content from which the fragment should be taken; may not be null
+ * @param indexOfProblem the index of the problem point that should be highlighted; must be a valid index in the content
+ * @param charactersToIncludeBeforeAndAfter the maximum number of characters before and after the problem point to include in
+ * the fragment
+ * @param highlightText the text that should be included in the fragment at the problem point to highlight the location; null
+ * is treated the same as an empty string (no highlighting)
+ * @return the highlighted fragment; never null
+ */
+ static String generateFragment( String content,
+ int indexOfProblem,
+ int charactersToIncludeBeforeAndAfter,
+ String highlightText ) {
+ assert content != null;
+ assert indexOfProblem >= 0; // guard the lower bound as well as the upper bound
+ assert indexOfProblem < content.length();
+ // Find the substring that immediately precedes the problem point ...
+ int beforeStart = Math.max(0, indexOfProblem - charactersToIncludeBeforeAndAfter);
+ String before = content.substring(beforeStart, indexOfProblem);
+
+ // Find the substring that immediately follows the problem point ...
+ int afterEnd = Math.min(indexOfProblem + charactersToIncludeBeforeAndAfter, content.length());
+ String after = content.substring(indexOfProblem, afterEnd);
+
+ return before + (highlightText != null ? highlightText : "") + after;
+ }
+
+ /**
+ * Interface for a Tokenizer component responsible for processing the characters in a {@link CharacterStream} and constructing
+ * the appropriate {@link Token} objects. Implementations are given the raw character stream and are expected to record every
+ * token, in order, through the supplied {@link Tokens} factory.
+ */
+ public static interface Tokenizer {
+ /**
+ * Process the supplied characters and construct the appropriate {@link Token} objects.
+ *
+ * @param input the character input stream; never null
+ * @param tokens the factory for {@link Token} objects, which records the order in which the tokens are created
+ * @throws ParsingException if there is an error while processing the character stream (e.g., a quote is not closed, etc.)
+ */
+ void tokenize( CharacterStream input,
+ Tokens tokens ) throws ParsingException;
+ }
+
+ /**
+ * Interface used by a {@link Tokenizer} to iterate through the characters in the content input to the {@link TokenStream}.
+ */
+ public static interface CharacterStream {
+
+ /**
+ * Determine if there is another character available in this stream.
+ *
+ * @return true if there is another character (and {@link #next()} can be called), or false otherwise
+ */
+ boolean hasNext();
+
+ /**
+ * Obtain the next character value, and advance the stream.
+ *
+ * @return the next character
+ * @throws NoSuchElementException if there is no {@link #hasNext() next character}
+ */
+ char next();
+
+ /**
+ * Get the index for the last character returned from {@link #next()}.
+ *
+ * @return the index of the last character returned
+ */
+ int index();
+
+ /**
+ * Get the position for the last character returned from {@link #next()}.
+ *
+ * @return the position of the last character returned; never null
+ */
+ Position position();
+
+ /**
+ * Determine if the next character on the stream is a {@link Character#isWhitespace(char) whitespace character}. This
+ * method does <i>not</i> advance the stream.
+ *
+ * @return true if there is a {@link #next() next} character and it is a whitespace character, or false otherwise
+ */
+ boolean isNextWhitespace();
+
+ /**
+ * Determine if the next character on the stream is the supplied value. This method does <i>not</i> advance the stream.
+ *
+ * @param c the character value to compare to the next character on the stream
+ * @return true if there is a {@link #next() next} character and it is the supplied character, or false otherwise
+ */
+ boolean isNext( char c );
+
+ /**
+ * Determine if the next two characters on the stream match the supplied values. This method does <i>not</i> advance the
+ * stream.
+ *
+ * @param nextChar the character value to compare to the next character on the stream
+ * @param followingChar the character value to compare to the character immediately after the next character on the stream
+ * @return true if there are at least two characters left on the stream and the first matches <code>nextChar</code> and
+ * the second matches <code>followingChar</code>
+ */
+ boolean isNext( char nextChar,
+ char followingChar );
+
+ /**
+ * Determine if the next three characters on the stream match the supplied values. This method does <i>not</i> advance the
+ * stream.
+ *
+ * @param nextChar the character value to compare to the next character on the stream
+ * @param nextChar2 the character value to compare to the second character on the stream
+ * @param nextChar3 the character value to compare to the third character on the stream
+ * @return true if there are at least three characters left on the stream and the first matches <code>nextChar</code>,
+ * the second matches <code>nextChar2</code>, and the third matches <code>nextChar3</code>
+ */
+ boolean isNext( char nextChar,
+ char nextChar2,
+ char nextChar3 );
+
+ /**
+ * Determine if the next character on the stream matches one of the supplied characters. This method does <i>not</i>
+ * advance the stream.
+ *
+ * @param characters the characters to match
+ * @return true if there is a {@link #next() next} character and it does match one of the supplied characters, or false
+ * otherwise
+ */
+ boolean isNextAnyOf( char[] characters );
+
+ /**
+ * Determine if the next character on the stream matches one of the characters in the supplied string. This method does
+ * <i>not</i> advance the stream.
+ *
+ * @param characters the characters to match
+ * @return true if there is a {@link #next() next} character and it does match one of the supplied characters, or false
+ * otherwise
+ */
+ boolean isNextAnyOf( String characters );
+
+ }
+
+ /**
+ * A factory for Token objects, used by a {@link Tokenizer} to create tokens in the correct order.
+ */
+ public static interface Tokens {
+ /**
+ * Create a single-character token at the supplied index in the character stream. The token type is set to 0, meaning this
+ * is equivalent to calling <code>addToken(index,index+1)</code> or <code>addToken(index,index+1,0)</code>.
+ *
+ * @param index the index of the character to appear in the token; must be a valid index in the stream
+ */
+ void addToken( int index );
+
+ /**
+ * Create a single- or multi-character token with the characters in the range given by the starting and ending index in
+ * the character stream. The character at the ending index is <i>not</i> included in the token (as this is standard
+ * practice when using 0-based indexes). The token type is set to 0, meaning this is equivalent to calling
+ * <code>addToken(startIndex,endIndex,0)</code>.
+ *
+ * @param startIndex the index of the first character to appear in the token; must be a valid index in the stream
+ * @param endIndex the index just past the last character to appear in the token; must be a valid index in the stream
+ */
+ void addToken( int startIndex,
+ int endIndex );
+
+ /**
+ * Create a single- or multi-character token with the supplied type and with the characters in the range given by the
+ * starting and ending index in the character stream. The character at the ending index is <i>not</i> included in the
+ * token (as this is standard practice when using 0-based indexes).
+ *
+ * @param startIndex the index of the first character to appear in the token; must be a valid index in the stream
+ * @param endIndex the index just past the last character to appear in the token; must be a valid index in the stream
+ * @param type the type of the token
+ */
+ void addToken( int startIndex,
+ int endIndex,
+ int type );
+ }
+
+ /**
+ * The interface defining a token, which references the characters in the actual input character stream.
+ */
+ @Immutable
+ protected interface Token {
+ // The portion of the input string that this token represents
+ String value();
+
+ // True if this token's value matches the supplied string (implementations decide case sensitivity)
+ boolean matches( String expected );
+
+ // True if this token is a single character equal to the supplied character
+ boolean matches( char expected );
+
+ // The tokenizer-assigned type of this token (e.g., WORD, SYMBOL, ...)
+ int type();
+
+ // The index in the input at which this token begins (inclusive)
+ int startIndex();
+
+ // The index in the input just past the last character of this token (exclusive)
+ int endIndex();
+
+ // The number of characters in this token
+ int length();
+
+ // The line/column position of this token in the input
+ Position position();
+ }
+
+ @Immutable
+ protected class CaseSensitiveToken implements Token {
+ private final int startIndex;
+ private final int endIndex;
+ private final int type;
+
+ public CaseSensitiveToken( int startIndex,
+ int endIndex,
+ int type ) {
+ this.startIndex = startIndex;
+ this.endIndex = endIndex;
+ this.type = type;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.Token#type()
+ */
+ @Override
+ public final int type() {
+ return type;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.Token#startIndex()
+ */
+ @Override
+ public final int startIndex() {
+ return startIndex;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.Token#endIndex()
+ */
+ @Override
+ public final int endIndex() {
+ return endIndex;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.Token#length()
+ */
+ @Override
+ public final int length() {
+ // Fix: this was hard-coded to return 0, which misreported every token's width and broke
+ // matches(char) below (it requires length() == 1, which could never be true). A token spans
+ // the half-open range [startIndex, endIndex) ...
+ return endIndex - startIndex;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.Token#matches(char)
+ */
+ @Override
+ public final boolean matches( char expected ) {
+ // Only a single-character token can match a character value ...
+ return length() == 1 && matchString().charAt(startIndex) == expected;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.Token#matches(java.lang.String)
+ */
+ @Override
+ public final boolean matches( String expected ) {
+ return matchString().substring(startIndex, endIndex).equals(expected);
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.Token#value()
+ */
+ @Override
+ public final String value() {
+ // Always answer from the original input so the caller sees the original casing,
+ // even when matching is done against an uppercased copy (see CaseInsensitiveToken) ...
+ return inputString.substring(startIndex, endIndex);
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.Token#position()
+ */
+ @Override
+ public Position position() {
+ // NOTE(review): returns null even though Token#position() implies a position is available;
+ // confirm that all callers tolerate a null position before relying on this ...
+ return null;
+ }
+
+ // The string against which matching is performed; subclasses may substitute a normalized copy
+ protected String matchString() {
+ return inputString;
+ }
+ }
+
+ /**
+ * A token whose matching is performed against the uppercased copy of the input, making {@link #matches(String)} and
+ * {@link #matches(char)} case-insensitive (callers should supply uppercase expected values), while {@link #value()} still
+ * returns the original-case text.
+ */
+ @Immutable
+ protected class CaseInsensitiveToken extends CaseSensitiveToken {
+ public CaseInsensitiveToken( int startIndex,
+ int endIndex,
+ int type ) {
+ super(startIndex, endIndex, type);
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.CaseSensitiveToken#matchString()
+ */
+ @Override
+ protected String matchString() {
+ // Match against the uppercased copy of the input maintained by the enclosing TokenStream ...
+ return inputUppercased;
+ }
+ }
+
+ /**
+ * Base class for {@link Tokens} implementations that accumulate the created tokens, in order, into a list. Subclasses need
+ * only implement {@link Tokens#addToken(int, int, int)} to choose the concrete {@link Token} type.
+ */
+ protected abstract class TokenFactory implements Tokens {
+ // The tokens created so far, in creation order
+ protected final List<Token> tokens = new LinkedList<Token>();
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.Tokens#addToken(int)
+ */
+ @Override
+ public final void addToken( int index ) {
+ // A single-character token with the default type (0) ...
+ addToken(index, index + 1, 0);
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.Tokens#addToken(int, int)
+ */
+ @Override
+ public final void addToken( int startIndex,
+ int endIndex ) {
+ addToken(startIndex, endIndex, 0);
+ }
+
+ /**
+ * Get the tokens created so far. Note this is the live, mutable list, not a copy.
+ *
+ * @return tokens
+ */
+ public List<Token> getTokens() {
+ return tokens;
+ }
+ }
+
+ /**
+ * A {@link TokenFactory} that creates {@link CaseSensitiveToken} instances.
+ */
+ protected class CaseSensitiveTokenFactory extends TokenFactory {
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.Tokens#addToken(int, int, int)
+ */
+ @Override
+ public void addToken( int startIndex,
+ int endIndex,
+ int type ) {
+ tokens.add(new CaseSensitiveToken(startIndex, endIndex, type));
+ }
+ }
+
+ /**
+ * A {@link TokenFactory} that creates {@link CaseInsensitiveToken} instances.
+ */
+ protected class CaseInsensitiveTokenFactory extends TokenFactory {
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.Tokens#addToken(int, int, int)
+ */
+ @Override
+ public void addToken( int startIndex,
+ int endIndex,
+ int type ) {
+ tokens.add(new CaseInsensitiveToken(startIndex, endIndex, type));
+ }
+ }
+
+ /**
+ * A {@link CharacterStream} implementation backed by a character array, tracking the 1-based line number and column of each
+ * character as it is returned.
+ */
+ protected static final class CharacterArrayStream implements CharacterStream {
+ private final char[] content;
+ private int lastIndex = -1;
+ private final int maxIndex;
+ private int lineNumber = 1;
+ // Column of the last character returned from next(); 0 means no character has yet been returned on this line.
+ // Fix: this was initialized to 1 and never incremented, so position() always reported column 1.
+ private int columnNumber = 0;
+ private boolean nextCharMayBeLineFeed;
+
+ protected CharacterArrayStream( char[] content ) {
+ this.content = content;
+ this.maxIndex = content.length - 1;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.CharacterStream#hasNext()
+ */
+ @Override
+ public boolean hasNext() {
+ return lastIndex < maxIndex;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.CharacterStream#index()
+ */
+ @Override
+ public int index() {
+ return lastIndex;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.CharacterStream#position()
+ */
+ @Override
+ public Position position() {
+ return new Position(lineNumber, columnNumber);
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.CharacterStream#next()
+ */
+ @Override
+ public char next() {
+ if (lastIndex >= maxIndex) {
+ throw new NoSuchElementException();
+ }
+ char result = content[++lastIndex];
+ ++columnNumber; // fix: advance the column for every character returned
+ if (result == '\r') {
+ nextCharMayBeLineFeed = true;
+ ++lineNumber;
+ columnNumber = 0;
+ } else if (result == '\n') {
+ // A '\n' immediately following '\r' is part of the same "\r\n" terminator, so don't count it twice ...
+ if (!nextCharMayBeLineFeed) ++lineNumber;
+ // Fix: clear the flag here too, so a sequence like "\r\n\n" counts the second '\n' as a new line ...
+ nextCharMayBeLineFeed = false;
+ columnNumber = 0;
+ } else if (nextCharMayBeLineFeed) {
+ nextCharMayBeLineFeed = false;
+ }
+ return result;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.CharacterStream#isNext(char)
+ */
+ @Override
+ public boolean isNext( char c ) {
+ int nextIndex = lastIndex + 1;
+ return nextIndex <= maxIndex && content[nextIndex] == c;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.CharacterStream#isNext(char, char)
+ */
+ @Override
+ public boolean isNext( char nextChar1,
+ char nextChar2 ) {
+ int nextIndex1 = lastIndex + 1;
+ int nextIndex2 = lastIndex + 2;
+ return nextIndex2 <= maxIndex && content[nextIndex1] == nextChar1 && content[nextIndex2] == nextChar2;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.CharacterStream#isNext(char, char, char)
+ */
+ @Override
+ public boolean isNext( char nextChar1,
+ char nextChar2,
+ char nextChar3 ) {
+ int nextIndex1 = lastIndex + 1;
+ int nextIndex2 = lastIndex + 2;
+ int nextIndex3 = lastIndex + 3;
+ return nextIndex3 <= maxIndex && content[nextIndex1] == nextChar1 && content[nextIndex2] == nextChar2
+ && content[nextIndex3] == nextChar3;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.CharacterStream#isNextAnyOf(char[])
+ */
+ @Override
+ public boolean isNextAnyOf( char[] characters ) {
+ int nextIndex = lastIndex + 1;
+ if (nextIndex <= maxIndex) {
+ char nextChar = content[nextIndex]; // use the already-computed index
+ for (char c : characters) {
+ if (c == nextChar) return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.CharacterStream#isNextAnyOf(java.lang.String)
+ */
+ @Override
+ public boolean isNextAnyOf( String characters ) {
+ int nextIndex = lastIndex + 1;
+ if (nextIndex <= maxIndex) {
+ char nextChar = content[nextIndex]; // use the already-computed index
+ if (characters.indexOf(nextChar) != -1) return true;
+ }
+ return false;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.CharacterStream#isNextWhitespace()
+ */
+ @Override
+ public boolean isNextWhitespace() {
+ int nextIndex = lastIndex + 1;
+ return nextIndex <= maxIndex && Character.isWhitespace(content[nextIndex]);
+ }
+ }
+
+ /**
+ * A class that represents the position of a particular character in terms of the lines and columns of a character sequence.
+ */
+ @Immutable
+ public final static class Position {
+ private final int line;
+ private final int column;
+
+ protected Position( int line,
+ int column ) {
+ this.line = line;
+ this.column = column;
+ }
+
+ /**
+ * Get the 1-based column number of the character.
+ *
+ * @return the column number; always positive
+ */
+ public int getColumn() {
+ return this.column;
+ }
+
+ /**
+ * Get the 1-based line number of the character.
+ *
+ * @return the line number; always positive
+ */
+ public int getLine() {
+ return this.line;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString() {
+ // Render as "line:column" ...
+ StringBuilder sb = new StringBuilder();
+ sb.append(line).append(':').append(column);
+ return sb.toString();
+ }
+ }
+
+ /**
+ * An exception representing a problem during parsing, carrying the {@link Position line/column position} at which the
+ * problem was found.
+ */
+ public static class ParsingException extends RuntimeException {
+ private static final long serialVersionUID = 1L;
+
+ // The position of the error; never null
+ private final Position position;
+
+ /**
+ * Create an exception with no message or cause.
+ *
+ * @param position the position of the error; never null
+ */
+ public ParsingException( Position position ) {
+ super();
+ this.position = position;
+ }
+
+ /**
+ * Create an exception with the supplied message and underlying cause.
+ *
+ * @param position the position of the error; never null
+ * @param message the message
+ * @param cause the underlying cause
+ */
+ public ParsingException( Position position,
+ String message,
+ Throwable cause ) {
+ super(message, cause);
+ this.position = position;
+ }
+
+ /**
+ * Create an exception with the supplied message.
+ *
+ * @param position the position of the error; never null
+ * @param message the message
+ */
+ public ParsingException( Position position,
+ String message ) {
+ super(message);
+ this.position = position;
+ }
+
+ /**
+ * Get the position at which the parsing problem was found.
+ *
+ * @return position; never null
+ */
+ public Position getPosition() {
+ return position;
+ }
+ }
+
+ /**
+ * Obtain a basic {@link Tokenizer} implementation that ignores whitespace but includes tokens for individual symbols, the
+ * period ('.'), single-quoted strings, double-quoted strings, whitespace-delimited words, and optionally comments.
+ * <p>
+ * Note that the resulting Tokenizer may not be appropriate in many situations, but is provided merely as a convenience for
+ * those situations that happen to be able to use it.
+ * </p>
+ *
+ * @param includeComments true if the comments should be retained and be included in the token stream, or false if comments
+ * should be stripped and not included in the token stream
+ * @return the tokenizer; never null
+ */
+ public static BasicTokenizer basicTokenizer( boolean includeComments ) {
+ return new BasicTokenizer(includeComments);
+ }
+
+ /**
+ * A basic {@link Tokenizer} implementation that ignores whitespace but includes tokens for individual symbols, the period
+ * ('.'), single-quoted strings, double-quoted strings, whitespace-delimited words, and optionally comments.
+ * <p>
+ * Note this Tokenizer may not be appropriate in many situations, but is provided merely as a convenience for those situations
+ * that happen to be able to use it.
+ * </p>
+ */
+ public static class BasicTokenizer implements Tokenizer {
+ /**
+ * The {@link Token#type() token type} for tokens that represent an unquoted string containing a character sequence made
+ * up of non-whitespace and non-symbol characters.
+ */
+ public static final int WORD = 1;
+ /**
+ * The {@link Token#type() token type} for tokens that consist of an individual "symbol" character. The set of characters
+ * includes: <code>-(){}*,;+%?$[]!&lt;&gt;|=:</code>
+ */
+ public static final int SYMBOL = 2;
+ /**
+ * The {@link Token#type() token type} for tokens that consist of an individual '.' character.
+ */
+ public static final int DECIMAL = 3;
+ /**
+ * The {@link Token#type() token type} for tokens that consist of all the characters within single-quotes. Single quote
+ * characters are included if they are preceded (escaped) by a '\' character.
+ */
+ public static final int SINGLE_QUOTED_STRING = 4;
+ /**
+ * The {@link Token#type() token type} for tokens that consist of all the characters within double-quotes. Double quote
+ * characters are included if they are preceded (escaped) by a '\' character.
+ */
+ public static final int DOUBLE_QUOTED_STRING = 5;
+ /**
+ * The {@link Token#type() token type} for tokens that consist of all the characters between "/*" and "*&#47;" or between
+ * "//" and the next line terminator (e.g., '\n', '\r' or "\r\n"). (The end-of-comment marker is written here with an
+ * HTML entity so that it does not terminate this Javadoc comment.)
+ */
+ public static final int COMMENT = 6;
+
+ // Whether COMMENT tokens are recorded (true) or comments are silently skipped (false)
+ private final boolean useComments;
+
+ protected BasicTokenizer( boolean useComments ) {
+ this.useComments = useComments;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @see org.jboss.dna.common.text.TokenStream.Tokenizer#tokenize(CharacterStream, Tokens)
+ */
+ @Override
+ public void tokenize( CharacterStream input,
+ Tokens tokens ) throws ParsingException {
+ while (input.hasNext()) {
+ char c = input.next();
+ switch (c) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ // Just skip these whitespace characters ...
+ break;
+ case '-':
+ case '(':
+ case ')':
+ case '{':
+ case '}':
+ case '*':
+ case ',':
+ case ';':
+ case '+':
+ case '%':
+ case '?':
+ case '$':
+ case '[':
+ case ']':
+ case '!':
+ case '<':
+ case '>':
+ case '|':
+ case '=':
+ case ':':
+ // Each symbol is its own single-character token ...
+ tokens.addToken(input.index(), input.index() + 1, SYMBOL);
+ break;
+ case '.':
+ tokens.addToken(input.index(), input.index() + 1, DECIMAL);
+ break;
+ case '\"':
+ // Read until the matching (unescaped) closing double quote ...
+ int startIndex = input.index();
+ Position startingPosition = input.position();
+ boolean foundClosingQuote = false;
+ while (input.hasNext()) {
+ c = input.next();
+ if (c == '\\' && input.isNext('"')) {
+ c = input.next(); // consume the escaped '"' character
+ } else if (c == '"') {
+ foundClosingQuote = true;
+ break;
+ }
+ }
+ if (!foundClosingQuote) {
+ String msg = CommonI18n.noMatchingDoubleQuoteFound.text(startingPosition.getLine(),
+ startingPosition.getColumn());
+ throw new ParsingException(startingPosition, msg);
+ }
+ int endIndex = input.index() + 1; // beyond last character read
+ tokens.addToken(startIndex, endIndex, DOUBLE_QUOTED_STRING);
+ break;
+ case '\'':
+ // Read until the matching (unescaped) closing single quote ...
+ startIndex = input.index();
+ startingPosition = input.position();
+ foundClosingQuote = false;
+ while (input.hasNext()) {
+ c = input.next();
+ if (c == '\\' && input.isNext('\'')) {
+ c = input.next(); // consume the escaped '\'' character
+ } else if (c == '\'') {
+ foundClosingQuote = true;
+ break;
+ }
+ }
+ if (!foundClosingQuote) {
+ String msg = CommonI18n.noMatchingSingleQuoteFound.text(startingPosition.getLine(),
+ startingPosition.getColumn());
+ throw new ParsingException(startingPosition, msg);
+ }
+ endIndex = input.index() + 1; // beyond last character read
+ tokens.addToken(startIndex, endIndex, SINGLE_QUOTED_STRING);
+ break;
+ case '/':
+ startIndex = input.index();
+ if (input.isNext('/')) {
+ // End-of-line comment ...
+ boolean foundLineTerminator = false;
+ while (input.hasNext()) {
+ c = input.next();
+ if (c == '\n' || c == '\r') {
+ foundLineTerminator = true;
+ break;
+ }
+ }
+ endIndex = input.index(); // the token won't include the '\n' or '\r' character(s)
+ if (!foundLineTerminator) ++endIndex; // must point beyond last char
+ if (c == '\r' && input.isNext('\n')) input.next();
+ if (useComments) {
+ tokens.addToken(startIndex, endIndex, COMMENT);
+ }
+ } else if (input.isNext('*')) {
+ // Multi-line comment ...
+ while (input.hasNext() && !input.isNext('*', '/')) {
+ c = input.next();
+ }
+ if (input.hasNext()) input.next(); // consume the '*'
+ if (input.hasNext()) input.next(); // consume the '/'
+ if (useComments) {
+ endIndex = input.index() + 1; // the token will include the '/' and '*' characters
+ tokens.addToken(startIndex, endIndex, COMMENT);
+ }
+ } else {
+ // just a regular slash ...
+ tokens.addToken(startIndex, startIndex + 1, SYMBOL);
+ }
+ break;
+ default:
+ startIndex = input.index();
+ // Read until another whitespace/symbol/decimal/slash is found
+ while (input.hasNext() && !(input.isNextWhitespace() || input.isNextAnyOf("/.-(){}*,;+%?$[]!<>|=:"))) {
+ c = input.next();
+ }
+ endIndex = input.index() + 1; // beyond last character that was included
+ tokens.addToken(startIndex, endIndex, WORD);
+ }
+ }
+ }
+ }
+}
Property changes on: trunk/dna-common/src/main/java/org/jboss/dna/common/text/TokenStream.java
___________________________________________________________________
Name: svn:keywords
+ Id Revision
Name: svn:eol-style
+ LF
Modified: trunk/dna-common/src/main/resources/org/jboss/dna/common/CommonI18n.properties
===================================================================
--- trunk/dna-common/src/main/resources/org/jboss/dna/common/CommonI18n.properties 2009-08-31 13:46:22 UTC (rev 1182)
+++ trunk/dna-common/src/main/resources/org/jboss/dna/common/CommonI18n.properties 2009-09-02 18:05:46 UTC (rev 1183)
@@ -78,3 +78,15 @@
pathIsNotRelative = The path {0} is not a relative path
requiredToSuppliedParameterMismatch = {0} parameter{1} supplied, but {2} parameter{3} required: "{4}" => "{5}"
unableToAccessResourceFileFromClassLoader = Unable to access "{0}" resource from the class loader
+
+noMoreContent = No more content.
+noMoreContentButWasExpectingToken = No more content, but was expecting "{0}"
+unexpectedToken = Expecting "{0}" but found "{1}" at line {2}, column {3}: {4}
+noMoreContentButWasExpectingCharacter = No more content, but was expecting '{0}'
+unexpectedCharacter = Expecting '{0}' but found '{1}' at line {2}, column {3}: {4}
+noMoreContentButWasExpectingTokenType = No more content, but was expecting {0} token
+unexpectedTokenType = Expecting {0} token but found {1} at line {2}, column {3}: {4}
+startMethodMustBeCalledBeforeNext = The 'start()' method must be called before 'hasNext()'
+startMethodMustBeCalledBeforeConsumingOrMatching = The 'start()' method must be called before tokens can be consumed or matched
+noMatchingDoubleQuoteFound = No matching closing double quote found for the one at line {0}, column {1}
+noMatchingSingleQuoteFound = No matching closing single quote found for the one at line {0}, column {1}
Added: trunk/dna-common/src/test/java/org/jboss/dna/common/text/SampleSqlParser.java
===================================================================
--- trunk/dna-common/src/test/java/org/jboss/dna/common/text/SampleSqlParser.java (rev 0)
+++ trunk/dna-common/src/test/java/org/jboss/dna/common/text/SampleSqlParser.java 2009-09-02 18:05:46 UTC (rev 1183)
@@ -0,0 +1,191 @@
+/*
+ * JBoss DNA (http://www.jboss.org/dna)
+ * See the COPYRIGHT.txt file distributed with this work for information
+ * regarding copyright ownership. Some portions may be licensed
+ * to Red Hat, Inc. under one or more contributor license agreements.
+ * See the AUTHORS.txt file in the distribution for a full listing of
+ * individual contributors.
+ *
+ * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
+ * is licensed to you under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * JBoss DNA is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
+ */
+package org.jboss.dna.common.text;
+
+import java.util.LinkedList;
+import java.util.List;
+import net.jcip.annotations.Immutable;
+import org.jboss.dna.common.text.TokenStream.ParsingException;
+
+/**
+ *
+ */
+public class SampleSqlParser {
+
+ public List<Statement> parse( String ddl ) {
+ TokenStream tokens = new TokenStream(ddl, TokenStream.basicTokenizer(false), false);
+ List<Statement> statements = new LinkedList<Statement>();
+
+ while (tokens.hasNext()) {
+ if (tokens.matches("SELECT")) {
+ statements.add(parseSelect(tokens));
+ } else {
+ statements.add(parseDelete(tokens));
+ }
+ }
+ return statements;
+ }
+
+ protected Select parseSelect( TokenStream tokens ) throws ParsingException {
+ tokens.consume("SELECT");
+ List<Column> columns = parseColumns(tokens);
+ tokens.consume("FROM");
+ String tableName = tokens.consume();
+ return new Select(tableName, columns);
+ }
+
+ protected List<Column> parseColumns( TokenStream tokens ) throws ParsingException {
+ List<Column> columns = new LinkedList<Column>();
+ if (tokens.matches('*')) {
+ tokens.consume(); // leave the columns empty to signal wildcard
+ } else {
+ // Read names until we see a ','
+ do {
+ String columnName = tokens.consume();
+ if (tokens.canConsume("AS")) {
+ String columnAlias = tokens.consume();
+ columns.add(new Column(columnName, columnAlias));
+ } else {
+ columns.add(new Column(columnName, null));
+ }
+ } while (tokens.canConsume(','));
+ }
+ return columns;
+ }
+
+ protected Delete parseDelete( TokenStream tokens ) throws ParsingException {
+ tokens.consume("DELETE", "FROM");
+ String tableName = tokens.consume();
+ tokens.consume("WHERE");
+ String lhs = tokens.consume();
+ tokens.consume('=');
+ String rhs = tokens.consume();
+ return new Delete(tableName, new Criteria(lhs, rhs));
+ }
+
+ @Immutable
+ public static abstract class Statement {
+ }
+
+ @Immutable
+ public static class Select extends Statement {
+ private final String from;
+ private final List<Column> columns;
+
+ public Select( String from,
+ List<Column> columns ) {
+ this.from = from;
+ this.columns = columns;
+ }
+
+ public String getFrom() {
+ return from;
+ }
+
+ public List<Column> getColumns() {
+ return columns;
+ }
+ }
+
+ @Immutable
+ public static class Delete extends Statement {
+ private final String from;
+ private final Criteria criteria;
+
+ public Delete( String from,
+ Criteria criteria ) {
+ this.from = from;
+ this.criteria = criteria;
+ }
+
+ public String getFrom() {
+ return from;
+ }
+
+ public Criteria getCriteria() {
+ return criteria;
+ }
+ }
+
+ @Immutable
+ public static class Column {
+ private final String name;
+ private final String alias;
+
+ public Column( String name,
+ String alias ) {
+ this.name = name;
+ this.alias = alias;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public String getAlias() {
+ return alias;
+ }
+ }
+
+ @Immutable
+ public static class Criteria {
+ private final String lhs;
+ private final String rhs;
+
+ public Criteria( String lhs,
+ String rhs ) {
+ this.lhs = lhs;
+ this.rhs = rhs;
+ }
+
+ public String getLhs() {
+ return lhs;
+ }
+
+ public String getRhs() {
+ return rhs;
+ }
+ }
+
+ @Immutable
+ public static class Query {
+ private final String from;
+ private final List<Column> columns;
+
+ public Query( String from,
+ List<Column> columns ) {
+ this.from = from;
+ this.columns = columns;
+ }
+
+ public String getFrom() {
+ return from;
+ }
+
+ public List<Column> getColumns() {
+ return columns;
+ }
+ }
+
+}
Property changes on: trunk/dna-common/src/test/java/org/jboss/dna/common/text/SampleSqlParser.java
___________________________________________________________________
Name: svn:keywords
+ Id Revision
Name: svn:eol-style
+ LF
Added: trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamBasicTokenizerTest.java
===================================================================
--- trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamBasicTokenizerTest.java (rev 0)
+++ trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamBasicTokenizerTest.java 2009-09-02 18:05:46 UTC (rev 1183)
@@ -0,0 +1,306 @@
+/*
+ * JBoss DNA (http://www.jboss.org/dna)
+ * See the COPYRIGHT.txt file distributed with this work for information
+ * regarding copyright ownership. Some portions may be licensed
+ * to Red Hat, Inc. under one or more contributor license agreements.
+ * See the AUTHORS.txt file in the distribution for a full listing of
+ * individual contributors.
+ *
+ * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
+ * is licensed to you under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * JBoss DNA is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
+ */
+package org.jboss.dna.common.text;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
+import java.util.LinkedList;
+import org.jboss.dna.common.text.TokenStream.BasicTokenizer;
+import org.jboss.dna.common.text.TokenStream.CharacterArrayStream;
+import org.jboss.dna.common.text.TokenStream.ParsingException;
+import org.jboss.dna.common.text.TokenStream.Tokens;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ *
+ */
+public class TokenStreamBasicTokenizerTest {
+
+ private BasicTokenizer tokenizer;
+ private Tokens tokenFactory;
+ private LinkedList<int[]> tokenValues;
+
+ @Before
+ public void beforeEach() {
+ tokenizer = TokenStream.basicTokenizer(true);
+ final LinkedList<int[]> tokenValues = new LinkedList<int[]>();
+ tokenFactory = new Tokens() {
+ @Override
+ public void addToken( int index ) {
+ int[] token = new int[] {index, index + 1, 0};
+ tokenValues.add(token);
+ }
+
+ @Override
+ public void addToken( int startIndex,
+ int endIndex ) {
+ int[] token = new int[] {startIndex, endIndex, 0};
+ tokenValues.add(token);
+ }
+
+ @Override
+ public void addToken( int startIndex,
+ int endIndex,
+ int type ) {
+ int[] token = new int[] {startIndex, endIndex, type};
+ tokenValues.add(token);
+ }
+ };
+ this.tokenValues = tokenValues;
+ }
+
+ protected void tokenize( String input ) {
+ tokenizer.tokenize(new CharacterArrayStream(input.toCharArray()), tokenFactory);
+ }
+
+ protected void assertNextTokenIs( int startIndex,
+ int endIndex,
+ int type ) {
+ int[] token = tokenValues.removeFirst();
+ assertThat(token[0], is(startIndex));
+ assertThat(token[1], is(endIndex));
+ assertThat(token[2], is(type));
+ }
+
+ protected void assertNoMoreTokens() {
+ assertThat(tokenValues.isEmpty(), is(true));
+ }
+
+ @Test
+ public void shouldCreateNoTokensForEmptyContent() {
+ tokenize("");
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateNoTokensForContentWithOnlyWhitespace() {
+ tokenize(" \t \n \r\n \r ");
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokenForEachSymbolCharacter() {
+ String content = "-(){}*,;+%?$[]!<>|=:";
+ int numSymbols = content.length();
+ tokenize(content);
+ for (int i = 0; i != numSymbols; ++i) {
+ assertNextTokenIs(i, i + 1, BasicTokenizer.SYMBOL);
+ }
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokenForEachDecimalCharacter() {
+ tokenize(".");
+ assertNextTokenIs(0, 1, BasicTokenizer.DECIMAL);
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokenForEndOfLineComment() {
+ String content = "--//this is a comment\n";
+ tokenize(content);
+ assertNextTokenIs(0, 1, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(1, 2, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(2, content.length() - 1, BasicTokenizer.COMMENT); // -1 because '\n' is not included
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokenForEndOfLineCommentThatEndsWithEndOfString() {
+ String content = "--//this is a comment";
+ tokenize(content);
+ assertNextTokenIs(0, 1, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(1, 2, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(2, content.length(), BasicTokenizer.COMMENT);
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokenForMultiLineComment() {
+ String content = "--/*this is a comment*/-";
+ tokenize(content);
+ assertNextTokenIs(0, 1, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(1, 2, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(2, content.length() - 1, BasicTokenizer.COMMENT);
+ assertNextTokenIs(content.length() - 1, content.length(), BasicTokenizer.SYMBOL);
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokenForMultiLineCommentAtEndOfContent() {
+ String content = "--/*this is a comment*/";
+ tokenize(content);
+ assertNextTokenIs(0, 1, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(1, 2, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(2, content.length(), BasicTokenizer.COMMENT);
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokenForMultiLineCommentWithoutTerminatingCharacters() {
+ String content = "--/*this is a comment";
+ tokenize(content);
+ assertNextTokenIs(0, 1, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(1, 2, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(2, content.length(), BasicTokenizer.COMMENT);
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokenForMultiLineCommentWithoutAllTerminatingCharacters() {
+ String content = "--/*this is a comment*";
+ tokenize(content);
+ assertNextTokenIs(0, 1, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(1, 2, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(2, content.length(), BasicTokenizer.COMMENT);
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokenForSingleQuotedString() {
+ String content = "--'this is a single-quoted \n string'-";
+ assertThat(content.charAt(2), is('\''));
+ assertThat(content.charAt(35), is('\''));
+ tokenize(content);
+ assertNextTokenIs(0, 1, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(1, 2, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(2, 36, BasicTokenizer.SINGLE_QUOTED_STRING);
+ assertNextTokenIs(36, 37, BasicTokenizer.SYMBOL);
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokenForSingleQuotedStringWithEscapedSingleQuoteCharacters() {
+ String content = "--'this \"is\" a \\'single-quoted\\' \n string'-";
+ assertThat(content.charAt(2), is('\''));
+ assertThat(content.charAt(41), is('\''));
+ tokenize(content);
+ assertNextTokenIs(0, 1, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(1, 2, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(2, 42, BasicTokenizer.SINGLE_QUOTED_STRING);
+ assertNextTokenIs(42, 43, BasicTokenizer.SYMBOL);
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokenForSingleQuotedStringAtEndOfContent() {
+ String content = "--'this is a single-quoted \n string'";
+ assertThat(content.charAt(2), is('\''));
+ assertThat(content.charAt(35), is('\''));
+ tokenize(content);
+ assertNextTokenIs(0, 1, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(1, 2, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(2, 36, BasicTokenizer.SINGLE_QUOTED_STRING);
+ assertNoMoreTokens();
+ }
+
+ @Test( expected = ParsingException.class )
+ public void shouldCreateTokenForSingleQuotedStringWithoutClosingQuote() {
+ String content = "--'this is a single-quoted \n string";
+ tokenize(content);
+ }
+
+ @Test
+ public void shouldCreateTokenForDoubleQuotedString() {
+ String content = "--\"this is a double-quoted \n string\"-";
+ assertThat(content.charAt(2), is('"'));
+ assertThat(content.charAt(35), is('"'));
+ tokenize(content);
+ assertNextTokenIs(0, 1, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(1, 2, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(2, 36, BasicTokenizer.DOUBLE_QUOTED_STRING);
+ assertNextTokenIs(36, 37, BasicTokenizer.SYMBOL);
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokenForDoubleQuotedStringWithEscapedDoubleQuoteCharacters() {
+ String content = "--\"this 'is' a \\\"double-quoted\\\" \n string\"-";
+ assertThat(content.charAt(2), is('"'));
+ assertThat(content.charAt(41), is('"'));
+ tokenize(content);
+ assertNextTokenIs(0, 1, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(1, 2, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(2, 42, BasicTokenizer.DOUBLE_QUOTED_STRING);
+ assertNextTokenIs(42, 43, BasicTokenizer.SYMBOL);
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokenForDoubleQuotedStringAtEndOfContent() {
+ String content = "--\"this is a double-quoted \n string\"";
+ assertThat(content.charAt(2), is('"'));
+ assertThat(content.charAt(35), is('"'));
+ tokenize(content);
+ assertNextTokenIs(0, 1, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(1, 2, BasicTokenizer.SYMBOL);
+ assertNextTokenIs(2, 36, BasicTokenizer.DOUBLE_QUOTED_STRING);
+ assertNoMoreTokens();
+ }
+
+ @Test( expected = ParsingException.class )
+ public void shouldCreateTokenForDoubleQuotedStringWithoutClosingQuote() {
+ String content = "--\"this is a double-quoted \n string";
+ tokenize(content);
+ }
+
+ @Test
+ public void shouldCreateTokensForWordsWithAlphabeticCharacters() {
+ String content = "This is a series of words.";
+ tokenize(content);
+ assertNextTokenIs(0, 4, BasicTokenizer.WORD);
+ assertNextTokenIs(5, 7, BasicTokenizer.WORD);
+ assertNextTokenIs(8, 9, BasicTokenizer.WORD);
+ assertNextTokenIs(10, 16, BasicTokenizer.WORD);
+ assertNextTokenIs(17, 19, BasicTokenizer.WORD);
+ assertNextTokenIs(20, 25, BasicTokenizer.WORD);
+ assertNextTokenIs(25, 26, BasicTokenizer.DECIMAL);
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokensForWordsWithNumericCharacters() {
+ String content = "1234 4 5353.324";
+ tokenize(content);
+ assertNextTokenIs(0, 4, BasicTokenizer.WORD);
+ assertNextTokenIs(5, 6, BasicTokenizer.WORD);
+ assertNextTokenIs(7, 11, BasicTokenizer.WORD);
+ assertNextTokenIs(11, 12, BasicTokenizer.DECIMAL);
+ assertNextTokenIs(12, 15, BasicTokenizer.WORD);
+ assertNoMoreTokens();
+ }
+
+ @Test
+ public void shouldCreateTokensForWordsWithAlphaNumericCharacters() {
+ String content = "123a 5353.324e100";
+ tokenize(content);
+ assertNextTokenIs(0, 4, BasicTokenizer.WORD);
+ assertNextTokenIs(5, 9, BasicTokenizer.WORD);
+ assertNextTokenIs(9, 10, BasicTokenizer.DECIMAL);
+ assertNextTokenIs(10, 17, BasicTokenizer.WORD);
+ assertNoMoreTokens();
+ }
+}
Property changes on: trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamBasicTokenizerTest.java
___________________________________________________________________
Name: svn:keywords
+ Id Revision
Name: svn:eol-style
+ LF
Added: trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamTest.java
===================================================================
--- trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamTest.java (rev 0)
+++ trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamTest.java 2009-09-02 18:05:46 UTC (rev 1183)
@@ -0,0 +1,322 @@
+/*
+ * JBoss DNA (http://www.jboss.org/dna)
+ * See the COPYRIGHT.txt file distributed with this work for information
+ * regarding copyright ownership. Some portions may be licensed
+ * to Red Hat, Inc. under one or more contributor license agreements.
+ * See the AUTHORS.txt file in the distribution for a full listing of
+ * individual contributors.
+ *
+ * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
+ * is licensed to you under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * JBoss DNA is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
+ */
+package org.jboss.dna.common.text;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
+import org.jboss.dna.common.text.TokenStream.ParsingException;
+import org.jboss.dna.common.text.TokenStream.Tokenizer;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ *
+ */
+public class TokenStreamTest {
+ public static final int WORD = TokenStream.BasicTokenizer.WORD;
+ public static final int SYMBOL = TokenStream.BasicTokenizer.SYMBOL;
+ public static final int DECIMAL = TokenStream.BasicTokenizer.DECIMAL;
+ public static final int SINGLE_QUOTED_STRING = TokenStream.BasicTokenizer.SINGLE_QUOTED_STRING;
+ public static final int DOUBLE_QUOTED_STRING = TokenStream.BasicTokenizer.DOUBLE_QUOTED_STRING;
+ public static final int COMMENT = TokenStream.BasicTokenizer.COMMENT;
+
+ private Tokenizer tokenizer;
+ private String content;
+ private TokenStream tokens;
+
+ @Before
+ public void beforeEach() {
+ tokenizer = TokenStream.basicTokenizer(false);
+ content = "Select all columns from this table";
+ makeCaseInsensitive();
+ }
+
+ public void makeCaseSensitive() {
+ tokens = new TokenStream(content, tokenizer, true);
+ tokens.start();
+ }
+
+ public void makeCaseInsensitive() {
+ tokens = new TokenStream(content, tokenizer, false);
+ tokens.start();
+ }
+
+ @Test( expected = IllegalStateException.class )
+ public void shouldNotAllowConsumeBeforeStartIsCalled() {
+ tokens = new TokenStream(content, TokenStream.basicTokenizer(false), false);
+ tokens.consume("Select");
+ }
+
+ @Test( expected = IllegalStateException.class )
+ public void shouldNotAllowHasNextBeforeStartIsCalled() {
+ tokens = new TokenStream(content, TokenStream.basicTokenizer(false), false);
+ tokens.hasNext();
+ }
+
+ @Test( expected = IllegalStateException.class )
+ public void shouldNotAllowMatchesBeforeStartIsCalled() {
+ tokens = new TokenStream(content, TokenStream.basicTokenizer(false), false);
+ tokens.matches("Select");
+ }
+
+ @Test( expected = IllegalStateException.class )
+ public void shouldNotAllowCanConsumeBeforeStartIsCalled() {
+ tokens = new TokenStream(content, TokenStream.basicTokenizer(false), false);
+ tokens.canConsume("Select");
+ }
+
+ @Test
+ public void shouldConsumeInCaseSensitiveMannerWithExpectedValuesWhenMatchingExactCase() {
+ makeCaseSensitive();
+ tokens.consume("Select");
+ tokens.consume("all");
+ tokens.consume("columns");
+ tokens.consume("from");
+ tokens.consume("this");
+ tokens.consume("table");
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test( expected = ParsingException.class )
+ public void shouldFailToConsumeInCaseSensitiveMannerWithExpectedValuesWhenMatchingIncorrectCase() {
+ makeCaseSensitive();
+ tokens.consume("Select");
+ tokens.consume("all");
+ tokens.consume("Columns");
+ }
+
+ @Test
+ public void shouldConsumeInCaseInsensitiveMannerWithExpectedValuesWhenMatchingNonExactCase() {
+ makeCaseInsensitive();
+ tokens.consume("SELECT");
+ tokens.consume("ALL");
+ tokens.consume("COLUMNS");
+ tokens.consume("FROM");
+ tokens.consume("THIS");
+ tokens.consume("TABLE");
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test( expected = ParsingException.class )
+ public void shouldFailToConsumeInCaseInsensitiveMannerWithExpectedValuesWhenMatchingStringIsInLowerCase() {
+ makeCaseInsensitive();
+ tokens.consume("SELECT");
+ tokens.consume("ALL");
+ tokens.consume("columns");
+ }
+
+ @Test
+ public void shouldReturnTrueFromCanConsumeWithCaseSensitiveTokenStreamIfMatchStringDoesMatchCaseExactly() {
+ makeCaseSensitive();
+ assertThat(tokens.canConsume("Select"), is(true));
+ assertThat(tokens.canConsume("all"), is(true));
+ assertThat(tokens.canConsume("columns"), is(true));
+ assertThat(tokens.canConsume("from"), is(true));
+ assertThat(tokens.canConsume("this"), is(true));
+ assertThat(tokens.canConsume("table"), is(true));
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test
+ public void shouldReturnFalseFromCanConsumeWithCaseSensitiveTokenStreamIfMatchStringDoesNotMatchCaseExactly() {
+ makeCaseSensitive();
+ assertThat(tokens.canConsume("Select"), is(true));
+ assertThat(tokens.canConsume("all"), is(true));
+ assertThat(tokens.canConsume("Columns"), is(false));
+ assertThat(tokens.canConsume("COLUMNS"), is(false));
+ assertThat(tokens.canConsume("columns"), is(true));
+ assertThat(tokens.canConsume("from"), is(true));
+ assertThat(tokens.canConsume("THIS"), is(false));
+ assertThat(tokens.canConsume("table"), is(false));
+ assertThat(tokens.canConsume("this"), is(true));
+ assertThat(tokens.canConsume("table"), is(true));
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test
+ public void shouldReturnTrueFromCanConsumeWithCaseSensitiveTokenStreamIfSuppliedTypeDoesMatch() {
+ makeCaseSensitive();
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test
+ public void shouldReturnFalseFromCanConsumeWithCaseSensitiveTokenStreamIfSuppliedTypeDoesMatch() {
+ makeCaseSensitive();
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(COMMENT), is(false));
+ assertThat(tokens.canConsume(SINGLE_QUOTED_STRING), is(false));
+ assertThat(tokens.canConsume(DOUBLE_QUOTED_STRING), is(false));
+ assertThat(tokens.canConsume(DECIMAL), is(false));
+ assertThat(tokens.canConsume(SYMBOL), is(false));
+
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test
+ public void shouldReturnTrueFromMatchesWithCaseSensitiveTokenStreamIfMatchStringDoesMatchCaseExactly() {
+ makeCaseSensitive();
+ assertThat(tokens.matches("Select"), is(true));
+ assertThat(tokens.matches("select"), is(false));
+ assertThat(tokens.canConsume("Select"), is(true));
+ assertThat(tokens.matches("all"), is(true));
+ assertThat(tokens.canConsume("all"), is(true));
+ }
+
+ @Test
+ public void shouldReturnFalseFromMatchesWithCaseSensitiveTokenStreamIfMatchStringDoesMatchCaseExactly() {
+ makeCaseSensitive();
+ assertThat(tokens.matches("select"), is(false));
+ assertThat(tokens.matches("SElect"), is(false));
+ assertThat(tokens.matches("Select"), is(true));
+ }
+
+ @Test
+ public void shouldReturnFalseFromCanConsumeWithCaseInsensitiveTokenStreamIfMatchStringIsNotUppercase() {
+ makeCaseInsensitive();
+ assertThat(tokens.canConsume("Select"), is(false));
+ assertThat(tokens.canConsume("SELECT"), is(true));
+ assertThat(tokens.canConsume("aLL"), is(false));
+ assertThat(tokens.canConsume("all"), is(false));
+ assertThat(tokens.canConsume("ALL"), is(true));
+ }
+
+ @Test
+ public void shouldReturnTrueFromCanConsumeWithCaseInsensitiveTokenStreamIfMatchStringDoesNotMatchCaseExactly() {
+ makeCaseInsensitive();
+ assertThat(tokens.canConsume("SELECT"), is(true));
+ assertThat(tokens.canConsume("ALL"), is(true));
+ assertThat(tokens.canConsume("COLUMNS"), is(true));
+ assertThat(tokens.canConsume("FROM"), is(true));
+ assertThat(tokens.canConsume("THIS"), is(true));
+ assertThat(tokens.canConsume("TABLE"), is(true));
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test
+ public void shouldReturnTrueFromCanConsumeWithCaseInsensitiveTokenStreamIfSuppliedTypeDoesMatch() {
+ makeCaseInsensitive();
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test
+ public void shouldReturnFalseFromCanConsumeWithCaseInsensitiveTokenStreamIfSuppliedTypeDoesMatch() {
+ makeCaseInsensitive();
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(COMMENT), is(false));
+ assertThat(tokens.canConsume(SINGLE_QUOTED_STRING), is(false));
+ assertThat(tokens.canConsume(DOUBLE_QUOTED_STRING), is(false));
+ assertThat(tokens.canConsume(DECIMAL), is(false));
+ assertThat(tokens.canConsume(SYMBOL), is(false));
+
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.canConsume(WORD), is(true));
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test
+ public void shouldReturnTrueFromMatchesWithCaseInsensitiveTokenStreamIfMatchStringIsUppercaseAndMatches() {
+ makeCaseInsensitive();
+ assertThat(tokens.matches("SELECT"), is(true));
+ assertThat(tokens.canConsume("SELECT"), is(true));
+ assertThat(tokens.matches("ALL"), is(true));
+ assertThat(tokens.canConsume("ALL"), is(true));
+ }
+
+ @Test
+ public void shouldReturnFalseFromMatchesWithCaseInsensitiveTokenStreamIfMatchStringIsUppercaseAndDoesNotMatch() {
+ makeCaseInsensitive();
+ assertThat(tokens.matches("ALL"), is(false));
+ assertThat(tokens.matches("SElect"), is(false));
+ assertThat(tokens.matches("SELECT"), is(true));
+ }
+
+ @Test
+ public void shouldConsumeMultipleTokensIfTheyMatch() {
+ makeCaseInsensitive();
+ tokens.consume("SELECT", "ALL", "COLUMNS", "FROM", "THIS", "TABLE");
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test( expected = ParsingException.class )
+ public void shouldFailToConsumeMultipleTokensIfTheyDoNotMatch() {
+ makeCaseInsensitive();
+ tokens.consume("SELECT", "ALL", "COLUMNS", "FROM", "TABLE");
+ }
+
+ @Test
+ public void shouldReturnTrueFromCanConsumeMultipleTokensIfTheyAllMatch() {
+ makeCaseInsensitive();
+ assertThat(tokens.canConsume("SELECT", "ALL", "COLUMNS", "FROM", "THIS", "TABLE"), is(true));
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test
+ public void shouldReturnTrueFromCanConsumeMultipleTokensIfTheyDoNotAllMatch() {
+ makeCaseInsensitive();
+ // Unable to consume unless they all match ...
+ assertThat(tokens.canConsume("SELECT", "ALL", "COLUMNS", "FRM", "THIS", "TABLE"), is(false));
+ assertThat(tokens.canConsume("SELECT", "ALL", "COLUMNS", "FROM", "THIS", "TABLE", "EXTRA"), is(false));
+ assertThat(tokens.canConsume("SELECT", "ALL", "COLUMNS", "FROM", "EXTRA", "THIS", "TABLE"), is(false));
+ assertThat(tokens.hasNext(), is(true));
+ // Should have consumed nothing so far ...
+ assertThat(tokens.canConsume("SELECT", "ALL", "COLUMNS"), is(true));
+ assertThat(tokens.canConsume("FROM", "THIS", "TABLE"), is(true));
+ assertThat(tokens.hasNext(), is(false));
+ }
+
+ @Test
+ public void shouldReturnTrueFromMatchAnyOfIfAnyOfTheTokenValuesMatch() {
+ makeCaseInsensitive();
+ // Unable to consume unless they all match ...
+ assertThat(tokens.matchesAnyOf("ALL", "COLUMNS"), is(false));
+ assertThat(tokens.matchesAnyOf("ALL", "COLUMNS", "SELECT"), is(true));
+ tokens.consume("SELECT");
+ assertThat(tokens.matchesAnyOf("ALL", "COLUMNS", "SELECT"), is(true));
+ tokens.consume("ALL");
+ assertThat(tokens.matchesAnyOf("ALL", "COLUMNS", "SELECT"), is(true));
+ tokens.consume("COLUMNS");
+ assertThat(tokens.canConsume("FROM", "THIS", "TABLE"), is(true));
+ assertThat(tokens.hasNext(), is(false));
+ }
+}
Property changes on: trunk/dna-common/src/test/java/org/jboss/dna/common/text/TokenStreamTest.java
___________________________________________________________________
Name: svn:keywords
+ Id Revision
Name: svn:eol-style
+ LF
14 years, 8 months