Author: epbernard
Date: 2010-05-20 13:57:24 -0400 (Thu, 20 May 2010)
New Revision: 19569
Added:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/util/PassThroughAnalyzer.java
Modified:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/engine/DocumentBuilderContainedEntity.java
search/trunk/hibernate-search/src/main/java/org/hibernate/search/engine/DocumentBuilderIndexedEntity.java
search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/analyzer/AnalyzerTest.java
search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/analyzer/MyEntity.java
Log:
HSEARCH-530 Return a pass through analyzer on fields not TOKENIZED
For fields marked as UN_TOKENIZED, NO and NO_NORMS, scoped analyzers (returned by
searchFactory.getAnalyzer(...)) now return an analyzer that passes the data through
unmodified. There is room for improvement on the PassThroughAnalyzer, but that's a
start.
Modified:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/engine/DocumentBuilderContainedEntity.java
===================================================================
---
search/trunk/hibernate-search/src/main/java/org/hibernate/search/engine/DocumentBuilderContainedEntity.java 2010-05-20
17:56:27 UTC (rev 19568)
+++
search/trunk/hibernate-search/src/main/java/org/hibernate/search/engine/DocumentBuilderContainedEntity.java 2010-05-20
17:57:24 UTC (rev 19569)
@@ -69,6 +69,7 @@
import org.hibernate.search.bridge.LuceneOptions;
import org.hibernate.search.impl.InitContext;
import org.hibernate.search.util.LoggerFactory;
+import org.hibernate.search.util.PassThroughAnalyzer;
import org.hibernate.search.util.ReflectionHelper;
import org.hibernate.search.util.ScopedAnalyzer;
import org.hibernate.util.StringHelper;
@@ -96,6 +97,7 @@
protected Similarity similarity; //there is only 1 similarity per class hierarchy, and
only 1 per index
protected boolean isRoot;
protected EntityState entityState;
+ private Analyzer passThroughAnalyzer = new PassThroughAnalyzer();
/**
* Constructor used on contained entities not annotated with
<code>@Indexed</code> themselves.
@@ -468,7 +470,7 @@
if ( analyzer == null ) {
throw new AssertionFailure( "Analyzer should not be undefined" );
}
- this.analyzer.addScopedAnalyzer( fieldName, analyzer );
+ addToScopedAnalyzer( fieldName, analyzer, ann.index() );
}
private void bindFieldAnnotation(XProperty member, PropertiesMetadata
propertiesMetadata, String prefix, org.hibernate.search.annotations.Field fieldAnn,
InitContext context) {
@@ -488,9 +490,19 @@
if ( analyzer == null ) {
analyzer = getAnalyzer( member, context );
}
- if ( analyzer != null ) {
- this.analyzer.addScopedAnalyzer( fieldName, analyzer );
+ addToScopedAnalyzer( fieldName, analyzer, fieldAnn.index() );
+ }
+
+ protected void addToScopedAnalyzer(String fieldName, Analyzer analyzer, Index index) {
+ if ( index == Index.TOKENIZED) {
+ if ( analyzer != null ) {
+ this.analyzer.addScopedAnalyzer( fieldName, analyzer );
+ }
}
+ else {
+ //no analyzer is used, add a fake one for queries
+ this.analyzer.addScopedAnalyzer( fieldName, passThroughAnalyzer );
+ }
}
protected Float getBoost(XProperty member, org.hibernate.search.annotations.Field
fieldAnn) {
Modified:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/engine/DocumentBuilderIndexedEntity.java
===================================================================
---
search/trunk/hibernate-search/src/main/java/org/hibernate/search/engine/DocumentBuilderIndexedEntity.java 2010-05-20
17:56:27 UTC (rev 19568)
+++
search/trunk/hibernate-search/src/main/java/org/hibernate/search/engine/DocumentBuilderIndexedEntity.java 2010-05-20
17:57:24 UTC (rev 19569)
@@ -221,7 +221,7 @@
if ( analyzer == null ) {
throw new AssertionFailure( "Analyzer should not be undefined" );
}
- this.analyzer.addScopedAnalyzer( fieldName, analyzer );
+ addToScopedAnalyzer( fieldName, analyzer, Index.UN_TOKENIZED );
}
}
}
Added:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/util/PassThroughAnalyzer.java
===================================================================
---
search/trunk/hibernate-search/src/main/java/org/hibernate/search/util/PassThroughAnalyzer.java
(rev 0)
+++
search/trunk/hibernate-search/src/main/java/org/hibernate/search/util/PassThroughAnalyzer.java 2010-05-20
17:57:24 UTC (rev 19569)
@@ -0,0 +1,39 @@
+package org.hibernate.search.util;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * Analyzer that applies no operation whatsoever to the input stream.
+ * This is useful for queries operating on non-tokenized fields.
+ *
+ * TODO there is probably a way to make this much more efficient by
+ * reimplementing TokenStream to take the Reader and pass the stream through as a single token
+ *
+ * @author Emmanuel Bernard
+ */
+public class PassThroughAnalyzer extends Analyzer {
+
+
+
+ @Override
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new PassThroughTokenizer(reader);
+ }
+
+ private static class PassThroughTokenizer extends CharTokenizer {
+ public PassThroughTokenizer(Reader input) {
+ super( input );
+ }
+
+ @Override
+ protected boolean isTokenChar(char c) {
+ return true;
+ }
+ }
+}
Modified:
search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/analyzer/AnalyzerTest.java
===================================================================
---
search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/analyzer/AnalyzerTest.java 2010-05-20
17:56:27 UTC (rev 19568)
+++
search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/analyzer/AnalyzerTest.java 2010-05-20
17:57:24 UTC (rev 19569)
@@ -179,6 +179,20 @@
session.close();
}
+ public void testNotAnalyzedFieldAndScopedAnalyzer() throws Exception {
+ FullTextSession session = Search.getFullTextSession( openSession() );
+ SearchFactory searchFactory = session.getSearchFactory();
+ Analyzer analyzer = searchFactory.getAnalyzer( MyEntity.class );
+
+ // "notAnalyzed" is mapped with Index.UN_TOKENIZED, so the scoped analyzer is
+ // expected to pass the input through untouched: the whole string must come
+ // back as one single token.
+ Token[] tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "notAnalyzed",
"pass through" );
+ AnalyzerUtils.assertTokensEqual( tokens, new String[] { "pass through" } );
+
+ session.close();
+ }
+
protected Class<?>[] getMappings() {
return new Class[] { MyEntity.class, Article.class };
}
Modified:
search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/analyzer/MyEntity.java
===================================================================
---
search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/analyzer/MyEntity.java 2010-05-20
17:56:27 UTC (rev 19568)
+++
search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/analyzer/MyEntity.java 2010-05-20
17:57:24 UTC (rev 19569)
@@ -59,6 +59,17 @@
@Analyzer(impl = Test2Analyzer.class)
private String field;
+ @Field(index = Index.UN_TOKENIZED)
+ private String notAnalyzed;
+
+ public String getNotAnalyzed() {
+ return notAnalyzed;
+ }
+
+ public void setNotAnalyzed(String notAnalyzed) {
+ this.notAnalyzed = notAnalyzed;
+ }
+
@IndexedEmbedded
@Embedded
private MyComponent component;