Author: hardy.ferentschik
Date: 2008-12-02 09:28:28 -0500 (Tue, 02 Dec 2008)
New Revision: 15637
Added:
search/trunk/src/java/org/hibernate/search/analyzer/
search/trunk/src/java/org/hibernate/search/analyzer/Discriminator.java
search/trunk/src/java/org/hibernate/search/annotations/AnalyzerDiscriminator.java
search/trunk/src/test/org/hibernate/search/test/analyzer/Article.java
search/trunk/src/test/org/hibernate/search/test/analyzer/BlogEntry.java
search/trunk/src/test/org/hibernate/search/test/analyzer/LanguageDiscriminator.java
Modified:
search/trunk/src/java/org/hibernate/search/backend/AddLuceneWork.java
search/trunk/src/java/org/hibernate/search/backend/Workspace.java
search/trunk/src/java/org/hibernate/search/backend/impl/lucene/works/AddWorkDelegate.java
search/trunk/src/java/org/hibernate/search/engine/DocumentBuilderContainedEntity.java
search/trunk/src/java/org/hibernate/search/engine/DocumentBuilderIndexedEntity.java
search/trunk/src/java/org/hibernate/search/util/ScopedAnalyzer.java
search/trunk/src/test/org/hibernate/search/test/analyzer/AnalyzerTest.java
Log:
HSEARCH-221
Implementation of AnalyzerDiscriminator framework
Added: search/trunk/src/java/org/hibernate/search/analyzer/Discriminator.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/analyzer/Discriminator.java
(rev 0)
+++ search/trunk/src/java/org/hibernate/search/analyzer/Discriminator.java 2008-12-02
14:28:28 UTC (rev 15637)
@@ -0,0 +1,23 @@
+// $Id:$
+package org.hibernate.search.analyzer;
+
+/**
+ * Allows to choose a by-name defined analyzer at runtime.
+ *
+ * @author Hardy Ferentschik
+ */
+public interface Discriminator {
+
+ /**
+ * Allows to specify the analyzer to be used for the given field based on the specified
entity state.
+ *
+ * @param value The value of the field the
<code>@AnalyzerDiscriminator</code> annotation was placed on.
<code>null</code>
+ * if the annotation was placed on class level.
+ * @param entity The entity to be indexed.
+ * @param field The document field.
+ * @return The name of a defined analyzer to be used for the specified
<code>field</code> or <code>null</code> if the
+ * default analyzer for this field should be used.
+ * @see org.hibernate.search.annotations.AnalyzerDef
+ */
+ String getAnanyzerDefinitionName(Object value, Object entity, String field);
+}
Property changes on:
search/trunk/src/java/org/hibernate/search/analyzer/Discriminator.java
___________________________________________________________________
Name: svn:keywords
+ Id
Added: search/trunk/src/java/org/hibernate/search/annotations/AnalyzerDiscriminator.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/annotations/AnalyzerDiscriminator.java
(rev 0)
+++
search/trunk/src/java/org/hibernate/search/annotations/AnalyzerDiscriminator.java 2008-12-02
14:28:28 UTC (rev 15637)
@@ -0,0 +1,22 @@
+// $Id:$
+package org.hibernate.search.annotations;
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Documented;
+
+import org.hibernate.search.analyzer.Discriminator;
+
+/**
+ * Allows to dynamically select a named analyzer through a
<code>Discriminator</code> implementation.
+ *
+ * @author Hardy Ferentschik
+ */
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ ElementType.TYPE, ElementType.FIELD, ElementType.METHOD })
+@Documented
+public @interface AnalyzerDiscriminator {
+ public Class<? extends Discriminator> impl();
+}
Property changes on:
search/trunk/src/java/org/hibernate/search/annotations/AnalyzerDiscriminator.java
___________________________________________________________________
Name: svn:keywords
+ Id
Modified: search/trunk/src/java/org/hibernate/search/backend/AddLuceneWork.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/backend/AddLuceneWork.java 2008-12-02
14:21:37 UTC (rev 15636)
+++ search/trunk/src/java/org/hibernate/search/backend/AddLuceneWork.java 2008-12-02
14:28:28 UTC (rev 15637)
@@ -2,6 +2,7 @@
package org.hibernate.search.backend;
import java.io.Serializable;
+import java.util.Map;
import org.apache.lucene.document.Document;
@@ -12,14 +13,29 @@
private static final long serialVersionUID = -2450349312813297371L;
+ private final Map<String, String> fieldToAnalyzerMap;
+
public AddLuceneWork(Serializable id, String idInString, Class entity, Document
document) {
- super( id, idInString, entity, document, false );
+ this( id, idInString, entity, document, false );
}
public AddLuceneWork(Serializable id, String idInString, Class entity, Document
document, boolean batch) {
+ this( id, idInString, entity, document, null, batch );
+ }
+
+ public AddLuceneWork(Serializable id, String idInString, Class entity, Document
document, Map<String, String> fieldToAnalyzerMap) {
+ this( id, idInString, entity, document, fieldToAnalyzerMap, false );
+ }
+
+ public AddLuceneWork(Serializable id, String idInString, Class entity, Document
document, Map<String, String> fieldToAnalyzerMap, boolean batch) {
super( id, idInString, entity, document, batch );
+ this.fieldToAnalyzerMap = fieldToAnalyzerMap;
}
+ public Map<String, String> getFieldToAnalyzerMap() {
+ return fieldToAnalyzerMap;
+ }
+
@Override
public <T> T getWorkDelegate(final WorkVisitor<T> visitor) {
return visitor.getDelegate( this );
Modified: search/trunk/src/java/org/hibernate/search/backend/Workspace.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/backend/Workspace.java 2008-12-02 14:21:37
UTC (rev 15636)
+++ search/trunk/src/java/org/hibernate/search/backend/Workspace.java 2008-12-02 14:28:28
UTC (rev 15637)
@@ -81,6 +81,10 @@
return searchFactoryImplementor.getDocumentBuilderIndexedEntity( entity );
}
+ public Analyzer getAnalyzer(String name) {
+ return searchFactoryImplementor.getAnalyzer( name );
+ }
+
/**
* If optimization has not been forced give a change to configured OptimizerStrategy
* to optimize the index.
Modified:
search/trunk/src/java/org/hibernate/search/backend/impl/lucene/works/AddWorkDelegate.java
===================================================================
---
search/trunk/src/java/org/hibernate/search/backend/impl/lucene/works/AddWorkDelegate.java 2008-12-02
14:21:37 UTC (rev 15636)
+++
search/trunk/src/java/org/hibernate/search/backend/impl/lucene/works/AddWorkDelegate.java 2008-12-02
14:28:28 UTC (rev 15637)
@@ -1,6 +1,7 @@
package org.hibernate.search.backend.impl.lucene.works;
import java.io.IOException;
+import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
@@ -9,14 +10,16 @@
import org.slf4j.Logger;
import org.hibernate.search.SearchException;
+import org.hibernate.search.backend.AddLuceneWork;
import org.hibernate.search.backend.LuceneWork;
import org.hibernate.search.backend.Workspace;
import org.hibernate.search.backend.impl.lucene.IndexInteractionType;
import org.hibernate.search.engine.DocumentBuilderIndexedEntity;
import org.hibernate.search.util.LoggerFactory;
+import org.hibernate.search.util.ScopedAnalyzer;
/**
- * Stateless implementation that performs a AddLuceneWork.
+ * Stateless implementation that performs a <code>AddLuceneWork</code>.
*
* @author Emmanuel Bernard
* @author Hardy Ferentschik
@@ -40,8 +43,11 @@
}
public void performWork(LuceneWork work, IndexWriter writer) {
+ @SuppressWarnings("unchecked")
DocumentBuilderIndexedEntity documentBuilder = workspace.getDocumentBuilder(
work.getEntityClass() );
- Analyzer analyzer = documentBuilder.getAnalyzer();
+ Map<String, String> fieldToAnalyzerMap = ( ( AddLuceneWork ) work
).getFieldToAnalyzerMap();
+ ScopedAnalyzer analyzer = ( ScopedAnalyzer ) documentBuilder.getAnalyzer();
+ analyzer = updateAnalyzerMappings( analyzer, fieldToAnalyzerMap, workspace );
Similarity similarity = documentBuilder.getSimilarity();
if ( log.isTraceEnabled() ) {
log.trace(
@@ -64,8 +70,37 @@
}
}
+ /**
+ * Allows to override the otherwise static field to analyzer mapping in
<code>scopedAnalyzer</code>.
+ *
+ * @param scopedAnalyzer The scoped analyzer created at startup time.
+ * @param fieldToAnalyzerMap A map of <code>Document</code> field names to
analyzer names. This map gets created
+ * when the Lucene <code>Document</code> gets created and uses the state of
the entity to be indexed to determine analyzers
+ * dynamically at index time.
+ * @param workspace The current workspace.
+ * @return <code>scopedAnalyzer</code> in case
<code>fieldToAnalyzerMap</code> is <code>null</code> or empty.
Otherwise
+ * a clone of <code>scopedAnalyzer</code> is created where the analyzers get
overridden according to <code>fieldToAnalyzerMap</code>.
+ */
+ private ScopedAnalyzer updateAnalyzerMappings(ScopedAnalyzer scopedAnalyzer,
Map<String, String> fieldToAnalyzerMap, Workspace workspace) {
+ // for backwards compatibility
+ if ( fieldToAnalyzerMap == null || fieldToAnalyzerMap.isEmpty() ) {
+ return scopedAnalyzer;
+ }
+
+ ScopedAnalyzer analyzerClone = scopedAnalyzer.clone();
+ for ( Map.Entry<String, String> entry : fieldToAnalyzerMap.entrySet() ) {
+ Analyzer analyzer = workspace.getAnalyzer( entry.getValue() );
+ if ( analyzer == null ) {
+ log.warn( "Unable to retrieve named analyzer: " + entry.getValue() );
+ }
+ else {
+ analyzerClone.addScopedAnalyzer( entry.getKey(), analyzer );
+ }
+ }
+ return analyzerClone;
+ }
+
public void performWork(LuceneWork work, IndexReader reader) {
throw new UnsupportedOperationException();
}
-
}
Modified:
search/trunk/src/java/org/hibernate/search/engine/DocumentBuilderContainedEntity.java
===================================================================
---
search/trunk/src/java/org/hibernate/search/engine/DocumentBuilderContainedEntity.java 2008-12-02
14:21:37 UTC (rev 15636)
+++
search/trunk/src/java/org/hibernate/search/engine/DocumentBuilderContainedEntity.java 2008-12-02
14:28:28 UTC (rev 15637)
@@ -26,8 +26,10 @@
import org.hibernate.annotations.common.reflection.XProperty;
import org.hibernate.annotations.common.util.StringHelper;
import org.hibernate.search.SearchException;
+import org.hibernate.search.analyzer.Discriminator;
import org.hibernate.search.annotations.AnalyzerDef;
import org.hibernate.search.annotations.AnalyzerDefs;
+import org.hibernate.search.annotations.AnalyzerDiscriminator;
import org.hibernate.search.annotations.Boost;
import org.hibernate.search.annotations.ClassBridge;
import org.hibernate.search.annotations.ClassBridges;
@@ -101,7 +103,7 @@
Set<XClass> processedClasses = new HashSet<XClass>();
processedClasses.add( clazz );
- initializeMembers( clazz, metadata, true, "", processedClasses, context );
+ initializeClass( clazz, metadata, true, "", processedClasses, context );
this.analyzer.setGlobalAnalyzer( metadata.analyzer );
@@ -115,8 +117,8 @@
return isRoot;
}
- private void initializeMembers(XClass clazz, PropertiesMetadata propertiesMetadata,
boolean isRoot, String prefix,
- Set<XClass> processedClasses, InitContext context) {
+ private void initializeClass(XClass clazz, PropertiesMetadata propertiesMetadata,
boolean isRoot, String prefix,
+ Set<XClass> processedClasses, InitContext context) {
List<XClass> hierarchy = new ArrayList<XClass>();
for ( XClass currClass = clazz; currClass != null; currClass =
currClass.getSuperclass() ) {
hierarchy.add( currClass );
@@ -149,14 +151,24 @@
}
/**
- * Checks for class level annotations.
+ * Check and initialize class level annotations.
+ *
+ * @param clazz The class to process.
+ * @param propertiesMetadata The meta data holder.
+ * @param isRoot Flag indicating if the specified class is a root entity, meaning the
start of a chain of indexed
+ * entities.
+ * @param prefix The current prefix used for the <code>Document</code> field
names.
+ * @param context Handle to default configuration settings.
*/
private void initalizeClassLevelAnnotations(XClass clazz, PropertiesMetadata
propertiesMetadata, boolean isRoot, String prefix, InitContext context) {
+
+ // check for a class level specified analyzer
Analyzer analyzer = getAnalyzer( clazz, context );
-
if ( analyzer != null ) {
propertiesMetadata.analyzer = analyzer;
}
+
+ // check for AnalyzerDefs annotations
checkForAnalyzerDefs( clazz, context );
// Check for any ClassBridges annotation.
@@ -164,16 +176,18 @@
if ( classBridgesAnn != null ) {
ClassBridge[] cbs = classBridgesAnn.value();
for ( ClassBridge cb : cbs ) {
- bindClassAnnotation( prefix, propertiesMetadata, cb, context );
+ bindClassBridgeAnnotation( prefix, propertiesMetadata, cb, context );
}
}
// Check for any ClassBridge style of annotations.
ClassBridge classBridgeAnn = clazz.getAnnotation( ClassBridge.class );
if ( classBridgeAnn != null ) {
- bindClassAnnotation( prefix, propertiesMetadata, classBridgeAnn, context );
+ bindClassBridgeAnnotation( prefix, propertiesMetadata, classBridgeAnn, context );
}
+ checkForAnalyzerDiscriminator( clazz, propertiesMetadata );
+
// Get similarity
//TODO: similarity form @IndexedEmbedded are not taken care of. Exception??
if ( isRoot ) {
@@ -190,6 +204,7 @@
checkForField( member, propertiesMetadata, prefix, context );
checkForFields( member, propertiesMetadata, prefix, context );
checkForAnalyzerDefs( member, context );
+ checkForAnalyzerDiscriminator( member, propertiesMetadata );
checkForIndexedEmbedded( member, propertiesMetadata, prefix, processedClasses, context
);
checkForContainedIn( member, propertiesMetadata );
}
@@ -241,7 +256,30 @@
context.addAnalyzerDef( def );
}
+ private void checkForAnalyzerDiscriminator(XAnnotatedElement annotatedElement,
PropertiesMetadata propertiesMetadata) {
+ AnalyzerDiscriminator discriminiatorAnn = annotatedElement.getAnnotation(
AnalyzerDiscriminator.class );
+ if ( discriminiatorAnn != null ) {
+ if ( propertiesMetadata.discriminator != null ) {
+ throw new SearchException(
+ "Multiple AnalyzerDiscriminator defined in the same class hierarchy: " +
beanClass.getName()
+ );
+ }
+
+ Class<? extends Discriminator> discriminatorClass = discriminiatorAnn.impl();
+ try {
+ propertiesMetadata.discriminator = discriminatorClass.newInstance();
+ }
+ catch ( Exception e ) {
+ throw new SearchException(
+ "Unable to instantiate analyzer discriminator implementation: " +
discriminatorClass.getName()
+ );
+ }
+ if ( annotatedElement instanceof XMember ) {
+ propertiesMetadata.discriminatorGetter = ( XMember ) annotatedElement;
+ }
+ }
+ }
public Similarity getSimilarity() {
return similarity;
@@ -333,7 +371,7 @@
Analyzer analyzer = getAnalyzer( member, context );
metadata.analyzer = analyzer != null ? analyzer : propertiesMetadata.analyzer;
String localPrefix = buildEmbeddedPrefix( prefix, embeddedAnn, member );
- initializeMembers( elementClass, metadata, false, localPrefix, processedClasses,
context );
+ initializeClass( elementClass, metadata, false, localPrefix, processedClasses,
context );
/**
* We will only index the "expected" type but that's OK, HQL cannot do
downcasting either
*/
@@ -396,8 +434,7 @@
return ReflectionHelper.getAttributeName( member, name );
}
- private void bindClassAnnotation(String prefix, PropertiesMetadata propertiesMetadata,
ClassBridge ann, InitContext context) {
- //FIXME name should be prefixed
+ private void bindClassBridgeAnnotation(String prefix, PropertiesMetadata
propertiesMetadata, ClassBridge ann, InitContext context) {
String fieldName = prefix + ann.name();
propertiesMetadata.classNames.add( fieldName );
propertiesMetadata.classStores.add( getStore( ann.store() ) );
@@ -641,6 +678,8 @@
protected static class PropertiesMetadata {
public Float boost;
public Analyzer analyzer;
+ public Discriminator discriminator;
+ public XMember discriminatorGetter;
public final List<String> fieldNames = new ArrayList<String>();
public final List<XMember> fieldGetters = new ArrayList<XMember>();
public final List<FieldBridge> fieldBridges = new
ArrayList<FieldBridge>();
Modified:
search/trunk/src/java/org/hibernate/search/engine/DocumentBuilderIndexedEntity.java
===================================================================
---
search/trunk/src/java/org/hibernate/search/engine/DocumentBuilderIndexedEntity.java 2008-12-02
14:21:37 UTC (rev 15636)
+++
search/trunk/src/java/org/hibernate/search/engine/DocumentBuilderIndexedEntity.java 2008-12-02
14:28:28 UTC (rev 15637)
@@ -7,6 +7,9 @@
import java.util.Collection;
import java.util.List;
import java.util.Map;
+import java.util.HashMap;
+import java.util.Set;
+import java.util.HashSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
@@ -23,6 +26,7 @@
import org.hibernate.annotations.common.util.ReflectHelper;
import org.hibernate.proxy.HibernateProxy;
import org.hibernate.search.SearchException;
+import org.hibernate.search.analyzer.Discriminator;
import org.hibernate.search.annotations.DocumentId;
import org.hibernate.search.annotations.Index;
import org.hibernate.search.annotations.ProvidedId;
@@ -295,8 +299,7 @@
String idInString = idBridge.objectToString( id );
if ( workType == WorkType.ADD ) {
- Document doc = getDocument( entity, id );
- queue.add( new AddLuceneWork( id, idInString, entityClass, doc ) );
+ queue.add( createAddWork( entityClass, entity, id, idInString, false ) );
}
else if ( workType == WorkType.DELETE || workType == WorkType.PURGE ) {
queue.add( new DeleteLuceneWork( id, idInString, entityClass ) );
@@ -305,7 +308,6 @@
queue.add( new PurgeAllLuceneWork( entityClass ) );
}
else if ( workType == WorkType.UPDATE || workType == WorkType.COLLECTION ) {
- Document doc = getDocument( entity, id );
/**
* even with Lucene 2.1, use of indexWriter to update is not an option
* We can only delete by term, and the index doesn't have a term that
@@ -314,12 +316,11 @@
* double file opening.
*/
queue.add( new DeleteLuceneWork( id, idInString, entityClass ) );
- queue.add( new AddLuceneWork( id, idInString, entityClass, doc ) );
+ queue.add( createAddWork( entityClass, entity, id, idInString, false ) );
}
else if ( workType == WorkType.INDEX ) {
- Document doc = getDocument( entity, id );
queue.add( new DeleteLuceneWork( id, idInString, entityClass ) );
- queue.add( new AddLuceneWork( id, idInString, entityClass, doc, true ) );
+ queue.add( createAddWork( entityClass, entity, id, idInString, true ) );
}
else {
throw new AssertionFailure( "Unknown WorkType: " + workType );
@@ -328,14 +329,34 @@
super.addWorkToQueue( entityClass, entity, id, workType, queue,
searchFactoryImplementor );
}
+ private AddLuceneWork createAddWork(Class<T> entityClass, T entity, Serializable
id, String idInString, boolean isBatch) {
+ Map<String, String> fieldToAnalyzerMap = new HashMap<String, String>();
+ Document doc = getDocument( entity, id, fieldToAnalyzerMap );
+ AddLuceneWork addWork;
+ if ( fieldToAnalyzerMap.isEmpty() ) {
+ addWork = new AddLuceneWork( id, idInString, entityClass, doc, isBatch );
+ }
+ else {
+ addWork = new AddLuceneWork( id, idInString, entityClass, doc, fieldToAnalyzerMap,
isBatch );
+ }
+ return addWork;
+ }
+
/**
* Builds the Lucene <code>Document</code> for a given entity
<code>instance</code> and its <code>id</code>.
*
* @param instance The entity for which to build the matching Lucene
<code>Document</code>
* @param id the entity id.
+ * @param fieldToAnalyzerMap this map gets populated while generating the
<code>Document</code>.
+ * It allows to specify for any document field a named analyzer to use. This parameter
cannot be <code>null</code>.
+ *
* @return The Lucene <code>Document</code> for the specified entity.
*/
- public Document getDocument(T instance, Serializable id) {
+ public Document getDocument(T instance, Serializable id, Map<String, String>
fieldToAnalyzerMap) {
+ if ( fieldToAnalyzerMap == null ) {
+ throw new IllegalArgumentException( "fieldToAnalyzerMap cannot be null" );
+ }
+
Document doc = new Document();
final Class<?> entityType = Hibernate.getClass( instance );
if ( metadata.boost != null ) {
@@ -361,16 +382,21 @@
idBridge.set( idKeywordName, id, doc, luceneOptions );
// finally add all other document fields
- buildDocumentFields( instance, doc, metadata );
+ Set<String> processedFieldNames = new HashSet<String>();
+ buildDocumentFields( instance, doc, metadata, fieldToAnalyzerMap, processedFieldNames
);
return doc;
}
- private void buildDocumentFields(Object instance, Document doc, PropertiesMetadata
propertiesMetadata) {
+ private void buildDocumentFields(Object instance, Document doc, PropertiesMetadata
propertiesMetadata, Map<String, String> fieldToAnalyzerMap,
+ Set<String> processedFieldNames) {
if ( instance == null ) {
return;
}
- //needed for field access: I cannot work in the proxied version
+
+ // needed for field access: I cannot work in the proxied version
Object unproxiedInstance = unproxy( instance );
+
+ // process the class bridges
for ( int i = 0; i < propertiesMetadata.classBridges.size(); i++ ) {
FieldBridge fb = propertiesMetadata.classBridges.get( i );
fb.set(
@@ -378,6 +404,8 @@
doc, propertiesMetadata.getClassLuceneOptions( i )
);
}
+
+ // process the indexed fields
for ( int i = 0; i < propertiesMetadata.fieldNames.size(); i++ ) {
XMember member = propertiesMetadata.fieldGetters.get( i );
Object value = ReflectionHelper.getMemberValue( unproxiedInstance, member );
@@ -386,6 +414,13 @@
propertiesMetadata.getFieldLuceneOptions( i )
);
}
+
+ // allow analyzer override for the fields added by the class and field bridges
+ allowAnalyzerDiscriminatorOverride(
+ doc, propertiesMetadata, fieldToAnalyzerMap, processedFieldNames, unproxiedInstance
+ );
+
+ // recursively process embedded objects
for ( int i = 0; i < propertiesMetadata.embeddedGetters.size(); i++ ) {
XMember member = propertiesMetadata.embeddedGetters.get( i );
Object value = ReflectionHelper.getMemberValue( unproxiedInstance, member );
@@ -398,21 +433,27 @@
switch ( propertiesMetadata.embeddedContainers.get( i ) ) {
case ARRAY:
for ( Object arrayValue : ( Object[] ) value ) {
- buildDocumentFields( arrayValue, doc, embeddedMetadata );
+ buildDocumentFields(
+ arrayValue, doc, embeddedMetadata, fieldToAnalyzerMap, processedFieldNames
+ );
}
break;
case COLLECTION:
for ( Object collectionValue : ( Collection ) value ) {
- buildDocumentFields( collectionValue, doc, embeddedMetadata );
+ buildDocumentFields(
+ collectionValue, doc, embeddedMetadata, fieldToAnalyzerMap, processedFieldNames
+ );
}
break;
case MAP:
for ( Object collectionValue : ( ( Map ) value ).values() ) {
- buildDocumentFields( collectionValue, doc, embeddedMetadata );
+ buildDocumentFields(
+ collectionValue, doc, embeddedMetadata, fieldToAnalyzerMap, processedFieldNames
+ );
}
break;
case OBJECT:
- buildDocumentFields( value, doc, embeddedMetadata );
+ buildDocumentFields( value, doc, embeddedMetadata, fieldToAnalyzerMap,
processedFieldNames );
break;
default:
throw new AssertionFailure(
@@ -423,6 +464,40 @@
}
}
+ /**
+ * Allows an analyzer discriminator to override the analyzer used for any field in the
Lucene document.
+ *
+ * @param doc The Lucene <code>Document</code> which shall be indexed.
+ * @param propertiesMetadata The metadata for the entity we currently add to the
document.
+ * @param fieldToAnalyzerMap This map contains the actual override data. It is a map
between document field names and
+ * analyzer definition names. This map will be added to the
<code>Work</code> instance and processed at actual indexing time.
+ * @param processedFieldNames A list of field names we have already processed.
+ * @param unproxiedInstance The entity we currently "add" to the document.
+ */
+ private void allowAnalyzerDiscriminatorOverride(Document doc, PropertiesMetadata
propertiesMetadata, Map<String, String> fieldToAnalyzerMap, Set<String>
processedFieldNames, Object unproxiedInstance) {
+ Discriminator discriminator = propertiesMetadata.discriminator;
+ if ( discriminator == null ) {
+ return;
+ }
+
+ Object value = null;
+ if ( propertiesMetadata.discriminatorGetter != null ) {
+ value = ReflectionHelper.getMemberValue( unproxiedInstance,
propertiesMetadata.discriminatorGetter );
+ }
+
+ // now we give the discriminator the opportunity to specify an analyzer per field
+ for ( Object o : doc.getFields() ) {
+ Field field = ( Field ) o;
+ if ( !processedFieldNames.contains( field.name() ) ) {
+ String analyzerName = discriminator.getAnanyzerDefinitionName( value,
unproxiedInstance, field.name() );
+ if ( analyzerName != null ) {
+ fieldToAnalyzerMap.put( field.name(), analyzerName );
+ }
+ processedFieldNames.add( field.name() );
+ }
+ }
+ }
+
private Object unproxy(Object value) {
//FIXME this service should be part of Core?
if ( value instanceof HibernateProxy ) {
Modified: search/trunk/src/java/org/hibernate/search/util/ScopedAnalyzer.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/util/ScopedAnalyzer.java 2008-12-02
14:21:37 UTC (rev 15636)
+++ search/trunk/src/java/org/hibernate/search/util/ScopedAnalyzer.java 2008-12-02
14:28:28 UTC (rev 15637)
@@ -16,11 +16,18 @@
* @author Emmanuel Bernard
*/
public class ScopedAnalyzer extends Analyzer {
+ private Analyzer globalAnalyzer;
+ private Map<String, Analyzer> scopedAnalyzers = new HashMap<String,
Analyzer>();
+
public ScopedAnalyzer() {
}
- private Analyzer globalAnalyzer;
- private Map<String, Analyzer> scopedAnalyzers = new HashMap<String,
Analyzer>();
+ private ScopedAnalyzer( Analyzer globalAnalyzer, Map<String, Analyzer>
scopedAnalyzers) {
+ this.globalAnalyzer = globalAnalyzer;
+ for ( Map.Entry<String, Analyzer> entry : scopedAnalyzers.entrySet() ) {
+ addScopedAnalyzer( entry.getKey(), entry.getValue() );
+ }
+ }
public void setGlobalAnalyzer( Analyzer globalAnalyzer ) {
this.globalAnalyzer = globalAnalyzer;
@@ -45,4 +52,9 @@
}
return analyzer;
}
+
+ public ScopedAnalyzer clone() {
+ ScopedAnalyzer clone = new ScopedAnalyzer( globalAnalyzer, scopedAnalyzers );
+ return clone;
+ }
}
Modified: search/trunk/src/test/org/hibernate/search/test/analyzer/AnalyzerTest.java
===================================================================
--- search/trunk/src/test/org/hibernate/search/test/analyzer/AnalyzerTest.java 2008-12-02
14:21:37 UTC (rev 15636)
+++ search/trunk/src/test/org/hibernate/search/test/analyzer/AnalyzerTest.java 2008-12-02
14:28:28 UTC (rev 15637)
@@ -1,6 +1,10 @@
// $Id$
package org.hibernate.search.test.analyzer;
+import java.util.HashSet;
+import java.util.Set;
+import javax.print.attribute.HashAttributeSet;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -8,94 +12,154 @@
import org.slf4j.Logger;
import org.hibernate.Transaction;
+import org.hibernate.annotations.common.reflection.ReflectionManager;
+import org.hibernate.annotations.common.reflection.XClass;
import org.hibernate.search.FullTextQuery;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.Search;
import org.hibernate.search.SearchFactory;
+import org.hibernate.search.SearchException;
+import org.hibernate.search.impl.InitContext;
+import org.hibernate.search.cfg.SearchConfiguration;
+import org.hibernate.search.cfg.SearchConfigurationFromHibernateCore;
+import org.hibernate.search.engine.DocumentBuilderContainedEntity;
import org.hibernate.search.test.SearchTestCase;
import org.hibernate.search.test.util.AnalyzerUtils;
import org.hibernate.search.util.LoggerFactory;
/**
* @author Emmanuel Bernard
+ * @author Hardy Ferentschik
*/
public class AnalyzerTest extends SearchTestCase {
public static final Logger log = LoggerFactory.make();
+ public void testAnalyzerDiscriminator() throws Exception {
+ Article germanArticle = new Article();
+ germanArticle.setLanguage( "de" );
+ germanArticle.setText( "aufeinanderschlügen" );
+ Set<Article> references = new HashSet<Article>();
+ references.add( germanArticle );
+
+
+ Article englishArticle = new Article();
+ englishArticle.setLanguage( "en" );
+ englishArticle.setText( "acknowledgment" );
+ englishArticle.setReferences( references );
+
+ FullTextSession s = Search.getFullTextSession( openSession() );
+ Transaction tx = s.beginTransaction();
+ s.persist( englishArticle );
+ tx.commit();
+
+ tx = s.beginTransaction();
+
+ // at query time we use a standard analyzer. We explicitly search for tokens which can
only be found if the
+ // right language specific stemmer was used at index time
+ QueryParser parser = new QueryParser( "references.text", new
StandardAnalyzer() );
+ org.apache.lucene.search.Query luceneQuery = parser.parse(
"aufeinanderschlug" );
+ FullTextQuery query = s.createFullTextQuery( luceneQuery );
+ assertEquals( 1, query.getResultSize() );
+
+ parser = new QueryParser( "text", new StandardAnalyzer() );
+ luceneQuery = parser.parse( "acknowledg" );
+ query = s.createFullTextQuery( luceneQuery );
+ assertEquals( 1, query.getResultSize() );
+
+ tx.commit();
+ s.close();
+ }
+
+ public void testMultipleAnalyzerDiscriminatorDefinitions() throws Exception {
+ SearchConfigurationFromHibernateCore searchConfig = new
SearchConfigurationFromHibernateCore( cfg );
+ ReflectionManager reflectionManager = searchConfig.getReflectionManager();
+ XClass xclass = reflectionManager.toXClass( BlogEntry.class );
+ InitContext context = new InitContext( searchConfig );
+ try {
+ new DocumentBuilderContainedEntity( xclass, context, reflectionManager );
+ fail();
+ }
+ catch ( SearchException e ) {
+ assertTrue( "Wrong error message", e.getMessage().startsWith( "Multiple
AnalyzerDiscriminator defined in the same class hierarchy" ));
+ }
+ }
+
public void testScopedAnalyzers() throws Exception {
MyEntity en = new MyEntity();
- en.setEntity("Entity");
- en.setField("Field");
- en.setProperty("Property");
- en.setComponent(new MyComponent());
- en.getComponent().setComponentProperty("component property");
- FullTextSession s = Search.getFullTextSession(openSession());
+ en.setEntity( "Entity" );
+ en.setField( "Field" );
+ en.setProperty( "Property" );
+ en.setComponent( new MyComponent() );
+ en.getComponent().setComponentProperty( "component property" );
+ FullTextSession s = Search.getFullTextSession( openSession() );
Transaction tx = s.beginTransaction();
- s.persist(en);
+ s.persist( en );
tx.commit();
tx = s.beginTransaction();
- QueryParser parser = new QueryParser("id", new StandardAnalyzer());
- org.apache.lucene.search.Query luceneQuery = parser.parse("entity:alarm");
- FullTextQuery query = s.createFullTextQuery(luceneQuery, MyEntity.class);
- assertEquals(1, query.getResultSize());
+ QueryParser parser = new QueryParser( "id", new StandardAnalyzer() );
+ org.apache.lucene.search.Query luceneQuery = parser.parse( "entity:alarm" );
+ FullTextQuery query = s.createFullTextQuery( luceneQuery, MyEntity.class );
+ assertEquals( 1, query.getResultSize() );
- luceneQuery = parser.parse("property:cat");
- query = s.createFullTextQuery(luceneQuery, MyEntity.class);
- assertEquals(1, query.getResultSize());
+ luceneQuery = parser.parse( "property:cat" );
+ query = s.createFullTextQuery( luceneQuery, MyEntity.class );
+ assertEquals( 1, query.getResultSize() );
- luceneQuery = parser.parse("field:energy");
- query = s.createFullTextQuery(luceneQuery, MyEntity.class);
- assertEquals(1, query.getResultSize());
+ luceneQuery = parser.parse( "field:energy" );
+ query = s.createFullTextQuery( luceneQuery, MyEntity.class );
+ assertEquals( 1, query.getResultSize() );
- luceneQuery = parser.parse("component.componentProperty:noise");
- query = s.createFullTextQuery(luceneQuery, MyEntity.class);
- assertEquals(1, query.getResultSize());
+ luceneQuery = parser.parse( "component.componentProperty:noise" );
+ query = s.createFullTextQuery( luceneQuery, MyEntity.class );
+ assertEquals( 1, query.getResultSize() );
- s.delete(query.uniqueResult());
+ s.delete( query.uniqueResult() );
tx.commit();
s.close();
}
public void testScopedAnalyzersFromSearchFactory() throws Exception {
- FullTextSession session = Search.getFullTextSession(openSession());
+ FullTextSession session = Search.getFullTextSession( openSession() );
SearchFactory searchFactory = session.getSearchFactory();
- Analyzer analyzer = searchFactory.getAnalyzer(MyEntity.class);
+ Analyzer analyzer = searchFactory.getAnalyzer( MyEntity.class );
// you can pass what so ever into the analysis since the used analyzers are
// returning the same tokens all the time. We just want to make sure that
// the right analyzers are used.
- Token[] tokens = AnalyzerUtils.tokensFromAnalysis(analyzer, "entity",
"");
- AnalyzerUtils.assertTokensEqual(tokens, new String[] { "alarm",
"dog", "performance" });
+ Token[] tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "entity",
"" );
+ AnalyzerUtils.assertTokensEqual( tokens, new String[] { "alarm",
"dog", "performance" } );
- tokens = AnalyzerUtils.tokensFromAnalysis(analyzer, "property",
"");
- AnalyzerUtils.assertTokensEqual(tokens, new String[] { "sound",
"cat", "speed" });
+ tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "property", ""
);
+ AnalyzerUtils.assertTokensEqual( tokens, new String[] { "sound",
"cat", "speed" } );
- tokens = AnalyzerUtils.tokensFromAnalysis(analyzer, "field", "");
- AnalyzerUtils.assertTokensEqual(tokens, new String[] { "music",
"elephant", "energy" });
+ tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "field", ""
);
+ AnalyzerUtils.assertTokensEqual( tokens, new String[] { "music",
"elephant", "energy" } );
- tokens = AnalyzerUtils.tokensFromAnalysis(analyzer,
"component.componentProperty", "");
- AnalyzerUtils.assertTokensEqual(tokens, new String[] { "noise",
"mouse", "light" });
+ tokens = AnalyzerUtils.tokensFromAnalysis( analyzer,
"component.componentProperty", "" );
+ AnalyzerUtils.assertTokensEqual( tokens, new String[] { "noise",
"mouse", "light" } );
// test border cases
try {
- searchFactory.getAnalyzer((Class) null);
- } catch ( IllegalArgumentException iae ) {
- log.debug("success");
+ searchFactory.getAnalyzer( ( Class ) null );
}
+ catch ( IllegalArgumentException iae ) {
+ log.debug( "success" );
+ }
try {
- searchFactory.getAnalyzer(String.class);
- } catch ( IllegalArgumentException iae ) {
- log.debug("success");
+ searchFactory.getAnalyzer( String.class );
}
+ catch ( IllegalArgumentException iae ) {
+ log.debug( "success" );
+ }
session.close();
}
protected Class[] getMappings() {
- return new Class[] { MyEntity.class };
+ return new Class[] { MyEntity.class, Article.class };
}
}
Added: search/trunk/src/test/org/hibernate/search/test/analyzer/Article.java
===================================================================
--- search/trunk/src/test/org/hibernate/search/test/analyzer/Article.java
(rev 0)
+++ search/trunk/src/test/org/hibernate/search/test/analyzer/Article.java 2008-12-02
14:28:28 UTC (rev 15637)
@@ -0,0 +1,94 @@
// $Id:$
package org.hibernate.search.test.analyzer;

import java.util.Set;
import javax.persistence.CascadeType;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.Id;
import javax.persistence.OneToMany;

import org.apache.solr.analysis.EnglishPorterFilterFactory;
import org.apache.solr.analysis.GermanStemFilterFactory;
import org.apache.solr.analysis.LowerCaseFilterFactory;
import org.apache.solr.analysis.StandardTokenizerFactory;

import org.hibernate.search.annotations.AnalyzerDef;
import org.hibernate.search.annotations.AnalyzerDefs;
import org.hibernate.search.annotations.AnalyzerDiscriminator;
import org.hibernate.search.annotations.DocumentId;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.search.annotations.IndexedEmbedded;
import org.hibernate.search.annotations.Store;
import org.hibernate.search.annotations.TokenFilterDef;
import org.hibernate.search.annotations.TokenizerDef;

/**
 * Test entity exercising a property level {@code @AnalyzerDiscriminator}:
 * the value of {@link #getLanguage()} selects at runtime which of the two
 * named analyzer definitions ("en" or "de") analyzes this instance.
 *
 * @author Hardy Ferentschik
 */
@Entity
@Indexed
@AnalyzerDefs({
		@AnalyzerDef(name = "en",
				tokenizer = @TokenizerDef(factory = StandardTokenizerFactory.class),
				filters = {
						@TokenFilterDef(factory = LowerCaseFilterFactory.class),
						@TokenFilterDef(factory = EnglishPorterFilterFactory.class)
				}),
		@AnalyzerDef(name = "de",
				tokenizer = @TokenizerDef(factory = StandardTokenizerFactory.class),
				filters = {
						@TokenFilterDef(factory = LowerCaseFilterFactory.class),
						@TokenFilterDef(factory = GermanStemFilterFactory.class)
				})
})
public class Article {

	private Integer id;
	private String language;
	private String text;
	private Set<Article> references;

	@Id
	@GeneratedValue
	@DocumentId
	public Integer getId() {
		return id;
	}

	public void setId(Integer id) {
		this.id = id;
	}

	// The discriminator receives this property's value and is expected to
	// return the matching analyzer definition name ("en"/"de").
	@Field(store = Store.YES)
	@AnalyzerDiscriminator(impl = LanguageDiscriminator.class)
	public String getLanguage() {
		return language;
	}

	public void setLanguage(String language) {
		this.language = language;
	}

	@Field(store = Store.YES)
	public String getText() {
		return text;
	}

	public void setText(String text) {
		this.text = text;
	}

	// depth = 1 limits embedded indexing to directly referenced articles.
	@OneToMany(cascade = CascadeType.ALL)
	@IndexedEmbedded(depth = 1)
	public Set<Article> getReferences() {
		return references;
	}

	public void setReferences(Set<Article> references) {
		this.references = references;
	}
}
+
Property changes on:
search/trunk/src/test/org/hibernate/search/test/analyzer/Article.java
___________________________________________________________________
Name: svn:keywords
+ Id
Added: search/trunk/src/test/org/hibernate/search/test/analyzer/BlogEntry.java
===================================================================
--- search/trunk/src/test/org/hibernate/search/test/analyzer/BlogEntry.java
(rev 0)
+++ search/trunk/src/test/org/hibernate/search/test/analyzer/BlogEntry.java 2008-12-02
14:28:28 UTC (rev 15637)
@@ -0,0 +1,94 @@
// $Id:$
package org.hibernate.search.test.analyzer;

import java.util.Set;
import javax.persistence.CascadeType;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.Id;
import javax.persistence.OneToMany;

import org.apache.solr.analysis.EnglishPorterFilterFactory;
import org.apache.solr.analysis.GermanStemFilterFactory;
import org.apache.solr.analysis.LowerCaseFilterFactory;
import org.apache.solr.analysis.StandardTokenizerFactory;

import org.hibernate.search.annotations.AnalyzerDef;
import org.hibernate.search.annotations.AnalyzerDefs;
import org.hibernate.search.annotations.AnalyzerDiscriminator;
import org.hibernate.search.annotations.DocumentId;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.search.annotations.IndexedEmbedded;
import org.hibernate.search.annotations.Store;
import org.hibernate.search.annotations.TokenFilterDef;
import org.hibernate.search.annotations.TokenizerDef;

/**
 * Test entity exercising {@code @AnalyzerDiscriminator} placed both on the
 * class and on the {@code language} property (unlike {@link Article}, which
 * uses only the property level placement).
 *
 * @author Hardy Ferentschik
 */
@Entity
@Indexed
@AnalyzerDefs({
		@AnalyzerDef(name = "en",
				tokenizer = @TokenizerDef(factory = StandardTokenizerFactory.class),
				filters = {
						@TokenFilterDef(factory = LowerCaseFilterFactory.class),
						@TokenFilterDef(factory = EnglishPorterFilterFactory.class)
				}),
		@AnalyzerDef(name = "de",
				tokenizer = @TokenizerDef(factory = StandardTokenizerFactory.class),
				filters = {
						@TokenFilterDef(factory = LowerCaseFilterFactory.class),
						@TokenFilterDef(factory = GermanStemFilterFactory.class)
				})
})
@AnalyzerDiscriminator(impl = LanguageDiscriminator.class)
public class BlogEntry {

	private Integer id;
	private String language;
	private String text;
	private Set<BlogEntry> references;

	@Id
	@GeneratedValue
	@DocumentId
	public Integer getId() {
		return id;
	}

	public void setId(Integer id) {
		this.id = id;
	}

	@Field(store = Store.YES)
	@AnalyzerDiscriminator(impl = LanguageDiscriminator.class)
	public String getLanguage() {
		return language;
	}

	public void setLanguage(String language) {
		this.language = language;
	}

	@Field(store = Store.YES)
	public String getText() {
		return text;
	}

	public void setText(String text) {
		this.text = text;
	}

	// depth = 1 limits embedded indexing to directly referenced entries.
	@OneToMany(cascade = CascadeType.ALL)
	@IndexedEmbedded(depth = 1)
	public Set<BlogEntry> getReferences() {
		return references;
	}

	public void setReferences(Set<BlogEntry> references) {
		this.references = references;
	}
}
\ No newline at end of file
Property changes on:
search/trunk/src/test/org/hibernate/search/test/analyzer/BlogEntry.java
___________________________________________________________________
Name: svn:keywords
+ Id
Added:
search/trunk/src/test/org/hibernate/search/test/analyzer/LanguageDiscriminator.java
===================================================================
--- search/trunk/src/test/org/hibernate/search/test/analyzer/LanguageDiscriminator.java
(rev 0)
+++
search/trunk/src/test/org/hibernate/search/test/analyzer/LanguageDiscriminator.java 2008-12-02
14:28:28 UTC (rev 15637)
@@ -0,0 +1,17 @@
+// $Id:$
+package org.hibernate.search.test.analyzer;
+
+import org.hibernate.search.analyzer.Discriminator;
+
+/**
+ * @author Hardy Ferentschik
+ */
+public class LanguageDiscriminator implements Discriminator {
+
+ public String getAnanyzerDefinitionName(Object value, Object entity, String field) {
+ if ( value == null || !( entity instanceof Article ) ) {
+ return null;
+ }
+ return (String) value;
+ }
+}
Property changes on:
search/trunk/src/test/org/hibernate/search/test/analyzer/LanguageDiscriminator.java
___________________________________________________________________
Name: svn:keywords
+ Id