[hibernate-commits] Hibernate SVN: r17828 - in search/trunk/src: main/java/org/hibernate/search/analyzer and 3 other directories.

hibernate-commits at lists.jboss.org hibernate-commits at lists.jboss.org
Fri Oct 23 12:51:24 EDT 2009


Author: epbernard
Date: 2009-10-23 12:51:24 -0400 (Fri, 23 Oct 2009)
New Revision: 17828

Added:
   search/trunk/src/test/java/org/hibernate/search/test/configuration/BlogEntry.java
Modified:
   search/trunk/src/main/docbook/en-US/modules/mapping.xml
   search/trunk/src/main/java/org/hibernate/search/analyzer/Discriminator.java
   search/trunk/src/main/java/org/hibernate/search/cfg/EntityDescriptor.java
   search/trunk/src/main/java/org/hibernate/search/cfg/EntityMapping.java
   search/trunk/src/main/java/org/hibernate/search/cfg/PropertyDescriptor.java
   search/trunk/src/main/java/org/hibernate/search/cfg/PropertyMapping.java
   search/trunk/src/main/java/org/hibernate/search/impl/MappingModelMetadataProvider.java
   search/trunk/src/test/java/org/hibernate/search/test/configuration/ProgrammaticMappingTest.java
Log:
HSEARCH-352 add analyzer discriminator support

Modified: search/trunk/src/main/docbook/en-US/modules/mapping.xml
===================================================================
--- search/trunk/src/main/docbook/en-US/modules/mapping.xml	2009-10-23 14:32:18 UTC (rev 17827)
+++ search/trunk/src/main/docbook/en-US/modules/mapping.xml	2009-10-23 16:51:24 UTC (rev 17828)
@@ -860,7 +860,7 @@
         <para>So far all the introduced ways to specify an analyzer were
         static. However, there are use cases where it is useful to select an
         analyzer depending on the current state of the entity to be indexed,
-        for example in multilingual application. For an
+        for example in multilingual applications. For an
         <classname>BlogEntry</classname> class for example the analyzer could
         depend on the language property of the entry. Depending on this
         property the correct language specific stemmer should be chosen to

Modified: search/trunk/src/main/java/org/hibernate/search/analyzer/Discriminator.java
===================================================================
--- search/trunk/src/main/java/org/hibernate/search/analyzer/Discriminator.java	2009-10-23 14:32:18 UTC (rev 17827)
+++ search/trunk/src/main/java/org/hibernate/search/analyzer/Discriminator.java	2009-10-23 16:51:24 UTC (rev 17828)
@@ -25,7 +25,7 @@
 package org.hibernate.search.analyzer;
 
 /**
- * Allows to choose a by name defines analyzer at runtime.
+ * Returns the expected discriminator name to use on the element evaluated
  *
  * @author Hardy Ferentschik
  */

Modified: search/trunk/src/main/java/org/hibernate/search/cfg/EntityDescriptor.java
===================================================================
--- search/trunk/src/main/java/org/hibernate/search/cfg/EntityDescriptor.java	2009-10-23 14:32:18 UTC (rev 17827)
+++ search/trunk/src/main/java/org/hibernate/search/cfg/EntityDescriptor.java	2009-10-23 16:51:24 UTC (rev 17828)
@@ -37,6 +37,7 @@
 	private Map<PropertyKey, PropertyDescriptor> properties = new HashMap<PropertyKey, PropertyDescriptor>();
 	private Map<String, Object> similarity;
 	private Map<String, Object> boost;
+	private Map<String, Object> analyzerDiscriminator;
 
 	public Map<String, Object> getIndexed() {
 		return indexed;
@@ -80,6 +81,14 @@
 		return boost;
 	}
 
+	public void setAnalyzerDiscriminator(Map<String, Object> analyzerDiscriminator) {
+		this.analyzerDiscriminator = analyzerDiscriminator;
+	}
+
+	public Map<String, Object> getAnalyzerDiscriminator() {
+		return analyzerDiscriminator;
+	}
+
 	private static class PropertyKey {
 		private String name;
 		private ElementType type;

Modified: search/trunk/src/main/java/org/hibernate/search/cfg/EntityMapping.java
===================================================================
--- search/trunk/src/main/java/org/hibernate/search/cfg/EntityMapping.java	2009-10-23 14:32:18 UTC (rev 17827)
+++ search/trunk/src/main/java/org/hibernate/search/cfg/EntityMapping.java	2009-10-23 16:51:24 UTC (rev 17828)
@@ -30,6 +30,8 @@
 
 import org.apache.solr.analysis.TokenizerFactory;
 
+import org.hibernate.search.analyzer.Discriminator;
+
 /**
  * @author Emmanuel Bernard
  */
@@ -59,6 +61,13 @@
 		return this;
 	}
 
+	public EntityMapping analyzerDiscriminator(Class<? extends Discriminator> discriminator) {
+		final Map<String, Object> discriminatorAnn = new HashMap<String, Object>();
+		discriminatorAnn.put( "impl", discriminator );
+		entity.setAnalyzerDiscriminator(discriminatorAnn);
+		return this;
+	}
+
 	public PropertyMapping property(String name, ElementType type) {
 		return new PropertyMapping(name, type, entity, mapping);
 	}

Modified: search/trunk/src/main/java/org/hibernate/search/cfg/PropertyDescriptor.java
===================================================================
--- search/trunk/src/main/java/org/hibernate/search/cfg/PropertyDescriptor.java	2009-10-23 14:32:18 UTC (rev 17827)
+++ search/trunk/src/main/java/org/hibernate/search/cfg/PropertyDescriptor.java	2009-10-23 16:51:24 UTC (rev 17828)
@@ -37,6 +37,7 @@
 	private String name;
 	private Collection<Map<String, Object>> fields = new ArrayList<Map<String, Object>>();
 	private Map<String, Object> documentId;
+	private Map<String, Object> analyzerDiscriminator;
 
 	public PropertyDescriptor(String name, ElementType type) {
 		this.name = name;
@@ -58,4 +59,12 @@
 	public Map<String, Object> getDocumentId() {
 		return documentId;
 	}
+
+	public Map<String, Object> getAnalyzerDiscriminator() {
+		return analyzerDiscriminator;
+	}
+
+	public void setAnalyzerDiscriminator(Map<String, Object> analyzerDiscriminator) {
+		this.analyzerDiscriminator = analyzerDiscriminator;
+	}
 }

Modified: search/trunk/src/main/java/org/hibernate/search/cfg/PropertyMapping.java
===================================================================
--- search/trunk/src/main/java/org/hibernate/search/cfg/PropertyMapping.java	2009-10-23 14:32:18 UTC (rev 17827)
+++ search/trunk/src/main/java/org/hibernate/search/cfg/PropertyMapping.java	2009-10-23 16:51:24 UTC (rev 17828)
@@ -25,9 +25,13 @@
 package org.hibernate.search.cfg;
 
 import java.lang.annotation.ElementType;
+import java.util.Map;
+import java.util.HashMap;
 
 import org.apache.solr.analysis.TokenizerFactory;
 
+import org.hibernate.search.analyzer.Discriminator;
+
 /**
  * @author Emmanuel Bernard
  */
@@ -50,6 +54,13 @@
 		return new FieldMapping(property, entity, mapping);
 	}
 
+	public PropertyMapping analyzerDiscriminator(Class<? extends Discriminator> discriminator) {
+		Map<String, Object> analyzerDiscriminatorAnn = new HashMap<String, Object>();
+		analyzerDiscriminatorAnn.put( "impl", discriminator );
+		property.setAnalyzerDiscriminator(analyzerDiscriminatorAnn);
+		return this;
+	}
+
 	public PropertyMapping property(String name, ElementType type) {
 		return new PropertyMapping(name, type, entity, mapping);
 	}

Modified: search/trunk/src/main/java/org/hibernate/search/impl/MappingModelMetadataProvider.java
===================================================================
--- search/trunk/src/main/java/org/hibernate/search/impl/MappingModelMetadataProvider.java	2009-10-23 14:32:18 UTC (rev 17827)
+++ search/trunk/src/main/java/org/hibernate/search/impl/MappingModelMetadataProvider.java	2009-10-23 16:51:24 UTC (rev 17828)
@@ -58,6 +58,7 @@
 import org.hibernate.search.annotations.FieldBridge;
 import org.hibernate.search.annotations.Similarity;
 import org.hibernate.search.annotations.DocumentId;
+import org.hibernate.search.annotations.AnalyzerDiscriminator;
 
 /**
  * @author Emmanuel Bernard
@@ -251,6 +252,7 @@
 							if (property != null) {
 								// property name overriding
 								createDocumentId( property );
+								createAnalyzerDiscriminator( property );
 								createFields( property );
 							}
 						}
@@ -275,6 +277,17 @@
 			}
 		}
 
+		private void createAnalyzerDiscriminator(PropertyDescriptor property) {
+			Map<String, Object> analyzerDiscriminator = property.getAnalyzerDiscriminator();
+			if (analyzerDiscriminator != null) {
+				AnnotationDescriptor analyzerDiscriminatorAnn = new AnnotationDescriptor( AnalyzerDiscriminator.class );
+				for ( Map.Entry<String, Object> entry : analyzerDiscriminator.entrySet() ) {
+					analyzerDiscriminatorAnn.setValue( entry.getKey(), entry.getValue() );
+				}
+				annotations.put( AnalyzerDiscriminator.class, AnnotationFactory.create( analyzerDiscriminatorAnn ) );
+			}
+		}
+
 		private void createFields(PropertyDescriptor property) {
 			final Collection<Map<String,Object>> fields = property.getFields();
 			List<org.hibernate.search.annotations.Field> fieldAnnotations =
@@ -352,6 +365,14 @@
 				}
 				annotations.put( Boost.class, AnnotationFactory.create( annotation ) );
 			}
+
+			if ( entity.getAnalyzerDiscriminator() != null ) {
+				annotation = new AnnotationDescriptor( AnalyzerDiscriminator.class );
+				for ( Map.Entry<String, Object> entry : entity.getAnalyzerDiscriminator().entrySet() ) {
+					annotation.setValue( entry.getKey(), entry.getValue() );
+				}
+				annotations.put( AnalyzerDiscriminator.class, AnnotationFactory.create( annotation ) );
+			}
 		}
 
 		private void populateAnnotationArray() {

Added: search/trunk/src/test/java/org/hibernate/search/test/configuration/BlogEntry.java
===================================================================
--- search/trunk/src/test/java/org/hibernate/search/test/configuration/BlogEntry.java	                        (rev 0)
+++ search/trunk/src/test/java/org/hibernate/search/test/configuration/BlogEntry.java	2009-10-23 16:51:24 UTC (rev 17828)
@@ -0,0 +1,96 @@
+package org.hibernate.search.test.configuration;
+
+import javax.persistence.Entity;
+import javax.persistence.Id;
+import javax.persistence.GeneratedValue;
+
+import org.apache.solr.analysis.StandardTokenizerFactory;
+import org.apache.solr.analysis.LowerCaseFilterFactory;
+import org.apache.solr.analysis.EnglishPorterFilterFactory;
+import org.apache.solr.analysis.GermanStemFilterFactory;
+
+import org.hibernate.search.annotations.Indexed;
+import org.hibernate.search.annotations.AnalyzerDefs;
+import org.hibernate.search.annotations.AnalyzerDef;
+import org.hibernate.search.annotations.TokenizerDef;
+import org.hibernate.search.annotations.TokenFilterDef;
+import org.hibernate.search.annotations.DocumentId;
+import org.hibernate.search.annotations.AnalyzerDiscriminator;
+import org.hibernate.search.analyzer.Discriminator;
+
+/**
+ * @author Emmanuel Bernard
+ */
+@Entity
+@Indexed
+@AnalyzerDefs({
+		@AnalyzerDef(name = "en",
+				tokenizer = @TokenizerDef(factory = StandardTokenizerFactory.class),
+				filters = {
+						@TokenFilterDef(factory = LowerCaseFilterFactory.class),
+						@TokenFilterDef(factory = EnglishPorterFilterFactory.class
+						)
+				}),
+		@AnalyzerDef(name = "de",
+				tokenizer = @TokenizerDef(factory = StandardTokenizerFactory.class),
+				filters = {
+						@TokenFilterDef(factory = LowerCaseFilterFactory.class),
+						@TokenFilterDef(factory = GermanStemFilterFactory.class)
+				})
+})
+public class BlogEntry {
+	private Long id;
+	private String language;
+	private String title;
+	private String description;
+
+	@Id @GeneratedValue
+	public Long getId() {
+		return id;
+	}
+
+	public void setId(Long id) {
+		this.id = id;
+	}
+
+	public String getLanguage() {
+		return language;
+	}
+
+	public void setLanguage(String language) {
+		this.language = language;
+	}
+
+	public String getTitle() {
+		return title;
+	}
+
+	public void setTitle(String title) {
+		this.title = title;
+	}
+
+	public String getDescription() {
+		return description;
+	}
+
+	public void setDescription(String description) {
+		this.description = description;
+	}
+
+	public static class BlogLangDiscriminator implements Discriminator {
+
+		public String getAnalyzerDefinitionName(Object value, Object entity, String field) {
+			if ( value == null ) return null;
+			if ( !( value instanceof String ) )
+				throw new IllegalArgumentException( "expecte string as value in language discriminator");
+			if ( "description".equals( field ) ) {
+				return (String) value;
+			}
+			else {
+				//"title" is not affected
+				return null;
+			}
+
+		}
+	}
+}

Modified: search/trunk/src/test/java/org/hibernate/search/test/configuration/ProgrammaticMappingTest.java
===================================================================
--- search/trunk/src/test/java/org/hibernate/search/test/configuration/ProgrammaticMappingTest.java	2009-10-23 14:32:18 UTC (rev 17827)
+++ search/trunk/src/test/java/org/hibernate/search/test/configuration/ProgrammaticMappingTest.java	2009-10-23 16:51:24 UTC (rev 17828)
@@ -31,7 +31,10 @@
 import org.apache.solr.analysis.SnowballPorterFilterFactory;
 import org.apache.solr.analysis.LowerCaseFilterFactory;
 import org.apache.solr.analysis.NGramFilterFactory;
+import org.apache.solr.analysis.EnglishPorterFilterFactory;
+import org.apache.solr.analysis.GermanStemFilterFactory;
 import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.search.DefaultSimilarity;
 
@@ -174,6 +177,49 @@
 		s.close();
 	}
 
+	public void testAnalyzerDiscriminator() throws Exception{
+		FullTextSession s = Search.getFullTextSession( openSession() );
+		Transaction tx = s.beginTransaction();
+
+		BlogEntry deEntry = new BlogEntry();
+		deEntry.setTitle( "aufeinanderschl\u00FCgen" );
+		deEntry.setDescription( "aufeinanderschl\u00FCgen" );
+		deEntry.setLanguage( "de" );
+		s.persist( deEntry );
+		
+		BlogEntry enEntry = new BlogEntry();
+		enEntry.setTitle( "acknowledgment" );
+		enEntry.setDescription( "acknowledgment" );
+		enEntry.setLanguage( "en" );
+		s.persist( enEntry );
+
+		tx.commit();
+
+		s.clear();
+
+		tx = s.beginTransaction();
+
+		// at query time we use a standard analyzer. We explicitly search for tokens which can only be found if the
+		// right language specific stemmer was used at index time
+		assertEquals( 1, nbrOfMatchingResults( "description", "aufeinanderschlug", s ) );
+		assertEquals( 1, nbrOfMatchingResults( "description", "acknowledg", s ) );
+		assertEquals( 0, nbrOfMatchingResults( "title", "aufeinanderschlug", s ) );
+		assertEquals( 1, nbrOfMatchingResults( "title", "acknowledgment", s ) );
+
+		for( Object result : s.createQuery( "from " + BlogEntry.class.getName() ).list() ) {
+			s.delete( result );
+		}
+		tx.commit();
+		s.close();
+	}
+
+	private int nbrOfMatchingResults(String field, String token, FullTextSession s) throws ParseException {
+		QueryParser parser = new QueryParser( field, new StandardAnalyzer() );
+		org.apache.lucene.search.Query luceneQuery = parser.parse( token );
+		FullTextQuery query = s.createFullTextQuery( luceneQuery );
+		return query.getResultSize();
+	}
+
 	@Override
 	protected void configure(Configuration cfg) {
 		super.configure( cfg );
@@ -184,6 +230,12 @@
 					.filter( NGramFilterFactory.class )
 						.param( "minGramSize", "3" )
 						.param( "maxGramSize", "3" )
+				.analyzerDef( "en", StandardTokenizerFactory.class )
+					.filter( LowerCaseFilterFactory.class )
+					.filter( EnglishPorterFilterFactory.class )
+				.analyzerDef( "de", StandardTokenizerFactory.class )
+					.filter( LowerCaseFilterFactory.class )
+					.filter( GermanStemFilterFactory.class )
 
 				.indexedClass( Address.class )
 					.similarity( DefaultSimilarity.class )
@@ -196,7 +248,15 @@
 							.name( "street1_abridged" )
 							.bridge( ConcatStringBridge.class ).param( ConcatStringBridge.SIZE, "4" )
 					.property( "street2", ElementType.METHOD )
-						.field().name( "idx_street2" ).store( Store.YES ).boost( 2 );
+						.field().name( "idx_street2" ).store( Store.YES ).boost( 2 )
+				.indexedClass( BlogEntry.class )
+					.property( "title", ElementType.METHOD )
+						.field()
+					.property( "description", ElementType.METHOD )
+						.field()
+					.property( "language", ElementType.METHOD )
+						.analyzerDiscriminator(BlogEntry.BlogLangDiscriminator.class)
+				;
 		cfg.getProperties().put( "hibernate.search.mapping_model", mapping );
 	}
 
@@ -236,7 +296,8 @@
 	protected Class[] getMappings() {
 		return new Class[] {
 				Address.class,
-				Country.class
+				Country.class,
+				BlogEntry.class
 		};
 	}
 }



More information about the hibernate-commits mailing list