[hibernate-commits] Hibernate SVN: r19629 - in search/trunk/hibernate-search/src: main/java/org/hibernate/search/query/dsl/v2/impl and 1 other directories.

hibernate-commits at lists.jboss.org hibernate-commits at lists.jboss.org
Fri May 28 12:44:28 EDT 2010


Author: epbernard
Date: 2010-05-28 12:44:27 -0400 (Fri, 28 May 2010)
New Revision: 19629

Added:
   search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseContext.java
   search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseMatchingContext.java
   search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseTermination.java
   search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedMultiFieldsPhraseQueryBuilder.java
   search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseContext.java
   search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseMatchingContext.java
   search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/PhraseQueryContext.java
Modified:
   search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/QueryBuilder.java
   search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedQueryBuilder.java
   search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/query/dsl/DSLTest.java
Log:
HSEARCH-414 Add support for phrase query

Support phrase queries by applying the analyzer on a sentence
Still to do is the ability to add words (terms really)

Added: search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseContext.java
===================================================================
--- search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseContext.java	                        (rev 0)
+++ search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseContext.java	2010-05-28 16:44:27 UTC (rev 19629)
@@ -0,0 +1,20 @@
+package org.hibernate.search.query.dsl.v2;
+
+/**
+ * @author Emmanuel Bernard
+ */
+public interface PhraseContext extends QueryCustomization<PhraseContext> {
+	/**
+	 * Sets the number of other words permitted between words in query phrase.
+	 * If zero, then this is an exact phrase search.  For larger values this works
+	 * like a <code>WITHIN</code> or <code>NEAR</code> operator.
+	 *
+	 * Defaults to 0
+	 */
+	PhraseContext slop(int slop);
+
+	/**
+	 * field / property the phrase query is executed on
+	 */
+	PhraseMatchingContext onField(String fieldName);
+}

Added: search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseMatchingContext.java
===================================================================
--- search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseMatchingContext.java	                        (rev 0)
+++ search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseMatchingContext.java	2010-05-28 16:44:27 UTC (rev 19629)
@@ -0,0 +1,16 @@
+package org.hibernate.search.query.dsl.v2;
+
+/**
+ * @author Emmanuel Bernard
+ */
+public interface PhraseMatchingContext extends FieldCustomization<PhraseMatchingContext> {
+	/**
+	 * additional field / property the phrase query is executed on
+	 */
+	PhraseMatchingContext andField(String field);
+
+	/**
+	 * Sentence to match. It will be processed by the analyzer
+	 */
+	PhraseTermination sentence(String sentence);
+}

Added: search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseTermination.java
===================================================================
--- search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseTermination.java	                        (rev 0)
+++ search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseTermination.java	2010-05-28 16:44:27 UTC (rev 19629)
@@ -0,0 +1,7 @@
+package org.hibernate.search.query.dsl.v2;
+
+/**
+ * @author Emmanuel Bernard
+ */
+public interface PhraseTermination extends Termination<PhraseTermination> {
+}

Modified: search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/QueryBuilder.java
===================================================================
--- search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/QueryBuilder.java	2010-05-28 16:43:05 UTC (rev 19628)
+++ search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/QueryBuilder.java	2010-05-28 16:44:27 UTC (rev 19629)
@@ -36,6 +36,11 @@
 	RangeContext range();
 
 	/**
+	 * find a sentence (words can be reordered, within the limits of the slop factor)
+	 */
+	PhraseContext phrase();
+
+	/**
 	 * Query matching all documents
 	 * Typically mixed with a boolean query.
 	 */

Added: search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedMultiFieldsPhraseQueryBuilder.java
===================================================================
--- search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedMultiFieldsPhraseQueryBuilder.java	                        (rev 0)
+++ search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedMultiFieldsPhraseQueryBuilder.java	2010-05-28 16:44:27 UTC (rev 19629)
@@ -0,0 +1,150 @@
+package org.hibernate.search.query.dsl.v2.impl;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermRangeQuery;
+
+import org.hibernate.annotations.common.AssertionFailure;
+import org.hibernate.search.SearchException;
+import org.hibernate.search.query.dsl.v2.PhraseTermination;
+import org.hibernate.search.query.dsl.v2.RangeTerminationExcludable;
+
+/**
+ * @author Emmanuel Bernard
+ */
+public class ConnectedMultiFieldsPhraseQueryBuilder implements PhraseTermination {
+	private final PhraseQueryContext queryContext;
+	private final Analyzer queryAnalyzer;
+	private final QueryCustomizer queryCustomizer;
+	private final List<FieldContext> fieldContexts;
+
+	public ConnectedMultiFieldsPhraseQueryBuilder(PhraseQueryContext queryContext, Analyzer queryAnalyzer, QueryCustomizer queryCustomizer, List<FieldContext> fieldContexts) {
+		this.queryContext = queryContext;
+		this.queryAnalyzer = queryAnalyzer;
+		this.queryCustomizer = queryCustomizer;
+		this.fieldContexts = fieldContexts;
+	}
+
+	public Query createQuery() {
+		final int size = fieldContexts.size();
+		if ( size == 1 ) {
+			return queryCustomizer.setWrappedQuery( createQuery( fieldContexts.get( 0 ) ) ).createQuery();
+		}
+		else {
+			BooleanQuery aggregatedFieldsQuery = new BooleanQuery( );
+			for ( FieldContext fieldContext : fieldContexts ) {
+				aggregatedFieldsQuery.add( createQuery( fieldContext ), BooleanClause.Occur.SHOULD );
+			}
+			return  queryCustomizer.setWrappedQuery( aggregatedFieldsQuery ).createQuery();
+		}
+	}
+
+	public Query createQuery(FieldContext fieldContext) {
+		final Query perFieldQuery;
+		final String fieldName = fieldContext.getField();
+
+		/*
+		 * Store terms per position and detect if for a given position more than one term is present
+		 */
+		TokenStream stream = null;
+		boolean isMultiPhrase = false;
+		Map<Integer, List<Term>> termsPerPosition = new HashMap<Integer, List<Term>>();
+		final String sentence = queryContext.getSentence();
+		try {
+			Reader reader = new StringReader( sentence );
+			stream = queryAnalyzer.reusableTokenStream( fieldName, reader);
+
+			TermAttribute termAttribute = (TermAttribute) stream.addAttribute( TermAttribute.class );
+			PositionIncrementAttribute positionAttribute = (PositionIncrementAttribute) stream.addAttribute( PositionIncrementAttribute.class );
+
+			stream.reset();
+			int position = -1; //start at -1 since we apply at least one increment
+			List<Term> termsAtSamePosition = null;
+			while ( stream.incrementToken() ) {
+				int positionIncrement = 1;
+				if ( positionAttribute != null ) {
+					positionIncrement = positionAttribute.getPositionIncrement();
+				}
+
+				if ( positionIncrement > 0 ) {
+					position+=positionIncrement;
+					termsAtSamePosition = termsPerPosition.get(position);
+				}
+
+				if (termsAtSamePosition == null) {
+					termsAtSamePosition = new ArrayList<Term>();
+					termsPerPosition.put( position, termsAtSamePosition  );
+				}
+
+				termsAtSamePosition.add( new Term( fieldName, termAttribute.term() ) );
+				if ( termsAtSamePosition.size() > 1 ) {
+					isMultiPhrase = true;
+				}
+			}
+		}
+		catch ( IOException e ) {
+			throw new AssertionFailure( "IOException while reading a string. Doh!", e);
+		}
+		finally {
+			if ( stream != null ) {
+				try {
+					stream.end();
+					stream.close();
+				}
+				catch ( IOException e ) {
+					throw new AssertionFailure( "IOException while reading a string. Doh!", e);
+				}
+			}
+		}
+
+		/*
+		 * Create the appropriate query depending on the conditions
+		 * note that a MultiPhraseQuery is needed if several terms share the same position
+		 * as it will do an OR and not an AND like PhraseQuery
+		 */
+		final int size = termsPerPosition.size();
+		if ( size == 0 ) {
+			throw new SearchException( "phrase query returns no term. Is there a problem with your analyzers? " + sentence);
+		}
+		//TODO
+		//if ( size == 1 ) {
+			//optimization
+		//}
+		//else {
+		if (isMultiPhrase) {
+			MultiPhraseQuery query = new MultiPhraseQuery();
+			query.setSlop( queryContext.getSlop() );
+			for ( Map.Entry<Integer,List<Term>> entry : termsPerPosition.entrySet() ) {
+				final List<Term> value = entry.getValue();
+				query.add( value.toArray( new Term[value.size()] ), entry.getKey() );
+			}
+			perFieldQuery = query;
+		}
+		else {
+			PhraseQuery query = new PhraseQuery();
+			query.setSlop(  queryContext.getSlop() );
+			for ( Map.Entry<Integer,List<Term>> entry : termsPerPosition.entrySet() ) {
+				final List<Term> value = entry.getValue();
+				query.add( value.get(0), entry.getKey() );
+			}
+			perFieldQuery = query;
+		}
+		return fieldContext.getFieldCustomizer().setWrappedQuery( perFieldQuery ).createQuery();
+	}
+}
\ No newline at end of file

Added: search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseContext.java
===================================================================
--- search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseContext.java	                        (rev 0)
+++ search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseContext.java	2010-05-28 16:44:27 UTC (rev 19629)
@@ -0,0 +1,146 @@
+package org.hibernate.search.query.dsl.v2.impl;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.search.Filter;
+
+import org.hibernate.search.SearchFactory;
+import org.hibernate.search.query.dsl.v2.PhraseContext;
+import org.hibernate.search.query.dsl.v2.PhraseMatchingContext;
+import org.hibernate.search.query.dsl.v2.RangeContext;
+import org.hibernate.search.query.dsl.v2.RangeMatchingContext;
+
+/**
+ * @author Emmanuel Bernard
+ */
+class ConnectedPhraseContext implements PhraseContext {
+	private final SearchFactory factory;
+	private final Analyzer queryAnalyzer;
+	private final QueryCustomizer queryCustomizer;
+	private final PhraseQueryContext queryContext;
+
+
+	public ConnectedPhraseContext(Analyzer queryAnalyzer, SearchFactory factory) {
+		this.factory = factory;
+		this.queryAnalyzer = queryAnalyzer;
+		this.queryCustomizer = new QueryCustomizer();
+		this.queryContext = new PhraseQueryContext();
+	}
+
+	public PhraseContext slop(int slop) {
+		queryContext.setSlop( slop );
+		return this;
+	}
+
+	public PhraseMatchingContext onField(String fieldName) {
+		return new ConnectedPhraseMatchingContext(fieldName, queryContext, queryCustomizer, queryAnalyzer, factory);
+	}
+
+	public PhraseContext boostedTo(float boost) {
+		queryCustomizer.boostedTo( boost );
+		return this;
+	}
+
+	public PhraseContext constantScore() {
+		queryCustomizer.constantScore();
+		return this;
+	}
+
+	public PhraseContext filter(Filter filter) {
+		queryCustomizer.filter(filter);
+		return this;
+	}
+
+
+//
+//	public <T> FromRangeContext<T> from(T from) {
+//		context.setFrom( from );
+//		return new ConnectedFromRangeContext<T>(this);
+//	}
+//
+//
+//
+//	SearchFactory getFactory() {
+//		return factory;
+//	}
+//
+//	Analyzer getQueryAnalyzer() {
+//		return queryAnalyzer;
+//	}
+//
+//	QueryCustomizer getQueryCustomizer() {
+//		return queryCustomizer;
+//	}
+//
+//	static class ConnectedFromRangeContext<T> implements FromRangeContext<T> {
+//		private ConnectedRangeContext mother;
+//
+//		public ConnectedFromRangeContext(ConnectedRangeContext mother) {
+//			this.mother = mother;
+//		}
+//
+//		public ToRangeContext to(Object to) {
+//			mother.getContext().setTo( to );
+//			return new ConnectedToRangeContext(mother);
+//		}
+//
+//		public FromRangeContext<T> exclude() {
+//			mother.getContext().setExcludeFrom( true );
+//			return this;
+//		}
+//
+//		public FromRangeContext<T> boostedTo(float boost) {
+//			mother.boostedTo( boost );
+//			return this;
+//		}
+//
+//		public FromRangeContext<T> constantScore() {
+//			mother.constantScore();
+//			return this;
+//		}
+//
+//		public FromRangeContext<T> filter(Filter filter) {
+//			mother.filter( filter );
+//			return this;
+//		}
+//	}
+//
+//	static class ConnectedToRangeContext implements ToRangeContext {
+//		private ConnectedRangeContext mother;
+//
+//		public ConnectedToRangeContext(ConnectedRangeContext mother) {
+//			this.mother = mother;
+//		}
+//
+//		public TermMatchingContext onField(String field) {
+//			return new ConnectedTermMatchingContext(
+//					mother.getContext(),
+//					field,
+//					mother.getQueryCustomizer(),
+//					mother.getQueryAnalyzer(),
+//					mother.getFactory()
+//			);
+//		}
+//
+//		public ToRangeContext exclude() {
+//			mother.getContext().setExcludeTo( true );
+//			return this;
+//		}
+//
+//		public ToRangeContext boostedTo(float boost) {
+//			mother.boostedTo( boost );
+//			return this;
+//		}
+//
+//		public ToRangeContext constantScore() {
+//			mother.constantScore();
+//			return this;
+//		}
+//
+//		public ToRangeContext filter(Filter filter) {
+//			mother.filter( filter );
+//			return this;
+//		}
+//	}
+
+
+}
\ No newline at end of file

Added: search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseMatchingContext.java
===================================================================
--- search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseMatchingContext.java	                        (rev 0)
+++ search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseMatchingContext.java	2010-05-28 16:44:27 UTC (rev 19629)
@@ -0,0 +1,68 @@
+package org.hibernate.search.query.dsl.v2.impl;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.Analyzer;
+
+import org.hibernate.search.SearchFactory;
+import org.hibernate.search.query.dsl.v2.PhraseMatchingContext;
+import org.hibernate.search.query.dsl.v2.PhraseTermination;
+import org.hibernate.search.query.dsl.v2.RangeMatchingContext;
+import org.hibernate.search.query.dsl.v2.RangeTerminationExcludable;
+
+/**
+ * @author Emmanuel Bernard
+ */
+public class ConnectedPhraseMatchingContext implements PhraseMatchingContext {
+	private final SearchFactory factory;
+	private final Analyzer queryAnalyzer;
+	private final QueryCustomizer queryCustomizer;
+	private final PhraseQueryContext queryContext;
+	private final List<FieldContext> fieldContexts;
+	//when a varargs of fields are passed, apply the same customization for all.
+	//keep the index of the first context in this queue
+	private int firstOfContext = 0;
+
+	public ConnectedPhraseMatchingContext(String fieldName,
+											PhraseQueryContext queryContext,
+											QueryCustomizer queryCustomizer,
+											Analyzer queryAnalyzer,
+											SearchFactory factory) {
+		this.factory = factory;
+		this.queryAnalyzer = queryAnalyzer;
+		this.queryCustomizer = queryCustomizer;
+		this.queryContext = queryContext;
+		this.fieldContexts = new ArrayList<FieldContext>(4);
+		this.fieldContexts.add( new FieldContext( fieldName ) );
+	}
+
+	public PhraseMatchingContext andField(String field) {
+		this.fieldContexts.add( new FieldContext( field ) );
+		this.firstOfContext = fieldContexts.size() - 1;
+		return this;
+	}
+
+	public PhraseTermination sentence(String sentence) {
+		queryContext.setSentence(sentence);
+		return new ConnectedMultiFieldsPhraseQueryBuilder(queryContext, queryAnalyzer, queryCustomizer, fieldContexts);
+	}
+
+	public PhraseMatchingContext boostedTo(float boost) {
+		for ( FieldContext fieldContext : getCurrentFieldContexts() ) {
+			fieldContext.getFieldCustomizer().boostedTo( boost );
+		}
+		return this;
+	}
+
+	private List<FieldContext> getCurrentFieldContexts() {
+		return fieldContexts.subList( firstOfContext, fieldContexts.size() );
+	}
+
+	public PhraseMatchingContext ignoreAnalyzer() {
+		for ( FieldContext fieldContext : getCurrentFieldContexts() ) {
+			fieldContext.setIgnoreAnalyzer( true );
+		}
+		return this;
+	}
+}
\ No newline at end of file

Modified: search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedQueryBuilder.java
===================================================================
--- search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedQueryBuilder.java	2010-05-28 16:43:05 UTC (rev 19628)
+++ search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedQueryBuilder.java	2010-05-28 16:44:27 UTC (rev 19629)
@@ -6,6 +6,7 @@
 import org.hibernate.search.query.dsl.v2.AllContext;
 import org.hibernate.search.query.dsl.v2.BooleanJunction;
 import org.hibernate.search.query.dsl.v2.FuzzyContext;
+import org.hibernate.search.query.dsl.v2.PhraseContext;
 import org.hibernate.search.query.dsl.v2.QueryBuilder;
 import org.hibernate.search.query.dsl.v2.RangeContext;
 import org.hibernate.search.query.dsl.v2.TermContext;
@@ -41,6 +42,10 @@
 		return new ConnectedRangeContext( queryAnalyzer, factory );
 	}
 
+	public PhraseContext phrase() {
+		return new ConnectedPhraseContext( queryAnalyzer, factory );
+	}
+
 	//fixme Have to use raw types but would be nice to not have to
 	public BooleanJunction bool() {
 		return new BooleanQueryBuilder();

Added: search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/PhraseQueryContext.java
===================================================================
--- search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/PhraseQueryContext.java	                        (rev 0)
+++ search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/PhraseQueryContext.java	2010-05-28 16:44:27 UTC (rev 19629)
@@ -0,0 +1,25 @@
+package org.hibernate.search.query.dsl.v2.impl;
+
+/**
+ * @author Emmanuel Bernard
+ */
+public class PhraseQueryContext {
+	private int slop = 0;
+	private String sentence;
+
+	public int getSlop() {
+		return slop;
+	}
+
+	public void setSlop(int slop) {
+		this.slop = slop;
+	}
+
+	public String getSentence() {
+		return sentence;
+	}
+
+	public void setSentence(String sentence) {
+		this.sentence = sentence;
+	}
+}

Modified: search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/query/dsl/DSLTest.java
===================================================================
--- search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/query/dsl/DSLTest.java	2010-05-28 16:43:05 UTC (rev 19628)
+++ search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/query/dsl/DSLTest.java	2010-05-28 16:44:27 UTC (rev 19629)
@@ -296,7 +296,55 @@
 		cleanData( fts );
 	}
 
+	public void testPhraseQuery() throws Exception {
+		FullTextSession fts = initData();
 
+		Transaction transaction = fts.beginTransaction();
+		final QueryBuilder monthQb = fts.getSearchFactory()
+				.buildQueryBuilder().forEntity( Month.class ).get();
+
+		Query
+
+		query = monthQb.
+				phrase()
+					.onField( "mythology" )
+					.sentence( "colder and whitening" )
+					.createQuery();
+
+		assertEquals( 1, fts.createFullTextQuery( query, Month.class ).getResultSize() );
+
+		query = monthQb.
+				phrase()
+					.onField( "mythology" )
+					.sentence( "Month whitening" )
+					.createQuery();
+
+		assertEquals( 0, fts.createFullTextQuery( query, Month.class ).getResultSize() );
+
+		query = monthQb.
+				phrase()
+					.slop( 1 )
+					.onField( "mythology" )
+					.sentence( "Month whitening" )
+					.createQuery();
+
+		assertEquals( 1, fts.createFullTextQuery( query, Month.class ).getResultSize() );
+
+		//Does not work as the NGram filter does not seem to be skipping positional increment between ngrams.
+//		query = monthQb
+//				.phrase()
+//					.onField( "mythology_ngram" )
+//					.sentence( "snobored" )
+//					.createQuery();
+//
+//		assertEquals( 1, fts.createFullTextQuery( query, Month.class ).getResultSize() );
+
+		transaction.commit();
+
+		cleanData( fts );
+	}
+
+
 //	public void testTermQueryOnAnalyzer() throws Exception {
 //		FullTextSession fts = initData();
 //



More information about the hibernate-commits mailing list