Author: epbernard
Date: 2010-05-28 12:44:27 -0400 (Fri, 28 May 2010)
New Revision: 19629
Added:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseContext.java
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseMatchingContext.java
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseTermination.java
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedMultiFieldsPhraseQueryBuilder.java
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseContext.java
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseMatchingContext.java
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/PhraseQueryContext.java
Modified:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/QueryBuilder.java
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedQueryBuilder.java
search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/query/dsl/DSLTest.java
Log:
HSEARCH-414 Add support for phrase query
Support phrase queries by applying the analyzer on a sentence
Still to do is the ability to add words (terms really)
Added:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseContext.java
===================================================================
---
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseContext.java
(rev 0)
+++
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseContext.java 2010-05-28
16:44:27 UTC (rev 19629)
@@ -0,0 +1,20 @@
+package org.hibernate.search.query.dsl.v2;
+
+/**
+ * @author Emmanuel Bernard
+ */
+public interface PhraseContext extends QueryCustomization<PhraseContext> {
+ /**
+ * Sets the number of other words permitted between words in query phrase.
+ * If zero, then this is an exact phrase search. For larger values this works
+ * like a <code>WITHIN</code> or <code>NEAR</code> operator.
+ *
+ * Defaults to 0
+ */
+ PhraseContext slop(int slop);
+
+ /**
+ * field / property the phrase query is executed on
+ */
+ PhraseMatchingContext onField(String fieldName);
+}
Added:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseMatchingContext.java
===================================================================
---
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseMatchingContext.java
(rev 0)
+++
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseMatchingContext.java 2010-05-28
16:44:27 UTC (rev 19629)
@@ -0,0 +1,16 @@
+package org.hibernate.search.query.dsl.v2;
+
+/**
+ * @author Emmanuel Bernard
+ */
+public interface PhraseMatchingContext extends
FieldCustomization<PhraseMatchingContext> {
+ /**
+ * field / property the phrase query is executed on
+ */
+ PhraseMatchingContext andField(String field);
+
+ /**
+ * Sentence to match. It will be processed by the analyzer
+ */
+ PhraseTermination sentence(String sentence);
+}
Added:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseTermination.java
===================================================================
---
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseTermination.java
(rev 0)
+++
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/PhraseTermination.java 2010-05-28
16:44:27 UTC (rev 19629)
@@ -0,0 +1,7 @@
+package org.hibernate.search.query.dsl.v2;
+
+/**
+ * @author Emmanuel Bernard
+ */
+public interface PhraseTermination extends Termination<PhraseTermination> {
+}
Modified:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/QueryBuilder.java
===================================================================
---
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/QueryBuilder.java 2010-05-28
16:43:05 UTC (rev 19628)
+++
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/QueryBuilder.java 2010-05-28
16:44:27 UTC (rev 19629)
@@ -36,6 +36,11 @@
RangeContext range();
/**
+ * find a sentence (words can be inverted according to the slop factor)
+ */
+ PhraseContext phrase();
+
+ /**
* Query matching all documents
* Typically mixed with a boolean query.
*/
Added:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedMultiFieldsPhraseQueryBuilder.java
===================================================================
---
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedMultiFieldsPhraseQueryBuilder.java
(rev 0)
+++
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedMultiFieldsPhraseQueryBuilder.java 2010-05-28
16:44:27 UTC (rev 19629)
@@ -0,0 +1,150 @@
+package org.hibernate.search.query.dsl.v2.impl;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermRangeQuery;
+
+import org.hibernate.annotations.common.AssertionFailure;
+import org.hibernate.search.SearchException;
+import org.hibernate.search.query.dsl.v2.PhraseTermination;
+import org.hibernate.search.query.dsl.v2.RangeTerminationExcludable;
+
+/**
+ * @author Emmanuel Bernard
+ */
+public class ConnectedMultiFieldsPhraseQueryBuilder implements PhraseTermination {
+ private final PhraseQueryContext queryContext;
+ private final Analyzer queryAnalyzer;
+ private final QueryCustomizer queryCustomizer;
+ private final List<FieldContext> fieldContexts;
+
+ public ConnectedMultiFieldsPhraseQueryBuilder(PhraseQueryContext queryContext, Analyzer
queryAnalyzer, QueryCustomizer queryCustomizer, List<FieldContext> fieldContexts) {
+ this.queryContext = queryContext;
+ this.queryAnalyzer = queryAnalyzer;
+ this.queryCustomizer = queryCustomizer;
+ this.fieldContexts = fieldContexts;
+ }
+
+ public Query createQuery() {
+ final int size = fieldContexts.size();
+ if ( size == 1 ) {
+ return queryCustomizer.setWrappedQuery( createQuery( fieldContexts.get( 0 ) )
).createQuery();
+ }
+ else {
+ BooleanQuery aggregatedFieldsQuery = new BooleanQuery( );
+ for ( FieldContext fieldContext : fieldContexts ) {
+ aggregatedFieldsQuery.add( createQuery( fieldContext ), BooleanClause.Occur.SHOULD
);
+ }
+ return queryCustomizer.setWrappedQuery( aggregatedFieldsQuery ).createQuery();
+ }
+ }
+
+ public Query createQuery(FieldContext fieldContext) {
+ final Query perFieldQuery;
+ final String fieldName = fieldContext.getField();
+
+ /*
+ * Store terms per position and detect if for a given position more than one term is
present
+ */
+ TokenStream stream = null;
+ boolean isMultiPhrase = false;
+ Map<Integer, List<Term>> termsPerPosition = new HashMap<Integer,
List<Term>>();
+ final String sentence = queryContext.getSentence();
+ try {
+ Reader reader = new StringReader( sentence );
+ stream = queryAnalyzer.reusableTokenStream( fieldName, reader);
+
+ TermAttribute termAttribute = (TermAttribute) stream.addAttribute( TermAttribute.class
);
+ PositionIncrementAttribute positionAttribute = (PositionIncrementAttribute)
stream.addAttribute( PositionIncrementAttribute.class );
+
+ stream.reset();
+ int position = -1; //start at -1 since we apply at least one increment
+ List<Term> termsAtSamePosition = null;
+ while ( stream.incrementToken() ) {
+ int positionIncrement = 1;
+ if ( positionAttribute != null ) {
+ positionIncrement = positionAttribute.getPositionIncrement();
+ }
+
+ if ( positionIncrement > 0 ) {
+ position+=positionIncrement;
+ termsAtSamePosition = termsPerPosition.get(position);
+ }
+
+ if (termsAtSamePosition == null) {
+ termsAtSamePosition = new ArrayList<Term>();
+ termsPerPosition.put( position, termsAtSamePosition );
+ }
+
+ termsAtSamePosition.add( new Term( fieldName, termAttribute.term() ) );
+ if ( termsAtSamePosition.size() > 1 ) {
+ isMultiPhrase = true;
+ }
+ }
+ }
+ catch ( IOException e ) {
+ throw new AssertionFailure( "IOException while reading a string. Doh!", e);
+ }
+ finally {
+ if ( stream != null ) {
+ try {
+ stream.end();
+ stream.close();
+ }
+ catch ( IOException e ) {
+ throw new AssertionFailure( "IOException while reading a string. Doh!",
e);
+ }
+ }
+ }
+
+ /*
+ * Create the appropriate query depending on the conditions
+ * note that a MultiPhraseQuery is needed if several terms share the same position
+ * as it will do an OR and not an AND like PhraseQuery
+ */
+ final int size = termsPerPosition.size();
+ if ( size == 0 ) {
+ throw new SearchException( "phrase query returns no term. Is there a problem with
your analyzers? " + sentence);
+ }
+ //TODO
+ //if ( size == 1 ) {
+ //optimization
+ //}
+ //else {
+ if (isMultiPhrase) {
+ MultiPhraseQuery query = new MultiPhraseQuery();
+ query.setSlop( queryContext.getSlop() );
+ for ( Map.Entry<Integer,List<Term>> entry : termsPerPosition.entrySet() )
{
+ final List<Term> value = entry.getValue();
+ query.add( value.toArray( new Term[value.size()] ), entry.getKey() );
+ }
+ perFieldQuery = query;
+ }
+ else {
+ PhraseQuery query = new PhraseQuery();
+ query.setSlop( queryContext.getSlop() );
+ for ( Map.Entry<Integer,List<Term>> entry : termsPerPosition.entrySet() )
{
+ final List<Term> value = entry.getValue();
+ query.add( value.get(0), entry.getKey() );
+ }
+ perFieldQuery = query;
+ }
+ return fieldContext.getFieldCustomizer().setWrappedQuery( perFieldQuery
).createQuery();
+ }
+}
\ No newline at end of file
Added:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseContext.java
===================================================================
---
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseContext.java
(rev 0)
+++
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseContext.java 2010-05-28
16:44:27 UTC (rev 19629)
@@ -0,0 +1,146 @@
+package org.hibernate.search.query.dsl.v2.impl;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.search.Filter;
+
+import org.hibernate.search.SearchFactory;
+import org.hibernate.search.query.dsl.v2.PhraseContext;
+import org.hibernate.search.query.dsl.v2.PhraseMatchingContext;
+import org.hibernate.search.query.dsl.v2.RangeContext;
+import org.hibernate.search.query.dsl.v2.RangeMatchingContext;
+
+/**
+ * @author Emmanuel Bernard
+ */
+class ConnectedPhraseContext implements PhraseContext {
+ private final SearchFactory factory;
+ private final Analyzer queryAnalyzer;
+ private final QueryCustomizer queryCustomizer;
+ private final PhraseQueryContext queryContext;
+
+
+ public ConnectedPhraseContext(Analyzer queryAnalyzer, SearchFactory factory) {
+ this.factory = factory;
+ this.queryAnalyzer = queryAnalyzer;
+ this.queryCustomizer = new QueryCustomizer();
+ this.queryContext = new PhraseQueryContext();
+ }
+
+ public PhraseContext slop(int slop) {
+ queryContext.setSlop( slop );
+ return this;
+ }
+
+ public PhraseMatchingContext onField(String fieldName) {
+ return new ConnectedPhraseMatchingContext(fieldName, queryContext, queryCustomizer,
queryAnalyzer, factory);
+ }
+
+ public PhraseContext boostedTo(float boost) {
+ queryCustomizer.boostedTo( boost );
+ return this;
+ }
+
+ public PhraseContext constantScore() {
+ queryCustomizer.constantScore();
+ return this;
+ }
+
+ public PhraseContext filter(Filter filter) {
+ queryCustomizer.filter(filter);
+ return this;
+ }
+
+
+//
+// public <T> FromRangeContext<T> from(T from) {
+// context.setFrom( from );
+// return new ConnectedFromRangeContext<T>(this);
+// }
+//
+//
+//
+// SearchFactory getFactory() {
+// return factory;
+// }
+//
+// Analyzer getQueryAnalyzer() {
+// return queryAnalyzer;
+// }
+//
+// QueryCustomizer getQueryCustomizer() {
+// return queryCustomizer;
+// }
+//
+// static class ConnectedFromRangeContext<T> implements FromRangeContext<T>
{
+// private ConnectedRangeContext mother;
+//
+// public ConnectedFromRangeContext(ConnectedRangeContext mother) {
+// this.mother = mother;
+// }
+//
+// public ToRangeContext to(Object to) {
+// mother.getContext().setTo( to );
+// return new ConnectedToRangeContext(mother);
+// }
+//
+// public FromRangeContext<T> exclude() {
+// mother.getContext().setExcludeFrom( true );
+// return this;
+// }
+//
+// public FromRangeContext<T> boostedTo(float boost) {
+// mother.boostedTo( boost );
+// return this;
+// }
+//
+// public FromRangeContext<T> constantScore() {
+// mother.constantScore();
+// return this;
+// }
+//
+// public FromRangeContext<T> filter(Filter filter) {
+// mother.filter( filter );
+// return this;
+// }
+// }
+//
+// static class ConnectedToRangeContext implements ToRangeContext {
+// private ConnectedRangeContext mother;
+//
+// public ConnectedToRangeContext(ConnectedRangeContext mother) {
+// this.mother = mother;
+// }
+//
+// public TermMatchingContext onField(String field) {
+// return new ConnectedTermMatchingContext(
+// mother.getContext(),
+// field,
+// mother.getQueryCustomizer(),
+// mother.getQueryAnalyzer(),
+// mother.getFactory()
+// );
+// }
+//
+// public ToRangeContext exclude() {
+// mother.getContext().setExcludeTo( true );
+// return this;
+// }
+//
+// public ToRangeContext boostedTo(float boost) {
+// mother.boostedTo( boost );
+// return this;
+// }
+//
+// public ToRangeContext constantScore() {
+// mother.constantScore();
+// return this;
+// }
+//
+// public ToRangeContext filter(Filter filter) {
+// mother.filter( filter );
+// return this;
+// }
+// }
+
+
+}
\ No newline at end of file
Added:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseMatchingContext.java
===================================================================
---
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseMatchingContext.java
(rev 0)
+++
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedPhraseMatchingContext.java 2010-05-28
16:44:27 UTC (rev 19629)
@@ -0,0 +1,68 @@
+package org.hibernate.search.query.dsl.v2.impl;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.Analyzer;
+
+import org.hibernate.search.SearchFactory;
+import org.hibernate.search.query.dsl.v2.PhraseMatchingContext;
+import org.hibernate.search.query.dsl.v2.PhraseTermination;
+import org.hibernate.search.query.dsl.v2.RangeMatchingContext;
+import org.hibernate.search.query.dsl.v2.RangeTerminationExcludable;
+
+/**
+ * @author Emmanuel Bernard
+ */
+public class ConnectedPhraseMatchingContext implements PhraseMatchingContext {
+ private final SearchFactory factory;
+ private final Analyzer queryAnalyzer;
+ private final QueryCustomizer queryCustomizer;
+ private final PhraseQueryContext queryContext;
+ private final List<FieldContext> fieldContexts;
+ //when several fields are passed as varargs, apply the same customization to all of them.
+ //keep the index of the first context of the current group in this list
+ private int firstOfContext = 0;
+
+ public ConnectedPhraseMatchingContext(String fieldName,
+ PhraseQueryContext queryContext,
+ QueryCustomizer queryCustomizer,
+ Analyzer queryAnalyzer,
+ SearchFactory factory) {
+ this.factory = factory;
+ this.queryAnalyzer = queryAnalyzer;
+ this.queryCustomizer = queryCustomizer;
+ this.queryContext = queryContext;
+ this.fieldContexts = new ArrayList<FieldContext>(4);
+ this.fieldContexts.add( new FieldContext( fieldName ) );
+ }
+
+ public PhraseMatchingContext andField(String field) {
+ this.fieldContexts.add( new FieldContext( field ) );
+ this.firstOfContext = fieldContexts.size() - 1;
+ return this;
+ }
+
+ public PhraseTermination sentence(String sentence) {
+ queryContext.setSentence(sentence);
+ return new ConnectedMultiFieldsPhraseQueryBuilder(queryContext, queryAnalyzer,
queryCustomizer, fieldContexts);
+ }
+
+ public PhraseMatchingContext boostedTo(float boost) {
+ for ( FieldContext fieldContext : getCurrentFieldContexts() ) {
+ fieldContext.getFieldCustomizer().boostedTo( boost );
+ }
+ return this;
+ }
+
+ private List<FieldContext> getCurrentFieldContexts() {
+ return fieldContexts.subList( firstOfContext, fieldContexts.size() );
+ }
+
+ public PhraseMatchingContext ignoreAnalyzer() {
+ for ( FieldContext fieldContext : getCurrentFieldContexts() ) {
+ fieldContext.setIgnoreAnalyzer( true );
+ }
+ return this;
+ }
+}
\ No newline at end of file
Modified:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedQueryBuilder.java
===================================================================
---
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedQueryBuilder.java 2010-05-28
16:43:05 UTC (rev 19628)
+++
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/ConnectedQueryBuilder.java 2010-05-28
16:44:27 UTC (rev 19629)
@@ -6,6 +6,7 @@
import org.hibernate.search.query.dsl.v2.AllContext;
import org.hibernate.search.query.dsl.v2.BooleanJunction;
import org.hibernate.search.query.dsl.v2.FuzzyContext;
+import org.hibernate.search.query.dsl.v2.PhraseContext;
import org.hibernate.search.query.dsl.v2.QueryBuilder;
import org.hibernate.search.query.dsl.v2.RangeContext;
import org.hibernate.search.query.dsl.v2.TermContext;
@@ -41,6 +42,10 @@
return new ConnectedRangeContext( queryAnalyzer, factory );
}
+ public PhraseContext phrase() {
+ return new ConnectedPhraseContext( queryAnalyzer, factory );
+ }
+
//fixme Have to use raw types but would be nice to not have to
public BooleanJunction bool() {
return new BooleanQueryBuilder();
Added:
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/PhraseQueryContext.java
===================================================================
---
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/PhraseQueryContext.java
(rev 0)
+++
search/trunk/hibernate-search/src/main/java/org/hibernate/search/query/dsl/v2/impl/PhraseQueryContext.java 2010-05-28
16:44:27 UTC (rev 19629)
@@ -0,0 +1,25 @@
+package org.hibernate.search.query.dsl.v2.impl;
+
+/**
+ * @author Emmanuel Bernard
+ */
+public class PhraseQueryContext {
+ private int slop = 0;
+ private String sentence;
+
+ public int getSlop() {
+ return slop;
+ }
+
+ public void setSlop(int slop) {
+ this.slop = slop;
+ }
+
+ public String getSentence() {
+ return sentence;
+ }
+
+ public void setSentence(String sentence) {
+ this.sentence = sentence;
+ }
+}
Modified:
search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/query/dsl/DSLTest.java
===================================================================
---
search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/query/dsl/DSLTest.java 2010-05-28
16:43:05 UTC (rev 19628)
+++
search/trunk/hibernate-search/src/test/java/org/hibernate/search/test/query/dsl/DSLTest.java 2010-05-28
16:44:27 UTC (rev 19629)
@@ -296,7 +296,55 @@
cleanData( fts );
}
+ public void testPhraseQuery() throws Exception {
+ FullTextSession fts = initData();
+ Transaction transaction = fts.beginTransaction();
+ final QueryBuilder monthQb = fts.getSearchFactory()
+ .buildQueryBuilder().forEntity( Month.class ).get();
+
+ Query
+
+ query = monthQb.
+ phrase()
+ .onField( "mythology" )
+ .sentence( "colder and whitening" )
+ .createQuery();
+
+ assertEquals( 1, fts.createFullTextQuery( query, Month.class ).getResultSize() );
+
+ query = monthQb.
+ phrase()
+ .onField( "mythology" )
+ .sentence( "Month whitening" )
+ .createQuery();
+
+ assertEquals( 0, fts.createFullTextQuery( query, Month.class ).getResultSize() );
+
+ query = monthQb.
+ phrase()
+ .slop( 1 )
+ .onField( "mythology" )
+ .sentence( "Month whitening" )
+ .createQuery();
+
+ assertEquals( 1, fts.createFullTextQuery( query, Month.class ).getResultSize() );
+
+ //Does not work as the NGram filter does not seem to be skipping positional increments
between ngrams.
+// query = monthQb
+// .phrase()
+// .onField( "mythology_ngram" )
+// .sentence( "snobored" )
+// .createQuery();
+//
+// assertEquals( 1, fts.createFullTextQuery( query, Month.class ).getResultSize() );
+
+ transaction.commit();
+
+ cleanData( fts );
+ }
+
+
// public void testTermQueryOnAnalyzer() throws Exception {
// FullTextSession fts = initData();
//