Author: sannegrinovero
Date: 2008-11-29 08:10:14 -0500 (Sat, 29 Nov 2008)
New Revision: 15627
Added:
search/trunk/src/java/org/hibernate/search/filter/FilterOptimizationHelper.java
search/trunk/src/test/org/hibernate/search/test/filter/FiltersOptimizationTest.java
Modified:
search/trunk/src/java/org/hibernate/search/filter/ChainedFilter.java
Log:
HSEARCH-299 use of bit operations when possible to chain Filters
Modified: search/trunk/src/java/org/hibernate/search/filter/ChainedFilter.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/filter/ChainedFilter.java 2008-11-27
17:26:25 UTC (rev 15626)
+++ search/trunk/src/java/org/hibernate/search/filter/ChainedFilter.java 2008-11-29
13:10:14 UTC (rev 15627)
@@ -12,7 +12,13 @@
import org.hibernate.annotations.common.AssertionFailure;
/**
+ * <p>A Filter capable of chaining other filters, so that it's
+ * possible to apply several filters on a Query.</p>
+ * <p>The resulting filter will only enable result Documents
+ * if no filter removed it.</p>
+ *
* @author Emmanuel Bernard
+ * @author Sanne Grinovero
*/
public class ChainedFilter extends Filter {
@@ -26,16 +32,6 @@
public BitSet bits(IndexReader reader) throws IOException {
throw new UnsupportedOperationException();
- /*
- if (chainedFilters.size() == 0) throw new AssertionFailure("Chainedfilter has no
filters to chain for");
- //we need to copy the first BitSet because BitSet is modified by .logicalOp
- Filter filter = chainedFilters.get( 0 );
- BitSet result = (BitSet) filter.bits( reader ).clone();
- for (int index = 1 ; index < chainedFilters.size() ; index++) {
- result.and( chainedFilters.get( index ).bits( reader ) );
- }
- return result;
- */
}
@Override
@@ -45,21 +41,25 @@
throw new AssertionFailure( "Chainedfilter has no filters to chain for" );
}
else if ( size == 1 ) {
- return chainedFilters.get(0).getDocIdSet(reader);
+ return chainedFilters.get( 0 ).getDocIdSet( reader );
}
else {
List<DocIdSet> subSets = new ArrayList<DocIdSet>( size );
for ( Filter f : chainedFilters ) {
subSets.add( f.getDocIdSet( reader ) );
}
+ subSets = FilterOptimizationHelper.mergeByBitAnds( subSets );
+ if ( subSets.size() == 1 ) {
+ return subSets.get( 0 );
+ }
return new AndDocIdSet( subSets, reader.maxDoc() );
}
}
public String toString() {
- StringBuilder sb = new StringBuilder("ChainedFilter [");
+ StringBuilder sb = new StringBuilder( "ChainedFilter [" );
for (Filter filter : chainedFilters) {
- sb.append( "\n ").append( filter.toString() );
+ sb.append( "\n " ).append( filter.toString() );
}
return sb.append("\n]" ).toString();
}
Added: search/trunk/src/java/org/hibernate/search/filter/FilterOptimizationHelper.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/filter/FilterOptimizationHelper.java
(rev 0)
+++
search/trunk/src/java/org/hibernate/search/filter/FilterOptimizationHelper.java 2008-11-29
13:10:14 UTC (rev 15627)
@@ -0,0 +1,99 @@
+// $Id$
+package org.hibernate.search.filter;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.util.DocIdBitSet;
+import org.apache.lucene.util.OpenBitSet;
+
+/**
+ * Helper class to apply some common optimizations when
+ * several Filters are applied.
+ *
+ * @author Sanne Grinovero
+ */
+public class FilterOptimizationHelper {
+
+ /**
+ * Returns a new list of DocIdSet, applying binary AND
+ * on all DocIdSet implemented by using BitSet or OpenBitSet.
+ * @param docIdSets
+ * @return the same list if not change was done
+ */
+ public static List<DocIdSet> mergeByBitAnds(List<DocIdSet> docIdSets) {
+ int size = docIdSets.size();
+ List<OpenBitSet> openBitSets = new ArrayList<OpenBitSet>( size );
+ List<DocIdBitSet> docIdBitSets = new ArrayList<DocIdBitSet>( size );
+ List<DocIdSet> nonMergeAble = new ArrayList<DocIdSet>( size );
+ for (DocIdSet set : docIdSets) {
+ if (set instanceof OpenBitSet) {
+ openBitSets.add( (OpenBitSet) set );
+ }
+ else if (set instanceof DocIdBitSet) {
+ docIdBitSets.add( (DocIdBitSet) set );
+ }
+ else {
+ nonMergeAble.add( set );
+ }
+ }
+ if ( openBitSets.size() <= 1 && docIdBitSets.size() <= 1 ) {
+ //skip all work as no optimization is possible
+ return docIdSets;
+ }
+ if ( openBitSets.size() > 0 ) {
+ nonMergeAble.add( mergeByBitAnds( openBitSets ) );
+ }
+ if ( docIdBitSets.size() > 0 ) {
+ nonMergeAble.add( mergeByBitAnds( docIdBitSets ) );
+ }
+ return nonMergeAble;
+ }
+
+ /**
+ * Merges all DocIdBitSet in a new DocIdBitSet using
+ * binary AND operations, which is usually more efficient
+ * than using an iterator.
+ * @param docIdBitSets
+ * @return a new DocIdBitSet, or the first element if only
+ * one element was found in the list.
+ */
+ public static DocIdBitSet mergeByBitAnds(List<DocIdBitSet> docIdBitSets) {
+ int listSize = docIdBitSets.size();
+ if ( listSize == 1 ) {
+ return docIdBitSets.get( 0 );
+ }
+ //we need to copy the first BitSet because BitSet is modified by .logicalOp
+ BitSet result = (BitSet) docIdBitSets.get( 0 ).getBitSet().clone();
+ for ( int i=1; i<listSize; i++ ) {
+ BitSet bitSet = docIdBitSets.get( i ).getBitSet();
+ result.and( bitSet );
+ }
+ return new DocIdBitSet( result );
+ }
+
+ /**
+ * Merges all OpenBitSet in a new OpenBitSet using
+ * binary AND operations, which is usually more efficient
+ * than using an iterator.
+ * @param openBitSets
+ * @return a new OpenBitSet, or the first element if only
+ * one element was found in the list.
+ */
+ public static OpenBitSet mergeByBitAnds(List<OpenBitSet> openBitSets) {
+ int listSize = openBitSets.size();
+ if ( listSize == 1 ) {
+ return openBitSets.get( 0 );
+ }
+ //we need to copy the first OpenBitSet because BitSet is modified by .logicalOp
+ OpenBitSet result = (OpenBitSet) openBitSets.get( 0 ).clone();
+ for ( int i=1; i<listSize; i++ ) {
+ OpenBitSet openSet = openBitSets.get( i );
+ result.intersect( openSet );
+ }
+ return result;
+ }
+
+}
Property changes on:
search/trunk/src/java/org/hibernate/search/filter/FilterOptimizationHelper.java
___________________________________________________________________
Name: svn:executable
+ *
Name: svn:keywords
+ Id
Added:
search/trunk/src/test/org/hibernate/search/test/filter/FiltersOptimizationTest.java
===================================================================
--- search/trunk/src/test/org/hibernate/search/test/filter/FiltersOptimizationTest.java
(rev 0)
+++
search/trunk/src/test/org/hibernate/search/test/filter/FiltersOptimizationTest.java 2008-11-29
13:10:14 UTC (rev 15627)
@@ -0,0 +1,180 @@
+package org.hibernate.search.test.filter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.DocIdBitSet;
+import org.apache.lucene.util.OpenBitSet;
+import org.hibernate.search.filter.FilterOptimizationHelper;
+
+import junit.framework.TestCase;
+
+/**
+ * Used to test org.hibernate.search.filter.FiltersOptimizationHelper
+ * @see org.hibernate.search.filter.FilterOptimizationHelper
+ * @author Sanne Grinovero
+ */
+public class FiltersOptimizationTest extends TestCase {
+
+ /**
+ * in some cases optimizations are not possible,
+ * test that mergeByBitAnds returns the same instance
+ * in that case.
+ */
+ public void testSkipMerging() {
+ List<DocIdSet> dataIn = new ArrayList<DocIdSet>( 3 );
+ dataIn.add( makeOpenBitSetTestSet( 1,2,3,5,8,9,10,11 ) );
+ dataIn.add( makeBitSetTestSet( 1,2,3,5,8,9,10,11,20 ) );
+ dataIn.add( makeAnonymousTestSet( 1,2,3,5,8,9,10,11 ) );
+ dataIn.add( makeAnonymousTestSet( 1,2,3,5,8,9,10,11,12 ) );
+ List<DocIdSet> merge = FilterOptimizationHelper.mergeByBitAnds( dataIn );
+ assertSame( dataIn, merge );
+ }
+
+ /**
+ * In case two filters are of OpenBitSet implementation,
+ * they should be AND-ed by using bit operations
+ * (rather than build the iterator).
+ * @throws IOException should not be thrown
+ */
+ public void testDoMergingOnOpenBitSet() throws IOException {
+ List<DocIdSet> dataIn = new ArrayList<DocIdSet>( 3 );
+ dataIn.add( makeOpenBitSetTestSet( 1,2,5,8,9,10,11 ) );
+ dataIn.add( makeOpenBitSetTestSet( 1,2,3,5,8,11 ) );
+ DocIdSet unmergedSet = makeAnonymousTestSet( 1,2,3,5,8,9,10,11 );
+ dataIn.add( unmergedSet );
+ List<DocIdSet> merge = FilterOptimizationHelper.mergeByBitAnds( dataIn );
+ assertNotSame( dataIn, merge );
+
+ assertEquals( 2, merge.size() );
+ assertSame( unmergedSet, merge.get( 0 ) );
+ assertTrue( isIdSetSequenceSameTo( merge.get( 1 ), 1,2,5,8,11 ) );
+ }
+
+ /**
+ * In case two filters are of OpenBitSet implementation,
+ * they should be AND-ed by using bit operations
+ * (rather than build the iterator).
+ * @throws IOException should be thrown
+ */
+ public void testDoMergingOnJavaBitSet() throws IOException {
+ List<DocIdSet> dataIn = new ArrayList<DocIdSet>( 3 );
+ dataIn.add( makeBitSetTestSet( 1,2,5,8,9,10,11 ) );
+ dataIn.add( makeBitSetTestSet( 1,2,3,5,8,11 ) );
+ DocIdSet unmergedSet = makeAnonymousTestSet( 1,2,3,5,8,9,10,11 );
+ dataIn.add( unmergedSet );
+ List<DocIdSet> merge = FilterOptimizationHelper.mergeByBitAnds( dataIn );
+ assertNotSame( dataIn, merge );
+
+ assertEquals( 2, merge.size() );
+ assertSame( unmergedSet, merge.get( 0 ) );
+ assertTrue( isIdSetSequenceSameTo( merge.get( 1 ), 1,2,5,8,11 ) );
+ }
+
+ /**
+ * Used to this test the testcase's helper method isIdSetSequenceSameTo
+ * @throws IOException
+ */
+ public void testSelfIdSequenceTester() throws IOException {
+ assertTrue( isIdSetSequenceSameTo(
+ makeOpenBitSetTestSet( 1,2,3,5,8,11 ),
+ 1,2,3,5,8,11 ) );
+ assertFalse( isIdSetSequenceSameTo(
+ makeOpenBitSetTestSet( 1,2,3,5,8 ),
+ 1,2,3,5,8,11 ) );
+ assertFalse( isIdSetSequenceSameTo(
+ makeOpenBitSetTestSet( 1,2,3,5,8,11 ),
+ 1,2,3,5,8 ) );
+ }
+
+ /**
+ * Verifies if the docIdSet is representing a specific
+ * sequence of docIds.
+ * @param docIdSet the docIdSet to test
+ * @param expectedIds an array of document ids
+ * @return true if iterating on docIdSet returns the expectedIds
+ * @throws IOException should not happen
+ */
+ private boolean isIdSetSequenceSameTo(DocIdSet docIdSet, int...expectedIds) throws
IOException {
+ DocIdSetIterator idSetIterator = docIdSet.iterator();
+ for ( int setBit : expectedIds ) {
+ if ( ! idSetIterator.next() ) {
+ return false;
+ }
+ if ( idSetIterator.doc() != setBit ) {
+ return false;
+ }
+ }
+ if ( idSetIterator.next() ){
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * test helper, makes an implementation of a DocIdSet
+ * @param docIds the ids it should contain
+ * @return
+ */
+ private DocIdSet makeAnonymousTestSet(int... docIds) {
+ DocIdSet idSet = makeOpenBitSetTestSet( docIds );
+ return new DocIdSetHiddenType( idSet );
+ }
+
+ /**
+ * test helper, makes a prefilled OpenBitSet
+ * @param enabledBits the ids it should contain
+ * @return a new OpenBitSet
+ */
+ private OpenBitSet makeOpenBitSetTestSet(int... enabledBits) {
+ OpenBitSet set = new OpenBitSet();
+ for (int position : enabledBits ) {
+ // a minimal check for input duplicates:
+ assertFalse( set.get( position ) );
+ set.set( position );
+ }
+ return set;
+ }
+
+ /**
+ * test helper, makes a prefilled DocIdBitSet
+ * using the java.lang.BitSet
+ * @see java.lang.BitSet
+ * @param enabledBits the ids it should contain
+ * @return a ne DocIdBitSet
+ */
+ private DocIdBitSet makeBitSetTestSet(int... enabledBits) {
+ BitSet set = new BitSet();
+ for (int position : enabledBits ) {
+ // a minimal check for input duplicates:
+ assertFalse( set.get( position ) );
+ set.set( position );
+ }
+ return new DocIdBitSet( set );
+ }
+
+ /**
+ * Implementation for testing: wraps a DocIdSet with a new type
+ * to make it not possible to cast/detect to the original type.
+ */
+ private class DocIdSetHiddenType extends DocIdSet {
+
+ private final DocIdSet bitSet;
+
+ DocIdSetHiddenType(DocIdSet wrapped) {
+ this.bitSet = wrapped;
+ }
+
+ @Override
+ public DocIdSetIterator iterator() {
+ return bitSet.iterator();
+ }
+
+ }
+
+}
Property changes on:
search/trunk/src/test/org/hibernate/search/test/filter/FiltersOptimizationTest.java
___________________________________________________________________
Name: svn:executable
+ *