[hibernate-commits] Hibernate SVN: r15627 - in search/trunk/src: test/org/hibernate/search/test/filter and 1 other directory.

hibernate-commits at lists.jboss.org hibernate-commits at lists.jboss.org
Sat Nov 29 08:10:15 EST 2008


Author: sannegrinovero
Date: 2008-11-29 08:10:14 -0500 (Sat, 29 Nov 2008)
New Revision: 15627

Added:
   search/trunk/src/java/org/hibernate/search/filter/FilterOptimizationHelper.java
   search/trunk/src/test/org/hibernate/search/test/filter/FiltersOptimizationTest.java
Modified:
   search/trunk/src/java/org/hibernate/search/filter/ChainedFilter.java
Log:
HSEARCH-299 use of bit operations when possible to chain Filters

Modified: search/trunk/src/java/org/hibernate/search/filter/ChainedFilter.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/filter/ChainedFilter.java	2008-11-27 17:26:25 UTC (rev 15626)
+++ search/trunk/src/java/org/hibernate/search/filter/ChainedFilter.java	2008-11-29 13:10:14 UTC (rev 15627)
@@ -12,7 +12,13 @@
 import org.hibernate.annotations.common.AssertionFailure;
 
 /**
+ * <p>A Filter capable of chaining other filters, so that it's
+ * possible to apply several filters on a Query.</p>
+ * <p>The resulting filter will only enable result Documents
+ * if no filter removed it.</p>
+ * 
  * @author Emmanuel Bernard
+ * @author Sanne Grinovero
  */
 public class ChainedFilter extends Filter {
 	
@@ -26,16 +32,6 @@
 
 	public BitSet bits(IndexReader reader) throws IOException {
 		throw new UnsupportedOperationException();
-		/*
-		if (chainedFilters.size() == 0) throw new AssertionFailure("Chainedfilter has no filters to chain for");
-		//we need to copy the first BitSet because BitSet is modified by .logicalOp
-		Filter filter = chainedFilters.get( 0 );
-		BitSet result = (BitSet) filter.bits( reader ).clone();
-		for (int index = 1 ; index < chainedFilters.size() ; index++) {
-			result.and( chainedFilters.get( index ).bits( reader ) );
-		}
-		return result;
-		*/
 	}
 	
 	@Override
@@ -45,21 +41,25 @@
 			throw new AssertionFailure( "Chainedfilter has no filters to chain for" );
 		}
 		else if ( size == 1 ) {
-			return chainedFilters.get(0).getDocIdSet(reader);
+			return chainedFilters.get( 0 ).getDocIdSet( reader );
 		}
 		else {
 			List<DocIdSet> subSets = new ArrayList<DocIdSet>( size );
 			for ( Filter f : chainedFilters ) {
 				subSets.add( f.getDocIdSet( reader ) );
 			}
+			subSets = FilterOptimizationHelper.mergeByBitAnds( subSets );
+			if ( subSets.size() == 1 ) {
+				return subSets.get( 0 );
+			}
 			return new AndDocIdSet( subSets, reader.maxDoc() );
 		}
 	}
 
 	public String toString() {
-		StringBuilder sb = new StringBuilder("ChainedFilter [");
+		StringBuilder sb = new StringBuilder( "ChainedFilter [" );
 		for (Filter filter : chainedFilters) {
-			sb.append( "\n  ").append( filter.toString() );
+			sb.append( "\n  " ).append( filter.toString() );
 		}
 		return sb.append("\n]" ).toString();
 	}

Added: search/trunk/src/java/org/hibernate/search/filter/FilterOptimizationHelper.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/filter/FilterOptimizationHelper.java	                        (rev 0)
+++ search/trunk/src/java/org/hibernate/search/filter/FilterOptimizationHelper.java	2008-11-29 13:10:14 UTC (rev 15627)
@@ -0,0 +1,99 @@
+// $Id$
+package org.hibernate.search.filter;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.util.DocIdBitSet;
+import org.apache.lucene.util.OpenBitSet;
+
+/**
+ * Helper class to apply some common optimizations when
+ * several Filters are applied.
+ * 
+ * @author Sanne Grinovero
+ */
+public class FilterOptimizationHelper {
+
+	/**
+	 * Returns a new list of DocIdSet, applying binary AND
+	 * on all DocIdSet implemented by using BitSet or OpenBitSet.
+	 * @param docIdSets
+	 * @return the same list if not change was done
+	 */
+	public static List<DocIdSet> mergeByBitAnds(List<DocIdSet> docIdSets) {
+		int size = docIdSets.size();
+		List<OpenBitSet> openBitSets = new ArrayList<OpenBitSet>( size );
+		List<DocIdBitSet> docIdBitSets = new ArrayList<DocIdBitSet>( size );
+		List<DocIdSet> nonMergeAble = new ArrayList<DocIdSet>( size );
+		for (DocIdSet set : docIdSets) {
+			if (set instanceof OpenBitSet) {
+				openBitSets.add( (OpenBitSet) set );
+			}
+			else if (set instanceof DocIdBitSet) {
+				docIdBitSets.add( (DocIdBitSet) set );
+			}
+			else {
+				nonMergeAble.add( set );
+			}
+		}
+		if ( openBitSets.size() <= 1 && docIdBitSets.size() <= 1 ) {
+			//skip all work as no optimization is possible
+			return docIdSets;
+		}
+		if ( openBitSets.size() > 0 ) {
+			nonMergeAble.add( mergeByBitAnds( openBitSets ) );
+		}
+		if ( docIdBitSets.size() > 0 ) {
+			nonMergeAble.add( mergeByBitAnds( docIdBitSets ) );
+		}
+		return nonMergeAble;
+	}
+
+	/**
+	 * Merges all DocIdBitSet in a new DocIdBitSet using
+	 * binary AND operations, which is usually more efficient
+	 * than using an iterator.
+	 * @param docIdBitSets
+	 * @return a new DocIdBitSet, or the first element if only
+	 * one element was found in the list.
+	 */
+	public static DocIdBitSet mergeByBitAnds(List<DocIdBitSet> docIdBitSets) {
+		int listSize = docIdBitSets.size();
+		if ( listSize == 1 ) {
+			return docIdBitSets.get( 0 );
+		}
+		//we need to copy the first BitSet because BitSet is modified by .logicalOp
+		BitSet result = (BitSet) docIdBitSets.get( 0 ).getBitSet().clone();
+		for ( int i=1; i<listSize; i++ ) {
+			BitSet bitSet = docIdBitSets.get( i ).getBitSet();
+			result.and( bitSet );
+		}
+		return new DocIdBitSet( result );
+	}
+
+	/**
+	 * Merges all OpenBitSet in a new OpenBitSet using
+	 * binary AND operations, which is usually more efficient
+	 * than using an iterator.
+	 * @param openBitSets
+	 * @return a new OpenBitSet, or the first element if only
+	 * one element was found in the list.
+	 */
+	public static OpenBitSet mergeByBitAnds(List<OpenBitSet> openBitSets) {
+		int listSize = openBitSets.size();
+		if ( listSize == 1 ) {
+			return openBitSets.get( 0 );
+		}
+		//we need to copy the first OpenBitSet because BitSet is modified by .logicalOp
+		OpenBitSet result = (OpenBitSet) openBitSets.get( 0 ).clone();
+		for ( int i=1; i<listSize; i++ ) {
+			OpenBitSet openSet = openBitSets.get( i );
+			result.intersect( openSet );
+		}
+		return result;
+	}
+
+}


Property changes on: search/trunk/src/java/org/hibernate/search/filter/FilterOptimizationHelper.java
___________________________________________________________________
Name: svn:executable
   + *
Name: svn:keywords
   + Id

Added: search/trunk/src/test/org/hibernate/search/test/filter/FiltersOptimizationTest.java
===================================================================
--- search/trunk/src/test/org/hibernate/search/test/filter/FiltersOptimizationTest.java	                        (rev 0)
+++ search/trunk/src/test/org/hibernate/search/test/filter/FiltersOptimizationTest.java	2008-11-29 13:10:14 UTC (rev 15627)
@@ -0,0 +1,180 @@
+package org.hibernate.search.test.filter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.DocIdBitSet;
+import org.apache.lucene.util.OpenBitSet;
+import org.hibernate.search.filter.FilterOptimizationHelper;
+
+import junit.framework.TestCase;
+
+/**
+ * Used to test org.hibernate.search.filter.FiltersOptimizationHelper
+ * @see org.hibernate.search.filter.FilterOptimizationHelper
+ * @author Sanne Grinovero
+ */
+public class FiltersOptimizationTest extends TestCase {
+	
+	/**
+	 * in some cases optimizations are not possible,
+	 * test that mergeByBitAnds returns the same instance
+	 * in that case.
+	 */
+	public void testSkipMerging() {
+		List<DocIdSet> dataIn = new ArrayList<DocIdSet>( 3 );
+		dataIn.add( makeOpenBitSetTestSet( 1,2,3,5,8,9,10,11 ) );
+		dataIn.add( makeBitSetTestSet( 1,2,3,5,8,9,10,11,20 ) );
+		dataIn.add( makeAnonymousTestSet( 1,2,3,5,8,9,10,11 ) );
+		dataIn.add( makeAnonymousTestSet( 1,2,3,5,8,9,10,11,12 ) );
+		List<DocIdSet> merge = FilterOptimizationHelper.mergeByBitAnds( dataIn );
+		assertSame( dataIn, merge );
+	}
+	
+	/**
+	 * In case two filters are of OpenBitSet implementation,
+	 * they should be AND-ed by using bit operations
+	 * (rather than build the iterator).
+	 * @throws IOException should not be thrown
+	 */
+	public void testDoMergingOnOpenBitSet() throws IOException {
+		List<DocIdSet> dataIn = new ArrayList<DocIdSet>( 3 );
+		dataIn.add( makeOpenBitSetTestSet( 1,2,5,8,9,10,11 ) );
+		dataIn.add( makeOpenBitSetTestSet( 1,2,3,5,8,11 ) );
+		DocIdSet unmergedSet = makeAnonymousTestSet( 1,2,3,5,8,9,10,11 );
+		dataIn.add( unmergedSet );
+		List<DocIdSet> merge = FilterOptimizationHelper.mergeByBitAnds( dataIn );
+		assertNotSame( dataIn, merge );
+		
+		assertEquals( 2, merge.size() );
+		assertSame( unmergedSet, merge.get( 0 ) );
+		assertTrue( isIdSetSequenceSameTo( merge.get( 1 ), 1,2,5,8,11 ) );
+	}
+	
+	/**
+	 * In case two filters are of OpenBitSet implementation,
+	 * they should be AND-ed by using bit operations
+	 * (rather than build the iterator).
+	 * @throws IOException should be thrown
+	 */
+	public void testDoMergingOnJavaBitSet() throws IOException {
+		List<DocIdSet> dataIn = new ArrayList<DocIdSet>( 3 );
+		dataIn.add( makeBitSetTestSet( 1,2,5,8,9,10,11 ) );
+		dataIn.add( makeBitSetTestSet( 1,2,3,5,8,11 ) );
+		DocIdSet unmergedSet = makeAnonymousTestSet( 1,2,3,5,8,9,10,11 );
+		dataIn.add( unmergedSet );
+		List<DocIdSet> merge = FilterOptimizationHelper.mergeByBitAnds( dataIn );
+		assertNotSame( dataIn, merge );
+		
+		assertEquals( 2, merge.size() );
+		assertSame( unmergedSet, merge.get( 0 ) );
+		assertTrue( isIdSetSequenceSameTo( merge.get( 1 ), 1,2,5,8,11 ) );
+	}
+	
+	/**
+	 * Used to this test the testcase's helper method isIdSetSequenceSameTo
+	 * @throws IOException
+	 */
+	public void testSelfIdSequenceTester() throws IOException {
+		assertTrue( isIdSetSequenceSameTo(
+				makeOpenBitSetTestSet( 1,2,3,5,8,11 ),
+				1,2,3,5,8,11 ) );
+		assertFalse( isIdSetSequenceSameTo(
+				makeOpenBitSetTestSet( 1,2,3,5,8 ),
+				1,2,3,5,8,11 ) );
+		assertFalse( isIdSetSequenceSameTo(
+				makeOpenBitSetTestSet( 1,2,3,5,8,11 ),
+				1,2,3,5,8 ) );
+	}
+	
+	/**
+	 * Verifies if the docIdSet is representing a specific
+	 * sequence of docIds.
+	 * @param docIdSet the docIdSet to test
+	 * @param expectedIds an array of document ids
+	 * @return true if iterating on docIdSet returns the expectedIds
+	 * @throws IOException should not happen
+	 */
+	private boolean isIdSetSequenceSameTo(DocIdSet docIdSet, int...expectedIds) throws IOException {
+		DocIdSetIterator idSetIterator = docIdSet.iterator();
+		for ( int setBit : expectedIds ) {
+			if ( ! idSetIterator.next() ) {
+				return false;
+			}
+			if ( idSetIterator.doc() != setBit ) {
+				return false;
+			}
+		}
+		if ( idSetIterator.next() ){
+			return false;
+		}
+		return true;
+	}
+
+	/**
+	 * test helper, makes an implementation of a DocIdSet
+	 * @param docIds the ids it should contain
+	 * @return
+	 */
+	private DocIdSet makeAnonymousTestSet(int... docIds) {
+		DocIdSet idSet = makeOpenBitSetTestSet( docIds );
+		return new DocIdSetHiddenType( idSet );
+	}
+
+	/**
+	 * test helper, makes a prefilled OpenBitSet
+	 * @param enabledBits the ids it should contain
+	 * @return a new OpenBitSet
+	 */
+	private OpenBitSet makeOpenBitSetTestSet(int... enabledBits) {
+		OpenBitSet set = new OpenBitSet();
+		for (int position : enabledBits ) {
+			// a minimal check for input duplicates:
+			assertFalse( set.get( position ) );
+			set.set( position );
+		}
+		return set;
+	}
+	
+	/**
+	 * test helper, makes a prefilled DocIdBitSet
+	 * using the java.lang.BitSet
+	 * @see java.lang.BitSet
+	 * @param enabledBits the ids it should contain
+	 * @return a ne DocIdBitSet
+	 */
+	private DocIdBitSet makeBitSetTestSet(int... enabledBits) {
+		BitSet set = new BitSet();
+		for (int position : enabledBits ) {
+			// a minimal check for input duplicates:
+			assertFalse( set.get( position ) );
+			set.set( position );
+		}
+		return new DocIdBitSet( set );
+	}
+
+	/**
+	 * Implementation for testing: wraps a DocIdSet with a new type
+	 * to make it not possible to cast/detect to the original type.
+	 */
+	private class DocIdSetHiddenType extends DocIdSet {
+
+		private final DocIdSet bitSet;
+
+		DocIdSetHiddenType(DocIdSet wrapped) {
+			this.bitSet = wrapped;
+		}
+		
+		@Override
+		public DocIdSetIterator iterator() {
+			return bitSet.iterator();
+		}
+		
+	}
+
+}


Property changes on: search/trunk/src/test/org/hibernate/search/test/filter/FiltersOptimizationTest.java
___________________________________________________________________
Name: svn:executable
   + *




More information about the hibernate-commits mailing list