[hibernate-commits] Hibernate SVN: r15535 - in search/trunk/src: test/org/hibernate/search/test/filter and 1 other directory.

hibernate-commits at lists.jboss.org hibernate-commits at lists.jboss.org
Sun Nov 9 01:34:46 EST 2008


Author: epbernard
Date: 2008-11-09 01:34:45 -0500 (Sun, 09 Nov 2008)
New Revision: 15535

Added:
   search/trunk/src/java/org/hibernate/search/filter/AndDocIdSet.java
   search/trunk/src/java/org/hibernate/search/filter/EmptyDocIdBitSet.java
Modified:
   search/trunk/src/java/org/hibernate/search/filter/CachingWrapperFilter.java
   search/trunk/src/java/org/hibernate/search/filter/ChainedFilter.java
   search/trunk/src/test/org/hibernate/search/test/filter/ExcludeAllFilter.java
Log:
HSEARCH-289 move to DocIdSet use.

Added: search/trunk/src/java/org/hibernate/search/filter/AndDocIdSet.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/filter/AndDocIdSet.java	                        (rev 0)
+++ search/trunk/src/java/org/hibernate/search/filter/AndDocIdSet.java	2008-11-09 06:34:45 UTC (rev 15535)
@@ -0,0 +1,141 @@
+package org.hibernate.search.filter;
+
+import java.io.IOException;
+import java.util.BitSet;
+import java.util.List;
+import static java.lang.Math.max;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.DocIdBitSet;
+
+/**
+ * A DocIdSet built as applying "AND" operation to a list of other DocIdSet.
+ * The DocIdSetIterator returned will return only document ids contained
+ * in all DocIdSet handed to the constructor.
+ * 
+ * @author Sanne Grinovero
+ */
+public class AndDocIdSet extends DocIdSet {
+	// Lazily built intersection. volatile: iterators test it for null without holding the lock.
+	private volatile DocIdBitSet docIdBitSet;
+	private final List<DocIdSet> andedDocIdSets;
+	// Rejects a null list or fewer than two DocIdSet; the list is stored as given, not copied.
+	public AndDocIdSet(List<DocIdSet> andedDocIdSets) {
+		if ( andedDocIdSets == null || andedDocIdSets.size() < 2 )
+			throw new IllegalArgumentException( "To \"and\" some DocIdSet they should be at least 2" );
+		this.andedDocIdSets = andedDocIdSets;
+	}
+	// Intersects all the DocIdSet once; iterators reach this through ensureInitialized().
+	private synchronized void buildBitset() throws IOException {
+		if ( docIdBitSet != null ) return; // re-check under the lock: another thread may have built it meanwhile
+		//TODO if all andedDocIdSets are actually DocIdBitSet, use their internal BitSet instead of next algo.
+		//TODO if some andedDocIdSets are DocIdBitSet, merge them first.
+		int size = andedDocIdSets.size();
+		DocIdSetIterator[] iterators = new DocIdSetIterator[size];
+		int[] positions = new int[size];
+		boolean valuesExist = true;
+		int maxIndex = 0;
+		for (int i=0; i<size; i++) {
+			// build all iterators
+			DocIdSetIterator iterator = andedDocIdSets.get(i).iterator();
+			iterators[i] = iterator;
+			// and move to first position
+			boolean nextExists = iterator.next();
+			if ( ! nextExists ) {
+				valuesExist = false;
+				break;
+			}
+			int currentFilterValue = iterator.doc();
+			positions[i] = currentFilterValue;
+			// find the initial maximum position
+			maxIndex = max( maxIndex, currentFilterValue );
+		}
+		BitSet bitSet = new BitSet();
+		if ( valuesExist ) { // skip further processing if some idSet is empty
+			do {
+				if ( allSame( positions ) ) {
+					// enable a bit if all idSets agree on it, then look for the next candidate:
+					bitSet.set( maxIndex );
+					maxIndex++;
+				}
+				maxIndex = advance( iterators, positions, maxIndex );
+			} while ( maxIndex != -1 ); // -1 means the end of some bitSet has been reached (end condition)
+		}
+		docIdBitSet = new DocIdBitSet( bitSet ); // assigned last, once the BitSet is complete
+	}
+
+	/**
+	 * Moves every DocIdSetIterator that is not on currentMaxPosition to at least that position, raising it on overshoot.
+	 * @param iterators the iterators to move; iterators[i] corresponds to positions[i]
+	 * @param positions the doc id each iterator currently stands on, updated in place
+	 * @param currentMaxPosition the doc id every iterator has to reach
+	 * @return maximum position of all DocIdSetIterator after the operation, or -1 when at least one reached the end.
+	 * @throws IOException when skipping one of the iterators fails
+	 */
+	private final int advance(final DocIdSetIterator[] iterators, final int[] positions, int currentMaxPosition) throws IOException {
+		for (int i=0; i<positions.length; i++) {
+			if ( positions[i] != currentMaxPosition ) {
+				boolean validPosition = iterators[i].skipTo( currentMaxPosition );
+				if ( ! validPosition )
+					return -1;
+				positions[i] = iterators[i].doc();
+				currentMaxPosition = max( currentMaxPosition, positions[i] );
+			}
+		}
+		return currentMaxPosition;
+	}
+
+	/**
+	 * Checks whether all DocIdSetIterator stopped at the same position.
+	 * @param positions the array of current positions.
+	 * @return true if all DocIdSetIterator agree on the current docId.
+	 */
+	private final boolean allSame(final int[] positions) {
+		int base = positions[0];
+		for (int i=1; i<positions.length; i++) {
+			if ( base != positions[i] )
+				return false;
+		}
+		return true;
+	}
+
+	@Override
+	public DocIdSetIterator iterator() {
+		return new AndingDocIdSetIterator();
+	}
+	// Delegates to an iterator over the lazily built bit set; the set is built on first next()/skipTo(int).
+	private class AndingDocIdSetIterator extends DocIdSetIterator {
+
+		private DocIdSetIterator iterator;
+
+		@Override
+		public int doc() {
+			// calling doc() before next() or skipTo(int) breaks the iterator contract; with assertions
+			// disabled this fails with a NullPointerException rather than returning a misleading 0.
+			assert iterator != null : "Illegal state, can't be called before next() or skipTo(int)";
+			return iterator.doc();
+		}
+
+		@Override
+		public boolean next() throws IOException {
+			ensureInitialized(); //can't initialize before as it would not be allowed to throw IOException
+			return iterator.next();
+		}
+
+		@Override
+		public boolean skipTo(int target) throws IOException {
+			ensureInitialized(); //can't initialize before as it would not be allowed to throw IOException
+			return iterator.skipTo( target );
+		}
+		
+		private void ensureInitialized() throws IOException {
+			if ( docIdBitSet == null ) buildBitset();
+			if ( iterator == null ) {
+				iterator = docIdBitSet.iterator();
+			}
+		}
+		
+	}
+	
+}

Modified: search/trunk/src/java/org/hibernate/search/filter/CachingWrapperFilter.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/filter/CachingWrapperFilter.java	2008-11-08 22:25:28 UTC (rev 15534)
+++ search/trunk/src/java/org/hibernate/search/filter/CachingWrapperFilter.java	2008-11-09 06:34:45 UTC (rev 15535)
@@ -5,6 +5,7 @@
 import java.util.BitSet;
 
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Filter;
 import org.slf4j.Logger;
 
@@ -27,53 +28,56 @@
 	
 	public static final int DEFAULT_SIZE = 5;
 	
-	private final int size;
-	
 	/**
 	 * The cache using soft references in order to store the filter bit sets.
 	 */
-	private transient SoftLimitMRUCache cache;
+	private final SoftLimitMRUCache cache;
 	
 	private final Filter filter;
 
 	/**
-	 * @param filter
-	 *            Filter to cache results of
+	 * @param filter Filter to cache results of
 	 */
 	public CachingWrapperFilter(Filter filter) {
 		this(filter, DEFAULT_SIZE);
 	}
 	
 	/**
-	 * @param filter
-	 *            Filter to cache results of
+	 * @param filter Filter to cache results of
 	 */
 	public CachingWrapperFilter(Filter filter, int size) {
 		this.filter = filter;
-		this.size = size;
+		log.debug( "Initialising SoftLimitMRUCache with hard ref size of {}", size );
+		this.cache = new SoftLimitMRUCache( size );
 	}	
 
+	@Override
 	public BitSet bits(IndexReader reader) throws IOException {
-		if (cache == null) {
-			log.debug("Initialising SoftLimitMRUCache with hard ref size of {}", size);
-			cache = new SoftLimitMRUCache(size);
+		throw new UnsupportedOperationException();
+		/* BitSet cached = (BitSet) cache.get(reader);
+		if (cached != null) {
+			return cached;
 		}
-
-		//memory barrier ensure cache == null will not always stay true on concurrent threads
-		synchronized (cache) { // check cache
-			BitSet cached = (BitSet) cache.get(reader);
-			if (cached != null) {
+		final BitSet bits = filter.bits(reader);
+		cache.put(reader, bits);
+		return bits; */
+	}
+	
+	@Override
+	public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+		DocIdSet cached = (DocIdSet) cache.get( reader );
+		if ( cached != null ) {
+			return cached;
+		}
+		synchronized (cache) {
+			cached = (DocIdSet) cache.get( reader );
+			if ( cached != null ) {
 				return cached;
 			}
+			final DocIdSet docIdSet = filter.getDocIdSet( reader );
+			cache.put( reader, docIdSet );
+			return docIdSet;
 		}
-
-		final BitSet bits = filter.bits(reader);
-
-		synchronized (cache) { // update cache
-			cache.put(reader, bits);
-		}
-
-		return bits;
 	}
 
 	public String toString() {

Modified: search/trunk/src/java/org/hibernate/search/filter/ChainedFilter.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/filter/ChainedFilter.java	2008-11-08 22:25:28 UTC (rev 15534)
+++ search/trunk/src/java/org/hibernate/search/filter/ChainedFilter.java	2008-11-09 06:34:45 UTC (rev 15535)
@@ -6,6 +6,7 @@
 import java.util.ArrayList;
 import java.io.IOException;
 
+import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.index.IndexReader;
 import org.hibernate.annotations.common.AssertionFailure;
@@ -14,6 +15,7 @@
  * @author Emmanuel Bernard
  */
 public class ChainedFilter extends Filter {
+	
 	private static final long serialVersionUID = -6153052295766531920L;
 	
 	private final List<Filter> chainedFilters = new ArrayList<Filter>();
@@ -23,6 +25,8 @@
 	}
 
 	public BitSet bits(IndexReader reader) throws IOException {
+		throw new UnsupportedOperationException();
+		/*
 		if (chainedFilters.size() == 0) throw new AssertionFailure("Chainedfilter has no filters to chain for");
 		//we need to copy the first BitSet because BitSet is modified by .logicalOp
 		Filter filter = chainedFilters.get( 0 );
@@ -31,7 +35,26 @@
 			result.and( chainedFilters.get( index ).bits( reader ) );
 		}
 		return result;
+		*/
 	}
+	
+	@Override
+	public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+		int size = chainedFilters.size();
+		if ( size == 0 ) {
+			throw new AssertionFailure( "Chainedfilter has no filters to chain for" );
+		}
+		else if ( size == 1 ) {
+			return chainedFilters.get(0).getDocIdSet(reader);
+		}
+		else {
+			List<DocIdSet> subSets = new ArrayList<DocIdSet>( size );
+			for ( Filter f : chainedFilters ) {
+				subSets.add( f.getDocIdSet( reader ) );
+			}
+			return new AndDocIdSet( subSets );
+		}
+	}
 
 	public String toString() {
 		StringBuilder sb = new StringBuilder("ChainedFilter [");

Added: search/trunk/src/java/org/hibernate/search/filter/EmptyDocIdBitSet.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/filter/EmptyDocIdBitSet.java	                        (rev 0)
+++ search/trunk/src/java/org/hibernate/search/filter/EmptyDocIdBitSet.java	2008-11-09 06:34:45 UTC (rev 15535)
@@ -0,0 +1,46 @@
+package org.hibernate.search.filter;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+public class EmptyDocIdBitSet extends DocIdSet {
+
+	// the only instance of this class: the constructor is private
+	public static final DocIdSet instance = new EmptyDocIdBitSet();
+
+	// a single iterator is handed to every caller, it keeps no position
+	private final DocIdSetIterator sharedIterator = new NoDocsIterator();
+
+	private EmptyDocIdBitSet() { /* singleton, reach it through instance */ }
+
+	@Override
+	public DocIdSetIterator iterator() {
+		return sharedIterator;
+	}
+
+	/**
+	 * Iterator over nothing: next() and skipTo(int) always answer false,
+	 * and doc() is rejected because there is never a current document.
+	 */
+	private static final class NoDocsIterator extends DocIdSetIterator {
+
+		@Override
+		public boolean skipTo(int target) throws IOException {
+			return false;
+		}
+
+		@Override
+		public boolean next() throws IOException {
+			return false;
+		}
+
+		@Override
+		public int doc() {
+			throw new IllegalStateException("Should never be called");
+		}
+
+	}
+
+}

Modified: search/trunk/src/test/org/hibernate/search/test/filter/ExcludeAllFilter.java
===================================================================
--- search/trunk/src/test/org/hibernate/search/test/filter/ExcludeAllFilter.java	2008-11-08 22:25:28 UTC (rev 15534)
+++ search/trunk/src/test/org/hibernate/search/test/filter/ExcludeAllFilter.java	2008-11-09 06:34:45 UTC (rev 15535)
@@ -4,20 +4,33 @@
 import java.util.BitSet;
 import java.io.IOException;
 
+import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.index.IndexReader;
+import org.hibernate.search.filter.EmptyDocIdBitSet;
 
 /**
  * @author Emmanuel Bernard
  */
 @SuppressWarnings("serial")
 public class ExcludeAllFilter extends Filter {
+	
+	//ugly but useful for test purposes
 	private static volatile boolean done = false;
 
+	@Override
 	public BitSet bits(IndexReader reader) throws IOException {
-		if (done) throw new IllegalStateException("Called twice");
+		if ( done ) throw new IllegalStateException( "Called twice" );
 		BitSet bitSet = new BitSet( reader.maxDoc() );
 		done = true;
 		return bitSet;
 	}
+
+	@Override
+	public DocIdSet getDocIdSet(IndexReader reader) throws IOException { // test filter: matches no document, tolerates a single call
+		if ( done ) throw new IllegalStateException( "Called twice" ); // done is static and is also set by bits()
+		done = true;
+		return EmptyDocIdBitSet.instance; // shared empty DocIdSet, its iterator never yields a document
+	}
+	
 }




More information about the hibernate-commits mailing list