Author: sannegrinovero
Date: 2008-05-29 18:07:56 -0400 (Thu, 29 May 2008)
New Revision: 14714
Modified:
search/trunk/src/java/org/hibernate/search/backend/LuceneIndexingParameters.java
search/trunk/src/java/org/hibernate/search/backend/Workspace.java
search/trunk/src/java/org/hibernate/search/engine/DocumentBuilder.java
Log:
HSEARCH-202 and more optimizations in Workspace (early lock releases)
Modified:
search/trunk/src/java/org/hibernate/search/backend/LuceneIndexingParameters.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/backend/LuceneIndexingParameters.java 2008-05-29 15:18:15 UTC (rev 14713)
+++ search/trunk/src/java/org/hibernate/search/backend/LuceneIndexingParameters.java 2008-05-29 22:07:56 UTC (rev 14714)
@@ -129,4 +129,13 @@
}
+ public void applyToWriter(IndexWriter writer, boolean batch) {
+ if ( batch ) {
+ getBatchIndexParameters().applyToWriter( writer );
+ }
+ else {
+ getTransactionIndexParameters().applyToWriter( writer );
+ }
+ }
+
}
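[Editor's note] The new applyToWriter(IndexWriter, boolean) helper folds the batch/transaction branch that previously lived at each call site into LuceneIndexingParameters itself. A minimal caller-side sketch, assuming `provider`, `analyzer`, `isBatch` and `searchFactoryImplementor` are in scope as they are elsewhere in this commit:

    // Hypothetical call site (names borrowed from the Workspace code below):
    LuceneIndexingParameters indexingParams =
            searchFactoryImplementor.getIndexingParameters( provider );
    IndexWriter writer = new IndexWriter( provider.getDirectory(), analyzer, false );
    // one call replaces the old if/else over getBatchIndexParameters()
    // and getTransactionIndexParameters():
    indexingParams.applyToWriter( writer, isBatch );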
Modified: search/trunk/src/java/org/hibernate/search/backend/Workspace.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/backend/Workspace.java 2008-05-29 15:18:15 UTC (rev 14713)
+++ search/trunk/src/java/org/hibernate/search/backend/Workspace.java 2008-05-29 22:07:56 UTC (rev 14714)
@@ -2,9 +2,7 @@
package org.hibernate.search.backend;
import java.io.IOException;
-import java.util.ArrayList;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
import java.util.concurrent.locks.ReentrantLock;
@@ -12,6 +10,7 @@
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
import org.hibernate.annotations.common.AssertionFailure;
import org.hibernate.search.SearchException;
import org.hibernate.search.engine.DocumentBuilder;
@@ -30,7 +29,7 @@
* <li>One cannot execute modification through an IndexReader when an IndexWriter has been acquired
* on the same underlying directory
* </li>
- * <li>One cannot get an IndexWriter when an IndexReader have been acquired and modificed on the same
+ * <li>One cannot get an IndexWriter when an IndexReader has been acquired and modified on the same
* underlying directory
* </li>
* <li>The recommended approach is to execute all the modifications on the IndexReaders, {@link #clean()}, and acquire the
@@ -40,22 +39,35 @@
*
* @author Emmanuel Bernard
* @author Hardy Ferentschik
+ * @author Sanne Grinovero
*/
-//TODO introduce the notion of read only IndexReader? We cannot enforce it because Lucene use abstract classes, not interfaces
+//TODO introduce the notion of read only IndexReader? We cannot enforce it because Lucene uses abstract classes, not interfaces.
public class Workspace {
+
private final Logger log = LoggerFactory.getLogger( Workspace.class );
- private Map<DirectoryProvider, IndexReader> readers = new HashMap<DirectoryProvider, IndexReader>();
- private Map<DirectoryProvider, IndexWriter> writers = new HashMap<DirectoryProvider, IndexWriter>();
- private List<DirectoryProvider> lockedProviders = new ArrayList<DirectoryProvider>();
- private Map<DirectoryProvider, DPStatistics> dpStatistics = new HashMap<DirectoryProvider, DPStatistics>();
- private SearchFactoryImplementor searchFactoryImplementor;
+ private final SearchFactoryImplementor searchFactoryImplementor;
+ private static final Analyzer SIMPLE_ANALYZER = new SimpleAnalyzer();
+ private final Map<DirectoryProvider, DPWorkspace> directorySpace = new HashMap<DirectoryProvider, DPWorkspace>() {
+ @Override
+ public DPWorkspace get(Object key) {
+ DirectoryProvider dp = (DirectoryProvider) key;
+ DPWorkspace directoryWorkSpace = super.get( dp );
+ if ( directoryWorkSpace==null ) {
+ directoryWorkSpace = new DPWorkspace( dp );
+ put( dp, directoryWorkSpace );
+ }
+ return directoryWorkSpace;
+ }
+
+ };
+
/**
- * Flag indicating if the current work should be executed the Lucene parameters for batch indexing.
+ * Flag indicating if the current work should be executed using
+ * the Lucene parameters for batch indexing.
*/
private boolean isBatch = false;
-
public Workspace(SearchFactoryImplementor searchFactoryImplementor) {
this.searchFactoryImplementor = searchFactoryImplementor;
}
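[Editor's note] The overridden get above turns directorySpace into a lazily self-populating map: the first lookup for a DirectoryProvider creates and caches its DPWorkspace, so callers never need a null check. The same idiom in isolation, as a sketch with illustrative names (the plain HashMap assumes single-threaded use, which matches a Workspace being scoped to one unit of work; on Java 8+ Map.computeIfAbsent expresses the same idea without subclassing):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Lazily self-populating map: get() creates and stores missing values.
    Map<String, List<String>> valuesByKey = new HashMap<String, List<String>>() {
        @Override
        public List<String> get(Object key) {
            List<String> values = super.get( key );
            if ( values == null ) {
                values = new ArrayList<String>();
                put( (String) key, values );
            }
            return values;
        }
    };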
@@ -65,29 +77,16 @@
}
/**
- * retrieve a read write IndexReader
+ * Retrieve a read write IndexReader; the purpose should be to
+ * modify the index. (mod count will be incremented)
* For a given DirectoryProvider, An IndexReader must be used before an IndexWriter
*/
- @SuppressWarnings( { "ThrowableInstanceNeverThrown" } )
public IndexReader getIndexReader(DirectoryProvider provider, Class entity) {
- //one cannot access a reader for update after a writer has been accessed
- if ( writers.containsKey( provider ) )
- throw new AssertionFailure( "Tries to read for update an index while a writer is accessed" + entity );
- IndexReader reader = readers.get( provider );
- if ( reader != null ) return reader;
- lockProvider( provider );
- dpStatistics.get( provider ).operations++;
- try {
- reader = IndexReader.open( provider.getDirectory() );
- readers.put( provider, reader );
- }
- catch (IOException e) {
- cleanUp( new SearchException( "Unable to open IndexReader for " + entity, e ) );
- }
- return reader;
+ DPWorkspace space = directorySpace.get( provider );
+ return space.getIndexReader( entity );
}
- //for optimization
+ //for index optimization
public IndexWriter getIndexWriter(DirectoryProvider provider) {
return getIndexWriter( provider, null, false );
}
@@ -96,66 +95,17 @@
* retrieve a read write IndexWriter
* For a given DirectoryProvider, An IndexReader must be used before an IndexWriter
*/
- @SuppressWarnings( { "ThrowableInstanceNeverThrown" } )
public IndexWriter getIndexWriter(DirectoryProvider provider, Class entity, boolean modificationOperation) {
- //one has to close a reader for update before a writer is accessed
- IndexReader reader = readers.get( provider );
- if ( reader != null ) {
- try {
- reader.close();
- }
- catch (IOException e) {
- throw new SearchException( "Exception while closing IndexReader", e );
- }
- readers.remove( provider );
- }
- IndexWriter writer = writers.get( provider );
- if ( writer != null ) return writer;
- lockProvider( provider );
- if ( modificationOperation ) dpStatistics.get( provider ).operations++;
- try {
- DocumentBuilder documentBuilder = searchFactoryImplementor.getDocumentBuilders().get( entity );
- Analyzer analyzer = entity != null ?
- documentBuilder.getAnalyzer() :
- new SimpleAnalyzer(); //never used
- writer = new IndexWriter( provider.getDirectory(), analyzer, false ); //has been created at init time
- writer.setSimilarity( documentBuilder.getSimilarity() );
-
- LuceneIndexingParameters indexingParams = searchFactoryImplementor.getIndexingParameters( provider );
- if ( isBatch ) {
- indexingParams.getBatchIndexParameters().applyToWriter(writer);
- }
- else {
- indexingParams.getTransactionIndexParameters().applyToWriter(writer);
- }
-
- writers.put( provider, writer );
- }
- catch (IOException e) {
- cleanUp(
- new SearchException( "Unable to open IndexWriter" + ( entity != null ? " for " + entity : "" ), e )
- );
- }
- return writer;
+ DPWorkspace space = directorySpace.get( provider );
+ return space.getIndexWriter( entity, modificationOperation );
}
- private void lockProvider(DirectoryProvider provider) {
- //make sure to use a semaphore
- ReentrantLock lock = searchFactoryImplementor.getLockableDirectoryProviders().get( provider );
- //of course a given thread cannot have a race cond with itself
- if ( !lock.isHeldByCurrentThread() ) {
- lock.lock();
- lockedProviders.add( provider );
- dpStatistics.put( provider, new DPStatistics() );
- }
- }
-
private void cleanUp(SearchException originalException) {
//release all readers and writers, then release locks
SearchException raisedException = originalException;
- for (IndexReader reader : readers.values()) {
+ for ( DPWorkspace space : directorySpace.values() ) {
try {
- reader.close();
+ space.closeIndexReader();
}
catch (IOException e) {
if ( raisedException != null ) {
@@ -166,46 +116,47 @@
}
}
}
- readers.clear();
- //TODO release lock of all indexes that do not need optimization early
- //don't optimze if there is a failure
- if ( raisedException == null ) {
- for (DirectoryProvider provider : lockedProviders) {
- Workspace.DPStatistics stats = dpStatistics.get( provider );
- if ( !stats.optimizationForced ) {
- OptimizerStrategy optimizerStrategy = searchFactoryImplementor.getOptimizerStrategy( provider );
- optimizerStrategy.addTransaction( stats.operations );
+ //first release all locks for DirectoryProviders not needing optimization
+ for ( DPWorkspace space : directorySpace.values() ) {
+ if ( ! space.needsOptimization() ) {
+ raisedException = closeWriterAndUnlock( space, raisedException );
+ }
+ }
+ //then handle the remaining DirectoryProviders
+ for ( DPWorkspace space : directorySpace.values() ) {
+ if ( space.needsOptimization() ) {
+ if ( raisedException == null ) {//avoid optimizations in case of errors or exceptions
+ OptimizerStrategy optimizerStrategy = space.getOptimizerStrategy();
+ optimizerStrategy.addTransaction( space.countOperations() );
try {
optimizerStrategy.optimize( this );
}
catch (SearchException e) {
+ //this will also cause skipping other optimizations:
raisedException = new SearchException( "Exception while optimizing directoryProvider: "
- + provider.getDirectory().toString(), e );
- break; //no point in continuing
+ + space.getDirectory().toString(), e );
}
}
+ raisedException = closeWriterAndUnlock( space, raisedException );
}
}
- for (IndexWriter writer : writers.values()) {
- try {
- writer.close();
+ if ( raisedException != null ) throw raisedException;
+ }
+
+ private SearchException closeWriterAndUnlock( DPWorkspace space, SearchException raisedException ) {
+ try {
+ space.closeIndexWriter();
+ }
+ catch (IOException e) {
+ if ( raisedException != null ) {
+ log.error( "Subsequent Exception while closing IndexWriter", e );
}
- catch (IOException e) {
- if ( raisedException != null ) {
- log.error( "Subsequent Exception while closing IndexWriter", e );
- }
- else {
- raisedException = new SearchException( "Exception while closing IndexWriter", e );
- }
+ else {
+ raisedException = new SearchException( "Exception while closing IndexWriter", e );
}
}
- for (DirectoryProvider provider : lockedProviders) {
- searchFactoryImplementor.getLockableDirectoryProviders().get( provider ).unlock();
- }
- writers.clear();
- lockedProviders.clear();
- dpStatistics.clear();
- if ( raisedException != null ) throw raisedException;
+ space.unLock();
+ return raisedException;
}
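[Editor's note] The two loops above are the "early lock releases" named in the log message: DirectoryProviders that need no optimization are closed and unlocked in a first pass, so concurrent transactions can acquire those directories while this thread is still optimizing the rest. The control flow, distilled into a sketch using the same names:

    // Pass 1: unlock every DirectoryProvider that skips optimization.
    for ( DPWorkspace space : directorySpace.values() ) {
        if ( !space.needsOptimization() ) {
            raisedException = closeWriterAndUnlock( space, raisedException );
        }
    }
    // Pass 2: optimize (skipped entirely once any exception is recorded),
    // then unlock the remaining providers.
    for ( DPWorkspace space : directorySpace.values() ) {
        if ( space.needsOptimization() ) {
            if ( raisedException == null ) {
                // run the OptimizerStrategy for this directory
            }
            raisedException = closeWriterAndUnlock( space, raisedException );
        }
    }

Note the earlier `break` is gone: an optimization failure now only skips further optimizations, while writers are still closed and every remaining lock released.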
/**
@@ -216,16 +167,12 @@
}
public void optimize(DirectoryProvider provider) {
- OptimizerStrategy optimizerStrategy = searchFactoryImplementor.getOptimizerStrategy( provider );
- dpStatistics.get( provider ).optimizationForced = true;
+ DPWorkspace space = directorySpace.get( provider );
+ OptimizerStrategy optimizerStrategy = space.getOptimizerStrategy();
+ space.setOptimizationForced();
optimizerStrategy.optimizationForced();
}
- private class DPStatistics {
- boolean optimizationForced = false;
- public long operations;
- }
-
public boolean isBatch() {
return isBatch;
}
@@ -233,4 +180,180 @@
public void setBatch(boolean isBatch) {
this.isBatch = isBatch;
}
+
+ //TODO this should have its own source file but currently needs the container-Workspace cleanup()
+ //for exceptions. Best to wait for move to parallel batchWorkers (one per Directory)?
+ /**
+ * A per-DirectoryProvider Workspace
+ */
+ private class DPWorkspace {
+
+ private final DirectoryProvider directoryProvider;
+ private final ReentrantLock lock;
+
+ private IndexReader reader;
+ private IndexWriter writer;
+ private boolean locked = false;
+ private boolean optimizationForced = false;
+ private long operations = 0L;
+
+ DPWorkspace(DirectoryProvider dp) {
+ this.directoryProvider = dp;
+ this.lock = searchFactoryImplementor.getLockableDirectoryProviders().get( dp );
+ }
+
+ public boolean needsOptimization() {
+ return isLocked() && !isOptimizationForced();
+ }
+
+ /**
+ * Sets a flag to signal optimization has been forced.
+ * Cannot be undone.
+ */
+ void setOptimizationForced() {
+ optimizationForced = true;
+ }
+
+ /**
+ * @return the Directory from the DirectoryProvider
+ */
+ Directory getDirectory() {
+ return directoryProvider.getDirectory();
+ }
+
+ /**
+ * @return A count of performed modification operations
+ */
+ long countOperations() {
+ return operations;
+ }
+
+ /**
+ * @return The OptimizerStrategy configured for this DirectoryProvider
+ */
+ OptimizerStrategy getOptimizerStrategy() {
+ return searchFactoryImplementor.getOptimizerStrategy( directoryProvider );
+ }
+
+ /**
+ * @return true if optimization has been forced
+ */
+ boolean isOptimizationForced() {
+ return optimizationForced;
+ }
+
+ /**
+ * @return true if underlying DirectoryProvider is locked
+ */
+ boolean isLocked() {
+ return locked;
+ }
+
+ /**
+ * Gets the currently open IndexWriter, or creates one.
+ * If an IndexReader was open, it will be closed first.
+ * @param entity The entity for which the IndexWriter is needed
+ * @param modificationOperation set to true if the operation modifies the index
+ * @return created or existing IndexWriter
+ */
+ IndexWriter getIndexWriter(Class entity, boolean modificationOperation) {
+ //one has to close a reader for update before a writer is accessed
+ try {
+ closeIndexReader();
+ }
+ catch (IOException e) {
+ throw new SearchException( "Exception while closing IndexReader", e );
+ }
+ if ( modificationOperation ) {
+ operations++; //estimate the number of modifications done on this index
+ }
+ if ( writer != null ) return writer;
+ lock();
+ try {
+ DocumentBuilder documentBuilder = searchFactoryImplementor.getDocumentBuilders().get( entity );
+ Analyzer analyzer = entity != null ?
+ documentBuilder.getAnalyzer() :
+ SIMPLE_ANALYZER; //never used
+ writer = new IndexWriter( directoryProvider.getDirectory(), analyzer, false ); //has been created at init time
+ writer.setSimilarity( documentBuilder.getSimilarity() );
+ LuceneIndexingParameters indexingParams = searchFactoryImplementor.getIndexingParameters( directoryProvider );
+ indexingParams.applyToWriter( writer, isBatch );
+ }
+ catch (IOException e) {
+ cleanUp(
+ new SearchException( "Unable to open IndexWriter" + ( entity != null ? " for " + entity : "" ), e )
+ );
+ }
+ return writer;
+ }
+
+ /**
+ * Gets an IndexReader to alter the index
+ * (the operations count will be incremented).
+ * @throws AssertionFailure if an IndexWriter is open
+ * @param entity The entity for which the IndexReader is needed
+ * @return a new or existing IndexReader
+ */
+ IndexReader getIndexReader(Class entity) {
+ //one cannot access a reader for update after a writer has been accessed
+ if ( writer != null )
+ throw new AssertionFailure( "Tries to read for update an index while a writer is accessed for " + entity );
+ operations++; //estimate the number of modifications done on this index
+ if ( reader != null ) return reader;
+ lock();
+ try {
+ reader = IndexReader.open( directoryProvider.getDirectory() );
+ }
+ catch (IOException e) {
+ cleanUp( new SearchException( "Unable to open IndexReader for " + entity, e ) );
+ }
+ return reader;
+ }
+
+ /**
+ * Unlocks underlying DirectoryProvider iff locked.
+ */
+ void unLock() {
+ if ( locked ) {
+ lock.unlock();
+ locked = false;
+ }
+ }
+
+ /**
+ * Locks underlying DirectoryProvider iff not locked already.
+ */
+ void lock() {
+ if ( !locked ) {
+ lock.lock();
+ locked = true;
+ }
+ }
+
+ /**
+ * Closes the IndexReader, if open.
+ * @throws IOException
+ */
+ void closeIndexReader() throws IOException {
+ IndexReader toClose = reader;
+ reader = null;
+ if ( toClose!=null ) {
+ toClose.close();
+ }
+ }
+
+ /**
+ * Closes the IndexWriter, if open.
+ * @throws IOException
+ */
+ void closeIndexWriter() throws IOException {
+ IndexWriter toClose = writer;
+ writer = null;
+ if ( toClose!=null ) {
+ toClose.close();
+ }
+ }
+
+ }
+
}
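[Editor's note] End to end, a unit of work is expected to drive the reworked Workspace roughly as follows. A hedged sketch: MyEntity is a hypothetical indexed class, and clean() (referenced by the class javadoc but not touched by this diff) is assumed to invoke the cleanUp logic above:

    Workspace workspace = new Workspace( searchFactoryImplementor );
    workspace.setBatch( false ); // use per-transaction Lucene parameters

    // Per the class javadoc: run modifications through IndexReaders first...
    IndexReader reader = workspace.getIndexReader( provider, MyEntity.class );
    // ... delete stale documents through the reader ...

    // ...then switch to an IndexWriter (the reader is closed automatically).
    IndexWriter writer = workspace.getIndexWriter( provider, MyEntity.class, true );
    // ... add the replacement documents ...

    // Close readers and writers, run optimizer strategies, and release all
    // per-DirectoryProvider locks (non-optimizing ones were released early).
    workspace.clean();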
Modified: search/trunk/src/java/org/hibernate/search/engine/DocumentBuilder.java
===================================================================
--- search/trunk/src/java/org/hibernate/search/engine/DocumentBuilder.java 2008-05-29 15:18:15 UTC (rev 14713)
+++ search/trunk/src/java/org/hibernate/search/engine/DocumentBuilder.java 2008-05-29 22:07:56 UTC (rev 14714)
@@ -66,7 +66,7 @@
public class DocumentBuilder<T> {
private static final Logger log = LoggerFactory.getLogger( DocumentBuilder.class );
- private final PropertiesMetadata rootPropertiesMetadata;
+ private final PropertiesMetadata rootPropertiesMetadata = new PropertiesMetadata();
private final XClass beanClass;
private final DirectoryProvider[] directoryProviders;
private final IndexShardingStrategy shardingStrategy;
@@ -79,13 +79,12 @@
private ReflectionManager reflectionManager;
private int level = 0;
private int maxLevel = Integer.MAX_VALUE;
- private ScopedAnalyzer analyzer;
+ private final ScopedAnalyzer analyzer = new ScopedAnalyzer();
private Similarity similarity;
public DocumentBuilder(XClass clazz, InitContext context, DirectoryProvider[] directoryProviders,
IndexShardingStrategy shardingStrategy, ReflectionManager reflectionManager) {
- this.analyzer = new ScopedAnalyzer();
this.beanClass = clazz;
this.directoryProviders = directoryProviders;
this.shardingStrategy = shardingStrategy;
@@ -94,7 +93,6 @@
this.similarity = context.getDefaultSimilarity();
if ( clazz == null ) throw new AssertionFailure( "Unable to build a DocumentBuilder with a null class" );
- rootPropertiesMetadata = new PropertiesMetadata();
rootPropertiesMetadata.boost = getBoost( clazz );
rootPropertiesMetadata.analyzer = context.getDefaultAnalyzer();
Set<XClass> processedClasses = new HashSet<XClass>();