Author: hardy.ferentschik
Date: 2007-07-24 05:51:04 -0400 (Tue, 24 Jul 2007)
New Revision: 12806
Modified:
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/LuceneWork.java
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/WorkType.java
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/Workspace.java
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/impl/lucene/LuceneBackendQueueProcessor.java
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/impl/lucene/LuceneWorker.java
trunk/HibernateExt/search/src/java/org/hibernate/search/engine/DocumentBuilder.java
trunk/HibernateExt/search/src/java/org/hibernate/search/engine/SearchFactoryImplementor.java
trunk/HibernateExt/search/src/java/org/hibernate/search/impl/FullTextSessionImpl.java
trunk/HibernateExt/search/src/java/org/hibernate/search/impl/SearchFactoryImpl.java
Log:
HSEARCH-67.
Enabled the WorkType.INDEX and changed all required files to use the lucene indexing
parameters in case this new work type is used.
Modified: trunk/HibernateExt/search/src/java/org/hibernate/search/backend/LuceneWork.java
===================================================================
---
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/LuceneWork.java 2007-07-24
09:48:22 UTC (rev 12805)
+++
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/LuceneWork.java 2007-07-24
09:51:04 UTC (rev 12806)
@@ -9,12 +9,18 @@
* Represent a Serializable Lucene unit work
*
* @author Emmanuel Bernard
+ * @author Hardy Ferentschik
*/
public abstract class LuceneWork implements Serializable {
//TODO set a serial id
private Document document;
private Class entityClass;
private Serializable id;
+
+ /**
+ * Flag indicating if this lucene work has to be indexed in batch mode.
+ */
+ private boolean batch = false;
public LuceneWork(Serializable id, Class entity) {
this( id, entity, null );
@@ -26,7 +32,14 @@
this.document = document;
}
+ public boolean isBatch() {
+ return batch;
+ }
+ public void setBatch(boolean batch) {
+ this.batch = batch;
+ }
+
public Document getDocument() {
return document;
}
Modified: trunk/HibernateExt/search/src/java/org/hibernate/search/backend/WorkType.java
===================================================================
---
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/WorkType.java 2007-07-24
09:48:22 UTC (rev 12805)
+++
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/WorkType.java 2007-07-24
09:51:04 UTC (rev 12806)
@@ -2,11 +2,19 @@
package org.hibernate.search.backend;
/**
+ * Enumeration of the different types of Lucene work. This enumeration is used to specify
the type
+ * of index operation to be executed.
+ *
* @author Emmanuel Bernard
+ * @author Hardy Ferentschik
*/
public enum WorkType {
ADD,
UPDATE,
- DELETE
- //add INDEX at some point to behave differently during the queue process?
+ DELETE,
+
+ /**
+ * This type is used for batch indexing.
+ */
+ INDEX
}
Modified: trunk/HibernateExt/search/src/java/org/hibernate/search/backend/Workspace.java
===================================================================
---
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/Workspace.java 2007-07-24
09:48:22 UTC (rev 12805)
+++
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/Workspace.java 2007-07-24
09:51:04 UTC (rev 12806)
@@ -22,15 +22,24 @@
import org.hibernate.annotations.common.AssertionFailure;
/**
- * Lucene workspace
- * This is not intended to be used in a multithreaded environment
+ * Lucene workspace.
+ * <p>
+ * <b>This is not intended to be used in a multithreaded environment</b>.
* <p/>
- * One cannot execute modification through an IndexReader when an IndexWriter has been
acquired on the same underlying directory
- * One cannot get an IndexWriter when an IndexReader have been acquired and modificed on
the same underlying directory
- * The recommended approach is to execute all the modifications on the IndexReaders,
{@link #clean()} }, and acquire the
- * index writers
+ * <ul>
+ * <li>One cannot execute modification through an IndexReader when an IndexWriter
has been acquired
+ * on the same underlying directory
+ * </li>
+        *      <li>One cannot get an IndexWriter when an IndexReader has been acquired and
modified on the same
+ * underlying directory
+ * </li>
+ * <li>The recommended approach is to execute all the modifications on the
IndexReaders, {@link #clean()}, and acquire the
+ * index writers
+ * </li>
+ * </ul>
*
* @author Emmanuel Bernard
+ * @author Hardy Ferentschik
*/
//TODO introduce the notion of read only IndexReader? We cannot enforce it because Lucene
use abstract classes, not interfaces
public class Workspace {
@@ -41,12 +50,16 @@
private Map<DirectoryProvider, DPStatistics> dpStatistics = new
HashMap<DirectoryProvider, DPStatistics>();
private SearchFactoryImplementor searchFactoryImplementor;
+ /**
+        * Flag indicating if the current work should be executed using the Lucene parameters for
batch indexing.
+ */
+ private boolean isBatch = false;
+
public Workspace(SearchFactoryImplementor searchFactoryImplementor) {
this.searchFactoryImplementor = searchFactoryImplementor;
}
-
public DocumentBuilder getDocumentBuilder(Class entity) {
return searchFactoryImplementor.getDocumentBuilders().get( entity );
}
@@ -115,7 +128,22 @@
Analyzer analyzer = entity != null ?
searchFactoryImplementor.getDocumentBuilders().get( entity ).getAnalyzer() :
new SimpleAnalyzer(); //never used
- writer = new IndexWriter( provider.getDirectory(), analyzer, false ); //have been
created at init time
+ writer = new IndexWriter( provider.getDirectory(), analyzer, false ); //has been
created at init time
+
+ LuceneIndexingParameters indexingParams =
searchFactoryImplementor.getIndexingParameters(provider);
+ if(isBatch)
+ {
+ writer.setMergeFactor(indexingParams.getBatchMergeFactor());
+ writer.setMaxMergeDocs(indexingParams.getBatchMaxMergeDocs());
+ writer.setMaxBufferedDocs(indexingParams.getBatchMaxBufferedDocs());
+ }
+ else
+ {
+ writer.setMergeFactor(indexingParams.getMergeFactor());
+ writer.setMaxMergeDocs(indexingParams.getMaxMergeDocs());
+ writer.setMaxBufferedDocs(indexingParams.getMaxBufferedDocs());
+ }
+
writers.put( provider, writer );
}
catch (IOException e) {
@@ -214,4 +242,11 @@
public long operations;
}
+ public boolean isBatch() {
+ return isBatch;
+ }
+
+ public void setBatch(boolean isBatch) {
+ this.isBatch = isBatch;
+ }
}
Modified:
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/impl/lucene/LuceneBackendQueueProcessor.java
===================================================================
---
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/impl/lucene/LuceneBackendQueueProcessor.java 2007-07-24
09:48:22 UTC (rev 12805)
+++
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/impl/lucene/LuceneBackendQueueProcessor.java 2007-07-24
09:51:04 UTC (rev 12806)
@@ -5,6 +5,8 @@
import java.util.Comparator;
import java.util.List;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.hibernate.search.backend.AddLuceneWork;
import org.hibernate.search.backend.LuceneWork;
import org.hibernate.search.backend.Workspace;
@@ -13,12 +15,18 @@
import org.hibernate.search.store.DirectoryProvider;
/**
- * Apply the operations to Lucene directories
- * avoiding deadlocks
+ * Apply the operations to Lucene directories avoiding deadlocks.
*
* @author Emmanuel Bernard
+ * @author Hardy Ferentschik
*/
public class LuceneBackendQueueProcessor implements Runnable {
+
+ /**
+ * Class logger.
+ */
+ private static Log log = LogFactory.getLog( LuceneBackendQueueProcessor.class );
+
private List<LuceneWork> queue;
private SearchFactoryImplementor searchFactoryImplementor;
@@ -33,7 +41,8 @@
workspace = new Workspace( searchFactoryImplementor );
worker = new LuceneWorker( workspace );
try {
- deadlockFreeQueue(queue, workspace, searchFactoryImplementor);
+ deadlockFreeQueue(queue, workspace, searchFactoryImplementor);
+ checkForBatchIndexing(workspace);
for ( LuceneWork luceneWork : queue ) {
worker.performWork( luceneWork );
}
@@ -44,6 +53,16 @@
}
}
+ private void checkForBatchIndexing(Workspace workspace) {
+ for ( LuceneWork luceneWork : queue ) {
+                       // if there is at least a single batch index job we put the workspace into batch
indexing mode.
+ if(luceneWork.isBatch()){
+ log.debug("Setting batch indexing mode.");
+ workspace.setBatch(true);
+ }
+ }
+ }
+
/**
* one must lock the directory providers in the exact same order to avoid
* dead lock between concurrent threads or processes
Modified:
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/impl/lucene/LuceneWorker.java
===================================================================
---
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/impl/lucene/LuceneWorker.java 2007-07-24
09:48:22 UTC (rev 12805)
+++
trunk/HibernateExt/search/src/java/org/hibernate/search/backend/impl/lucene/LuceneWorker.java 2007-07-24
09:51:04 UTC (rev 12806)
@@ -21,9 +21,10 @@
import org.hibernate.search.engine.DocumentBuilder;
/**
- * Stateless implementation that perform a work
+ * Stateless implementation that performs a unit of work.
*
* @author Emmanuel Bernard
+ * @author Hardy Ferentschik
*/
public class LuceneWorker {
private Workspace workspace;
Modified:
trunk/HibernateExt/search/src/java/org/hibernate/search/engine/DocumentBuilder.java
===================================================================
---
trunk/HibernateExt/search/src/java/org/hibernate/search/engine/DocumentBuilder.java 2007-07-24
09:48:22 UTC (rev 12805)
+++
trunk/HibernateExt/search/src/java/org/hibernate/search/engine/DocumentBuilder.java 2007-07-24
09:51:04 UTC (rev 12806)
@@ -54,6 +54,7 @@
* @author Emmanuel Bernard
* @author Sylvain Vieujot
* @author Richard Hallier
+ * @author Hardy Ferentschik
*/
public class DocumentBuilder<T> {
private static final Log log = LogFactory.getLog( DocumentBuilder.class );
@@ -393,6 +394,15 @@
queue.add( new AddLuceneWork( id, entityClass, doc ) );
searchForContainers = true;
}
+ else if ( workType == WorkType.INDEX ) {
+ Document doc = getDocument( entity, id );
+ queue.add(new DeleteLuceneWork(id, entityClass) );
+ LuceneWork work = new AddLuceneWork( id, entityClass, doc );
+ work.setBatch(true);
+ queue.add(work);
+ searchForContainers = true;
+ }
+
else {
throw new AssertionFailure("Unknown WorkType: " + workType);
}
Modified:
trunk/HibernateExt/search/src/java/org/hibernate/search/engine/SearchFactoryImplementor.java
===================================================================
---
trunk/HibernateExt/search/src/java/org/hibernate/search/engine/SearchFactoryImplementor.java 2007-07-24
09:48:22 UTC (rev 12805)
+++
trunk/HibernateExt/search/src/java/org/hibernate/search/engine/SearchFactoryImplementor.java 2007-07-24
09:51:04 UTC (rev 12806)
@@ -1,4 +1,4 @@
-//$Id: $
+// $Id$
package org.hibernate.search.engine;
import java.util.Map;
@@ -8,10 +8,14 @@
import org.hibernate.search.store.DirectoryProvider;
import org.hibernate.search.store.optimization.OptimizerStrategy;
import org.hibernate.search.backend.BackendQueueProcessorFactory;
+import org.hibernate.search.backend.LuceneIndexingParameters;
import org.hibernate.search.backend.Worker;
/**
+ * Interface which gives access to the different directory providers and their
configuration.
+ *
* @author Emmanuel Bernard
+ * @author Hardy Ferentschik
*/
public interface SearchFactoryImplementor extends SearchFactory {
BackendQueueProcessorFactory getBackendQueueProcessorFactory();
@@ -27,4 +31,8 @@
void addOptimizerStrategy(DirectoryProvider<?> provider, OptimizerStrategy
optimizerStrategy);
public OptimizerStrategy getOptimizerStrategy(DirectoryProvider<?> provider);
+
+ public LuceneIndexingParameters getIndexingParameters(DirectoryProvider<?>
provider );
+
+ void addIndexingParmeters(DirectoryProvider<?> provider, LuceneIndexingParameters
indexingParams);
}
Property changes on:
trunk/HibernateExt/search/src/java/org/hibernate/search/engine/SearchFactoryImplementor.java
___________________________________________________________________
Name: svn:keywords
+ Id
Modified:
trunk/HibernateExt/search/src/java/org/hibernate/search/impl/FullTextSessionImpl.java
===================================================================
---
trunk/HibernateExt/search/src/java/org/hibernate/search/impl/FullTextSessionImpl.java 2007-07-24
09:48:22 UTC (rev 12805)
+++
trunk/HibernateExt/search/src/java/org/hibernate/search/impl/FullTextSessionImpl.java 2007-07-24
09:51:04 UTC (rev 12806)
@@ -52,7 +52,7 @@
import org.hibernate.type.Type;
/**
- * Lucene Full text search aware session
+ * Lucene full text search aware session.
*
* @author Emmanuel Bernard
*/
@@ -83,7 +83,7 @@
* Non indexable entities are ignored
* The entity must be associated with the session
*
- * @param entity must not be null
+        * @param entity The entity to index - must not be <code>null</code>.
*/
public void index(Object entity) {
if (entity == null) return;
@@ -94,7 +94,7 @@
DocumentBuilder<Object> builder =
searchFactoryImplementor.getDocumentBuilders().get( clazz );
if ( builder != null ) {
Serializable id = session.getIdentifier( entity );
- searchFactoryImplementor.getWorker().performWork( entity, id, WorkType.UPDATE,
eventSource );
+ searchFactoryImplementor.getWorker().performWork( entity, id, WorkType.INDEX,
eventSource );
}
//TODO
//need to add elements in a queue kept at the Session level
Modified:
trunk/HibernateExt/search/src/java/org/hibernate/search/impl/SearchFactoryImpl.java
===================================================================
---
trunk/HibernateExt/search/src/java/org/hibernate/search/impl/SearchFactoryImpl.java 2007-07-24
09:48:22 UTC (rev 12805)
+++
trunk/HibernateExt/search/src/java/org/hibernate/search/impl/SearchFactoryImpl.java 2007-07-24
09:51:04 UTC (rev 12806)
@@ -1,13 +1,13 @@
//$Id$
package org.hibernate.search.impl;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
+import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.WeakHashMap;
-import java.util.List;
-import java.util.ArrayList;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.lucene.analysis.Analyzer;
@@ -22,10 +22,11 @@
import org.hibernate.search.Version;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.search.backend.BackendQueueProcessorFactory;
+import org.hibernate.search.backend.LuceneIndexingParameters;
+import org.hibernate.search.backend.LuceneWork;
+import org.hibernate.search.backend.OptimizeLuceneWork;
import org.hibernate.search.backend.Worker;
import org.hibernate.search.backend.WorkerFactory;
-import org.hibernate.search.backend.LuceneWork;
-import org.hibernate.search.backend.OptimizeLuceneWork;
import org.hibernate.search.engine.DocumentBuilder;
import org.hibernate.search.engine.SearchFactoryImplementor;
import org.hibernate.search.reader.ReaderProvider;
@@ -36,6 +37,7 @@
import org.hibernate.util.ReflectHelper;
/**
+ *
* @author Emmanuel Bernard
*/
public class SearchFactoryImpl implements SearchFactoryImplementor {
@@ -45,7 +47,7 @@
static {
Version.touch();
}
-
+
private Map<Class, DocumentBuilder<Object>> documentBuilders = new
HashMap<Class, DocumentBuilder<Object>>();
//keep track of the index modifiers per DirectoryProvider since multiple entity can use
the same directory provider
private Map<DirectoryProvider, ReentrantLock> lockableDirectoryProviders =
@@ -55,7 +57,15 @@
private Worker worker;
private ReaderProvider readerProvider;
private BackendQueueProcessorFactory backendQueueProcessorFactory;
+
+
+ /**
+ * Each directory provider (index) can have its own performance settings.
+ */
+ private Map<DirectoryProvider, LuceneIndexingParameters> dirProviderIndexingParams
=
+ new HashMap<DirectoryProvider, LuceneIndexingParameters>();
+
public BackendQueueProcessorFactory getBackendQueueProcessorFactory() {
return backendQueueProcessorFactory;
}
@@ -64,63 +74,16 @@
this.backendQueueProcessorFactory = backendQueueProcessorFactory;
}
+ @SuppressWarnings( "unchecked" )
public SearchFactoryImpl(Configuration cfg) {
//yuk
ReflectionManager reflectionManager = getReflectionManager( cfg );
- Class analyzerClass;
- String analyzerClassName = cfg.getProperty( Environment.ANALYZER_CLASS );
- if ( analyzerClassName != null ) {
- try {
- analyzerClass = ReflectHelper.classForName( analyzerClassName );
- }
- catch (Exception e) {
- throw new SearchException(
- "Lucene analyzer class '" + analyzerClassName + "' defined
in property '" + Environment.ANALYZER_CLASS + "' could not be
found.",
- e
- );
- }
- }
- else {
- analyzerClass = StandardAnalyzer.class;
- }
- // Initialize analyzer
- Analyzer defaultAnalyzer;
- try {
- defaultAnalyzer = (Analyzer) analyzerClass.newInstance();
- }
- catch (ClassCastException e) {
- throw new SearchException(
- "Lucene analyzer does not implement " + Analyzer.class.getName() + ":
" + analyzerClassName, e
- );
- }
- catch (Exception e) {
- throw new SearchException( "Failed to instantiate lucene analyzer with type
" + analyzerClassName, e );
- }
-
- Iterator iter = cfg.getClassMappings();
- DirectoryProviderFactory factory = new DirectoryProviderFactory();
- while ( iter.hasNext() ) {
- PersistentClass clazz = (PersistentClass) iter.next();
- Class<?> mappedClass = clazz.getMappedClass();
- if ( mappedClass != null ) {
- XClass mappedXClass = reflectionManager.toXClass( mappedClass );
- if ( mappedXClass != null && mappedXClass.isAnnotationPresent( Indexed.class
) ) {
- DirectoryProvider provider = factory.createDirectoryProvider( mappedXClass, cfg,
this );
- //TODO move that into DirectoryProviderFactory
- if ( !lockableDirectoryProviders.containsKey( provider ) ) {
- lockableDirectoryProviders.put( provider, new ReentrantLock() );
- }
- final DocumentBuilder<Object> documentBuilder = new
DocumentBuilder<Object>(
- mappedXClass, defaultAnalyzer, provider, reflectionManager
- );
-
- documentBuilders.put( mappedClass, documentBuilder );
- }
- }
- }
+ Analyzer analyzer = initAnalyzer(cfg);
+ initDocumentBuilders(cfg, reflectionManager, analyzer);
+
Set<Class> indexedClasses = documentBuilders.keySet();
- for (DocumentBuilder builder : documentBuilders.values()) {
+ for (DocumentBuilder builder : documentBuilders.values()) {
builder.postInitialize( indexedClasses );
}
worker = WorkerFactory.createWorker( cfg, this );
@@ -161,10 +124,18 @@
public void addOptimizerStrategy(DirectoryProvider<?> provider, OptimizerStrategy
optimizerStrategy) {
dirProviderOptimizerStrategies.put( provider, optimizerStrategy );
}
+
+ public void addIndexingParmeters(DirectoryProvider<?> provider,
LuceneIndexingParameters indexingParams) {
+ dirProviderIndexingParams.put( provider, indexingParams );
+ }
public OptimizerStrategy getOptimizerStrategy(DirectoryProvider<?> provider) {
return dirProviderOptimizerStrategies.get( provider );
}
+
+ public LuceneIndexingParameters getIndexingParameters(DirectoryProvider<?>
provider ) {
+ return dirProviderIndexingParams.get( provider );
+ }
public ReaderProvider getReaderProvider() {
return readerProvider;
@@ -207,4 +178,60 @@
queue.add( new OptimizeLuceneWork( entityType ) );
getBackendQueueProcessorFactory().getProcessor( queue ).run();
}
+
+ private void initDocumentBuilders(Configuration cfg, ReflectionManager
reflectionManager, Analyzer analyzer) {
+ Iterator iter = cfg.getClassMappings();
+ DirectoryProviderFactory factory = new DirectoryProviderFactory();
+ while (iter.hasNext()) {
+ PersistentClass clazz = (PersistentClass) iter.next();
+ Class<?> mappedClass = clazz.getMappedClass();
+ if (mappedClass != null) {
+ XClass mappedXClass = reflectionManager.toXClass(mappedClass);
+ if (mappedXClass != null && mappedXClass.isAnnotationPresent(Indexed.class))
{
+ DirectoryProvider provider = factory.createDirectoryProvider(mappedXClass, cfg,
this);
+ // TODO move that into DirectoryProviderFactory
+ if (!lockableDirectoryProviders.containsKey(provider)) {
+ lockableDirectoryProviders.put(provider, new ReentrantLock());
+ }
+ final DocumentBuilder<Object> documentBuilder = new
DocumentBuilder<Object>(mappedXClass, analyzer,
+ provider, reflectionManager);
+
+ documentBuilders.put(mappedClass, documentBuilder);
+ }
+ }
+ }
+ }
+
+ /**
+       * Initialises the Lucene analyzer to use by reading the analyzer class from the
configuration and instantiating it.
+ *
+ * @param cfg
+ * The current configuration.
+ * @return The Lucene analyzer to use for tokenisation.
+ */
+ private Analyzer initAnalyzer(Configuration cfg) {
+ Class analyzerClass;
+ String analyzerClassName = cfg.getProperty(Environment.ANALYZER_CLASS);
+ if (analyzerClassName != null) {
+ try {
+ analyzerClass = ReflectHelper.classForName(analyzerClassName);
+ } catch (Exception e) {
+ throw new SearchException("Lucene analyzer class '" + analyzerClassName
+ "' defined in property '"
+ + Environment.ANALYZER_CLASS + "' could not be found.", e);
+ }
+ } else {
+ analyzerClass = StandardAnalyzer.class;
+ }
+ // Initialize analyzer
+ Analyzer defaultAnalyzer;
+ try {
+ defaultAnalyzer = (Analyzer) analyzerClass.newInstance();
+ } catch (ClassCastException e) {
+ throw new SearchException("Lucene analyzer does not implement " +
Analyzer.class.getName() + ": "
+ + analyzerClassName, e);
+ } catch (Exception e) {
+ throw new SearchException("Failed to instantiate lucene analyzer with type "
+ analyzerClassName, e);
+ }
+ return defaultAnalyzer;
+ }
}