[exo-jcr-commits] exo-jcr SVN: r5444 - in jcr/branches/1.15.x/exo.jcr.component.core/src: test/java/org/exoplatform/services/jcr/api/core/query/lucene and 1 other directory.
do-not-reply at jboss.org
do-not-reply at jboss.org
Thu Jan 12 03:38:15 EST 2012
Author: dkuleshov
Date: 2012-01-12 03:38:11 -0500 (Thu, 12 Jan 2012)
New Revision: 5444
Added:
jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/TextFieldExtractor.java
Modified:
jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractIndex.java
jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/IndexingQueue.java
jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/IndexingQueueStore.java
jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/LazyTextExtractorField.java
jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/MultiIndex.java
jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NodeIndexer.java
jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/OfflinePersistentIndex.java
jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/PersistentIndex.java
jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/TextExtractorJob.java
jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/TextExtractorReader.java
jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/Util.java
jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/VolatileIndex.java
jcr/branches/1.15.x/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/api/core/query/lucene/IndexingAggregateTest.java
jcr/branches/1.15.x/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/api/core/query/lucene/IndexingQueueTest.java
jcr/branches/1.15.x/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/api/core/query/lucene/TestAll.java
Log:
EXOJCR-1629:
Removed some unnecessary classes:
- IndexingQueue
- TextExtractorReader
and all dependent classes and tests
Reorganized the LazyTextExtractorField class into TextFieldExtractor to keep the ability to serialize fields (for clustering)
Some code cleaning
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractIndex.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractIndex.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractIndex.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -31,9 +31,7 @@
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
-import java.io.StringReader;
import java.util.BitSet;
-import java.util.Iterator;
/**
* Implements common functionality for a lucene index.
@@ -99,11 +97,6 @@
private ReadOnlyIndexReader readOnlyReader;
/**
- * The indexing queue.
- */
- private IndexingQueue indexingQueue;
-
- /**
* Flag that indicates whether there was an index present in the directory
* when this AbstractIndex was created.
*/
@@ -119,17 +112,15 @@
* @param cache the document number cache if this index should use
* one; otherwise <code>cache</code> is
* <code>null</code>.
- * @param indexingQueue the indexing queue.
* @throws IOException if the index cannot be initialized.
*/
- AbstractIndex(final Analyzer analyzer, Similarity similarity, final Directory directory, DocNumberCache cache,
- IndexingQueue indexingQueue) throws IOException
+ AbstractIndex(final Analyzer analyzer, Similarity similarity, final Directory directory, DocNumberCache cache)
+ throws IOException
{
this.analyzer = analyzer;
this.similarity = similarity;
this.directory = directory;
this.cache = cache;
- this.indexingQueue = indexingQueue;
AbstractIndex.this.isExisting = IndexReader.indexExists(directory);
@@ -177,8 +168,7 @@
DynamicPooledExecutor.Command[] commands = new DynamicPooledExecutor.Command[docs.length];
for (int i = 0; i < docs.length; i++)
{
- // check if text extractor completed its work
- final Document doc = getFinishedDocument(docs[i]);
+ final Document doc = docs[i];
// create a command for inverting the document
commands[i] = new DynamicPooledExecutor.Command()
{
@@ -516,64 +506,6 @@
}
}
- /**
- * Returns a document that is finished with text extraction and is ready to
- * be added to the index.
- *
- * @param doc the document to check.
- * @return <code>doc</code> if it is finished already or a stripped down
- * copy of <code>doc</code> without text extractors.
- * @throws IOException if the document cannot be added to the indexing
- * queue.
- */
- private Document getFinishedDocument(Document doc) throws IOException
- {
- if (!Util.isDocumentReady(doc))
- {
- Document copy = new Document();
- // mark the document that reindexing is required
- copy.add(new Field(FieldNames.REINDEXING_REQUIRED, "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
- Iterator fields = doc.getFields().iterator();
- while (fields.hasNext())
- {
- Fieldable f = (Fieldable)fields.next();
- Fieldable field = null;
- Field.TermVector tv = getTermVectorParameter(f);
- Field.Store stored = getStoreParameter(f);
- Field.Index indexed = getIndexParameter(f);
- if (f instanceof LazyTextExtractorField || f.readerValue() != null)
- {
- // replace all readers with empty string reader
- field = new Field(f.name(), new StringReader(""), tv);
- }
- else if (f.stringValue() != null)
- {
- field = new Field(f.name(), f.stringValue(), stored, indexed, tv);
- }
- else if (f.isBinary())
- {
- field = new Field(f.name(), f.binaryValue(), stored);
- }
- if (field != null)
- {
- field.setOmitNorms(f.getOmitNorms());
- copy.add(field);
- }
- }
- // schedule the original document for later indexing
- Document existing = indexingQueue.addDocument(doc);
- if (existing != null)
- {
- // the queue already contained a pending document for this
- // node. -> dispose the document
- Util.disposeDocument(existing);
- }
- // use the stripped down copy for now
- doc = copy;
- }
- return doc;
- }
-
//-------------------------< properties >-----------------------------------
/**
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/IndexingQueue.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/IndexingQueue.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/IndexingQueue.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -1,238 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.exoplatform.services.jcr.impl.core.query.lucene;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import javax.jcr.RepositoryException;
-
-/**
- * <code>IndexingQueue</code> implements a queue which contains all the
- * documents with pending text extractor jobs.
- */
-public class IndexingQueue
-{
-
- /**
- * Logger instance for this class.
- */
- private static final Logger log = LoggerFactory.getLogger("exo.jcr.component.core.IndexingQueue");
-
- /**
- * The store to persist uuids of pending documents.
- */
- private final IndexingQueueStore queueStore;
-
- /**
- * Maps UUID {@link String}s to {@link Document}s.
- */
- private final Map pendingDocuments = new HashMap(1);
-
- /**
- * Flag that indicates whether this indexing queue had been
- * {@link #initialize(MultiIndex) initialized}.
- */
- private volatile boolean initialized = false;
-
- /**
- * Creates an indexing queue.
- *
- * @param queueStore the store where to read the pending extraction jobs.
- */
- IndexingQueue(IndexingQueueStore queueStore)
- {
- this.queueStore = queueStore;
- }
-
- /**
- * Initializes the indexing queue.
- *
- * @param index the multi index this indexing queue belongs to.
- * @throws IOException if an error occurs while reading from the index.
- */
- void initialize(final MultiIndex index) throws IOException
- {
- if (initialized)
- {
- throw new IllegalStateException("already initialized");
- }
- // check index for nodes that need to be reindexed
- CachingMultiIndexReader reader = index.getIndexReader();
- try
- {
- TermDocs tDocs = reader.termDocs(new Term(FieldNames.REINDEXING_REQUIRED, ""));
- try
- {
- while (tDocs.next())
- {
- queueStore.addUUID(reader.document(tDocs.doc(), FieldSelectors.UUID).get(FieldNames.UUID));
- }
- }
- finally
- {
- tDocs.close();
- }
- }
- finally
- {
- reader.release();
- }
- String[] uuids = queueStore.getPending();
- for (int i = 0; i < uuids.length; i++)
- {
- try
- {
- Document doc = index.createDocument(uuids[i]);
- pendingDocuments.put(uuids[i], doc);
- log.debug("added node {}. New size of indexing queue: {}", uuids[i], new Integer(pendingDocuments.size()));
- }
- catch (IllegalArgumentException e)
- {
- log.warn("Invalid UUID in indexing queue store: " + uuids[i]);
- }
- catch (RepositoryException e)
- {
- // node does not exist anymore
- log.debug("Node with uuid {} does not exist anymore", uuids[i]);
- queueStore.removeUUID(uuids[i]);
- }
- }
- initialized = true;
- }
-
- /**
- * Returns the {@link Document}s that are finished.
- *
- * @return the {@link Document}s that are finished.
- */
- public Document[] getFinishedDocuments()
- {
- checkInitialized();
- List finished = new ArrayList(1);
- synchronized (this)
- {
- finished.addAll(pendingDocuments.values());
- }
-
- Iterator it = finished.iterator();
- while (it.hasNext())
- {
- Document doc = (Document)it.next();
- if (!Util.isDocumentReady(doc))
- {
- it.remove();
- }
- }
- return (Document[])finished.toArray(new Document[finished.size()]);
- }
-
- /**
- * Removes the document with the given <code>uuid</code> from the indexing
- * queue.
- *
- * @param uuid the uuid of the document to return.
- * @return the document for the given <code>uuid</code> or <code>null</code>
- * if this queue does not contain a document with the given
- * <code>uuid</code>.
- */
- public synchronized Document removeDocument(String uuid)
- {
- checkInitialized();
- Document doc = (Document)pendingDocuments.remove(uuid);
- if (doc != null)
- {
- queueStore.removeUUID(uuid);
- log.debug("removed node {}. New size of indexing queue: {}", uuid, new Integer(pendingDocuments.size()));
- }
- return doc;
- }
-
- /**
- * Adds a document to this indexing queue.
- *
- * @param doc the document to add.
- * @return an existing document in the queue with the same uuid as the one
- * in <code>doc</code> or <code>null</code> if there was no such
- * document.
- */
- public synchronized Document addDocument(Document doc)
- {
- checkInitialized();
- String uuid = doc.get(FieldNames.UUID);
- Document existing = (Document)pendingDocuments.put(uuid, doc);
- log.debug("added node {}. New size of indexing queue: {}", uuid, new Integer(pendingDocuments.size()));
- if (existing == null)
- {
- // document wasn't present, add it to the queue store
- queueStore.addUUID(uuid);
- }
- // return existing if any
- return existing;
- }
-
- /**
- * Closes this indexing queue and disposes all pending documents.
- */
- public synchronized void close()
- {
- checkInitialized();
- // go through pending documents and close readers
- Iterator it = pendingDocuments.values().iterator();
- while (it.hasNext())
- {
- Document doc = (Document)it.next();
- Util.disposeDocument(doc);
- it.remove();
- }
- queueStore.close();
- }
-
- /**
- * Checks if this indexing queue is initialized and otherwise throws a
- * {@link IllegalStateException}.
- */
- private void checkInitialized()
- {
- if (!initialized)
- {
- throw new IllegalStateException("not initialized");
- }
- }
-
- //----------------------------< testing only >------------------------------
-
- /**
- * <b>This method is for testing only!</b>
- *
- * @return the number of the currently pending documents.
- */
- public synchronized int getNumPendingDocuments()
- {
- return pendingDocuments.size();
- }
-}
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/IndexingQueueStore.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/IndexingQueueStore.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/IndexingQueueStore.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -1,188 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.exoplatform.services.jcr.impl.core.query.lucene;
-
-import org.apache.lucene.store.Directory;
-import org.exoplatform.services.jcr.impl.core.query.lucene.directory.IndexInputStream;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.util.HashSet;
-import java.util.Set;
-
-/**
- * <code>IndexingQueueStore</code> implements a store that keeps the uuids of
- * nodes that are pending in the indexing queue. Until Jackrabbit 1.4 this store
- * was also persisted to disk. Starting with 1.5 the pending
- * nodes are marked directly in the index with a special field.
- * See {@link FieldNames#REINDEXING_REQUIRED}.
- */
-class IndexingQueueStore
-{
-
- /**
- * The logger instance for this class.
- */
- private static final Logger log = LoggerFactory.getLogger("exo.jcr.component.core.IndexingQueueStore");
-
- /**
- * Encoding of the indexing queue store.
- */
- private static final String ENCODING = "UTF-8";
-
- /**
- * Operation identifier for an added node.
- */
- private static final String ADD = "ADD";
-
- /**
- * Operation identifier for an removed node.
- */
- private static final String REMOVE = "REMOVE";
-
- /**
- * Name of the file that contains the indexing queue log.
- */
- private static final String INDEXING_QUEUE_FILE = "indexing_queue.log";
-
- /**
- * The UUID Strings of the pending documents.
- */
- private final Set pending = new HashSet();
-
- /**
- * The directory from where to read pending document UUIDs.
- */
- private final Directory dir;
-
- /**
- * Creates a new <code>IndexingQueueStore</code> using the given directory.
- *
- * @param directory the directory to use.
- * @throws IOException if an error ocurrs while reading pending UUIDs.
- */
- IndexingQueueStore(Directory directory) throws IOException
- {
- this.dir = directory;
- readStore();
- }
-
- /**
- * @return the UUIDs of the pending text extraction jobs.
- */
- public String[] getPending()
- {
- return (String[])pending.toArray(new String[pending.size()]);
- }
-
- /**
- * Adds a <code>uuid</code> to the store.
- *
- * @param uuid the uuid to add.
- */
- public void addUUID(String uuid)
- {
- pending.add(uuid);
- }
-
- /**
- * Removes a <code>uuid</code> from the store.
- *
- * @param uuid the uuid to add.
- */
- public void removeUUID(String uuid)
- {
- pending.remove(uuid);
- }
-
- /**
- * Closes this queue store.
- */
- public void close()
- {
- if (pending.isEmpty())
- {
- try
- {
- if (dir.fileExists(INDEXING_QUEUE_FILE))
- {
- dir.deleteFile(INDEXING_QUEUE_FILE);
- }
- }
- catch (IOException e)
- {
- log.warn("unable to delete " + INDEXING_QUEUE_FILE);
- }
- }
- }
-
- //----------------------------< internal >----------------------------------
-
- /**
- * Reads all pending UUIDs from the file and puts them into {@link
- * #pending}.
- *
- * @throws IOException if an error occurs while reading.
- */
- private void readStore() throws IOException
- {
- if (dir.fileExists(INDEXING_QUEUE_FILE))
- {
- InputStream in = new IndexInputStream(dir.openInput(INDEXING_QUEUE_FILE));
- BufferedReader reader = new BufferedReader(new InputStreamReader(in, ENCODING));
- try
- {
- String line;
- while ((line = reader.readLine()) != null)
- {
- int idx = line.indexOf(' ');
- if (idx == -1)
- {
- // invalid line
- log.warn("invalid line in {}: {}", INDEXING_QUEUE_FILE, line);
- }
- else
- {
- String cmd = line.substring(0, idx);
- String uuid = line.substring(idx + 1, line.length());
- if (ADD.equals(cmd))
- {
- pending.add(uuid);
- }
- else if (REMOVE.equals(cmd))
- {
- pending.remove(uuid);
- }
- else
- {
- // invalid line
- log.warn("invalid line in {}: {}", INDEXING_QUEUE_FILE, line);
- }
- }
- }
- }
- finally
- {
- in.close();
- }
- }
- }
-}
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/LazyTextExtractorField.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/LazyTextExtractorField.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/LazyTextExtractorField.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -1,167 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.exoplatform.services.jcr.impl.core.query.lucene;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.document.AbstractField;
-import org.apache.lucene.document.Field;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.io.Reader;
-
-/**
- * <code>LazyTextExtractorField</code> implements a Lucene field with a String
- * value that is lazily initialized from a given {@link Reader}. In addition
- * this class provides a method to find out whether the purpose of the reader
- * is to extract text and whether the extraction process is already finished.
- *
- * @see #isExtractorFinished()
- */
-public class LazyTextExtractorField extends AbstractField
-{
-
- /**
- * The serial version UID.
- */
- private static final long serialVersionUID = -2707986404659820071L;
-
- /**
- * The logger instance for this class.
- */
- private static final Logger log = LoggerFactory.getLogger("exo.jcr.component.core.LazyTextExtractorField");
-
- /**
- * The reader from where to read the text extract.
- */
- private final Reader reader;
-
- /**
- * The extract as obtained lazily from {@link #reader}.
- */
- volatile private String extract;
-
- /**
- * Creates a new <code>LazyTextExtractorField</code> with the given
- * <code>name</code>.
- *
- * @param name the name of the field.
- * @param reader the reader where to obtain the string from.
- * @param store when set <code>true</code> the string value is stored in the
- * index.
- * @param withOffsets when set <code>true</code> a term vector with offsets
- * is written into the index.
- */
- public LazyTextExtractorField(String name, Reader reader, boolean store, boolean withOffsets)
- {
- super(name, store ? Field.Store.YES : Field.Store.NO, Field.Index.ANALYZED, withOffsets
- ? Field.TermVector.WITH_OFFSETS : Field.TermVector.NO);
- this.reader = reader;
- }
-
- /**
- * @return the string value of this field.
- */
- public String stringValue()
- {
- if (extract == null)
- {
- synchronized (this)
- {
- if (extract == null)
- {
- StringBuffer textExtract = new StringBuffer();
- char[] buffer = new char[1024];
- int len;
- try
- {
- while ((len = reader.read(buffer)) > -1)
- {
- textExtract.append(buffer, 0, len);
- }
- }
- catch (IOException e)
- {
- log.warn("Exception reading value for field: " + e.getMessage());
- log.debug("Dump:", e);
- }
- finally
- {
- try
- {
- reader.close();
- }
- catch (IOException e)
- {
- log.error(e.getLocalizedMessage(), e);
- }
- }
- extract = textExtract.toString();
- }
- }
- }
- return extract;
- }
-
- /**
- * @return always <code>null</code>.
- */
- public Reader readerValue()
- {
- return null;
- }
-
- /**
- * @return always <code>null</code>.
- */
- public byte[] binaryValue()
- {
- return null;
- }
-
- /**
- * @return always <code>null</code>.
- */
- public TokenStream tokenStreamValue()
- {
- return null;
- }
-
- /**
- * @return <code>true</code> if the underlying reader is ready to provide
- * extracted text.
- */
- public boolean isExtractorFinished()
- {
- if (reader instanceof TextExtractorReader)
- {
- return ((TextExtractorReader)reader).isExtractorFinished();
- }
- return true;
- }
-
- /**
- * Disposes this field and closes the underlying reader.
- *
- * @throws IOException if an error occurs while closing the reader.
- */
- public void dispose() throws IOException
- {
- reader.close();
- }
-}
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/MultiIndex.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/MultiIndex.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/MultiIndex.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -217,11 +217,6 @@
private RedoLog redoLog = null;
/**
- * The indexing queue with pending text extraction jobs.
- */
- private IndexingQueue indexingQueue;
-
- /**
* Set<NodeId> of uuids that should not be indexed.
*/
private final IndexingTree indexingTree;
@@ -332,20 +327,13 @@
continue;
}
PersistentIndex index =
- new PersistentIndex(name, handler.getTextAnalyzer(), handler.getSimilarity(), cache, indexingQueue,
- directoryManager);
+ new PersistentIndex(name, handler.getTextAnalyzer(), handler.getSimilarity(), cache, directoryManager);
index.setMaxFieldLength(handler.getMaxFieldLength());
index.setUseCompoundFile(handler.getUseCompoundFile());
index.setTermInfosIndexDivisor(handler.getTermInfosIndexDivisor());
indexes.add(index);
}
- // this method is run in privileged mode internally
- IndexingQueueStore store = new IndexingQueueStore(indexDir);
-
- // initialize indexing queue
- this.indexingQueue = new IndexingQueue(store);
-
// init volatile index
resetVolatileIndex();
@@ -360,7 +348,6 @@
{
reader.release();
}
- indexingQueue.initialize(this);
if (modeHandler.getMode() == IndexerIoMode.READ_WRITE)
{
// will also initialize IndexMerger
@@ -1082,7 +1069,7 @@
try
{
index =
- new PersistentIndex(indexName, handler.getTextAnalyzer(), handler.getSimilarity(), cache, indexingQueue,
+ new PersistentIndex(indexName, handler.getTextAnalyzer(), handler.getSimilarity(), cache,
directoryManager);
}
catch (IOException e)
@@ -1391,9 +1378,6 @@
(indexes.get(i)).close();
}
- // close indexing queue
- indexingQueue.close();
-
// finally close directory
try
{
@@ -1438,16 +1422,6 @@
}
/**
- * Returns the indexing queue for this multi index.
- *
- * @return the indexing queue for this multi index.
- */
- public IndexingQueue getIndexingQueue()
- {
- return indexingQueue;
- }
-
- /**
* Returns a lucene Document for the <code>node</code>.
*
* @param node
@@ -1680,8 +1654,6 @@
@Override
public void run()
{
- // check if there are any indexing jobs finished
- checkIndexingQueue();
// check if volatile index should be flushed
checkFlush();
}
@@ -1696,7 +1668,7 @@
*/
private void resetVolatileIndex() throws IOException
{
- volatileIndex = new VolatileIndex(handler.getTextAnalyzer(), handler.getSimilarity(), indexingQueue);
+ volatileIndex = new VolatileIndex(handler.getTextAnalyzer(), handler.getSimilarity());
volatileIndex.setUseCompoundFile(handler.getUseCompoundFile());
volatileIndex.setMaxFieldLength(handler.getMaxFieldLength());
volatileIndex.setBufferSize(handler.getBufferSize());
@@ -1847,10 +1819,6 @@
synchronized (this)
{
- if (count.get() % 10 == 0)
- {
- checkIndexingQueue(true);
- }
checkVolatileCommit();
}
@@ -1977,10 +1945,6 @@
synchronized (this)
{
- if (count.get() % 10 == 0)
- {
- checkIndexingQueue(true);
- }
checkVolatileCommit();
}
}
@@ -2119,76 +2083,6 @@
}
}
- /**
- * Checks the indexing queue for finished text extrator jobs and updates the
- * index accordingly if there are any new ones. This method is synchronized
- * and should only be called by the timer task that periodically checks if
- * there are documents ready in the indexing queue. A new transaction is
- * used when documents are transfered from the indexing queue to the index.
- */
- private synchronized void checkIndexingQueue()
- {
- checkIndexingQueue(false);
- }
-
- /**
- * Checks the indexing queue for finished text extrator jobs and updates the
- * index accordingly if there are any new ones.
- *
- * @param transactionPresent
- * whether a transaction is in progress and the current
- * {@link #getTransactionId()} should be used. If
- * <code>false</code> a new transaction is created when documents
- * are transfered from the indexing queue to the index.
- */
- private void checkIndexingQueue(boolean transactionPresent)
- {
- // EXOJCR-1337, have been commented since it is not used
- // Document[] docs = indexingQueue.getFinishedDocuments();
- // Map<String, Document> finished = new HashMap<String, Document>();
- // for (int i = 0; i < docs.length; i++)
- // {
- // String uuid = docs[i].get(FieldNames.UUID);
- // finished.put(uuid, docs[i]);
- // }
- //
- // // now update index with the remaining ones if there are any
- // if (!finished.isEmpty())
- // {
- // log.info("updating index with {} nodes from indexing queue.", new Long(finished.size()));
- //
- // // remove documents from the queue
- // for (Iterator<String> it = finished.keySet().iterator(); it.hasNext();)
- // {
- // indexingQueue.removeDocument(it.next().toString());
- // }
- //
- // try
- // {
- // if (transactionPresent)
- // {
- // for (Iterator<String> it = finished.keySet().iterator(); it.hasNext();)
- // {
- // executeAndLog(new DeleteNode(getTransactionId(), it.next()));
- // }
- // for (Iterator<Document> it = finished.values().iterator(); it.hasNext();)
- // {
- // executeAndLog(new AddNode(getTransactionId(), it.next()));
- // }
- // }
- // else
- // {
- // update(finished.keySet(), finished.values());
- // }
- // }
- // catch (IOException e)
- // {
- // // update failed
- // log.warn("Failed to update index with deferred text extraction", e);
- // }
- // }
- }
-
// ------------------------< Actions
// >---------------------------------------
@@ -3001,13 +2895,6 @@
public void execute(MultiIndex index) throws IOException
{
String uuidString = uuid.toString();
- // check if indexing queue is still working on
- // this node from a previous update
- Document doc = index.indexingQueue.removeDocument(uuidString);
- if (doc != null)
- {
- Util.disposeDocument(doc);
- }
Term idTerm = new Term(FieldNames.UUID, uuidString);
// if the document cannot be deleted from the volatile index
// delete it from one of the persistent indexes.
@@ -3363,7 +3250,7 @@
continue;
}
PersistentIndex index =
- new PersistentIndex(name, handler.getTextAnalyzer(), handler.getSimilarity(), cache, indexingQueue,
+ new PersistentIndex(name, handler.getTextAnalyzer(), handler.getSimilarity(), cache,
directoryManager);
index.setMaxFieldLength(handler.getMaxFieldLength());
index.setUseCompoundFile(handler.getUseCompoundFile());
@@ -3456,8 +3343,7 @@
merger = null;
}
offlineIndex =
- new OfflinePersistentIndex(handler.getTextAnalyzer(), handler.getSimilarity(), cache, indexingQueue,
- directoryManager);
+ new OfflinePersistentIndex(handler.getTextAnalyzer(), handler.getSimilarity(), cache, directoryManager);
if (modeHandler.getMode() == IndexerIoMode.READ_WRITE)
{
flush();
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NodeIndexer.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NodeIndexer.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NodeIndexer.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -884,11 +884,11 @@
{
if (supportHighlighting)
{
- return new LazyTextExtractorField(FieldNames.FULLTEXT, value, true, true);
+ return new TextFieldExtractor(FieldNames.FULLTEXT, value, true, true);
}
else
{
- return new LazyTextExtractorField(FieldNames.FULLTEXT, value, false, false);
+ return new TextFieldExtractor(FieldNames.FULLTEXT, value, false, false);
}
}
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/OfflinePersistentIndex.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/OfflinePersistentIndex.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/OfflinePersistentIndex.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -50,15 +50,14 @@
* @param analyzer the analyzer for text tokenizing.
* @param similarity the similarity implementation.
* @param cache the document number cache
- * @param indexingQueue the indexing queue.
* @param directoryManager the directory manager.
* @throws IOException if an error occurs while opening / creating the
* index.
*/
- OfflinePersistentIndex(Analyzer analyzer, Similarity similarity, DocNumberCache cache, IndexingQueue indexingQueue,
+ OfflinePersistentIndex(Analyzer analyzer, Similarity similarity, DocNumberCache cache,
DirectoryManager directoryManager) throws IOException
{
- super(NAME, analyzer, similarity, cache, indexingQueue, directoryManager);
+ super(NAME, analyzer, similarity, cache, directoryManager);
this.processedIDs = new ArrayList<String>();
}
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/PersistentIndex.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/PersistentIndex.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/PersistentIndex.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -50,15 +50,14 @@
* @param analyzer the analyzer for text tokenizing.
* @param similarity the similarity implementation.
* @param cache the document number cache
- * @param indexingQueue the indexing queue.
* @param directoryManager the directory manager.
* @throws IOException if an error occurs while opening / creating the
* index.
*/
PersistentIndex(String name, Analyzer analyzer, Similarity similarity, DocNumberCache cache,
- IndexingQueue indexingQueue, final DirectoryManager directoryManager) throws IOException
+ final DirectoryManager directoryManager) throws IOException
{
- super(analyzer, similarity, directoryManager.getDirectory(name), cache, indexingQueue);
+ super(analyzer, similarity, directoryManager.getDirectory(name), cache);
this.name = name;
if (isExisting())
{
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/TextExtractorJob.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/TextExtractorJob.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/TextExtractorJob.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -1,274 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.exoplatform.services.jcr.impl.core.query.lucene;
-
-import EDU.oswego.cs.dl.util.concurrent.Callable;
-import EDU.oswego.cs.dl.util.concurrent.FutureResult;
-
-import org.exoplatform.commons.utils.PrivilegedFileHelper;
-import org.exoplatform.services.document.DocumentReader;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.io.Reader;
-import java.io.StringReader;
-import java.io.Writer;
-import java.lang.reflect.InvocationTargetException;
-
-/**
- * <code>TextExtractorJob</code> implements a future result and is runnable
- * in a background thread.
- */
-public class TextExtractorJob extends FutureResult implements Runnable
-{
-
- /**
- * UTF-8 encoding.
- */
- private static final String ENCODING_UTF8 = "UTF-8";
-
- /**
- * The logger instance for this class.
- */
- private static final Logger log = LoggerFactory.getLogger("exo.jcr.component.core.TextExtractorJob");
-
- /**
- * The command of the future result.
- */
- private final Runnable cmd;
-
- /**
- * The mime type of the resource to extract text from.
- */
- private final String type;
-
- /**
- * Set to <code>true</code> if this job timed out.
- */
- private transient boolean timedOut = false;
-
- /**
- * <code>true</code> if this extractor job has been flaged as discarded.
- */
- private transient boolean discarded = false;
-
- /**
- * Creates a new <code>TextExtractorJob</code> with the given
- * <code>extractor</code> on the <code>stream</code>.
- *
- * @param extractor the text extractor
- * @param stream the stream of the binary property.
- * @param type the mime-type of the binary content.
- * @param encoding the encoding of the binary content. May be
- * <code>null</code>.
- */
- public TextExtractorJob(final DocumentReader extractor, final InputStream stream, final String type,
- final String encoding)
- {
- this.type = type;
- this.cmd = setter(new Callable()
- {
- public Object call() throws Exception
- {
- Reader r = new StringReader(extractor.getContentAsText(stream, encoding));
- if (r != null)
- {
- if (discarded)
- {
- r.close();
- r = null;
- }
- else if (timedOut)
- {
- // spool a temp file to save memory
- r = getSwappedOutReader(r);
- }
- }
- return r;
- }
- });
- }
-
- /**
- * Returns the reader with the extracted text from the input stream passed
- * to the constructor of this <code>TextExtractorJob</code>. The caller of
- * this method is responsible for closing the returned reader. Returns
- * <code>null</code> if a <code>timeout</code>occurs while waiting for the
- * text extractor to get the reader.
- *
- * @return the Reader with the extracted text. Returns <code>null</code> if
- * a timeout or an exception occured extracting the text.
- */
- public Reader getReader(long timeout)
- {
- Reader reader = null;
- try
- {
- reader = (Reader)timedGet(timeout);
- }
- catch (InterruptedException e)
- {
- // also covers TimeoutException
- // text not extracted within timeout or interrupted
- if (timeout > 0)
- {
- log.debug("Text extraction for {} timed out (>{}ms).", type, new Long(timeout));
- timedOut = true;
- }
- }
- catch (InvocationTargetException e)
- {
- // extraction failed
- log.warn("Exception while indexing binary property: " + e.getCause());
- log.debug("Dump: ", e.getCause());
- }
- return reader;
- }
-
- /**
- * Discards this extractor job. If the reader within this job is ready at
- * the time of this call, it is closed. If the reader is not yet ready this
- * job will be flaged as discarded and any later call to
- * {@link #getReader(long)} will return <code>null</code>. The reader that
- * is about to be constructed by a background thread will be closed
- * automatically as soon as it becomes ready.
- */
- void discard()
- {
- discarded = true;
- Reader r = (Reader)peek();
- if (r != null)
- {
- try
- {
- r.close();
- }
- catch (IOException e)
- {
- log.warn("Exception when trying to discard extractor job: " + e);
- }
- }
- }
-
- /**
- * @return a String description for this job with the mime type.
- */
- @Override
- public String toString()
- {
- return "TextExtractorJob for " + type;
- }
-
- //----------------------------< Runnable >----------------------------------
-
- /**
- * Runs the actual text extraction.
- */
- public void run()
- {
- // forward to command
- cmd.run();
- }
-
- //----------------------------< internal >----------------------------------
-
- /**
- * Returns a <code>Reader</code> for <code>r</code> using a temp file.
- *
- * @param r the reader to swap out into a temp file.
- * @return a reader to the temp file.
- */
- private Reader getSwappedOutReader(Reader r)
- {
- final File temp;
- try
- {
- temp = PrivilegedFileHelper.createTempFile("extractor", null);
- }
- catch (IOException e)
- {
- // unable to create temp file
- // return reader as is
- return r;
- }
- Writer out;
- try
- {
- out = new BufferedWriter(new OutputStreamWriter(PrivilegedFileHelper.fileOutputStream(temp), ENCODING_UTF8));
- }
- catch (IOException e)
- {
- // should never happend actually
- if (!temp.delete())
- {
- PrivilegedFileHelper.deleteOnExit(temp);
- }
- return r;
- }
-
- // spool into temp file
- InputStream in = null;
- try
- {
- try
- {
- //IOUtils.copy(r, out);
- out.close();
- }
- finally
- {
- r.close();
- }
- //in = new LazyFileInputStream(temp);
-
- return new InputStreamReader(in, ENCODING_UTF8)
- {
- @Override
- public void close() throws IOException
- {
- super.close();
- // delete file
- if (!temp.delete())
- {
- PrivilegedFileHelper.deleteOnExit(temp);
- }
- }
- };
- }
- catch (IOException e)
- {
- // do some clean up
- //IOUtils.closeQuietly(out);
- //IOUtils.closeQuietly(in);
- //out.close();
- //in.close();
-
- if (!temp.delete())
- {
- PrivilegedFileHelper.deleteOnExit(temp);
- }
- // use empty string reader as fallback
- return new StringReader("");
- }
- }
-}
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/TextExtractorReader.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/TextExtractorReader.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/TextExtractorReader.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -1,149 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.exoplatform.services.jcr.impl.core.query.lucene;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-
-import EDU.oswego.cs.dl.util.concurrent.DirectExecutor;
-import EDU.oswego.cs.dl.util.concurrent.Executor;
-
-/**
- * <code>TextExtractorReader</code> implements a specialized reader that runs
- * the text extractor in a background thread.
- */
-class TextExtractorReader extends Reader {
-
- /**
- * A direct executor in case text extraction is requested for immediate use.
- */
- private static final Executor DIRECT_EXECUTOR = new DirectExecutor();
-
- /**
- * Reference to the extracted text. This reference is initially
- * <code>null</code> and later set to a valid reader when the text extractor
- * finished its work.
- */
- private Reader extractedText;
-
- /**
- * The extractor job.
- */
- private TextExtractorJob job;
-
- /**
- * The pooled executor.
- */
- private final Executor executor;
-
- /**
- * The timeout in milliseconds to wait at most for the text extractor
- * when {@link #isExtractorFinished()} is called.
- */
- private final long timeout;
-
- /**
- * Set to <code>true</code> when the text extractor job has been started
- * and is running.
- */
- private boolean jobStarted = false;
-
- /**
- * Creates a new <code>TextExtractorReader</code> with the given
- * <code>job</code>.
- *
- * @param job the extractor job.
- * @param executor the executor to use when text extraction is requested.
- * @param timeout the timeout to wait at most for the text extractor.
- */
- TextExtractorReader(TextExtractorJob job, Executor executor, long timeout) {
- this.job = job;
- this.executor = executor;
- this.timeout = timeout;
- }
-
- /**
- * Closes this reader and discards the contained {@link TextExtractorJob}.
- *
- * @throws IOException if an error occurs while closing this reader.
- */
- public void close() throws IOException {
- if (extractedText != null) {
- extractedText.close();
- }
- if (jobStarted) {
- job.discard();
- }
- }
-
- /**
- * {@inheritDoc}
- */
- public int read(char[] cbuf, int off, int len) throws IOException {
- if (extractedText == null) {
- // no reader present
- // check if job is started already
- if (jobStarted) {
- // wait until available
- extractedText = job.getReader(Long.MAX_VALUE);
- } else {
- // execute with current thread
- try {
- DIRECT_EXECUTOR.execute(job);
- } catch (InterruptedException e) {
- // current thread is in interrupted state
- // -> ignore (job will not return a reader, which is fine)
- }
- extractedText = job.getReader(0);
- }
-
- if (extractedText == null) {
- // exception occurred
- extractedText = new StringReader("");
- }
- }
- return extractedText.read(cbuf, off, len);
- }
-
- /**
- * @return <code>true</code> if the text extractor within this reader has
- * finished its work and this reader will return extracted text.
- */
- public boolean isExtractorFinished() {
- if (!jobStarted) {
- try {
- executor.execute(job);
- jobStarted = true;
- } catch (InterruptedException e) {
- // this thread is in interrupted state
- return false;
- }
- extractedText = job.getReader(timeout);
- } else {
- // job is already running, check for immediate result
- extractedText = job.getReader(0);
- }
-
- if (extractedText == null && job.getException() != null) {
- // exception occurred
- extractedText = new StringReader("");
- }
-
- return extractedText != null;
- }
-}
Added: jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/TextFieldExtractor.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/TextFieldExtractor.java (rev 0)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/TextFieldExtractor.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.exoplatform.services.jcr.impl.core.query.lucene;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.AbstractField;
+import org.apache.lucene.document.Field;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * Lucene field whose string value is pulled from a {@link Reader} on the first
+ * call to {@link #stringValue()} and cached for every later call.
+ */
+public class TextFieldExtractor extends AbstractField
+{
+
+   /**
+    * The serial version UID.
+    */
+   private static final long serialVersionUID = -2707986404659820071L;
+
+   /**
+    * The logger instance for this class.
+    */
+   private static final Logger log = LoggerFactory.getLogger("exo.jcr.component.core.TextFieldExtractor");
+
+   /**
+    * The reader from where to read the text extract.
+    */
+   private final Reader reader;
+
+   /**
+    * The extract as obtained lazily from {@link #reader}; volatile because
+    * {@link #stringValue()} first checks it outside the synchronized block.
+    */
+   private volatile String extract;
+
+   /**
+    * Creates a new <code>TextFieldExtractor</code> with the given
+    * <code>name</code>.
+    *
+    * @param name the name of the field.
+    * @param reader the reader where to obtain the string from.
+    * @param store when set <code>true</code> the string value is stored in the
+    *          index.
+    * @param withOffsets when set <code>true</code> a term vector with offsets
+    *          is written into the index.
+    */
+   public TextFieldExtractor(String name, Reader reader, boolean store, boolean withOffsets)
+   {
+      super(name, store ? Field.Store.YES : Field.Store.NO, Field.Index.ANALYZED, withOffsets
+         ? Field.TermVector.WITH_OFFSETS : Field.TermVector.NO);
+      this.reader = reader;
+   }
+
+   /**
+    * Returns the text of this field, reading it from the reader on first use.
+    *
+    * @return the string value of this field.
+    */
+   public String stringValue()
+   {
+      // work on a local copy so the cached case costs a single volatile read
+      String text = extract;
+      if (text == null)
+      {
+         synchronized (this)
+         {
+            // re-check under the lock: another thread may have filled it in
+            text = extract;
+            if (text == null)
+            {
+               text = readExtract();
+               extract = text;
+            }
+         }
+      }
+      return text;
+   }
+
+   /**
+    * Drains and closes the reader. A read failure is logged and whatever was
+    * read before it is still returned.
+    *
+    * @return the characters read from the reader, possibly empty.
+    */
+   private String readExtract()
+   {
+      // method-local accumulator, so the unsynchronized StringBuilder is enough
+      StringBuilder textExtract = new StringBuilder();
+      char[] buffer = new char[1024];
+      int len;
+      try
+      {
+         while ((len = reader.read(buffer)) > -1)
+         {
+            textExtract.append(buffer, 0, len);
+         }
+      }
+      catch (IOException e)
+      {
+         log.warn("Exception reading value for field: " + e.getMessage());
+         log.debug("Dump:", e);
+      }
+      finally
+      {
+         try
+         {
+            reader.close();
+         }
+         catch (IOException e)
+         {
+            log.error(e.getLocalizedMessage(), e);
+         }
+      }
+      return textExtract.toString();
+   }
+
+   /**
+    * @return always <code>null</code>.
+    */
+   public Reader readerValue()
+   {
+      return null;
+   }
+
+   /**
+    * @return always <code>null</code>.
+    */
+   public byte[] binaryValue()
+   {
+      return null;
+   }
+
+   /**
+    * @return always <code>null</code>.
+    */
+   public TokenStream tokenStreamValue()
+   {
+      return null;
+   }
+
+   /**
+    * Disposes this field and closes the underlying reader.
+    *
+    * @throws IOException if an error occurs while closing the reader.
+    */
+   public void dispose() throws IOException
+   {
+      reader.close();
+   }
+}
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/Util.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/Util.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/Util.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -66,9 +66,9 @@
{
f.readerValue().close();
}
- else if (f instanceof LazyTextExtractorField)
+ else if (f instanceof TextFieldExtractor)
{
- LazyTextExtractorField field = (LazyTextExtractorField)f;
+ TextFieldExtractor field = (TextFieldExtractor)f;
field.dispose();
}
}
@@ -80,31 +80,6 @@
}
/**
- * Returns <code>true</code> if the document is ready to be added to the
- * index. That is all text extractors have finished their work.
- *
- * @param doc the document to check.
- * @return <code>true</code> if the document is ready; <code>false</code>
- * otherwise.
- */
- public static boolean isDocumentReady(Document doc)
- {
- for (Iterator it = doc.getFields().iterator(); it.hasNext();)
- {
- Fieldable f = (Fieldable)it.next();
- if (f instanceof LazyTextExtractorField)
- {
- LazyTextExtractorField field = (LazyTextExtractorField)f;
- if (!field.isExtractorFinished())
- {
- return false;
- }
- }
- }
- return true;
- }
-
- /**
* Depending on the index format this method returns a query that matches
* all nodes that have a property with a given <code>name</code>.
*
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/VolatileIndex.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/VolatileIndex.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/VolatileIndex.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -57,12 +57,11 @@
*
* @param analyzer the analyzer to use.
* @param similarity the similarity implementation.
- * @param indexingQueue the indexing queue.
* @throws IOException if an error occurs while opening the index.
*/
- VolatileIndex(Analyzer analyzer, Similarity similarity, IndexingQueue indexingQueue) throws IOException
+ VolatileIndex(Analyzer analyzer, Similarity similarity) throws IOException
{
- super(analyzer, similarity, new RAMDirectory(), null, indexingQueue);
+ super(analyzer, similarity, new RAMDirectory(), null);
}
/**
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/api/core/query/lucene/IndexingAggregateTest.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/api/core/query/lucene/IndexingAggregateTest.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/api/core/query/lucene/IndexingAggregateTest.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -17,23 +17,21 @@
package org.exoplatform.services.jcr.api.core.query.lucene;
import org.exoplatform.services.jcr.api.core.query.AbstractIndexingTest;
-import org.exoplatform.services.jcr.impl.core.query.lucene.IndexingQueue;
-import org.exoplatform.services.jcr.impl.core.query.lucene.SearchIndex;
-import javax.jcr.RepositoryException;
-import javax.jcr.Node;
-import javax.jcr.query.Query;
-
+import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
-import java.io.Writer;
import java.io.OutputStreamWriter;
-import java.io.ByteArrayInputStream;
-import java.util.Calendar;
-import java.util.List;
+import java.io.Writer;
import java.util.ArrayList;
+import java.util.Calendar;
import java.util.Collections;
import java.util.Iterator;
+import java.util.List;
+import javax.jcr.Node;
+import javax.jcr.RepositoryException;
+import javax.jcr.query.Query;
+
/**
* <code>IndexingAggregateTest</code> checks if the nt:file nt:resource
* aggregate defined in workspace indexing-test works properly.
@@ -60,7 +58,6 @@
resource.setProperty("jcr:data", new ByteArrayInputStream(out.toByteArray()));
testRootNode.save();
- waitUntilQueueEmpty();
executeSQLQuery(sqlDog, new Node[]{file});
@@ -70,7 +67,6 @@
writer.flush();
resource.setProperty("jcr:data", new ByteArrayInputStream(out.toByteArray()));
testRootNode.save();
- waitUntilQueueEmpty();
executeSQLQuery(sqlCat, new Node[]{file});
@@ -102,20 +98,6 @@
// executeSQLQuery(sqlCat, new Node[]{file});
}
- protected void waitUntilQueueEmpty() throws Exception
- {
- SearchIndex index = (SearchIndex)getQueryHandler();
- IndexingQueue queue = index.getIndex().getIndexingQueue();
- index.getIndex().flush();
- synchronized (index.getIndex())
- {
- while (queue.getNumPendingDocuments() > 0)
- {
- index.getIndex().wait(50);
- }
- }
- }
-
public void testContentLastModified() throws RepositoryException
{
List expected = new ArrayList();
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/api/core/query/lucene/IndexingQueueTest.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/api/core/query/lucene/IndexingQueueTest.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/api/core/query/lucene/IndexingQueueTest.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -1,245 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.exoplatform.services.jcr.api.core.query.lucene;
-
-
-
-import org.exoplatform.services.jcr.api.core.query.AbstractIndexingTest;
-
-import org.exoplatform.services.jcr.impl.core.RepositoryImpl;
-import org.exoplatform.services.jcr.impl.core.query.lucene.IndexingQueue;
-import org.exoplatform.services.jcr.impl.core.query.lucene.SearchIndex;
-
-import javax.jcr.Node;
-import javax.jcr.NodeIterator;
-import javax.jcr.RepositoryException;
-import javax.jcr.query.Query;
-import java.io.Reader;
-import java.io.InputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FilenameFilter;
-import java.util.Calendar;
-
-/**
- * <code>IndexingQueueTest</code> checks if the indexing queue properly indexes
- * nodes in a background thread when text extraction takes more than 10 ms. See
- * the workspace.xml file for the indexing-test workspace.
- */
-public class IndexingQueueTest extends AbstractIndexingTest {
-
- private static final File TEMP_DIR = new File(System.getProperty("java.io.tmpdir"));
-
- private static final String CONTENT_TYPE = "application/indexing-queue-test";
-
- private static final String ENCODING = "UTF-8";
- public void testname() throws Exception
- {
-
- }
-//
-// public void testQueue() throws Exception {
-// Extractor.sleepTime = 200;
-// SearchIndex index = (SearchIndex) getQueryHandler();
-// IndexingQueue queue = index.getIndex().getIndexingQueue();
-//
-// assertEquals(0, queue.getNumPendingDocuments());
-//
-// String text = "the quick brown fox jumps over the lazy dog.";
-// InputStream in = new ByteArrayInputStream(text.getBytes(ENCODING));
-// Node resource = testRootNode.addNode(nodeName1, "nt:resource");
-// resource.setProperty("jcr:data", in);
-// resource.setProperty("jcr:lastModified", Calendar.getInstance());
-// resource.setProperty("jcr:mimeType", CONTENT_TYPE);
-// resource.setProperty("jcr:encoding", ENCODING);
-// session.save();
-//
-// assertEquals(1, queue.getNumPendingDocuments());
-//
-// Query q = qm.createQuery(testPath + "/*[jcr:contains(., 'fox')]", Query.XPATH);
-// NodeIterator nodes = q.execute().getNodes();
-// assertFalse(nodes.hasNext());
-//
-// synchronized (index.getIndex()) {
-// while (queue.getNumPendingDocuments() > 0) {
-// index.getIndex().wait(50);
-// }
-// }
-//
-// q = qm.createQuery(testPath + "/*[jcr:contains(., 'fox')]", Query.XPATH);
-// nodes = q.execute().getNodes();
-// assertTrue(nodes.hasNext());
-// }
-//
-// public void testInitialIndex() throws Exception {
-// Extractor.sleepTime = 200;
-// SearchIndex index = (SearchIndex) getQueryHandler();
-// File indexDir = new File(index.getPath());
-//
-// // fill workspace
-// Node testFolder = testRootNode.addNode("folder", "nt:folder");
-// String text = "the quick brown fox jumps over the lazy dog.";
-// int num = createFiles(testFolder, text.getBytes(ENCODING), 10, 2, 0);
-// session.save();
-//
-// // shutdown workspace
-// RepositoryImpl repo = (RepositoryImpl) session.getRepository();
-// session.logout();
-// session = null;
-// superuser.logout();
-// superuser = null;
-// TestHelper.shutdownWorkspace(WORKSPACE_NAME, repo);
-//
-// // delete index
-// try {
-// FileUtil.delete(indexDir);
-// } catch (IOException e) {
-// fail("Unable to delete index directory");
-// }
-//
-// int initialNumExtractorFiles = getNumExtractorFiles();
-//
-// Extractor.sleepTime = 20;
-// Thread t = new Thread(new Runnable() {
-// public void run() {
-// try {
-// session = helper.getSuperuserSession(WORKSPACE_NAME);
-// } catch (RepositoryException e) {
-// throw new RuntimeException(e);
-// }
-// }
-// });
-// t.start();
-//
-// while (t.isAlive()) {
-// // there must not be more than 20 extractor files, because:
-// // - initial index creation checks indexing queue every 10 nodes
-// // - there is an aggregate definition on the workspace that causes
-// // 2 extractor jobs per nt:resource
-// // => 2 * 10 = 20
-// int numFiles = getNumExtractorFiles() - initialNumExtractorFiles;
-// assertTrue(numFiles <= 20);
-// Thread.sleep(50);
-// }
-//
-// qm = session.getWorkspace().getQueryManager();
-// index = (SearchIndex) getQueryHandler();
-// IndexingQueue queue = index.getIndex().getIndexingQueue();
-//
-// // flush index to make sure any documents in the buffer are written
-// // to the index. this is to make sure all nodes are pushed either to
-// // the index or to the indexing queue
-// index.getIndex().flush();
-//
-// synchronized (index.getIndex()) {
-// while (queue.getNumPendingDocuments() > 0) {
-// index.getIndex().wait(50);
-// }
-// }
-//
-// String stmt = testPath + "//element(*, nt:resource)[jcr:contains(., 'fox')]";
-// Query q = qm.createQuery(stmt, Query.XPATH);
-// assertEquals(num, q.execute().getNodes().getSize());
-// }
-//
-// /*
-// * Test case for JCR-2082
-// */
-// public void testReaderUpToDate() throws Exception {
-// Extractor.sleepTime = 10;
-// SearchIndex index = (SearchIndex) getQueryHandler();
-// File indexDir = new File(index.getPath());
-//
-// // shutdown workspace
-// RepositoryImpl repo = (RepositoryImpl) session.getRepository();
-// session.logout();
-// session = null;
-// superuser.logout();
-// superuser = null;
-// TestHelper.shutdownWorkspace(WORKSPACE_NAME, repo);
-//
-// // delete index
-// try {
-// FileUtil.delete(indexDir);
-// } catch (IOException e) {
-// fail("Unable to delete index directory");
-// }
-//
-// // start workspace again by getting a session
-// session = helper.getSuperuserSession(WORKSPACE_NAME);
-//
-// qm = session.getWorkspace().getQueryManager();
-//
-// Query q = qm.createQuery(testPath, Query.XPATH);
-// assertEquals(1, getSize(q.execute().getNodes()));
-// }
-//
-// private int createFiles(Node folder, byte[] data,
-// int filesPerLevel, int levels, int count)
-// throws RepositoryException {
-// levels--;
-// for (int i = 0; i < filesPerLevel; i++) {
-// // create files
-// Node file = folder.addNode("file" + i, "nt:file");
-// InputStream in = new ByteArrayInputStream(data);
-// Node resource = file.addNode("jcr:content", "nt:resource");
-// resource.setProperty("jcr:data", in);
-// resource.setProperty("jcr:lastModified", Calendar.getInstance());
-// resource.setProperty("jcr:mimeType", CONTENT_TYPE);
-// resource.setProperty("jcr:encoding", ENCODING);
-// count++;
-// }
-// if (levels > 0) {
-// for (int i = 0; i < filesPerLevel; i++) {
-// // create files
-// Node subFolder = folder.addNode("folder" + i, "nt:folder");
-// count = createFiles(subFolder, data,
-// filesPerLevel, levels, count);
-// }
-// }
-// return count;
-// }
-//
-// private int getNumExtractorFiles() throws IOException {
-// return TEMP_DIR.listFiles(new FilenameFilter() {
-// public boolean accept(File dir, String name) {
-// return name.startsWith("extractor");
-// }
-// }).length;
-// }
-
-// public static final class Extractor implements TextExtractor {
-//
-// protected static volatile int sleepTime = 200;
-//
-// public String[] getContentTypes() {
-// return new String[]{CONTENT_TYPE};
-// }
-//
-// public Reader extractText(InputStream stream, String type, String encoding)
-// throws IOException {
-// try {
-// Thread.sleep(sleepTime);
-// } catch (InterruptedException e) {
-// throw new IOException();
-// }
-// return new InputStreamReader(stream, encoding);
-// }
- // }
-}
Modified: jcr/branches/1.15.x/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/api/core/query/lucene/TestAll.java
===================================================================
--- jcr/branches/1.15.x/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/api/core/query/lucene/TestAll.java 2012-01-12 08:07:55 UTC (rev 5443)
+++ jcr/branches/1.15.x/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/api/core/query/lucene/TestAll.java 2012-01-12 08:38:11 UTC (rev 5444)
@@ -37,7 +37,6 @@
{
TestSuite suite = new TestSuite("Search tests");
- suite.addTestSuite(IndexingQueueTest.class);
suite.addTestSuite(IndexingAggregateTest.class);
return suite;
More information about the exo-jcr-commits
mailing list