[exo-jcr-commits] exo-jcr SVN: r5924 - in jcr/trunk: exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene and 3 other directories.
do-not-reply at jboss.org
do-not-reply at jboss.org
Tue Mar 20 11:39:00 EDT 2012
Author: nzamosenchuk
Date: 2012-03-20 11:38:57 -0400 (Tue, 20 Mar 2012)
New Revision: 5924
Added:
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ScorerWrapper.java
Modified:
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractExcerpt.java
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractIndex.java
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/DescendantSelfAxisQuery.java
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrQueryParser.java
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrStandartAnalyzer.java
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/MoreLikeThis.java
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NotQuery.java
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/PersistentIndex.java
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ReadOnlyIndexReader.java
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SingletonTokenStream.java
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java
jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/impl/core/query/TestIndexingConfig.java
jcr/trunk/packaging/module/src/main/javascript/jcr.packaging.module.js
jcr/trunk/pom.xml
Log:
EXOJCR-1752 : upgrade to lucene 3.5
Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractExcerpt.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractExcerpt.java 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractExcerpt.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -17,8 +17,8 @@
package org.exoplatform.services.jcr.impl.core.query.lucene;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
@@ -268,9 +268,9 @@
{
while (ts.incrementToken())
{
- OffsetAttribute offset = (OffsetAttribute)ts.getAttribute(OffsetAttribute.class);
- TermAttribute term = (TermAttribute)ts.getAttribute(TermAttribute.class);
- String termText = term.term();
+ OffsetAttribute offset = ts.getAttribute(OffsetAttribute.class);
+ CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
+ String termText = new String(term.buffer(), 0, term.length());
TermVectorOffsetInfo[] info = termMap.get(termText);
if (info == null)
{
Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractIndex.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractIndex.java 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractIndex.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -20,9 +20,12 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -54,9 +57,6 @@
/** PrintStream that pipes all calls to println(String) into log.info() */
private static final LoggingPrintStream STREAM_LOGGER = new LoggingPrintStream();
- /** Executor with a pool size equal to the number of available processors */
- private static final DynamicPooledExecutor EXECUTOR = new DynamicPooledExecutor();
-
/** The currently set IndexWriter or <code>null</code> if none is set */
private IndexWriter indexWriter;
@@ -124,7 +124,8 @@
if (!isExisting)
{
- indexWriter = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.LIMITED);
+ IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_30, analyzer);
+ indexWriter = new IndexWriter(directory, config);
// immediately close, now that index has been created
indexWriter.close();
indexWriter = null;
@@ -231,7 +232,7 @@
log.debug("closing IndexWriter.");
indexWriter = null;
}
-
+
if (indexReader == null || !indexReader.isCurrent())
{
IndexReader reader = IndexReader.open(getDirectory(), null, false, termInfosIndexDivisor);
@@ -335,9 +336,17 @@
}
if (indexWriter == null)
{
- indexWriter = new IndexWriter(getDirectory(), analyzer, new IndexWriter.MaxFieldLength(maxFieldLength));
- indexWriter.setSimilarity(similarity);
- indexWriter.setUseCompoundFile(useCompoundFile);
+ IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_30, analyzer);
+ config.setSimilarity(similarity);
+ if (config.getMergePolicy() instanceof LogMergePolicy)
+ {
+ ((LogMergePolicy)config.getMergePolicy()).setUseCompoundFile(useCompoundFile);
+ }
+ else
+ {
+ log.error("Can't set \"UseCompoundFile\". Merge policy is not an instance of LogMergePolicy. ");
+ }
+ indexWriter = new IndexWriter(directory, config);
indexWriter.setInfoStream(STREAM_LOGGER);
}
return indexWriter;
@@ -501,7 +510,15 @@
useCompoundFile = b;
if (indexWriter != null)
{
- indexWriter.setUseCompoundFile(b);
+ IndexWriterConfig config = indexWriter.getConfig();
+ if (config.getMergePolicy() instanceof LogMergePolicy)
+ {
+ ((LogMergePolicy)config.getMergePolicy()).setUseCompoundFile(useCompoundFile);
+ }
+ else
+ {
+ log.error("Can't set \"UseCompoundFile\". Merge policy is not an instance of LogMergePolicy. ");
+ }
}
}
@@ -513,7 +530,7 @@
this.maxFieldLength = maxFieldLength;
if (indexWriter != null)
{
- indexWriter.setMaxFieldLength(maxFieldLength);
+ indexWriter.setMaxFieldLength(this.maxFieldLength);
}
}
Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/DescendantSelfAxisQuery.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/DescendantSelfAxisQuery.java 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/DescendantSelfAxisQuery.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -542,7 +542,18 @@
}
collectContextHits();
- currentDoc = subScorer.nextDoc();
+ try
+ {
+ currentDoc = subScorer.nextDoc();
+ }
+ catch (UnsupportedOperationException e)
+ {
+ // Workaround for Scorers whose nextDoc() throws UnsupportedOperationException:
+ // re-collect the hits through a wrapper that supports iteration. Consider getting rid of it.
+ ScorerWrapper collector = new ScorerWrapper(subScorer.getSimilarity());
+ subScorer.score(collector.getCollector());
+ subScorer = collector;
+ currentDoc = subScorer.nextDoc();
+ }
if (contextHits.isEmpty())
{
currentDoc = NO_MORE_DOCS;
Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrQueryParser.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrQueryParser.java 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrQueryParser.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -48,7 +48,7 @@
*/
public JcrQueryParser(String fieldName, Analyzer analyzer, SynonymProvider synonymProvider)
{
- super(Version.LUCENE_24, fieldName, analyzer);
+ super(Version.LUCENE_30, fieldName, analyzer);
this.synonymProvider = synonymProvider;
setAllowLeadingWildcard(true);
setDefaultOperator(Operator.AND);
@@ -155,7 +155,7 @@
}
else
{
- return super.getFieldQuery(field, queryText);
+ return super.getFieldQuery(field, queryText, true);
}
}
Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrStandartAnalyzer.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrStandartAnalyzer.java 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrStandartAnalyzer.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -36,7 +36,7 @@
* index the text of the property and to parse searchtext for this property.
*/
-public class JcrStandartAnalyzer extends Analyzer
+public final class JcrStandartAnalyzer extends Analyzer
{
/**
@@ -47,7 +47,7 @@
{
public Analyzer run()
{
- return new StandardAnalyzer(Version.LUCENE_24, Collections.EMPTY_SET);
+ return new StandardAnalyzer(Version.LUCENE_30, Collections.EMPTY_SET);
}
});
Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/MoreLikeThis.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/MoreLikeThis.java 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/MoreLikeThis.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -19,7 +19,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
@@ -153,7 +153,7 @@
* Default analyzer to parse source doc with.
* @see #getAnalyzer
*/
- public static final Analyzer DEFAULT_ANALYZER = new StandardAnalyzer(Version.LUCENE_24);
+ public static final Analyzer DEFAULT_ANALYZER = new StandardAnalyzer(Version.LUCENE_30);
/**
* Ignore terms with less than this frequency in the source doc.
@@ -203,12 +203,12 @@
* @see #setStopWords
* @see #getStopWords
*/
- public static final Set DEFAULT_STOP_WORDS = null;
+ public static final Set<String> DEFAULT_STOP_WORDS = null;
/**
* Current set of stop words.
*/
- private Set stopWords = DEFAULT_STOP_WORDS;
+ private Set<String> stopWords = DEFAULT_STOP_WORDS;
/**
* Return a Query with no more than this many terms.
@@ -466,7 +466,7 @@
* @see org.apache.lucene.analysis.StopFilter#makeStopSet StopFilter.makeStopSet()
* @see #getStopWords
*/
- public void setStopWords(Set stopWords)
+ public void setStopWords(Set<String> stopWords)
{
this.stopWords = stopWords;
}
@@ -475,7 +475,7 @@
* Get the current stop words being used.
* @see #setStopWords
*/
- public Set getStopWords()
+ public Set<String> getStopWords()
{
return stopWords;
}
@@ -530,8 +530,8 @@
if (fieldNames == null)
{
// gather list of valid fields from lucene
- Collection fields = ir.getFieldNames(IndexReader.FieldOption.INDEXED);
- fieldNames = (String[])fields.toArray(new String[fields.size()]);
+ Collection<String> fields = ir.getFieldNames(IndexReader.FieldOption.INDEXED);
+ fieldNames = fields.toArray(new String[fields.size()]);
}
return createQuery(retrieveTerms(docNum));
@@ -547,8 +547,8 @@
if (fieldNames == null)
{
// gather list of valid fields from lucene
- Collection fields = ir.getFieldNames(IndexReader.FieldOption.INDEXED);
- fieldNames = (String[])fields.toArray(new String[fields.size()]);
+ Collection<String> fields = ir.getFieldNames(IndexReader.FieldOption.INDEXED);
+ fieldNames = fields.toArray(new String[fields.size()]);
}
return like(new FileReader(f));
@@ -585,9 +585,9 @@
}
/**
- * Create the More like query from a PriorityQueue
+ * Create the More like query from a PriorityQueue<Object[]>
*/
- private Query createQuery(PriorityQueue q)
+ private Query createQuery(PriorityQueue<Object[]> q)
{
BooleanQuery query = new BooleanQuery();
Object cur;
@@ -630,22 +630,22 @@
}
/**
- * Create a PriorityQueue from a word->tf map.
+ * Create a PriorityQueue<Object[]> from a word->tf map.
*
* @param words a map of words keyed on the word(String) with Int objects as the values.
*/
- private PriorityQueue createQueue(Map words) throws IOException
+ private PriorityQueue<Object[]> createQueue(Map<String, Int> words) throws IOException
{
// have collected all words in doc and their freqs
int numDocs = ir.numDocs();
FreqQ res = new FreqQ(words.size()); // will order words by score
- Iterator it = words.keySet().iterator();
+ Iterator<String> it = words.keySet().iterator();
while (it.hasNext())
{ // for every word
- String word = (String)it.next();
+ String word = it.next();
- int tf = ((Int)words.get(word)).x; // term freq in the source doc
+ int tf = words.get(word).x; // term freq in the source doc
if (minTermFreq > 0 && tf < minTermFreq)
{
continue; // filter out words that don't occur enough times in the source
@@ -714,9 +714,9 @@
*
* @param docNum the id of the lucene document from which to find terms
*/
- public PriorityQueue retrieveTerms(int docNum) throws IOException
+ public PriorityQueue<Object[]> retrieveTerms(int docNum) throws IOException
{
- Map termFreqMap = new HashMap();
+ Map<String, Int> termFreqMap = new HashMap<String, Int>();
for (int i = 0; i < fieldNames.length; i++)
{
String fieldName = fieldNames[i];
@@ -750,7 +750,7 @@
* @param termFreqMap a Map of terms and their frequencies
* @param vector List of terms and their frequencies for a doc/field
*/
- private void addTermFrequencies(Map termFreqMap, TermFreqVector vector)
+ private void addTermFrequencies(Map<String, Int> termFreqMap, TermFreqVector vector)
{
String[] terms = vector.getTerms();
int[] freqs = vector.getTermFrequencies();
@@ -763,7 +763,7 @@
continue;
}
// increment frequency
- Int cnt = (Int)termFreqMap.get(term);
+ Int cnt = termFreqMap.get(term);
if (cnt == null)
{
cnt = new Int();
@@ -790,8 +790,8 @@
// for every token
while (ts.incrementToken())
{
- TermAttribute term = (TermAttribute)ts.getAttribute(TermAttribute.class);
- String word = term.term();
+ CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
+ String word = new String(term.buffer(), 0, term.length());
tokenCount++;
if (tokenCount > maxNumTokensParsed)
{
@@ -862,9 +862,9 @@
*
* @see #retrieveInterestingTerms
*/
- public PriorityQueue retrieveTerms(Reader r) throws IOException
+ public PriorityQueue<Object[]> retrieveTerms(Reader r) throws IOException
{
- Map words = new HashMap();
+ Map<String, Int> words = new HashMap<String, Int>();
for (int i = 0; i < fieldNames.length; i++)
{
String fieldName = fieldNames[i];
@@ -878,8 +878,8 @@
*/
public String[] retrieveInterestingTerms(int docNum) throws IOException
{
- ArrayList al = new ArrayList(maxQueryTerms);
- PriorityQueue pq = retrieveTerms(docNum);
+ ArrayList<Object> al = new ArrayList<Object>(maxQueryTerms);
+ PriorityQueue<Object[]> pq = retrieveTerms(docNum);
Object cur;
// have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
int lim = maxQueryTerms;
@@ -890,7 +890,7 @@
al.add(ar[0]); // the 1st entry is the interesting word
}
String[] res = new String[al.size()];
- return (String[])al.toArray(res);
+ return al.toArray(res);
}
/**
@@ -904,8 +904,8 @@
*/
public String[] retrieveInterestingTerms(Reader r) throws IOException
{
- ArrayList al = new ArrayList(maxQueryTerms);
- PriorityQueue pq = retrieveTerms(r);
+ ArrayList<Object> al = new ArrayList<Object>(maxQueryTerms);
+ PriorityQueue<Object[]> pq = retrieveTerms(r);
Object cur;
// have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
int lim = maxQueryTerms;
@@ -916,25 +916,23 @@
al.add(ar[0]); // the 1st entry is the interesting word
}
String[] res = new String[al.size()];
- return (String[])al.toArray(res);
+ return al.toArray(res);
}
/**
- * PriorityQueue that orders words by score.
+ * PriorityQueue<Object[]> that orders words by score.
*/
- private static class FreqQ extends PriorityQueue
+ private static class FreqQ extends PriorityQueue<Object[]>
{
FreqQ(int s)
{
initialize(s);
}
- protected boolean lessThan(Object a, Object b)
+ protected boolean lessThan(Object[] a, Object[] b)
{
- Object[] aa = (Object[])a;
- Object[] bb = (Object[])b;
- Float fa = (Float)aa[2];
- Float fb = (Float)bb[2];
+ Float fa = (Float)a[2];
+ Float fb = (Float)b[2];
return fa.floatValue() > fb.floatValue();
}
}
Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NotQuery.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NotQuery.java 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NotQuery.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -220,7 +220,7 @@
if (docNo == -1)
{
// get first doc of context scorer
- int docId = contextScorer.nextDoc();
+ int docId = contextScorer == null ? NO_MORE_DOCS : contextScorer.nextDoc();
if (docId != NO_MORE_DOCS)
{
contextNo = docId;
@@ -237,7 +237,7 @@
while (contextNo != -1 && contextNo == docNo)
{
docNo++;
- int docId = contextScorer.nextDoc();
+ int docId = contextScorer == null ? NO_MORE_DOCS : contextScorer.nextDoc();
contextNo = docId == NO_MORE_DOCS ? -1 : docId;
}
if (docNo >= reader.maxDoc())
Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/PersistentIndex.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/PersistentIndex.java 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/PersistentIndex.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -91,7 +91,7 @@
void addIndexes(IndexReader[] readers) throws IOException
{
getIndexWriter().addIndexes(readers);
- getIndexWriter().optimize();
+ getIndexWriter().maybeMerge();
}
/**
Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ReadOnlyIndexReader.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ReadOnlyIndexReader.java 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ReadOnlyIndexReader.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -190,6 +190,10 @@
*/
protected final void doCommit(Map commitUserData)
{
+ if (!hasChanges)
+ {
+ return;
+ }
throw new UnsupportedOperationException("IndexReader is read-only");
}
Added: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ScorerWrapper.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ScorerWrapper.java (rev 0)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ScorerWrapper.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2003-2012 eXo Platform SAS.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Affero General Public License
+ * as published by the Free Software Foundation; either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+package org.exoplatform.services.jcr.impl.core.query.lucene;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Similarity;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Workaround wrapper used as a bridge between Scorer.score(Collector) and the
+ * DocIdSetIterator API. Some Scorers inside Lucene (e.g. BooleanScorer) do not
+ * support the DocIdSetIterator interface, which is required for JCR needs.
+ * Consider getting rid of this solution.
+ *
+ * @author <a href="mailto:nzamosenchuk at exoplatform.com">Nikolay Zamosenchuk</a>
+ * @version $Id: ScorerWrapper.java 34360 2009-07-22 23:58:59Z nzamosenchuk $
+ *
+ */
+public class ScorerWrapper extends Scorer
+{
+
+ List<DocData> docs = new ArrayList<DocData>();
+
+ int index;
+
+ DocData currentDocData = null;
+
+ CollectorWrapper collectorWrapper;
+
+ static class DocData
+ {
+ public int docID;
+
+ public float freq;
+
+ public float score;
+
+ public DocData(int docID, float freq, float score)
+ {
+ super();
+ this.docID = docID;
+ this.freq = freq;
+ this.score = score;
+ }
+
+ }
+
+ class CollectorWrapper extends Collector
+ {
+ private Scorer subScrorer;
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException
+ {
+ this.subScrorer = scorer;
+ }
+
+ @Override
+ public void collect(int doc) throws IOException
+ {
+ ScorerWrapper.this.docs.add(new DocData(doc, subScrorer.freq(), subScrorer.score()));
+ }
+
+ @Override
+ public void setNextReader(IndexReader reader, int docBase) throws IOException
+ {
+ }
+
+ @Override
+ public boolean acceptsDocsOutOfOrder()
+ {
+ return true;
+ }
+
+ }
+
+ /**
+ * @param similarity
+ */
+ protected ScorerWrapper(Similarity similarity)
+ {
+ super(similarity);
+
+ collectorWrapper = new CollectorWrapper();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public float score() throws IOException
+ {
+ if (currentDocData != null)
+ {
+ return currentDocData.score;
+ }
+ return 0;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public int docID()
+ {
+ if (currentDocData != null)
+ {
+ return currentDocData.docID;
+ }
+ return NO_MORE_DOCS;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public int nextDoc() throws IOException
+ {
+ if (index < docs.size())
+ {
+ currentDocData = docs.get(index);
+ index++;
+ return currentDocData.docID;
+ }
+ else
+ {
+ currentDocData = null;
+ return NO_MORE_DOCS;
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public int advance(int target) throws IOException
+ {
+ int doc;
+ while ((doc = nextDoc()) < target)
+ {
+ if (doc == NO_MORE_DOCS || doc == -1)
+ {
+ return NO_MORE_DOCS;
+ }
+ }
+ return doc;
+ }
+
+ public Collector getCollector()
+ {
+ return collectorWrapper;
+ }
+
+}
Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -21,7 +21,6 @@
import org.apache.commons.collections.collection.TransformedCollection;
import org.apache.commons.collections.iterators.TransformIterator;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
@@ -1959,7 +1958,6 @@
{
// find the right fields to transfer
Fieldable[] fields = aDoc.getFieldables(FieldNames.PROPERTIES);
- Token t = new Token();
for (int k = 0; k < fields.length; k++)
{
Fieldable field = fields[k];
@@ -1967,12 +1965,11 @@
// SingleTokenStream
//t = field.tokenStreamValue().next(t);
field.tokenStreamValue().incrementToken();
- TermAttribute term =
- field.tokenStreamValue().getAttribute(TermAttribute.class);
- PayloadAttribute payload =
- field.tokenStreamValue().getAttribute(PayloadAttribute.class);
+ TermAttribute term = field.tokenStreamValue().getAttribute(TermAttribute.class);
+ PayloadAttribute payload = field.tokenStreamValue().getAttribute(PayloadAttribute.class);
- String value = new String(t.termBuffer(), 0, t.termLength());
+ String value = new String(term.termBuffer(), 0, term.termLength());
+
if (value.startsWith(namePrefix))
{
// extract value
@@ -1981,7 +1978,8 @@
QPath p = getRelativePath(state, propState);
String path = getNamespaceMappings().translatePath(p);
value = FieldNames.createNamedValue(path, value);
- t.setTermBuffer(value);
+
+ term.setTermBuffer(value);
doc.add(new Field(field.name(), new SingletonTokenStream(term.term(), payload
.getPayload())));
doc.add(new Field(FieldNames.AGGREGATED_NODE_UUID, parent.getIdentifier(),
@@ -2026,30 +2024,8 @@
* if an error occurs while reading item states.
*/
protected QPath getRelativePath(NodeData nodeState, PropertyData propState) throws RepositoryException
-
{
-
- QPath nodePath = nodeState.getQPath();
- QPath propPath = propState.getQPath();
throw new RepositoryException();
- // Path p = nodePath.computeRelativePath(propPath);
- // // make sure it does not contain indexes
- // boolean clean = true;
- // Path.Element[] elements = p.getElements();
- // for (int i = 0; i < elements.length; i++)
- // {
- // if (elements[i].getIndex() != 0)
- // {
- // elements[i] = PATH_FACTORY.createElement(elements[i].getName());
- // clean = false;
- // }
- // }
- // if (!clean)
- // {
- // p = PATH_FACTORY.create(elements);
- // }
-
- // return p;
}
/**
@@ -2101,7 +2077,6 @@
* value=NodeState.
*/
protected void retrieveAggregateRoot(final Set<String> removedNodeIds, final Map<String, NodeData> map)
-
{
if (indexingConfig != null)
{
Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SingletonTokenStream.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SingletonTokenStream.java 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SingletonTokenStream.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -62,7 +62,8 @@
*/
public SingletonTokenStream()
{
-
+ termAttribute = addAttribute(TermAttribute.class);
+ payloadAttribute = addAttribute(PayloadAttribute.class);
}
/**
@@ -77,8 +78,8 @@
{
this.value = value;
this.payload = payload;
- termAttribute = (TermAttribute)addAttribute(TermAttribute.class);
- payloadAttribute = (PayloadAttribute)addAttribute(PayloadAttribute.class);
+ termAttribute = addAttribute(TermAttribute.class);
+ payloadAttribute = addAttribute(PayloadAttribute.class);
}
/**
@@ -95,17 +96,6 @@
this(value, new Payload(new PropertyMetaData(type).toByteArray()));
}
- /**
- * Creates a new SingleTokenStream with the given token.
- *
- * @param t the token.
- */
- @Deprecated
- public SingletonTokenStream(Token t)
- {
- this(t.term(), t.getPayload());
- }
-
@Override
public boolean incrementToken() throws IOException
{
@@ -136,8 +126,6 @@
public void close() throws IOException
{
consumed = true;
- value = null;
- payload = null;
payloadAttribute = null;
termAttribute = null;
}
Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -17,15 +17,19 @@
package org.exoplatform.services.jcr.impl.core.query.lucene.spell;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
+import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Version;
import org.exoplatform.commons.utils.SecurityHelper;
import org.exoplatform.services.jcr.impl.core.query.QueryHandler;
import org.exoplatform.services.jcr.impl.core.query.QueryRootNode;
@@ -362,23 +366,23 @@
private void tokenize(String statement, List<String> words, List<TokenData> tokens) throws IOException
{
TokenStream ts = handler.getTextAnalyzer().tokenStream(FieldNames.FULLTEXT, new StringReader(statement));
- TermAttribute term = (TermAttribute)ts.getAttribute(TermAttribute.class);
- PositionIncrementAttribute positionIncrement =
- (PositionIncrementAttribute)ts.getAttribute(PositionIncrementAttribute.class);
- OffsetAttribute offset = (OffsetAttribute)ts.getAttribute(OffsetAttribute.class);
+ CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
+ PositionIncrementAttribute positionIncrement = ts.getAttribute(PositionIncrementAttribute.class);
+ OffsetAttribute offset = ts.getAttribute(OffsetAttribute.class);
try
{
+ String word;
while (ts.incrementToken())
{
- String word = term.term();
+ word = new String(term.buffer(), 0, term.length());
// while ((t = ts.next()) != null)
// {
String origWord = statement.substring(offset.startOffset(), offset.endOffset());
if (positionIncrement.getPositionIncrement() > 0)
{
words.add(word);
- tokens.add(new TokenData(offset.startOffset(), offset.endOffset(), term.term()));
+ tokens.add(new TokenData(offset.startOffset(), offset.endOffset(), word));
}
else
{
@@ -389,8 +393,7 @@
{
// replace current token and word
words.set(words.size() - 1, word);
- tokens
- .set(tokens.size() - 1, new TokenData(offset.startOffset(), offset.endOffset(), term.term()));
+ tokens.set(tokens.size() - 1, new TokenData(offset.startOffset(), offset.endOffset(), word));
}
}
}
@@ -459,8 +462,9 @@
{
public String[] run() throws Exception
{
- return spellChecker.suggestSimilar(words[currentIndex], 5, reader, FieldNames.FULLTEXT,
- morePopular);
+ return spellChecker
+ .suggestSimilar(words[currentIndex], 5, reader, FieldNames.FULLTEXT, morePopular
+ ? SuggestMode.SUGGEST_MORE_POPULAR : SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
}
});
@@ -548,7 +552,8 @@
long time = System.currentTimeMillis();
Dictionary dict = new LuceneDictionary(reader, FieldNames.FULLTEXT);
LOG.debug("Starting spell checker index refresh");
- spellChecker.indexDictionary(dict);
+ spellChecker.indexDictionary(dict, new IndexWriterConfig(Version.LUCENE_30,
+ new StandardAnalyzer(Version.LUCENE_30)), true);
time = System.currentTimeMillis() - time;
time = time / 1000;
LOG.info("Spell checker index refreshed in: " + new Long(time) + " s.");
Modified: jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/impl/core/query/TestIndexingConfig.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/impl/core/query/TestIndexingConfig.java 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/impl/core/query/TestIndexingConfig.java 2012-03-20 15:38:57 UTC (rev 5924)
@@ -101,9 +101,9 @@
IndexingConfigurationImpl indexingConfigurationImpl = (IndexingConfigurationImpl)searchIndex.getIndexingConfig();
assertNotNull(indexingConfigurationImpl);
- indexingConfigurationImpl.addPropertyAnalyzer("FULL:" + simple, new SimpleAnalyzer());
- indexingConfigurationImpl.addPropertyAnalyzer("FULL:" + whitespace, new WhitespaceAnalyzer());
- indexingConfigurationImpl.addPropertyAnalyzer("FULL:" + stop, new StopAnalyzer(Version.LUCENE_24));
+ indexingConfigurationImpl.addPropertyAnalyzer("FULL:" + simple, new SimpleAnalyzer(Version.LUCENE_30));
+ indexingConfigurationImpl.addPropertyAnalyzer("FULL:" + whitespace, new WhitespaceAnalyzer(Version.LUCENE_30));
+ indexingConfigurationImpl.addPropertyAnalyzer("FULL:" + stop, new StopAnalyzer(Version.LUCENE_30));
testRoot = testSession.getRootNode().addNode("testrootAnalyzers");
root.save();
}
Modified: jcr/trunk/packaging/module/src/main/javascript/jcr.packaging.module.js
===================================================================
--- jcr/trunk/packaging/module/src/main/javascript/jcr.packaging.module.js 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/packaging/module/src/main/javascript/jcr.packaging.module.js 2012-03-20 15:38:57 UTC (rev 5924)
@@ -26,9 +26,10 @@
addDependency(new Project("jboss.jbossts","jbossjts","jar","4.6.1.GA")).
addDependency(new Project("jboss.jbossts","jbossts-common","jar","4.6.1.GA")).
addDependency(new Project("org.apache.ws.commons","ws-commons-util","jar","1.0.1")).
- addDependency(new Project("org.apache.lucene", "lucene-core", "jar", "2.9.4")).
- addDependency(new Project("org.apache.lucene", "lucene-spellchecker", "jar", "2.9.4")).
- addDependency(new Project("org.apache.lucene", "lucene-memory", "jar", "2.9.4"));
+ addDependency(new Project("org.apache.lucene", "lucene-core", "jar", "3.5.0")).
+ addDependency(new Project("org.apache.lucene", "lucene-spellchecker", "jar", "3.5.0")).
+ addDependency(new Project("org.apache.lucene", "lucene-wordnet", "jar", "3.3.0")).
+ addDependency(new Project("org.apache.lucene", "lucene-memory", "jar", "3.5.0"));
module.frameworks = {}
module.frameworks.web =
Modified: jcr/trunk/pom.xml
===================================================================
--- jcr/trunk/pom.xml 2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/pom.xml 2012-03-20 15:38:57 UTC (rev 5924)
@@ -279,22 +279,22 @@
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
- <version>3.0.3</version>
+ <version>3.5.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-spellchecker</artifactId>
- <version>3.0.3</version>
+ <version>3.5.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-memory</artifactId>
- <version>3.0.3</version>
+ <version>3.5.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-wordnet</artifactId>
- <version>3.0.3</version>
+ <version>3.3.0</version>
</dependency>
<dependency>
<groupId>com.sun.xml.stream</groupId>
More information about the exo-jcr-commits
mailing list