[exo-jcr-commits] exo-jcr SVN: r5924 - in jcr/trunk: exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene and 3 other directories.

do-not-reply at jboss.org do-not-reply at jboss.org
Tue Mar 20 11:39:00 EDT 2012


Author: nzamosenchuk
Date: 2012-03-20 11:38:57 -0400 (Tue, 20 Mar 2012)
New Revision: 5924

Added:
   jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ScorerWrapper.java
Modified:
   jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractExcerpt.java
   jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractIndex.java
   jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/DescendantSelfAxisQuery.java
   jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrQueryParser.java
   jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrStandartAnalyzer.java
   jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/MoreLikeThis.java
   jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NotQuery.java
   jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/PersistentIndex.java
   jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ReadOnlyIndexReader.java
   jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java
   jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SingletonTokenStream.java
   jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java
   jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/impl/core/query/TestIndexingConfig.java
   jcr/trunk/packaging/module/src/main/javascript/jcr.packaging.module.js
   jcr/trunk/pom.xml
Log:
EXOJCR-1752 : upgrade to lucene 3.5

Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractExcerpt.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractExcerpt.java	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractExcerpt.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -17,8 +17,8 @@
 package org.exoplatform.services.jcr.impl.core.query.lucene;
 
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader;
@@ -268,9 +268,9 @@
       {
          while (ts.incrementToken())
          {
-            OffsetAttribute offset = (OffsetAttribute)ts.getAttribute(OffsetAttribute.class);
-            TermAttribute term = (TermAttribute)ts.getAttribute(TermAttribute.class);
-            String termText = term.term();
+            OffsetAttribute offset = ts.getAttribute(OffsetAttribute.class);
+            CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
+            String termText = new String(term.buffer(), 0, term.length());
             TermVectorOffsetInfo[] info = termMap.get(termText);
             if (info == null)
             {

Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractIndex.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractIndex.java	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/AbstractIndex.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -20,9 +20,12 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogMergePolicy;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Version;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -54,9 +57,6 @@
    /** PrintStream that pipes all calls to println(String) into log.info() */
    private static final LoggingPrintStream STREAM_LOGGER = new LoggingPrintStream();
 
-   /** Executor with a pool size equal to the number of available processors */
-   private static final DynamicPooledExecutor EXECUTOR = new DynamicPooledExecutor();
-
    /** The currently set IndexWriter or <code>null</code> if none is set */
    private IndexWriter indexWriter;
 
@@ -124,7 +124,8 @@
 
       if (!isExisting)
       {
-         indexWriter = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.LIMITED);
+         IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_30, analyzer);
+         indexWriter = new IndexWriter(directory, config);
          // immediately close, now that index has been created
          indexWriter.close();
          indexWriter = null;
@@ -231,7 +232,7 @@
          log.debug("closing IndexWriter.");
          indexWriter = null;
       }
-
+      
       if (indexReader == null || !indexReader.isCurrent())
       {
          IndexReader reader = IndexReader.open(getDirectory(), null, false, termInfosIndexDivisor);
@@ -335,9 +336,17 @@
       }
       if (indexWriter == null)
       {
-         indexWriter = new IndexWriter(getDirectory(), analyzer, new IndexWriter.MaxFieldLength(maxFieldLength));
-         indexWriter.setSimilarity(similarity);
-         indexWriter.setUseCompoundFile(useCompoundFile);
+         IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_30, analyzer);
+         config.setSimilarity(similarity);
+         if (config.getMergePolicy() instanceof LogMergePolicy)
+         {
+            ((LogMergePolicy)config.getMergePolicy()).setUseCompoundFile(useCompoundFile);
+         }
+         else
+         {
+            log.error("Can't set \"UseCompoundFile\". Merge policy is not an instance of LogMergePolicy. ");
+         }
+         indexWriter = new IndexWriter(directory, config);
          indexWriter.setInfoStream(STREAM_LOGGER);
       }
       return indexWriter;
@@ -501,7 +510,15 @@
       useCompoundFile = b;
       if (indexWriter != null)
       {
-         indexWriter.setUseCompoundFile(b);
+         IndexWriterConfig config = indexWriter.getConfig();
+         if (config.getMergePolicy() instanceof LogMergePolicy)
+         {
+            ((LogMergePolicy)config.getMergePolicy()).setUseCompoundFile(useCompoundFile);
+         }
+         else
+         {
+            log.error("Can't set \"UseCompoundFile\". Merge policy is not an instance of LogMergePolicy. ");
+         }
       }
    }
 
@@ -513,7 +530,7 @@
       this.maxFieldLength = maxFieldLength;
       if (indexWriter != null)
       {
-         indexWriter.setMaxFieldLength(maxFieldLength);
+         indexWriter.setMaxFieldLength(this.maxFieldLength);
       }
    }
 

Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/DescendantSelfAxisQuery.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/DescendantSelfAxisQuery.java	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/DescendantSelfAxisQuery.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -542,7 +542,18 @@
          }
 
          collectContextHits();
-         currentDoc = subScorer.nextDoc();
+         try
+         {
+            currentDoc = subScorer.nextDoc();
+         }
+         catch (UnsupportedOperationException e)
+         {
+            // workaround. Consider getting rid of it
+            ScorerWrapper collector = new ScorerWrapper(subScorer.getSimilarity());
+            subScorer.score(collector.getCollector());
+            subScorer = collector;
+            currentDoc = subScorer.nextDoc();
+         }
          if (contextHits.isEmpty())
          {
             currentDoc = NO_MORE_DOCS;

Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrQueryParser.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrQueryParser.java	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrQueryParser.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -48,7 +48,7 @@
     */
    public JcrQueryParser(String fieldName, Analyzer analyzer, SynonymProvider synonymProvider)
    {
-      super(Version.LUCENE_24, fieldName, analyzer);
+      super(Version.LUCENE_30, fieldName, analyzer);
       this.synonymProvider = synonymProvider;
       setAllowLeadingWildcard(true);
       setDefaultOperator(Operator.AND);
@@ -155,7 +155,7 @@
       }
       else
       {
-         return super.getFieldQuery(field, queryText);
+         return super.getFieldQuery(field, queryText, true);
       }
    }
 

Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrStandartAnalyzer.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrStandartAnalyzer.java	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/JcrStandartAnalyzer.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -36,7 +36,7 @@
  * index the text of the property and to parse searchtext for this property.
  */
 
-public class JcrStandartAnalyzer extends Analyzer
+public final class JcrStandartAnalyzer extends Analyzer
 {
 
    /**
@@ -47,7 +47,7 @@
    {
       public Analyzer run()
       {
-         return new StandardAnalyzer(Version.LUCENE_24, Collections.EMPTY_SET);
+         return new StandardAnalyzer(Version.LUCENE_30, Collections.EMPTY_SET);
       }
    });
 

Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/MoreLikeThis.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/MoreLikeThis.java	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/MoreLikeThis.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -19,7 +19,7 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
@@ -153,7 +153,7 @@
     * Default analyzer to parse source doc with.
     * @see #getAnalyzer
     */
-   public static final Analyzer DEFAULT_ANALYZER = new StandardAnalyzer(Version.LUCENE_24);
+   public static final Analyzer DEFAULT_ANALYZER = new StandardAnalyzer(Version.LUCENE_30);
 
    /**
     * Ignore terms with less than this frequency in the source doc.
@@ -203,12 +203,12 @@
     * @see #setStopWords
     * @see #getStopWords
     */
-   public static final Set DEFAULT_STOP_WORDS = null;
+   public static final Set<String> DEFAULT_STOP_WORDS = null;
 
    /**
     * Current set of stop words.
     */
-   private Set stopWords = DEFAULT_STOP_WORDS;
+   private Set<String> stopWords = DEFAULT_STOP_WORDS;
 
    /**
     * Return a Query with no more than this many terms.
@@ -466,7 +466,7 @@
     * @see org.apache.lucene.analysis.StopFilter#makeStopSet StopFilter.makeStopSet()
     * @see #getStopWords
     */
-   public void setStopWords(Set stopWords)
+   public void setStopWords(Set<String> stopWords)
    {
       this.stopWords = stopWords;
    }
@@ -475,7 +475,7 @@
     * Get the current stop words being used.
     * @see #setStopWords
     */
-   public Set getStopWords()
+   public Set<String> getStopWords()
    {
       return stopWords;
    }
@@ -530,8 +530,8 @@
       if (fieldNames == null)
       {
          // gather list of valid fields from lucene
-         Collection fields = ir.getFieldNames(IndexReader.FieldOption.INDEXED);
-         fieldNames = (String[])fields.toArray(new String[fields.size()]);
+         Collection<String> fields = ir.getFieldNames(IndexReader.FieldOption.INDEXED);
+         fieldNames = fields.toArray(new String[fields.size()]);
       }
 
       return createQuery(retrieveTerms(docNum));
@@ -547,8 +547,8 @@
       if (fieldNames == null)
       {
          // gather list of valid fields from lucene
-         Collection fields = ir.getFieldNames(IndexReader.FieldOption.INDEXED);
-         fieldNames = (String[])fields.toArray(new String[fields.size()]);
+         Collection<String> fields = ir.getFieldNames(IndexReader.FieldOption.INDEXED);
+         fieldNames = fields.toArray(new String[fields.size()]);
       }
 
       return like(new FileReader(f));
@@ -585,9 +585,9 @@
    }
 
    /**
-    * Create the More like query from a PriorityQueue
+    * Create the More like query from a {@code PriorityQueue<Object[]>}
     */
-   private Query createQuery(PriorityQueue q)
+   private Query createQuery(PriorityQueue<Object[]> q)
    {
       BooleanQuery query = new BooleanQuery();
       Object cur;
@@ -630,22 +630,22 @@
    }
 
    /**
-    * Create a PriorityQueue from a word->tf map.
+    * Create a {@code PriorityQueue<Object[]>} from a word->tf map.
     *
     * @param words a map of words keyed on the word(String) with Int objects as the values.
     */
-   private PriorityQueue createQueue(Map words) throws IOException
+   private PriorityQueue<Object[]> createQueue(Map<String, Int> words) throws IOException
    {
       // have collected all words in doc and their freqs
       int numDocs = ir.numDocs();
       FreqQ res = new FreqQ(words.size()); // will order words by score
 
-      Iterator it = words.keySet().iterator();
+      Iterator<String> it = words.keySet().iterator();
       while (it.hasNext())
       { // for every word
-         String word = (String)it.next();
+         String word = it.next();
 
-         int tf = ((Int)words.get(word)).x; // term freq in the source doc
+         int tf = words.get(word).x; // term freq in the source doc
          if (minTermFreq > 0 && tf < minTermFreq)
          {
             continue; // filter out words that don't occur enough times in the source
@@ -714,9 +714,9 @@
     *
     * @param docNum the id of the lucene document from which to find terms
     */
-   public PriorityQueue retrieveTerms(int docNum) throws IOException
+   public PriorityQueue<Object[]> retrieveTerms(int docNum) throws IOException
    {
-      Map termFreqMap = new HashMap();
+      Map<String, Int> termFreqMap = new HashMap<String, Int>();
       for (int i = 0; i < fieldNames.length; i++)
       {
          String fieldName = fieldNames[i];
@@ -750,7 +750,7 @@
     * @param termFreqMap a Map of terms and their frequencies
     * @param vector List of terms and their frequencies for a doc/field
     */
-   private void addTermFrequencies(Map termFreqMap, TermFreqVector vector)
+   private void addTermFrequencies(Map<String, Int> termFreqMap, TermFreqVector vector)
    {
       String[] terms = vector.getTerms();
       int[] freqs = vector.getTermFrequencies();
@@ -763,7 +763,7 @@
             continue;
          }
          // increment frequency
-         Int cnt = (Int)termFreqMap.get(term);
+         Int cnt = termFreqMap.get(term);
          if (cnt == null)
          {
             cnt = new Int();
@@ -790,8 +790,8 @@
       // for every token
       while (ts.incrementToken())
       {
-         TermAttribute term = (TermAttribute)ts.getAttribute(TermAttribute.class);
-         String word = term.term();
+         CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
+         String word = new String(term.buffer(), 0, term.length());
          tokenCount++;
          if (tokenCount > maxNumTokensParsed)
          {
@@ -862,9 +862,9 @@
     *
     * @see #retrieveInterestingTerms
     */
-   public PriorityQueue retrieveTerms(Reader r) throws IOException
+   public PriorityQueue<Object[]> retrieveTerms(Reader r) throws IOException
    {
-      Map words = new HashMap();
+      Map<String, Int> words = new HashMap<String, Int>();
       for (int i = 0; i < fieldNames.length; i++)
       {
          String fieldName = fieldNames[i];
@@ -878,8 +878,8 @@
     */
    public String[] retrieveInterestingTerms(int docNum) throws IOException
    {
-      ArrayList al = new ArrayList(maxQueryTerms);
-      PriorityQueue pq = retrieveTerms(docNum);
+      ArrayList<Object> al = new ArrayList<Object>(maxQueryTerms);
+      PriorityQueue<Object[]> pq = retrieveTerms(docNum);
       Object cur;
       // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
       int lim = maxQueryTerms;
@@ -890,7 +890,7 @@
          al.add(ar[0]); // the 1st entry is the interesting word
       }
       String[] res = new String[al.size()];
-      return (String[])al.toArray(res);
+      return al.toArray(res);
    }
 
    /**
@@ -904,8 +904,8 @@
     */
    public String[] retrieveInterestingTerms(Reader r) throws IOException
    {
-      ArrayList al = new ArrayList(maxQueryTerms);
-      PriorityQueue pq = retrieveTerms(r);
+      ArrayList<Object> al = new ArrayList<Object>(maxQueryTerms);
+      PriorityQueue<Object[]> pq = retrieveTerms(r);
       Object cur;
       // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
       int lim = maxQueryTerms;
@@ -916,25 +916,23 @@
          al.add(ar[0]); // the 1st entry is the interesting word
       }
       String[] res = new String[al.size()];
-      return (String[])al.toArray(res);
+      return al.toArray(res);
    }
 
    /**
-    * PriorityQueue that orders words by score.
+    * {@code PriorityQueue<Object[]>} that orders words by score.
     */
-   private static class FreqQ extends PriorityQueue
+   private static class FreqQ extends PriorityQueue<Object[]>
    {
       FreqQ(int s)
       {
          initialize(s);
       }
 
-      protected boolean lessThan(Object a, Object b)
+      protected boolean lessThan(Object[] a, Object[] b)
       {
-         Object[] aa = (Object[])a;
-         Object[] bb = (Object[])b;
-         Float fa = (Float)aa[2];
-         Float fb = (Float)bb[2];
+         Float fa = (Float)a[2];
+         Float fb = (Float)b[2];
          return fa.floatValue() > fb.floatValue();
       }
    }

Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NotQuery.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NotQuery.java	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NotQuery.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -220,7 +220,7 @@
          if (docNo == -1)
          {
             // get first doc of context scorer
-            int docId = contextScorer.nextDoc();
+            int docId = contextScorer == null ? NO_MORE_DOCS : contextScorer.nextDoc();
             if (docId != NO_MORE_DOCS)
             {
                contextNo = docId;
@@ -237,7 +237,7 @@
          while (contextNo != -1 && contextNo == docNo)
          {
             docNo++;
-            int docId = contextScorer.nextDoc();
+            int docId = contextScorer == null ? NO_MORE_DOCS : contextScorer.nextDoc();
             contextNo = docId == NO_MORE_DOCS ? -1 : docId;
          }
          if (docNo >= reader.maxDoc())

Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/PersistentIndex.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/PersistentIndex.java	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/PersistentIndex.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -91,7 +91,7 @@
    void addIndexes(IndexReader[] readers) throws IOException
    {
       getIndexWriter().addIndexes(readers);
-      getIndexWriter().optimize();
+      getIndexWriter().maybeMerge();
    }
 
    /**

Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ReadOnlyIndexReader.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ReadOnlyIndexReader.java	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ReadOnlyIndexReader.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -190,6 +190,10 @@
     */
    protected final void doCommit(Map commitUserData)
    {
+      if (!hasChanges)
+      {
+         return;
+      }
       throw new UnsupportedOperationException("IndexReader is read-only");
    }
 

Added: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ScorerWrapper.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ScorerWrapper.java	                        (rev 0)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/ScorerWrapper.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2003-2012 eXo Platform SAS.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Affero General Public License
+ * as published by the Free Software Foundation; either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+package org.exoplatform.services.jcr.impl.core.query.lucene;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Similarity;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Workaround wrapper, used as a bridge between Scorer.score(Collector) and DocIdSetIterator.
+ * Some Scorers inside Lucene (e.g. BooleanScorer) do not support the DocIdSetIterator interface,
+ * which is required for JCR needs.
+ * Consider getting rid of this solution.
+ * 
+ * @author <a href="mailto:nzamosenchuk at exoplatform.com">Nikolay Zamosenchuk</a>
+ * @version $Id: ScorerWrapper.java 34360 2009-07-22 23:58:59Z nzamosenchuk $
+ *
+ */
+public class ScorerWrapper extends Scorer
+{
+
+   List<DocData> docs = new ArrayList<DocData>();
+
+   int index;
+
+   DocData currentDocData = null;
+
+   CollectorWrapper collectorWrapper;
+
+   static class DocData
+   {
+      public int docID;
+
+      public float freq;
+
+      public float score;
+
+      public DocData(int docID, float freq, float score)
+      {
+         super();
+         this.docID = docID;
+         this.freq = freq;
+         this.score = score;
+      }
+
+   }
+
+   class CollectorWrapper extends Collector
+   {
+      private Scorer subScrorer;
+
+      @Override
+      public void setScorer(Scorer scorer) throws IOException
+      {
+         this.subScrorer = scorer;
+      }
+
+      @Override
+      public void collect(int doc) throws IOException
+      {
+         ScorerWrapper.this.docs.add(new DocData(doc, subScrorer.freq(), subScrorer.score()));
+      }
+
+      @Override
+      public void setNextReader(IndexReader reader, int docBase) throws IOException
+      {
+      }
+
+      @Override
+      public boolean acceptsDocsOutOfOrder()
+      {
+         return true;
+      }
+
+   }
+
+   /**
+    * @param similarity the Similarity implementation passed to the wrapped Scorer
+    */
+   protected ScorerWrapper(Similarity similarity)
+   {
+      super(similarity);
+
+      collectorWrapper = new CollectorWrapper();
+   }
+
+   /**
+   * {@inheritDoc}
+   */
+   @Override
+   public float score() throws IOException
+   {
+      if (currentDocData != null)
+      {
+         return currentDocData.score;
+      }
+      return 0;
+   }
+
+   /**
+   * {@inheritDoc}
+   */
+   @Override
+   public int docID()
+   {
+      if (currentDocData != null)
+      {
+         return currentDocData.docID;
+      }
+      return NO_MORE_DOCS;
+   }
+
+   /**
+   * {@inheritDoc}
+   */
+   @Override
+   public int nextDoc() throws IOException
+   {
+      if (index < docs.size())
+      {
+         currentDocData = docs.get(index);
+         index++;
+         return currentDocData.docID;
+      }
+      else
+      {
+         currentDocData = null;
+         return NO_MORE_DOCS;
+      }
+   }
+
+   /**
+   * {@inheritDoc}
+   */
+   @Override
+   public int advance(int target) throws IOException
+   {
+      int doc;
+      while ((doc = nextDoc()) < target)
+      {
+         if (doc == NO_MORE_DOCS || doc == -1)
+         {
+            return NO_MORE_DOCS;
+         }
+      }
+      return doc;
+   }
+
+   public Collector getCollector()
+   {
+      return collectorWrapper;
+   }
+
+}

Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -21,7 +21,6 @@
 import org.apache.commons.collections.collection.TransformedCollection;
 import org.apache.commons.collections.iterators.TransformIterator;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.document.Document;
@@ -1959,7 +1958,6 @@
                      {
                         // find the right fields to transfer
                         Fieldable[] fields = aDoc.getFieldables(FieldNames.PROPERTIES);
-                        Token t = new Token();
                         for (int k = 0; k < fields.length; k++)
                         {
                            Fieldable field = fields[k];
@@ -1967,12 +1965,11 @@
                            // SingleTokenStream
                            //t = field.tokenStreamValue().next(t);
                            field.tokenStreamValue().incrementToken();
-                           TermAttribute term =
-                              field.tokenStreamValue().getAttribute(TermAttribute.class);
-                           PayloadAttribute payload =
-                              field.tokenStreamValue().getAttribute(PayloadAttribute.class);
+                           TermAttribute term = field.tokenStreamValue().getAttribute(TermAttribute.class);
+                           PayloadAttribute payload = field.tokenStreamValue().getAttribute(PayloadAttribute.class);
 
-                           String value = new String(t.termBuffer(), 0, t.termLength());
+                           String value = new String(term.termBuffer(), 0, term.termLength());
+
                            if (value.startsWith(namePrefix))
                            {
                               // extract value
@@ -1981,7 +1978,8 @@
                               QPath p = getRelativePath(state, propState);
                               String path = getNamespaceMappings().translatePath(p);
                               value = FieldNames.createNamedValue(path, value);
-                              t.setTermBuffer(value);
+
+                              term.setTermBuffer(value);
                               doc.add(new Field(field.name(), new SingletonTokenStream(term.term(), payload
                                  .getPayload())));
                               doc.add(new Field(FieldNames.AGGREGATED_NODE_UUID, parent.getIdentifier(),
@@ -2026,30 +2024,8 @@
     *             if an error occurs while reading item states.
     */
    protected QPath getRelativePath(NodeData nodeState, PropertyData propState) throws RepositoryException
-
    {
-
-      QPath nodePath = nodeState.getQPath();
-      QPath propPath = propState.getQPath();
       throw new RepositoryException();
-      // Path p = nodePath.computeRelativePath(propPath);
-      // // make sure it does not contain indexes
-      // boolean clean = true;
-      // Path.Element[] elements = p.getElements();
-      // for (int i = 0; i < elements.length; i++)
-      // {
-      // if (elements[i].getIndex() != 0)
-      // {
-      // elements[i] = PATH_FACTORY.createElement(elements[i].getName());
-      // clean = false;
-      // }
-      // }
-      // if (!clean)
-      // {
-      // p = PATH_FACTORY.create(elements);
-      // }
-
-      // return p;
    }
 
    /**
@@ -2101,7 +2077,6 @@
     *            value=NodeState.
     */
    protected void retrieveAggregateRoot(final Set<String> removedNodeIds, final Map<String, NodeData> map)
-
    {
       if (indexingConfig != null)
       {

Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SingletonTokenStream.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SingletonTokenStream.java	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SingletonTokenStream.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -62,7 +62,8 @@
     */
    public SingletonTokenStream()
    {
-
+      termAttribute = addAttribute(TermAttribute.class);
+      payloadAttribute = addAttribute(PayloadAttribute.class);
    }
 
    /**
@@ -77,8 +78,8 @@
    {
       this.value = value;
       this.payload = payload;
-      termAttribute = (TermAttribute)addAttribute(TermAttribute.class);
-      payloadAttribute = (PayloadAttribute)addAttribute(PayloadAttribute.class);
+      termAttribute = addAttribute(TermAttribute.class);
+      payloadAttribute = addAttribute(PayloadAttribute.class);
    }
 
    /**
@@ -95,17 +96,6 @@
       this(value, new Payload(new PropertyMetaData(type).toByteArray()));
    }
 
-   /**
-    * Creates a new SingleTokenStream with the given token.
-    *
-    * @param t the token.
-    */
-   @Deprecated
-   public SingletonTokenStream(Token t)
-   {
-      this(t.term(), t.getPayload());
-   }
-
    @Override
    public boolean incrementToken() throws IOException
    {
@@ -136,8 +126,6 @@
    public void close() throws IOException
    {
       consumed = true;
-      value = null;
-      payload = null;
       payloadAttribute = null;
       termAttribute = null;
    }

Modified: jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -17,15 +17,19 @@
 package org.exoplatform.services.jcr.impl.core.query.lucene.spell;
 
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.search.spell.LuceneDictionary;
 import org.apache.lucene.search.spell.SpellChecker;
+import org.apache.lucene.search.spell.SuggestMode;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Version;
 import org.exoplatform.commons.utils.SecurityHelper;
 import org.exoplatform.services.jcr.impl.core.query.QueryHandler;
 import org.exoplatform.services.jcr.impl.core.query.QueryRootNode;
@@ -362,23 +366,23 @@
       private void tokenize(String statement, List<String> words, List<TokenData> tokens) throws IOException
       {
          TokenStream ts = handler.getTextAnalyzer().tokenStream(FieldNames.FULLTEXT, new StringReader(statement));
-         TermAttribute term = (TermAttribute)ts.getAttribute(TermAttribute.class);
-         PositionIncrementAttribute positionIncrement =
-            (PositionIncrementAttribute)ts.getAttribute(PositionIncrementAttribute.class);
-         OffsetAttribute offset = (OffsetAttribute)ts.getAttribute(OffsetAttribute.class);
+         CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
+         PositionIncrementAttribute positionIncrement = ts.getAttribute(PositionIncrementAttribute.class);
+         OffsetAttribute offset = ts.getAttribute(OffsetAttribute.class);
          try
          {
+            String word;
             while (ts.incrementToken())
             {
 
-               String word = term.term();
+               word = new String(term.buffer(), 0, term.length());
                //            while ((t = ts.next()) != null)
                //            {
                String origWord = statement.substring(offset.startOffset(), offset.endOffset());
                if (positionIncrement.getPositionIncrement() > 0)
                {
                   words.add(word);
-                  tokens.add(new TokenData(offset.startOffset(), offset.endOffset(), term.term()));
+                  tokens.add(new TokenData(offset.startOffset(), offset.endOffset(), word));
                }
                else
                {
@@ -389,8 +393,7 @@
                   {
                      // replace current token and word
                      words.set(words.size() - 1, word);
-                     tokens
-                        .set(tokens.size() - 1, new TokenData(offset.startOffset(), offset.endOffset(), term.term()));
+                     tokens.set(tokens.size() - 1, new TokenData(offset.startOffset(), offset.endOffset(), word));
                   }
                }
             }
@@ -459,8 +462,9 @@
                         {
                            public String[] run() throws Exception
                            {
-                              return spellChecker.suggestSimilar(words[currentIndex], 5, reader, FieldNames.FULLTEXT,
-                                 morePopular);
+                              return spellChecker
+                                 .suggestSimilar(words[currentIndex], 5, reader, FieldNames.FULLTEXT, morePopular
+                                    ? SuggestMode.SUGGEST_MORE_POPULAR : SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
                            }
                         });
 
@@ -548,7 +552,8 @@
                                     long time = System.currentTimeMillis();
                                     Dictionary dict = new LuceneDictionary(reader, FieldNames.FULLTEXT);
                                     LOG.debug("Starting spell checker index refresh");
-                                    spellChecker.indexDictionary(dict);
+                                    spellChecker.indexDictionary(dict, new IndexWriterConfig(Version.LUCENE_30,
+                                       new StandardAnalyzer(Version.LUCENE_30)), true);
                                     time = System.currentTimeMillis() - time;
                                     time = time / 1000;
                                     LOG.info("Spell checker index refreshed in: " + new Long(time) + " s.");

Modified: jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/impl/core/query/TestIndexingConfig.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/impl/core/query/TestIndexingConfig.java	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/impl/core/query/TestIndexingConfig.java	2012-03-20 15:38:57 UTC (rev 5924)
@@ -101,9 +101,9 @@
       IndexingConfigurationImpl indexingConfigurationImpl = (IndexingConfigurationImpl)searchIndex.getIndexingConfig();
       assertNotNull(indexingConfigurationImpl);
 
-      indexingConfigurationImpl.addPropertyAnalyzer("FULL:" + simple, new SimpleAnalyzer());
-      indexingConfigurationImpl.addPropertyAnalyzer("FULL:" + whitespace, new WhitespaceAnalyzer());
-      indexingConfigurationImpl.addPropertyAnalyzer("FULL:" + stop, new StopAnalyzer(Version.LUCENE_24));
+      indexingConfigurationImpl.addPropertyAnalyzer("FULL:" + simple, new SimpleAnalyzer(Version.LUCENE_30));
+      indexingConfigurationImpl.addPropertyAnalyzer("FULL:" + whitespace, new WhitespaceAnalyzer(Version.LUCENE_30));
+      indexingConfigurationImpl.addPropertyAnalyzer("FULL:" + stop, new StopAnalyzer(Version.LUCENE_30));
       testRoot = testSession.getRootNode().addNode("testrootAnalyzers");
       root.save();
    }

Modified: jcr/trunk/packaging/module/src/main/javascript/jcr.packaging.module.js
===================================================================
--- jcr/trunk/packaging/module/src/main/javascript/jcr.packaging.module.js	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/packaging/module/src/main/javascript/jcr.packaging.module.js	2012-03-20 15:38:57 UTC (rev 5924)
@@ -26,9 +26,10 @@
     addDependency(new Project("jboss.jbossts","jbossjts","jar","4.6.1.GA")).
     addDependency(new Project("jboss.jbossts","jbossts-common","jar","4.6.1.GA")).
     addDependency(new Project("org.apache.ws.commons","ws-commons-util","jar","1.0.1")).
-    addDependency(new Project("org.apache.lucene", "lucene-core", "jar", "2.9.4")).
-    addDependency(new Project("org.apache.lucene", "lucene-spellchecker", "jar", "2.9.4")).
-    addDependency(new Project("org.apache.lucene", "lucene-memory", "jar", "2.9.4"));
+    addDependency(new Project("org.apache.lucene", "lucene-core", "jar", "3.5.0")).
+    addDependency(new Project("org.apache.lucene", "lucene-spellchecker", "jar", "3.5.0")).
+    addDependency(new Project("org.apache.lucene", "lucene-wordnet", "jar", "3.3.0")).
+    addDependency(new Project("org.apache.lucene", "lucene-memory", "jar", "3.5.0"));
 
   module.frameworks = {}
   module.frameworks.web = 

Modified: jcr/trunk/pom.xml
===================================================================
--- jcr/trunk/pom.xml	2012-03-20 14:01:39 UTC (rev 5923)
+++ jcr/trunk/pom.xml	2012-03-20 15:38:57 UTC (rev 5924)
@@ -279,22 +279,22 @@
       <dependency>
         <groupId>org.apache.lucene</groupId>
         <artifactId>lucene-core</artifactId>
-        <version>3.0.3</version>
+        <version>3.5.0</version>
       </dependency>
       <dependency>
         <groupId>org.apache.lucene</groupId>
         <artifactId>lucene-spellchecker</artifactId>
-        <version>3.0.3</version>
+        <version>3.5.0</version>
       </dependency>
       <dependency>
         <groupId>org.apache.lucene</groupId>
         <artifactId>lucene-memory</artifactId>
-        <version>3.0.3</version>
+        <version>3.5.0</version>
       </dependency>
       <dependency>
         <groupId>org.apache.lucene</groupId>
         <artifactId>lucene-wordnet</artifactId>
-        <version>3.0.3</version>
+        <version>3.3.0</version>
       </dependency>            
       <dependency>
         <groupId>com.sun.xml.stream</groupId>



More information about the exo-jcr-commits mailing list