[exo-jcr-commits] exo-jcr SVN: r2581 - in jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr: impl/core/query and 2 other directories.

do-not-reply at jboss.org do-not-reply at jboss.org
Mon Jun 14 11:24:49 EDT 2010


Author: sergiykarpenko
Date: 2010-06-14 11:24:48 -0400 (Mon, 14 Jun 2010)
New Revision: 2581

Modified:
   jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/config/QueryHandlerParams.java
   jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/SearchIndexConfigurationHelper.java
   jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java
   jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SpellChecker.java
   jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java
Log:
EXOJCR-787: make LuceneSpellChecker configurable

Modified: jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/config/QueryHandlerParams.java
===================================================================
--- jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/config/QueryHandlerParams.java	2010-06-14 14:35:10 UTC (rev 2580)
+++ jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/config/QueryHandlerParams.java	2010-06-14 15:24:48 UTC (rev 2581)
@@ -83,6 +83,10 @@
 
    public static final String PARAM_SPELLCHECKER_CLASS = "spellchecker-class";
 
+   public static final String PARAM_SPELLCHECKER_MORE_POPULAR = "spellchecker-more-popular";
+
+   public static final String PARAM_SPELLCHECKER_DISTANCE = "spellchecker-min-distance";
+
    public static final String PARAM_SUPPORT_HIGHLIGHTING = "support-highlighting";
 
    public static final String PARAM_SYNONYMPROVIDER_CLASS = "synonymprovider-class";
@@ -92,9 +96,9 @@
    public static final String PARAM_USE_COMPOUNDFILE = "use-compoundfile";
 
    public static final String PARAM_VOLATILE_IDLE_TIME = "volatile-idle-time";
-   
+
    public static final String PARAM_MAX_VOLATILE_SIZE = "max-volatile-size";
-   
+
    public static final String PARAM_MAX_VOLATILE_TIME = "max-volatile-time";
 
    //since https://jira.jboss.org/jira/browse/EXOJCR-17
@@ -106,8 +110,8 @@
    public static final String PARAM_CHANGES_FILTER_CLASS = "changesfilter-class";
 
    public static final String PARAM_JBOSSCACHE_CONFIGURATION = "jbosscache-configuration";
-   
+
    public static final String PARAM_JBOSSCACHE_PUSHSTATE = "jbosscache-sscl-push.state.enabled";
-   
+
    public static final String PARAM_JBOSSCACHE_PUSHSTATE_TIMEOUT = "jbosscache-sscl-push.state.timeout";
 }

Modified: jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/SearchIndexConfigurationHelper.java
===================================================================
--- jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/SearchIndexConfigurationHelper.java	2010-06-14 14:35:10 UTC (rev 2580)
+++ jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/SearchIndexConfigurationHelper.java	2010-06-14 15:24:48 UTC (rev 2581)
@@ -66,7 +66,6 @@
     */
    private void setParam(String name, String value)
    {
-
       if (QueryHandlerParams.PARAM_AUTO_REPAIR.equals(name))
       {
          searchIndex.setAutoRepair(Boolean.parseBoolean(value));
@@ -179,5 +178,13 @@
       {
          searchIndex.setAnalyzer(value);
       }
+      else if (QueryHandlerParams.PARAM_SPELLCHECKER_MORE_POPULAR.equals(name))
+      {
+         searchIndex.setSpellCheckerMorePopuar(Boolean.parseBoolean(value));
+      }
+      else if (QueryHandlerParams.PARAM_SPELLCHECKER_DISTANCE.equals(name))
+      {
+         searchIndex.setSpellCheckerMinDistance(StringNumberParser.parseNumber(value).floatValue());
+      }
    }
 }

Modified: jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java
===================================================================
--- jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java	2010-06-14 14:35:10 UTC (rev 2580)
+++ jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java	2010-06-14 15:24:48 UTC (rev 2581)
@@ -384,6 +384,16 @@
    private SpellChecker spellChecker;
 
    /**
+    * Return most popular results.
+    */
+   private boolean spellCheckerMorePopular = true;
+
+   /**
+    * Minimal distance between the spell-checked word and a proposed word.
+    */
+   private float spellCheckerMinDistance = 0.55f;
+
+   /**
     * The similarity in use for indexing and searching.
     */
    private Similarity similarity = Similarity.getDefault();
@@ -1357,13 +1367,14 @@
     */
    protected SpellChecker createSpellChecker()
    {
+      // spell checker config
       SpellChecker spCheck = null;
       if (spellCheckerClass != null)
       {
          try
          {
             spCheck = spellCheckerClass.newInstance();
-            spCheck.init(this);
+            spCheck.init(this, spellCheckerMinDistance, spellCheckerMorePopular);
          }
          catch (Exception e)
          {
@@ -2385,6 +2396,24 @@
    }
 
    /**
+    * Set SpellChecker morePopular parameter.
+    * @param morePopular boolean
+    */
+   public void setSpellCheckerMorePopuar(boolean morePopular)
+   {
+      spellCheckerMorePopular = morePopular;
+   }
+
+   /**
+    * Set SpellChecker minimal word distance.
+    * @param minDistance float
+    */
+   public void setSpellCheckerMinDistance(float minDistance)
+   {
+      spellCheckerMinDistance = minDistance;
+   }
+
+   /**
     * @return the class name of the spell checker implementation or
     *         <code>null</code> if none is set.
     */

Modified: jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SpellChecker.java
===================================================================
--- jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SpellChecker.java	2010-06-14 14:35:10 UTC (rev 2580)
+++ jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SpellChecker.java	2010-06-14 15:24:48 UTC (rev 2581)
@@ -27,38 +27,43 @@
  * <code>SpellChecker</code> defines an interface to run a spellchecker over a
  * fulltext query statement.
  */
-public interface SpellChecker {
+public interface SpellChecker
+{
 
-    /**
-     * Initializes this spell checker with an abstract query tree.
-     * 
-     * @param handler
-     *            the query handler that created this spell checker.
-     * @throws IOException
-     *             if an error occurs while initializing the spell checker.
-     */
-    void init(QueryHandler handler) throws IOException;
+   /**
+    * Initializes this spell checker.
+    * 
+    * @param handler
+    *            the query handler that created this spell checker.
+    * @param minDistance
+    *            minimal distance between the word and a proposed close word. Float value 0..1.
+    * @param morePopular
+    *            return only the suggested words that are as frequent or more frequent than the searched word
+    * @throws IOException
+    *             if an error occurs while initializing the spell checker.
+    */
+   void init(QueryHandler handler, float minDistance, boolean morePopular) throws IOException;
 
-    /**
-     * Runs the spell checker over the first spellcheck relation query node in
-     * the abstract query tree and returns a suggestion in case this
-     * spellchecker thinks the words are misspelled. If the spellchecker
-     * determines that the words are spelled correctly <code>null</code> is
-     * returned.
-     * 
-     * @param aqt
-     *            the abstract query tree, which may contain a relation query
-     *            node with a spellcheck operation.
-     * @return a suggestion or <code>null</code> if this spell checker
-     *         determines that the fulltext query statement is spelled
-     *         correctly.
-     * @throws RepositoryException
-     */
-    String check(QueryRootNode aqt) throws IOException, RepositoryException;
+   /**
+    * Runs the spell checker over the first spellcheck relation query node in
+    * the abstract query tree and returns a suggestion in case this
+    * spellchecker thinks the words are misspelled. If the spellchecker
+    * determines that the words are spelled correctly <code>null</code> is
+    * returned.
+    * 
+    * @param aqt
+    *            the abstract query tree, which may contain a relation query
+    *            node with a spellcheck operation.
+    * @return a suggestion or <code>null</code> if this spell checker
+    *         determines that the fulltext query statement is spelled
+    *         correctly.
+    * @throws RepositoryException
+    */
+   String check(QueryRootNode aqt) throws IOException, RepositoryException;
 
-    /**
-     * Closes this spell checker and allows it to free resources.
-     */
-    void close();
+   /**
+    * Closes this spell checker and allows it to free resources.
+    */
+   void close();
 
 }

Modified: jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java
===================================================================
--- jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java	2010-06-14 14:35:10 UTC (rev 2580)
+++ jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java	2010-06-14 15:24:48 UTC (rev 2581)
@@ -143,18 +143,13 @@
    }
 
    /**
-    * Initializes this spell checker.
-    * 
-    * @param handler
-    *            the query handler that created this spell checker.
-    * @throws IOException
-    *             if <code>handler</code> is not of type {@link SearchIndex}.
+    * {@inheritDoc}
     */
-   public void init(QueryHandler handler) throws IOException
+   public void init(QueryHandler handler, float minDistance, boolean morePopular) throws IOException
    {
       if (handler instanceof SearchIndex)
       {
-         this.spellChecker = new InternalSpellChecker((SearchIndex)handler);
+         this.spellChecker = new InternalSpellChecker((SearchIndex)handler, minDistance, morePopular);
       }
       else
       {
@@ -183,8 +178,7 @@
       spellChecker.close();
    }
 
-   // ------------------------------< internal
-   // >--------------------------------
+   // ------------------------------< internal >--------------------------------
 
    /**
     * Returns the fulltext statement of a spellcheck relation query node or
@@ -240,13 +234,19 @@
        */
       private SpellChecker spellChecker;
 
+      private final boolean morePopular;
+
       /**
        * Creates a new internal spell checker.
        * 
        * @param handler
        *            the associated query handler.
+       * @param minDistance
+       *            minimal distance between the word and a proposed close word. Float value 0..1.
+       * @param morePopular
+       *            return only the suggested words that are as frequent or more frequent than the searched word
        */
-      InternalSpellChecker(SearchIndex handler) throws IOException
+      InternalSpellChecker(SearchIndex handler, float minDistance, boolean morePopular) throws IOException
       {
          this.handler = handler;
          String path = handler.getContext().getIndexDirectory() + File.separatorChar + "spellchecker";
@@ -256,7 +256,8 @@
             this.lastRefresh = System.currentTimeMillis();
          }
          this.spellChecker = new SpellChecker(spellIndexDirectory);
-         this.spellChecker.setAccuracy(0.55f);
+         this.spellChecker.setAccuracy(minDistance);
+         this.morePopular = morePopular;
          refreshSpellChecker();
       }
 
@@ -291,7 +292,16 @@
                   sb.replace(t.startOffset(), t.endOffset(), suggestions[i]);
                }
             }
-            return sb.toString();
+            // if the suggestion is the same as the statement, return null
+            String result = sb.toString();
+            if (statement.equalsIgnoreCase(result))
+            {
+               return null;
+            }
+            else
+            {
+               return result;
+            }
          }
          else
          {
@@ -389,7 +399,9 @@
                   String[] suggestion = new String[words.length];
                   for (int i = 0; i < words.length; i++)
                   {
-                     String[] similar = spellChecker.suggestSimilar(words[i], 5, reader, FieldNames.FULLTEXT, true);
+                     String[] similar =
+                        spellChecker.suggestSimilar(words[i], 5, reader, FieldNames.FULLTEXT, morePopular);
+
                      if (similar.length > 0)
                      {
                         suggestion[i] = similar[0];
@@ -479,6 +491,7 @@
                      }
                   };
                   new Thread(refresh, "SpellChecker Refresh").start();
+
                   lastRefresh = System.currentTimeMillis();
                }
             }



More information about the exo-jcr-commits mailing list