Author: sergiykarpenko
Date: 2010-06-14 11:24:48 -0400 (Mon, 14 Jun 2010)
New Revision: 2581
Modified:
jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/config/QueryHandlerParams.java
jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/SearchIndexConfigurationHelper.java
jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java
jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SpellChecker.java
jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java
Log:
EXOJCR-787: make LuceneSpellChecker configurable
Modified:
jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/config/QueryHandlerParams.java
===================================================================
--- jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/config/QueryHandlerParams.java 2010-06-14 14:35:10 UTC (rev 2580)
+++ jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/config/QueryHandlerParams.java 2010-06-14 15:24:48 UTC (rev 2581)
@@ -83,6 +83,10 @@
public static final String PARAM_SPELLCHECKER_CLASS = "spellchecker-class";
+ public static final String PARAM_SPELLCHECKER_MORE_POPULAR =
"spellchecker-more-popular";
+
+ public static final String PARAM_SPELLCHECKER_DISTANCE =
"spellchecker-min-distance";
+
public static final String PARAM_SUPPORT_HIGHLIGHTING =
"support-highlighting";
public static final String PARAM_SYNONYMPROVIDER_CLASS =
"synonymprovider-class";
@@ -92,9 +96,9 @@
public static final String PARAM_USE_COMPOUNDFILE = "use-compoundfile";
public static final String PARAM_VOLATILE_IDLE_TIME = "volatile-idle-time";
-
+
public static final String PARAM_MAX_VOLATILE_SIZE = "max-volatile-size";
-
+
public static final String PARAM_MAX_VOLATILE_TIME = "max-volatile-time";
//since https://jira.jboss.org/jira/browse/EXOJCR-17
@@ -106,8 +110,8 @@
public static final String PARAM_CHANGES_FILTER_CLASS =
"changesfilter-class";
public static final String PARAM_JBOSSCACHE_CONFIGURATION =
"jbosscache-configuration";
-
+
public static final String PARAM_JBOSSCACHE_PUSHSTATE =
"jbosscache-sscl-push.state.enabled";
-
+
public static final String PARAM_JBOSSCACHE_PUSHSTATE_TIMEOUT =
"jbosscache-sscl-push.state.timeout";
}
Modified:
jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/SearchIndexConfigurationHelper.java
===================================================================
--- jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/SearchIndexConfigurationHelper.java 2010-06-14 14:35:10 UTC (rev 2580)
+++ jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/SearchIndexConfigurationHelper.java 2010-06-14 15:24:48 UTC (rev 2581)
@@ -66,7 +66,6 @@
*/
private void setParam(String name, String value)
{
-
if (QueryHandlerParams.PARAM_AUTO_REPAIR.equals(name))
{
searchIndex.setAutoRepair(Boolean.parseBoolean(value));
@@ -179,5 +178,13 @@
{
searchIndex.setAnalyzer(value);
}
+ else if (QueryHandlerParams.PARAM_SPELLCHECKER_MORE_POPULAR.equals(name))
+ {
+ searchIndex.setSpellCheckerMorePopuar(Boolean.parseBoolean(value));
+ }
+ else if (QueryHandlerParams.PARAM_SPELLCHECKER_DISTANCE.equals(name))
+ {
+
searchIndex.setSpellCheckerMinDistance(StringNumberParser.parseNumber(value).floatValue());
+ }
}
}
Modified:
jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java
===================================================================
--- jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java 2010-06-14 14:35:10 UTC (rev 2580)
+++ jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SearchIndex.java 2010-06-14 15:24:48 UTC (rev 2581)
@@ -384,6 +384,16 @@
private SpellChecker spellChecker;
/**
+ * Return most popular results.
+ */
+ private boolean spellCheckerMorePopular = true;
+
+ /**
+ * Minimal distance between spell checked word and proposed word.
+ */
+ private float spellCheckerMinDistance = 0.55f;
+
+ /**
* The similarity in use for indexing and searching.
*/
private Similarity similarity = Similarity.getDefault();
@@ -1357,13 +1367,14 @@
*/
protected SpellChecker createSpellChecker()
{
+ // spell checker config
SpellChecker spCheck = null;
if (spellCheckerClass != null)
{
try
{
spCheck = spellCheckerClass.newInstance();
- spCheck.init(this);
+ spCheck.init(this, spellCheckerMinDistance, spellCheckerMorePopular);
}
catch (Exception e)
{
@@ -2385,6 +2396,24 @@
}
/**
+ * Set SpellChecker morePopular parameter.
+ * @param morePopular boolean
+ */
+ public void setSpellCheckerMorePopuar(boolean morePopular)
+ {
+ spellCheckerMorePopular = morePopular;
+ }
+
+ /**
+ * Set SpellChecker minimal word distance.
+ * @param minDistance float
+ */
+ public void setSpellCheckerMinDistance(float minDistance)
+ {
+ spellCheckerMinDistance = minDistance;
+ }
+
+ /**
* @return the class name of the spell checker implementation or
* <code>null</code> if none is set.
*/
Modified:
jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SpellChecker.java
===================================================================
--- jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SpellChecker.java 2010-06-14 14:35:10 UTC (rev 2580)
+++ jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/SpellChecker.java 2010-06-14 15:24:48 UTC (rev 2581)
@@ -27,38 +27,43 @@
* <code>SpellChecker</code> defines an interface to run a spellchecker over a
* fulltext query statement.
*/
-public interface SpellChecker {
+public interface SpellChecker
+{
- /**
- * Initializes this spell checker with an abstract query tree.
- *
- * @param handler
- * the query handler that created this spell checker.
- * @throws IOException
- * if an error occurs while initializing the spell checker.
- */
- void init(QueryHandler handler) throws IOException;
+ /**
+ * Initializes this spell checker.
+ *
+ * @param handler
+ * the query handler that created this spell checker.
+ * @param minDistance
+ * minimal distance between word and proposed close word. Float value 0..1.
+ * @param morePopular
+ * return only the suggest words that are as frequent or more frequent than the searched word
+ * @throws IOException
+ * if an error occurs while initializing the spell checker.
+ */
+ void init(QueryHandler handler, float minDistance, boolean morePopular) throws
IOException;
- /**
- * Runs the spell checker over the first spellcheck relation query node in
- * the abstract query tree and returns a suggestion in case this
- * spellchecker thinks the words are misspelled. If the spellchecker
- * determines that the words are spelled correctly <code>null</code> is
- * returned.
- *
- * @param aqt
- * the abstract query tree, which may contain a relation query
- * node with a spellcheck operation.
- * @return a suggestion or <code>null</code> if this spell checker
- * determines that the fulltext query statement is spelled
- * correctly.
- * @throws RepositoryException
- */
- String check(QueryRootNode aqt) throws IOException, RepositoryException;
+ /**
+ * Runs the spell checker over the first spellcheck relation query node in
+ * the abstract query tree and returns a suggestion in case this
+ * spellchecker thinks the words are misspelled. If the spellchecker
+ * determines that the words are spelled correctly <code>null</code> is
+ * returned.
+ *
+ * @param aqt
+ * the abstract query tree, which may contain a relation query
+ * node with a spellcheck operation.
+ * @return a suggestion or <code>null</code> if this spell checker
+ * determines that the fulltext query statement is spelled
+ * correctly.
+ * @throws RepositoryException
+ */
+ String check(QueryRootNode aqt) throws IOException, RepositoryException;
- /**
- * Closes this spell checker and allows it to free resources.
- */
- void close();
+ /**
+ * Closes this spell checker and allows it to free resources.
+ */
+ void close();
}
Modified:
jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java
===================================================================
--- jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java 2010-06-14 14:35:10 UTC (rev 2580)
+++ jcr/branches/1.12.x/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/spell/LuceneSpellChecker.java 2010-06-14 15:24:48 UTC (rev 2581)
@@ -143,18 +143,13 @@
}
/**
- * Initializes this spell checker.
- *
- * @param handler
- * the query handler that created this spell checker.
- * @throws IOException
- * if <code>handler</code> is not of type {@link SearchIndex}.
+ * {@inheritDoc}
*/
- public void init(QueryHandler handler) throws IOException
+ public void init(QueryHandler handler, float minDistance, boolean morePopular) throws
IOException
{
if (handler instanceof SearchIndex)
{
- this.spellChecker = new InternalSpellChecker((SearchIndex)handler);
+ this.spellChecker = new InternalSpellChecker((SearchIndex)handler, minDistance,
morePopular);
}
else
{
@@ -183,8 +178,7 @@
spellChecker.close();
}
- // ------------------------------< internal
- // >--------------------------------
+ // ------------------------------< internal >--------------------------------
/**
* Returns the fulltext statement of a spellcheck relation query node or
@@ -240,13 +234,19 @@
*/
private SpellChecker spellChecker;
+ private final boolean morePopular;
+
/**
* Creates a new internal spell checker.
*
* @param handler
* the associated query handler.
+ * @param minDistance
+ * minimal distance between word and proposed close word. Float value 0..1.
+ * @param morePopular
+ * return only the suggest words that are as frequent or more frequent than the searched word
*/
- InternalSpellChecker(SearchIndex handler) throws IOException
+ InternalSpellChecker(SearchIndex handler, float minDistance, boolean morePopular)
throws IOException
{
this.handler = handler;
String path = handler.getContext().getIndexDirectory() + File.separatorChar +
"spellchecker";
@@ -256,7 +256,8 @@
this.lastRefresh = System.currentTimeMillis();
}
this.spellChecker = new SpellChecker(spellIndexDirectory);
- this.spellChecker.setAccuracy(0.55f);
+ this.spellChecker.setAccuracy(minDistance);
+ this.morePopular = morePopular;
refreshSpellChecker();
}
@@ -291,7 +292,16 @@
sb.replace(t.startOffset(), t.endOffset(), suggestions[i]);
}
}
- return sb.toString();
+ // if suggestion is same as a statement return null
+ String result = sb.toString();
+ if (statement.equalsIgnoreCase(result))
+ {
+ return null;
+ }
+ else
+ {
+ return result;
+ }
}
else
{
@@ -389,7 +399,9 @@
String[] suggestion = new String[words.length];
for (int i = 0; i < words.length; i++)
{
- String[] similar = spellChecker.suggestSimilar(words[i], 5, reader,
FieldNames.FULLTEXT, true);
+ String[] similar =
+ spellChecker.suggestSimilar(words[i], 5, reader,
FieldNames.FULLTEXT, morePopular);
+
if (similar.length > 0)
{
suggestion[i] = similar[0];
@@ -479,6 +491,7 @@
}
};
new Thread(refresh, "SpellChecker Refresh").start();
+
lastRefresh = System.currentTimeMillis();
}
}