[exo-jcr-commits] exo-jcr SVN: r2871 - jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching.

Wed Aug 4 06:49:40 EDT 2010

Author: sergiykarpenko
Date: 2010-08-04 06:49:39 -0400 (Wed, 04 Aug 2010)
New Revision: 2871

Added:
   jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/aggregation-rule.xml
   jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/ignore-accent-symbols.xml
Modified:
   jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-and-settings.xml
   jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/jcr-query-usecases.xml
Log:
EXOJCR-869: jcr-query-usecases : fulltext search ported

Added: jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/aggregation-rule.xml
===================================================================

--- jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/aggregation-rule.xml	                        (rev 0)
+++ jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/aggregation-rule.xml	2010-08-04 10:49:39 UTC (rev 2871)
@@ -0,0 +1,163 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<section id="JCR.AggregationRule">
+  <title>Find nt:file node by content of child jcr:content node</title>
+
+  <para>The node type nt:file represents a file. It requires a single child
+  node, called jcr:content. This node type represents images and other binary
+  content in a JCRWiki entry. The node type of jcr:content is nt:resource which
+  represents the actual content of a file.</para>
+
+  <para>Find nodes whose primary type is 'nt:file' and whose
+  'jcr:content' child node contains "cats".</para>
+
+  <para>Normally, we can't find nodes (in our case) using just JCR SQL or
+  XPath queries. But we can configure indexing so that nt:file aggregates
+  jcr:content child node.</para>
+
+  <para>So, change indexing-configuration.xml:</para>
+
+  <programlisting>&lt;?xml version="1.0"?&gt;
+&lt;!DOCTYPE configuration SYSTEM "http://www.exoplatform.org/dtd/indexing-configuration-1.2.dtd"&gt;
+&lt;configuration xmlns:jcr="http://www.jcp.org/jcr/1.0"
+               xmlns:nt="http://www.jcp.org/jcr/nt/1.0"&gt;
+    &lt;aggregate primaryType="nt:file"&gt;
+        &lt;include&gt;jcr:content&lt;/include&gt;
+        &lt;include&gt;jcr:content/*&lt;/include&gt;
+        &lt;include-property&gt;jcr:content/jcr:lastModified&lt;/include-property&gt;
+    &lt;/aggregate&gt;
+&lt;/configuration&gt;</programlisting>
+
+  <para>Now the content of 'nt:file' and 'jcr:content' ('nt:resource') nodes
+  are concatenated in a single Lucene document. Then, we can make a fulltext
+  search query by content of 'nt:file'; this search includes the content of
+  child 'jcr:content' node.</para>
+
+  <section>
+    <title>Repository structure:</title>
+
+    <para>Repository contains different nt:file nodes.</para>
+
+    <itemizedlist>
+      <listitem>
+        <para>root</para>
+
+        <itemizedlist>
+          <listitem>
+            <para>document1 (nt:file)</para>
+
+            <itemizedlist>
+              <listitem>
+                <para>jcr:content (nt:resource) jcr:data = "The quick brown
+                fox jumps over the lazy dog."</para>
+              </listitem>
+            </itemizedlist>
+          </listitem>
+
+          <listitem>
+            <para>document2 (nt:file)</para>
+
+            <itemizedlist>
+              <listitem>
+                <para>jcr:content (nt:resource) jcr:data = "Dogs do not like
+                cats."</para>
+              </listitem>
+            </itemizedlist>
+          </listitem>
+
+          <listitem>
+            <para>document3 (nt:file)</para>
+
+            <itemizedlist>
+              <listitem>
+                <para>jcr:content (nt:resource) jcr:data = "Cats jumping
+                high."</para>
+              </listitem>
+            </itemizedlist>
+          </listitem>
+        </itemizedlist>
+      </listitem>
+    </itemizedlist>
+  </section>
+
+  <section>
+    <title>Query execution</title>
+
+    <para><emphasis role="bold">SQL</emphasis></para>
+
+    <programlisting>// make SQL query
+QueryManager queryManager = workspace.getQueryManager();
+// create query
+String sqlStatement = "SELECT * FROM nt:file WHERE CONTAINS(*,'cats')";
+Query query = queryManager.createQuery(sqlStatement, Query.SQL);
+// execute query and fetch result
+QueryResult result = query.execute();</programlisting>
+
+    <para><emphasis role="bold">XPath</emphasis></para>
+
+    <programlisting>// make XPath query
+QueryManager queryManager = workspace.getQueryManager();
+// create query
+String xpathStatement = "//element(*,nt:file)[jcr:contains(.,'cats')]";
+Query query = queryManager.createQuery(xpathStatement, Query.XPATH);
+// execute query and fetch result
+QueryResult result = query.execute();</programlisting>
+  </section>
+
+  <section>
+    <title>Fetching the result</title>
+
+    <para>Let's get nodes:</para>
+
+    <programlisting>NodeIterator it = result.getNodes();
+
+if(it.hasNext())
+{
+   Node findedNode = it.nextNode();
+}</programlisting>
+
+    <para>NodeIterator will return "document2" and "document3".</para>
+
+    <para>We can also get a table:</para>
+
+    <programlisting>String[] columnNames = result.getColumnNames();
+RowIterator rit = result.getRows();
+while (rit.hasNext())
+{
+   Row row = rit.nextRow();
+   // get values of the row
+   Value[] values = row.getValues();
+}</programlisting>
+
+    <para>Table content is:</para>
+
+    <table>
+      <title>Table content</title>
+
+      <tgroup cols="2">
+        <thead>
+          <row>
+            <entry>jcr:path</entry>
+
+            <entry>jcr:score</entry>
+          </row>
+        </thead>
+
+        <tbody>
+          <row>
+            <entry>/document2</entry>
+
+            <entry>1030</entry>
+          </row>
+
+          <row>
+            <entry>/document3</entry>
+
+            <entry>1030</entry>
+          </row>
+        </tbody>
+      </tgroup>
+    </table>
+  </section>
+</section>

Modified: jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-and-settings.xml
===================================================================
--- jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-and-settings.xml	2010-08-04 09:10:59 UTC (rev 2870)
+++ jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-and-settings.xml	2010-08-04 10:49:39 UTC (rev 2871)
@@ -151,7 +151,7 @@
     can it be searched like fulltext by property or not.</para>
 
     <para>Only two property types are indexed as fulltext searcheable: STRING
-    and BINARY. </para>
+    and BINARY.</para>
 
     <table>
       <title>Fulltext search by different properties</title>
@@ -206,23 +206,23 @@
 
     <itemizedlist>
       <listitem>
-        <para><link linkend="???">JCR.Fulltext Search by
-        Property</link></para>
+        <para><link linkend="JCR.FulltextSearchByProperty">JCR.Fulltext Search
+        by Property</link></para>
       </listitem>
 
       <listitem>
-        <para><link linkend="???">JCR.Fulltext Search by All
-        Properties</link></para>
+        <para><link linkend="JCR.FulltextSearchByAllProperties">JCR.Fulltext
+        Search by All Properties</link></para>
       </listitem>
 
       <listitem>
-        <para><link linkend="???">Find nt:file document by content of its
-        child jcr:content node&gt;Aggregation rule</link></para>
+        <para><link linkend="JCR.AggregationRule">Find nt:file document by
+        content of its child jcr:content node</link></para>
       </listitem>
 
       <listitem>
-        <para><link linkend="???">How to set a new analyzer. Accent symbols
-        ignoring&gt;JCR.Ignore Accent Symbols</link></para>
+        <para><link linkend="JCR.IgnoreAccentSymbols">How to set a new
+        analyzer. Accent symbols ignoring</link></para>
       </listitem>
     </itemizedlist>
   </section>

Added: jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/ignore-accent-symbols.xml
===================================================================
--- jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/ignore-accent-symbols.xml	                        (rev 0)
+++ jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/ignore-accent-symbols.xml	2010-08-04 10:49:39 UTC (rev 2871)
@@ -0,0 +1,194 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<section id="JCR.IgnoreAccentSymbols">
+  <title>Ignore Accent Symbols. New Analyzer Setting.</title>
+
+  <para>In this example we will create a new Analyzer, set it in the
+  QueryHandler configuration, and run a query to check it.</para>
+
+  <para>The standard analyzer does not normalize accents like é,è,à. So a word
+  like 'tréma' will be stored in the index as 'tréma'. But what if we want to
+  normalize such symbols? We want to store the word 'tréma' as 'trema'.</para>
+
+  <para>There are two ways of setting up a new Analyzer (no matter whether it
+  is a standard one or our own):</para>
+
+  <itemizedlist>
+    <listitem>
+      <para>first is creating descendant class of SearchIndex with new
+      Analyzer (see <link linkend="JCR.SearchConfiguration">Search
+      Configuration</link>);</para>
+    </listitem>
+  </itemizedlist>
+
+  <para>There is only one way - create a new Analyzer (if there is no
+  previously created one that suits our needs) and set it in Search index.</para>
+
+  <itemizedlist>
+    <listitem>
+      <para>second is registering the new Analyzer in the QueryHandler
+      configuration (this one is supported since version 1.12);</para>
+    </listitem>
+  </itemizedlist>
+
+  <para>We will use the last one:</para>
+
+  <itemizedlist>
+    <listitem>
+      <para>Create new MyAnalyzer</para>
+    </listitem>
+  </itemizedlist>
+
+  <programlisting>public class MyAnalyzer extends Analyzer
+{
+   @Override
+   public TokenStream tokenStream(String fieldName, Reader reader)
+   {
+      StandardTokenizer tokenStream = new StandardTokenizer(reader);
+      // process all text with standard filter
+      // removes 's (as 's in "Peter's") from the end of words and removes dots from acronyms.
+      TokenStream result = new StandardFilter(tokenStream);
+      // this filter normalizes token text to lower case
+      result = new LowerCaseFilter(result);
+      // this one replaces accented characters in the ISO Latin 1 character set (ISO-8859-1) by their unaccented equivalents
+      result = new ISOLatin1AccentFilter(result);
+      // and finally return token stream
+      return result;
+   }
+}</programlisting>
+
+  <itemizedlist>
+    <listitem>
+      <para>then, register new MyAnalyzer in configuration</para>
+    </listitem>
+  </itemizedlist>
+
+  <programlisting>&lt;workspace name="ws"&gt;
+   ...
+   &lt;query-handler class="org.exoplatform.services.jcr.impl.core.query.lucene.SearchIndex"&gt;
+      &lt;properties&gt;
+         &lt;property name="analyzer" value="org.exoplatform.services.jcr.impl.core.MyAnalyzer"/&gt;
+         ...
+      &lt;/properties&gt;
+   &lt;/query-handler&gt;
+   ...
+&lt;/workspace&gt;</programlisting>
+
+  <para>Now it is time to check it with a query:</para>
+
+  <para>Find node with mixin type 'mix:title' where 'jcr:title' contains
+  "tréma" and "naïve" strings.</para>
+
+  <section>
+    <title>Repository structure:</title>
+
+    <para>Repository filled by nodes with mixin type 'mix:title' and different
+    values of 'jcr:title' property.</para>
+
+    <itemizedlist>
+      <listitem>
+        <para>root</para>
+
+        <itemizedlist>
+          <listitem>
+            <para>node1 (mix:title) jcr:title = "tréma blabla naïve"</para>
+          </listitem>
+
+          <listitem>
+            <para>node2 (mix:title) jcr:title = "trema come text
+            naive"</para>
+          </listitem>
+        </itemizedlist>
+      </listitem>
+    </itemizedlist>
+  </section>
+
+  <section>
+    <title>Query execution</title>
+
+    <para><emphasis role="bold">SQL</emphasis></para>
+
+    <programlisting>// make SQL query
+QueryManager queryManager = workspace.getQueryManager();
+// create query
+String sqlStatement = "SELECT * FROM mix:title WHERE CONTAINS(jcr:title, 'tr\u00E8ma na\u00EFve')";
+Query query = queryManager.createQuery(sqlStatement, Query.SQL);
+// execute query and fetch result
+QueryResult result = query.execute();</programlisting>
+
+    <para><emphasis role="bold">XPath</emphasis></para>
+
+    <programlisting>// make XPath query
+QueryManager queryManager = workspace.getQueryManager();
+// create query
+String xpathStatement = "//element(*,mix:title)[jcr:contains(@jcr:title, 'tr\u00E8ma na\u00EFve')]";
+Query query = queryManager.createQuery(xpathStatement, Query.XPATH);
+// execute query and fetch result
+QueryResult result = query.execute();</programlisting>
+  </section>
+
+  <section>
+    <title>Fetch result</title>
+
+    <para>Let's get nodes:</para>
+
+    <programlisting>NodeIterator it = result.getNodes();
+
+if(it.hasNext())
+{
+   Node findedNode = it.nextNode();
+}</programlisting>
+
+    <para>NodeIterator will return "node1" and "node2". How is it possible?
+    Remember that our MyAnalyzer transforms the word 'tréma' to 'trema'. So
+    node2 satisfies our constraints too.</para>
+
+    <para>Also we can get a table:</para>
+
+    <programlisting>String[] columnNames = result.getColumnNames();
+RowIterator rit = result.getRows();
+while (rit.hasNext())
+{
+   Row row = rit.nextRow();
+   // get values of the row
+   Value[] values = row.getValues();
+}</programlisting>
+
+    <para>Table content is:</para>
+
+    <table>
+      <title>Table content</title>
+
+      <tgroup cols="3">
+        <thead>
+          <row>
+            <entry>jcr:title</entry>
+
+            <entry>...</entry>
+
+            <entry>jcr:path</entry>
+          </row>
+        </thead>
+
+        <tbody>
+          <row>
+            <entry>tréma blabla naïve</entry>
+
+            <entry>...</entry>
+
+            <entry>/node1</entry>
+          </row>
+
+          <row>
+            <entry>trema come text naive</entry>
+
+            <entry>...</entry>
+
+            <entry>/node2</entry>
+          </row>
+        </tbody>
+      </tgroup>
+    </table>
+  </section>
+</section>

Modified: jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/jcr-query-usecases.xml
===================================================================
--- jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/jcr-query-usecases.xml	2010-08-04 09:10:59 UTC (rev 2870)
+++ jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/jcr-query-usecases.xml	2010-08-04 10:49:39 UTC (rev 2871)
@@ -239,17 +239,13 @@
         </listitem>
 
         <listitem>
-          <para><ulink
-          url="Find nt:file document by content of child jcr:content node&gt;Aggregation rule">Find
-          nt:file document by content of child jcr:content node&gt;Aggregation
-          rule</ulink></para>
+          <para><link linkend="JCR.AggregationRule">Find nt:file document by
+          content of child jcr:content node</link></para>
         </listitem>
 
         <listitem>
-          <para><ulink
-          url="How to set new Analyzer. Accent symblos ignoring&gt;JCR.Ignore Accent Symbols">How
-          to set new Analyzer. Accent symblos ignoring&gt;JCR.Ignore Accent
-          Symbols</ulink></para>
+          <para><link linkend="JCR.IgnoreAccentSymbols">How to set new
+          Analyzer. Accent symbols ignoring</link></para>
         </listitem>
       </itemizedlist>
     </section>