[exo-jcr-commits] exo-jcr SVN: r2870 - in jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules: jcr/searching and 1 other directory.

Wed Aug 4 05:11:00 EDT 2010

Author: sergiykarpenko
Date: 2010-08-04 05:10:59 -0400 (Wed, 04 Aug 2010)
New Revision: 2870

Added:
   jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-and-settings.xml
   jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-by-all-properties.xml
   jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-by-property.xml
Modified:
   jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr.xml
   jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/jcr-query-usecases.xml
   jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/order-by-path-or-name.xml
Log:
EXOJCR-869: jcr-query-usecases : fulltext search ported

Added: jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-and-settings.xml
===================================================================

--- jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-and-settings.xml	                        (rev 0)
+++ jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-and-settings.xml	2010-08-04 09:10:59 UTC (rev 2870)
@@ -0,0 +1,280 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<chapter id="JCR.FulltextSearchAndSettings">
+  <title>Fulltext Search And Affecting Settings</title>
+
+  <section>
+    <title>Property content indexing</title>
+
+    <para>Each property of a node (if it is indexable) is processed with
+    Lucene analyzer and stored in Lucene index. That's called indexing of a
+    property. After that we can perform a fulltext search among these indexed
+    properties.</para>
+  </section>
+
+  <section>
+    <title>Lucene Analyzers</title>
+
+    <para>The purpose of analyzers is to transform all strings stored in the
+    index into a well-defined form. The same analyzer(s) is/are used when
+    searching in order to adapt the query string to the indexed form.</para>
+
+    <para>Therefore, performing the same query using different analyzers can
+    return different results.</para>
+
+    <para>Now, let's see how the same string is transformed by different
+    analyzers.</para>
+
+    <table>
+      <title>"The quick brown fox jumped over the lazy dogs"</title>
+
+      <tgroup cols="2">
+        <thead>
+          <row>
+            <entry>Analyzer</entry>
+
+            <entry>Parsed</entry>
+          </row>
+        </thead>
+
+        <tbody>
+          <row>
+            <entry>org.apache.lucene.analysis.WhitespaceAnalyzer</entry>
+
+            <entry>[The] [quick] [brown] [fox] [jumped] [over] [the] [lazy]
+            [dogs]</entry>
+          </row>
+
+          <row>
+            <entry>org.apache.lucene.analysis.SimpleAnalyzer</entry>
+
+            <entry>[the] [quick] [brown] [fox] [jumped] [over] [the] [lazy]
+            [dogs]</entry>
+          </row>
+
+          <row>
+            <entry>org.apache.lucene.analysis.StopAnalyzer</entry>
+
+            <entry>[quick] [brown] [fox] [jumped] [over] [lazy] [dogs]</entry>
+          </row>
+
+          <row>
+            <entry>org.apache.lucene.analysis.standard.StandardAnalyzer</entry>
+
+            <entry>[quick] [brown] [fox] [jumped] [over] [lazy] [dogs]</entry>
+          </row>
+
+          <row>
+            <entry>org.apache.lucene.analysis.snowball.SnowballAnalyzer</entry>
+
+            <entry>[quick] [brown] [fox] [jump] [over] [lazi] [dog]</entry>
+          </row>
+
+          <row>
+            <entry>org.apache.lucene.analysis.standard.StandardAnalyzer
+            (configured without stop word - jcr default analyzer)</entry>
+
+            <entry>[the] [quick] [brown] [fox] [jumped] [over] [the] [lazy]
+            [dogs]</entry>
+          </row>
+        </tbody>
+      </tgroup>
+    </table>
+
+    <table>
+      <title>"XY&amp;Z Corporation - xyz@example.com"</title>
+
+      <tgroup cols="2">
+        <thead>
+          <row>
+            <entry>Analyzer</entry>
+
+            <entry>Parsed</entry>
+          </row>
+        </thead>
+
+        <tbody>
+          <row>
+            <entry>org.apache.lucene.analysis.WhitespaceAnalyzer</entry>
+
+            <entry>[XY&amp;Z] [Corporation] [-] [xyz@example.com]</entry>
+          </row>
+
+          <row>
+            <entry>org.apache.lucene.analysis.SimpleAnalyzer</entry>
+
+            <entry>[xy] [z] [corporation] [xyz] [example] [com]</entry>
+          </row>
+
+          <row>
+            <entry>org.apache.lucene.analysis.StopAnalyzer</entry>
+
+            <entry>[xy] [z] [corporation] [xyz] [example] [com]</entry>
+          </row>
+
+          <row>
+            <entry>org.apache.lucene.analysis.standard.StandardAnalyzer</entry>
+
+            <entry>[xy&amp;z] [corporation] [xyz@example] [com]</entry>
+          </row>
+
+          <row>
+            <entry>org.apache.lucene.analysis.snowball.SnowballAnalyzer</entry>
+
+            <entry>[xy&amp;z] [corpor] [xyz@exampl] [com]</entry>
+          </row>
+
+          <row>
+            <entry>org.apache.lucene.analysis.standard.StandardAnalyzer
+            (configured without stop word - jcr default analyzer)</entry>
+
+            <entry>[xy&amp;z] [corporation] [xyz@example] [com]</entry>
+          </row>
+        </tbody>
+      </tgroup>
+    </table>
+
+    <note>
+      <para>StandardAnalyzer is the default analyzer in exo's jcr search
+      engine. But we do not use stop words.</para>
+    </note>
+
+    <para>You can assign your analyzer as described in <link
+    linkend="JCR.SearchConfiguration">Search Configuration</link></para>
+  </section>
+
+  <section>
+    <title>How are different properties indexed?</title>
+
+    <para>Different properties are indexed in different ways, and this affects
+    whether a property can be found by a fulltext search on that property.</para>
+
+    <para>Only two property types are indexed as fulltext searchable: STRING
+    and BINARY.</para>
+
+    <table>
+      <title>Fulltext search by different properties</title>
+
+      <tgroup cols="3">
+        <thead>
+          <row>
+            <entry>Property Type</entry>
+
+            <entry>Fulltext search by all properties</entry>
+
+            <entry>Fulltext search by exact property</entry>
+          </row>
+        </thead>
+
+        <tbody>
+          <row>
+            <entry>STRING</entry>
+
+            <entry>YES</entry>
+
+            <entry>YES</entry>
+          </row>
+
+          <row>
+            <entry>BINARY</entry>
+
+            <entry>YES</entry>
+
+            <entry>NO</entry>
+          </row>
+        </tbody>
+      </tgroup>
+    </table>
+
+    <para>For example, we have the property jcr:data (it is BINARY). It is
+    stored correctly, but you will never find any string with a query like:</para>
+
+    <programlisting>SELECT * FROM nt:resource WHERE CONTAINS(jcr:data, 'some string')</programlisting>
+
+    <para>This is because BINARY is not searchable by a fulltext search on an
+    exact property.</para>
+
+    <para>But the next query will return a result (of course, if the node
+    contains the searched data):</para>
+
+    <programlisting>SELECT * FROM nt:resource WHERE CONTAINS( * , 'some string')</programlisting>
+  </section>
+
+  <section>
+    <title>Fulltext search query examples</title>
+
+    <itemizedlist>
+      <listitem>
+        <para><link linkend="JCR.FulltextSearchByProperty">Fulltext Search by
+        Property</link></para>
+      </listitem>
+
+      <listitem>
+        <para><link linkend="JCR.FulltextSearchByAllProperties">Fulltext Search
+        by All Properties</link></para>
+      </listitem>
+
+      <listitem>
+        <para><link linkend="???">Find nt:file document by content of its
+        child jcr:content node&gt;Aggregation rule</link></para>
+      </listitem>
+
+      <listitem>
+        <para><link linkend="???">How to set a new analyzer. Accent symbols
+        ignoring&gt;JCR.Ignore Accent Symbols</link></para>
+      </listitem>
+    </itemizedlist>
+  </section>
+
+  <section>
+    <title>Different analyzers in action</title>
+
+    <para>First of all, we will fill the repository with nodes of mixin type
+    'mix:title' and different values of the 'jcr:description' property.</para>
+
+    <itemizedlist>
+      <listitem>
+        <para>root</para>
+
+        <itemizedlist>
+          <listitem>
+            <para>document1 (mix:title) jcr:description = "The quick brown fox
+            jumped over the lazy dogs"</para>
+          </listitem>
+
+          <listitem>
+            <para>document2 (mix:title) jcr:description = "Brown fox live in
+            forest."</para>
+          </listitem>
+
+          <listitem>
+            <para>document3 (mix:title) jcr:description = "Fox is a nice
+            animal."</para>
+          </listitem>
+        </itemizedlist>
+      </listitem>
+    </itemizedlist>
+
+    <para>Let's take a closer look at the effect of analyzers. In the first
+    case we use the base jcr settings, so, as mentioned above, the string "The
+    quick brown fox jumped over the lazy dogs" will be transformed to the set
+    {[the] [quick] [brown] [fox] [jumped] [over] [the] [lazy] [dogs] }</para>
+
+    <programlisting>// make SQL query
+QueryManager queryManager = workspace.getQueryManager();
+String sqlStatement = "SELECT * FROM mix:title WHERE CONTAINS(jcr:description, 'the')";
+// create query
+Query query = queryManager.createQuery(sqlStatement, Query.SQL);
+// execute query and fetch result
+QueryResult result = query.execute();</programlisting>
+
+    <para>NodeIterator will return "document1".</para>
+
+    <para>Now change the default analyzer to
+    org.apache.lucene.analysis.StopAnalyzer. Fill the repository again (the
+    new analyzer must process the nodes' properties) and run the same query
+    again. It will return nothing, because stop words like "the" are excluded
+    from the parsed string set.</para>
+  </section>
+</chapter>

Added: jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-by-all-properties.xml
===================================================================
--- jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-by-all-properties.xml	                        (rev 0)
+++ jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-by-all-properties.xml	2010-08-04 09:10:59 UTC (rev 2870)
@@ -0,0 +1,134 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<section id="JCR.FulltextSearchByAllProperties">
+  <title>Fulltext Search by All Properties in Node</title>
+
+  <para>Find nodes with mixin type 'mix:title' where any property contains
+  'break' string.</para>
+
+  <section>
+    <title>Repository structure:</title>
+
+    <para>The repository is filled with different nodes of the mixin type
+    'mix:title' and different values of the 'jcr:title' and 'jcr:description'
+    properties.</para>
+
+    <itemizedlist>
+      <listitem>
+        <para>root</para>
+
+        <itemizedlist>
+          <listitem>
+            <para>document1 (mix:title) jcr:title ='Star Wars' jcr:description
+            = 'Dart rules!!'</para>
+          </listitem>
+
+          <listitem>
+            <para>document2 (mix:title) jcr:title ='Prison
+            <emphasis>break</emphasis>' jcr:description = 'Run, Forest, run
+            ))'</para>
+          </listitem>
+
+          <listitem>
+            <para>document3 (mix:title) jcr:title ='Titanic' jcr:description =
+            'An iceberg <emphasis>break</emphasis>s a ship.'</para>
+          </listitem>
+        </itemizedlist>
+      </listitem>
+    </itemizedlist>
+  </section>
+
+  <section>
+    <title>Query execution</title>
+
+    <para><emphasis role="bold">SQL</emphasis></para>
+
+    <programlisting>// make SQL query
+QueryManager queryManager = workspace.getQueryManager();
+String sqlStatement = "SELECT * FROM mix:title WHERE CONTAINS(*,'break')";
+// create query
+Query query = queryManager.createQuery(sqlStatement, Query.SQL);
+// execute query and fetch result
+QueryResult result = query.execute();</programlisting>
+
+    <para><emphasis role="bold">XPath</emphasis></para>
+
+    <programlisting>// make XPath query
+QueryManager queryManager = workspace.getQueryManager();
+// we want to find 'document2' and 'document3'
+String xpathStatement = "//element(*,mix:title)[jcr:contains(.,'break')]";
+// create query
+Query query = queryManager.createQuery(xpathStatement, Query.XPATH);
+// execute query and fetch result
+QueryResult result = query.execute();</programlisting>
+  </section>
+
+  <section>
+    <title>Fetch result</title>
+
+    <para>Let's get nodes:</para>
+
+    <programlisting>NodeIterator it = result.getNodes();
+
+while(it.hasNext())
+{
+   Node findedNode = it.nextNode();
+}</programlisting>
+
+    <para>NodeIterator will return "document2" and "document3".</para>
+
+    <para>We can also get a table:</para>
+
+    <programlisting>String[] columnNames = result.getColumnNames();
+RowIterator rit = result.getRows();
+while (rit.hasNext())
+{
+   Row row = rit.nextRow();
+   // get values of the row
+   Value[] values = row.getValues();
+}</programlisting>
+
+    <para>Table content is:</para>
+
+    <table>
+      <title>Table content</title>
+
+      <tgroup cols="4">
+        <thead>
+          <row>
+            <entry>jcr:title</entry>
+
+            <entry>jcr:description</entry>
+
+            <entry>...</entry>
+
+            <entry>jcr:path</entry>
+          </row>
+        </thead>
+
+        <tbody>
+          <row>
+            <entry>Prison break</entry>
+
+            <entry>Run, Forest, run ))</entry>
+
+            <entry>...</entry>
+
+            <entry>/document2</entry>
+          </row>
+
+          <row>
+            <entry>Titanic</entry>
+
+            <entry>An iceberg breaks a ship.</entry>
+
+            <entry>...</entry>
+
+            <entry>/document3</entry>
+          </row>
+        </tbody>
+      </tgroup>
+    </table>
+  </section>
+</section>

Added: jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-by-property.xml
===================================================================
--- jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-by-property.xml	                        (rev 0)
+++ jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/fulltext-search-by-property.xml	2010-08-04 09:10:59 UTC (rev 2870)
@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<section id="JCR.FulltextSearchByProperty">
+  <title>Fulltext Search by Property</title>
+
+  <para>Find all nodes containing a mixin type 'mix:title' and whose
+  'jcr:description' contains "forest" string.</para>
+
+  <section>
+    <title>Repository Structure</title>
+
+    <para>The repository is filled with nodes of the mixin type 'mix:title'
+    and different values of the 'jcr:description' property.</para>
+
+    <itemizedlist>
+      <listitem>
+        <para>root</para>
+
+        <itemizedlist>
+          <listitem>
+            <para>document1 (mix:title) jcr:description = "The quick brown fox
+            jumps over the lazy dog."</para>
+          </listitem>
+
+          <listitem>
+            <para>document2 (mix:title) jcr:description = "The brown fox lives
+            in a <emphasis role="underscore">forest</emphasis>." // This is
+            the node we want to find</para>
+          </listitem>
+
+          <listitem>
+            <para>document3 (mix:title) jcr:description = "The fox is a nice
+            animal."</para>
+          </listitem>
+
+          <listitem>
+            <para>document4 (nt:unstructured) jcr:description = "There is the
+            word forest, too."</para>
+          </listitem>
+        </itemizedlist>
+      </listitem>
+    </itemizedlist>
+  </section>
+
+  <section>
+    <title>Query Execution</title>
+
+    <para><emphasis role="bold">SQL</emphasis></para>
+
+    <programlisting>// make SQL query
+QueryManager queryManager = workspace.getQueryManager();
+// we want to find the document which contains the word "forest"
+String sqlStatement = "SELECT * FROM mix:title WHERE CONTAINS(jcr:description, 'forest')";
+// create query
+Query query = queryManager.createQuery(sqlStatement, Query.SQL);
+// execute query and fetch result
+QueryResult result = query.execute();</programlisting>
+
+    <para><emphasis role="bold">XPath</emphasis></para>
+
+    <programlisting>// make XPath query
+QueryManager queryManager = workspace.getQueryManager();
+// we want to find the document which contains the word "forest"
+String xpathStatement = "//element(*,mix:title)[jcr:contains(@jcr:description, 'forest')]";
+// create query
+Query query = queryManager.createQuery(xpathStatement, Query.XPATH);
+// execute query and fetch result
+QueryResult result = query.execute();</programlisting>
+  </section>
+
+  <section>
+    <title>Fetching the Result</title>
+
+    <para>Let's get nodes:</para>
+
+    <programlisting>NodeIterator it = result.getNodes();
+
+if(it.hasNext())
+{
+   Node findedNode = it.nextNode();
+}</programlisting>
+
+    <para>NodeIterator will return "document2".</para>
+
+    <para>We can also get a table:</para>
+
+    <programlisting>String[] columnNames = result.getColumnNames();
+RowIterator rit = result.getRows();
+while (rit.hasNext())
+{
+   Row row = rit.nextRow();
+   // get values of the row
+   Value[] values = row.getValues();
+}</programlisting>
+
+    <para>Table content is:</para>
+
+    <table>
+      <title>Table content</title>
+
+      <tgroup cols="3">
+        <thead>
+          <row>
+            <entry>jcr:description</entry>
+
+            <entry>...</entry>
+
+            <entry>jcr:path</entry>
+          </row>
+        </thead>
+
+        <tbody>
+          <row>
+            <entry>The brown fox lives in a forest.</entry>
+
+            <entry>...</entry>
+
+            <entry>/document2</entry>
+          </row>
+        </tbody>
+      </tgroup>
+    </table>
+  </section>
+</section>

Modified: jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/jcr-query-usecases.xml
===================================================================
--- jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/jcr-query-usecases.xml	2010-08-04 07:59:52 UTC (rev 2869)
+++ jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/jcr-query-usecases.xml	2010-08-04 09:10:59 UTC (rev 2870)
@@ -224,17 +224,18 @@
     </section>
 
     <section>
-      <title><link linkend="???">Fulltext Search</link></title>
+      <title><link linkend="JCR.FulltextSearchAndSettings">Fulltext
+      Search</link></title>
 
       <itemizedlist>
         <listitem>
-          <para><link linkend="???">JCR.Fulltext Search by
-          Property</link></para>
+          <para><link linkend="JCR.FulltextSearchByProperty">Fulltext Search
+          by Property</link></para>
         </listitem>
 
         <listitem>
-          <para><link linkend="???">JCR.Fulltext Search by All
-          Properties</link></para>
+          <para><link linkend="JCR.FulltextSearchByAllProperties">Fulltext
+          Search by All Properties</link></para>
         </listitem>
 
         <listitem>

Modified: jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/order-by-path-or-name.xml
===================================================================
--- jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/order-by-path-or-name.xml	2010-08-04 07:59:52 UTC (rev 2869)
+++ jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr/searching/order-by-path-or-name.xml	2010-08-04 09:10:59 UTC (rev 2870)
@@ -24,8 +24,8 @@
   </itemizedlist>
 
   <para>If no order specification is supplied in the query statement,
-  implementations may support document order on the result nodes (see 6.6.4.2
-  Document Order). And its sorted by order number.</para>
+  implementations may support document order on the result nodes (see jsr-170
+  / 6.6.4.2 Document Order). And its sorted by order number.</para>
 
   <para>By default (if query do not contains any ordering statements) result
   nodes is sorted by document order.</para>

Modified: jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr.xml
===================================================================
--- jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr.xml	2010-08-04 07:59:52 UTC (rev 2869)
+++ jcr/branches/1.12.x/docs/reference/en/src/main/docbook/en-US/modules/jcr.xml	2010-08-04 09:10:59 UTC (rev 2870)
@@ -95,8 +95,13 @@
   <xi:include href="jcr/searching/jcr-query-usecases.xml"
               xmlns:xi="http://www.w3.org/2001/XInclude" />    
 
+
   <xi:include href="jcr/searching/searching-repository-content.xml"
               xmlns:xi="http://www.w3.org/2001/XInclude" />    
+
+  <xi:include href="jcr/searching/fulltext-search-and-settings.xml"
+                  xmlns:xi="http://www.w3.org/2001/XInclude" />
+
               
   <!-- protocols -->
   
@@ -118,27 +123,27 @@
   <!-- other  -->
 
   <xi:include href="jcr/statistics.xml"
-              xmlns:xi="http://www.w3.org/2001/XInclude" />
-              
+              xmlns:xi="http://www.w3.org/2001/XInclude" />
+              
   <xi:include href="jcr/other/acl.xml"
-              xmlns:xi="http://www.w3.org/2001/XInclude" />                            
-              
+              xmlns:xi="http://www.w3.org/2001/XInclude" />                            
+              
   <xi:include href="jcr/other/acl-ext.xml"
-              xmlns:xi="http://www.w3.org/2001/XInclude" />                                          
-              
+              xmlns:xi="http://www.w3.org/2001/XInclude" />                                          
+              
   <xi:include href="jcr/other/link-producer.xml"
-              xmlns:xi="http://www.w3.org/2001/XInclude" />
-              
+              xmlns:xi="http://www.w3.org/2001/XInclude" />
+              
   <xi:include href="jcr/other/binary-values-processing.xml"
-              xmlns:xi="http://www.w3.org/2001/XInclude" />
-              
+              xmlns:xi="http://www.w3.org/2001/XInclude" />
+              
   <xi:include href="jcr/other/jcr-organization-service.xml"
-              xmlns:xi="http://www.w3.org/2001/XInclude" />              
-              
-              
-              
-              
-              
+              xmlns:xi="http://www.w3.org/2001/XInclude" />              
+              
+              
+              
+              
+              
   <xi:include href="jcr/other/jcr-resources.xml"
               xmlns:xi="http://www.w3.org/2001/XInclude" />