Author: rhauch
Date: 2009-11-24 12:43:33 -0500 (Tue, 24 Nov 2009)
New Revision: 1337
Added:
trunk/dna-search/src/test/java/org/jboss/dna/search/SearchEngineTest.java
Modified:
trunk/dna-search/pom.xml
trunk/dna-search/src/main/java/org/jboss/dna/search/DualIndexSearchProvider.java
Log:
DNA-467 Changed the implementation of the method to delete nodes under a specified branch,
and added several test cases to verify that the content can be indexed, re-indexed
(multiple times), and searched.
Modified: trunk/dna-search/pom.xml
===================================================================
--- trunk/dna-search/pom.xml 2009-11-24 02:19:50 UTC (rev 1336)
+++ trunk/dna-search/pom.xml 2009-11-24 17:43:33 UTC (rev 1337)
@@ -45,22 +45,22 @@
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
- <version>2.9.0</version>
+ <version>2.9.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers</artifactId>
- <version>2.9.0</version>
+ <version>2.9.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-snowball</artifactId>
- <version>2.9.0</version>
+ <version>2.9.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-regex</artifactId>
- <version>2.9.0</version>
+ <version>2.9.1</version>
</dependency>
<!--
Testing (note the scope)
Modified:
trunk/dna-search/src/main/java/org/jboss/dna/search/DualIndexSearchProvider.java
===================================================================
---
trunk/dna-search/src/main/java/org/jboss/dna/search/DualIndexSearchProvider.java 2009-11-24
02:19:50 UTC (rev 1336)
+++
trunk/dna-search/src/main/java/org/jboss/dna/search/DualIndexSearchProvider.java 2009-11-24
17:43:33 UTC (rev 1337)
@@ -134,7 +134,7 @@
static {
IndexRules.Builder builder = IndexRules.createBuilder();
// Configure the default behavior ...
- builder.defaultTo(IndexRules.INDEX | IndexRules.ANALYZE);
+ builder.defaultTo(IndexRules.INDEX | IndexRules.ANALYZE | IndexRules.FULL_TEXT);
// Configure the UUID properties to be just indexed (not stored, not analyzed,
not included in full-text) ...
builder.store(JcrLexicon.UUID, DnaLexicon.UUID);
// Configure the properties that we'll treat as dates ...
@@ -324,7 +324,13 @@
protected IndexWriter getPathsWriter() throws IOException {
assert !readOnly;
if (pathsWriter == null) {
- pathsWriter = new IndexWriter(pathsIndexDirectory, analyzer, overwrite,
MaxFieldLength.UNLIMITED);
+ if (overwrite) {
+ // Always overwrite it ...
+ pathsWriter = new IndexWriter(pathsIndexDirectory, analyzer,
overwrite, MaxFieldLength.UNLIMITED);
+ } else {
+ // Don't overwrite, but create if missing ...
+ pathsWriter = new IndexWriter(pathsIndexDirectory, analyzer,
MaxFieldLength.UNLIMITED);
+ }
}
return pathsWriter;
}
@@ -332,7 +338,13 @@
protected IndexWriter getContentWriter() throws IOException {
assert !readOnly;
if (contentWriter == null) {
- contentWriter = new IndexWriter(contentIndexDirectory, analyzer,
overwrite, MaxFieldLength.UNLIMITED);
+ if (overwrite) {
+ // Always overwrite it ...
+ contentWriter = new IndexWriter(contentIndexDirectory, analyzer,
overwrite, MaxFieldLength.UNLIMITED);
+ } else {
+ // Don't overwrite, but create if missing ...
+ contentWriter = new IndexWriter(contentIndexDirectory, analyzer,
MaxFieldLength.UNLIMITED);
+ }
}
return contentWriter;
}
@@ -492,33 +504,14 @@
*/
public int deleteBelow( Path path ) {
assert !readOnly;
- // Perform a query using the reader to find those nodes at/below the path
...
try {
- IndexReader pathReader = getPathsReader();
- IndexSearcher pathSearcher = new IndexSearcher(pathReader);
- String pathStr = stringFactory.create(path) + "/";
- PrefixQuery query = new PrefixQuery(new Term(PathIndex.PATH, pathStr));
- int numberDeleted = 0;
- while (true) {
- // Execute the query and get the results ...
- TopDocs results = pathSearcher.search(query,
SIZE_OF_DELETE_BATCHES);
- int numResultsInBatch = results.scoreDocs.length;
- // Walk the results, delete the doc, and add to the query that
we'll use against the content index ...
- IndexReader contentReader = getContentReader();
- for (ScoreDoc result : results.scoreDocs) {
- int docId = result.doc;
- // Find the UUID of the node ...
- Document doc = pathReader.document(docId, UUID_FIELD_SELECTOR);
- String uuid = doc.get(PathIndex.UUID);
- // Delete the document from the paths index ...
- pathReader.deleteDocument(docId);
- // Delete the corresponding document from the content index ...
- contentReader.deleteDocuments(new Term(ContentIndex.UUID,
uuid));
- }
- numberDeleted += numResultsInBatch;
- if (numResultsInBatch < SIZE_OF_DELETE_BATCHES) break;
- }
- return numberDeleted;
+ // Create a query to find all the nodes at or below the specified path
...
+ Set<UUID> uuids = getUuidsForDescendantsOf(path, true);
+ Query uuidQuery = findAllNodesWithUuids(uuids);
+ // Now delete the documents from each index using this query, which we
can reuse ...
+ getPathsWriter().deleteDocuments(uuidQuery);
+ getContentWriter().deleteDocuments(uuidQuery);
+ return uuids.size();
} catch (FileNotFoundException e) {
// There are no index files yet, so nothing to delete ...
return 0;
@@ -628,42 +621,42 @@
}
}
if (pathsWriter != null) {
+ // try {
+ // pathsWriter.commit();
+ // } catch (IOException e) {
+ // if (ioError == null) ioError = e;
+ // } catch (RuntimeException e) {
+ // if (runtimeError == null) runtimeError = e;
+ // } finally {
try {
- pathsWriter.commit();
+ pathsWriter.close();
} catch (IOException e) {
- ioError = e;
+ if (ioError == null) ioError = e;
} catch (RuntimeException e) {
- runtimeError = e;
+ if (runtimeError == null) runtimeError = e;
} finally {
- try {
- pathsWriter.close();
- } catch (IOException e) {
- ioError = e;
- } catch (RuntimeException e) {
- runtimeError = e;
- } finally {
- pathsWriter = null;
- }
+ pathsWriter = null;
}
+ // }
}
if (contentWriter != null) {
+ // try {
+ // contentWriter.commit();
+ // } catch (IOException e) {
+ // if (ioError == null) ioError = e;
+ // } catch (RuntimeException e) {
+ // if (runtimeError == null) runtimeError = e;
+ // } finally {
try {
- contentWriter.commit();
+ contentWriter.close();
} catch (IOException e) {
if (ioError == null) ioError = e;
} catch (RuntimeException e) {
if (runtimeError == null) runtimeError = e;
} finally {
- try {
- contentWriter.close();
- } catch (IOException e) {
- ioError = e;
- } catch (RuntimeException e) {
- runtimeError = e;
- } finally {
- contentWriter = null;
- }
+ contentWriter = null;
}
+ // }
}
if (ioError != null) {
String msg =
SearchI18n.errorWhileCommittingIndexChanges.text(workspaceName, sourceName,
ioError.getMessage());
@@ -916,7 +909,7 @@
}
return query;
}
- // Returna query that will always find all of the UUIDs ...
+ // Return a query that will always find all of the UUIDs ...
return new UuidsQuery(ContentIndex.UUID, uuids,
getContext().getValueFactories().getUuidFactory());
}
@@ -1515,8 +1508,9 @@
protected static class UuidCollector extends Collector {
private final Set<UUID> uuids = new HashSet<UUID>();
private String[] uuidsByDocId;
- private int baseDocId;
+ // private int baseDocId;
+
protected UuidCollector() {
}
@@ -1555,10 +1549,9 @@
* @see org.apache.lucene.search.Collector#collect(int)
*/
@Override
- public void collect( int doc ) {
- int index = doc - baseDocId;
- assert index >= 0;
- String uuidString = uuidsByDocId[index];
+ public void collect( int docId ) {
+ assert docId >= 0;
+ String uuidString = uuidsByDocId[docId];
assert uuidString != null;
uuids.add(UUID.fromString(uuidString));
}
@@ -1572,7 +1565,7 @@
public void setNextReader( IndexReader reader,
int docBase ) throws IOException {
this.uuidsByDocId = FieldCache.DEFAULT.getStrings(reader, UUID_FIELD);
- this.baseDocId = docBase;
+ // this.baseDocId = docBase;
}
}
}
Added: trunk/dna-search/src/test/java/org/jboss/dna/search/SearchEngineTest.java
===================================================================
--- trunk/dna-search/src/test/java/org/jboss/dna/search/SearchEngineTest.java
(rev 0)
+++ trunk/dna-search/src/test/java/org/jboss/dna/search/SearchEngineTest.java 2009-11-24
17:43:33 UTC (rev 1337)
@@ -0,0 +1,193 @@
+/*
+ * JBoss DNA (
http://www.jboss.org/dna)
+ * See the COPYRIGHT.txt file distributed with this work for information
+ * regarding copyright ownership. Some portions may be licensed
+ * to Red Hat, Inc. under one or more contributor license agreements.
+ * See the AUTHORS.txt file in the distribution for a full listing of
+ * individual contributors.
+ *
+ * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
+ * is licensed to you under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * JBoss DNA is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site:
http://www.fsf.org.
+ */
+package org.jboss.dna.search;
+
+import static org.hamcrest.core.Is.is;
+import static org.hamcrest.core.IsNull.notNullValue;
+import static org.junit.Assert.assertThat;
+import java.io.IOException;
+import java.util.List;
+import org.jboss.dna.graph.ExecutionContext;
+import org.jboss.dna.graph.Graph;
+import org.jboss.dna.graph.Location;
+import org.jboss.dna.graph.connector.RepositoryConnection;
+import org.jboss.dna.graph.connector.RepositoryConnectionFactory;
+import org.jboss.dna.graph.connector.RepositorySourceException;
+import org.jboss.dna.graph.connector.inmemory.InMemoryRepositorySource;
+import org.jboss.dna.graph.property.Path;
+import org.jboss.dna.graph.search.SearchEngine;
+import org.jboss.dna.graph.search.SearchProvider;
+import org.junit.Before;
+import org.junit.Test;
+import org.xml.sax.SAXException;
+
+public class SearchEngineTest {
+
+ private SearchEngine engine;
+ private SearchProvider provider;
+ private ExecutionContext context;
+ private String sourceName;
+ private String workspaceName1;
+ private String workspaceName2;
+ private InMemoryRepositorySource source;
+ private RepositoryConnectionFactory connectionFactory;
+ private Graph content;
+
+ @Before
+ public void beforeEach() throws Exception {
+ context = new ExecutionContext();
+ sourceName = "sourceA";
+ workspaceName1 = "workspace1";
+ workspaceName2 = "workspace2";
+
+ // Set up the source and graph instance ...
+ source = new InMemoryRepositorySource();
+ source.setName(sourceName);
+ content = Graph.create(source, context);
+
+ // Create the workspaces ...
+ content.createWorkspace().named(workspaceName1);
+ content.createWorkspace().named(workspaceName2);
+
+ // Set up the connection factory ...
+ connectionFactory = new RepositoryConnectionFactory() {
+ @SuppressWarnings( "synthetic-access" )
+ public RepositoryConnection createConnection( String sourceName ) throws
RepositorySourceException {
+ return source.getConnection();
+ }
+ };
+
+ // Set up the provider and the search engine ...
+ IndexRules rules = DualIndexSearchProvider.DEFAULT_RULES;
+ LuceneConfiguration luceneConfig = LuceneConfigurations.inMemory();
+ // LuceneConfiguration luceneConfig = LuceneConfigurations.using(new
File("target/testIndexes"));
+ provider = new DualIndexSearchProvider(luceneConfig, rules);
+ engine = new SearchEngine(context, sourceName, connectionFactory, provider);
+ loadContent();
+ }
+
+ protected Path path( String path ) {
+ return context.getValueFactories().getPathFactory().create(path);
+ }
+
+ protected void loadContent() throws IOException, SAXException {
+ // Load the content ...
+ content.useWorkspace(workspaceName1);
+
content.importXmlFrom(getClass().getClassLoader().getResourceAsStream("cars.xml")).into("/");
+ content.useWorkspace(workspaceName2);
+
content.importXmlFrom(getClass().getClassLoader().getResourceAsStream("aircraft.xml")).into("/");
+ }
+
+ @Test
+ public void shouldIndexAllContentInRepositorySource() throws Exception {
+ engine.index(3);
+ }
+
+ @Test
+ public void shouldIndexAllContentInWorkspace() throws Exception {
+ engine.index(workspaceName1, 3);
+ engine.index(workspaceName2, 5);
+ }
+
+ @Test
+ public void shouldIndexAllContentInWorkspaceBelowPath() throws Exception {
+ engine.index(workspaceName1, path("/Cars/Hybrid"), 3);
+ engine.index(workspaceName2, path("/Aircraft/Commercial"), 5);
+ }
+
+ @Test
+ public void shouldReIndexAllContentInWorkspaceBelowPath() throws Exception {
+ for (int i = 0; i != 3; i++) {
+ engine.index(workspaceName1, path("/Cars/Hybrid"), 3);
+ engine.index(workspaceName2, path("/Aircraft/Commercial"), 5);
+ }
+ }
+
+ @Test
+ public void shouldHaveLoadedTestContentIntoRepositorySource() {
+ content.useWorkspace(workspaceName1);
+ assertThat(content.getNodeAt("/Cars/Hybrid/Toyota
Prius").getProperty("msrp").getFirstValue(),
is((Object)"$21,500"));
+ }
+
+ @Test
+ public void shouldIndexRepositoryContentStartingAtRootAndUsingDepthOfOne() {
+ engine.index(workspaceName1, path("/"), 1);
+ }
+
+ @Test
+ public void shouldIndexRepositoryContentStartingAtRootAndUsingDepthOfTwo() {
+ engine.index(workspaceName1, path("/"), 2);
+ }
+
+ @Test
+ public void shouldIndexRepositoryContentStartingAtRootAndUsingDepthOfThree() {
+ engine.index(workspaceName1, path("/"), 3);
+ }
+
+ @Test
+ public void shouldIndexRepositoryContentStartingAtRootAndUsingDepthOfFour() {
+ engine.index(workspaceName1, path("/"), 4);
+ }
+
+ @Test
+ public void shouldIndexRepositoryContentStartingAtRootAndUsingDepthOfTen() {
+ engine.index(workspaceName1, path("/"), 10);
+ }
+
+ @Test
+ public void shouldIndexRepositoryContentStartingAtNonRootNode() {
+ engine.index(workspaceName1, path("/Cars"), 10);
+ }
+
+ @Test
+ public void shouldReIndexRepositoryContentStartingAtNonRootNode() {
+ engine.index(workspaceName1, path("/Cars"), 10);
+ engine.index(workspaceName1, path("/Cars"), 10);
+ engine.index(workspaceName1, path("/Cars"), 10);
+ }
+
+ @Test
+ public void shouldFindNodesByFullTextSearch() {
+ engine.index(workspaceName1, path("/"), 100);
+ List<Location> results = engine.fullTextSearch(context, workspaceName1,
"Toyota Prius", 10, 0);
+ assertThat(results, is(notNullValue()));
+ assertThat(results.size(), is(2));
+ assertThat(results.get(0).getPath(), is(path("/Cars/Hybrid/Toyota
Prius")));
+ assertThat(results.get(1).getPath(), is(path("/Cars/Hybrid/Toyota
Highlander")));
+ }
+
+ @Test
+ public void shouldFindNodesByFullTextSearchWithOffset() {
+ engine.index(workspaceName1, path("/"), 100);
+ List<Location> results = engine.fullTextSearch(context, workspaceName1,
"toyota prius", 1, 0);
+ assertThat(results, is(notNullValue()));
+ assertThat(results.size(), is(1));
+ assertThat(results.get(0).getPath(), is(path("/Cars/Hybrid/Toyota
Prius")));
+
+ results = engine.fullTextSearch(context, workspaceName1, "+Toyota", 1,
1);
+ assertThat(results, is(notNullValue()));
+ assertThat(results.size(), is(1));
+ assertThat(results.get(0).getPath(), is(path("/Cars/Hybrid/Toyota
Highlander")));
+ }
+}
Property changes on:
trunk/dna-search/src/test/java/org/jboss/dna/search/SearchEngineTest.java
___________________________________________________________________
Name: svn:keywords
+ Id Revision
Name: svn:eol-style
+ LF