Author: sergiykarpenko
Date: 2010-09-08 02:19:33 -0400 (Wed, 08 Sep 2010)
New Revision: 3088
Modified:
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NodeIndexer.java
jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/impl/core/query/TestExcelFileSearch.java
jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/usecases/index/SlowListTest.java
Log:
EXOJCR-752: NodeIndexer can now use AdvancedDocumentReader's Reader object
Modified:
jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NodeIndexer.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NodeIndexer.java 2010-09-07 19:29:32 UTC (rev 3087)
+++ jcr/trunk/exo.jcr.component.core/src/main/java/org/exoplatform/services/jcr/impl/core/query/lucene/NodeIndexer.java 2010-09-08 06:19:33 UTC (rev 3088)
@@ -19,6 +19,7 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
+import org.exoplatform.services.document.AdvancedDocumentReader;
import org.exoplatform.services.document.DocumentReadException;
import org.exoplatform.services.document.DocumentReader;
import org.exoplatform.services.document.DocumentReaderService;
@@ -328,42 +329,53 @@
PropertyData encProp =
(PropertyData)stateProvider.getItemData(node, new
QPathEntry(Constants.JCR_ENCODING, 0));
+ String encoding = null;
if (encProp != null)
{
// encoding parameter used
- String encoding = new String(encProp.getValues().get(0).getAsByteArray());
+ encoding = new String(encProp.getValues().get(0).getAsByteArray());
+ }
+
+ if (dreader instanceof AdvancedDocumentReader)
+ {
+ // its a tika document reader that supports getContentAsReader
for (ValueData pvd : data)
{
+ // tikaDocumentReader will close inputStream, so no need to close it at finally
+ // statement
+
InputStream is = null;
- try
+ is = pvd.getAsStream();
+ Reader reader;
+ if (encoding != null)
{
- is = pvd.getAsStream();
- Reader reader = new StringReader(dreader.getContentAsText(is, encoding));
- doc.add(createFulltextField(reader));
-
+ reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is, encoding);
}
- finally
+ else
{
- try
- {
- is.close();
- }
- catch (Throwable e)
- {
- }
+ reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is);
}
+ doc.add(createFulltextField(reader));
}
}
else
{
- // no encoding parameter
+ // old-style document reader
for (ValueData pvd : data)
{
InputStream is = null;
try
{
is = pvd.getAsStream();
- Reader reader = new StringReader(dreader.getContentAsText(is));
+ Reader reader;
+ if (encoding != null)
+ {
+ reader = new StringReader(dreader.getContentAsText(is, encoding));
+ }
+ else
+ {
+ reader = new StringReader(dreader.getContentAsText(is));
+ }
doc.add(createFulltextField(reader));
}
finally
Modified:
jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/impl/core/query/TestExcelFileSearch.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/impl/core/query/TestExcelFileSearch.java 2010-09-07 19:29:32 UTC (rev 3087)
+++ jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/impl/core/query/TestExcelFileSearch.java 2010-09-08 06:19:33 UTC (rev 3088)
@@ -26,6 +26,7 @@
import org.exoplatform.services.document.DocumentReader;
import org.exoplatform.services.document.DocumentReaderService;
import org.exoplatform.services.document.impl.MSExcelDocumentReader;
+import org.exoplatform.services.document.impl.tika.TikaDocumentReader;
import org.exoplatform.services.jcr.impl.core.NodeImpl;
import org.exoplatform.services.jcr.impl.core.query.lucene.FieldNames;
@@ -68,7 +69,19 @@
System.out.println(dreader);
- assertTrue(dreader instanceof MSExcelDocumentReader);
+ if (dreader instanceof MSExcelDocumentReader)
+ {
+ // OK
+ }
+ else if (dreader instanceof TikaDocumentReader)
+ {
+ String[] mimetypes = ((TikaDocumentReader)dreader).getMimeTypes();
+ assertEquals("application/excel", mimetypes[0]);
+ }
+ else
+ {
+ fail("Wrong document reader");
+ }
// String text = dreader.getContentAsText(fis);
Modified:
jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/usecases/index/SlowListTest.java
===================================================================
--- jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/usecases/index/SlowListTest.java 2010-09-07 19:29:32 UTC (rev 3087)
+++ jcr/trunk/exo.jcr.component.core/src/test/java/org/exoplatform/services/jcr/usecases/index/SlowListTest.java 2010-09-08 06:19:33 UTC (rev 3088)
@@ -56,10 +56,11 @@
assertNotNull("Can not create a test node for indexing", test);
for (int i = 0; i < 111; i++)
{
+ is = SlowListTest.class.getResourceAsStream("/index/test_index.xls");
String name = new String("nnn-" + i);
Node cool = test.addNode(name, "nt:file");
Node contentNode = cool.addNode("jcr:content",
"nt:resource");
- contentNode.setProperty("jcr:encoding", "UTF-8");
+ //contentNode.setProperty("jcr:encoding", "UTF-8");
contentNode.setProperty("jcr:data", is);
contentNode.setProperty("jcr:mimeType",
"application/excel");
contentNode.setProperty("jcr:lastModified",
session.getValueFactory().createValue(Calendar.getInstance()));