Author: nzamosenchuk
Date: 2010-08-31 07:43:20 -0400 (Tue, 31 Aug 2010)
New Revision: 3028
Modified:
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSOutlookDocumentReader.java
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSWordDocumentReader.java
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/POIPropertiesReader.java
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java
core/trunk/exo.core.component.document/src/main/resources/conf/portal/configuration.xml
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestOpenOfficeDocumentReader.java
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestPropertiesExtracting.java
core/trunk/exo.core.component.document/src/test/resources/test.doc
core/trunk/exo.core.component.document/src/test/resources/test.ppt
core/trunk/exo.core.component.document/src/test/resources/test.xls
Log:
EXOJCR-886: updating tests and DocumentReaders according to remarks. Implementing property
extraction from OOXML (MS 2007) formats.
Modified:
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java
===================================================================
---
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java 2010-08-31
11:43:20 UTC (rev 3028)
@@ -18,7 +18,6 @@
*/
package org.exoplatform.services.document.impl;
-import org.exoplatform.container.xml.InitParams;
import org.exoplatform.services.document.DocumentReadException;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
@@ -43,7 +42,7 @@
*
* @param params the container parameters.
*/
- public HTMLDocumentReader(InitParams params)
+ public HTMLDocumentReader()
{
}
@@ -54,7 +53,7 @@
*/
public String[] getMimeTypes()
{
- return new String[]{"text/html","application/x-groovy+html"};
+ return new String[]{"text/html", "application/x-groovy+html"};
}
/**
@@ -77,7 +76,9 @@
int len;
ByteArrayOutputStream bos = new ByteArrayOutputStream();
while ((len = is.read(buffer)) > 0)
+ {
bos.write(buffer, 0, len);
+ }
bos.close();
String html = new String(bos.toByteArray());
Modified:
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java
===================================================================
---
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java 2010-08-31
11:43:20 UTC (rev 3028)
@@ -42,8 +42,8 @@
public class MSExcelDocumentReader extends BaseDocumentReader
{
- private static final SimpleDateFormat DATE_FORMAT = new
SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSZ");
-
+ private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSSZ";
+
/**
* Get the application/excel mime type.
*
@@ -68,9 +68,16 @@
}
StringBuilder builder = new StringBuilder("");
+
+ SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
try
{
+ if (is.available() == 0)
+ {
+ return "";
+ }
+
HSSFWorkbook wb;
try
{
@@ -78,7 +85,7 @@
}
catch (IOException e)
{
- return builder.toString();
+ throw new DocumentReadException("Can't open spreadsheet.", e);
}
for (int sheetNum = 0; sheetNum < wb.getNumberOfSheets(); sheetNum++)
{
@@ -104,7 +111,7 @@
if (isCellDateFormatted(cell))
{
Date date = HSSFDateUtil.getJavaDate(d);
- String cellText = this.DATE_FORMAT.format(date);
+ String cellText = dateFormat.format(date);
builder.append(cellText).append(" ");
}
else
Modified:
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSOutlookDocumentReader.java
===================================================================
---
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSOutlookDocumentReader.java 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSOutlookDocumentReader.java 2010-08-31
11:43:20 UTC (rev 3028)
@@ -61,6 +61,11 @@
}
try
{
+ if (is.available() == 0)
+ {
+ return "";
+ }
+
MAPIMessage message;
try
{
@@ -68,12 +73,12 @@
}
catch (IOException e)
{
- return "";
+ throw new DocumentReadException("Can't open message.", e);
}
- StringBuffer buffer = new StringBuffer();
+ StringBuilder builder = new StringBuilder();
try
{
- buffer.append(message.getDisplayFrom()).append('\n');
+ builder.append(message.getDisplayFrom()).append('\n');
}
catch (ChunkNotFoundException e)
{
@@ -81,7 +86,7 @@
}
try
{
- buffer.append(message.getDisplayTo()).append('\n');
+ builder.append(message.getDisplayTo()).append('\n');
}
catch (ChunkNotFoundException e)
{
@@ -89,7 +94,7 @@
}
try
{
- buffer.append(message.getSubject()).append('\n');
+ builder.append(message.getSubject()).append('\n');
}
catch (ChunkNotFoundException e)
{
@@ -97,13 +102,13 @@
}
try
{
- buffer.append(message.getTextBody());
+ builder.append(message.getTextBody());
}
catch (ChunkNotFoundException e)
{
// "textBody" is empty
}
- return buffer.toString();
+ return builder.toString();
}
finally
Modified:
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSWordDocumentReader.java
===================================================================
---
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSWordDocumentReader.java 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSWordDocumentReader.java 2010-08-31
11:43:20 UTC (rev 3028)
@@ -61,6 +61,11 @@
String text = "";
try
{
+ if (is.available() == 0)
+ {
+ return "";
+ }
+
HWPFDocument doc;
try
{
@@ -68,7 +73,7 @@
}
catch (IOException e)
{
- return "";
+ throw new DocumentReadException("Can't open document.", e);
}
Range range = doc.getRange();
@@ -77,6 +82,7 @@
finally
{
if (is != null)
+ {
try
{
is.close();
@@ -84,6 +90,7 @@
catch (IOException e)
{
}
+ }
}
return text.trim();
}
Modified:
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java
===================================================================
---
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java 2010-08-31
11:43:20 UTC (rev 3028)
@@ -18,8 +18,11 @@
*/
package org.exoplatform.services.document.impl;
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.POIXMLPropertiesTextExtractor;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRow;
@@ -44,9 +47,8 @@
*/
public class MSXExcelDocumentReader extends BaseDocumentReader
{
+ private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSSZ";
- private static final SimpleDateFormat DATE_FORMAT = new
SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSZ");
-
/**
* @see org.exoplatform.services.document.DocumentReader#getMimeTypes()
*/
@@ -69,17 +71,25 @@
}
StringBuilder builder = new StringBuilder("");
+ SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
try
{
+ if (is.available() == 0)
+ {
+ return "";
+ }
+
XSSFWorkbook wb;
try
{
wb = new XSSFWorkbook(is);
+ OPCPackage pkg;
+
}
catch (IOException e)
{
- return builder.toString();
+ throw new DocumentReadException("Can't open spreadsheet.", e);
}
catch (OpenXML4JRuntimeException e)
{
@@ -109,7 +119,7 @@
if (isCellDateFormatted(cell))
{
Date date = HSSFDateUtil.getJavaDate(d);
- String cellText = this.DATE_FORMAT.format(date);
+ String cellText = dateFormat.format(date);
builder.append(cellText).append(" ");
}
else
@@ -171,7 +181,7 @@
public Properties getProperties(InputStream is) throws IOException,
DocumentReadException
{
POIPropertiesReader reader = new POIPropertiesReader();
- reader.readDCProperties(is);
+ reader.readDCProperties(new XSSFWorkbook(is));
return reader.getProperties();
}
Modified:
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java
===================================================================
---
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java 2010-08-31
11:43:20 UTC (rev 3028)
@@ -18,9 +18,11 @@
*/
package org.exoplatform.services.document.impl;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.xslf.XSLFSlideShow;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.xmlbeans.XmlException;
import org.exoplatform.services.document.DocumentReadException;
@@ -62,6 +64,11 @@
}
try
{
+ if (is.available() == 0)
+ {
+ return "";
+ }
+
XSLFPowerPointExtractor ppe;
try
{
@@ -69,19 +76,19 @@
}
catch (IOException e)
{
- return "";
+ throw new DocumentReadException("Can't open presentation.",
e);
}
catch (OpenXML4JRuntimeException e)
{
- return "";
+ throw new DocumentReadException("Can't open presentation.",
e);
}
catch (OpenXML4JException e)
{
- return "";
+ throw new DocumentReadException("Can't open presentation.",
e);
}
catch (XmlException e)
{
- return "";
+ throw new DocumentReadException("Can't open presentation.",
e);
}
return ppe.getText(true, true);
}
@@ -115,7 +122,22 @@
public Properties getProperties(InputStream is) throws IOException,
DocumentReadException
{
POIPropertiesReader reader = new POIPropertiesReader();
- reader.readDCProperties(is);
+ try
+ {
+ reader.readDCProperties(new XSLFSlideShow(OPCPackage.open(is)));
+ }
+ catch (InvalidFormatException e)
+ {
+ throw new DocumentReadException("Can't read properties from OOXML
document", e);
+ }
+ catch (OpenXML4JException e)
+ {
+ throw new DocumentReadException("Can't read properties from OOXML
document", e);
+ }
+ catch (XmlException e)
+ {
+ throw new DocumentReadException("Can't read properties from OOXML
document", e);
+ }
return reader.getProperties();
}
Modified:
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java
===================================================================
---
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java 2010-08-31
11:43:20 UTC (rev 3028)
@@ -62,6 +62,11 @@
String text = "";
try
{
+ if (is.available() == 0)
+ {
+ return "";
+ }
+
XWPFDocument doc;
try
{
@@ -69,11 +74,11 @@
}
catch (IOException e)
{
- return "";
+ throw new DocumentReadException("Can't open message.", e);
}
catch (OpenXML4JRuntimeException e)
{
- return "";
+ throw new DocumentReadException("Can't open message.", e);
}
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
@@ -110,7 +115,7 @@
public Properties getProperties(InputStream is) throws IOException,
DocumentReadException
{
POIPropertiesReader reader = new POIPropertiesReader();
- reader.readDCProperties(is);
+ reader.readDCProperties(new XWPFDocument(is));
return reader.getProperties();
}
Modified:
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/POIPropertiesReader.java
===================================================================
---
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/POIPropertiesReader.java 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/POIPropertiesReader.java 2010-08-31
11:43:20 UTC (rev 3028)
@@ -18,11 +18,17 @@
*/
package org.exoplatform.services.document.impl;
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.POIXMLPropertiesTextExtractor;
+import org.apache.poi.POIXMLProperties.CoreProperties;
+import org.apache.poi.POIXMLProperties.CustomProperties;
+import org.apache.poi.POIXMLProperties.ExtendedProperties;
import org.apache.poi.hpsf.MarkUnsupportedException;
import org.apache.poi.hpsf.NoPropertySetStreamException;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.openxml4j.util.Nullable;
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
@@ -51,6 +57,14 @@
return props;
}
+ /**
+ * Metadata extraction from OLE2 documents (legacy MS office file formats)
+ *
+ * @param is
+ * @return
+ * @throws IOException
+ * @throws DocumentReadException
+ */
public Properties readDCProperties(InputStream is) throws IOException,
DocumentReadException
{
if (is == null)
@@ -89,23 +103,39 @@
SummaryInformation si = (SummaryInformation)ps;
if (si.getLastAuthor() != null && si.getLastAuthor().length()
> 0)
+ {
props.put(DCMetaData.CONTRIBUTOR, si.getLastAuthor());
+ }
if (si.getComments() != null && si.getComments().length() >
0)
+ {
props.put(DCMetaData.DESCRIPTION, si.getComments());
+ }
if (si.getCreateDateTime() != null)
+ {
props.put(DCMetaData.DATE, si.getCreateDateTime());
+ }
if (si.getAuthor() != null && si.getAuthor().length() > 0)
+ {
props.put(DCMetaData.CREATOR, si.getAuthor());
+ }
if (si.getKeywords() != null && si.getKeywords().length() >
0)
+ {
props.put(DCMetaData.SUBJECT, si.getKeywords());
+ }
if (si.getLastSaveDateTime() != null)
+ {
props.put(DCMetaData.DATE, si.getLastSaveDateTime());
+ }
// if(docInfo.getProducer() != null)
// props.put(DCMetaData.PUBLISHER, docInfo.getProducer());
if (si.getSubject() != null && si.getSubject().length() >
0)
+ {
props.put(DCMetaData.SUBJECT, si.getSubject());
+ }
if (si.getTitle() != null && si.getTitle().length() > 0)
+ {
props.put(DCMetaData.TITLE, si.getTitle());
+ }
}
}
@@ -163,4 +193,56 @@
return props;
}
+ /**
+ * Metadata extraction from ooxml documents (MS 2007 office file formats)
+ *
+ * @param document
+ * @return
+ * @throws IOException
+ * @throws DocumentReadException
+ */
+ public Properties readDCProperties(POIXMLDocument document) throws IOException,
DocumentReadException
+ {
+
+ POIXMLPropertiesTextExtractor extractor = new
POIXMLPropertiesTextExtractor(document);
+
+ CoreProperties coreProperties = extractor.getCoreProperties();
+
+ Nullable<String> lastModifiedBy =
coreProperties.getUnderlyingProperties().getLastModifiedByProperty();
+ if (lastModifiedBy != null && lastModifiedBy.getValue() != null &&
lastModifiedBy.getValue().length() > 0)
+ {
+ props.put(DCMetaData.CONTRIBUTOR, lastModifiedBy.getValue());
+ }
+ if (coreProperties.getDescription() != null &&
coreProperties.getDescription().length() > 0)
+ {
+ props.put(DCMetaData.DESCRIPTION, coreProperties.getDescription());
+ }
+ if (coreProperties.getCreated() != null)
+ {
+ props.put(DCMetaData.DATE, coreProperties.getCreated());
+ }
+ if (coreProperties.getCreator() != null &&
coreProperties.getCreator().length() > 0)
+ {
+ props.put(DCMetaData.CREATOR, coreProperties.getCreator());
+ }
+ if (coreProperties.getSubject() != null &&
coreProperties.getSubject().length() > 0)
+ {
+ props.put(DCMetaData.SUBJECT, coreProperties.getSubject());
+ }
+ if (coreProperties.getModified() != null)
+ {
+ props.put(DCMetaData.DATE, coreProperties.getModified());
+ }
+ if (coreProperties.getSubject() != null &&
coreProperties.getSubject().length() > 0)
+ {
+ props.put(DCMetaData.SUBJECT, coreProperties.getSubject());
+ }
+ if (coreProperties.getTitle() != null && coreProperties.getTitle().length()
> 0)
+ {
+ props.put(DCMetaData.TITLE, coreProperties.getTitle());
+ }
+
+ return props;
+ }
+
}
Modified:
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java
===================================================================
---
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java 2010-08-31
11:43:20 UTC (rev 3028)
@@ -60,6 +60,12 @@
}
try
{
+
+ if (is.available() == 0)
+ {
+ return "";
+ }
+
PowerPointExtractor ppe;
try
{
@@ -67,13 +73,14 @@
}
catch (IOException e)
{
- return "";
+ throw new DocumentReadException("Can't open presentation.",
e);
}
return ppe.getText(true, true);
}
finally
{
if (is != null)
+ {
try
{
is.close();
@@ -81,6 +88,7 @@
catch (IOException e)
{
}
+ }
}
}
Modified:
core/trunk/exo.core.component.document/src/main/resources/conf/portal/configuration.xml
===================================================================
---
core/trunk/exo.core.component.document/src/main/resources/conf/portal/configuration.xml 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/main/resources/conf/portal/configuration.xml 2010-08-31
11:43:20 UTC (rev 3028)
@@ -22,13 +22,6 @@
<set-method>addDocumentReader</set-method>
<type>org.exoplatform.services.document.impl.PDFDocumentReader</type>
<description>to read the pdf inputstream</description>
- <init-params>
- <values-param>
- <name>document.known.types</name>
- <description>description</description>
- <value>application/pdf</value>
- </values-param>
- </init-params>
</component-plugin>
<component-plugin>
@@ -36,13 +29,6 @@
<set-method>addDocumentReader</set-method>
<type>org.exoplatform.services.document.impl.MSWordDocumentReader</type>
<description>to read the ms word inputstream</description>
- <init-params>
- <values-param>
- <name>document.known.typesMSWord</name>
- <description>description</description>
- <value>application/msword</value>
- </values-param>
- </init-params>
</component-plugin>
<component-plugin>
@@ -50,13 +36,6 @@
<set-method>addDocumentReader</set-method>
<type>org.exoplatform.services.document.impl.MSXWordDocumentReader</type>
<description>to read the ms word inputstream</description>
- <init-params>
- <values-param>
- <name>document.known.typesMSXWord</name>
- <description>description</description>
- <value>application/msword</value>
- </values-param>
- </init-params>
</component-plugin>
<component-plugin>
@@ -64,13 +43,6 @@
<set-method>addDocumentReader</set-method>
<type>org.exoplatform.services.document.impl.MSExcelDocumentReader</type>
<description>to read the ms excel inputstream</description>
- <init-params>
- <values-param>
- <name>document.known.typesMSExcel</name>
- <description>description</description>
- <value>application/excel</value>
- </values-param>
- </init-params>
</component-plugin>
<component-plugin>
@@ -78,13 +50,6 @@
<set-method>addDocumentReader</set-method>
<type>org.exoplatform.services.document.impl.MSXExcelDocumentReader</type>
<description>to read the ms excel inputstream</description>
- <init-params>
- <values-param>
- <name>document.known.typesMSXExcel</name>
- <description>description</description>
- <value>application/excel</value>
- </values-param>
- </init-params>
</component-plugin>
<component-plugin>
@@ -92,13 +57,6 @@
<set-method>addDocumentReader</set-method>
<type>org.exoplatform.services.document.impl.MSOutlookDocumentReader</type>
<description>to read the ms outlook inputstream</description>
- <init-params>
- <values-param>
- <name>document.known.typesMSOutlook</name>
- <description>description</description>
- <value>application/vnd.ms-outlook</value>
- </values-param>
- </init-params>
</component-plugin>
<component-plugin>
@@ -106,13 +64,6 @@
<set-method>addDocumentReader</set-method>
<type>org.exoplatform.services.document.impl.PPTDocumentReader</type>
<description>to read the ms ppt inputstream</description>
- <init-params>
- <values-param>
- <name>document.known.typesPPT</name>
- <description>description</description>
- <value>application/ppt</value>
- </values-param>
- </init-params>
</component-plugin>
<component-plugin>
@@ -120,13 +71,6 @@
<set-method>addDocumentReader</set-method>
<type>org.exoplatform.services.document.impl.MSXPPTDocumentReader</type>
<description>to read the ms pptx inputstream</description>
- <init-params>
- <values-param>
- <name>document.known.typesPPTX</name>
- <description>description</description>
- <value>application/ppt</value>
- </values-param>
- </init-params>
</component-plugin>
<component-plugin>
@@ -134,13 +78,6 @@
<set-method>addDocumentReader</set-method>
<type>org.exoplatform.services.document.impl.HTMLDocumentReader</type>
<description>to read the html inputstream</description>
- <init-params>
- <values-param>
- <name>document.known.typesHTML</name>
- <description>description</description>
- <value>text/html</value>
- </values-param>
- </init-params>
</component-plugin>
<component-plugin>
@@ -148,13 +85,6 @@
<set-method>addDocumentReader</set-method>
<type>org.exoplatform.services.document.impl.XMLDocumentReader</type>
<description>to read the xml inputstream</description>
- <init-params>
- <values-param>
- <name>document.known.typesXML</name>
- <description>description</description>
- <value>text/xml</value>
- </values-param>
- </init-params>
</component-plugin>
<component-plugin>
@@ -163,11 +93,6 @@
<type>org.exoplatform.services.document.impl.TextPlainDocumentReader</type>
<description>to read the plain text inputstream</description>
<init-params>
- <values-param>
- <name>document.known.typesTextPlain</name>
- <description>description</description>
- <value>text/plain</value>
- </values-param>
<!--
values-param> <name>defaultEncoding</name>
<description>description</description> <value>UTF-8</value>
</values-param
Modified:
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java
===================================================================
---
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java 2010-08-31
11:43:20 UTC (rev 3028)
@@ -53,7 +53,7 @@
public TestDocumentReadersIncomVals()
{
serviceList = new ArrayList<DocumentReader>();
- serviceList.add(new HTMLDocumentReader(null));
+ serviceList.add(new HTMLDocumentReader());
serviceList.add(new MSExcelDocumentReader());
serviceList.add(new MSXExcelDocumentReader());
serviceList.add(new MSOutlookDocumentReader());
Modified:
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java
===================================================================
---
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java 2010-08-31
11:43:20 UTC (rev 3028)
@@ -46,15 +46,8 @@
String text =
service.getDocumentReader("application/vnd.openxmlformats-officedocument.wordprocessingml.document")
.getContentAsText(is);
- System.out.println("text [" + text + "]");
-
- /*
- * String etalon = "Hello.\n" +"This is the test document
12345\n"
- * +"Table\n" +"Title One Two\n" +"Hello_Title
Hello_One Hello_Two\n";
- * System.out.println("etalon ["+etalon+"]");
- * System.out.println("["+text.length()+"]
["+etalon.length()+"]");
- * assertEquals("Wrong string returned",etalon ,text );
- */
+ assertTrue(text
+ .contains("Before the test starts there is a directions section, which
takes a few minutes to read"));
}
finally
{
Modified:
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestOpenOfficeDocumentReader.java
===================================================================
---
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestOpenOfficeDocumentReader.java 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestOpenOfficeDocumentReader.java 2010-08-31
11:43:20 UTC (rev 3028)
@@ -33,6 +33,7 @@
{
DocumentReaderService service;
+ @Override
public void setUp() throws Exception
{
super.setUp();
@@ -45,41 +46,11 @@
try
{
String text =
service.getDocumentReader("application/vnd.oasis.opendocument.text").getContentAsText(is);
- System.out.println("[" + text + "]");
- /*
- * String etalon = "Subscription:" +
- * "\tEULA with add on warranties and non GPL viral effect (all
Customer's development free of GPL license limitations). This agreement continues to
be valid even if customers do not renew their subscription.\n"
- * +
- * "\tProduct documentation including user and admin guides to eXo platform
portal, ECM, JCR, and Portlet Container. (currently we have all the docs accessible for
free but it will not)\n"
- * +
- * "\tAccess to all Flash tutorial gives a visual guide to eXo platform and
demonstrates a comprehensive tutorial that enhance the understanding of eXo products.
(only part of Flash tutorials are accessible for free)\n"
- * +
- * "\tAdvanced Installer is an application that makes it quick and easy to
install eXo platform products with simple clicks and allows better configuration to
integrate existing database and directories (LDAP) in a multi platform
environment.\n"
- * +
- * "\tUnlimited access to online premium forum (customer request tracking?)
support, you will get answer from eXo platform company technical specialist employees.
(TODO) \n"
- * +
- * "\tUnlimited email support within 48 hours response time for limited
contact names per CPU.(i think we may join it with prev item using term customer request
tracking with email notification)\n"
- * +
- * "\tUpdate alert will send out email periodically to subscribers all the
latest change in eXo documentations, flash tutorials, and product services.
(TODO)\n"
- * +
- * "\tTechnical code improvement alert will send out email periodically to
subscribers to inform of all the latest code patches, latest version, latest code
improvement download. (TODO)\n"
- * +
- * "\tKnowledge Base subscribers can access to all of eXo platform wiki
knowledge bases. (TODO, i do not think we may put it into agreement yet)\n"
- * +
- * "\tFeature Request Priority As customers you have priorities to request
the latest features improvement for any of eXo products next version. (what does it
mean?)\n"
- * + "\n" +
- * "Subscription is annual and per CPU because it is the unit of load
increase and so more support demand. Subscription advantages are:\n"
- * + "\tEULA – it is obligatory to be Subscriber to get EULA\n" +
- * "\tAdditional documentations (Guides, Flash tutorials)\n" +
- * "\tAdditional software (Installer)\n" +
- * "\tAdditional product/documentation changes notifications\n" +
- * "\tProfessional support: limited contact name (3 customer contact with 1
eXo contact per CPU), 48 hours to answer via email\n"
- * + "\tFeature request priority (also depending on CPU number)\n" +
"\n";
- * System.out.println("["+etalon+"]");
- * System.out.println("TEXT size ["+text.
- * length()+"] ETALON LEN ["+etalon.length()+"]");
- * assertEquals("Wrong string returned", etalon, text);
- */
+
+ assertTrue(
+ "Wrong string returned",
+ text
+ .contains("Product documentation including user and admin guides to
eXo platform portal, ECM, JCR, and Portlet Container. (currently we have all the docs
accessible for free but it will not)"));
}
finally
{
Modified:
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestPropertiesExtracting.java
===================================================================
---
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestPropertiesExtracting.java 2010-08-31
09:16:47 UTC (rev 3027)
+++
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestPropertiesExtracting.java 2010-08-31
11:43:20 UTC (rev 3028)
@@ -33,58 +33,26 @@
{
DocumentReaderService service;
+ @Override
public void setUp() throws Exception
{
super.setUp();
service =
(DocumentReaderService)getComponentInstanceOfType(DocumentReaderService.class);
- // service = new DocumentReaderServiceImpl(null);
- // InitParams params = new InitParams();
- // service.addDocumentReader(new TextPlainDocumentReader(params));
- // service.addDocumentReader(new XMLDocumentReader());
- // service.addDocumentReader(new HTMLDocumentReader(null));
- // service.addDocumentReader(new MSExcelDocumentReader());
- // service.addDocumentReader(new MSOutlookDocumentReader());
- // service.addDocumentReader(new MSWordDocumentReader());
- // service.addDocumentReader(new MSXExcelDocumentReader());
- // service.addDocumentReader(new MSXPPTDocumentReader());
- // service.addDocumentReader(new MSXWordDocumentReader());
- // service.addDocumentReader(new OpenOfficeDocumentReader());
- // service.addDocumentReader(new PDFDocumentReader());
- // service.addDocumentReader(new PPTDocumentReader());
}
- public void testPDFDocumentReaderService() throws Exception
- {
- InputStream is =
TestPropertiesExtracting.class.getResourceAsStream("/test.pdf");
- try
- {
- DocumentReader rdr = service.getDocumentReader("application/pdf");
- Properties props = rdr.getProperties(is);
- printProps(props);
- }
- finally
- {
- is.close();
- }
- }
-
public void testPDFDocumentReaderServiceXMPMetadata() throws Exception
{
InputStream is =
TestPropertiesExtracting.class.getResourceAsStream("/MyTest.pdf");
try
{
DocumentReader rdr = service.getDocumentReader("application/pdf");
-
Properties testprops = rdr.getProperties(is);
- printProps(testprops);
-
Properties etalon = new Properties();
etalon.put(DCMetaData.TITLE, "Test de convertion de fichier tif");
etalon.put(DCMetaData.CREATOR, "Christian Klaus");
etalon.put(DCMetaData.SUBJECT, "20080901 TEST Christian Etat OK");
- Calendar c = ISO8601.parseEx("2008-09-01T08:01:10+00:00");;
+ Calendar c = ISO8601.parseEx("2008-09-01T08:01:10+00:00");
etalon.put(DCMetaData.DATE, c);
-
evalProps(etalon, testprops);
}
finally
@@ -99,7 +67,19 @@
try
{
Properties props =
service.getDocumentReader("application/msword").getProperties(is);
- printProps(props);
+ Properties etalon = new Properties();
+ Calendar date = Calendar.getInstance();
+ date.setTimeInMillis(0);
+ date.set(2010, 7, 31, 12, 31, 0);
+
+ etalon.put(DCMetaData.TITLE, "test-Title");
+ etalon.put(DCMetaData.DATE, date.getTime());
+ etalon.put(DCMetaData.SUBJECT, "test-Subject");
+ etalon.put(DCMetaData.CREATOR, "Max Yakimenko");
+ etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
+ etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+
+ evalProps(etalon, props);
}
finally
{
@@ -113,7 +93,17 @@
try
{
Properties props =
service.getDocumentReader("application/powerpoint").getProperties(is);
- printProps(props);
+ Properties etalon = new Properties();
+ Calendar date = Calendar.getInstance();
+ date.setTimeInMillis(41);
+ date.set(2010, 7, 31, 12, 34, 15);
+ etalon.put(DCMetaData.TITLE, "test-Title");
+ etalon.put(DCMetaData.DATE, date.getTime());
+ etalon.put(DCMetaData.SUBJECT, "test-Subject");
+ etalon.put(DCMetaData.CREATOR, "Max Yakimenko");
+ etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
+ etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+ evalProps(etalon, props);
}
finally
{
@@ -127,7 +117,19 @@
try
{
Properties props =
service.getDocumentReader("application/excel").getProperties(is);
- printProps(props);
+ Properties etalon = new Properties();
+ Calendar date = Calendar.getInstance();
+ date.setTimeInMillis(0);
+ date.set(2010, 7, 31, 12, 34, 53);
+
+ etalon.put(DCMetaData.TITLE, "test-Title");
+ etalon.put(DCMetaData.DATE, date.getTime());
+ etalon.put(DCMetaData.SUBJECT, "test-Subject");
+ etalon.put(DCMetaData.CREATOR, "KHANH NGUYEN GIA");
+ etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
+ etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+
+ evalProps(etalon, props);
}
finally
{
@@ -135,6 +137,90 @@
}
}
+ public void testXWordDocumentReaderService() throws Exception
+ {
+ InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.docx");
+ try
+ {
+ Properties props =
+ service.getDocumentReader("application/vnd.openxmlformats-officedocument.wordprocessingml.document")
+ .getProperties(is);
+ Properties etalon = new Properties();
+ Calendar date = Calendar.getInstance();
+ date.setTimeInMillis(0);
+ date.set(2010, 7, 31, 7, 53, 0);
+
+ etalon.put(DCMetaData.TITLE, "test-Title");
+ etalon.put(DCMetaData.DATE, date.getTime());
+ etalon.put(DCMetaData.SUBJECT, "Subject");
+ etalon.put(DCMetaData.CREATOR, "nikolaz");
+ etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
+ etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+
+ evalProps(etalon, props);
+ }
+ finally
+ {
+ is.close();
+ }
+ }
+
+ public void testXPPTDocumentReaderService() throws Exception
+ {
+ InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.pptx");
+ try
+ {
+ Properties props =
+ service.getDocumentReader("application/vnd.openxmlformats-officedocument.presentationml.presentation")
+ .getProperties(is);
+ Properties etalon = new Properties();
+ Calendar date = Calendar.getInstance();
+ date.setTimeInMillis(0);
+ date.set(2010, 7, 31, 7, 59, 37);
+
+ etalon.put(DCMetaData.TITLE, "test-Title");
+ etalon.put(DCMetaData.DATE, date.getTime());
+ etalon.put(DCMetaData.SUBJECT, "test-Subject");
+ etalon.put(DCMetaData.CREATOR, "Max Yakimenko");
+ etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
+ etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+
+ evalProps(etalon, props);
+ }
+ finally
+ {
+ is.close();
+ }
+ }
+
+ public void testXExcelDocumentReaderService() throws Exception
+ {
+ InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.xlsx");
+ try
+ {
+ Properties props =
+ service.getDocumentReader("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
+ .getProperties(is);
+ Properties etalon = new Properties();
+ Calendar date = Calendar.getInstance();
+ date.setTimeInMillis(0);
+ date.set(2010, 7, 31, 8, 7, 25);
+
+ etalon.put(DCMetaData.TITLE, "test-Title");
+ etalon.put(DCMetaData.DATE, date.getTime());
+ etalon.put(DCMetaData.SUBJECT, "test-Subject");
+ etalon.put(DCMetaData.CREATOR, "KHANH NGUYEN GIA");
+ etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
+ etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+
+ evalProps(etalon, props);
+ }
+ finally
+ {
+ is.close();
+ }
+ }
+
public void testOODocumentReaderService() throws Exception
{
InputStream is =
TestPropertiesExtracting.class.getResourceAsStream("/test.odt");
@@ -142,6 +228,19 @@
{
Properties props =
service.getDocumentReader("application/vnd.oasis.opendocument.text").getProperties(is);
printProps(props);
+ Properties etalon = new Properties();
+ Calendar date = Calendar.getInstance();
+ date.setTimeInMillis(0);
+ date.set(2010, 7, 31, 14, 13, 23);
+
+ etalon.put(DCMetaData.TITLE, "test-Title");
+ etalon.put(DCMetaData.LANGUAGE, "ru-RU");
+ etalon.put(DCMetaData.DATE, "2010-08-31T14:13:23");
+ etalon.put(DCMetaData.SUBJECT, "test-Subject");
+ etalon.put(DCMetaData.CREATOR, "nikolaz ");
+ etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+
+ evalProps(etalon, props);
}
finally
{
@@ -149,16 +248,16 @@
}
}
- private void printProps(Properties props)
- {
- Iterator it = props.entrySet().iterator();
- props.toString();
- while (it.hasNext())
+ private void printProps(Properties props)
{
- Map.Entry entry = (Map.Entry)it.next();
- System.out.println(" " + entry.getKey() + " -> [" + entry.getValue() + "]");
+ Iterator it = props.entrySet().iterator();
+ props.toString();
+ while (it.hasNext())
+ {
+ Map.Entry entry = (Map.Entry)it.next();
+ System.out.println(" " + entry.getKey() + " -> [" + entry.getValue() + "]");
+ }
}
- }
private void evalProps(Properties etalon, Properties testedProps)
{
Modified: core/trunk/exo.core.component.document/src/test/resources/test.doc
===================================================================
(Binary files differ)
Modified: core/trunk/exo.core.component.document/src/test/resources/test.ppt
===================================================================
(Binary files differ)
Modified: core/trunk/exo.core.component.document/src/test/resources/test.xls
===================================================================
(Binary files differ)