Author: nzamosenchuk
Date: 2010-08-20 06:31:38 -0400 (Fri, 20 Aug 2010)
New Revision: 2958
Added:
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXExcelDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/test/resources/test.docx
core/branches/2.3.x/exo.core.component.document/src/test/resources/test.pptx
core/branches/2.3.x/exo.core.component.document/src/test/resources/test.xlsx
Modified:
core/branches/2.3.x/exo.core.component.document/pom.xml
core/branches/2.3.x/exo.core.component.document/src/main/resources/conf/portal/configuration.xml
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java
core/branches/2.3.x/pom.xml
Log:
EXOJCR-886: adding document readers for MS 2007 file formats
Modified: core/branches/2.3.x/exo.core.component.document/pom.xml
===================================================================
--- core/branches/2.3.x/exo.core.component.document/pom.xml 2010-08-20 09:49:54 UTC (rev
2957)
+++ core/branches/2.3.x/exo.core.component.document/pom.xml 2010-08-20 10:31:38 UTC (rev
2958)
@@ -77,6 +77,18 @@
</exclusion>
</exclusions>
</dependency>
+
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-ooxml</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
</dependencies>
<build>
<testResources>
@@ -88,9 +100,12 @@
<include>**/*.drl</include>
<include>**/*.vm</include>
<include>**/*.doc</include>
+ <include>**/*.docx</include>
<include>**/*.dot</include>
<include>**/*.xls</include>
+ <include>**/*.xlsx</include>
<include>**/*.ppt</include>
+ <include>**/*.pptx</include>
<include>**/*.txt</include>
<include>**/*.tiff</include>
<include>**/*.pdf</include>
Added:
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java
===================================================================
---
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java
(rev 0)
+++
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java 2010-08-20
10:31:38 UTC (rev 2958)
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) 2009 eXo Platform SAS.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site:
http://www.fsf.org.
+ */
+package org.exoplatform.services.document.impl;
+
+import org.apache.poi.hssf.usermodel.HSSFDateUtil;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
+import org.apache.poi.xssf.usermodel.XSSFCell;
+import org.apache.poi.xssf.usermodel.XSSFCellStyle;
+import org.apache.poi.xssf.usermodel.XSSFRow;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.exoplatform.services.document.DocumentReadException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Properties;
+
+/**
+ * A parser of Microsoft Excel 2007 files (xlsx). Created by The eXo Platform SAS.
+ * <p>
+ * Extracts the cell contents of every sheet as one space-separated string and reads the
+ * document properties through {@link POIPropertiesReader}.
+ *
+ * @author <a href="mailto:phunghainam@gmail.com">Phung Hai Nam</a>
+ * @author Gennady Azarenkov
+ * @author <a href="mailto:nikolazius@gmail.com">Nikolay Zamosenchuk</a>
+ * @version $Id: MSXExcelDocumentReader.java 34360 2009-07-22 23:58:59Z nzamosenchuk $
+ *
+ */
+public class MSXExcelDocumentReader extends BaseDocumentReader
+{
+
+   /**
+    * Pattern used to render date-formatted cells. Only the pattern is shared: a
+    * {@link SimpleDateFormat} instance is not safe for concurrent use, so a fresh
+    * formatter is created for every extraction instead of keeping one in a static field.
+    */
+   private static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss.SSSZ";
+
+   /**
+    * @see org.exoplatform.services.document.DocumentReader#getMimeTypes()
+    */
+   public String[] getMimeTypes()
+   {
+      return new String[]{"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"};
+   }
+
+   /**
+    * Returns only the text of an .xlsx file.
+    * <p>
+    * Cells are visited sheet by sheet and row by row; every supported cell contributes its
+    * text followed by a single space. Formula cells contribute the formula expression
+    * itself, not a computed value. If the stream cannot be opened as a workbook an empty
+    * string is returned. The stream is closed before this method returns.
+    *
+    * @param is an input stream with .xlsx file content.
+    * @return the extracted text, or an empty string if the content cannot be parsed.
+    * @throws NullPointerException if <code>is</code> is null.
+    */
+   public String getContentAsText(InputStream is) throws IOException, DocumentReadException
+   {
+      if (is == null)
+      {
+         throw new NullPointerException("InputStream is null.");
+      }
+
+      StringBuilder builder = new StringBuilder();
+
+      try
+      {
+         XSSFWorkbook wb;
+         try
+         {
+            wb = new XSSFWorkbook(is);
+         }
+         catch (IOException e)
+         {
+            // Best effort: unreadable content yields empty text instead of a failure.
+            return "";
+         }
+         catch (OpenXML4JRuntimeException e)
+         {
+            // Best effort: a malformed OOXML package yields empty text as well.
+            return "";
+         }
+
+         // One formatter per call, see DATE_PATTERN.
+         SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_PATTERN);
+         int sheetCount = wb.getNumberOfSheets();
+         for (int sheetNum = 0; sheetNum < sheetCount; sheetNum++)
+         {
+            XSSFSheet sheet = wb.getSheetAt(sheetNum);
+            if (sheet == null)
+            {
+               continue;
+            }
+            for (int rowNum = sheet.getFirstRowNum(); rowNum <= sheet.getLastRowNum(); rowNum++)
+            {
+               XSSFRow row = sheet.getRow(rowNum);
+               if (row == null)
+               {
+                  continue;
+               }
+               int lastCell = row.getLastCellNum();
+               for (int k = 0; k < lastCell; k++)
+               {
+                  XSSFCell cell = row.getCell(k);
+                  if (cell != null)
+                  {
+                     appendCellText(cell, dateFormat, builder);
+                  }
+               }
+            }
+         }
+      }
+      finally
+      {
+         try
+         {
+            is.close();
+         }
+         catch (IOException e)
+         {
+            // Nothing useful can be done if closing fails; the text is already extracted.
+         }
+      }
+      return builder.toString();
+   }
+
+   /**
+    * Appends the text of one cell, followed by a space, to the given builder. Cells of a
+    * type not listed in the switch contribute nothing.
+    *
+    * @param cell the cell to read, never null.
+    * @param dateFormat formatter used for date-formatted numeric cells.
+    * @param builder the target the text is appended to.
+    */
+   private void appendCellText(XSSFCell cell, SimpleDateFormat dateFormat, StringBuilder builder)
+   {
+      switch (cell.getCellType())
+      {
+         case XSSFCell.CELL_TYPE_NUMERIC : {
+            double d = cell.getNumericCellValue();
+            if (isCellDateFormatted(cell))
+            {
+               Date date = HSSFDateUtil.getJavaDate(d);
+               builder.append(dateFormat.format(date)).append(" ");
+            }
+            else
+            {
+               // Plain numbers are written as Java double text.
+               builder.append(d).append(" ");
+            }
+            break;
+         }
+         case XSSFCell.CELL_TYPE_FORMULA :
+            builder.append(cell.getCellFormula()).append(" ");
+            break;
+         case XSSFCell.CELL_TYPE_BOOLEAN :
+            builder.append(cell.getBooleanCellValue()).append(" ");
+            break;
+         case XSSFCell.CELL_TYPE_ERROR :
+            builder.append(cell.getErrorCellValue()).append(" ");
+            break;
+         case XSSFCell.CELL_TYPE_STRING :
+            builder.append(cell.getStringCellValue()).append(" ");
+            break;
+         default :
+            break;
+      }
+   }
+
+   /**
+    * Same as {@link #getContentAsText(InputStream)}; the encoding argument is ignored.
+    *
+    * @param is an input stream with .xlsx file content.
+    * @param encoding ignored.
+    * @return the extracted text, or an empty string if the content cannot be parsed.
+    */
+   public String getContentAsText(InputStream is, String encoding) throws IOException, DocumentReadException
+   {
+      // Ignore encoding
+      return getContentAsText(is);
+   }
+
+   /**
+    * Reads the document properties by delegating to
+    * {@link POIPropertiesReader#readDCProperties(InputStream)}.
+    * NOTE(review): this method neither checks <code>is</code> for null nor closes it itself;
+    * confirm what POIPropertiesReader does with the stream.
+    *
+    * @see org.exoplatform.services.document.DocumentReader#getProperties(java.io.InputStream)
+    */
+   public Properties getProperties(InputStream is) throws IOException, DocumentReadException
+   {
+      POIPropertiesReader reader = new POIPropertiesReader();
+      reader.readDCProperties(is);
+      return reader.getProperties();
+   }
+
+   /**
+    * Tells whether a numeric cell should be rendered as a date: its value must be a valid
+    * Excel date and the data format index of its style must be one of the indexes listed
+    * below.
+    *
+    * @param cell a numeric cell.
+    * @return true if the cell is treated as a date, false otherwise.
+    */
+   public static boolean isCellDateFormatted(XSSFCell cell)
+   {
+      double d = cell.getNumericCellValue();
+      if (!HSSFDateUtil.isValidExcelDate(d))
+      {
+         return false;
+      }
+      XSSFCellStyle style = cell.getCellStyle();
+      int formatIndex = style.getDataFormat();
+      switch (formatIndex)
+      {
+         case 0xe : // m/d/yy
+         case 0xf : // d-mmm-yy
+         case 0x10 : // d-mmm
+         case 0x11 : // mmm-yy
+         case 0x12 : // h:mm AM/PM
+         case 0x13 : // h:mm:ss AM/PM
+         case 0x14 : // h:mm
+         case 0x15 : // h:mm:ss
+         case 0x16 : // m/d/yy h:mm
+         case 0x2d : // mm:ss
+         case 0x2e : // [h]:mm:ss
+         case 0x2f : // mm:ss.0
+
+         case 0xa5 : // ??
+         case 0xa7 : // ??
+         case 0xa9 : // ??
+
+         case 0xac : // mm:dd:yy not specified in javadoc
+         case 0xad : // yyyy-mm-dd not specified in javadoc
+         case 0xae : // mm:dd:yyyy not specified in javadoc
+         case 0xaf : // m:d:yy not specified in javadoc
+            return true;
+         default :
+            return false;
+      }
+   }
+}
Added:
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java
===================================================================
---
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java
(rev 0)
+++
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java 2010-08-20
10:31:38 UTC (rev 2958)
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2009 eXo Platform SAS.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site:
http://www.fsf.org.
+ */
+package org.exoplatform.services.document.impl;
+
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.xmlbeans.XmlException;
+import org.exoplatform.services.document.DocumentReadException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Properties;
+
+/**
+ * Created by The eXo Platform SAS A parser of Microsoft PowerPoint 2007 files (pptx).
+ *
+ * @author <a href="mailto:phunghainam@gmail.com">Phung Hai
Nam</a>
+ * @author Gennady Azarenkov
+ * @author <a href="mailto:nikolazius@gmail.com">Nikolay
Zamosenchuk</a>
+ * @version $Id: MSXPPTDocumentReader.java 34360 2009-07-22 23:58:59Z nzamosenchuk $
+ */
+public class MSXPPTDocumentReader extends BaseDocumentReader
+{
+
+ /**
+ * @see org.exoplatform.services.document.DocumentReader#getMimeTypes()
+ */
+ public String[] getMimeTypes()
+ {
+ return new
String[]{"application/vnd.openxmlformats-officedocument.presentationml.presentation"};
+ }
+
+ /**
+ * Returns only a text from .pptx file content.
+ *
+ * @param is an input stream with .pptx file content.
+ * @return The string only with text from file content.
+ */
+ public String getContentAsText(InputStream is) throws IOException,
DocumentReadException
+ {
+ if (is == null)
+ {
+ throw new NullPointerException("InputStream is null.");
+ }
+ try
+ {
+ XSLFPowerPointExtractor ppe;
+ try
+ {
+ ppe = new XSLFPowerPointExtractor(OPCPackage.open(is));
+ }
+ catch (IOException e)
+ {
+ return "";
+ }
+ catch (OpenXML4JRuntimeException e)
+ {
+ return "";
+ }
+ catch (OpenXML4JException e)
+ {
+ return "";
+ }
+ catch (XmlException e)
+ {
+ return "";
+ }
+ return ppe.getText(true, true);
+ }
+ finally
+ {
+ if (is != null)
+ {
+ try
+ {
+ is.close();
+ }
+ catch (IOException e)
+ {
+ }
+ }
+ }
+ }
+
+ public String getContentAsText(InputStream is, String encoding) throws IOException,
DocumentReadException
+ {
+ // Ignore encoding
+ return getContentAsText(is);
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.exoplatform.services.document.DocumentReader#getProperties(java.io.
+ * InputStream)
+ */
+ public Properties getProperties(InputStream is) throws IOException,
DocumentReadException
+ {
+ POIPropertiesReader reader = new POIPropertiesReader();
+ reader.readDCProperties(is);
+ return reader.getProperties();
+ }
+
+}
Added:
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java
===================================================================
---
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java
(rev 0)
+++
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java 2010-08-20
10:31:38 UTC (rev 2958)
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2009 eXo Platform SAS.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site:
http://www.fsf.org.
+ */
+package org.exoplatform.services.document.impl;
+
+import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.exoplatform.services.document.DocumentReadException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Properties;
+
+/**
+ * Created by The eXo Platform SAS A parser of Microsoft Word 2007 files (docx).
+ *
+ * @author <a href="mailto:phunghainam@gmail.com">Phung Hai
Nam</a>
+ * @author Gennady Azarenkov
+ * @author <a href="mailto:nikolazius@gmail.com">Nikolay
Zamosenchuk</a>
+ * @version $Id: MSXWordDocumentReader.java 34360 2009-07-22 23:58:59Z nzamosenchuk $
+ *
+ */
+public class MSXWordDocumentReader extends BaseDocumentReader
+{
+
+ /**
+ * @see org.exoplatform.services.document.DocumentReader#getMimeTypes()
+ */
+ public String[] getMimeTypes()
+ {
+ return new
String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document"};
+ }
+
+ /**
+ * Returns only a text from .docx file content.
+ *
+ * @param is an input stream with .docx file content.
+ * @return The string only with text from file content.
+ */
+ public String getContentAsText(InputStream is) throws IOException,
DocumentReadException
+ {
+ if (is == null)
+ {
+ throw new NullPointerException("InputStream is null.");
+ }
+ String text = "";
+ try
+ {
+ XWPFDocument doc;
+ try
+ {
+ doc = new XWPFDocument(is);
+ }
+ catch (IOException e)
+ {
+ return "";
+ }
+ catch (OpenXML4JRuntimeException e)
+ {
+ return "";
+ }
+
+ XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+ text = extractor.getText();
+ }
+ finally
+ {
+ if (is != null)
+ {
+ try
+ {
+ is.close();
+ }
+ catch (IOException e)
+ {
+ }
+ }
+ }
+ return text.trim();
+ }
+
+ /**
+ * @see
org.exoplatform.services.document.DocumentReader#getContentAsText(java.io.InputStream,
java.lang.String)
+ */
+ public String getContentAsText(InputStream is, String encoding) throws IOException,
DocumentReadException
+ {
+ // Ignore encoding
+ return getContentAsText(is);
+ }
+
+ /**
+ * @see
org.exoplatform.services.document.DocumentReader#getProperties(java.io.InputStream)
+ */
+ public Properties getProperties(InputStream is) throws IOException,
DocumentReadException
+ {
+ POIPropertiesReader reader = new POIPropertiesReader();
+ reader.readDCProperties(is);
+ return reader.getProperties();
+ }
+
+}
Modified:
core/branches/2.3.x/exo.core.component.document/src/main/resources/conf/portal/configuration.xml
===================================================================
---
core/branches/2.3.x/exo.core.component.document/src/main/resources/conf/portal/configuration.xml 2010-08-20
09:49:54 UTC (rev 2957)
+++
core/branches/2.3.x/exo.core.component.document/src/main/resources/conf/portal/configuration.xml 2010-08-20
10:31:38 UTC (rev 2958)
@@ -55,6 +55,20 @@
</component-plugin>
<component-plugin>
+ <name>document.readerMSXWord</name>
+ <set-method>addDocumentReader</set-method>
+
<type>org.exoplatform.services.document.impl.MSXWordDocumentReader</type>
+ <description>to read the ms word inputstream</description>
+ <init-params>
+ <values-param>
+ <name>document.known.typesMSXWord</name>
+ <description>description</description>
+ <!-- Kept identical to the type returned by MSXWordDocumentReader.getMimeTypes() (docx). -->
+ <value>application/vnd.openxmlformats-officedocument.wordprocessingml.document</value>
+ </values-param>
+ </init-params>
+ </component-plugin>
+
+ <component-plugin>
<name>document.readerMSExcel</name>
<set-method>addDocumentReader</set-method>
<type>org.exoplatform.services.document.impl.MSExcelDocumentReader</type>
@@ -66,6 +80,20 @@
<value>application/excel</value>
</values-param>
</init-params>
+ </component-plugin>
+
+ <component-plugin>
+ <name>document.readerMSXExcel</name>
+ <set-method>addDocumentReader</set-method>
+
<type>org.exoplatform.services.document.impl.MSXExcelDocumentReader</type>
+ <description>to read the ms excel inputstream</description>
+ <init-params>
+ <values-param>
+ <name>document.known.typesMSXExcel</name>
+ <description>description</description>
+ <!-- Kept identical to the type returned by MSXExcelDocumentReader.getMimeTypes() (xlsx); application/excel is already declared for MSExcelDocumentReader. -->
+ <value>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</value>
+ </values-param>
+ </init-params>
</component-plugin>
<component-plugin>
@@ -94,6 +122,20 @@
<value>application/ppt</value>
</values-param>
</init-params>
+ </component-plugin>
+
+ <component-plugin>
+ <name>MSXPPTdocument.reader</name>
+ <set-method>addDocumentReader</set-method>
+
<type>org.exoplatform.services.document.impl.MSXPPTDocumentReader</type>
+ <description>to read the ms pptx inputstream</description>
+ <init-params>
+ <values-param>
+ <name>document.known.typesPPTX</name>
+ <description>description</description>
+ <!-- Kept identical to the type returned by MSXPPTDocumentReader.getMimeTypes() (pptx); application/ppt is already declared for the reader configured just before this one. -->
+ <value>application/vnd.openxmlformats-officedocument.presentationml.presentation</value>
+ </values-param>
+ </init-params>
</component-plugin>
<component-plugin>
Modified:
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java
===================================================================
---
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java 2010-08-20
09:49:54 UTC (rev 2957)
+++
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java 2010-08-20
10:31:38 UTC (rev 2958)
@@ -24,6 +24,9 @@
import org.exoplatform.services.document.impl.MSExcelDocumentReader;
import org.exoplatform.services.document.impl.MSOutlookDocumentReader;
import org.exoplatform.services.document.impl.MSWordDocumentReader;
+import org.exoplatform.services.document.impl.MSXExcelDocumentReader;
+import org.exoplatform.services.document.impl.MSXPPTDocumentReader;
+import org.exoplatform.services.document.impl.MSXWordDocumentReader;
import org.exoplatform.services.document.impl.OpenOfficeDocumentReader;
import org.exoplatform.services.document.impl.PDFDocumentReader;
import org.exoplatform.services.document.impl.PPTDocumentReader;
@@ -55,11 +58,14 @@
serviceList = new ArrayList<DocumentReader>();
serviceList.add(new HTMLDocumentReader(null));
serviceList.add(new MSExcelDocumentReader());
+ serviceList.add(new MSXExcelDocumentReader());
serviceList.add(new MSOutlookDocumentReader());
serviceList.add(new MSWordDocumentReader());
+ serviceList.add(new MSXWordDocumentReader());
serviceList.add(new OpenOfficeDocumentReader());
serviceList.add(new PDFDocumentReader());
serviceList.add(new PPTDocumentReader());
+ serviceList.add(new MSXPPTDocumentReader());
serviceList.add(new TextPlainDocumentReader(new InitParams()));
serviceList.add(new XMLDocumentReader());
}
Added:
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXExcelDocumentReader.java
===================================================================
---
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXExcelDocumentReader.java
(rev 0)
+++
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXExcelDocumentReader.java 2010-08-20
10:31:38 UTC (rev 2958)
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2009 eXo Platform SAS.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site:
http://www.fsf.org.
+ */
+package org.exoplatform.services.document.test;
+
+import org.exoplatform.container.PortalContainer;
+import org.exoplatform.services.document.DocumentReaderService;
+import org.exoplatform.test.BasicTestCase;
+
+import java.io.InputStream;
+
+/**
+ * Checks that the document reader registered for the xlsx MIME type can extract text from
+ * the test.xlsx resource.
+ * <p>
+ * Created by The eXo Platform SAS Author : Sergey Karpenko
+ * <sergey.karpenko(a)exoplatform.com.ua>
+ *
+ * @version $Id: $
+ */
+
+public class TestMSXExcelDocumentReader extends BasicTestCase
+{
+   DocumentReaderService service_;
+
+   /**
+    * Looks up the document reader service in the portal container.
+    */
+   @Override
+   public void setUp() throws Exception
+   {
+      PortalContainer pcontainer = PortalContainer.getInstance();
+      service_ = (DocumentReaderService)pcontainer.getComponentInstanceOfType(DocumentReaderService.class);
+   }
+
+   /**
+    * Extracts the text of test.xlsx through the service and checks that the resource exists
+    * and that a result is returned. The exact-content comparison below is still disabled.
+    */
+   public void testGetContentAsString() throws Exception
+   {
+      InputStream is = TestMSXExcelDocumentReader.class.getResourceAsStream("/test.xlsx");
+      // Fail with a clear message instead of a NullPointerException raised by the reader.
+      assertNotNull("Test resource /test.xlsx not found", is);
+      String text =
+         service_.getDocumentReader("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
+            .getContentAsText(is);
+      assertNotNull("Reader returned null", text);
+      System.out.println(" text [" + text + "]");
+      /*
+       * Disabled exact comparison, kept for reference.
+       * NOTE(review): the expected dates carry zone offsets (+0200/+0300), so the text
+       * presumably depends on the JVM default time zone - confirm before re-enabling.
+       *
+       * String etalon =
+       * "Ronaldo Eric Cantona Kaka Ronaldonho ID Group Functionality Executor Begin End Tested "
+       * +
+       * "XNNL XNNL Xay dung vung quan li nguyen lieu NamPH 2005-02-02 00:00:00.000+0200 2005-10-02 00:00:00.000+0300 Tested "
+       * +
+       * "XNNL XNNL XNNL_HAVEST NamPH 1223554.0 2005-10-01 00:00:00.000+0300 Tested "
+       * +
+       * "XNNL XNNL XNNL_PIECE_OF_GROUND NamPH 2005-10-12 00:00:00.000+0300 2005-10-02 00:00:00.000+0300 Tested "
+       * +"XNNL XNNL XNNL_76 NamPH TRUE 1984-12-10 00:00:00.000+0200 No "
+       * +"XNNL XNNL XNNL_CREATE_REAP NamPH none 2005-10-03 00:00:00.000+0300 No "
+       * +
+       * "XNNL XNNL XNNL_SCALE NamPH 1984-12-10 00:00:00.000+0200 2005-10-05 00:00:00.000+0300 Tested "
+       * +
+       * "XNNL XNNL LASUCO_PROJECT NamPH 2005-10-05 00:00:00.000+0300 2005-10-06 00:00:00.000+0300 No "
+       * +"XNNL XNNL LASUCO_PROJECT NamPH Tested "+
+       * "XNNL XNNL XNNL_BRANCH NamPH 2005-12-12 00:00:00.000+0200 2005-06-10 00:00:00.000+0300 Tested "
+       * +
+       * "XNNL XNNL XNNL_SUGAR_RACE NamPH 2005-05-09 00:00:00.000+0300 2005-06-10 00:00:00.000+0300 No "
+       * +
+       * "XNNL XNNL F_XNNL_DISTRI NamPH 2005-05-09 00:00:00.000+0300 2005-06-10 00:00:00.000+0300 Tested "
+       * +
+       * "XNNL XNNL XNNL_LASUCO_USER NamPH 2005-09-09 00:00:00.000+0300 2005-06-10 00:00:00.000+0300 No "
+       * ; System.out.println(" etalon ["+etalon+"]");
+       * assertEquals("String length is incorrect",etalon.length(),text.length());
+       * assertEquals("Wrong string returned",etalon ,text );
+       */
+
+   }
+}
Added:
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java
===================================================================
---
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java
(rev 0)
+++
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java 2010-08-20
10:31:38 UTC (rev 2958)
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2009 eXo Platform SAS.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site:
http://www.fsf.org.
+ */
+package org.exoplatform.services.document.test;
+
+import org.exoplatform.container.PortalContainer;
+import org.exoplatform.services.document.DocumentReaderService;
+import org.exoplatform.test.BasicTestCase;
+
+import java.io.InputStream;
+
+/**
+ * Checks that the document reader registered for the pptx MIME type extracts the expected
+ * text from the test.pptx resource.
+ * <p>
+ * Created by The eXo Platform SAS Author : Sergey Karpenko
+ * <sergey.karpenko(a)exoplatform.com.ua>
+ *
+ * @version $Id: $
+ */
+
+public class TestMSXPPTDocumentReader extends BasicTestCase
+{
+ // Service under test, looked up from the portal container in setUp().
+ DocumentReaderService service_;
+
+ @Override
+ public void setUp() throws Exception
+ {
+ PortalContainer pcontainer = PortalContainer.getInstance();
+ service_ =
(DocumentReaderService)pcontainer.getComponentInstanceOfType(DocumentReaderService.class);
+ }
+
+ /**
+ * Extracts the text of test.pptx through the service and compares it with the expected
+ * string character for character.
+ */
+ public void testGetContentAsString() throws Exception
+ {
+ InputStream is =
TestMSXPPTDocumentReader.class.getResourceAsStream("/test.pptx");
+ String text =
+
service_.getDocumentReader("application/vnd.openxmlformats-officedocument.presentationml.presentation")
+ .getContentAsText(is);
+ // Expected text, one fragment per line of the presentation.
+ String etalon =
+ "TEST POWERPOINT\n" + "Manchester United \n" + "AC
Milan\n" + "SLIDE 2 \n" + "Eric Cantona\n" + "Kaka\n"
+ + "Ronaldo\n" + "The natural scients universitys\n";
+
+ assertEquals("Wrong string returned", etalon, text);
+ }
+}
Added:
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java
===================================================================
---
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java
(rev 0)
+++
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java 2010-08-20
10:31:38 UTC (rev 2958)
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2009 eXo Platform SAS.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site:
http://www.fsf.org.
+ */
+package org.exoplatform.services.document.test;
+
+import org.exoplatform.container.PortalContainer;
+import org.exoplatform.services.document.DocumentReaderService;
+import org.exoplatform.test.BasicTestCase;
+
+import java.io.InputStream;
+
+/**
+ * Checks that the document reader registered for the docx MIME type can extract text from
+ * the test.docx resource.
+ *
+ * @author <a href="mailto:nikolazius@gmail.com">Nikolay Zamosenchuk</a>
+ * @version $Id: TestMSXWordDocumentReader.java 34360 2009-07-22 23:58:59Z nzamosenchuk $
+ *
+ */
+public class TestMSXWordDocumentReader extends BasicTestCase
+{
+   DocumentReaderService service_;
+
+   /**
+    * Looks up the document reader service in the portal container.
+    */
+   @Override
+   public void setUp() throws Exception
+   {
+      PortalContainer pcontainer = PortalContainer.getInstance();
+      service_ = (DocumentReaderService)pcontainer.getComponentInstanceOfType(DocumentReaderService.class);
+   }
+
+   /**
+    * Extracts the text of test.docx through the service and checks that the resource exists
+    * and that a result is returned. The exact-content comparison below is still disabled.
+    */
+   public void testGetContentAsStringDoc() throws Exception
+   {
+      InputStream is = TestMSXWordDocumentReader.class.getResourceAsStream("/test.docx");
+      // Fail with a clear message instead of a NullPointerException raised by the reader.
+      assertNotNull("Test resource /test.docx not found", is);
+      String text =
+         service_.getDocumentReader("application/vnd.openxmlformats-officedocument.wordprocessingml.document")
+            .getContentAsText(is);
+      assertNotNull("Reader returned null", text);
+      System.out.println("text [" + text + "]");
+
+      /*
+       * Disabled exact comparison, kept for reference.
+       *
+       * String etalon = "Hello.\n" +"This is the test document 12345\n"
+       * +"Table\n" +"Title One Two\n" +"Hello_Title Hello_One Hello_Two\n";
+       * System.out.println("etalon ["+etalon+"]");
+       * System.out.println("["+text.length()+"] ["+etalon.length()+"]");
+       * assertEquals("Wrong string returned",etalon ,text );
+       */
+   }
+}
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/test.docx
===================================================================
(Binary files differ)
Property changes on:
core/branches/2.3.x/exo.core.component.document/src/test/resources/test.docx
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/test.pptx
===================================================================
(Binary files differ)
Property changes on:
core/branches/2.3.x/exo.core.component.document/src/test/resources/test.pptx
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/test.xlsx
===================================================================
(Binary files differ)
Property changes on:
core/branches/2.3.x/exo.core.component.document/src/test/resources/test.xlsx
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Modified: core/branches/2.3.x/pom.xml
===================================================================
--- core/branches/2.3.x/pom.xml 2010-08-20 09:49:54 UTC (rev 2957)
+++ core/branches/2.3.x/pom.xml 2010-08-20 10:31:38 UTC (rev 2958)
@@ -295,14 +295,20 @@
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
- <version>3.0.2-FINAL</version>
+ <version>3.6</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
- <version>3.0.2-FINAL</version>
+ <version>3.6</version>
</dependency>
+
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-ooxml</artifactId>
+ <version>3.6</version>
+ </dependency>
<dependency>
<groupId>com.novell.ldap</groupId>