[exo-jcr-commits] exo-jcr SVN: r2959 - in core/trunk: exo.core.component.document and 4 other directories.

do-not-reply at jboss.org do-not-reply at jboss.org
Fri Aug 20 06:38:24 EDT 2010


Author: nzamosenchuk
Date: 2010-08-20 06:38:23 -0400 (Fri, 20 Aug 2010)
New Revision: 2959

Added:
   core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java
   core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java
   core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java
   core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXExcelDocumentReader.java
   core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java
   core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java
   core/trunk/exo.core.component.document/src/test/resources/test.docx
   core/trunk/exo.core.component.document/src/test/resources/test.pptx
   core/trunk/exo.core.component.document/src/test/resources/test.xlsx
Modified:
   core/trunk/exo.core.component.document/pom.xml
   core/trunk/exo.core.component.document/src/main/resources/conf/portal/configuration.xml
   core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java
   core/trunk/pom.xml
Log:
EXOJCR-886: adding document readers for MS 2007 file formats

Modified: core/trunk/exo.core.component.document/pom.xml
===================================================================
--- core/trunk/exo.core.component.document/pom.xml	2010-08-20 10:31:38 UTC (rev 2958)
+++ core/trunk/exo.core.component.document/pom.xml	2010-08-20 10:38:23 UTC (rev 2959)
@@ -82,6 +82,18 @@
             </exclusion>
          </exclusions>
       </dependency>
+
+      <dependency>
+         <groupId>org.apache.poi</groupId>
+         <artifactId>poi-ooxml</artifactId>
+         <exclusions>
+            <exclusion>
+               <groupId>log4j</groupId>
+               <artifactId>log4j</artifactId>
+            </exclusion>
+         </exclusions>
+      </dependency>
+
    </dependencies>
    <build>
       <testResources>
@@ -93,9 +105,12 @@
                <include>**/*.drl</include>
                <include>**/*.vm</include>
                <include>**/*.doc</include>
+               <include>**/*.docx</include>
                <include>**/*.dot</include>
                <include>**/*.xls</include>
+               <include>**/*.xlsx</include>
                <include>**/*.ppt</include>
+               <include>**/*.pptx</include>
                <include>**/*.txt</include>
                <include>**/*.tiff</include>
                <include>**/*.pdf</include>

Added: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java	                        (rev 0)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java	2010-08-20 10:38:23 UTC (rev 2959)
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) 2009 eXo Platform SAS.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
+ */
+package org.exoplatform.services.document.impl;
+
+import org.apache.poi.hssf.usermodel.HSSFDateUtil;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
+import org.apache.poi.xssf.usermodel.XSSFCell;
+import org.apache.poi.xssf.usermodel.XSSFCellStyle;
+import org.apache.poi.xssf.usermodel.XSSFRow;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.exoplatform.services.document.DocumentReadException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Properties;
+
+/**
+ * Created by The eXo Platform SAS. A parser of Microsoft Excel 2007 files (xlsx).
+ * 
+ * @author <a href="mailto:phunghainam at gmail.com">Phung Hai Nam</a>
+ * @author Gennady Azarenkov
+ * @author <a href="mailto:nikolazius at gmail.com">Nikolay Zamosenchuk</a>
+ * @version $Id: MSXExcelDocumentReader.java 34360 2009-07-22 23:58:59Z nzamosenchuk $
+ *
+ */
+public class MSXExcelDocumentReader extends BaseDocumentReader
+{
+   /** Pattern for date cells; SimpleDateFormat is not thread-safe, so formatters are built per call. */
+   private static final String DATE_FORMAT_PATTERN = "yyyy-MM-dd HH:mm:ss.SSSZ";
+
+   /**
+    * @see org.exoplatform.services.document.DocumentReader#getMimeTypes()
+    */
+   public String[] getMimeTypes()
+   {
+      return new String[]{"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"};
+   }
+
+   /**
+    * Returns only a text from .xlsx file content. Cells are read sheet by sheet and row by row,
+    * each value followed by one space. The stream is closed before this method returns.
+    * 
+    * @param is an input stream with .xlsx file content.
+    * @return The string only with text from file content; empty if the workbook cannot be opened.
+    * @throws NullPointerException if the input stream is null.
+    */
+   public String getContentAsText(InputStream is) throws IOException, DocumentReadException
+   {
+      if (is == null)
+      {
+         throw new NullPointerException("InputStream is null.");
+      }
+      StringBuilder builder = new StringBuilder();
+      try
+      {
+         // Best effort: content that cannot be opened as a workbook yields no text, not a failure.
+         XSSFWorkbook wb;
+         try
+         {
+            wb = new XSSFWorkbook(is);
+         }
+         catch (IOException e)
+         {
+            return "";
+         }
+         catch (OpenXML4JRuntimeException e)
+         {
+            return "";
+         }
+         SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT_PATTERN);
+         for (int sheetNum = 0; sheetNum < wb.getNumberOfSheets(); sheetNum++)
+         {
+            XSSFSheet sheet = wb.getSheetAt(sheetNum);
+            if (sheet == null)
+            {
+               continue;
+            }
+            for (int rowNum = sheet.getFirstRowNum(); rowNum <= sheet.getLastRowNum(); rowNum++)
+            {
+               XSSFRow row = sheet.getRow(rowNum);
+               if (row == null)
+               {
+                  continue;
+               }
+               int lastCell = row.getLastCellNum();
+               for (int k = 0; k < lastCell; k++)
+               {
+                  appendCellText(builder, row.getCell(k), dateFormat);
+               }
+            }
+         }
+      }
+      finally
+      {
+         try
+         {
+            is.close();
+         }
+         catch (IOException ignored)
+         {
+            // A failure to close must not discard the text that was already extracted.
+         }
+      }
+      return builder.toString();
+   }
+
+   public String getContentAsText(InputStream is, String encoding) throws IOException, DocumentReadException
+   {
+      // Ignore encoding
+      return getContentAsText(is);
+   }
+
+   /**
+    * @see org.exoplatform.services.document.DocumentReader#getProperties(java.io.InputStream)
+    */
+   public Properties getProperties(InputStream is) throws IOException, DocumentReadException
+   {
+      POIPropertiesReader reader = new POIPropertiesReader();
+      reader.readDCProperties(is);
+      return reader.getProperties();
+   }
+
+   /**
+    * Tells whether a numeric cell holds a valid Excel date and its style uses one of the
+    * date/time data format indexes listed below.
+    */
+   public static boolean isCellDateFormatted(XSSFCell cell)
+   {
+      if (!HSSFDateUtil.isValidExcelDate(cell.getNumericCellValue()))
+      {
+         return false;
+      }
+      int format = cell.getCellStyle().getDataFormat();
+      switch (format)
+      {
+         case 0xe : // m/d/yy
+         case 0xf : // d-mmm-yy
+         case 0x10 : // d-mmm
+         case 0x11 : // mmm-yy
+         case 0x12 : // h:mm AM/PM
+         case 0x13 : // h:mm:ss AM/PM
+         case 0x14 : // h:mm
+         case 0x15 : // h:mm:ss
+         case 0x16 : // m/d/yy h:mm
+         case 0x2d : // mm:ss
+         case 0x2e : // [h]:mm:ss
+         case 0x2f : // mm:ss.0
+
+         case 0xa5 : // ??
+         case 0xa7 : // ??
+         case 0xa9 : // ??
+
+         case 0xac : // mm:dd:yy not specified in javadoc
+         case 0xad : // yyyy-mm-dd not specified in javadoc
+         case 0xae : // mm:dd:yyyy not specified in javadoc
+         case 0xaf : // m:d:yy not specified in javadoc
+            return true;
+         default :
+            return false;
+      }
+   }
+
+   /** Appends one cell's text plus a trailing space; null cells and cell types without a case are skipped. */
+   private static void appendCellText(StringBuilder builder, XSSFCell cell, SimpleDateFormat dateFormat)
+   {
+      if (cell == null)
+      {
+         return;
+      }
+      switch (cell.getCellType())
+      {
+         case XSSFCell.CELL_TYPE_NUMERIC :
+            if (isCellDateFormatted(cell))
+            {
+               Date date = HSSFDateUtil.getJavaDate(cell.getNumericCellValue());
+               builder.append(dateFormat.format(date)).append(" ");
+            }
+            else
+            {
+               builder.append(cell.getNumericCellValue()).append(" ");
+            }
+            break;
+         case XSSFCell.CELL_TYPE_FORMULA : // the formula text itself is appended, not an evaluated result
+            builder.append(cell.getCellFormula()).append(" ");
+            break;
+         case XSSFCell.CELL_TYPE_BOOLEAN :
+            builder.append(cell.getBooleanCellValue()).append(" ");
+            break;
+         case XSSFCell.CELL_TYPE_ERROR :
+            builder.append(cell.getErrorCellValue()).append(" ");
+            break;
+         case XSSFCell.CELL_TYPE_STRING :
+            builder.append(cell.getStringCellValue()).append(" ");
+            break;
+         default :
+            break;
+      }
+   }
+}

Added: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java	                        (rev 0)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java	2010-08-20 10:38:23 UTC (rev 2959)
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2009 eXo Platform SAS.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
+ */
+package org.exoplatform.services.document.impl;
+
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.xmlbeans.XmlException;
+import org.exoplatform.services.document.DocumentReadException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Properties;
+
+/**
+ * Created by The eXo Platform SAS. A parser of Microsoft PowerPoint 2007 files (pptx).
+ * 
+ * @author <a href="mailto:phunghainam at gmail.com">Phung Hai Nam</a>
+ * @author Gennady Azarenkov
+ * @author <a href="mailto:nikolazius at gmail.com">Nikolay Zamosenchuk</a>
+ * @version $Id: MSXPPTDocumentReader.java 34360 2009-07-22 23:58:59Z nzamosenchuk $
+ */
+public class MSXPPTDocumentReader extends BaseDocumentReader
+{
+
+   /**
+    * @see org.exoplatform.services.document.DocumentReader#getMimeTypes()
+    */
+   public String[] getMimeTypes()
+   {
+      return new String[]{"application/vnd.openxmlformats-officedocument.presentationml.presentation"};
+   }
+
+   /**
+    * Returns only a text from .pptx file content.
+    * 
+    * @param is an input stream with .pptx file content.
+    * @return The string only with text from file content.
+    */
+   public String getContentAsText(InputStream is) throws IOException, DocumentReadException
+   {
+      if (is == null)
+      {
+         throw new NullPointerException("InputStream is null.");
+      }
+      try
+      {
+         XSLFPowerPointExtractor ppe;
+         try
+         {
+            ppe = new XSLFPowerPointExtractor(OPCPackage.open(is));
+         }
+         catch (IOException e)
+         {
+            return "";
+         }
+         catch (OpenXML4JRuntimeException e)
+         {
+            return "";
+         }
+         catch (OpenXML4JException e)
+         {
+            return "";
+         }
+         catch (XmlException e)
+         {
+            return "";
+         }
+         return ppe.getText(true, true);
+      }
+      finally
+      {
+         if (is != null)
+         {
+            try
+            {
+               is.close();
+            }
+            catch (IOException e)
+            {
+            }
+         }
+      }
+   }
+
+   public String getContentAsText(InputStream is, String encoding) throws IOException, DocumentReadException
+   {
+      // Ignore encoding
+      return getContentAsText(is);
+   }
+
+   /*
+    * (non-Javadoc)
+    * 
+    * @see org.exoplatform.services.document.DocumentReader#getProperties(java.io.
+    *      InputStream)
+    */
+   public Properties getProperties(InputStream is) throws IOException, DocumentReadException
+   {
+      POIPropertiesReader reader = new POIPropertiesReader();
+      reader.readDCProperties(is);
+      return reader.getProperties();
+   }
+
+}

Added: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java	                        (rev 0)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java	2010-08-20 10:38:23 UTC (rev 2959)
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2009 eXo Platform SAS.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
+ */
+package org.exoplatform.services.document.impl;
+
+import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.exoplatform.services.document.DocumentReadException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Properties;
+
+/**
+ * Created by The eXo Platform SAS. A parser of Microsoft Word 2007 files (docx).
+ * 
+ * @author <a href="mailto:phunghainam at gmail.com">Phung Hai Nam</a>
+ * @author Gennady Azarenkov
+ * @author <a href="mailto:nikolazius at gmail.com">Nikolay Zamosenchuk</a>
+ * @version $Id: MSXWordDocumentReader.java 34360 2009-07-22 23:58:59Z nzamosenchuk $
+ *
+ */
+public class MSXWordDocumentReader extends BaseDocumentReader
+{
+
+   /**
+    * @see org.exoplatform.services.document.DocumentReader#getMimeTypes()
+    */
+   public String[] getMimeTypes()
+   {
+      return new String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document"};
+   }
+
+   /**
+    * Returns only a text from .docx file content.
+    * 
+    * @param is an input stream with .docx file content.
+    * @return The string only with text from file content.
+    */
+   public String getContentAsText(InputStream is) throws IOException, DocumentReadException
+   {
+      if (is == null)
+      {
+         throw new NullPointerException("InputStream is null.");
+      }
+      String text = "";
+      try
+      {
+         XWPFDocument doc;
+         try
+         {
+            doc = new XWPFDocument(is);
+         }
+         catch (IOException e)
+         {
+            return "";
+         }
+         catch (OpenXML4JRuntimeException e)
+         {
+            return "";
+         }
+
+         XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+         text = extractor.getText();
+      }
+      finally
+      {
+         if (is != null)
+         {
+            try
+            {
+               is.close();
+            }
+            catch (IOException e)
+            {
+            }
+         }
+      }
+      return text.trim();
+   }
+
+   /**
+    * @see org.exoplatform.services.document.DocumentReader#getContentAsText(java.io.InputStream, java.lang.String)
+    */
+   public String getContentAsText(InputStream is, String encoding) throws IOException, DocumentReadException
+   {
+      // Ignore encoding
+      return getContentAsText(is);
+   }
+
+   /**
+    * @see org.exoplatform.services.document.DocumentReader#getProperties(java.io.InputStream)
+    */
+   public Properties getProperties(InputStream is) throws IOException, DocumentReadException
+   {
+      POIPropertiesReader reader = new POIPropertiesReader();
+      reader.readDCProperties(is);
+      return reader.getProperties();
+   }
+
+}

Modified: core/trunk/exo.core.component.document/src/main/resources/conf/portal/configuration.xml
===================================================================
--- core/trunk/exo.core.component.document/src/main/resources/conf/portal/configuration.xml	2010-08-20 10:31:38 UTC (rev 2958)
+++ core/trunk/exo.core.component.document/src/main/resources/conf/portal/configuration.xml	2010-08-20 10:38:23 UTC (rev 2959)
@@ -1,25 +1,16 @@
 <?xml version="1.0" encoding="ISO-8859-1"?>
-<!--
+   <!--
 
-    Copyright (C) 2009 eXo Platform SAS.
-
-    This is free software; you can redistribute it and/or modify it
-    under the terms of the GNU Lesser General Public License as
-    published by the Free Software Foundation; either version 2.1 of
-    the License, or (at your option) any later version.
-
-    This software is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this software; if not, write to the Free
-    Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-    02110-1301 USA, or see the FSF site: http://www.fsf.org.
-
--->
-<configuration xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.exoplaform.org/xml/ns/kernel_1_0.xsd http://www.exoplaform.org/xml/ns/kernel_1_0.xsd"
+      Copyright (C) 2009 eXo Platform SAS. This is free software; you can redistribute it and/or modify it under the
+      terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of
+      the License, or (at your option) any later version. This software is distributed in the hope that it will be
+      useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+      PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU
+      Lesser General Public License along with this software; if not, write to the Free Software Foundation, Inc., 51
+      Franklin St, Fifth Floor, Boston, MA 02110-1301 USA, or see the FSF site: http://www.fsf.org.
+   -->
+<configuration xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+   xsi:schemaLocation="http://www.exoplaform.org/xml/ns/kernel_1_0.xsd http://www.exoplaform.org/xml/ns/kernel_1_0.xsd"
    xmlns="http://www.exoplaform.org/xml/ns/kernel_1_0.xsd">
 
    <component>
@@ -55,6 +46,20 @@
          </component-plugin>
 
          <component-plugin>
+            <name>document.readerMSXWord</name>
+            <set-method>addDocumentReader</set-method>
+            <type>org.exoplatform.services.document.impl.MSXWordDocumentReader</type>
+            <description>to read the ms word inputstream</description>
+            <init-params>
+               <values-param>
+                  <name>document.known.typesMSXWord</name>
+                  <description>description</description>
+                  <value>application/msword</value>
+               </values-param>
+            </init-params>
+         </component-plugin>
+
+         <component-plugin>
             <name>document.readerMSExcel</name>
             <set-method>addDocumentReader</set-method>
             <type>org.exoplatform.services.document.impl.MSExcelDocumentReader</type>
@@ -69,6 +74,20 @@
          </component-plugin>
 
          <component-plugin>
+            <name>document.readerMSXExcel</name>
+            <set-method>addDocumentReader</set-method>
+            <type>org.exoplatform.services.document.impl.MSXExcelDocumentReader</type>
+            <description>to read the ms excel inputstream</description>
+            <init-params>
+               <values-param>
+                  <name>document.known.typesMSXExcel</name>
+                  <description>description</description>
+                  <value>application/excel</value>
+               </values-param>
+            </init-params>
+         </component-plugin>
+
+         <component-plugin>
             <name>document.readerMSOutlook</name>
             <set-method>addDocumentReader</set-method>
             <type>org.exoplatform.services.document.impl.MSOutlookDocumentReader</type>
@@ -97,6 +116,20 @@
          </component-plugin>
 
          <component-plugin>
+            <name>MSXPPTdocument.reader</name>
+            <set-method>addDocumentReader</set-method>
+            <type>org.exoplatform.services.document.impl.MSXPPTDocumentReader</type>
+            <description>to read the ms pptx inputstream</description>
+            <init-params>
+               <values-param>
+                  <name>document.known.typesPPTX</name>
+                  <description>description</description>
+                  <value>application/ppt</value>
+               </values-param>
+            </init-params>
+         </component-plugin>
+
+         <component-plugin>
             <name>document.readerHTML</name>
             <set-method>addDocumentReader</set-method>
             <type>org.exoplatform.services.document.impl.HTMLDocumentReader</type>
@@ -135,11 +168,10 @@
                   <description>description</description>
                   <value>text/plain</value>
                </values-param>
-               <!-- values-param>
-                  <name>defaultEncoding</name>
-                  <description>description</description>
-                  <value>UTF-8</value>
-               </values-param -->
+               <!--
+                  values-param> <name>defaultEncoding</name> <description>description</description> <value>UTF-8</value>
+                  </values-param
+               -->
             </init-params>
          </component-plugin>
 

Modified: core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java
===================================================================
--- core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java	2010-08-20 10:31:38 UTC (rev 2958)
+++ core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java	2010-08-20 10:38:23 UTC (rev 2959)
@@ -24,6 +24,9 @@
 import org.exoplatform.services.document.impl.MSExcelDocumentReader;
 import org.exoplatform.services.document.impl.MSOutlookDocumentReader;
 import org.exoplatform.services.document.impl.MSWordDocumentReader;
+import org.exoplatform.services.document.impl.MSXExcelDocumentReader;
+import org.exoplatform.services.document.impl.MSXPPTDocumentReader;
+import org.exoplatform.services.document.impl.MSXWordDocumentReader;
 import org.exoplatform.services.document.impl.OpenOfficeDocumentReader;
 import org.exoplatform.services.document.impl.PDFDocumentReader;
 import org.exoplatform.services.document.impl.PPTDocumentReader;
@@ -52,11 +55,14 @@
       serviceList = new ArrayList<DocumentReader>();
       serviceList.add(new HTMLDocumentReader(null));
       serviceList.add(new MSExcelDocumentReader());
+      serviceList.add(new MSXExcelDocumentReader());
       serviceList.add(new MSOutlookDocumentReader());
       serviceList.add(new MSWordDocumentReader());
+      serviceList.add(new MSXWordDocumentReader());
       serviceList.add(new OpenOfficeDocumentReader());
       serviceList.add(new PDFDocumentReader());
       serviceList.add(new PPTDocumentReader());
+      serviceList.add(new MSXPPTDocumentReader());
       serviceList.add(new TextPlainDocumentReader(new InitParams()));
       serviceList.add(new XMLDocumentReader());
    }

Added: core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXExcelDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXExcelDocumentReader.java	                        (rev 0)
+++ core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXExcelDocumentReader.java	2010-08-20 10:38:23 UTC (rev 2959)
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2009 eXo Platform SAS.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
+ */
+package org.exoplatform.services.document.test;
+
+import org.exoplatform.services.document.DocumentReaderService;
+
+import java.io.InputStream;
+
+/**
+ * Created by The eXo Platform SAS Author : Sergey Karpenko
+ * <sergey.karpenko at exoplatform.com.ua>
+ * 
+ * @version $Id: $
+ */
+
+public class TestMSXExcelDocumentReader extends BaseStandaloneTest
+{
+   DocumentReaderService service_; // looked up from the container in setUp()
+
+   @Override
+   public void setUp() throws Exception
+   {
+      super.setUp();
+      service_ = (DocumentReaderService)getComponentInstanceOfType(DocumentReaderService.class);
+   }
+
+   public void testGetContentAsString() throws Exception
+   {
+      InputStream is = TestMSXExcelDocumentReader.class.getResourceAsStream("/test.xlsx");
+      assertNotNull("Test resource /test.xlsx is missing", is);
+      String text = service_.getDocumentReader("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet").getContentAsText(is);
+      assertTrue("No text was extracted from /test.xlsx", text.length() > 0);
+      /* Reference text kept from the original test; the exact comparison remains commented out.
+       * String etalon =
+       * "Ronaldo Eric Cantona Kaka Ronaldonho ID Group Functionality Executor Begin End Tested "
+       * +
+       * "XNNL XNNL Xay dung vung quan li nguyen lieu NamPH 2005-02-02 00:00:00.000+0200 2005-10-02 00:00:00.000+0300 Tested "
+       * +
+       * "XNNL XNNL XNNL_HAVEST NamPH 1223554.0 2005-10-01 00:00:00.000+0300 Tested "
+       * +
+       * "XNNL XNNL XNNL_PIECE_OF_GROUND NamPH 2005-10-12 00:00:00.000+0300 2005-10-02 00:00:00.000+0300 Tested "
+       * +"XNNL XNNL XNNL_76 NamPH TRUE 1984-12-10 00:00:00.000+0200 No "
+       * +"XNNL XNNL XNNL_CREATE_REAP NamPH none 2005-10-03 00:00:00.000+0300 No "
+       * +
+       * "XNNL XNNL XNNL_SCALE NamPH 1984-12-10 00:00:00.000+0200 2005-10-05 00:00:00.000+0300 Tested "
+       * +
+       * "XNNL XNNL LASUCO_PROJECT NamPH 2005-10-05 00:00:00.000+0300 2005-10-06 00:00:00.000+0300 No "
+       * +"XNNL XNNL LASUCO_PROJECT NamPH Tested "+
+       * "XNNL XNNL XNNL_BRANCH NamPH 2005-12-12 00:00:00.000+0200 2005-06-10 00:00:00.000+0300 Tested "
+       * +
+       * "XNNL XNNL XNNL_SUGAR_RACE NamPH 2005-05-09 00:00:00.000+0300 2005-06-10 00:00:00.000+0300 No "
+       * +
+       * "XNNL XNNL F_XNNL_DISTRI NamPH 2005-05-09 00:00:00.000+0300 2005-06-10 00:00:00.000+0300 Tested "
+       * +
+       * "XNNL XNNL XNNL_LASUCO_USER NamPH 2005-09-09 00:00:00.000+0300 2005-06-10 00:00:00.000+0300 No "
+       * ; System.out.println(" etalon ["+etalon+"]");
+       * assertEquals("String length is incorect",etalon.length(),text.length());
+       * assertEquals("Wrong string returned",etalon ,text );
+       */
+   }
+}

Added: core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java	                        (rev 0)
+++ core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java	2010-08-20 10:38:23 UTC (rev 2959)
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2009 eXo Platform SAS.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
+ */
+package org.exoplatform.services.document.test;
+
+import org.exoplatform.services.document.DocumentReaderService;
+
+import java.io.InputStream;
+
+/**
+ * Created by The eXo Platform SAS Author : Sergey Karpenko
+ * <sergey.karpenko at exoplatform.com.ua>
+ * 
+ * @version $Id: $
+ */
+
+public class TestMSXPPTDocumentReader extends BaseStandaloneTest
+{
+   DocumentReaderService service_;
+
+   @Override
+   public void setUp() throws Exception
+   {
+      super.setUp();
+      service_ = (DocumentReaderService)getComponentInstanceOfType(DocumentReaderService.class);
+   }
+
+   public void testGetContentAsString() throws Exception
+   {
+      InputStream is = TestMSXPPTDocumentReader.class.getResourceAsStream("/test.pptx");
+      String text =
+         service_.getDocumentReader("application/vnd.openxmlformats-officedocument.presentationml.presentation")
+            .getContentAsText(is);
+      String etalon =
+         "TEST POWERPOINT\n" + "Manchester United \n" + "AC Milan\n" + "SLIDE 2 \n" + "Eric Cantona\n" + "Kaka\n"
+            + "Ronaldo\n" + "The natural scients universitys\n";
+
+      assertEquals("Wrong string returned", etalon, text);
+   }
+}

Added: core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java	                        (rev 0)
+++ core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java	2010-08-20 10:38:23 UTC (rev 2959)
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2009 eXo Platform SAS.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this software; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
+ */
+package org.exoplatform.services.document.test;
+
+import org.exoplatform.services.document.DocumentReaderService;
+
+import java.io.InputStream;
+
+/**
+ * @author <a href="mailto:nikolazius at gmail.com">Nikolay Zamosenchuk</a>
+ * @version $Id: TestMSXWordDocumentReader.java 34360 2009-07-22 23:58:59Z nzamosenchuk $
+ *
+ */
+public class TestMSXWordDocumentReader extends BaseStandaloneTest
+{
+   DocumentReaderService service_;
+
+   @Override
+   public void setUp() throws Exception
+   {
+      super.setUp();
+      service_ = (DocumentReaderService)getComponentInstanceOfType(DocumentReaderService.class);
+   }
+
+   public void testGetContentAsStringDoc() throws Exception
+   {
+      InputStream is = TestMSXWordDocumentReader.class.getResourceAsStream("/test.docx");
+      String text = service_.getDocumentReader("application/vnd.openxmlformats-officedocument.wordprocessingml.document").getContentAsText(is);
+      System.out.println("text [" + text + "]");
+
+      /*
+       * String etalon = "Hello.\n" +"This is the test document 12345\n"
+       * +"Table\n" +"Title One Two\n" +"Hello_Title Hello_One Hello_Two\n";
+       * System.out.println("etalon ["+etalon+"]");
+       * System.out.println("["+text.length()+"] ["+etalon.length()+"]");
+       * assertEquals("Wrong string returned",etalon ,text );
+       */
+   }
+}

Added: core/trunk/exo.core.component.document/src/test/resources/test.docx
===================================================================
(Binary files differ)


Property changes on: core/trunk/exo.core.component.document/src/test/resources/test.docx
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: core/trunk/exo.core.component.document/src/test/resources/test.pptx
===================================================================
(Binary files differ)


Property changes on: core/trunk/exo.core.component.document/src/test/resources/test.pptx
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: core/trunk/exo.core.component.document/src/test/resources/test.xlsx
===================================================================
(Binary files differ)


Property changes on: core/trunk/exo.core.component.document/src/test/resources/test.xlsx
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: core/trunk/pom.xml
===================================================================
--- core/trunk/pom.xml	2010-08-20 10:31:38 UTC (rev 2958)
+++ core/trunk/pom.xml	2010-08-20 10:38:23 UTC (rev 2959)
@@ -301,16 +301,22 @@
          <dependency>
             <groupId>org.apache.poi</groupId>
             <artifactId>poi</artifactId>
-            <version>3.0.2-FINAL</version>
+            <version>3.6</version>
          </dependency>
 
          <dependency>
             <groupId>org.apache.poi</groupId>
             <artifactId>poi-scratchpad</artifactId>
-            <version>3.0.2-FINAL</version>
+            <version>3.6</version>
          </dependency>
-         
+
          <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-ooxml</artifactId>
+            <version>3.6</version>
+         </dependency>
+
+         <dependency>
             <groupId>com.novell.ldap</groupId>
             <artifactId>jldap</artifactId>
             <version>4.3</version>



More information about the exo-jcr-commits mailing list