[exo-jcr-commits] exo-jcr SVN: r3028 - in core/trunk/exo.core.component.document/src: main/resources/conf/portal and 2 other directories.

do-not-reply at jboss.org do-not-reply at jboss.org
Tue Aug 31 07:43:22 EDT 2010


Author: nzamosenchuk
Date: 2010-08-31 07:43:20 -0400 (Tue, 31 Aug 2010)
New Revision: 3028

Modified:
   core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java
   core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java
   core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSOutlookDocumentReader.java
   core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSWordDocumentReader.java
   core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java
   core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java
   core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java
   core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/POIPropertiesReader.java
   core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java
   core/trunk/exo.core.component.document/src/main/resources/conf/portal/configuration.xml
   core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java
   core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java
   core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestOpenOfficeDocumentReader.java
   core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestPropertiesExtracting.java
   core/trunk/exo.core.component.document/src/test/resources/test.doc
   core/trunk/exo.core.component.document/src/test/resources/test.ppt
   core/trunk/exo.core.component.document/src/test/resources/test.xls
Log:
EXOJCR-886: updating tests and DocumentReaders according to remarks. Implementing property extraction from OOXML (MS 2007) formats.

Modified: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java	2010-08-31 11:43:20 UTC (rev 3028)
@@ -18,7 +18,6 @@
  */
 package org.exoplatform.services.document.impl;
 
-import org.exoplatform.container.xml.InitParams;
 import org.exoplatform.services.document.DocumentReadException;
 import org.htmlparser.Parser;
 import org.htmlparser.beans.StringBean;
@@ -43,7 +42,7 @@
     * 
     * @param params the container parameters.
     */
-   public HTMLDocumentReader(InitParams params)
+   public HTMLDocumentReader()
    {
    }
 
@@ -54,7 +53,7 @@
     */
    public String[] getMimeTypes()
    {
-      return new String[]{"text/html","application/x-groovy+html"};
+      return new String[]{"text/html", "application/x-groovy+html"};
    }
 
    /**
@@ -77,7 +76,9 @@
          int len;
          ByteArrayOutputStream bos = new ByteArrayOutputStream();
          while ((len = is.read(buffer)) > 0)
+         {
             bos.write(buffer, 0, len);
+         }
          bos.close();
 
          String html = new String(bos.toByteArray());

Modified: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java	2010-08-31 11:43:20 UTC (rev 3028)
@@ -42,8 +42,8 @@
 public class MSExcelDocumentReader extends BaseDocumentReader
 {
 
-   private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSZ");
-
+   private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSSZ";
+   
    /**
     * Get the application/excel mime type.
     * 
@@ -68,9 +68,16 @@
       }
 
       StringBuilder builder = new StringBuilder("");
+      
+      SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
 
       try
       {
+         if (is.available() == 0)
+         {
+            return "";
+         }
+         
          HSSFWorkbook wb;
          try
          {
@@ -78,7 +85,7 @@
          }
          catch (IOException e)
          {
-            return builder.toString();
+            throw new DocumentReadException("Can't open spreadsheet.", e);
          }
          for (int sheetNum = 0; sheetNum < wb.getNumberOfSheets(); sheetNum++)
          {
@@ -104,7 +111,7 @@
                                  if (isCellDateFormatted(cell))
                                  {
                                     Date date = HSSFDateUtil.getJavaDate(d);
-                                    String cellText = this.DATE_FORMAT.format(date);
+                                    String cellText = dateFormat.format(date);
                                     builder.append(cellText).append(" ");
                                  }
                                  else

Modified: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSOutlookDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSOutlookDocumentReader.java	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSOutlookDocumentReader.java	2010-08-31 11:43:20 UTC (rev 3028)
@@ -61,6 +61,11 @@
       }
       try
       {
+         if (is.available() == 0)
+         {
+            return "";
+         }
+         
          MAPIMessage message;
          try
          {
@@ -68,12 +73,12 @@
          }
          catch (IOException e)
          {
-            return "";
+            throw new DocumentReadException("Can't open message.", e);
          }
-         StringBuffer buffer = new StringBuffer();
+         StringBuilder builder = new StringBuilder();
          try
          {
-            buffer.append(message.getDisplayFrom()).append('\n');
+            builder.append(message.getDisplayFrom()).append('\n');
          }
          catch (ChunkNotFoundException e)
          {
@@ -81,7 +86,7 @@
          }
          try
          {
-            buffer.append(message.getDisplayTo()).append('\n');
+            builder.append(message.getDisplayTo()).append('\n');
          }
          catch (ChunkNotFoundException e)
          {
@@ -89,7 +94,7 @@
          }
          try
          {
-            buffer.append(message.getSubject()).append('\n');
+            builder.append(message.getSubject()).append('\n');
          }
          catch (ChunkNotFoundException e)
          {
@@ -97,13 +102,13 @@
          }
          try
          {
-            buffer.append(message.getTextBody());
+            builder.append(message.getTextBody());
          }
          catch (ChunkNotFoundException e)
          {
             // "textBody" is empty
          }
-         return buffer.toString();
+         return builder.toString();
 
       }
       finally

Modified: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSWordDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSWordDocumentReader.java	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSWordDocumentReader.java	2010-08-31 11:43:20 UTC (rev 3028)
@@ -61,6 +61,11 @@
       String text = "";
       try
       {
+         if (is.available() == 0)
+         {
+            return "";
+         }
+         
          HWPFDocument doc;
          try
          {
@@ -68,7 +73,7 @@
          }
          catch (IOException e)
          {
-            return "";
+            throw new DocumentReadException("Can't open document.", e);
          }
 
          Range range = doc.getRange();
@@ -77,6 +82,7 @@
       finally
       {
          if (is != null)
+         {
             try
             {
                is.close();
@@ -84,6 +90,7 @@
             catch (IOException e)
             {
             }
+         }
       }
       return text.trim();
    }

Modified: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java	2010-08-31 11:43:20 UTC (rev 3028)
@@ -18,8 +18,11 @@
  */
 package org.exoplatform.services.document.impl;
 
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.POIXMLPropertiesTextExtractor;
 import org.apache.poi.hssf.usermodel.HSSFDateUtil;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.xssf.usermodel.XSSFCell;
 import org.apache.poi.xssf.usermodel.XSSFCellStyle;
 import org.apache.poi.xssf.usermodel.XSSFRow;
@@ -44,9 +47,8 @@
  */
 public class MSXExcelDocumentReader extends BaseDocumentReader
 {
+   private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSSZ";
 
-   private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSZ");
-
    /**
     * @see org.exoplatform.services.document.DocumentReader#getMimeTypes()
     */
@@ -69,17 +71,25 @@
       }
 
       StringBuilder builder = new StringBuilder("");
+      SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
 
       try
       {
+         if (is.available() == 0)
+         {
+            return "";
+         }
+
          XSSFWorkbook wb;
          try
          {
             wb = new XSSFWorkbook(is);
+            OPCPackage pkg;
+
          }
          catch (IOException e)
          {
-            return builder.toString();
+            throw new DocumentReadException("Can't open spreadsheet.", e);
          }
          catch (OpenXML4JRuntimeException e)
          {
@@ -109,7 +119,7 @@
                                  if (isCellDateFormatted(cell))
                                  {
                                     Date date = HSSFDateUtil.getJavaDate(d);
-                                    String cellText = this.DATE_FORMAT.format(date);
+                                    String cellText = dateFormat.format(date);
                                     builder.append(cellText).append(" ");
                                  }
                                  else
@@ -171,7 +181,7 @@
    public Properties getProperties(InputStream is) throws IOException, DocumentReadException
    {
       POIPropertiesReader reader = new POIPropertiesReader();
-      reader.readDCProperties(is);
+      reader.readDCProperties(new XSSFWorkbook(is));
       return reader.getProperties();
    }
 

Modified: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java	2010-08-31 11:43:20 UTC (rev 3028)
@@ -18,9 +18,11 @@
  */
 package org.exoplatform.services.document.impl;
 
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.xslf.XSLFSlideShow;
 import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
 import org.apache.xmlbeans.XmlException;
 import org.exoplatform.services.document.DocumentReadException;
@@ -62,6 +64,11 @@
       }
       try
       {
+         if (is.available() == 0)
+         {
+            return "";
+         }
+         
          XSLFPowerPointExtractor ppe;
          try
          {
@@ -69,19 +76,19 @@
          }
          catch (IOException e)
          {
-            return "";
+            throw new DocumentReadException("Can't open presentation.", e);
          }
          catch (OpenXML4JRuntimeException e)
          {
-            return "";
+            throw new DocumentReadException("Can't open presentation.", e);
          }
          catch (OpenXML4JException e)
          {
-            return "";
+            throw new DocumentReadException("Can't open presentation.", e);
          }
          catch (XmlException e)
          {
-            return "";
+            throw new DocumentReadException("Can't open presentation.", e);
          }
          return ppe.getText(true, true);
       }
@@ -115,7 +122,22 @@
    public Properties getProperties(InputStream is) throws IOException, DocumentReadException
    {
       POIPropertiesReader reader = new POIPropertiesReader();
-      reader.readDCProperties(is);
+      try
+      {
+         reader.readDCProperties(new XSLFSlideShow(OPCPackage.open(is)));
+      }
+      catch (InvalidFormatException e)
+      {
+         throw new DocumentReadException("Can't read properties from OOXML document", e);
+      }
+      catch (OpenXML4JException e)
+      {
+         throw new DocumentReadException("Can't read properties from OOXML document", e);
+      }
+      catch (XmlException e)
+      {
+         throw new DocumentReadException("Can't read properties from OOXML document", e);
+      }
       return reader.getProperties();
    }
 

Modified: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java	2010-08-31 11:43:20 UTC (rev 3028)
@@ -62,6 +62,11 @@
       String text = "";
       try
       {
+         if (is.available() == 0)
+         {
+            return "";
+         }
+         
          XWPFDocument doc;
          try
          {
@@ -69,11 +74,11 @@
          }
          catch (IOException e)
          {
-            return "";
+            throw new DocumentReadException("Can't open message.", e);
          }
          catch (OpenXML4JRuntimeException e)
          {
-            return "";
+            throw new DocumentReadException("Can't open message.", e);
          }
 
          XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
@@ -110,7 +115,7 @@
    public Properties getProperties(InputStream is) throws IOException, DocumentReadException
    {
       POIPropertiesReader reader = new POIPropertiesReader();
-      reader.readDCProperties(is);
+      reader.readDCProperties(new XWPFDocument(is));
       return reader.getProperties();
    }
 

Modified: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/POIPropertiesReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/POIPropertiesReader.java	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/POIPropertiesReader.java	2010-08-31 11:43:20 UTC (rev 3028)
@@ -18,11 +18,17 @@
  */
 package org.exoplatform.services.document.impl;
 
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.POIXMLPropertiesTextExtractor;
+import org.apache.poi.POIXMLProperties.CoreProperties;
+import org.apache.poi.POIXMLProperties.CustomProperties;
+import org.apache.poi.POIXMLProperties.ExtendedProperties;
 import org.apache.poi.hpsf.MarkUnsupportedException;
 import org.apache.poi.hpsf.NoPropertySetStreamException;
 import org.apache.poi.hpsf.PropertySet;
 import org.apache.poi.hpsf.PropertySetFactory;
 import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.openxml4j.util.Nullable;
 import org.apache.poi.poifs.eventfilesystem.POIFSReader;
 import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
 import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
@@ -51,6 +57,14 @@
       return props;
    }
 
+   /**
+    * Metadata extraction from OLE2 documents (legacy MS office file formats)
+    * 
+    * @param is
+    * @return
+    * @throws IOException
+    * @throws DocumentReadException
+    */
    public Properties readDCProperties(InputStream is) throws IOException, DocumentReadException
    {
       if (is == null)
@@ -89,23 +103,39 @@
                   SummaryInformation si = (SummaryInformation)ps;
 
                   if (si.getLastAuthor() != null && si.getLastAuthor().length() > 0)
+                  {
                      props.put(DCMetaData.CONTRIBUTOR, si.getLastAuthor());
+                  }
                   if (si.getComments() != null && si.getComments().length() > 0)
+                  {
                      props.put(DCMetaData.DESCRIPTION, si.getComments());
+                  }
                   if (si.getCreateDateTime() != null)
+                  {
                      props.put(DCMetaData.DATE, si.getCreateDateTime());
+                  }
                   if (si.getAuthor() != null && si.getAuthor().length() > 0)
+                  {
                      props.put(DCMetaData.CREATOR, si.getAuthor());
+                  }
                   if (si.getKeywords() != null && si.getKeywords().length() > 0)
+                  {
                      props.put(DCMetaData.SUBJECT, si.getKeywords());
+                  }
                   if (si.getLastSaveDateTime() != null)
+                  {
                      props.put(DCMetaData.DATE, si.getLastSaveDateTime());
+                  }
                   // if(docInfo.getProducer() != null)
                   // props.put(DCMetaData.PUBLISHER, docInfo.getProducer());
                   if (si.getSubject() != null && si.getSubject().length() > 0)
+                  {
                      props.put(DCMetaData.SUBJECT, si.getSubject());
+                  }
                   if (si.getTitle() != null && si.getTitle().length() > 0)
+                  {
                      props.put(DCMetaData.TITLE, si.getTitle());
+                  }
 
                }
             }
@@ -163,4 +193,56 @@
       return props;
    }
 
+   /**
+    * Metadata extraction from ooxml documents (MS 2007 office file formats)
+    * 
+    * @param document
+    * @return
+    * @throws IOException
+    * @throws DocumentReadException
+    */
+   public Properties readDCProperties(POIXMLDocument document) throws IOException, DocumentReadException
+   {
+
+      POIXMLPropertiesTextExtractor extractor = new POIXMLPropertiesTextExtractor(document);
+
+      CoreProperties coreProperties = extractor.getCoreProperties();
+
+      Nullable<String> lastModifiedBy = coreProperties.getUnderlyingProperties().getLastModifiedByProperty();
+      if (lastModifiedBy != null && lastModifiedBy.getValue() != null && lastModifiedBy.getValue().length() > 0)
+      {
+         props.put(DCMetaData.CONTRIBUTOR, lastModifiedBy.getValue());
+      }
+      if (coreProperties.getDescription() != null && coreProperties.getDescription().length() > 0)
+      {
+         props.put(DCMetaData.DESCRIPTION, coreProperties.getDescription());
+      }
+      if (coreProperties.getCreated() != null)
+      {
+         props.put(DCMetaData.DATE, coreProperties.getCreated());
+      }
+      if (coreProperties.getCreator() != null && coreProperties.getCreator().length() > 0)
+      {
+         props.put(DCMetaData.CREATOR, coreProperties.getCreator());
+      }
+      if (coreProperties.getSubject() != null && coreProperties.getSubject().length() > 0)
+      {
+         props.put(DCMetaData.SUBJECT, coreProperties.getSubject());
+      }
+      if (coreProperties.getModified() != null)
+      {
+         props.put(DCMetaData.DATE, coreProperties.getModified());
+      }
+      if (coreProperties.getSubject() != null && coreProperties.getSubject().length() > 0)
+      {
+         props.put(DCMetaData.SUBJECT, coreProperties.getSubject());
+      }
+      if (coreProperties.getTitle() != null && coreProperties.getTitle().length() > 0)
+      {
+         props.put(DCMetaData.TITLE, coreProperties.getTitle());
+      }
+
+      return props;
+   }
+
 }

Modified: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java	2010-08-31 11:43:20 UTC (rev 3028)
@@ -60,6 +60,12 @@
       }
       try
       {
+         
+         if (is.available() == 0)
+         {
+            return "";
+         }
+         
          PowerPointExtractor ppe;
          try
          {
@@ -67,13 +73,14 @@
          }
          catch (IOException e)
          {
-            return "";
+            throw new DocumentReadException("Can't open presentation.", e);
          }
          return ppe.getText(true, true);
       }
       finally
       {
          if (is != null)
+         {
             try
             {
                is.close();
@@ -81,6 +88,7 @@
             catch (IOException e)
             {
             }
+         }
       }
    }
 

Modified: core/trunk/exo.core.component.document/src/main/resources/conf/portal/configuration.xml
===================================================================
--- core/trunk/exo.core.component.document/src/main/resources/conf/portal/configuration.xml	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/main/resources/conf/portal/configuration.xml	2010-08-31 11:43:20 UTC (rev 3028)
@@ -22,13 +22,6 @@
             <set-method>addDocumentReader</set-method>
             <type>org.exoplatform.services.document.impl.PDFDocumentReader</type>
             <description>to read the pdf inputstream</description>
-            <init-params>
-               <values-param>
-                  <name>document.known.types</name>
-                  <description>description</description>
-                  <value>application/pdf</value>
-               </values-param>
-            </init-params>
          </component-plugin>
 
          <component-plugin>
@@ -36,13 +29,6 @@
             <set-method>addDocumentReader</set-method>
             <type>org.exoplatform.services.document.impl.MSWordDocumentReader</type>
             <description>to read the ms word inputstream</description>
-            <init-params>
-               <values-param>
-                  <name>document.known.typesMSWord</name>
-                  <description>description</description>
-                  <value>application/msword</value>
-               </values-param>
-            </init-params>
          </component-plugin>
 
          <component-plugin>
@@ -50,13 +36,6 @@
             <set-method>addDocumentReader</set-method>
             <type>org.exoplatform.services.document.impl.MSXWordDocumentReader</type>
             <description>to read the ms word inputstream</description>
-            <init-params>
-               <values-param>
-                  <name>document.known.typesMSXWord</name>
-                  <description>description</description>
-                  <value>application/msword</value>
-               </values-param>
-            </init-params>
          </component-plugin>
 
          <component-plugin>
@@ -64,13 +43,6 @@
             <set-method>addDocumentReader</set-method>
             <type>org.exoplatform.services.document.impl.MSExcelDocumentReader</type>
             <description>to read the ms excel inputstream</description>
-            <init-params>
-               <values-param>
-                  <name>document.known.typesMSExcel</name>
-                  <description>description</description>
-                  <value>application/excel</value>
-               </values-param>
-            </init-params>
          </component-plugin>
 
          <component-plugin>
@@ -78,13 +50,6 @@
             <set-method>addDocumentReader</set-method>
             <type>org.exoplatform.services.document.impl.MSXExcelDocumentReader</type>
             <description>to read the ms excel inputstream</description>
-            <init-params>
-               <values-param>
-                  <name>document.known.typesMSXExcel</name>
-                  <description>description</description>
-                  <value>application/excel</value>
-               </values-param>
-            </init-params>
          </component-plugin>
 
          <component-plugin>
@@ -92,13 +57,6 @@
             <set-method>addDocumentReader</set-method>
             <type>org.exoplatform.services.document.impl.MSOutlookDocumentReader</type>
             <description>to read the ms outlook inputstream</description>
-            <init-params>
-               <values-param>
-                  <name>document.known.typesMSOutlook</name>
-                  <description>description</description>
-                  <value>application/vnd.ms-outlook</value>
-               </values-param>
-            </init-params>
          </component-plugin>
 
          <component-plugin>
@@ -106,13 +64,6 @@
             <set-method>addDocumentReader</set-method>
             <type>org.exoplatform.services.document.impl.PPTDocumentReader</type>
             <description>to read the ms ppt inputstream</description>
-            <init-params>
-               <values-param>
-                  <name>document.known.typesPPT</name>
-                  <description>description</description>
-                  <value>application/ppt</value>
-               </values-param>
-            </init-params>
          </component-plugin>
 
          <component-plugin>
@@ -120,13 +71,6 @@
             <set-method>addDocumentReader</set-method>
             <type>org.exoplatform.services.document.impl.MSXPPTDocumentReader</type>
             <description>to read the ms pptx inputstream</description>
-            <init-params>
-               <values-param>
-                  <name>document.known.typesPPTX</name>
-                  <description>description</description>
-                  <value>application/ppt</value>
-               </values-param>
-            </init-params>
          </component-plugin>
 
          <component-plugin>
@@ -134,13 +78,6 @@
             <set-method>addDocumentReader</set-method>
             <type>org.exoplatform.services.document.impl.HTMLDocumentReader</type>
             <description>to read the html inputstream</description>
-            <init-params>
-               <values-param>
-                  <name>document.known.typesHTML</name>
-                  <description>description</description>
-                  <value>text/html</value>
-               </values-param>
-            </init-params>
          </component-plugin>
 
          <component-plugin>
@@ -148,13 +85,6 @@
             <set-method>addDocumentReader</set-method>
             <type>org.exoplatform.services.document.impl.XMLDocumentReader</type>
             <description>to read the xml inputstream</description>
-            <init-params>
-               <values-param>
-                  <name>document.known.typesXML</name>
-                  <description>description</description>
-                  <value>text/xml</value>
-               </values-param>
-            </init-params>
          </component-plugin>
 
          <component-plugin>
@@ -163,11 +93,6 @@
             <type>org.exoplatform.services.document.impl.TextPlainDocumentReader</type>
             <description>to read the plain text inputstream</description>
             <init-params>
-               <values-param>
-                  <name>document.known.typesTextPlain</name>
-                  <description>description</description>
-                  <value>text/plain</value>
-               </values-param>
                <!--
                   values-param> <name>defaultEncoding</name> <description>description</description> <value>UTF-8</value>
                   </values-param

Modified: core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java
===================================================================
--- core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestDocumentReadersIncomVals.java	2010-08-31 11:43:20 UTC (rev 3028)
@@ -53,7 +53,7 @@
    public TestDocumentReadersIncomVals()
    {
       serviceList = new ArrayList<DocumentReader>();
-      serviceList.add(new HTMLDocumentReader(null));
+      serviceList.add(new HTMLDocumentReader());
       serviceList.add(new MSExcelDocumentReader());
       serviceList.add(new MSXExcelDocumentReader());
       serviceList.add(new MSOutlookDocumentReader());

Modified: core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java	2010-08-31 11:43:20 UTC (rev 3028)
@@ -46,15 +46,8 @@
          String text =
             service.getDocumentReader("application/vnd.openxmlformats-officedocument.wordprocessingml.document")
                .getContentAsText(is);
-         System.out.println("text [" + text + "]");
-
-         /*
-          * String etalon = "Hello.\n" +"This is the test document 12345\n"
-          * +"Table\n" +"Title One Two\n" +"Hello_Title Hello_One Hello_Two\n";
-          * System.out.println("etalon ["+etalon+"]");
-          * System.out.println("["+text.length()+"] ["+etalon.length()+"]");
-          * assertEquals("Wrong string returned",etalon ,text );
-          */
+         assertTrue(text
+            .contains("Before the test starts there is a directions section, which takes a few minutes to read"));
       }
       finally
       {

Modified: core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestOpenOfficeDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestOpenOfficeDocumentReader.java	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestOpenOfficeDocumentReader.java	2010-08-31 11:43:20 UTC (rev 3028)
@@ -33,6 +33,7 @@
 {
    DocumentReaderService service;
 
+   @Override
    public void setUp() throws Exception
    {
       super.setUp();
@@ -45,41 +46,11 @@
       try
       {
          String text = service.getDocumentReader("application/vnd.oasis.opendocument.text").getContentAsText(is);
-         System.out.println("[" + text + "]");
-         /*
-          * String etalon = "Subscription:" +
-          * "\tEULA with add on warranties and non GPL viral effect (all Customer's development free of GPL license limitations). This agreement continues to be valid even if customers do not renew their subscription.\n"
-          * +
-          * "\tProduct documentation including user and admin guides to eXo platform portal, ECM, JCR, and Portlet Container. (currently we have all the docs accessible for free but it will not)\n"
-          * +
-          * "\tAccess to all Flash tutorial gives a visual guide to eXo platform and demonstrates a comprehensive tutorial that enhance the understanding of eXo products. (only part of Flash tutorials are accessible for free)\n"
-          * +
-          * "\tAdvanced Installer is an application that makes it quick and easy to install eXo platform products with simple clicks and allows better configuration to integrate existing database and directories (LDAP) in a multi platform environment.\n"
-          * +
-          * "\tUnlimited access to online premium forum (customer request tracking?) support, you will get answer from eXo platform company technical specialist employees. (TODO) \n"
-          * +
-          * "\tUnlimited email support within 48 hours response time for limited contact names per CPU.(i think we may join it with prev item using term customer request tracking with email notification)\n"
-          * +
-          * "\tUpdate alert will send out email periodically to subscribers all the latest change in eXo documentations, flash tutorials, and product services. (TODO)\n"
-          * +
-          * "\tTechnical code improvement alert will send out email periodically to subscribers to inform of all the latest code patches, latest version, latest code improvement download. (TODO)\n"
-          * +
-          * "\tKnowledge Base subscribers can access to all of eXo platform wiki knowledge bases. (TODO, i do not think we may put it into agreement yet)\n"
-          * +
-          * "\tFeature Request Priority As customers you have priorities to request the latest features improvement for any of eXo products next version. (what does it mean?)\n"
-          * + "\n" +
-          * "Subscription is annual and per CPU because it is the unit of load increase and so more support demand. Subscription advantages are:\n"
-          * + "\tEULA – it is obligatory to be Subscriber to get EULA\n" +
-          * "\tAdditional documentations (Guides, Flash tutorials)\n" +
-          * "\tAdditional software (Installer)\n" +
-          * "\tAdditional product/documentation changes notifications\n" +
-          * "\tProfessional support: limited contact name (3 customer contact with 1 eXo contact per CPU), 48 hours to answer via email\n"
-          * + "\tFeature request priority (also depending on CPU number)\n" + "\n";
-          * System.out.println("["+etalon+"]");
-          * System.out.println("TEXT size ["+text.
-          * length()+"]  ETALON LEN ["+etalon.length()+"]");
-          * assertEquals("Wrong string returned", etalon, text);
-          */
+
+         assertTrue(
+            "Wrong string returned",
+            text
+               .contains("Product documentation including user and admin guides to eXo platform portal, ECM, JCR, and Portlet Container. (currently we have all the docs accessible for free but it will not)"));
       }
       finally
       {

Modified: core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestPropertiesExtracting.java
===================================================================
--- core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestPropertiesExtracting.java	2010-08-31 09:16:47 UTC (rev 3027)
+++ core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestPropertiesExtracting.java	2010-08-31 11:43:20 UTC (rev 3028)
@@ -33,58 +33,26 @@
 {
    DocumentReaderService service;
 
+   @Override
    public void setUp() throws Exception
    {
       super.setUp();
       service = (DocumentReaderService)getComponentInstanceOfType(DocumentReaderService.class);
-      //      service = new DocumentReaderServiceImpl(null);
-      //      InitParams params = new InitParams();
-      //      service.addDocumentReader(new TextPlainDocumentReader(params));
-      //      service.addDocumentReader(new XMLDocumentReader());
-      //      service.addDocumentReader(new HTMLDocumentReader(null));
-      //      service.addDocumentReader(new MSExcelDocumentReader());
-      //      service.addDocumentReader(new MSOutlookDocumentReader());
-      //      service.addDocumentReader(new MSWordDocumentReader());
-      //      service.addDocumentReader(new MSXExcelDocumentReader());
-      //      service.addDocumentReader(new MSXPPTDocumentReader());
-      //      service.addDocumentReader(new MSXWordDocumentReader());
-      //      service.addDocumentReader(new OpenOfficeDocumentReader());
-      //      service.addDocumentReader(new PDFDocumentReader());
-      //      service.addDocumentReader(new PPTDocumentReader());
    }
 
-   public void testPDFDocumentReaderService() throws Exception
-   {
-      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.pdf");
-      try
-      {
-         DocumentReader rdr = service.getDocumentReader("application/pdf");
-         Properties props = rdr.getProperties(is);
-         printProps(props);
-      }
-      finally
-      {
-         is.close();
-      }
-   }
-
    public void testPDFDocumentReaderServiceXMPMetadata() throws Exception
    {
       InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/MyTest.pdf");
       try
       {
          DocumentReader rdr = service.getDocumentReader("application/pdf");
-
          Properties testprops = rdr.getProperties(is);
-         printProps(testprops);
-
          Properties etalon = new Properties();
          etalon.put(DCMetaData.TITLE, "Test de convertion de fichier tif");
          etalon.put(DCMetaData.CREATOR, "Christian Klaus");
          etalon.put(DCMetaData.SUBJECT, "20080901 TEST Christian Etat OK");
-         Calendar c = ISO8601.parseEx("2008-09-01T08:01:10+00:00");;
+         Calendar c = ISO8601.parseEx("2008-09-01T08:01:10+00:00");
          etalon.put(DCMetaData.DATE, c);
-
          evalProps(etalon, testprops);
       }
       finally
@@ -99,7 +67,19 @@
       try
       {
          Properties props = service.getDocumentReader("application/msword").getProperties(is);
-         printProps(props);
+         Properties etalon = new Properties();
+         Calendar date = Calendar.getInstance();
+         date.setTimeInMillis(0);
+         date.set(2010, 7, 31, 12, 31, 0);
+
+         etalon.put(DCMetaData.TITLE, "test-Title");
+         etalon.put(DCMetaData.DATE, date.getTime());
+         etalon.put(DCMetaData.SUBJECT, "test-Subject");
+         etalon.put(DCMetaData.CREATOR, "Max Yakimenko");
+         etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
+         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+
+         evalProps(etalon, props);
       }
       finally
       {
@@ -113,7 +93,17 @@
       try
       {
          Properties props = service.getDocumentReader("application/powerpoint").getProperties(is);
-         printProps(props);
+         Properties etalon = new Properties();
+         Calendar date = Calendar.getInstance();
+         date.setTimeInMillis(41);
+         date.set(2010, 7, 31, 12, 34, 15);
+         etalon.put(DCMetaData.TITLE, "test-Title");
+         etalon.put(DCMetaData.DATE, date.getTime());
+         etalon.put(DCMetaData.SUBJECT, "test-Subject");
+         etalon.put(DCMetaData.CREATOR, "Max Yakimenko");
+         etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
+         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+         evalProps(etalon, props);
       }
       finally
       {
@@ -127,7 +117,19 @@
       try
       {
          Properties props = service.getDocumentReader("application/excel").getProperties(is);
-         printProps(props);
+         Properties etalon = new Properties();
+         Calendar date = Calendar.getInstance();
+         date.setTimeInMillis(0);
+         date.set(2010, 7, 31, 12, 34, 53);
+
+         etalon.put(DCMetaData.TITLE, "test-Title");
+         etalon.put(DCMetaData.DATE, date.getTime());
+         etalon.put(DCMetaData.SUBJECT, "test-Subject");
+         etalon.put(DCMetaData.CREATOR, "KHANH NGUYEN GIA");
+         etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
+         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+
+         evalProps(etalon, props);
       }
       finally
       {
@@ -135,6 +137,90 @@
       }
    }
 
+   public void testXWordDocumentReaderService() throws Exception
+   {
+      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.docx");
+      try
+      {
+         Properties props =
+            service.getDocumentReader("application/vnd.openxmlformats-officedocument.wordprocessingml.document")
+               .getProperties(is);
+         Properties etalon = new Properties();
+         Calendar date = Calendar.getInstance();
+         date.setTimeInMillis(0);
+         date.set(2010, 7, 31, 7, 53, 0);
+
+         etalon.put(DCMetaData.TITLE, "test-Title");
+         etalon.put(DCMetaData.DATE, date.getTime());
+         etalon.put(DCMetaData.SUBJECT, "Subject");
+         etalon.put(DCMetaData.CREATOR, "nikolaz");
+         etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
+         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+
+         evalProps(etalon, props);
+      }
+      finally
+      {
+         is.close();
+      }
+   }
+
+   public void testXPPTDocumentReaderService() throws Exception
+   {
+      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.pptx");
+      try
+      {
+         Properties props =
+            service.getDocumentReader("application/vnd.openxmlformats-officedocument.presentationml.presentation")
+               .getProperties(is);
+         Properties etalon = new Properties();
+         Calendar date = Calendar.getInstance();
+         date.setTimeInMillis(0);
+         date.set(2010, 7, 31, 7, 59, 37);
+
+         etalon.put(DCMetaData.TITLE, "test-Title");
+         etalon.put(DCMetaData.DATE, date.getTime());
+         etalon.put(DCMetaData.SUBJECT, "test-Subject");
+         etalon.put(DCMetaData.CREATOR, "Max Yakimenko");
+         etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
+         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+
+         evalProps(etalon, props);
+      }
+      finally
+      {
+         is.close();
+      }
+   }
+
+   public void testXExcelDocumentReaderService() throws Exception
+   {
+      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.xlsx");
+      try
+      {
+         Properties props =
+            service.getDocumentReader("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
+               .getProperties(is);
+         Properties etalon = new Properties();
+         Calendar date = Calendar.getInstance();
+         date.setTimeInMillis(0);
+         date.set(2010, 7, 31, 8, 7, 25);
+
+         etalon.put(DCMetaData.TITLE, "test-Title");
+         etalon.put(DCMetaData.DATE, date.getTime());
+         etalon.put(DCMetaData.SUBJECT, "test-Subject");
+         etalon.put(DCMetaData.CREATOR, "KHANH NGUYEN GIA");
+         etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
+         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+
+         evalProps(etalon, props);
+      }
+      finally
+      {
+         is.close();
+      }
+   }
+
    public void testOODocumentReaderService() throws Exception
    {
       InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.odt");
@@ -142,6 +228,19 @@
       {
          Properties props = service.getDocumentReader("application/vnd.oasis.opendocument.text").getProperties(is);
          printProps(props);
+         Properties etalon = new Properties();
+         Calendar date = Calendar.getInstance();
+         date.setTimeInMillis(0);
+         date.set(2010, 7, 31, 14, 13, 23);
+
+         etalon.put(DCMetaData.TITLE, "test-Title");
+         etalon.put(DCMetaData.LANGUAGE, "ru-RU");
+         etalon.put(DCMetaData.DATE, "2010-08-31T14:13:23");
+         etalon.put(DCMetaData.SUBJECT, "test-Subject");
+         etalon.put(DCMetaData.CREATOR, "nikolaz ");
+         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
+
+         evalProps(etalon, props);
       }
       finally
       {
@@ -149,16 +248,16 @@
       }
    }
 
-   private void printProps(Properties props)
-   {
-      Iterator it = props.entrySet().iterator();
-      props.toString();
-      while (it.hasNext())
+      private void printProps(Properties props)
       {
-         Map.Entry entry = (Map.Entry)it.next();
-         System.out.println(" " + entry.getKey() + " -> [" + entry.getValue() + "]");
+         Iterator it = props.entrySet().iterator();
+         props.toString();
+         while (it.hasNext())
+         {
+            Map.Entry entry = (Map.Entry)it.next();
+            System.out.println(" " + entry.getKey() + " -> [" + entry.getValue() + "]");
+         }
       }
-   }
 
    private void evalProps(Properties etalon, Properties testedProps)
    {

Modified: core/trunk/exo.core.component.document/src/test/resources/test.doc
===================================================================
(Binary files differ)

Modified: core/trunk/exo.core.component.document/src/test/resources/test.ppt
===================================================================
(Binary files differ)

Modified: core/trunk/exo.core.component.document/src/test/resources/test.xls
===================================================================
(Binary files differ)



More information about the exo-jcr-commits mailing list