[exo-jcr-commits] exo-jcr SVN: r3688 - in core/branches/2.3.x/exo.core.component.document: src/main/java/org/exoplatform/services/document/impl and 2 other directories.

do-not-reply at jboss.org do-not-reply at jboss.org
Mon Dec 20 10:57:29 EST 2010


Author: dkuleshov
Date: 2010-12-20 10:57:29 -0500 (Mon, 20 Dec 2010)
New Revision: 3688

Added:
   core/branches/2.3.x/exo.core.component.document/src/test/resources/testEXCEL.xls
   core/branches/2.3.x/exo.core.component.document/src/test/resources/testEXCEL.xlsb
   core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.potm
   core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.ppsm
   core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.ppsx
   core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.pptm
   core/branches/2.3.x/exo.core.component.document/src/test/resources/testRTF.rtf
   core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.docm
   core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.dotm
   core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.dotx
   core/branches/2.3.x/exo.core.component.document/src/test/resources/testXHTML.html
Modified:
   core/branches/2.3.x/exo.core.component.document/pom.xml
   core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java
   core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java
   core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java
   core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java
   core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java
   core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java
   core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/TextPlainDocumentReader.java
   core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestHtmlDocumentReader.java
   core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSExcelDocumentReader.java
   core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java
   core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java
   core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestTextPlainDocumentReader.java
Log:
COR-218: provided support for more MIME types

Modified: core/branches/2.3.x/exo.core.component.document/pom.xml
===================================================================
--- core/branches/2.3.x/exo.core.component.document/pom.xml	2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/pom.xml	2010-12-20 15:57:29 UTC (rev 3688)
@@ -1,122 +1,132 @@
-
-   <!--
-
-      Copyright (C) 2009 eXo Platform SAS. This is free software; you can redistribute it and/or modify it under the
-      terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of
-      the License, or (at your option) any later version. This software is distributed in the hope that it will be
-      useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
-      PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU
-      Lesser General Public License along with this software; if not, write to the Free Software Foundation, Inc., 51
-      Franklin St, Fifth Floor, Boston, MA 02110-1301 USA, or see the FSF site: http://www.fsf.org.
-   -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-   <modelVersion>4.0.0</modelVersion>
-   <parent>
-      <groupId>org.exoplatform.core</groupId>
-      <artifactId>core-parent</artifactId>
-      <version>2.3.7-GA-SNAPSHOT</version>
-   </parent>
-   <artifactId>exo.core.component.document</artifactId>
-   <name>eXo Core :: Component :: Demo Service</name>
-   <description>eXo demo service Impl</description>
-   <dependencies>
-      <dependency>
-         <groupId>org.exoplatform.tool</groupId>
-         <artifactId>exo.tool.framework.junit</artifactId>
-      </dependency>
-      <dependency>
-         <groupId>org.exoplatform.kernel</groupId>
-         <artifactId>exo.kernel.commons</artifactId>
-      </dependency>
-      <dependency>
-         <groupId>org.exoplatform.kernel</groupId>
-         <artifactId>exo.kernel.container</artifactId>
-      </dependency>
-      <dependency>
-         <groupId>org.slf4j</groupId>
-         <artifactId>slf4j-log4j12</artifactId>
-      </dependency>
-      <dependency>
-         <groupId>org.apache.pdfbox</groupId>
-         <artifactId>pdfbox</artifactId>
-      </dependency>
-      <dependency>
-         <groupId>com.lowagie</groupId>
-         <artifactId>itext</artifactId>
-      </dependency>
-      <dependency>
-         <groupId>org.htmlparser</groupId>
-         <artifactId>htmlparser</artifactId>
-      </dependency>
-      <dependency>
-         <groupId>org.apache.poi</groupId>
-         <artifactId>poi</artifactId>
-         <exclusions>
-            <exclusion>
-               <groupId>log4j</groupId>
-               <artifactId>log4j</artifactId>
-            </exclusion>
-         </exclusions>
-      </dependency>
-      <dependency>
-         <groupId>org.apache.poi</groupId>
-         <artifactId>poi-scratchpad</artifactId>
-         <exclusions>
-            <exclusion>
-               <groupId>log4j</groupId>
-               <artifactId>log4j</artifactId>
-            </exclusion>
-         </exclusions>
-      </dependency>
-
-      <dependency>
-         <groupId>org.apache.poi</groupId>
-         <artifactId>poi-ooxml</artifactId>
-         <exclusions>
-            <exclusion>
-               <groupId>log4j</groupId>
-               <artifactId>log4j</artifactId>
-            </exclusion>
-         </exclusions>
-      </dependency>
-
-   </dependencies>
-   <build>
-      <testResources>
-         <testResource>
-            <directory>src/test/resources</directory>
-            <includes>
-               <include>**/*.properties</include>
-               <include>**/*.xml</include>
-               <include>**/*.drl</include>
-               <include>**/*.vm</include>
-               <include>**/*.doc</include>
-               <include>**/*.docx</include>
-               <include>**/*.dot</include>
-               <include>**/*.xls</include>
-               <include>**/*.xlsx</include>
-               <include>**/*.ppt</include>
-               <include>**/*.pptx</include>
-               <include>**/*.txt</include>
-               <include>**/*.tiff</include>
-               <include>**/*.pdf</include>
-               <include>**/*.odt</include>
-               <include>**/*.html</include>
-               <include>**/*.msg</include>
-               <include>**/*.pst</include>
-            </includes>
-         </testResource>
-      </testResources>
-      <plugins>
-         <plugin>
-            <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-surefire-plugin</artifactId>
-            <configuration>
-               <includes>
-                  <include>org/exoplatform/services/document/**/Test*.java</include>
-               </includes>
-            </configuration>
-         </plugin>
-      </plugins>
-   </build>
-</project>
+
+   <!--
+
+      Copyright (C) 2009 eXo Platform SAS. This is free software; you can redistribute it and/or modify it under the
+      terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of
+      the License, or (at your option) any later version. This software is distributed in the hope that it will be
+      useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+      PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU
+      Lesser General Public License along with this software; if not, write to the Free Software Foundation, Inc., 51
+      Franklin St, Fifth Floor, Boston, MA 02110-1301 USA, or see the FSF site: http://www.fsf.org.
+   -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+   <modelVersion>4.0.0</modelVersion>
+   <parent>
+      <groupId>org.exoplatform.core</groupId>
+      <artifactId>core-parent</artifactId>
+      <version>2.3.7-GA-SNAPSHOT</version>
+   </parent>
+   <artifactId>exo.core.component.document</artifactId>
+   <name>eXo Core :: Component :: Demo Service</name>
+   <description>eXo demo service Impl</description>
+   <dependencies>
+      <dependency>
+         <groupId>org.exoplatform.tool</groupId>
+         <artifactId>exo.tool.framework.junit</artifactId>
+      </dependency>
+      <dependency>
+         <groupId>org.exoplatform.kernel</groupId>
+         <artifactId>exo.kernel.commons</artifactId>
+      </dependency>
+      <dependency>
+         <groupId>org.exoplatform.kernel</groupId>
+         <artifactId>exo.kernel.container</artifactId>
+      </dependency>
+      <dependency>
+         <groupId>org.slf4j</groupId>
+         <artifactId>slf4j-log4j12</artifactId>
+      </dependency>
+      <dependency>
+         <groupId>org.apache.pdfbox</groupId>
+         <artifactId>pdfbox</artifactId>
+      </dependency>
+      <dependency>
+         <groupId>com.lowagie</groupId>
+         <artifactId>itext</artifactId>
+      </dependency>
+      <dependency>
+         <groupId>org.htmlparser</groupId>
+         <artifactId>htmlparser</artifactId>
+      </dependency>
+      <dependency>
+         <groupId>org.apache.poi</groupId>
+         <artifactId>poi</artifactId>
+         <exclusions>
+            <exclusion>
+               <groupId>log4j</groupId>
+               <artifactId>log4j</artifactId>
+            </exclusion>
+         </exclusions>
+      </dependency>
+      <dependency>
+         <groupId>org.apache.poi</groupId>
+         <artifactId>poi-scratchpad</artifactId>
+         <exclusions>
+            <exclusion>
+               <groupId>log4j</groupId>
+               <artifactId>log4j</artifactId>
+            </exclusion>
+         </exclusions>
+      </dependency>
+
+      <dependency>
+         <groupId>org.apache.poi</groupId>
+         <artifactId>poi-ooxml</artifactId>
+         <exclusions>
+            <exclusion>
+               <groupId>log4j</groupId>
+               <artifactId>log4j</artifactId>
+            </exclusion>
+         </exclusions>
+      </dependency>
+
+   </dependencies>
+   <build>
+      <testResources>
+         <testResource>
+            <directory>src/test/resources</directory>
+            <includes>
+               <include>**/*.properties</include>
+               <include>**/*.rtf</include>
+               <include>**/*.xml</include>
+               <include>**/*.drl</include>
+               <include>**/*.vm</include>
+               <include>**/*.doc</include>
+               <include>**/*.docx</include>
+               <include>**/*.dotx</include>
+               <include>**/*.docm</include>
+               <include>**/*.dotm</include>
+               <include>**/*.dot</include>
+               <include>**/*.xls</include>
+               <include>**/*.xlsx</include>
+               <include>**/*.xlsb</include>
+               <include>**/*.xltx</include>
+               <include>**/*.ppt</include>
+               <include>**/*.pptm</include>
+               <include>**/*.pptx</include>
+               <include>**/*.ppsx</include>
+               <include>**/*.ppsm</include>
+               <include>**/*.potm</include>
+               <include>**/*.txt</include>
+               <include>**/*.tiff</include>
+               <include>**/*.pdf</include>
+               <include>**/*.odt</include>
+               <include>**/*.html</include>
+               <include>**/*.msg</include>
+               <include>**/*.pst</include>
+            </includes>
+         </testResource>
+      </testResources>
+      <plugins>
+         <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-surefire-plugin</artifactId>
+            <configuration>
+               <includes>
+                  <include>org/exoplatform/services/document/**/Test*.java</include>
+               </includes>
+            </configuration>
+         </plugin>
+      </plugins>
+   </build>
+</project>

Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java	2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java	2010-12-20 15:57:29 UTC (rev 3688)
@@ -53,7 +53,7 @@
    public HTMLDocumentReader()
    {
    }
-   
+
    /**
     * Get the text/html,application/x-groovy+html mime type.
     * 
@@ -61,7 +61,7 @@
     */
    public String[] getMimeTypes()
    {
-      return new String[]{"text/html", "application/x-groovy+html"};
+      return new String[]{"text/html", "application/x-groovy+html", "application/xhtml+xml"};
    }
 
    /**

Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java	2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java	2010-12-20 15:57:29 UTC (rev 3688)
@@ -43,7 +43,7 @@
 {
 
    private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSSZ";
-   
+
    /**
     * Get the application/excel mime type.
     * 
@@ -51,7 +51,7 @@
     */
    public String[] getMimeTypes()
    {
-      return new String[]{"application/excel", "application/xls"};
+      return new String[]{"application/excel", "application/xls", "application/vnd.ms-excel"};
    }
 
    /**
@@ -68,7 +68,7 @@
       }
 
       StringBuilder builder = new StringBuilder("");
-      
+
       SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
 
       try
@@ -77,7 +77,7 @@
          {
             return "";
          }
-         
+
          HSSFWorkbook wb;
          try
          {
@@ -116,7 +116,7 @@
                                  }
                                  else
                                  {
-                                   builder.append(d).append(" ");
+                                    builder.append(d).append(" ");
                                  }
                                  break;
                               }

Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java	2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java	2010-12-20 15:57:29 UTC (rev 3688)
@@ -51,7 +51,17 @@
     */
    public String[] getMimeTypes()
    {
+      //Supported mimetypes:
+      // "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - "x.xlsx"
+      //
+      //Unsupported mimetypes:
+      // "application/vnd.ms-excel.sheet.binary.macroenabled.12" - "*.xlsb"; Exceptions are thrown during parsing
+      // "application/vnd.openxmlformats-officedocument.spreadsheetml.template" - "x.xltx"; Not tested
+      // "application/vnd.ms-excel.sheet.macroenabled.12" - "x.xlsm"; Not tested
+      // "application/vnd.ms-excel.template.macroenabled.12" - "x.xltm"; Not tested
+      // "application/vnd.ms-excel.addin.macroenabled.12" - "x.xlam"; Not tested
       return new String[]{"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"};
+
    }
 
    /**

Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java	2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java	2010-12-20 15:57:29 UTC (rev 3688)
@@ -47,7 +47,21 @@
     */
    public String[] getMimeTypes()
    {
-      return new String[]{"application/vnd.openxmlformats-officedocument.presentationml.presentation"};
+      //Supported mimetypes:
+      // "application/vnd.openxmlformats-officedocument.presentationml.presentation" - "x.pptx";
+      // "application/vnd.openxmlformats-officedocument.presentationml.slideshow" - "x.ppsx";
+      // "application/vnd.ms-powerpoint.presentation.macroenabled.12" - "testPPT.pptm";
+      // "application/vnd.ms-powerpoint.slideshow.macroenabled.12" - "testPPT.ppsm";
+      //
+      //Not supported mimetypes:
+      // "application/vnd.ms-powerpoint.template.macroenabled.12" - "testPPT.potm"; Has errors
+      // "application/vnd.openxmlformats-officedocument.presentationml.template" - "x.potx"; Not tested
+      // "application/vnd.ms-powerpoint.addin.macroenabled.12" - "x.ppam"; Not tested
+
+      return new String[]{"application/vnd.openxmlformats-officedocument.presentationml.presentation",
+         "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
+         "application/vnd.ms-powerpoint.presentation.macroenabled.12",
+         "application/vnd.ms-powerpoint.slideshow.macroenabled.12"};
    }
 
    /**
@@ -68,7 +82,7 @@
          {
             return "";
          }
-         
+
          XSLFPowerPointExtractor ppe;
          try
          {

Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java	2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java	2010-12-20 15:57:29 UTC (rev 3688)
@@ -44,7 +44,15 @@
     */
    public String[] getMimeTypes()
    {
-      return new String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document"};
+      //Supported document types:
+      // "application/vnd.openxmlformats-officedocument.wordprocessingml.document" - "x.docx"
+      // "application/vnd.openxmlformats-officedocument.wordprocessingml.template" - "x.dotx"
+      // "application/vnd.ms-word.document.macroenabled.12" - "x.docm"
+      // "application/vnd.ms-word.template.macroenabled.12" - "x.dotm"
+
+      return new String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+         "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
+         "application/vnd.ms-word.document.macroenabled.12", "application/vnd.ms-word.template.macroenabled.12"};
    }
 
    /**
@@ -66,7 +74,7 @@
          {
             return "";
          }
-         
+
          XWPFDocument doc;
          try
          {

Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java	2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java	2010-12-20 15:57:29 UTC (rev 3688)
@@ -43,7 +43,7 @@
     */
    public String[] getMimeTypes()
    {
-      return new String[]{"application/powerpoint", "application/ppt"};
+      return new String[]{"application/powerpoint", "application/ppt", "application/vnd.ms-powerpoint"};
    }
 
    /**
@@ -60,12 +60,12 @@
       }
       try
       {
-         
+
          if (is.available() == 0)
          {
             return "";
          }
-         
+
          PowerPointExtractor ppe;
          try
          {

Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/TextPlainDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/TextPlainDocumentReader.java	2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/TextPlainDocumentReader.java	2010-12-20 15:57:29 UTC (rev 3688)
@@ -67,10 +67,9 @@
     */
    public String[] getMimeTypes()
    {
-      return new String[]{"text/plain","script/groovy",
-                          "application/x-groovy","application/x-javascript",
-                          "application/javascript","text/javascript",
-                          "application/x-jaxrs+groovy"};
+      return new String[]{"text/plain", "script/groovy", "application/x-groovy", "application/x-javascript",
+         "application/javascript", "text/javascript", "application/x-jaxrs+groovy"};
+      // "text/rtf" and "application/rtf" are excluded: RTF requires a dedicated parser, because its raw text contains a lot of formatting tags.
    }
 
    /**

Modified: core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestHtmlDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestHtmlDocumentReader.java	2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestHtmlDocumentReader.java	2010-12-20 15:57:29 UTC (rev 3688)
@@ -56,4 +56,20 @@
          is.close();
       }
    }
+
+   public void testXHTMLGetContentAsString() throws Exception
+   {
+      InputStream is = TestHtmlDocumentReader.class.getResourceAsStream("/testXHTML.html");
+      try
+      {
+         DocumentReader dr = service.getDocumentReader("application/xhtml+xml");
+         String text = dr.getContentAsText(is);
+         assertTrue(text
+            .contains("This document tests the ability of Apache Tika to extract content from an XHTML document."));
+      }
+      finally
+      {
+         is.close();
+      }
+   }
 }

Modified: core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSExcelDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSExcelDocumentReader.java	2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSExcelDocumentReader.java	2010-12-20 15:57:29 UTC (rev 3688)
@@ -34,9 +34,9 @@
 public class TestMSExcelDocumentReader extends BaseStandaloneTest
 {
    private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSZ");
-   
+
    DocumentReaderService service;
-   
+
    @Override
    public void setUp() throws Exception
    {
@@ -108,7 +108,24 @@
       {
          is.close();
       }
+   }
 
+   public void testClassicExcelGetContentAsString() throws Exception
+   {
+      InputStream is = TestMSXExcelDocumentReader.class.getResourceAsStream("/testEXCEL.xls");
+      try
+      {
+         String content = service.getDocumentReader("application/vnd.ms-excel").getContentAsText(is);
+         assertTrue(content.contains("Sample Excel Worksheet"));
+         assertTrue(content.contains("Numbers and their Squares"));
+         assertTrue(content.contains("Number"));
+         assertTrue(content.contains("9"));
+         assertFalse(content.contains("9.0"));
+      }
+      finally
+      {
+         is.close();
+      }
    }
 
    public String getDate(int year, int month, int day)

Modified: core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java	2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java	2010-12-20 15:57:29 UTC (rev 3688)
@@ -59,4 +59,72 @@
          is.close();
       }
    }
+
+   public void testPPSXGetContentAsString() throws Exception
+   {
+      InputStream is = TestMSXPPTDocumentReader.class.getResourceAsStream("/testPPT.ppsx");
+      try
+      {
+         String content =
+            service.getDocumentReader("application/vnd.openxmlformats-officedocument.presentationml.slideshow")
+               .getContentAsText(is);
+         assertTrue(content
+            .contains("This is a test file data with the same content as every other file being tested for"));
+         assertTrue(content.contains("Different words to test against"));
+         assertTrue(content.contains("Quest"));
+         assertTrue(content.contains("Hello"));
+         assertTrue(content.contains("Watershed"));
+         assertTrue(content.contains("Avalanche"));
+         assertTrue(content.contains("Black Panther"));
+      }
+      finally
+      {
+         is.close();
+      }
+   }
+
+   public void testPPTMGetContentAsString() throws Exception
+   {
+      InputStream is = TestMSXPPTDocumentReader.class.getResourceAsStream("/testPPT.pptm");
+      try
+      {
+         String content =
+            service.getDocumentReader("application/vnd.ms-powerpoint.presentation.macroenabled.12")
+               .getContentAsText(is);
+         assertTrue(content
+            .contains("This is a test file data with the same content as every other file being tested for"));
+         assertTrue(content.contains("Different words to test against"));
+         assertTrue(content.contains("Quest"));
+         assertTrue(content.contains("Hello"));
+         assertTrue(content.contains("Watershed"));
+         assertTrue(content.contains("Avalanche"));
+         assertTrue(content.contains("Black Panther"));
+      }
+      finally
+      {
+         is.close();
+      }
+   }
+
+   public void testPPSMGetContentAsString() throws Exception
+   {
+      InputStream is = TestMSXPPTDocumentReader.class.getResourceAsStream("/testPPT.ppsm");
+      try
+      {
+         String content =
+            service.getDocumentReader("application/vnd.ms-powerpoint.slideshow.macroenabled.12").getContentAsText(is);
+         assertTrue(content
+            .contains("This is a test file data with the same content as every other file being tested for"));
+         assertTrue(content.contains("Different words to test against"));
+         assertTrue(content.contains("Quest"));
+         assertTrue(content.contains("Hello"));
+         assertTrue(content.contains("Watershed"));
+         assertTrue(content.contains("Avalanche"));
+         assertTrue(content.contains("Black Panther"));
+      }
+      finally
+      {
+         is.close();
+      }
+   }
 }

Modified: core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java	2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java	2010-12-20 15:57:29 UTC (rev 3688)
@@ -38,7 +38,7 @@
       service = (DocumentReaderService)getComponentInstanceOfType(DocumentReaderService.class);
    }
 
-   public void testGetContentAsStringDoc() throws Exception
+   public void testDOCXGetContentAsStringDoc() throws Exception
    {
       InputStream is = TestMSXWordDocumentReader.class.getResourceAsStream("/test.docx");
       try
@@ -54,4 +54,50 @@
          is.close();
       }
    }
+
+   public void testDOTXGetContentAsStringDoc() throws Exception
+   {
+      InputStream is = TestMSXWordDocumentReader.class.getResourceAsStream("/testWORD.dotx");
+      try
+      {
+         String text =
+            service.getDocumentReader("application/vnd.openxmlformats-officedocument.wordprocessingml.template")
+               .getContentAsText(is);
+         assertTrue(text.contains("template"));
+      }
+      finally
+      {
+         is.close();
+      }
+   }
+
+   public void testDOCMGetContentAsStringDoc() throws Exception
+   {
+      InputStream is = TestMSXWordDocumentReader.class.getResourceAsStream("/testWORD.docm");
+      try
+      {
+         String text =
+            service.getDocumentReader("application/vnd.ms-word.document.macroenabled.12").getContentAsText(is);
+         assertTrue(text.contains("template"));
+      }
+      finally
+      {
+         is.close();
+      }
+   }
+
+   public void testDOTMGetContentAsStringDoc() throws Exception
+   {
+      InputStream is = TestMSXWordDocumentReader.class.getResourceAsStream("/testWORD.dotm");
+      try
+      {
+         String text =
+            service.getDocumentReader("application/vnd.ms-word.template.macroenabled.12").getContentAsText(is);
+         assertTrue(text.contains("Template with macros"));
+      }
+      finally
+      {
+         is.close();
+      }
+   }
 }

Modified: core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestTextPlainDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestTextPlainDocumentReader.java	2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestTextPlainDocumentReader.java	2010-12-20 15:57:29 UTC (rev 3688)
@@ -68,5 +68,4 @@
          is.close();
       }
    }
-
 }

Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testEXCEL.xls
===================================================================
(Binary files differ)


Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testEXCEL.xls
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testEXCEL.xlsb
===================================================================
(Binary files differ)


Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testEXCEL.xlsb
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.potm
===================================================================
(Binary files differ)


Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.potm
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.ppsm
===================================================================
(Binary files differ)


Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.ppsm
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.ppsx
===================================================================
(Binary files differ)


Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.ppsx
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.pptm
===================================================================
(Binary files differ)


Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.pptm
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testRTF.rtf
===================================================================
(Binary files differ)


Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testRTF.rtf
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.docm
===================================================================
(Binary files differ)


Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.docm
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.dotm
===================================================================
(Binary files differ)


Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.dotm
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.dotx
===================================================================
(Binary files differ)


Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.dotx
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testXHTML.html
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/test/resources/testXHTML.html	                        (rev 0)
+++ core/branches/2.3.x/exo.core.component.document/src/test/resources/testXHTML.html	2010-12-20 15:57:29 UTC (rev 3688)
@@ -0,0 +1,493 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>XHTML test document</title>
+    <meta name="Author" content="Tika Developers"/>
+    <meta http-equiv="refresh" content="5"/>
+  </head>
+  <body>
+    <p>
+      This document tests the ability of Apache Tika to extract content
+      from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+    </p>
+  </body>
+</html>


More information about the exo-jcr-commits mailing list