[exo-jcr-commits] exo-jcr SVN: r3688 - in core/branches/2.3.x/exo.core.component.document: src/main/java/org/exoplatform/services/document/impl and 2 other directories.
do-not-reply at jboss.org
do-not-reply at jboss.org
Mon Dec 20 10:57:29 EST 2010
Author: dkuleshov
Date: 2010-12-20 10:57:29 -0500 (Mon, 20 Dec 2010)
New Revision: 3688
Added:
core/branches/2.3.x/exo.core.component.document/src/test/resources/testEXCEL.xls
core/branches/2.3.x/exo.core.component.document/src/test/resources/testEXCEL.xlsb
core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.potm
core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.ppsm
core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.ppsx
core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.pptm
core/branches/2.3.x/exo.core.component.document/src/test/resources/testRTF.rtf
core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.docm
core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.dotm
core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.dotx
core/branches/2.3.x/exo.core.component.document/src/test/resources/testXHTML.html
Modified:
core/branches/2.3.x/exo.core.component.document/pom.xml
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/TextPlainDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestHtmlDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSExcelDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java
core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestTextPlainDocumentReader.java
Log:
COR-218: provided support for more MIME types
Modified: core/branches/2.3.x/exo.core.component.document/pom.xml
===================================================================
--- core/branches/2.3.x/exo.core.component.document/pom.xml 2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/pom.xml 2010-12-20 15:57:29 UTC (rev 3688)
@@ -1,122 +1,132 @@
-
- <!--
-
- Copyright (C) 2009 eXo Platform SAS. This is free software; you can redistribute it and/or modify it under the
- terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of
- the License, or (at your option) any later version. This software is distributed in the hope that it will be
- useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU
- Lesser General Public License along with this software; if not, write to the Free Software Foundation, Inc., 51
- Franklin St, Fifth Floor, Boston, MA 02110-1301 USA, or see the FSF site: http://www.fsf.org.
- -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <parent>
- <groupId>org.exoplatform.core</groupId>
- <artifactId>core-parent</artifactId>
- <version>2.3.7-GA-SNAPSHOT</version>
- </parent>
- <artifactId>exo.core.component.document</artifactId>
- <name>eXo Core :: Component :: Demo Service</name>
- <description>eXo demo service Impl</description>
- <dependencies>
- <dependency>
- <groupId>org.exoplatform.tool</groupId>
- <artifactId>exo.tool.framework.junit</artifactId>
- </dependency>
- <dependency>
- <groupId>org.exoplatform.kernel</groupId>
- <artifactId>exo.kernel.commons</artifactId>
- </dependency>
- <dependency>
- <groupId>org.exoplatform.kernel</groupId>
- <artifactId>exo.kernel.container</artifactId>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.pdfbox</groupId>
- <artifactId>pdfbox</artifactId>
- </dependency>
- <dependency>
- <groupId>com.lowagie</groupId>
- <artifactId>itext</artifactId>
- </dependency>
- <dependency>
- <groupId>org.htmlparser</groupId>
- <artifactId>htmlparser</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi</artifactId>
- <exclusions>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi-scratchpad</artifactId>
- <exclusions>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
-
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi-ooxml</artifactId>
- <exclusions>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
-
- </dependencies>
- <build>
- <testResources>
- <testResource>
- <directory>src/test/resources</directory>
- <includes>
- <include>**/*.properties</include>
- <include>**/*.xml</include>
- <include>**/*.drl</include>
- <include>**/*.vm</include>
- <include>**/*.doc</include>
- <include>**/*.docx</include>
- <include>**/*.dot</include>
- <include>**/*.xls</include>
- <include>**/*.xlsx</include>
- <include>**/*.ppt</include>
- <include>**/*.pptx</include>
- <include>**/*.txt</include>
- <include>**/*.tiff</include>
- <include>**/*.pdf</include>
- <include>**/*.odt</include>
- <include>**/*.html</include>
- <include>**/*.msg</include>
- <include>**/*.pst</include>
- </includes>
- </testResource>
- </testResources>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <includes>
- <include>org/exoplatform/services/document/**/Test*.java</include>
- </includes>
- </configuration>
- </plugin>
- </plugins>
- </build>
-</project>
+
+ <!--
+
+ Copyright (C) 2009 eXo Platform SAS. This is free software; you can redistribute it and/or modify it under the
+ terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of
+ the License, or (at your option) any later version. This software is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU
+ Lesser General Public License along with this software; if not, write to the Free Software Foundation, Inc., 51
+ Franklin St, Fifth Floor, Boston, MA 02110-1301 USA, or see the FSF site: http://www.fsf.org.
+ -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.exoplatform.core</groupId>
+ <artifactId>core-parent</artifactId>
+ <version>2.3.7-GA-SNAPSHOT</version>
+ </parent>
+ <artifactId>exo.core.component.document</artifactId>
+ <name>eXo Core :: Component :: Demo Service</name>
+ <description>eXo demo service Impl</description>
+ <dependencies>
+ <dependency>
+ <groupId>org.exoplatform.tool</groupId>
+ <artifactId>exo.tool.framework.junit</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.exoplatform.kernel</groupId>
+ <artifactId>exo.kernel.commons</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.exoplatform.kernel</groupId>
+ <artifactId>exo.kernel.container</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.pdfbox</groupId>
+ <artifactId>pdfbox</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.lowagie</groupId>
+ <artifactId>itext</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.htmlparser</groupId>
+ <artifactId>htmlparser</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-scratchpad</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-ooxml</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ </dependencies>
+ <build>
+ <testResources>
+ <testResource>
+ <directory>src/test/resources</directory>
+ <includes>
+ <include>**/*.properties</include>
+ <include>**/*.rtf</include>
+ <include>**/*.xml</include>
+ <include>**/*.drl</include>
+ <include>**/*.vm</include>
+ <include>**/*.doc</include>
+ <include>**/*.docx</include>
+ <include>**/*.dotx</include>
+ <include>**/*.docm</include>
+ <include>**/*.dotm</include>
+ <include>**/*.dot</include>
+ <include>**/*.xls</include>
+ <include>**/*.xlsx</include>
+ <include>**/*.xlsb</include>
+ <include>**/*.xltx</include>
+ <include>**/*.ppt</include>
+ <include>**/*.pptm</include>
+ <include>**/*.pptx</include>
+ <include>**/*.ppsx</include>
+ <include>**/*.ppsm</include>
+ <include>**/*.potm</include>
+ <include>**/*.txt</include>
+ <include>**/*.tiff</include>
+ <include>**/*.pdf</include>
+ <include>**/*.odt</include>
+ <include>**/*.html</include>
+ <include>**/*.msg</include>
+ <include>**/*.pst</include>
+ </includes>
+ </testResource>
+ </testResources>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <includes>
+ <include>org/exoplatform/services/document/**/Test*.java</include>
+ </includes>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+</project>
Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java 2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/HTMLDocumentReader.java 2010-12-20 15:57:29 UTC (rev 3688)
@@ -53,7 +53,7 @@
public HTMLDocumentReader()
{
}
-
+
/**
* Get the text/html,application/x-groovy+html mime type.
*
@@ -61,7 +61,7 @@
*/
public String[] getMimeTypes()
{
- return new String[]{"text/html", "application/x-groovy+html"};
+ return new String[]{"text/html", "application/x-groovy+html", "application/xhtml+xml"};
}
/**
Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java 2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSExcelDocumentReader.java 2010-12-20 15:57:29 UTC (rev 3688)
@@ -43,7 +43,7 @@
{
private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSSZ";
-
+
/**
* Get the application/excel mime type.
*
@@ -51,7 +51,7 @@
*/
public String[] getMimeTypes()
{
- return new String[]{"application/excel", "application/xls"};
+ return new String[]{"application/excel", "application/xls", "application/vnd.ms-excel"};
}
/**
@@ -68,7 +68,7 @@
}
StringBuilder builder = new StringBuilder("");
-
+
SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
try
@@ -77,7 +77,7 @@
{
return "";
}
-
+
HSSFWorkbook wb;
try
{
@@ -116,7 +116,7 @@
}
else
{
- builder.append(d).append(" ");
+ builder.append(d).append(" ");
}
break;
}
Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java 2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXExcelDocumentReader.java 2010-12-20 15:57:29 UTC (rev 3688)
@@ -51,7 +51,17 @@
*/
public String[] getMimeTypes()
{
+ //Supported mimetypes:
+ // "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - "x.xlsx"
+ //
+ //Unsupported mimetypes:
+ // "application/vnd.ms-excel.sheet.binary.macroenabled.12" - "*.xlsb"; Exceptions are thrown during parsing
+ // "application/vnd.openxmlformats-officedocument.spreadsheetml.template" - "x.xltx"; Not tested
+ // "application/vnd.ms-excel.sheet.macroenabled.12" - "x.xlsm"; Not tested
+ // "application/vnd.ms-excel.template.macroenabled.12" - "x.xltm"; Not tested
+ // "application/vnd.ms-excel.addin.macroenabled.12" - "x.xlam"; Not tested
return new String[]{"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"};
+
}
/**
Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java 2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXPPTDocumentReader.java 2010-12-20 15:57:29 UTC (rev 3688)
@@ -47,7 +47,21 @@
*/
public String[] getMimeTypes()
{
- return new String[]{"application/vnd.openxmlformats-officedocument.presentationml.presentation"};
+ //Supported mimetypes:
+ // "application/vnd.openxmlformats-officedocument.presentationml.presentation" -"x.pptx";
+ // "application/vnd.openxmlformats-officedocument.presentationml.slideshow" - "x.ppsx";
+ // "application/vnd.ms-powerpoint.presentation.macroenabled.12" - "testPPT.pptm";
+ // "application/vnd.ms-powerpoint.slideshow.macroenabled.12" - "testPPT.ppsm";
+ //
+ //Not supported mimetypes:
+ // "application/vnd.ms-powerpoint.template.macroenabled.12" - "testPPT.potm"; Has errors
+ // "application/vnd.openxmlformats-officedocument.presentationml.template" - "x.potx"; Not tested
+ // "application/vnd.ms-powerpoint.addin.macroenabled.12" - "x.ppam"; Not tested
+
+ return new String[]{"application/vnd.openxmlformats-officedocument.presentationml.presentation",
+ "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
+ "application/vnd.ms-powerpoint.presentation.macroenabled.12",
+ "application/vnd.ms-powerpoint.slideshow.macroenabled.12"};
}
/**
@@ -68,7 +82,7 @@
{
return "";
}
-
+
XSLFPowerPointExtractor ppe;
try
{
Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java 2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/MSXWordDocumentReader.java 2010-12-20 15:57:29 UTC (rev 3688)
@@ -44,7 +44,15 @@
*/
public String[] getMimeTypes()
{
- return new String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document"};
+ //Supported document types:
+ // "application/vnd.openxmlformats-officedocument.wordprocessingml.document" - "x.docx"
+ // "application/vnd.openxmlformats-officedocument.wordprocessingml.template" - "x.dotx"
+ // "application/vnd.ms-word.document.macroenabled.12" - "x.docm"
+ // "application/vnd.ms-word.template.macroenabled.12" - "x.dotm"
+
+ return new String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
+ "application/vnd.ms-word.document.macroenabled.12", "application/vnd.ms-word.template.macroenabled.12"};
}
/**
@@ -66,7 +74,7 @@
{
return "";
}
-
+
XWPFDocument doc;
try
{
Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java 2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PPTDocumentReader.java 2010-12-20 15:57:29 UTC (rev 3688)
@@ -43,7 +43,7 @@
*/
public String[] getMimeTypes()
{
- return new String[]{"application/powerpoint", "application/ppt"};
+ return new String[]{"application/powerpoint", "application/ppt", "application/vnd.ms-powerpoint"};
}
/**
@@ -60,12 +60,12 @@
}
try
{
-
+
if (is.available() == 0)
{
return "";
}
-
+
PowerPointExtractor ppe;
try
{
Modified: core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/TextPlainDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/TextPlainDocumentReader.java 2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/TextPlainDocumentReader.java 2010-12-20 15:57:29 UTC (rev 3688)
@@ -67,10 +67,9 @@
*/
public String[] getMimeTypes()
{
- return new String[]{"text/plain","script/groovy",
- "application/x-groovy","application/x-javascript",
- "application/javascript","text/javascript",
- "application/x-jaxrs+groovy"};
+ return new String[]{"text/plain", "script/groovy", "application/x-groovy", "application/x-javascript",
+ "application/javascript", "text/javascript", "application/x-jaxrs+groovy"};
+ // "text/rtf", "application/rtf" are excluded: they require a dedicated RTF parser, because the raw text contains a lot of formatting tags.
}
/**
Modified: core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestHtmlDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestHtmlDocumentReader.java 2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestHtmlDocumentReader.java 2010-12-20 15:57:29 UTC (rev 3688)
@@ -56,4 +56,20 @@
is.close();
}
}
+
+ public void testXHTMLGetContentAsString() throws Exception
+ {
+ InputStream is = TestHtmlDocumentReader.class.getResourceAsStream("/testXHTML.html");
+ try
+ {
+ DocumentReader dr = service.getDocumentReader("application/xhtml+xml");
+ String text = dr.getContentAsText(is);
+ assertTrue(text
+ .contains("This document tests the ability of Apache Tika to extract content from an XHTML document."));
+ }
+ finally
+ {
+ is.close();
+ }
+ }
}
Modified: core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSExcelDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSExcelDocumentReader.java 2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSExcelDocumentReader.java 2010-12-20 15:57:29 UTC (rev 3688)
@@ -34,9 +34,9 @@
public class TestMSExcelDocumentReader extends BaseStandaloneTest
{
private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSZ");
-
+
DocumentReaderService service;
-
+
@Override
public void setUp() throws Exception
{
@@ -108,7 +108,24 @@
{
is.close();
}
+ }
+ public void testClassicExcelGetContentAsString() throws Exception
+ {
+ InputStream is = TestMSXExcelDocumentReader.class.getResourceAsStream("/testEXCEL.xls");
+ try
+ {
+ String content = service.getDocumentReader("application/vnd.ms-excel").getContentAsText(is);
+ assertTrue(content.contains("Sample Excel Worksheet"));
+ assertTrue(content.contains("Numbers and their Squares"));
+ assertTrue(content.contains("Number"));
+ assertTrue(content.contains("9"));
+ assertFalse(content.contains("9.0"));
+ }
+ finally
+ {
+ is.close();
+ }
}
public String getDate(int year, int month, int day)
Modified: core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java 2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXPPTDocumentReader.java 2010-12-20 15:57:29 UTC (rev 3688)
@@ -59,4 +59,72 @@
is.close();
}
}
+
+ public void testPPSXGetContentAsString() throws Exception
+ {
+ InputStream is = TestMSXPPTDocumentReader.class.getResourceAsStream("/testPPT.ppsx");
+ try
+ {
+ String content =
+ service.getDocumentReader("application/vnd.openxmlformats-officedocument.presentationml.slideshow")
+ .getContentAsText(is);
+ assertTrue(content
+ .contains("This is a test file data with the same content as every other file being tested for"));
+ assertTrue(content.contains("Different words to test against"));
+ assertTrue(content.contains("Quest"));
+ assertTrue(content.contains("Hello"));
+ assertTrue(content.contains("Watershed"));
+ assertTrue(content.contains("Avalanche"));
+ assertTrue(content.contains("Black Panther"));
+ }
+ finally
+ {
+ is.close();
+ }
+ }
+
+ public void testPPTMGetContentAsString() throws Exception
+ {
+ InputStream is = TestMSXPPTDocumentReader.class.getResourceAsStream("/testPPT.pptm");
+ try
+ {
+ String content =
+ service.getDocumentReader("application/vnd.ms-powerpoint.presentation.macroenabled.12")
+ .getContentAsText(is);
+ assertTrue(content
+ .contains("This is a test file data with the same content as every other file being tested for"));
+ assertTrue(content.contains("Different words to test against"));
+ assertTrue(content.contains("Quest"));
+ assertTrue(content.contains("Hello"));
+ assertTrue(content.contains("Watershed"));
+ assertTrue(content.contains("Avalanche"));
+ assertTrue(content.contains("Black Panther"));
+ }
+ finally
+ {
+ is.close();
+ }
+ }
+
+ public void testPPSMGetContentAsString() throws Exception
+ {
+ InputStream is = TestMSXPPTDocumentReader.class.getResourceAsStream("/testPPT.ppsm");
+ try
+ {
+ String content =
+ service.getDocumentReader("application/vnd.ms-powerpoint.slideshow.macroenabled.12").getContentAsText(is);
+ assertTrue(content
+ .contains("This is a test file data with the same content as every other file being tested for"));
+ assertTrue(content.contains("Different words to test against"));
+ assertTrue(content.contains("Quest"));
+ assertTrue(content.contains("Hello"));
+ assertTrue(content.contains("Watershed"));
+ assertTrue(content.contains("Avalanche"));
+ assertTrue(content.contains("Black Panther"));
+ }
+ finally
+ {
+ is.close();
+ }
+ }
}
Modified: core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java 2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestMSXWordDocumentReader.java 2010-12-20 15:57:29 UTC (rev 3688)
@@ -38,7 +38,7 @@
service = (DocumentReaderService)getComponentInstanceOfType(DocumentReaderService.class);
}
- public void testGetContentAsStringDoc() throws Exception
+ public void testDOCXGetContentAsStringDoc() throws Exception
{
InputStream is = TestMSXWordDocumentReader.class.getResourceAsStream("/test.docx");
try
@@ -54,4 +54,50 @@
is.close();
}
}
+
+ public void testDOTXGetContentAsStringDoc() throws Exception
+ {
+ InputStream is = TestMSXWordDocumentReader.class.getResourceAsStream("/testWORD.dotx");
+ try
+ {
+ String text =
+ service.getDocumentReader("application/vnd.openxmlformats-officedocument.wordprocessingml.template")
+ .getContentAsText(is);
+ assertTrue(text.contains("template"));
+ }
+ finally
+ {
+ is.close();
+ }
+ }
+
+ public void testDOCMGetContentAsStringDoc() throws Exception
+ {
+ InputStream is = TestMSXWordDocumentReader.class.getResourceAsStream("/testWORD.docm");
+ try
+ {
+ String text =
+ service.getDocumentReader("application/vnd.ms-word.document.macroenabled.12").getContentAsText(is);
+ assertTrue(text.contains("template"));
+ }
+ finally
+ {
+ is.close();
+ }
+ }
+
+ public void testDOTMGetContentAsStringDoc() throws Exception
+ {
+ InputStream is = TestMSXWordDocumentReader.class.getResourceAsStream("/testWORD.dotm");
+ try
+ {
+ String text =
+ service.getDocumentReader("application/vnd.ms-word.template.macroenabled.12").getContentAsText(is);
+ assertTrue(text.contains("Template with macros"));
+ }
+ finally
+ {
+ is.close();
+ }
+ }
}
Modified: core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestTextPlainDocumentReader.java
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestTextPlainDocumentReader.java 2010-12-20 14:32:26 UTC (rev 3687)
+++ core/branches/2.3.x/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestTextPlainDocumentReader.java 2010-12-20 15:57:29 UTC (rev 3688)
@@ -68,5 +68,4 @@
is.close();
}
}
-
}
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testEXCEL.xls
===================================================================
(Binary files differ)
Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testEXCEL.xls
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testEXCEL.xlsb
===================================================================
(Binary files differ)
Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testEXCEL.xlsb
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.potm
===================================================================
(Binary files differ)
Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.potm
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.ppsm
===================================================================
(Binary files differ)
Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.ppsm
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.ppsx
===================================================================
(Binary files differ)
Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.ppsx
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.pptm
===================================================================
(Binary files differ)
Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testPPT.pptm
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testRTF.rtf
===================================================================
(Binary files differ)
Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testRTF.rtf
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.docm
===================================================================
(Binary files differ)
Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.docm
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.dotm
===================================================================
(Binary files differ)
Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.dotm
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.dotx
===================================================================
(Binary files differ)
Property changes on: core/branches/2.3.x/exo.core.component.document/src/test/resources/testWORD.dotx
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: core/branches/2.3.x/exo.core.component.document/src/test/resources/testXHTML.html
===================================================================
--- core/branches/2.3.x/exo.core.component.document/src/test/resources/testXHTML.html (rev 0)
+++ core/branches/2.3.x/exo.core.component.document/src/test/resources/testXHTML.html 2010-12-20 15:57:29 UTC (rev 3688)
@@ -0,0 +1,493 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>XHTML test document</title>
+ <meta name="Author" content="Tika Developers"/>
+ <meta http-equiv="refresh" content="5"/>
+ </head>
+ <body>
+ <p>
+ This document tests the ability of Apache Tika to extract content
+ from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
+ </p>
+ </body>
+</html>
More information about the exo-jcr-commits
mailing list