[exo-jcr-commits] exo-jcr SVN: r2432 - in core/trunk/exo.core.component.document/src: test/java/org/exoplatform/services/document/test and 1 other directories.
do-not-reply at jboss.org
do-not-reply at jboss.org
Tue May 25 09:14:27 EDT 2010
Author: sergiykarpenko
Date: 2010-05-25 09:14:26 -0400 (Tue, 25 May 2010)
New Revision: 2432
Added:
core/trunk/exo.core.component.document/src/test/resources/testCDATA.xml
core/trunk/exo.core.component.document/src/test/resources/testUTF8.xml
Modified:
core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/XMLDocumentReader.java
core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestXMLDocumentReader.java
Log:
EXOJCR-738: XMLDocumentReader now uses SAXParser. Test added.
Modified: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/XMLDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/XMLDocumentReader.java 2010-05-25 07:37:50 UTC (rev 2431)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/XMLDocumentReader.java 2010-05-25 13:14:26 UTC (rev 2432)
@@ -19,15 +19,19 @@
package org.exoplatform.services.document.impl;
import org.exoplatform.services.document.DocumentReadException;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.StringWriter;
+import java.io.Writer;
import java.util.Properties;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
/**
* Created by The eXo Platform SAS A parser of XML files.
*
@@ -61,14 +65,15 @@
}
try
{
- byte[] buffer = new byte[2048];
- int len;
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- while ((len = is.read(buffer)) > 0)
- bos.write(buffer, 0, len);
- bos.close();
- String xml = new String(bos.toByteArray());
- return delete(xml);
+
+ // byte[] buffer = new byte[2048];
+ // int len;
+ // ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ // while ((len = is.read(buffer)) > 0)
+ // bos.write(buffer, 0, len);
+ // bos.close();
+ // String xml = new String(bos.toByteArray());
+ return parse(is);
}
finally
{
@@ -113,27 +118,74 @@
* @param str the string which contain a text with user's tags.
* @return The string cleaned from user's tags and their bodies.
*/
- private String delete(String str)
+ private String parse(InputStream is)
{
+ SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
+ // saxParserFactory.setNamespaceAware(true);
+ // saxParserFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+ SAXParser saxParser;
+ StringWriter writer = new StringWriter();
+
+ DefaultHandler dh = new WriteOutContentHandler(writer);
try
{
- StringBuffer input = new StringBuffer(str);
- String patternString = "<+[^>]*>+";
- Pattern pattern = Pattern.compile(patternString, Pattern.CASE_INSENSITIVE + Pattern.DOTALL);
- Matcher matcher = pattern.matcher(input);
- while (matcher.find())
+ saxParser = saxParserFactory.newSAXParser();
+ saxParser.parse(is, dh);
+ }
+ catch (SAXException e)
+ {
+ return "";
+ }
+ catch (IOException e)
+ {
+ return "";
+ }
+ catch (ParserConfigurationException e)
+ {
+ return "";
+ }
+
+ return writer.toString();
+
+ }
+
+ class WriteOutContentHandler extends DefaultHandler
+ {
+ private final Writer writer;
+
+ public WriteOutContentHandler(Writer writer)
+ {
+ this.writer = writer;
+ }
+
+ /**
+ * Writes the given characters to the given character stream.
+ */
+ @Override
+ public void characters(char[] ch, int start, int length) throws SAXException
+ {
+ try
{
- int start = matcher.start();
- int end = matcher.end();
- input.delete(start, end);
- matcher = pattern.matcher(input);
+ writer.write(ch, start, length);
}
- return input.substring(0, input.length());
+ catch (IOException e)
+ {
+ throw new SAXException(e.getMessage(), e);
+ }
}
- catch (PatternSyntaxException e)
+
+ @Override
+ public void endDocument() throws SAXException
{
+ try
+ {
+ writer.flush();
+ }
+ catch (IOException e)
+ {
+ throw new SAXException(e.getMessage(), e);
+ }
}
- return "";
}
}
Modified: core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestXMLDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestXMLDocumentReader.java 2010-05-25 07:37:50 UTC (rev 2431)
+++ core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestXMLDocumentReader.java 2010-05-25 13:14:26 UTC (rev 2432)
@@ -45,7 +45,26 @@
{
InputStream is = TestXMLDocumentReader.class.getResourceAsStream("/test.xml");
String text = service_.getDocumentReader("text/xml").getContentAsText(is);
- String etalon = "\n\n John\n" + " Alice\n" + " Reminder\n" + " Don't forget it this weekend!\n\n";
- assertEquals("Wrong string returned", etalon, text);
+ String expected = "John\n" + " Alice\n" + " Reminder\n" + " Don't forget it this weekend!";
+ assertEquals("Wrong string returned", expected, text.trim());
}
+
+ public void testCDATAGetContentAsString() throws Exception
+ {
+ InputStream is = TestXMLDocumentReader.class.getResourceAsStream("/testCDATA.xml");
+ String text = service_.getDocumentReader("text/xml").getContentAsText(is);
+ String expected = "This is a text inside CDATA.";
+ assertEquals("Wrong string returned", expected, text.trim());
+ }
+
+ public void testI18ngetContentAsString() throws Exception
+ {
+ InputStream is = TestXMLDocumentReader.class.getResourceAsStream("/testUTF8.xml");
+ String text = service_.getDocumentReader("text/xml").getContentAsText(is);
+ final String expected =
+ "\u0426\u0435 \u0442\u0435\u0441\u0442\u043e\u0432\u0438\u0439 \u0442\u0435\u043a\u0441\u0442.\n"
+ + "Archim\u00E8de et Lius \u00E0 Ch\u00E2teauneuf testing chars en \u00E9t\u00E9";
+ assertEquals("Wrong string returned", expected, text.trim());
+ }
+
}
Added: core/trunk/exo.core.component.document/src/test/resources/testCDATA.xml
===================================================================
--- core/trunk/exo.core.component.document/src/test/resources/testCDATA.xml (rev 0)
+++ core/trunk/exo.core.component.document/src/test/resources/testCDATA.xml 2010-05-25 13:14:26 UTC (rev 2432)
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<Module>
+ <ModulePrefs title="Hello World! title" />
+ <Content type="html">
+<![CDATA[This is a text inside CDATA.]]></Content>
+</Module>
\ No newline at end of file
Added: core/trunk/exo.core.component.document/src/test/resources/testUTF8.xml
===================================================================
--- core/trunk/exo.core.component.document/src/test/resources/testUTF8.xml (rev 0)
+++ core/trunk/exo.core.component.document/src/test/resources/testUTF8.xml 2010-05-25 13:14:26 UTC (rev 2432)
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<Module>
+ <ModulePrefs title="Hello World! title" />
+ <Content type="html">
+ <Title>Це тестовий текст.</Title>
+<![CDATA[Archimède et Lius à Châteauneuf testing chars en été]]></Content>
+</Module>
\ No newline at end of file
More information about the exo-jcr-commits
mailing list