[exo-jcr-commits] exo-jcr SVN: r2432 - in core/trunk/exo.core.component.document/src: test/java/org/exoplatform/services/document/test and 1 other directories.

do-not-reply at jboss.org do-not-reply at jboss.org
Tue May 25 09:14:27 EDT 2010


Author: sergiykarpenko
Date: 2010-05-25 09:14:26 -0400 (Tue, 25 May 2010)
New Revision: 2432

Added:
   core/trunk/exo.core.component.document/src/test/resources/testCDATA.xml
   core/trunk/exo.core.component.document/src/test/resources/testUTF8.xml
Modified:
   core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/XMLDocumentReader.java
   core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestXMLDocumentReader.java
Log:
EXOJCR-738: XMLDocumentReader now uses SAXParser. Test added.

Modified: core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/XMLDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/XMLDocumentReader.java	2010-05-25 07:37:50 UTC (rev 2431)
+++ core/trunk/exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/XMLDocumentReader.java	2010-05-25 13:14:26 UTC (rev 2432)
@@ -19,15 +19,19 @@
 package org.exoplatform.services.document.impl;
 
 import org.exoplatform.services.document.DocumentReadException;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
 
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.StringWriter;
+import java.io.Writer;
 import java.util.Properties;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
 
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
 /**
  * Created by The eXo Platform SAS A parser of XML files.
  * 
@@ -61,14 +65,15 @@
       }
       try
       {
-         byte[] buffer = new byte[2048];
-         int len;
-         ByteArrayOutputStream bos = new ByteArrayOutputStream();
-         while ((len = is.read(buffer)) > 0)
-            bos.write(buffer, 0, len);
-         bos.close();
-         String xml = new String(bos.toByteArray());
-         return delete(xml);
+
+         //         byte[] buffer = new byte[2048];
+         //         int len;
+         //         ByteArrayOutputStream bos = new ByteArrayOutputStream();
+         //         while ((len = is.read(buffer)) > 0)
+         //            bos.write(buffer, 0, len);
+         //         bos.close();
+         //         String xml = new String(bos.toByteArray());
+         return parse(is);
       }
       finally
       {
@@ -113,27 +118,74 @@
     * @param str the string which contain a text with user's tags.
     * @return The string cleaned from user's tags and their bodies.
     */
-   private String delete(String str)
+   private String parse(InputStream is)
    {
+      SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
+      //      saxParserFactory.setNamespaceAware(true);
+      //      saxParserFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+      SAXParser saxParser;
+      StringWriter writer = new StringWriter();
+
+      DefaultHandler dh = new WriteOutContentHandler(writer);
       try
       {
-         StringBuffer input = new StringBuffer(str);
-         String patternString = "<+[^>]*>+";
-         Pattern pattern = Pattern.compile(patternString, Pattern.CASE_INSENSITIVE + Pattern.DOTALL);
-         Matcher matcher = pattern.matcher(input);
-         while (matcher.find())
+         saxParser = saxParserFactory.newSAXParser();
+         saxParser.parse(is, dh);
+      }
+      catch (SAXException e)
+      {
+         return "";
+      }
+      catch (IOException e)
+      {
+         return "";
+      }
+      catch (ParserConfigurationException e)
+      {
+         return "";
+      }
+
+      return writer.toString();
+
+   }
+
+   class WriteOutContentHandler extends DefaultHandler
+   {
+      private final Writer writer;
+
+      public WriteOutContentHandler(Writer writer)
+      {
+         this.writer = writer;
+      }
+
+      /**
+       * Writes the given characters to the given character stream.
+       */
+      @Override
+      public void characters(char[] ch, int start, int length) throws SAXException
+      {
+         try
          {
-            int start = matcher.start();
-            int end = matcher.end();
-            input.delete(start, end);
-            matcher = pattern.matcher(input);
+            writer.write(ch, start, length);
          }
-         return input.substring(0, input.length());
+         catch (IOException e)
+         {
+            throw new SAXException(e.getMessage(), e);
+         }
       }
-      catch (PatternSyntaxException e)
+
+      @Override
+      public void endDocument() throws SAXException
       {
+         try
+         {
+            writer.flush();
+         }
+         catch (IOException e)
+         {
+            throw new SAXException(e.getMessage(), e);
+         }
       }
-      return "";
    }
 
 }

Modified: core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestXMLDocumentReader.java
===================================================================
--- core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestXMLDocumentReader.java	2010-05-25 07:37:50 UTC (rev 2431)
+++ core/trunk/exo.core.component.document/src/test/java/org/exoplatform/services/document/test/TestXMLDocumentReader.java	2010-05-25 13:14:26 UTC (rev 2432)
@@ -45,7 +45,26 @@
    {
       InputStream is = TestXMLDocumentReader.class.getResourceAsStream("/test.xml");
       String text = service_.getDocumentReader("text/xml").getContentAsText(is);
-      String etalon = "\n\n  John\n" + "  Alice\n" + "  Reminder\n" + "  Don't forget it this weekend!\n\n";
-      assertEquals("Wrong string returned", etalon, text);
+      String expected = "John\n" + "  Alice\n" + "  Reminder\n" + "  Don't forget it this weekend!";
+      assertEquals("Wrong string returned", expected, text.trim());
    }
+
+   public void testCDATAGetContentAsString() throws Exception
+   {
+      InputStream is = TestXMLDocumentReader.class.getResourceAsStream("/testCDATA.xml");
+      String text = service_.getDocumentReader("text/xml").getContentAsText(is);
+      String expected = "This is a text inside CDATA.";
+      assertEquals("Wrong string returned", expected, text.trim());
+   }
+
+   public void testI18ngetContentAsString() throws Exception
+   {
+      InputStream is = TestXMLDocumentReader.class.getResourceAsStream("/testUTF8.xml");
+      String text = service_.getDocumentReader("text/xml").getContentAsText(is);
+      final String expected =
+         "\u0426\u0435 \u0442\u0435\u0441\u0442\u043e\u0432\u0438\u0439 \u0442\u0435\u043a\u0441\u0442.\n"
+            + "Archim\u00E8de et Lius \u00E0 Ch\u00E2teauneuf testing chars en \u00E9t\u00E9";
+      assertEquals("Wrong string returned", expected, text.trim());
+   }
+
 }

Added: core/trunk/exo.core.component.document/src/test/resources/testCDATA.xml
===================================================================
--- core/trunk/exo.core.component.document/src/test/resources/testCDATA.xml	                        (rev 0)
+++ core/trunk/exo.core.component.document/src/test/resources/testCDATA.xml	2010-05-25 13:14:26 UTC (rev 2432)
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<Module>
+	<ModulePrefs title="Hello World! title" />
+	<Content type="html"> 
+<![CDATA[This is a text inside CDATA.]]></Content>
+</Module>
\ No newline at end of file

Added: core/trunk/exo.core.component.document/src/test/resources/testUTF8.xml
===================================================================
--- core/trunk/exo.core.component.document/src/test/resources/testUTF8.xml	                        (rev 0)
+++ core/trunk/exo.core.component.document/src/test/resources/testUTF8.xml	2010-05-25 13:14:26 UTC (rev 2432)
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<Module>
+	<ModulePrefs title="Hello World! title" />
+	<Content type="html"> 
+	<Title>Це тестовий текст.</Title>
+<![CDATA[Archimède et Lius à Châteauneuf testing chars en été]]></Content>
+</Module>
\ No newline at end of file



More information about the exo-jcr-commits mailing list