[exo-jcr-commits] exo-jcr SVN: r4483 - core/branches/2.3.x/patch/2.3.8/COR-228.

do-not-reply at jboss.org do-not-reply at jboss.org
Tue Jun 7 05:06:07 EDT 2011


Author: tolusha
Date: 2011-06-07 05:06:06 -0400 (Tue, 07 Jun 2011)
New Revision: 4483

Modified:
   core/branches/2.3.x/patch/2.3.8/COR-228/COR-228.patch
Log:
COR-228: patch updated

Modified: core/branches/2.3.x/patch/2.3.8/COR-228/COR-228.patch
===================================================================
--- core/branches/2.3.x/patch/2.3.8/COR-228/COR-228.patch	2011-06-07 08:29:22 UTC (rev 4482)
+++ core/branches/2.3.x/patch/2.3.8/COR-228/COR-228.patch	2011-06-07 09:06:06 UTC (rev 4483)
@@ -40527,7 +40527,7 @@
 
 Index: exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PDFDocumentReader.java
 ===================================================================
---- exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PDFDocumentReader.java	(revision 4476)
+--- exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PDFDocumentReader.java	(revision 4481)
 +++ exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PDFDocumentReader.java	(working copy)
 @@ -18,37 +18,31 @@
   */
@@ -40576,7 +40576,7 @@
  /**
   * Created by The eXo Platform SAS A parser of Adobe PDF files.
   * 
-@@ -168,204 +162,264 @@
+@@ -168,204 +162,269 @@
      */
     public Properties getProperties(InputStream is) throws IOException, DocumentReadException
     {
@@ -40589,28 +40589,25 @@
 -      byte[] metadata = reader.getMetadata();
 -
 -      if (metadata != null)
--      {
++      PDDocument pdDocument = PDDocument.load(is);
++      Properties props = new Properties();
++      try
+       {
 -         // there is XMP metadata try exctract it
 -         props = getPropertiesFromMetadata(metadata);
 -      }
 -
 -      if (props == null)
-+      PDDocument pdDocument = PDDocument.load(is);
-+      Properties props = new Properties();
-+      try
-       {
+-      {
 -         // it's old pdf document version
 -         props = getPropertiesFromInfo(reader.getInfo());
 -      }
 -      reader.close();
 -      if (is != null)
 -         try
--         {
--            is.close();
--         }
--         catch (IOException e)
 +         if (pdDocument.isEncrypted())
           {
+-            is.close();
 +            try
 +            {
 +               pdDocument.decrypt("");
@@ -40624,8 +40621,16 @@
 +               throw new DocumentReadException(e.getMessage(), e);
 +            }
           }
+-         catch (IOException e)
++
++         PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
++         PDMetadata meta = catalog.getMetadata();
++         if (meta != null)
+          {
+-         }
 -      return props;
 -   }
++            XMPMetadata metadata = meta.exportXMPMetadata();
  
 -   /**
 -    * Extract properties from XMP xml.
@@ -40637,45 +40642,6 @@
 -    */
 -   protected Properties getPropertiesFromMetadata(byte[] metadata) throws IOException, DocumentReadException
 -   {
--
--      Properties props = null;
--
--      // parse xml
--
--      Document doc;
--      try
--      {
--         DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
--         DocumentBuilder docBuilder = dbf.newDocumentBuilder();
--         doc = docBuilder.parse(new ByteArrayInputStream(metadata));
--      }
--      catch (SAXException e)
--      {
--         throw new DocumentReadException(e.getMessage(), e);
--      }
--      catch (ParserConfigurationException e)
--      {
--         throw new DocumentReadException(e.getMessage(), e);
--      }
--
--      // Check is there PDF/A-1 XMP
--      String version = "";
--      NodeList list = doc.getElementsByTagName("pdfaid:conformance");
--      if (list != null && list.item(0) != null)
--      {
--         version += list.item(0).getTextContent() + "-";
--      }
-+         PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
-+         PDMetadata meta = catalog.getMetadata();
-+         if (meta != null)
-+         {
-+            XMPMetadata metadata = meta.exportXMPMetadata();
- 
--      list = doc.getElementsByTagName("pdfaid:part");
--      if (list != null && list.item(0) != null)
--      {
--         version += list.item(0).getTextContent();
--      }
 +            XMPSchemaDublinCore dc = metadata.getDublinCoreSchema();
 +            if (dc != null)
 +            {
@@ -40698,11 +40664,7 @@
 +                  log.warn("getSubject failed: " + e);
 +               }
  
--      // PDF/A-1a or PDF/A-1b
--      if (version.equalsIgnoreCase("A-1"))
--      {
--         props = getPropsFromPDFAMetadata(doc);
--      }
+-      Properties props = null;
 +               try
 +               {
 +                  if (dc.getCreators() != null)
@@ -40719,25 +40681,28 @@
 +                  log.warn("getCreator failed: " + e);
 +               }
  
--      return props;
--   }
+-      // parse xml
 +               try
 +               {
 +                  if (dc.getDates() != null)
 +                  {
 +                     List<Calendar> list = dc.getDates();
  
--   /**
--    * Extracts properties from PDF Info hash set.
--    * 
--    * @param Pdf Info hash set
--    * @return Extracted properties
--    * @throws Exception if extracting fails
--    */
--   @SuppressWarnings("unchecked")
--   protected Properties getPropertiesFromInfo(HashMap info) throws IOException
--   {
--      Properties props = new Properties();
+-      Document doc;
+-      try
+-      {
+-         DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+-         DocumentBuilder docBuilder = dbf.newDocumentBuilder();
+-         doc = docBuilder.parse(new ByteArrayInputStream(metadata));
+-      }
+-      catch (SAXException e)
+-      {
+-         throw new DocumentReadException(e.getMessage(), e);
+-      }
+-      catch (ParserConfigurationException e)
+-      {
+-         throw new DocumentReadException(e.getMessage(), e);
+-      }
 +                     for (Calendar date : list)
 +                     {
 +                        props.put(DCMetaData.DATE, date);
@@ -40750,10 +40715,12 @@
 +               }
 +            }
  
--      String title = (String)info.get("Title");
--      if (title != null)
+-      // Check is there PDF/A-1 XMP
+-      String version = "";
+-      NodeList list = doc.getElementsByTagName("pdfaid:conformance");
+-      if (list != null && list.item(0) != null)
 -      {
--         props.put(DCMetaData.TITLE, title);
+-         version += list.item(0).getTextContent() + "-";
 -      }
 +            XMPSchemaBasic basic = metadata.getBasicSchema();
 +            if (basic != null)
@@ -40779,10 +40746,10 @@
 +            }
 +         }
  
--      String author = (String)info.get("Author");
--      if (author != null)
+-      list = doc.getElementsByTagName("pdfaid:part");
+-      if (list != null && list.item(0) != null)
 -      {
--         props.put(DCMetaData.CREATOR, author);
+-         version += list.item(0).getTextContent();
 -      }
 +         if (props.isEmpty())
 +         {
@@ -40790,10 +40757,10 @@
 +            // usefull data, try to use the document information instead
 +            PDDocumentInformation docInfo = pdDocument.getDocumentInformation();
  
--      String subject = (String)info.get("Subject");
--      if (subject != null)
+-      // PDF/A-1a or PDF/A-1b
+-      if (version.equalsIgnoreCase("A-1"))
 -      {
--         props.put(DCMetaData.SUBJECT, subject);
+-         props = getPropsFromPDFAMetadata(doc);
 -      }
 +            if (docInfo != null)
 +            {
@@ -40818,10 +40785,25 @@
 +               try
 +               {
  
--      String creationDate = (String)info.get("CreationDate");
--      if (creationDate != null)
+-      return props;
+-   }
+-
+-   /**
+-    * Extracts properties from PDF Info hash set.
+-    * 
+-    * @param Pdf Info hash set
+-    * @return Extracted properties
+-    * @throws Exception if extracting fails
+-    */
+-   @SuppressWarnings("unchecked")
+-   protected Properties getPropertiesFromInfo(HashMap info) throws IOException
+-   {
+-      Properties props = new Properties();
+-
+-      String title = (String)info.get("Title");
+-      if (title != null)
 -      {
--         props.put(DCMetaData.DATE, PdfDate.decode(creationDate));
+-         props.put(DCMetaData.TITLE, title);
 +                  if (docInfo.getKeywords() != null)
 +                     props.put(DCMetaData.SUBJECT, docInfo.getKeywords());
 +               }
@@ -40860,22 +40842,40 @@
 +         }
        }
 -
--      String modDate = (String)info.get("ModDate");
--      if (modDate != null)
+-      String author = (String)info.get("Author");
+-      if (author != null)
 +      finally
        {
--         props.put(DCMetaData.DATE, PdfDate.decode(modDate));
+-         props.put(DCMetaData.CREATOR, author);
 +         if (pdDocument != null)
 +         {
 +            pdDocument.close();
 +         }
        }
  
+-      String subject = (String)info.get("Subject");
+-      if (subject != null)
+-      {
+-         props.put(DCMetaData.SUBJECT, subject);
+-      }
+-
+-      String creationDate = (String)info.get("CreationDate");
+-      if (creationDate != null)
+-      {
+-         props.put(DCMetaData.DATE, PdfDate.decode(creationDate));
+-      }
+-
+-      String modDate = (String)info.get("ModDate");
+-      if (modDate != null)
+-      {
+-         props.put(DCMetaData.DATE, PdfDate.decode(modDate));
+-      }
+-
        return props;
     }
  
 -   private Properties getPropsFromPDFAMetadata(Document doc) throws IOException, DocumentReadException
-+   private String fixEncoding(String str)
++   private String fixEncoding(String str) throws DocumentReadException
     {
 -      Properties props = new Properties();
 -      // get properties
@@ -40884,7 +40884,11 @@
 +      try
        {
 -         for (int i = 0; i < list.getLength(); i++)
--         {
++         String encoding = null;
++         int orderMaskOffset = 0;
++
++         if (str.startsWith("\\000\\000\\376\\377"))
+          {
 -
 -            Node n = list.item(i);
 -            // dc:title - TITLE
@@ -40893,9 +40897,7 @@
 -               String title = n.getLastChild().getTextContent();
 -               props.put(DCMetaData.TITLE, title);
 -            }
-+         String encoding = null;
-+         int orderMaskOffset = 0;
- 
+-
 -            // dc:creator - CREATOR
 -            if (n.getParentNode().getParentNode().getNodeName().equals("dc:creator"))
 -            {
@@ -40910,8 +40912,6 @@
 -               props.put(DCMetaData.SUBJECT, description);
 -               // props.put(DCMetaData.DESCRIPTION, description);
 -            }
-+         if (str.startsWith("\\000\\000\\376\\377"))
-+         {
 +            encoding = "UTF-32BE";
 +            orderMaskOffset = 16;
           }
@@ -40933,7 +40933,7 @@
 -            }
 +            encoding = "UTF-32LE";
 +            orderMaskOffset = 16;
-+         }
+          }
 +         else if (str.startsWith("\\376\\377"))
 +         {
 +            encoding = "UTF-16BE";
@@ -40943,7 +40943,7 @@
 +         {
 +            encoding = "UTF-16LE";
 +            orderMaskOffset = 8;
-          }
++         }
  
 -         // xmp:ModifyDate - DATE
 -         list = doc.getElementsByTagName("xmp:ModifyDate");
@@ -40978,15 +40978,21 @@
 +                     }
 +                     catch (NumberFormatException e)
 +                     {
-+                        log.warn(
-+                           "PDF metadata exctraction warning: can not decode octal code - "
-+                              + str.substring(i - 1, i + 3) + ".", e);
++                        if (log.isDebugEnabled())
++                        {
++                           log.debug(
++                              "PDF metadata exctraction warning: can not decode octal code - "
++                                 + str.substring(i - 1, i + 3) + ".", e);
++                        }
 +                     }
 +                  }
 +                  else
 +                  {
-+                     log.warn("PDF metadata exctraction warning: octal code is not complete - "
-+                        + str.substring(i - 1, len));
++                     if (log.isDebugEnabled())
++                     {
++                        log.debug("PDF metadata exctraction warning: octal code is not complete - "
++                           + str.substring(i - 1, len));
++                     }
 +                  }
 +               }
 +               sb.append(c);
@@ -40999,9 +41005,7 @@
 -      catch (ParseException e)
 +      catch (UnsupportedEncodingException e)
        {
--         throw new DocumentReadException(e.getMessage(), e);
-+         log.warn("PDF metadata exctraction warning: can not convert metadata string " + str, e);
-+         return "";
+          throw new DocumentReadException(e.getMessage(), e);
        }
 -      return props;
     }



More information about the exo-jcr-commits mailing list