Author: tolusha
Date: 2011-06-07 05:06:06 -0400 (Tue, 07 Jun 2011)
New Revision: 4483
Modified:
core/branches/2.3.x/patch/2.3.8/COR-228/COR-228.patch
Log:
COR-228: patch updated
Modified: core/branches/2.3.x/patch/2.3.8/COR-228/COR-228.patch
===================================================================
--- core/branches/2.3.x/patch/2.3.8/COR-228/COR-228.patch 2011-06-07 08:29:22 UTC (rev 4482)
+++ core/branches/2.3.x/patch/2.3.8/COR-228/COR-228.patch 2011-06-07 09:06:06 UTC (rev 4483)
@@ -40527,7 +40527,7 @@
Index: exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PDFDocumentReader.java
===================================================================
---- exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PDFDocumentReader.java (revision 4476)
+--- exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PDFDocumentReader.java (revision 4481)
+++ exo.core.component.document/src/main/java/org/exoplatform/services/document/impl/PDFDocumentReader.java (working copy)
@@ -18,37 +18,31 @@
*/
@@ -40576,7 +40576,7 @@
/**
* Created by The eXo Platform SAS A parser of Adobe PDF files.
*
-@@ -168,204 +162,264 @@
+@@ -168,204 +162,269 @@
*/
public Properties getProperties(InputStream is) throws IOException, DocumentReadException
{
@@ -40589,28 +40589,25 @@
- byte[] metadata = reader.getMetadata();
-
- if (metadata != null)
-- {
++ PDDocument pdDocument = PDDocument.load(is);
++ Properties props = new Properties();
++ try
+ {
- // there is XMP metadata try exctract it
- props = getPropertiesFromMetadata(metadata);
- }
-
- if (props == null)
-+ PDDocument pdDocument = PDDocument.load(is);
-+ Properties props = new Properties();
-+ try
- {
+- {
- // it's old pdf document version
- props = getPropertiesFromInfo(reader.getInfo());
- }
- reader.close();
- if (is != null)
- try
-- {
-- is.close();
-- }
-- catch (IOException e)
+ if (pdDocument.isEncrypted())
{
+- is.close();
+ try
+ {
+ pdDocument.decrypt("");
@@ -40624,8 +40621,16 @@
+ throw new DocumentReadException(e.getMessage(), e);
+ }
}
+- catch (IOException e)
++
++ PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
++ PDMetadata meta = catalog.getMetadata();
++ if (meta != null)
+ {
+- }
- return props;
- }
++ XMPMetadata metadata = meta.exportXMPMetadata();
- /**
- * Extract properties from XMP xml.
@@ -40637,45 +40642,6 @@
- */
- protected Properties getPropertiesFromMetadata(byte[] metadata) throws IOException, DocumentReadException
- {
--
-- Properties props = null;
--
-- // parse xml
--
-- Document doc;
-- try
-- {
-- DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
-- DocumentBuilder docBuilder = dbf.newDocumentBuilder();
-- doc = docBuilder.parse(new ByteArrayInputStream(metadata));
-- }
-- catch (SAXException e)
-- {
-- throw new DocumentReadException(e.getMessage(), e);
-- }
-- catch (ParserConfigurationException e)
-- {
-- throw new DocumentReadException(e.getMessage(), e);
-- }
--
-- // Check is there PDF/A-1 XMP
-- String version = "";
-- NodeList list = doc.getElementsByTagName("pdfaid:conformance");
-- if (list != null && list.item(0) != null)
-- {
-- version += list.item(0).getTextContent() + "-";
-- }
-+ PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
-+ PDMetadata meta = catalog.getMetadata();
-+ if (meta != null)
-+ {
-+ XMPMetadata metadata = meta.exportXMPMetadata();
-
-- list = doc.getElementsByTagName("pdfaid:part");
-- if (list != null && list.item(0) != null)
-- {
-- version += list.item(0).getTextContent();
-- }
+ XMPSchemaDublinCore dc = metadata.getDublinCoreSchema();
+ if (dc != null)
+ {
@@ -40698,11 +40664,7 @@
+ log.warn("getSubject failed: " + e);
+ }
-- // PDF/A-1a or PDF/A-1b
-- if (version.equalsIgnoreCase("A-1"))
-- {
-- props = getPropsFromPDFAMetadata(doc);
-- }
+- Properties props = null;
+ try
+ {
+ if (dc.getCreators() != null)
@@ -40719,25 +40681,28 @@
+ log.warn("getCreator failed: " + e);
+ }
-- return props;
-- }
+- // parse xml
+ try
+ {
+ if (dc.getDates() != null)
+ {
+ List<Calendar> list = dc.getDates();
-- /**
-- * Extracts properties from PDF Info hash set.
-- *
-- * @param Pdf Info hash set
-- * @return Extracted properties
-- * @throws Exception if extracting fails
-- */
-- @SuppressWarnings("unchecked")
-- protected Properties getPropertiesFromInfo(HashMap info) throws IOException
-- {
-- Properties props = new Properties();
+- Document doc;
+- try
+- {
+- DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+- DocumentBuilder docBuilder = dbf.newDocumentBuilder();
+- doc = docBuilder.parse(new ByteArrayInputStream(metadata));
+- }
+- catch (SAXException e)
+- {
+- throw new DocumentReadException(e.getMessage(), e);
+- }
+- catch (ParserConfigurationException e)
+- {
+- throw new DocumentReadException(e.getMessage(), e);
+- }
+ for (Calendar date : list)
+ {
+ props.put(DCMetaData.DATE, date);
@@ -40750,10 +40715,12 @@
+ }
+ }
-- String title = (String)info.get("Title");
-- if (title != null)
+- // Check is there PDF/A-1 XMP
+- String version = "";
+- NodeList list = doc.getElementsByTagName("pdfaid:conformance");
+- if (list != null && list.item(0) != null)
- {
-- props.put(DCMetaData.TITLE, title);
+- version += list.item(0).getTextContent() + "-";
- }
+ XMPSchemaBasic basic = metadata.getBasicSchema();
+ if (basic != null)
@@ -40779,10 +40746,10 @@
+ }
+ }
-- String author = (String)info.get("Author");
-- if (author != null)
+- list = doc.getElementsByTagName("pdfaid:part");
+- if (list != null && list.item(0) != null)
- {
-- props.put(DCMetaData.CREATOR, author);
+- version += list.item(0).getTextContent();
- }
+ if (props.isEmpty())
+ {
@@ -40790,10 +40757,10 @@
+ // usefull data, try to use the document information instead
+ PDDocumentInformation docInfo = pdDocument.getDocumentInformation();
-- String subject = (String)info.get("Subject");
-- if (subject != null)
+- // PDF/A-1a or PDF/A-1b
+- if (version.equalsIgnoreCase("A-1"))
- {
-- props.put(DCMetaData.SUBJECT, subject);
+- props = getPropsFromPDFAMetadata(doc);
- }
+ if (docInfo != null)
+ {
@@ -40818,10 +40785,25 @@
+ try
+ {
-- String creationDate = (String)info.get("CreationDate");
-- if (creationDate != null)
+- return props;
+- }
+-
+- /**
+- * Extracts properties from PDF Info hash set.
+- *
+- * @param Pdf Info hash set
+- * @return Extracted properties
+- * @throws Exception if extracting fails
+- */
+- @SuppressWarnings("unchecked")
+- protected Properties getPropertiesFromInfo(HashMap info) throws IOException
+- {
+- Properties props = new Properties();
+-
+- String title = (String)info.get("Title");
+- if (title != null)
- {
-- props.put(DCMetaData.DATE, PdfDate.decode(creationDate));
+- props.put(DCMetaData.TITLE, title);
+ if (docInfo.getKeywords() != null)
+ props.put(DCMetaData.SUBJECT, docInfo.getKeywords());
+ }
@@ -40860,22 +40842,40 @@
+ }
}
-
-- String modDate = (String)info.get("ModDate");
-- if (modDate != null)
+- String author = (String)info.get("Author");
+- if (author != null)
+ finally
{
-- props.put(DCMetaData.DATE, PdfDate.decode(modDate));
+- props.put(DCMetaData.CREATOR, author);
+ if (pdDocument != null)
+ {
+ pdDocument.close();
+ }
}
+- String subject = (String)info.get("Subject");
+- if (subject != null)
+- {
+- props.put(DCMetaData.SUBJECT, subject);
+- }
+-
+- String creationDate = (String)info.get("CreationDate");
+- if (creationDate != null)
+- {
+- props.put(DCMetaData.DATE, PdfDate.decode(creationDate));
+- }
+-
+- String modDate = (String)info.get("ModDate");
+- if (modDate != null)
+- {
+- props.put(DCMetaData.DATE, PdfDate.decode(modDate));
+- }
+-
return props;
}
- private Properties getPropsFromPDFAMetadata(Document doc) throws IOException, DocumentReadException
-+ private String fixEncoding(String str)
++ private String fixEncoding(String str) throws DocumentReadException
{
- Properties props = new Properties();
- // get properties
@@ -40884,7 +40884,11 @@
+ try
{
- for (int i = 0; i < list.getLength(); i++)
-- {
++ String encoding = null;
++ int orderMaskOffset = 0;
++
++ if (str.startsWith("\\000\\000\\376\\377"))
+ {
-
- Node n = list.item(i);
- // dc:title - TITLE
@@ -40893,9 +40897,7 @@
- String title = n.getLastChild().getTextContent();
- props.put(DCMetaData.TITLE, title);
- }
-+ String encoding = null;
-+ int orderMaskOffset = 0;
-
+-
- // dc:creator - CREATOR
- if (n.getParentNode().getParentNode().getNodeName().equals("dc:creator"))
- {
@@ -40910,8 +40912,6 @@
- props.put(DCMetaData.SUBJECT, description);
- // props.put(DCMetaData.DESCRIPTION, description);
- }
-+ if (str.startsWith("\\000\\000\\376\\377"))
-+ {
+ encoding = "UTF-32BE";
+ orderMaskOffset = 16;
}
@@ -40933,7 +40933,7 @@
- }
+ encoding = "UTF-32LE";
+ orderMaskOffset = 16;
-+ }
+ }
+ else if (str.startsWith("\\376\\377"))
+ {
+ encoding = "UTF-16BE";
@@ -40943,7 +40943,7 @@
+ {
+ encoding = "UTF-16LE";
+ orderMaskOffset = 8;
- }
++ }
- // xmp:ModifyDate - DATE
- list = doc.getElementsByTagName("xmp:ModifyDate");
@@ -40978,15 +40978,21 @@
+ }
+ catch (NumberFormatException e)
+ {
-+ log.warn(
-+ "PDF metadata exctraction warning: can not decode octal code - "
-+ + str.substring(i - 1, i + 3) + ".", e);
++ if (log.isDebugEnabled())
++ {
++ log.debug(
++ "PDF metadata exctraction warning: can not decode octal code - "
++ + str.substring(i - 1, i + 3) + ".", e);
++ }
+ }
+ }
+ else
+ {
-+ log.warn("PDF metadata exctraction warning: octal code is not complete - "
-+ + str.substring(i - 1, len));
++ if (log.isDebugEnabled())
++ {
++ log.debug("PDF metadata exctraction warning: octal code is not complete - "
++ + str.substring(i - 1, len));
++ }
+ }
+ }
+ sb.append(c);
@@ -40999,9 +41005,7 @@
- catch (ParseException e)
+ catch (UnsupportedEncodingException e)
{
-- throw new DocumentReadException(e.getMessage(), e);
-+ log.warn("PDF metadata exctraction warning: can not convert metadata string " + str, e);
-+ return "";
+ throw new DocumentReadException(e.getMessage(), e);
}
- return props;
}