Author: rhauch
Date: 2008-11-11 23:35:25 -0500 (Tue, 11 Nov 2008)
New Revision: 620
Modified:
trunk/extensions/dna-sequencer-xml/src/main/java/org/jboss/dna/sequencer/xml/XmlSequencerHandler.java
Log:
DNA-231 - org.jboss.dna.graph.xml.XmlSequencerTest failed with jdk 1.6 (not with jdk 1.5)
http://jira.jboss.com/jira/browse/DNA-231
Apparently the SAX parser in Java 1.6.0_07 (build 1.6.0_07-b06-153) on OS-X 10.5.5 does
not call the handler methods in the proper order for entity references. For example, in
1.5 (and in 1.6 on Fedora), this will result in calls to 'startEntity(...)',
'characters(...)', and 'endEntity(...)'. Java 1.6.0_07 on OS-X results in
'startEntity(...)', 'endEntity(...)', and 'characters(...)',
where the 'characters' method is called with character content representing the
replaced entity reference PLUS the next characters that would normally be supplied in the
subsequent call to 'characters'.
This was addressed by changing how these methods use the flag that records which entity
reference is being processed, and by recording the replacement value when the entity
declaration is processed (so that we can separate the entity replacement value from the
'extra' characters).
New test cases were added to properly verify this behavior, and to ensure that the changes
work correctly. Tests were run in both Java 1.5 and 1.6 on both OS-X and on Fedora.
Modified:
trunk/extensions/dna-sequencer-xml/src/main/java/org/jboss/dna/sequencer/xml/XmlSequencerHandler.java
===================================================================
---
trunk/extensions/dna-sequencer-xml/src/main/java/org/jboss/dna/sequencer/xml/XmlSequencerHandler.java 2008-11-12
04:31:14 UTC (rev 619)
+++
trunk/extensions/dna-sequencer-xml/src/main/java/org/jboss/dna/sequencer/xml/XmlSequencerHandler.java 2008-11-12
04:35:25 UTC (rev 620)
@@ -126,6 +126,7 @@
private StringBuilder cDataContent;
private StringBuilder contentBuilder;
private final Problems problems;
+ private final Map<String, String> entityValues = new HashMap<String,
String>();
/**
* @param output
@@ -136,11 +137,11 @@
* @param scoping
*/
XmlSequencerHandler( SequencerOutput output,
- SequencerContext context,
- Name nameAttribute,
- Name defaultPrimaryType,
- TextDecoder textDecoder,
- XmlSequencer.AttributeScoping scoping ) {
+ SequencerContext context,
+ Name nameAttribute,
+ Name defaultPrimaryType,
+ TextDecoder textDecoder,
+ XmlSequencer.AttributeScoping scoping ) {
CheckArg.isNotNull(output, "output");
CheckArg.isNotNull(context, "context");
@@ -283,6 +284,8 @@
output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaDtdLexicon.ENTITY);
output.setProperty(currentPath, DnaDtdLexicon.NAME, name);
output.setProperty(currentPath, DnaDtdLexicon.VALUE, value);
+ // Record the name/value pair ...
+ entityValues.put(name, value);
endNode();
}
@@ -459,6 +462,18 @@
// into this method), we want to keep the entity reference ...
contentBuilder.append('&').append(currentEntityName).append(';');
+ // Normally, 'characters' is called with just the entity
replacement characters,
+ // and is called between 'startEntity' and 'endEntity'.
However, per DNA-231, some JVMs
+ // use an incorrect ordering: 'startEntity', 'endEntity'
and then 'characters', and the
+ // content passed to the 'characters' call not only includes the
entity replacement characters
+ // followed by other content. Look for this condition ...
+ String entityValue = entityValues.get(currentEntityName);
+ if (!content.equals(entityValue) && entityValue != null
&& entityValue.length() < content.length()) {
+ // Per DNA-231, there's extra content after the entity value. So
replace the entity value in the
+ // content with the entity reference (not the replacement
characters), and add the extra content ...
+ String extraContent = content.substring(entityValue.length());
+ contentBuilder.append(extraContent);
+ }
// We're done reading the entity characters, so null it out
currentEntityName = null;
} else {