Author: nbelaevski
Date: 2009-07-07 13:08:12 -0400 (Tue, 07 Jul 2009)
New Revision: 14813
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Anchor.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttVal.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttrCheck.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttrCheckImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Attribute.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttributeTable.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Clean.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Configuration.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMAttrImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMAttrMapImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMCDATASectionImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMCharacterDataImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMCommentImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMDocumentImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMDocumentTypeImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMElementImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMNodeImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMNodeListByTagNameImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMNodeListImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMProcessingInstructionImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMTextImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Dict.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/EncodingNameMapper.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/EncodingUtils.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Entity.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/EntityTable.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/IStack.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Lexer.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Node.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Out.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/OutFactory.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/OutImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/OutJavaImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/PPrint.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ParseProperty.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ParsePropertyImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Parser.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ParserImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Report.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamIn.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamInFactory.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamInImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamInJavaImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Style.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StyleProp.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TagCheck.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TagCheckImpl.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TagTable.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Tidy.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyBeanInfo.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyLexerListener.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyMessage.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyMessageListener.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyUtils.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ValidUTF8Sequence.java
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/package-info.java
Log:
Reintegrated community/3.3.x into JSF 2.0 branch
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Anchor.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Anchor.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Anchor.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,80 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Entry of a singly linked list that pairs an anchor name with a linked {@link Node}.
+ * The class declares only fields and no behaviour of its own.
+ * @author hoehrmann
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class Anchor
+{
+
+ /**
+ * Name of the anchor.
+ */
+ protected String name;
+
+ /**
+ * Following entry in the list; presumably <code>null</code> for the last entry (TODO confirm against the code
+ * that builds the list).
+ */
+ protected Anchor next;
+
+ /**
+ * Node this anchor is linked to.
+ */
+ protected Node node;
+
+}
\ No newline at end of file
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttVal.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttVal.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttVal.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,423 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import org.w3c.dom.Attr;
+
+
+/**
+ * Attribute/Value linked list node.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class AttVal extends Object implements Cloneable
+{
+
+ /**
+ * next AttVal.
+ */
+ protected AttVal next;
+
+ /**
+ * Attribute definition.
+ */
+ protected Attribute dict;
+
+ /**
+ * Asp node.
+ */
+ protected Node asp;
+
+ /**
+ * Php node.
+ */
+ protected Node php;
+
+ /**
+ * Delimiter (" or ').
+ */
+ protected int delim;
+
+ /**
+ * Attribute name.
+ */
+ protected String attribute;
+
+ /**
+ * Attribute value.
+ */
+ protected String value;
+
+ /**
+ * DOM adapter.
+ */
+ protected Attr adapter;
+
+ /**
+ * Instantiates a new empty AttVal.
+ */
+ public AttVal()
+ {
+ super();
+ }
+
+ /**
+ * Instantiates a new AttVal.
+ * @param next next linked AttVal
+ * @param dict Attribute from dictionary
+ * @param delim delimiter for the attribute value
+ * @param attribute attribute name
+ * @param value attribute value
+ */
+ public AttVal(AttVal next, Attribute dict, int delim, String attribute, String
value)
+ {
+ this.next = next;
+ this.dict = dict;
+ this.delim = delim;
+ this.attribute = attribute;
+ this.value = value;
+ }
+
+ /**
+ * Instantiates a new AttVal.
+ * @param next next linked AttVal
+ * @param dict Attribute from dictionary
+ * @param asp contained asp node
+ * @param php contained php node
+ * @param delim delimiter for the attribute value
+ * @param attribute attribute name
+ * @param value attribute value
+ */
+ public AttVal(AttVal next, Attribute dict, Node asp, Node php, int delim, String
attribute, String value)
+ {
+ this.next = next;
+ this.dict = dict;
+ this.asp = asp;
+ this.php = php;
+ this.delim = delim;
+ this.attribute = attribute;
+ this.value = value;
+ }
+
+ /**
+  * Creates a copy of this attribute together with the rest of the attribute list.
+  * <p>
+  * The following list entries (<code>next</code>) and the embedded <code>asp</code> and <code>php</code> nodes are
+  * cloned as well; <code>dict</code>, <code>attribute</code> and <code>value</code> are shared with the original.
+  * The cached DOM adapter is not carried over, so the copy builds its own adapter on demand.
+  * </p>
+  * @return the copy, never <code>null</code>
+  * @throws IllegalStateException if <code>Object.clone()</code> unexpectedly refuses to clone this instance
+  * @see java.lang.Object#clone()
+  */
+ protected Object clone()
+ {
+     AttVal av;
+     try
+     {
+         av = (AttVal) super.clone();
+     }
+     catch (CloneNotSupportedException e)
+     {
+         // AttVal implements Cloneable, so this cannot happen. Fail with a clear message instead of
+         // continuing with a null reference, which would only surface later as a NullPointerException.
+         IllegalStateException failure = new IllegalStateException(
+             "AttVal implements Cloneable but super.clone() failed");
+         failure.initCause(e);
+         throw failure;
+     }
+
+     // The adapter was created around the original instance (see getAdapter()), so the shallow copy made by
+     // super.clone() must not keep it; getAdapter() will lazily create one for the copy.
+     av.adapter = null;
+
+     if (this.next != null)
+     {
+         av.next = (AttVal) this.next.clone();
+     }
+     if (this.asp != null)
+     {
+         av.asp = (Node) this.asp.clone();
+     }
+     if (this.php != null)
+     {
+         av.php = (Node) this.php.clone();
+     }
+
+     return av;
+ }
+
+ /**
+  * Tells whether the dictionary declares this attribute as a boolean one.
+  * @return <code>true</code> only if a dictionary entry is attached and its checker is
+  * {@link AttrCheckImpl#BOOL}
+  */
+ public boolean isBoolAttribute()
+ {
+     final Attribute definition = this.dict;
+
+     // identity comparison: BOOL is the single shared checker instance for boolean attributes
+     return definition != null && definition.getAttrchk() == AttrCheckImpl.BOOL;
+ }
+
+ /**
+  * Check the attribute value for uppercase letters (only if the value should be lowercase, required for literal
+  * values in xhtml).
+  * <p>
+  * Nothing happens when the value is <code>null</code> or already lowercase. Otherwise an
+  * <code>ATTR_VALUE_NOT_LCASE</code> error is reported when <code>lexer.isvoyager</code> is set, and the value is
+  * replaced by its lowercase form when <code>lexer.isvoyager</code> or
+  * <code>lexer.configuration.lowerLiterals</code> is set.
+  * </p>
+  * @param lexer Lexer
+  * @param node Node which contains this attribute
+  */
+ void checkLowerCaseAttrValue(Lexer lexer, Node node)
+ {
+     if (this.value == null)
+     {
+         return;
+     }
+
+     // Use a fixed locale: with the default locale the result depends on the platform settings, e.g. under a
+     // Turkish locale "MIDDLE" would become "m\u0131ddle" and no longer match any HTML keyword.
+     String lowercase = this.value.toLowerCase(java.util.Locale.ENGLISH);
+
+     if (!this.value.equals(lowercase))
+     {
+         if (lexer.isvoyager)
+         {
+             // report first so that the message still sees the original spelling
+             lexer.report.attrError(lexer, node, this, Report.ATTR_VALUE_NOT_LCASE);
+         }
+
+         if (lexer.isvoyager || lexer.configuration.lowerLiterals)
+         {
+             this.value = lowercase;
+         }
+     }
+ }
+
+ /**
+  * Validates the attribute name and value against the dictionary entry and reports any problem found.
+  * @param lexer Lexer used for reporting and for version constraints
+  * @param node node which contains this attribute
+  * @return the dictionary entry of this attribute, or <code>null</code> if the attribute is unknown
+  */
+ public Attribute checkAttribute(Lexer lexer, Node node)
+ {
+     final TagTable tags = lexer.configuration.tt;
+     final Attribute definition = this.dict;
+
+     if (definition == null)
+     {
+         // unknown attribute: stay silent in XML mode, for nodes without a tag, for asp attributes and for
+         // proprietary elements
+         final boolean reportUnknown = !lexer.configuration.xmlTags
+             && node.tag != null
+             && this.asp == null
+             && !TidyUtils.toBoolean(node.tag.versions & Dict.VERS_PROPRIETARY);
+
+         if (reportUnknown)
+         {
+             lexer.report.attrError(lexer, node, this, Report.UNKNOWN_ATTRIBUTE);
+         }
+         return definition;
+     }
+
+     if (TidyUtils.toBoolean(definition.getVersions() & Dict.VERS_XML))
+     {
+         // attribute flagged as XML: only acceptable when reading or writing XML
+         final boolean xmlMode = lexer.configuration.xmlTags || lexer.configuration.xmlOut;
+         if (!xmlMode)
+         {
+             lexer.report.attrError(lexer, node, this, Report.XML_ATTRIBUTE_VALUE);
+         }
+     }
+     else
+     {
+         // title first appeared in HTML 4.0 except for a/link, so it does not constrain the version there
+         final boolean titleOnAnchorOrLink = definition == AttributeTable.attrTitle
+             && (node.tag == tags.tagA || node.tag == tags.tagLink);
+         if (!titleOnAnchorOrLink)
+         {
+             lexer.constrainVersion(definition.getVersions());
+         }
+     }
+
+     final AttrCheck checker = definition.getAttrchk();
+     if (checker != null)
+     {
+         checker.check(lexer, node, this);
+     }
+     else if (TidyUtils.toBoolean(definition.getVersions() & Dict.VERS_PROPRIETARY))
+     {
+         lexer.report.attrError(lexer, node, this, Report.PROPRIETARY_ATTRIBUTE);
+     }
+
+     return definition;
+ }
+
+ /**
+ * Return the org.w3c.dom.Attr adapter.
+ * @return org.w3c.dom.Attr adapter
+ */
+ protected org.w3c.dom.Attr getAdapter()
+ {
+ if (this.adapter == null)
+ {
+ this.adapter = new DOMAttrImpl(this);
+ }
+ return this.adapter;
+ }
+
+ /**
+ * Getter for <code>asp</code>.
+ * @return Returns the asp.
+ */
+ public Node getAsp()
+ {
+ return this.asp;
+ }
+
+ /**
+ * Setter for <code>asp</code>.
+ * @param asp The asp to set.
+ */
+ public void setAsp(Node asp)
+ {
+ this.asp = asp;
+ }
+
+ /**
+ * Getter for <code>attribute</code>.
+ * @return Returns the attribute.
+ */
+ public String getAttribute()
+ {
+ return this.attribute;
+ }
+
+ /**
+ * Setter for <code>attribute</code>.
+ * @param attribute The attribute to set.
+ */
+ public void setAttribute(String attribute)
+ {
+ this.attribute = attribute;
+ }
+
+ /**
+ * Getter for <code>delim</code>.
+ * @return Returns the delim.
+ */
+ public int getDelim()
+ {
+ return this.delim;
+ }
+
+ /**
+ * Setter for <code>delim</code>.
+ * @param delim The delim to set.
+ */
+ public void setDelim(int delim)
+ {
+ this.delim = delim;
+ }
+
+ /**
+ * Getter for <code>dict</code>.
+ * @return Returns the dict.
+ */
+ public Attribute getDict()
+ {
+ return this.dict;
+ }
+
+ /**
+ * Setter for <code>dict</code>.
+ * @param dict The dict to set.
+ */
+ public void setDict(Attribute dict)
+ {
+ this.dict = dict;
+ }
+
+ /**
+ * Getter for <code>next</code>.
+ * @return Returns the next.
+ */
+ public AttVal getNext()
+ {
+ return this.next;
+ }
+
+ /**
+ * Setter for <code>next</code>.
+ * @param next The next to set.
+ */
+ public void setNext(AttVal next)
+ {
+ this.next = next;
+ }
+
+ /**
+ * Getter for <code>php</code>.
+ * @return Returns the php.
+ */
+ public Node getPhp()
+ {
+ return this.php;
+ }
+
+ /**
+ * Setter for <code>php</code>.
+ * @param php The php to set.
+ */
+ public void setPhp(Node php)
+ {
+ this.php = php;
+ }
+
+ /**
+ * Getter for <code>value</code>.
+ * @return Returns the value.
+ */
+ public String getValue()
+ {
+ return this.value;
+ }
+
+ /**
+ * Setter for <code>value</code>.
+ * @param value The value to set.
+ */
+ public void setValue(String value)
+ {
+ this.value = value;
+ }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttrCheck.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttrCheck.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttrCheck.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,74 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Check attribute values.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public interface AttrCheck
+{
+
+ /**
+ * Check the value of an attribute.
+ * The implementations in AttrCheckImpl report problems through <code>lexer.report.attrError</code>; some of
+ * them (for example CheckUrl) also rewrite <code>attval.value</code> in place.
+ * @param lexer Lexer providing the report and the configuration
+ * @param node Node which contains the attribute
+ * @param attval attribute to check; its value may be <code>null</code>
+ */
+ void check(Lexer lexer, Node node, AttVal attval);
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttrCheckImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttrCheckImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttrCheckImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,1105 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+
+
+/**
+ * Check attribute values implementations.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public final class AttrCheckImpl
+{
+
+ /**
+ * checker for URLs.
+ */
+ public static final AttrCheck URL = new CheckUrl();
+
+ /**
+ * checker for scripts.
+ */
+ public static final AttrCheck SCRIPT = new CheckScript();
+
+ /**
+ * checker for "name" attribute.
+ */
+ public static final AttrCheck NAME = new CheckName();
+
+ /**
+ * checker for ids.
+ */
+ public static final AttrCheck ID = new CheckId();
+
+ /**
+ * checker for "align" attribute.
+ */
+ public static final AttrCheck ALIGN = new CheckAlign();
+
+ /**
+ * checker for "valign" attribute.
+ */
+ public static final AttrCheck VALIGN = new CheckValign();
+
+ /**
+ * checker for boolean attributes.
+ */
+ public static final AttrCheck BOOL = new CheckBool();
+
+ /**
+ * checker for "length" attribute.
+ */
+ public static final AttrCheck LENGTH = new CheckLength();
+
+ /**
+ * checker for "target" attribute.
+ */
+ public static final AttrCheck TARGET = new CheckTarget();
+
+ /**
+ * checker for "submit" attribute.
+ */
+ public static final AttrCheck FSUBMIT = new CheckFsubmit();
+
+ /**
+ * checker for "clear" attribute.
+ */
+ public static final AttrCheck CLEAR = new CheckClear();
+
+ /**
+ * checker for "shape" attribute.
+ */
+ public static final AttrCheck SHAPE = new CheckShape();
+
+ /**
+ * checker for "number" attribute.
+ */
+ public static final AttrCheck NUMBER = new CheckNumber();
+
+ /**
+ * checker for "scope" attribute.
+ */
+ public static final AttrCheck SCOPE = new CheckScope();
+
+ /**
+ * checker for "color" attribute.
+ */
+ public static final AttrCheck COLOR = new CheckColor();
+
+ /**
+ * checker for "vtype" attribute.
+ */
+ public static final AttrCheck VTYPE = new CheckVType();
+
+ /**
+ * checker for "scroll" attribute.
+ */
+ public static final AttrCheck SCROLL = new CheckScroll();
+
+ /**
+ * checker for "dir" attribute.
+ */
+ public static final AttrCheck TEXTDIR = new CheckTextDir();
+
+ /**
+ * checker for "lang" and "xml:lang" attributes.
+ */
+ public static final AttrCheck LANG = new CheckLang();
+
+ /**
+ * checker for text attributes. Actually null (no validation).
+ */
+ public static final AttrCheck TEXT = null;
+
+ /**
+ * checker for "charset" attribute. Actually null (no validation).
+ */
+ public static final AttrCheck CHARSET = null;
+
+ /**
+ * checker for "type" attribute. Actually null (no validation).
+ */
+ public static final AttrCheck TYPE = null;
+
+ /**
+ * checker for attributes that can contain a single character. Actually null (no validation).
+ */
+ public static final AttrCheck CHARACTER = null;
+
+ /**
+ * checker for attributes which contain a list of urls. Actually null (no validation).
+ */
+ public static final AttrCheck URLS = null;
+
+ /**
+ * checker for "cols" attribute. Actually null (no validation).
+ */
+ public static final AttrCheck COLS = null;
+
+ /**
+ * checker for "coords" attribute. Actually null (no validation).
+ */
+ public static final AttrCheck COORDS = null;
+
+ /**
+ * checker for attributes containing dates. Actually null (no validation).
+ */
+ public static final AttrCheck DATE = null;
+
+ /**
+ * checker for attributes referencing an id. Actually null (no validation).
+ */
+ public static final AttrCheck IDREF = null;
+
+ /**
+ * checker for table "frame" attribute. Actually null (no validation).
+ */
+ public static final AttrCheck TFRAME = null;
+
+ /**
+ * checker for "frameborder" attribute. Actually null (no validation).
+ */
+ public static final AttrCheck FBORDER = null;
+
+ /**
+ * checker for "media" attribute. Actually null (no validation).
+ */
+ public static final AttrCheck MEDIA = null;
+
+ /**
+ * checker for "rel" and "rev" attributes. Actually null (no validation).
+ */
+ public static final AttrCheck LINKTYPES = null;
+
+ /**
+ * checker for table "rules" attribute. Actually null (no validation).
+ */
+ public static final AttrCheck TRULES = null;
+
+ /**
+ * utility class, don't instantiate.
+ */
+ private AttrCheckImpl()
+ {
+ // empty private constructor
+ }
+
+ /**
+  * AttrCheck implementation for checking URLs.
+  * <p>
+  * Reports a missing value, backslashes and characters that must not appear unescaped in a URI (anything outside
+  * <code>0x21..0x7e</code> plus <code>&lt;</code> and <code>&gt;</code>). Depending on
+  * <code>lexer.configuration.fixBackslash</code> and <code>lexer.configuration.fixUri</code> the value is also
+  * repaired in place: backslashes become forward slashes and illegal characters are percent-encoded as the
+  * <code>%HH</code> escapes of their UTF-8 bytes.
+  * </p>
+  */
+ public static class CheckUrl implements AttrCheck
+ {
+
+     /**
+      * Upper case hexadecimal digits used for percent-encoding.
+      */
+     private static final String HEX_DIGITS = "0123456789ABCDEF";
+
+     /**
+      * @see AttrCheck#check(Lexer, Node, AttVal)
+      */
+     public void check(Lexer lexer, Node node, AttVal attval)
+     {
+         if (attval.value == null)
+         {
+             lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+             return;
+         }
+
+         String p = attval.value;
+         boolean escapeFound = false;
+         boolean backslashFound = false;
+
+         // first pass: only find out what is wrong with the value
+         for (int i = 0; i < p.length(); ++i)
+         {
+             char c = p.charAt(i);
+             if (c == '\\')
+             {
+                 backslashFound = true;
+             }
+             else if (needsEscape(c))
+             {
+                 escapeFound = true;
+             }
+         }
+
+         // backslashes found, fix them
+         if (lexer.configuration.fixBackslash && backslashFound)
+         {
+             attval.value = attval.value.replace('\\', '/');
+             p = attval.value;
+         }
+
+         // illegal chars found, fix them
+         if (lexer.configuration.fixUri && escapeFound)
+         {
+             attval.value = escape(p);
+         }
+
+         if (backslashFound)
+         {
+             if (lexer.configuration.fixBackslash)
+             {
+                 lexer.report.attrError(lexer, node, attval, Report.FIXED_BACKSLASH);
+             }
+             else
+             {
+                 lexer.report.attrError(lexer, node, attval, Report.BACKSLASH_IN_URI);
+             }
+         }
+
+         if (escapeFound)
+         {
+             if (lexer.configuration.fixUri)
+             {
+                 lexer.report.attrError(lexer, node, attval, Report.ESCAPED_ILLEGAL_URI);
+             }
+             else
+             {
+                 lexer.report.attrError(lexer, node, attval, Report.ILLEGAL_URI_REFERENCE);
+             }
+
+             lexer.badChars |= Report.INVALID_URI;
+         }
+     }
+
+     /**
+      * Tells whether a character has to be percent-encoded inside a URI attribute value.
+      * @param c character to test
+      * @return <code>true</code> for control characters, space, non-ASCII characters, DEL, '&lt;' and '&gt;'
+      */
+     private static boolean needsEscape(char c)
+     {
+         return (c > 0x7e) || (c <= 0x20) || (c == '<') || (c == '>');
+     }
+
+     /**
+      * Returns a copy of the given value in which every illegal character is percent-encoded.
+      * @param uri value to escape
+      * @return escaped value
+      */
+     private static String escape(String uri)
+     {
+         StringBuffer dest = new StringBuffer(uri.length() + 16);
+         int i = 0;
+
+         while (i < uri.length())
+         {
+             char c = uri.charAt(i);
+             if (!needsEscape(c))
+             {
+                 dest.append(c);
+                 ++i;
+                 continue;
+             }
+
+             // keep a surrogate pair together so that it is encoded as a single UTF-8 sequence
+             int end = i + 1;
+             if (c >= 0xD800 && c <= 0xDBFF && end < uri.length())
+             {
+                 char low = uri.charAt(end);
+                 if (low >= 0xDC00 && low <= 0xDFFF)
+                 {
+                     ++end;
+                 }
+             }
+
+             appendPercentEncoded(dest, uri.substring(i, end));
+             i = end;
+         }
+
+         return dest.toString();
+     }
+
+     /**
+      * Appends the UTF-8 bytes of the given text as <code>%HH</code> escapes, always using two hex digits per
+      * byte. A plain <code>Integer.toHexString(char)</code> would produce one digit for characters below 0x10 and
+      * more than two digits for characters above 0xFF, neither of which is a valid escape.
+      * @param dest buffer to append to
+      * @param text one character (or one surrogate pair) to encode
+      */
+     private static void appendPercentEncoded(StringBuffer dest, String text)
+     {
+         byte[] utf8;
+         try
+         {
+             utf8 = text.getBytes("UTF-8");
+         }
+         catch (java.io.UnsupportedEncodingException e)
+         {
+             // every Java platform is required to support UTF-8
+             IllegalStateException failure = new IllegalStateException("UTF-8 encoding is not available");
+             failure.initCause(e);
+             throw failure;
+         }
+
+         for (int k = 0; k < utf8.length; ++k)
+         {
+             int b = utf8[k] & 0xff;
+             dest.append('%');
+             dest.append(HEX_DIGITS.charAt(b >> 4));
+             dest.append(HEX_DIGITS.charAt(b & 0x0f));
+         }
+     }
+ }
+
+ /**
+ * AttrCheck implementation for checking scripts.
+ * The check is currently a no-op: script attribute values are accepted as they are and nothing is reported.
+ */
+ public static class CheckScript implements AttrCheck
+ {
+
+ /**
+ * Does nothing; validation of script values is not implemented.
+ * @see AttrCheck#check(Lexer, Node, AttVal)
+ */
+ public void check(Lexer lexer, Node node, AttVal attval)
+ {
+ // not implemented
+ }
+
+ }
+
+ /**
+  * AttrCheck implementation for the "align" attribute.
+  * Elements whose content model has the <code>CM_IMG</code> flag use "align" for vertical positioning, so for them
+  * the check is handed over to the "valign" checker.
+  */
+ public static class CheckAlign implements AttrCheck
+ {
+
+     /**
+      * Horizontal alignments accepted for this attribute.
+      */
+     private static final String[] HORIZONTAL_ALIGNMENTS = {"left", "center", "right", "justify"};
+
+     /**
+      * @see AttrCheck#check(Lexer, Node, AttVal)
+      */
+     public void check(Lexer lexer, Node node, AttVal attval)
+     {
+         final boolean imageLike = node.tag != null && (node.tag.model & Dict.CM_IMG) != 0;
+
+         if (imageLike)
+         {
+             // IMG, OBJECT, APPLET and EMBED use align for vertical position
+             VALIGN.check(lexer, node, attval);
+         }
+         else if (attval.value == null)
+         {
+             lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+         }
+         else
+         {
+             attval.checkLowerCaseAttrValue(lexer, node);
+
+             final boolean known = TidyUtils.isInValuesIgnoreCase(HORIZONTAL_ALIGNMENTS, attval.value);
+             if (!known)
+             {
+                 lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+             }
+         }
+     }
+
+ }
+
+ /**
+  * AttrCheck implementation for the "valign" attribute (also used for "align" on image-like elements).
+  */
+ public static class CheckValign implements AttrCheck
+ {
+
+     /**
+      * Values that are always accepted.
+      */
+     private static final String[] STANDARD = {"top", "middle", "bottom", "baseline"};
+
+     /**
+      * Values that are accepted only on elements whose content model has the <code>CM_IMG</code> flag.
+      */
+     private static final String[] IMAGE_ONLY = {"left", "right"};
+
+     /**
+      * Proprietary values: accepted, but reported and recorded as a proprietary version constraint.
+      */
+     private static final String[] PROPRIETARY = {"texttop", "absmiddle", "absbottom", "textbottom"};
+
+     /**
+      * @see AttrCheck#check(Lexer, Node, AttVal)
+      */
+     public void check(Lexer lexer, Node node, AttVal attval)
+     {
+         if (attval.value == null)
+         {
+             lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+             return;
+         }
+
+         attval.checkLowerCaseAttrValue(lexer, node);
+
+         // read the value only now, the call above may have replaced it
+         final String candidate = attval.value;
+
+         if (TidyUtils.isInValuesIgnoreCase(STANDARD, candidate))
+         {
+             return;
+         }
+
+         if (TidyUtils.isInValuesIgnoreCase(IMAGE_ONLY, candidate))
+         {
+             final boolean imageLike = node.tag != null && (node.tag.model & Dict.CM_IMG) != 0;
+             if (imageLike)
+             {
+                 return;
+             }
+         }
+         else if (TidyUtils.isInValuesIgnoreCase(PROPRIETARY, candidate))
+         {
+             lexer.constrainVersion(Dict.VERS_PROPRIETARY);
+             lexer.report.attrError(lexer, node, attval, Report.PROPRIETARY_ATTR_VALUE);
+             return;
+         }
+
+         // unknown value, or left/right used outside an image-like element
+         lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+     }
+
+ }
+
+    /**
+     * AttrCheck implementation for checking boolean attributes.
+     * A boolean attribute may legitimately have no value, so a missing value is not reported;
+     * when a value is present only its letter case is checked.
+     */
+    public static class CheckBool implements AttrCheck
+    {
+
+        /**
+         * Runs the lower-case check when the attribute carries a value.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            if (attval.value != null)
+            {
+                attval.checkLowerCaseAttrValue(lexer, node);
+            }
+        }
+
+    }
+
+    /**
+     * AttrCheck implementation for checking length values.
+     * A value is accepted when it starts with a digit or <code>%</code> and continues with digits
+     * and <code>%</code> only; on TD and TH every character after the first must be a digit.
+     * The "width" attribute of COL and COLGROUP is not checked.
+     */
+    public static class CheckLength implements AttrCheck
+    {
+
+        /**
+         * Reports a missing value, or a single BAD_ATTRIBUTE_VALUE for a malformed length.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            if (attval.value == null)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+                return;
+            }
+
+            TagTable tt = lexer.configuration.tt;
+
+            // don't check for <col width=...> and <colgroup width=...>
+            boolean columnElement = node.tag == tt.tagCol || node.tag == tt.tagColgroup;
+            if ("width".equalsIgnoreCase(attval.attribute) && columnElement)
+            {
+                return;
+            }
+
+            String text = attval.value;
+            boolean malformed;
+
+            if (text.length() == 0)
+            {
+                malformed = true;
+            }
+            else
+            {
+                char first = text.charAt(0);
+                malformed = !Character.isDigit(first) && first != '%';
+
+                // elements th and td must not use percentages
+                boolean tableCell = node.tag == tt.tagTd || node.tag == tt.tagTh;
+
+                for (int i = 1; !malformed && i < text.length(); i++)
+                {
+                    char c = text.charAt(i);
+                    malformed = !Character.isDigit(c) && (tableCell || c != '%');
+                }
+            }
+
+            if (malformed)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+            }
+        }
+    }
+
+    /**
+     * AttrCheck implementation for checking the "target" attribute.
+     * The mere presence of the attribute excludes <code>Dict.VERS_HTML40_STRICT</code>. The value must
+     * either begin with a letter or be one of the reserved names listed in <code>VALID_VALUES</code>.
+     */
+    public static class CheckTarget implements AttrCheck
+    {
+
+        /**
+         * reserved target names.
+         */
+        private static final String[] VALID_VALUES = {"_blank", "_self", "_parent", "_top"};
+
+        /**
+         * Constrains the HTML version, then reports a missing/empty or invalid target name.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            // No target attribute in strict HTML versions
+            lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
+
+            String target = attval.value;
+
+            if (target == null || target.length() == 0)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+                return;
+            }
+
+            // target names must begin with A-Za-z or be one of _blank, _self, _parent and _top
+            boolean acceptable = Character.isLetter(target.charAt(0))
+                || TidyUtils.isInValuesIgnoreCase(VALID_VALUES, target);
+
+            if (!acceptable)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+            }
+        }
+    }
+
+    /**
+     * AttrCheck implementation for checking the form submission method.
+     * The only accepted values are <code>get</code> and <code>post</code> (case is ignored).
+     */
+    public static class CheckFsubmit implements AttrCheck
+    {
+
+        /**
+         * accepted submission methods.
+         */
+        private static final String[] VALID_VALUES = {"get", "post"};
+
+        /**
+         * Reports a missing value or a value other than get/post.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            if (attval.value != null)
+            {
+                attval.checkLowerCaseAttrValue(lexer, node);
+
+                if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value))
+                {
+                    lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+                }
+            }
+            else
+            {
+                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+            }
+        }
+    }
+
+    /**
+     * AttrCheck implementation for checking the "clear" attribute.
+     * A missing value is reported and then replaced by the first entry of <code>VALID_VALUES</code>
+     * (<code>none</code>); a present value must be one of the listed entries (case is ignored).
+     */
+    public static class CheckClear implements AttrCheck
+    {
+
+        /**
+         * accepted values; the first entry doubles as the default for a missing value.
+         */
+        private static final String[] VALID_VALUES = {"none", "left", "right", "all"};
+
+        /**
+         * Reports a missing or invalid value and fills in the default when the value is missing.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            boolean missing = attval.value == null;
+
+            if (missing)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+                // repair the attribute with the default
+                attval.value = VALID_VALUES[0];
+                return;
+            }
+
+            attval.checkLowerCaseAttrValue(lexer, node);
+
+            boolean known = TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value);
+            if (!known)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+            }
+        }
+    }
+
+    /**
+     * AttrCheck implementation for checking the "shape" attribute.
+     * Accepted values are <code>rect</code>, <code>default</code>, <code>circle</code> and
+     * <code>poly</code> (case is ignored).
+     */
+    public static class CheckShape implements AttrCheck
+    {
+
+        /**
+         * accepted shape names.
+         */
+        private static final String[] VALID_VALUES = {"rect", "default", "circle", "poly"};
+
+        /**
+         * Reports a missing value or an unknown shape name.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            if (attval.value == null)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+            }
+            else
+            {
+                attval.checkLowerCaseAttrValue(lexer, node);
+
+                boolean known = TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value);
+                if (!known)
+                {
+                    lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+                }
+            }
+        }
+    }
+
+    /**
+     * AttrCheck implementation for checking Scope.
+     * Accepted values are <code>row</code>, <code>rowgroup</code>, <code>col</code> and
+     * <code>colgroup</code> (case is ignored).
+     */
+    public static class CheckScope implements AttrCheck
+    {
+
+        /**
+         * accepted scope names.
+         */
+        private static final String[] VALID_VALUES = {"row", "rowgroup", "col", "colgroup"};
+
+        /**
+         * Reports a missing value or an unknown scope name.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            if (attval.value != null)
+            {
+                attval.checkLowerCaseAttrValue(lexer, node);
+
+                if (TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value))
+                {
+                    return;
+                }
+
+                lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+                return;
+            }
+
+            lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+        }
+    }
+
+    /**
+     * AttrCheck implementation for checking numbers.
+     * Every character of the value must be a digit; on FONT a single leading <code>+</code> or
+     * <code>-</code> is tolerated. The "cols" and "rows" attributes of FRAMESET are not checked.
+     */
+    public static class CheckNumber implements AttrCheck
+    {
+
+        /**
+         * Reports a missing value, or BAD_ATTRIBUTE_VALUE at the first non-digit character.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            if (attval.value == null)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+                return;
+            }
+
+            // don't check <frameset cols=... rows=...>
+            boolean colsOrRows = "cols".equalsIgnoreCase(attval.attribute)
+                || "rows".equalsIgnoreCase(attval.attribute);
+            if (colsOrRows && node.tag == lexer.configuration.tt.tagFrameset)
+            {
+                return;
+            }
+
+            String number = attval.value;
+
+            // font size may be preceded by + or -
+            boolean signed = node.tag == lexer.configuration.tt.tagFont
+                && (number.startsWith("+") || number.startsWith("-"));
+            int start = signed ? 1 : 0;
+
+            for (int i = start; i < number.length(); i++)
+            {
+                if (!Character.isDigit(number.charAt(i)))
+                {
+                    lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+                    return;
+                }
+            }
+        }
+    }
+
+    /**
+     * AttrCheck implementation for checking ids.
+     * The value must start with a letter or underscore and continue with name characters. When the
+     * lexer's <code>isvoyager</code> flag is set and the offending character is acceptable by the XML
+     * rules, XML_ID_SYNTAX is reported instead of BAD_ATTRIBUTE_VALUE. Independently of the syntax
+     * check the id is looked up in the anchor table: it is reported as ANCHOR_NOT_UNIQUE when it is
+     * already registered for another node, and registered for this node otherwise.
+     */
+    public static class CheckId implements AttrCheck
+    {
+
+        /**
+         * Validates the id syntax and records the id as an anchor.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            if (attval.value == null || attval.value.length() == 0)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+                return;
+            }
+
+            String id = attval.value;
+
+            // the value is known to be non-empty here, so no further length test is needed
+            char c = id.charAt(0);
+
+            if (!(Character.isLetter(c) || c == '_'))
+            {
+                if (lexer.isvoyager && (TidyUtils.isXMLLetter(c) || c == ':'))
+                {
+                    lexer.report.attrError(lexer, node, attval, Report.XML_ID_SYNTAX);
+                }
+                else
+                {
+                    lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+                }
+            }
+            else
+            {
+                // only the first offending character is reported
+                for (int j = 1; j < id.length(); j++)
+                {
+                    c = id.charAt(j);
+
+                    if (!TidyUtils.isNamechar(c))
+                    {
+                        if (lexer.isvoyager && TidyUtils.isXMLNamechar(c))
+                        {
+                            lexer.report.attrError(lexer, node, attval, Report.XML_ID_SYNTAX);
+                        }
+                        else
+                        {
+                            lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+                        }
+                        break;
+                    }
+                }
+            }
+
+            // the anchor bookkeeping runs even when the syntax check has reported a problem
+            Node old = lexer.configuration.tt.getNodeByAnchor(attval.value);
+
+            if (old != null && old != node)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.ANCHOR_NOT_UNIQUE);
+            }
+            else
+            {
+                lexer.configuration.tt.anchorList = lexer.configuration.tt.addAnchor(attval.value, node);
+            }
+        }
+
+    }
+
+    /**
+     * AttrCheck implementation for checking the "name" attribute.
+     * A missing value is reported. On elements the tag table treats as anchor elements the attribute
+     * also excludes <code>Dict.VERS_XHTML11</code> and the name is recorded as an anchor, or reported
+     * as ANCHOR_NOT_UNIQUE when another node already registered it.
+     */
+    public static class CheckName implements AttrCheck
+    {
+
+        /**
+         * Reports a missing value and performs the anchor bookkeeping for anchor elements.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            if (attval.value == null)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+                return;
+            }
+
+            if (!lexer.configuration.tt.isAnchorElement(node))
+            {
+                // nothing else to verify on non-anchor elements
+                return;
+            }
+
+            lexer.constrainVersion(~Dict.VERS_XHTML11);
+
+            Node existing = lexer.configuration.tt.getNodeByAnchor(attval.value);
+            boolean takenByOther = existing != null && existing != node;
+
+            if (takenByOther)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.ANCHOR_NOT_UNIQUE);
+            }
+            else
+            {
+                lexer.configuration.tt.anchorList = lexer.configuration.tt.addAnchor(attval.value, node);
+            }
+        }
+
+    }
+
+    /**
+     * AttrCheck implementation for checking colors.
+     * A value is accepted when it is one of the color names listed in <code>COLORS</code> (case is
+     * ignored) or a <code>#</code> followed by exactly six hexadecimal digits. When
+     * <code>lexer.configuration.replaceColor</code> is set, a value that matches a table entry, by
+     * name or by hex code, is replaced with the table's color name. A valid hex value that is not in
+     * the table is converted to upper case. Everything else is reported as BAD_ATTRIBUTE_VALUE.
+     */
+    public static class CheckColor implements AttrCheck
+    {
+
+        /**
+         * valid html colors: lower-case color name mapped to its upper-case hex code.
+         */
+        private static final Map<String, String> COLORS = new HashMap<String, String>();
+
+        static
+        {
+            COLORS.put("black", "#000000");
+            COLORS.put("green", "#008000");
+            COLORS.put("silver", "#C0C0C0");
+            COLORS.put("lime", "#00FF00");
+            COLORS.put("gray", "#808080");
+            COLORS.put("olive", "#808000");
+            COLORS.put("white", "#FFFFFF");
+            COLORS.put("yellow", "#FFFF00");
+            COLORS.put("maroon", "#800000");
+            COLORS.put("navy", "#000080");
+            COLORS.put("red", "#FF0000");
+            COLORS.put("blue", "#0000FF");
+            COLORS.put("purple", "#800080");
+            COLORS.put("teal", "#008080");
+            COLORS.put("fuchsia", "#FF00FF");
+            COLORS.put("aqua", "#00FFFF");
+        }
+
+        /**
+         * Reports a missing/empty or invalid color and normalises valid ones as described on the class.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            if (attval.value == null || attval.value.length() == 0)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+                return;
+            }
+
+            String given = attval.value;
+            char first = given.charAt(0);
+
+            if (first == '#')
+            {
+                checkHexColor(lexer, node, attval, given);
+            }
+            else if (TidyUtils.isLetter(first))
+            {
+                checkNamedColor(lexer, node, attval, given);
+            }
+            else
+            {
+                lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+            }
+        }
+
+        /**
+         * Checks a value that starts with '#': it must be '#' plus six hex digits.
+         */
+        private void checkHexColor(Lexer lexer, Node node, AttVal attval, String given)
+        {
+            if (given.length() != 7)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+                return;
+            }
+
+            // a hex code of a known color is left as written unless it is replaced by its name
+            for (Entry<String, String> color : COLORS.entrySet())
+            {
+                if (given.equalsIgnoreCase(color.getValue()))
+                {
+                    if (lexer.configuration.replaceColor)
+                    {
+                        attval.value = color.getKey();
+                    }
+                    return;
+                }
+            }
+
+            // check if valid hex digits and letters
+            for (int i = 1; i < 7; ++i)
+            {
+                char c = given.charAt(i);
+                if (!TidyUtils.isDigit(c) && "abcdef".indexOf(Character.toLowerCase(c)) == -1)
+                {
+                    lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+                    return;
+                }
+            }
+
+            // convert hex letters to uppercase
+            attval.value = given.toUpperCase();
+        }
+
+        /**
+         * Checks a value that starts with a letter: it must be a color name from the table.
+         */
+        private void checkNamedColor(Lexer lexer, Node node, AttVal attval, String given)
+        {
+            for (Entry<String, String> color : COLORS.entrySet())
+            {
+                if (given.equalsIgnoreCase(color.getKey()))
+                {
+                    if (lexer.configuration.replaceColor)
+                    {
+                        // normalise to the table's spelling of the name
+                        attval.value = color.getKey();
+                    }
+                    return;
+                }
+            }
+
+            // we could search for more colors and mark the file as HTML Proprietary, but I don't think
+            // it's worth the effort, so values not in HTML 4.01 are invalid
+            lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+        }
+    }
+
+    /**
+     * AttrCheck implementation for checking valuetype.
+     * Accepted values are <code>data</code>, <code>object</code> and <code>ref</code> (case is ignored).
+     */
+    public static class CheckVType implements AttrCheck
+    {
+
+        /**
+         * accepted value types.
+         */
+        private static final String[] VALID_VALUES = {"data", "object", "ref"};
+
+        /**
+         * Reports a missing value or an unknown value type.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            if (attval.value == null)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+            }
+            else
+            {
+                attval.checkLowerCaseAttrValue(lexer, node);
+
+                if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value))
+                {
+                    lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+                }
+            }
+        }
+    }
+
+    /**
+     * AttrCheck implementation for checking scroll.
+     * Accepted values are <code>no</code>, <code>yes</code> and <code>auto</code> (case is ignored).
+     */
+    public static class CheckScroll implements AttrCheck
+    {
+
+        /**
+         * accepted scrolling modes.
+         */
+        private static final String[] VALID_VALUES = {"no", "yes", "auto"};
+
+        /**
+         * Reports a missing value or an unknown scrolling mode.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            boolean hasValue = attval.value != null;
+
+            if (!hasValue)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+                return;
+            }
+
+            attval.checkLowerCaseAttrValue(lexer, node);
+
+            boolean known = TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value);
+            if (!known)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+            }
+        }
+    }
+
+    /**
+     * AttrCheck implementation for checking dir.
+     * Accepted values are <code>rtl</code> and <code>ltr</code> (case is ignored).
+     */
+    public static class CheckTextDir implements AttrCheck
+    {
+
+        /**
+         * accepted text directions.
+         */
+        private static final String[] VALID_VALUES = {"rtl", "ltr"};
+
+        /**
+         * Reports a missing value or an unknown text direction.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            if (attval.value != null)
+            {
+                attval.checkLowerCaseAttrValue(lexer, node);
+
+                boolean known = TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value);
+                if (!known)
+                {
+                    lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
+                }
+                return;
+            }
+
+            lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+        }
+    }
+
+    /**
+     * AttrCheck implementation for checking lang and xml:lang.
+     * An attribute named exactly <code>lang</code> excludes <code>Dict.VERS_XHTML11</code>; apart from
+     * that only a missing value is reported, the language code itself is not validated.
+     */
+    public static class CheckLang implements AttrCheck
+    {
+
+        /**
+         * Constrains the version for "lang" and reports a missing value.
+         * @see AttrCheck#check(Lexer, Node, AttVal)
+         */
+        public void check(Lexer lexer, Node node, AttVal attval)
+        {
+            boolean plainLang = "lang".equals(attval.attribute);
+            if (plainLang)
+            {
+                lexer.constrainVersion(~Dict.VERS_XHTML11);
+            }
+
+            if (attval.value == null)
+            {
+                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
+            }
+        }
+    }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Attribute.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Attribute.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Attribute.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,168 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * HTML attribute: a name together with the html versions it belongs to, the checker used to
+ * validate its values and two flags (nowrap, literal).
+ * The name, versions and checker are fixed at construction time; the flags start out
+ * <code>false</code> and can be changed through their setters.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class Attribute
+{
+
+    /**
+     * attribute name.
+     */
+    private final String name;
+
+    /**
+     * don't wrap attribute.
+     */
+    private boolean nowrap;
+
+    /**
+     * unmodifiable attribute?
+     */
+    private boolean literal;
+
+    /**
+     * html versions for this attribute.
+     */
+    private final short versions;
+
+    /**
+     * checker for the attribute.
+     */
+    private final AttrCheck attrchk;
+
+    /**
+     * Instantiates a new Attribute.
+     * @param attributeName attribute name
+     * @param htmlVersions versions in which this attribute is supported
+     * @param check AttrCheck instance
+     */
+    public Attribute(String attributeName, short htmlVersions, AttrCheck check)
+    {
+        this.name = attributeName;
+        this.versions = htmlVersions;
+        this.attrchk = check;
+    }
+
+    /**
+     * Is this a literal (unmodifiable) attribute?
+     * @param isLiteral boolean <code>true</code> for a literal attribute
+     */
+    public void setLiteral(boolean isLiteral)
+    {
+        this.literal = isLiteral;
+    }
+
+    /**
+     * Don't wrap this attribute?
+     * @param isNowrap boolean <code>true</code>= don't wrap
+     */
+    public void setNowrap(boolean isNowrap)
+    {
+        this.nowrap = isNowrap;
+    }
+
+    /**
+     * Returns the checker for this attribute.
+     * @return instance of AttrCheck.
+     */
+    public AttrCheck getAttrchk()
+    {
+        return this.attrchk;
+    }
+
+    /**
+     * Is this a literal (unmodifiable) attribute?
+     * @return <code>true</code> for a literal attribute
+     */
+    public boolean isLiteral()
+    {
+        return this.literal;
+    }
+
+    /**
+     * Returns the attribute name.
+     * @return attribute name.
+     */
+    public String getName()
+    {
+        return this.name;
+    }
+
+    /**
+     * Don't wrap this attribute?
+     * @return <code>true</code>= don't wrap
+     */
+    public boolean isNowrap()
+    {
+        return this.nowrap;
+    }
+
+    /**
+     * Returns the html versions in which this attribute is supported.
+     * @return html versions for this attribute.
+     * @see Dict
+     */
+    public short getVersions()
+    {
+        return this.versions;
+    }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttributeTable.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttributeTable.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/AttributeTable.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,464 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.util.Hashtable;
+import java.util.Map;
+
+
+/**
+ * HTML attribute hash table.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class AttributeTable
+{
+
+ /**
+ * href attribute.
+ */
+ protected static Attribute attrHref;
+
+ /**
+ * src attribute.
+ */
+ protected static Attribute attrSrc;
+
+ /**
+ * id attribute.
+ */
+ protected static Attribute attrId;
+
+ /**
+ * name attribute.
+ */
+ protected static Attribute attrName;
+
+ /**
+ * summary attribute.
+ */
+ protected static Attribute attrSummary;
+
+ /**
+ * alt attribute.
+ */
+ protected static Attribute attrAlt;
+
+ /**
+ * longdesc attribute.
+ */
+ protected static Attribute attrLongdesc;
+
+ /**
+ * usemap attribute.
+ */
+ protected static Attribute attrUsemap;
+
+ /**
+ * ismap attribute.
+ */
+ protected static Attribute attrIsmap;
+
+ /**
+ * language attribute.
+ */
+ protected static Attribute attrLanguage;
+
+ /**
+ * type attribute.
+ */
+ protected static Attribute attrType;
+
+ /**
+ * title attribute.
+ */
+ protected static Attribute attrTitle;
+
+ /**
+ * xmlns attribute.
+ */
+ protected static Attribute attrXmlns;
+
+ /**
+ * value attribute.
+ */
+ protected static Attribute attrValue;
+
+ /**
+ * content attribute.
+ */
+ protected static Attribute attrContent;
+
+ /**
+ * datafld attribute.
+ */
+ protected static Attribute attrDatafld;
+
+ /**
+ * width attribute.
+ */
+ protected static Attribute attrWidth;
+
+ /**
+ * height attribute.
+ */
+ protected static Attribute attrHeight;
+
+ /**
+ * attribute table instance.
+ */
+ private static AttributeTable defaultAttributeTable;
+
+ /**
+ * all the known attributes.
+ */
+ private static final Attribute[] ATTRS = {
+ new Attribute("abbr", Dict.VERS_HTML40, AttrCheckImpl.TEXT),
+ new Attribute("accept-charset", Dict.VERS_HTML40,
AttrCheckImpl.CHARSET),
+ new Attribute("accept", Dict.VERS_ALL, AttrCheckImpl.TYPE),
+ new Attribute("accesskey", Dict.VERS_HTML40, AttrCheckImpl.CHARACTER),
+ new Attribute("action", Dict.VERS_ALL, AttrCheckImpl.URL),
+ new Attribute("add_date", Dict.VERS_NETSCAPE, AttrCheckImpl.TEXT), //
A
+ new Attribute("align", Dict.VERS_ALL, AttrCheckImpl.ALIGN), // set
varies with element
+ new Attribute("alink", Dict.VERS_LOOSE, AttrCheckImpl.COLOR),
+ new Attribute("alt", Dict.VERS_ALL, AttrCheckImpl.TEXT),
+ new Attribute("archive", Dict.VERS_HTML40, AttrCheckImpl.URLS), //
space or comma separated list
+ new Attribute("axis", Dict.VERS_HTML40, AttrCheckImpl.TEXT),
+ new Attribute("background", Dict.VERS_LOOSE, AttrCheckImpl.URL),
+ new Attribute("bgcolor", Dict.VERS_LOOSE, AttrCheckImpl.COLOR),
+ new Attribute("bgproperties", Dict.VERS_PROPRIETARY,
AttrCheckImpl.TEXT), // BODY "fixed" fixes background
+ new Attribute("border", Dict.VERS_ALL, AttrCheckImpl.BOOL), // like
LENGTH + "border"
+ new Attribute("bordercolor", Dict.VERS_MICROSOFT, AttrCheckImpl.COLOR),
// used on TABLE
+ new Attribute("bottommargin", Dict.VERS_MICROSOFT,
AttrCheckImpl.NUMBER), // used on BODY
+ new Attribute("cellpadding", Dict.VERS_FROM32, AttrCheckImpl.LENGTH),
// % or pixel values
+ new Attribute("cellspacing", Dict.VERS_FROM32, AttrCheckImpl.LENGTH),
+ new Attribute("char", Dict.VERS_HTML40, AttrCheckImpl.CHARACTER),
+ new Attribute("charoff", Dict.VERS_HTML40, AttrCheckImpl.LENGTH),
+ new Attribute("charset", Dict.VERS_HTML40, AttrCheckImpl.CHARSET),
+ new Attribute("checked", Dict.VERS_ALL, AttrCheckImpl.BOOL), // i.e.
"checked" or absent
+ new Attribute("cite", Dict.VERS_HTML40, AttrCheckImpl.URL),
+ new Attribute("class", Dict.VERS_HTML40, AttrCheckImpl.TEXT),
+ new Attribute("classid", Dict.VERS_HTML40, AttrCheckImpl.URL),
+ new Attribute("clear", Dict.VERS_LOOSE, AttrCheckImpl.CLEAR), // BR:
left, right, all
+ new Attribute("code", Dict.VERS_LOOSE, AttrCheckImpl.TEXT), // APPLET
+ new Attribute("codebase", Dict.VERS_HTML40, AttrCheckImpl.URL), //
OBJECT
+ new Attribute("codetype", Dict.VERS_HTML40, AttrCheckImpl.TYPE), //
OBJECT
+ new Attribute("color", Dict.VERS_LOOSE, AttrCheckImpl.COLOR), //
BASEFONT, FONT
+ new Attribute("cols", Dict.VERS_IFRAME, AttrCheckImpl.COLS), // TABLE
& FRAMESET
+ new Attribute("colspan", Dict.VERS_FROM32, AttrCheckImpl.NUMBER),
+ new Attribute("compact", Dict.VERS_ALL, AttrCheckImpl.BOOL), // lists
+ new Attribute("content", Dict.VERS_ALL, AttrCheckImpl.TEXT), // META
+ new Attribute("coords", Dict.VERS_FROM32, AttrCheckImpl.COORDS), //
AREA, A
+ new Attribute("data", Dict.VERS_HTML40, AttrCheckImpl.URL), // OBJECT
+ new Attribute("datafld", Dict.VERS_MICROSOFT, AttrCheckImpl.TEXT), //
used on DIV, IMG
+ new Attribute("dataformatas", Dict.VERS_MICROSOFT, AttrCheckImpl.TEXT),
// used on DIV, IMG
+ new Attribute("datapagesize", Dict.VERS_MICROSOFT,
AttrCheckImpl.NUMBER), // used on DIV, IMG
+ new Attribute("datasrc", Dict.VERS_MICROSOFT, AttrCheckImpl.URL), //
used on TABLE
+ new Attribute("datetime", Dict.VERS_HTML40, AttrCheckImpl.DATE), //
INS, DEL
+ new Attribute("declare", Dict.VERS_HTML40, AttrCheckImpl.BOOL), //
OBJECT
+ new Attribute("defer", Dict.VERS_HTML40, AttrCheckImpl.BOOL), //
SCRIPT
+ new Attribute("dir", Dict.VERS_HTML40, AttrCheckImpl.TEXTDIR), // ltr
or rtl
+ new Attribute("disabled", Dict.VERS_HTML40, AttrCheckImpl.BOOL), //
form fields
+ new Attribute("enctype", Dict.VERS_ALL, AttrCheckImpl.TYPE), // FORM
+ new Attribute("face", Dict.VERS_LOOSE, AttrCheckImpl.TEXT), //
BASEFONT, FONT
+ new Attribute("for", Dict.VERS_HTML40, AttrCheckImpl.IDREF), // LABEL
+ new Attribute("frame", Dict.VERS_HTML40, AttrCheckImpl.TFRAME), //
TABLE
+ new Attribute("frameborder", (short) (Dict.VERS_FRAMESET |
Dict.VERS_IFRAME), AttrCheckImpl.FBORDER), // 0 or 1
+ new Attribute("framespacing", Dict.VERS_PROPRIETARY,
AttrCheckImpl.NUMBER), // pixel value
+ new Attribute("gridx", Dict.VERS_PROPRIETARY, AttrCheckImpl.NUMBER), //
TABLE Adobe golive
+ new Attribute("gridy", Dict.VERS_PROPRIETARY, AttrCheckImpl.NUMBER), //
TABLE Adobe golive
+ new Attribute("headers", Dict.VERS_HTML40, AttrCheckImpl.IDREF), //
table cells
+ new Attribute("height", Dict.VERS_ALL, AttrCheckImpl.LENGTH), // pixels
only for TH/TD
+ new Attribute("href", Dict.VERS_ALL, AttrCheckImpl.URL), // A, AREA,
LINK and BASE
+ new Attribute("hreflang", Dict.VERS_HTML40, AttrCheckImpl.LANG), // A,
LINK
+ new Attribute("hspace", Dict.VERS_ALL, AttrCheckImpl.NUMBER), //
APPLET, IMG, OBJECT
+ new Attribute("http-equiv", Dict.VERS_ALL, AttrCheckImpl.TEXT), //
META
+ new Attribute("id", Dict.VERS_HTML40, AttrCheckImpl.ID),
+ new Attribute("ismap", Dict.VERS_ALL, AttrCheckImpl.BOOL), // IMG
+ new Attribute("label", Dict.VERS_HTML40, AttrCheckImpl.TEXT), // OPT,
OPTGROUP
+ new Attribute("lang", Dict.VERS_HTML40, AttrCheckImpl.LANG),
+ new Attribute("language", Dict.VERS_LOOSE, AttrCheckImpl.TEXT), //
SCRIPT
+ new Attribute("last_modified", Dict.VERS_NETSCAPE, AttrCheckImpl.TEXT),
// A
+ new Attribute("last_visit", Dict.VERS_NETSCAPE, AttrCheckImpl.TEXT), //
A
+ new Attribute("leftmargin", Dict.VERS_MICROSOFT, AttrCheckImpl.NUMBER),
// used on BODY
+ new Attribute("link", Dict.VERS_LOOSE, AttrCheckImpl.COLOR), // BODY
+ new Attribute("longdesc", Dict.VERS_HTML40, AttrCheckImpl.URL), // IMG
+ new Attribute("lowsrc", Dict.VERS_PROPRIETARY, AttrCheckImpl.URL), //
IMG
+ new Attribute("marginheight", Dict.VERS_IFRAME, AttrCheckImpl.NUMBER),
// FRAME, IFRAME, BODY
+ new Attribute("marginwidth", Dict.VERS_IFRAME, AttrCheckImpl.NUMBER),
// ditto
+ new Attribute("maxlength", Dict.VERS_ALL, AttrCheckImpl.NUMBER), //
INPUT
+ new Attribute("media", Dict.VERS_HTML40, AttrCheckImpl.MEDIA), //
STYLE, LINK
+ new Attribute("method", Dict.VERS_ALL, AttrCheckImpl.FSUBMIT), // FORM:
get or post
+ new Attribute("multiple", Dict.VERS_ALL, AttrCheckImpl.BOOL), //
SELECT
+ new Attribute("name", Dict.VERS_ALL, AttrCheckImpl.NAME),
+ new Attribute("nohref", Dict.VERS_FROM32, AttrCheckImpl.BOOL), // AREA
+ new Attribute("noresize", Dict.VERS_FRAMESET, AttrCheckImpl.BOOL), //
FRAME
+ new Attribute("noshade", Dict.VERS_LOOSE, AttrCheckImpl.BOOL), // HR
+ new Attribute("nowrap", Dict.VERS_LOOSE, AttrCheckImpl.BOOL), // table
cells
+ new Attribute("object", Dict.VERS_HTML40_LOOSE, AttrCheckImpl.TEXT), //
APPLET
+ new Attribute("onblur", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onchange", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onclick", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("ondblclick", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onkeydown", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onkeypress", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onkeyup", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onload", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onmousedown", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT),
// event
+ new Attribute("onmousemove", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT),
// event
+ new Attribute("onmouseout", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onmouseover", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT),
// event
+ new Attribute("onmouseup", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onsubmit", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onreset", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onselect", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onunload", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onfocus", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), //
event
+ new Attribute("onafterupdate", Dict.VERS_MICROSOFT,
AttrCheckImpl.SCRIPT), // form fields
+ new Attribute("onbeforeupdate", Dict.VERS_MICROSOFT,
AttrCheckImpl.SCRIPT), // form fields
+ new Attribute("onerrorupdate", Dict.VERS_MICROSOFT,
AttrCheckImpl.SCRIPT), // form fields
+ new Attribute("onrowenter", Dict.VERS_MICROSOFT, AttrCheckImpl.SCRIPT),
// form fields
+ new Attribute("onrowexit", Dict.VERS_MICROSOFT, AttrCheckImpl.SCRIPT),
// form fields
+ new Attribute("onbeforeunload", Dict.VERS_MICROSOFT,
AttrCheckImpl.SCRIPT), // form fields
+ new Attribute("ondatasetchanged", Dict.VERS_MICROSOFT,
AttrCheckImpl.SCRIPT), // object, applet
+ new Attribute("ondataavailable", Dict.VERS_MICROSOFT,
AttrCheckImpl.SCRIPT), // object, applet
+ new Attribute("ondatasetcomplete", Dict.VERS_MICROSOFT,
AttrCheckImpl.SCRIPT), // object, applet
+ new Attribute("profile", Dict.VERS_HTML40, AttrCheckImpl.URL), // HEAD
+ new Attribute("prompt", Dict.VERS_LOOSE, AttrCheckImpl.TEXT), //
ISINDEX
+ new Attribute("readonly", Dict.VERS_HTML40, AttrCheckImpl.BOOL), //
form fields
+ new Attribute("rel", Dict.VERS_ALL, AttrCheckImpl.LINKTYPES), // A,
LINK
+ new Attribute("rev", Dict.VERS_ALL, AttrCheckImpl.LINKTYPES), // A,
LINK
+ new Attribute("rightmargin", Dict.VERS_MICROSOFT,
AttrCheckImpl.NUMBER), // used on BODY
+ new Attribute("rows", Dict.VERS_ALL, AttrCheckImpl.NUMBER), //
TEXTAREA
+ new Attribute("rowspan", Dict.VERS_ALL, AttrCheckImpl.NUMBER), // table
cells
+ new Attribute("rules", Dict.VERS_HTML40, AttrCheckImpl.TRULES), //
TABLE
+ new Attribute("scheme", Dict.VERS_HTML40, AttrCheckImpl.TEXT), // META
+ new Attribute("scope", Dict.VERS_HTML40, AttrCheckImpl.SCOPE), // table
cells
+ new Attribute("scrolling", Dict.VERS_IFRAME, AttrCheckImpl.SCROLL), //
yes, no or auto
+ new Attribute("selected", Dict.VERS_ALL, AttrCheckImpl.BOOL), //
OPTION
+ new Attribute("shape", Dict.VERS_FROM32, AttrCheckImpl.SHAPE), // AREA,
A
+ new Attribute("showgrid", Dict.VERS_PROPRIETARY, AttrCheckImpl.BOOL),
// TABLE Adobe golive
+ new Attribute("showgridx", Dict.VERS_PROPRIETARY, AttrCheckImpl.BOOL),
// TABLE Adobe golive
+ new Attribute("showgridy", Dict.VERS_PROPRIETARY, AttrCheckImpl.BOOL),
// TABLE Adobe golive
+ new Attribute("size", Dict.VERS_LOOSE, AttrCheckImpl.NUMBER), // HR,
FONT, BASEFONT, SELECT
+ new Attribute("span", Dict.VERS_HTML40, AttrCheckImpl.NUMBER), // COL,
COLGROUP
+ new Attribute("src", Dict.VERS_ALL, AttrCheckImpl.URL), // IMG, FRAME,
IFRAME
+ new Attribute("standby", Dict.VERS_HTML40, AttrCheckImpl.TEXT), //
OBJECT
+ new Attribute("start", Dict.VERS_ALL, AttrCheckImpl.NUMBER), // OL
+ new Attribute("style", Dict.VERS_HTML40, AttrCheckImpl.TEXT),
+ new Attribute("summary", Dict.VERS_HTML40, AttrCheckImpl.TEXT), //
TABLE
+ new Attribute("tabindex", Dict.VERS_HTML40, AttrCheckImpl.NUMBER), //
fields, OBJECT and A
+ new Attribute("target", Dict.VERS_HTML40, AttrCheckImpl.TARGET), //
names a frame/window
+ new Attribute("text", Dict.VERS_LOOSE, AttrCheckImpl.COLOR), // BODY
+ new Attribute("title", Dict.VERS_HTML40, AttrCheckImpl.TEXT), // text
tool tip
+ new Attribute("topmargin", Dict.VERS_MICROSOFT, AttrCheckImpl.NUMBER),
// used on BODY
+ new Attribute("type", Dict.VERS_FROM32, AttrCheckImpl.TYPE), // also
used by SPACER
+ new Attribute("usemap", Dict.VERS_ALL, AttrCheckImpl.BOOL), // things
with images
+ new Attribute("valign", Dict.VERS_FROM32, AttrCheckImpl.VALIGN),
+ new Attribute("value", Dict.VERS_ALL, AttrCheckImpl.TEXT), // OPTION,
PARAM
+ new Attribute("valuetype", Dict.VERS_HTML40, AttrCheckImpl.VTYPE), //
PARAM: data, ref, object
+ new Attribute("version", Dict.VERS_ALL, AttrCheckImpl.TEXT), // HTML
+ new Attribute("vlink", Dict.VERS_LOOSE, AttrCheckImpl.COLOR), // BODY
+ new Attribute("vspace", Dict.VERS_LOOSE, AttrCheckImpl.NUMBER), // IMG,
OBJECT, APPLET
+ new Attribute("width", Dict.VERS_ALL, AttrCheckImpl.LENGTH), // pixels
only for TD/TH
+ new Attribute("wrap", Dict.VERS_NETSCAPE, AttrCheckImpl.TEXT), //
textarea
+ new Attribute("xml:lang", Dict.VERS_XML, AttrCheckImpl.TEXT), // XML
language
+ new Attribute("xml:space", Dict.VERS_XML, AttrCheckImpl.TEXT), // XML
language
+ new Attribute("xmlns", Dict.VERS_ALL, AttrCheckImpl.TEXT), // name
space
+ new Attribute("rbspan", Dict.VERS_XHTML11, AttrCheckImpl.NUMBER), //
ruby markup
+ };
+
+ /**
+ * Installed attributes, keyed by the attribute name returned by <code>Attribute.getName()</code>.
+ * The backing <code>Hashtable</code> rejects <code>null</code> keys and values.
+ */
+ private Map<String, Attribute> attributeHashtable = new Hashtable<String,
Attribute>();
+
+ /**
+  * Looks up an installed Attribute by name.
+  * <p>
+  * The backing <code>Hashtable</code> throws a <code>NullPointerException</code> for a <code>null</code> key, so a
+  * <code>null</code> name is answered directly with <code>null</code> ("not found").
+  * </p>
+  * @param name attribute name, may be <code>null</code>
+  * @return Attribute or <code>null</code> if the attribute is not found
+  */
+ public Attribute lookup(String name)
+ {
+     if (name == null)
+     {
+         return null;
+     }
+
+     // the map is declared as Map<String, Attribute>, so no cast is required
+     return this.attributeHashtable.get(name);
+ }
+
+ /**
+  * Installs a new Attribute, replacing any attribute previously registered under the same name.
+  * @param attr Attribute to install; its name is used as the key
+  * @return the installed Attribute, i.e. <code>attr</code> itself
+  */
+ public Attribute install(Attribute attr)
+ {
+     // Map.put() reports the mapping that was replaced (null on first registration), not the new one,
+     // so the documented "installed Attribute" has to be returned explicitly
+     this.attributeHashtable.put(attr.getName(), attr);
+     return attr;
+ }
+
+ /**
+  * Resolves the attribute definition for the name carried by an attribute/value pair.
+  * @param attval AttVal instance whose <code>attribute</code> field holds the name to resolve
+  * @return Attribute with name = attval.attribute, or <code>null</code> when that name is <code>null</code> or
+  * is not installed
+  */
+ public Attribute findAttribute(AttVal attval)
+ {
+     String attributeName = attval.attribute;
+     return attributeName == null ? null : lookup(attributeName);
+ }
+
+ /**
+  * Does the given attribute contain an url?
+  * @param attrname attribute name
+  * @return <code>true</code> if the attribute is installed and its check is <code>AttrCheckImpl.URL</code>
+  */
+ public boolean isUrl(String attrname)
+ {
+     Attribute definition = lookup(attrname);
+
+     if (definition == null)
+     {
+         return false;
+     }
+
+     return definition.getAttrchk() == AttrCheckImpl.URL;
+ }
+
+ /**
+  * Does the given attribute contain a script?
+  * @param attrname attribute name
+  * @return <code>true</code> if the attribute is installed and its check is <code>AttrCheckImpl.SCRIPT</code>
+  */
+ public boolean isScript(String attrname)
+ {
+     Attribute definition = lookup(attrname);
+
+     if (definition == null)
+     {
+         return false;
+     }
+
+     return definition.getAttrchk() == AttrCheckImpl.SCRIPT;
+ }
+
+ /**
+  * Is the given attribute declared as a literal attribute?
+  * @param attrname attribute name
+  * @return <code>true</code> if the attribute is installed and flagged as literal
+  */
+ public boolean isLiteralAttribute(String attrname)
+ {
+     Attribute definition = lookup(attrname);
+
+     if (definition == null)
+     {
+         return false;
+     }
+
+     return definition.isLiteral();
+ }
+
+ /**
+  * Declares a literal attribute, i.e. flags it via <code>setLiteral(true)</code>. A name that is not installed yet
+  * is first installed as a proprietary attribute without an attribute check.
+  * <p>
+  * Henry Zrepa reports that some folk are using embed with script attributes where newlines are significant. These
+  * need to be declared and handled specially!
+  * </p>
+  * @param name attribute name
+  */
+ public void declareLiteralAttrib(String name)
+ {
+     Attribute attrib = lookup(name);
+
+     if (attrib == null)
+     {
+         // hold on to the new instance ourselves instead of using the value returned by install():
+         // Map.put() reports the previous mapping, which is null for a name that was not installed before
+         attrib = new Attribute(name, Dict.VERS_PROPRIETARY, null);
+         install(attrib);
+     }
+
+     attrib.setLiteral(true);
+ }
+
+ /**
+  * Returns the default attribute table instance, building it on first use.
+  * <p>
+  * The first call installs every entry of <code>ATTRS</code>, caches the frequently used definitions in the static
+  * <code>attrXxx</code> fields and marks alt, value and content as nowrap. This method itself performs no locking.
+  * </p>
+  * @return AttributeTable instance
+  */
+ public static AttributeTable getDefaultAttributeTable()
+ {
+     if (defaultAttributeTable != null)
+     {
+         return defaultAttributeTable;
+     }
+
+     AttributeTable table = new AttributeTable();
+     defaultAttributeTable = table;
+
+     for (Attribute attribute : ATTRS)
+     {
+         table.install(attribute);
+     }
+
+     // cache the definitions that are referenced directly elsewhere
+     attrHref = table.lookup("href");
+     attrSrc = table.lookup("src");
+     attrId = table.lookup("id");
+     attrName = table.lookup("name");
+     attrSummary = table.lookup("summary");
+     attrAlt = table.lookup("alt");
+     attrLongdesc = table.lookup("longdesc");
+     attrUsemap = table.lookup("usemap");
+     attrIsmap = table.lookup("ismap");
+     attrLanguage = table.lookup("language");
+     attrType = table.lookup("type");
+     attrTitle = table.lookup("title");
+     attrXmlns = table.lookup("xmlns");
+     attrValue = table.lookup("value");
+     attrContent = table.lookup("content");
+     attrDatafld = table.lookup("datafld");
+     attrWidth = table.lookup("width");
+     attrHeight = table.lookup("height");
+
+     attrAlt.setNowrap(true);
+     attrValue.setNowrap(true);
+     attrContent.setNowrap(true);
+
+     return defaultAttributeTable;
+ }
+
+}
\ No newline at end of file
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Clean.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Clean.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Clean.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,2401 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Clean up misuse of presentation markup. Filters from other formats such as Microsoft Word often make excessive use
+ * of presentation markup such as font tags, B, I, and the align attribute. By applying a set of production rules, it
+ * is straightforward to transform this to use CSS.
+ * <p>
+ * Some rules replace some of the children of an element by style properties on the element, e.g.
+ * <code>&lt;p&gt;&lt;b&gt;...&lt;/b&gt;&lt;/p&gt;</code> becomes
+ * <code>&lt;p style="font-weight: bold"&gt;...&lt;/p&gt;</code>. Such rules are applied to the element's content and
+ * then to the element itself until none of the rules apply any more. Having applied all the rules to an element, it
+ * will have a style attribute with one or more properties.
+ * </p>
+ * <p>
+ * Other rules strip the element they apply to, replacing it by style properties on the contents, e.g.
+ * <code>&lt;dir&gt;&lt;li&gt;&lt;p&gt;...&lt;/li&gt;&lt;/dir&gt;</code> becomes
+ * <code>&lt;p style="margin-left: 1em"&gt;...</code>. These rules are applied to an element before processing its
+ * content and replace the current element by the first element in the exposed content.
+ * </p>
+ * <p>
+ * After applying both sets of rules, you can replace the style attribute by a class value and style rule in the
+ * document head. To support this, an association of styles and class names is built. A naive approach is to rely on
+ * string matching to test when two property lists are the same. A better approach would be to first sort the
+ * properties before matching.
+ * </p>
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class Clean
+{
+
+ /**
+ * Sequential number appended to generated css class names. Starts at 1 and is incremented each time
+ * gensymClass produces a name.
+ */
+ private int classNum = 1;
+
+ /**
+ * Tag table handed to the constructor.
+ */
+ private TagTable tt;
+
+ /**
+  * Creates a Clean instance bound to the given tag table.
+  * @param tagTable tag table instance, stored as-is
+  */
+ public Clean(TagTable tagTable)
+ {
+     tt = tagTable;
+ }
+
+ /**
+  * Inserts a css style property into a list that is kept sorted by property name.
+  * <p>
+  * If a property with the same name is already present the list is left untouched, i.e. the first value wins.
+  * </p>
+  * @param props head of the sorted property list, may be <code>null</code> for an empty list
+  * @param name property name
+  * @param value property value
+  * @return head of the list containing the given property
+  */
+ private StyleProp insertProperty(StyleProp props, String name, String value)
+ {
+     StyleProp head = props;
+     StyleProp before = null;
+     StyleProp cursor = props;
+
+     // walk to the first entry whose name sorts after the new one
+     while (cursor != null)
+     {
+         int order = cursor.name.compareTo(name);
+
+         if (order == 0)
+         {
+             // this property is already defined, ignore new value
+             return head;
+         }
+
+         if (order > 0)
+         {
+             break;
+         }
+
+         before = cursor;
+         cursor = cursor.next;
+     }
+
+     // cursor is the successor of the new entry (null when appending at the end)
+     StyleProp created = new StyleProp(name, value, cursor);
+
+     if (before == null)
+     {
+         return created;
+     }
+
+     before.next = created;
+     return head;
+ }
+
+ /**
+  * Parses a css declaration list such as <code>color: red; margin: 0</code> and inserts each
+  * <code>name: value</code> pair into a sorted linked list of properties.
+  * <p>
+  * Only space characters in front of a name and in front of a value are skipped; everything else, including
+  * whitespace before the ':' or ';' delimiters, is kept verbatim. Parsing stops at the first declaration without a
+  * ':'. A name that is already in the list keeps its existing value (see insertProperty).
+  * </p>
+  * @param prop head of the list to extend, or <code>null</code> to start a new list
+  * @param style style string; <code>null</code> is treated like an empty string
+  * @return head of the resulting list
+  */
+ private StyleProp createProps(StyleProp prop, String style)
+ {
+     if (style == null)
+     {
+         // e.g. a style attribute without a value: nothing to parse
+         return prop;
+     }
+
+     int length = style.length();
+     int nameStart = 0;
+
+     while (nameStart < length)
+     {
+         while (nameStart < length && style.charAt(nameStart) == ' ')
+         {
+             ++nameStart;
+         }
+
+         int nameEnd = style.indexOf(':', nameStart);
+
+         if (nameEnd < 0)
+         {
+             // no further "name:" part, ignore the remainder
+             break;
+         }
+
+         int valueStart = nameEnd + 1;
+
+         while (valueStart < length && style.charAt(valueStart) == ' ')
+         {
+             ++valueStart;
+         }
+
+         int separator = style.indexOf(';', valueStart);
+         int valueEnd = separator < 0 ? length : separator;
+
+         prop = insertProperty(prop, style.substring(nameStart, nameEnd), style.substring(valueStart, valueEnd));
+
+         if (separator < 0)
+         {
+             // last declaration reached
+             break;
+         }
+
+         nameStart = separator + 1;
+     }
+
+     return prop;
+ }
+
+ /**
+  * Serialises a property list to a css declaration string of the form <code>name: value; name: value</code>.
+  * @param props head of the property list, may be <code>null</code>
+  * @return css declarations as String, empty for an empty list
+  */
+ private String createPropString(StyleProp props)
+ {
+     // upper bound of the result length: every entry contributes its name and value plus ": " and "; "
+     int capacity = 0;
+
+     for (StyleProp prop = props; prop != null; prop = prop.next)
+     {
+         capacity += prop.name.length() + 2;
+         capacity += prop.value.length() + 2;
+     }
+
+     // a presized builder avoids the quadratic copying of repeated String.concat calls
+     StringBuilder style = new StringBuilder(capacity);
+
+     for (StyleProp prop = props; prop != null; prop = prop.next)
+     {
+         if (prop != props)
+         {
+             style.append("; ");
+         }
+
+         style.append(prop.name).append(": ").append(prop.value);
+     }
+
+     return style.toString();
+ }
+
+ /**
+  * Merges additional css declarations into an existing style string. Declarations already present in
+  * <code>style</code> take precedence over those in <code>property</code>; the result is sorted by property name.
+  * @param style css style
+  * @param property css properties to add
+  * @return merged string
+  */
+ private String addProperty(String style, String property)
+ {
+     StyleProp existing = createProps(null, style);
+     StyleProp merged = createProps(existing, property);
+     return createPropString(merged);
+ }
+
+ /**
+  * Generates a new css class name: the configured css prefix, or "c" when none is configured, followed by a
+  * running number.
+  * @param lexer Lexer providing the configuration
+  * @param tag Tag (not used for the generated name)
+  * @return generated css class
+  */
+ private String gensymClass(Lexer lexer, String tag)
+ {
+     String prefix = lexer.configuration.cssPrefix;
+
+     if (prefix == null)
+     {
+         // fall back to the default prefix only when no prefix is configured
+         prefix = "c";
+     }
+
+     String generated = prefix + this.classNum;
+     this.classNum++;
+     return generated;
+ }
+
+ /**
+  * Returns the css class registered for a tag/properties combination, registering a new generated class at the
+  * front of the lexer's style list when there is none yet.
+  * @param lexer Lexer holding the style list
+  * @param tag tag name
+  * @param properties css properties
+  * @return css class name for the given tag and properties
+  */
+ private String findStyle(Lexer lexer, String tag, String properties)
+ {
+     Style candidate = lexer.styles;
+
+     while (candidate != null)
+     {
+         if (candidate.tag.equals(tag) && candidate.properties.equals(properties))
+         {
+             return candidate.tagClass;
+         }
+
+         candidate = candidate.next;
+     }
+
+     String className = gensymClass(lexer, tag);
+     Style created = new Style(tag, className, properties, lexer.styles);
+     lexer.styles = created;
+     return created.tagClass;
+ }
+
+ /**
+  * Finds the style attribute of a node and replaces it by a corresponding class attribute. The class is looked up
+  * in the style dictionary, otherwise a new class is generated and added to the dictionary. If the node already has
+  * a class attribute the class name is appended to it and the style attribute is removed; otherwise the style
+  * attribute itself is turned into the class attribute.
+  * @param lexer Lexer
+  * @param node node that may carry a style attribute
+  */
+ private void style2Rule(Lexer lexer, Node node)
+ {
+     AttVal styleAttr = node.getAttrByName("style");
+
+     if (styleAttr == null)
+     {
+         return;
+     }
+
+     String className = findStyle(lexer, node.element, styleAttr.value);
+     AttVal classAttr = node.getAttrByName("class");
+
+     if (classAttr == null)
+     {
+         // reuse style attribute for class attribute
+         styleAttr.attribute = "class";
+         styleAttr.value = className;
+         return;
+     }
+
+     // there already is a class attribute: append class name after a space
+     classAttr.value = classAttr.value + " " + className;
+     node.removeAttribute(styleAttr);
+ }
+
+ /**
+  * Appends a css color rule of the form <code>selector { color: value }</code> to the lexer buffer.
+  * @param lexer Lexer
+  * @param selector css selector
+  * @param color color value; nothing is written when <code>null</code>
+  */
+ private void addColorRule(Lexer lexer, String selector, String color)
+ {
+     if (color == null)
+     {
+         return;
+     }
+
+     lexer.addStringLiteral(selector);
+     lexer.addStringLiteral(" { color: ");
+     lexer.addStringLiteral(color);
+     lexer.addStringLiteral(" }\n");
+ }
+
+ /**
+  * Moves presentation attributes from the body element into css rules written to the lexer buffer.
+  *
+  * <pre>
+  * background="foo" -&gt; body { background-image: url(foo) }
+  * bgcolor="foo"    -&gt; body { background-color: foo }
+  * text="foo"       -&gt; body { color: foo }
+  * link="foo"       -&gt; :link { color: foo }
+  * vlink="foo"      -&gt; :visited { color: foo }
+  * alink="foo"      -&gt; :active { color: foo }
+  * </pre>
+  *
+  * Every attribute that is found is removed from the body node.
+  * @param lexer Lexer receiving the generated css text
+  * @param body body node
+  */
+ private void cleanBodyAttrs(Lexer lexer, Node body)
+ {
+     // attributes that are folded into a single "body { ... }" rule
+     String[] bodyAttrNames = {"background", "bgcolor", "text"};
+     String[] bodyAttrValues = new String[bodyAttrNames.length];
+
+     for (int i = 0; i < bodyAttrNames.length; i++)
+     {
+         AttVal found = body.getAttrByName(bodyAttrNames[i]);
+
+         if (found != null)
+         {
+             bodyAttrValues[i] = found.value;
+             found.value = null;
+             body.removeAttribute(found);
+         }
+     }
+
+     String bgurl = bodyAttrValues[0];
+     String bgcolor = bodyAttrValues[1];
+     String color = bodyAttrValues[2];
+     boolean needsBodyRule = !(bgurl == null && bgcolor == null && color == null);
+
+     if (needsBodyRule)
+     {
+         lexer.addStringLiteral(" body {\n");
+
+         if (bgurl != null)
+         {
+             lexer.addStringLiteral(" background-image: url(");
+             lexer.addStringLiteral(bgurl);
+             lexer.addStringLiteral(");\n");
+         }
+
+         if (bgcolor != null)
+         {
+             lexer.addStringLiteral(" background-color: ");
+             lexer.addStringLiteral(bgcolor);
+             lexer.addStringLiteral(";\n");
+         }
+
+         if (color != null)
+         {
+             lexer.addStringLiteral(" color: ");
+             lexer.addStringLiteral(color);
+             lexer.addStringLiteral(";\n");
+         }
+
+         lexer.addStringLiteral(" }\n");
+     }
+
+     // link colors become separate rules: {attribute name, css selector}
+     String[][] linkRules = {{"link", " :link"}, {"vlink", " :visited"}, {"alink", " :active"}};
+
+     for (String[] rule : linkRules)
+     {
+         AttVal found = body.getAttrByName(rule[0]);
+
+         if (found != null)
+         {
+             addColorRule(lexer, rule[1], found.value);
+             body.removeAttribute(found);
+         }
+     }
+ }
+
+ /**
+  * Checks the body tag for deprecated presentation attributes (background, bgcolor, text, link, vlink, alink).
+  * When one is found, <code>Report.USING_BODY</code> is recorded in <code>lexer.badLayout</code>.
+  * @param lexer Lexer
+  * @param doc document root node
+  * @return <code>true</code> if there is no body or the body doesn't contain deprecated attributes,
+  * <code>false</code> otherwise.
+  */
+ private boolean niceBody(Lexer lexer, Node doc)
+ {
+     Node body = doc.findBody(lexer.configuration.tt);
+
+     if (body == null)
+     {
+         return true;
+     }
+
+     String[] deprecated = {"background", "bgcolor", "text", "link", "vlink", "alink"};
+
+     for (String attrName : deprecated)
+     {
+         if (body.getAttrByName(attrName) != null)
+         {
+             lexer.badLayout |= Report.USING_BODY;
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+ /**
+  * Creates a style element from the rules in the style dictionary and the presentation attributes of the body.
+  * <p>
+  * Nothing is done when the dictionary is empty and the body has no deprecated attributes. Otherwise the css text
+  * is written to the lexer buffer, wrapped in a text node below a new implicit
+  * <code>&lt;style type="text/css"&gt;</code> element, and that element is appended to the document head. If no
+  * head is found the element is not attached.
+  * </p>
+  * @param lexer Lexer
+  * @param doc root node
+  */
+ private void createStyleElement(Lexer lexer, Node doc)
+ {
+     if (lexer.styles == null && niceBody(lexer, doc))
+     {
+         return;
+     }
+
+     Node styleNode = lexer.newNode(Node.START_TAG, null, 0, 0, "style");
+     styleNode.implicit = true;
+
+     // insert type attribute
+     AttVal typeAttr = new AttVal(null, null, '"', "type", "text/css");
+     typeAttr.dict = AttributeTable.getDefaultAttributeTable().findAttribute(typeAttr);
+     styleNode.attributes = typeAttr;
+
+     Node body = doc.findBody(lexer.configuration.tt);
+
+     // everything added to the lexer buffer from here on becomes the content of the style element
+     lexer.txtstart = lexer.lexsize;
+
+     if (body != null)
+     {
+         cleanBodyAttrs(lexer, body);
+     }
+
+     Style rule = lexer.styles;
+
+     while (rule != null)
+     {
+         lexer.addCharToLexer(' ');
+         lexer.addStringLiteral(rule.tag);
+         lexer.addCharToLexer('.');
+         lexer.addStringLiteral(rule.tagClass);
+         lexer.addCharToLexer(' ');
+         lexer.addCharToLexer('{');
+         lexer.addStringLiteral(rule.properties);
+         lexer.addCharToLexer('}');
+         lexer.addCharToLexer('\n');
+         rule = rule.next;
+     }
+
+     lexer.txtend = lexer.lexsize;
+
+     Node cssText = lexer.newNode(Node.TEXT_NODE, lexer.lexbuf, lexer.txtstart, lexer.txtend);
+     styleNode.insertNodeAtEnd(cssText);
+
+     // now insert style element into document head: doc is the root node, its children are searched for the head
+     Node head = doc.findHEAD(lexer.configuration.tt);
+
+     if (head != null)
+     {
+         head.insertNodeAtEnd(styleNode);
+     }
+ }
+
+ /**
+  * Ensures bidirectional links are consistent: the neighbours (or, at either end of the sibling list, the parent)
+  * and all children of the node are made to point back at it.
+  * @param node node whose surrounding links are repaired; needs a parent when it has no previous or no next
+  * sibling
+  */
+ private void fixNodeLinks(Node node)
+ {
+     if (node.prev == null)
+     {
+         // first sibling: the parent's content starts here
+         node.parent.content = node;
+     }
+     else
+     {
+         node.prev.next = node;
+     }
+
+     if (node.next == null)
+     {
+         // last sibling: the parent's content ends here
+         node.parent.last = node;
+     }
+     else
+     {
+         node.next.prev = node;
+     }
+
+     Node child = node.content;
+
+     while (child != null)
+     {
+         child.parent = node;
+         child = child.next;
+     }
+ }
+
+ /**
+  * Used to strip the child of a node when the node has one and only one child: the grandchildren are adopted by
+  * the node and the child is left without content.
+  * @param node parent node, must have a child
+  */
+ private void stripOnlyChild(Node node)
+ {
+     Node onlyChild = node.content;
+
+     // take over the child's content list
+     node.content = onlyChild.content;
+     node.last = onlyChild.last;
+     onlyChild.content = null;
+
+     Node adopted = node.content;
+
+     while (adopted != null)
+     {
+         adopted.parent = node;
+         adopted = adopted.next;
+     }
+ }
+
+ /**
+  * Used to strip font start and end tags: the element is unlinked from its parent and its children, if any, take
+  * its place in the parent's content.
+  * @param element original node, must have a parent
+  * @param pnode passed in as array to allow modification. pnode[0] will contain the final node: the first child of
+  * the element, or the element's next sibling when the element was empty
+  * @todo remove the pnode parameter and make it a return value
+  */
+ private void discardContainer(Node element, Node[] pnode)
+ {
+     Node parent = element.parent;
+     Node replacement;
+
+     if (element.content == null)
+     {
+         // empty container: simply unlink it from the sibling list
+         if (element.next == null)
+         {
+             parent.last = element.prev;
+         }
+         else
+         {
+             element.next.prev = element.prev;
+         }
+
+         if (element.prev == null)
+         {
+             parent.content = element.next;
+         }
+         else
+         {
+             element.prev.next = element.next;
+         }
+
+         replacement = element.next;
+     }
+     else
+     {
+         // splice the children into the position of the container
+         element.last.next = element.next;
+
+         if (element.next == null)
+         {
+             parent.last = element.last;
+         }
+         else
+         {
+             element.next.prev = element.last;
+             element.last.next = element.next;
+         }
+
+         if (element.prev == null)
+         {
+             parent.content = element.content;
+         }
+         else
+         {
+             element.content.prev = element.prev;
+             element.prev.next = element.content;
+         }
+
+         Node moved = element.content;
+
+         while (moved != null)
+         {
+             moved.parent = parent;
+             moved = moved.next;
+         }
+
+         replacement = element.content;
+     }
+
+     pnode[0] = replacement;
+
+     element.next = null;
+     element.content = null;
+ }
+
+ /**
+  * Adds a style property to an element, creating the style attribute as needed and adding the ; delimiter.
+  * <p>
+  * Attributes without a name are skipped while searching for the style attribute, and a style attribute without a
+  * value simply receives the property as its value.
+  * </p>
+  * @param node node
+  * @param property property added to node
+  */
+ private void addStyleProperty(Node node, String property)
+ {
+     AttVal styleAttr = node.attributes;
+
+     // constant-first comparison: an attribute name may be null
+     while (styleAttr != null && !"style".equals(styleAttr.attribute))
+     {
+         styleAttr = styleAttr.next;
+     }
+
+     if (styleAttr == null)
+     {
+         // create new style attribute
+         styleAttr = new AttVal(node.attributes, null, '"', "style", property);
+         styleAttr.dict = AttributeTable.getDefaultAttributeTable().findAttribute(styleAttr);
+         node.attributes = styleAttr;
+     }
+     else if (styleAttr.value == null)
+     {
+         // style attribute without a value: nothing to merge with
+         styleAttr.value = property;
+     }
+     else
+     {
+         // style attribute already exists: insert property
+         styleAttr.value = addProperty(styleAttr.value, property);
+     }
+ }
+
+ /**
+  * Creates a new string that consists of the combined style properties in s1 and s2. To merge property lists, a
+  * linked list of property/values is built and properties are inserted into the list in name order. When both
+  * strings define the same property name, the value from s1 is kept.
+  * @param s1 first property list
+  * @param s2 second property list
+  * @return merged properties
+  */
+ private String mergeProperties(String s1, String s2)
+ {
+     StyleProp fromFirst = createProps(null, s1);
+     StyleProp combined = createProps(fromFirst, s2);
+     return createPropString(combined);
+ }
+
+ /**
+  * Merges the class attribute of a child node into its parent node: the child's class names are appended to the
+  * node's class value, or copied into a new class attribute when the node has no class value yet.
+  * @param node Node
+  * @param child Child node
+  */
+ private void mergeClasses(Node node, Node child)
+ {
+     String childClasses = null;
+
+     for (AttVal attr = child.attributes; attr != null; attr = attr.next)
+     {
+         if ("class".equals(attr.attribute))
+         {
+             childClasses = attr.value;
+             break;
+         }
+     }
+
+     AttVal ownAttr = node.attributes;
+
+     while (ownAttr != null && !"class".equals(ownAttr.attribute))
+     {
+         ownAttr = ownAttr.next;
+     }
+
+     String ownClasses = ownAttr == null ? null : ownAttr.value;
+
+     if (childClasses == null)
+     {
+         // nothing to merge or copy
+         return;
+     }
+
+     if (ownClasses != null)
+     {
+         // merge class names from both
+         ownAttr.value = ownClasses + ' ' + childClasses;
+     }
+     else
+     {
+         // copy class names from child
+         AttVal copied = new AttVal(node.attributes, null, '"', "class", childClasses);
+         copied.dict = AttributeTable.getDefaultAttributeTable().findAttribute(copied);
+         node.attributes = copied;
+     }
+ }
+
+ /**
+  * Merges the style (and class) of a child node into its parent node.
+  * <p>
+  * Attributes without a name are skipped while searching for the style attributes. When the node already has a
+  * style attribute without a value, that attribute receives the child's style instead of a second style attribute
+  * being added.
+  * </p>
+  * @param node Node
+  * @param child Child node
+  */
+ private void mergeStyles(Node node, Node child)
+ {
+     // the child may have a class attribute used for attaching styles, if so the class name needs to be copied
+     // to node's class
+     mergeClasses(node, child);
+
+     String childStyle = null;
+
+     // constant-first comparison: an attribute name may be null
+     for (AttVal attr = child.attributes; attr != null; attr = attr.next)
+     {
+         if ("style".equals(attr.attribute))
+         {
+             childStyle = attr.value;
+             break;
+         }
+     }
+
+     if (childStyle == null)
+     {
+         // nothing to merge or copy
+         return;
+     }
+
+     AttVal ownAttr = node.attributes;
+
+     while (ownAttr != null && !"style".equals(ownAttr.attribute))
+     {
+         ownAttr = ownAttr.next;
+     }
+
+     if (ownAttr == null)
+     {
+         // copy style of child
+         AttVal copied = new AttVal(node.attributes, null, '"', "style", childStyle);
+         copied.dict = AttributeTable.getDefaultAttributeTable().findAttribute(copied);
+         node.attributes = copied;
+     }
+     else if (ownAttr.value == null)
+     {
+         // reuse the valueless style attribute rather than adding a duplicate
+         ownAttr.value = childStyle;
+     }
+     else
+     {
+         // merge styles from both
+         ownAttr.value = mergeProperties(ownAttr.value, childStyle);
+     }
+ }
+
+ /**
+  * Translates the value of a font size attribute into a CSS font size.
+  * <ul>
+  * <li>a leading digit 0-6 is looked up in a fixed table (there is no entry for 3, so <code>null</code> is
+  * returned for it)</li>
+  * <li>a leading '-' followed by a digit 0-6 gives 100% scaled down by 0.8 per step, otherwise "smaller"</li>
+  * <li>any other first character followed by a digit 0-6 gives 100% scaled up by 1.2 per step, otherwise
+  * "larger"</li>
+  * </ul>
+  * @param size value of the size attribute
+  * @return CSS font size, may be <code>null</code>
+  */
+ private String fontSize2Name(String size)
+ {
+     String[] sizes = {"60%", "70%", "80%", null, "120%", "150%", "200%"};
+
+     if (size.length() == 0)
+     {
+         return "larger"; /* "140%" */
+     }
+
+     char first = size.charAt(0);
+
+     // absolute size: table lookup
+     if (first >= '0' && first <= '6')
+     {
+         return sizes[first - '0'];
+     }
+
+     boolean shrink = first == '-';
+
+     if (size.length() > 1)
+     {
+         char second = size.charAt(1);
+
+         if (second >= '0' && second <= '6')
+         {
+             // relative size: apply the per-step factor once for every step
+             double factor = shrink ? 0.8 : 1.2;
+             double scale = 1.0;
+
+             for (int steps = second - '0'; steps > 0; --steps)
+             {
+                 scale *= factor;
+             }
+
+             scale *= 100.0;
+             return "" + (int) scale + "%";
+         }
+     }
+
+     return shrink ? "smaller" /* "70%"; */ : "larger" /* "140%" */;
+ }
+
+ /**
+  * Records a font face on the node as a <code>font-family</code> style property.
+  * @param node Node receiving the property
+  * @param face font face
+  */
+ private void addFontFace(Node node, String face)
+ {
+     String property = "font-family: " + face;
+     addStyleProperty(node, property);
+ }
+
+ /**
+  * Applies a font size to a node. On a paragraph the sizes "6", "5" and "4" turn the element into h1, h2
+  * and h3 respectively; in every other case the size is mapped with <code>fontSize2Name</code> and, when that
+  * yields a value, added as a <code>font-size</code> style property.
+  * @param node Node
+  * @param size font size
+  */
+ private void addFontSize(Node node, String size)
+ {
+     // heading that a paragraph with this size is promoted to, if any
+     String heading = null;
+
+     if (size.equals("6"))
+     {
+         heading = "h1";
+     }
+     else if (size.equals("5"))
+     {
+         heading = "h2";
+     }
+     else if (size.equals("4"))
+     {
+         heading = "h3";
+     }
+
+     if (heading != null && node.tag == this.tt.tagP)
+     {
+         node.element = heading;
+         this.tt.findTag(node);
+         return;
+     }
+
+     String cssSize = fontSize2Name(size);
+
+     if (cssSize == null)
+     {
+         return;
+     }
+
+     addStyleProperty(node, "font-size: " + cssSize);
+ }
+
+ /**
+  * Records a font color on the node as a <code>color</code> style property.
+  * @param node Node receiving the property
+  * @param color color value
+  */
+ private void addFontColor(Node node, String color)
+ {
+     String property = "color: " + color;
+     addStyleProperty(node, property);
+ }
+
+ /**
+  * Adds an align style (<code>text-align</code>) to the node.
+  * @param node Node
+  * @param align align value, converted to lower case before it is stored
+  */
+ private void addAlign(Node node, String align)
+ {
+     // force alignment value to lower case; a fixed locale is used so that the result does not depend on
+     // the default locale of the JVM (e.g. the Turkish dotless i)
+     addStyleProperty(node, "text-align: " + align.toLowerCase(java.util.Locale.ENGLISH));
+ }
+
+ /**
+  * Add style properties to node corresponding to the font face, size and color attributes. Attributes without
+  * a name or without a value are skipped.
+  * @param node node receiving the style properties
+  * @param av first attribute of the list to translate
+  */
+ private void addFontStyles(Node node, AttVal av)
+ {
+     for (; av != null; av = av.next)
+     {
+         // an attribute without a value carries nothing that could be turned into a property
+         if (av.value == null)
+         {
+             continue;
+         }
+
+         // literal-first comparison: safe for attributes whose name is null
+         if ("face".equals(av.attribute))
+         {
+             addFontFace(node, av.value);
+         }
+         else if ("size".equals(av.attribute))
+         {
+             addFontSize(node, av.value);
+         }
+         else if ("color".equals(av.attribute))
+         {
+             addFontColor(node, av.value);
+         }
+     }
+ }
+
+ /**
+  * Symptom: {@code <p align=center>}. Action: {@code <p style="text-align: center">}. The first align
+  * attribute is unlinked from the node's attribute list; if it has a value, the value is added as a text-align
+  * style property.
+  * @param lexer Lexer
+  * @param node node with align attribute. Will be modified to use css style.
+  */
+ private void textAlign(Lexer lexer, Node node)
+ {
+     AttVal av, prev;
+
+     prev = null;
+
+     for (av = node.attributes; av != null; av = av.next)
+     {
+         // literal-first comparison: safe for attributes whose name is null
+         if ("align".equals(av.attribute))
+         {
+             // unlink the attribute from the list
+             if (prev != null)
+             {
+                 prev.next = av.next;
+             }
+             else
+             {
+                 node.attributes = av.next;
+             }
+
+             if (av.value != null)
+             {
+                 addAlign(node, av.value);
+             }
+
+             break;
+         }
+
+         prev = av;
+     }
+ }
+
+ /**
+  * Symptom: {@code <dir><li>} (also ul or ol) where an implicit {@code <li>} is the only child. Action: coerce
+  * the list to a {@code <div>} with a left margin of 2em and strip the {@code <li>}.
+  * @param lexer Lexer
+  * @param node dir, ul or ol tag
+  * @return <code>true</code> if the list has been coerced to a div
+  */
+ private boolean dir2Div(Lexer lexer, Node node)
+ {
+     boolean isList = node.tag == this.tt.tagDir || node.tag == this.tt.tagUl || node.tag == this.tt.tagOl;
+
+     if (!isList)
+     {
+         return false;
+     }
+
+     Node item = node.content;
+
+     // the list must hold exactly one child, and that child must be an implicit li
+     if (item == null || item.next != null || item.tag != this.tt.tagLi || !item.implicit)
+     {
+         return false;
+     }
+
+     // coerce list to div
+     node.tag = this.tt.tagDiv;
+     node.element = "div";
+     addStyleProperty(node, "margin-left: 2em");
+     stripOnlyChild(node);
+     return true;
+ }
+
+ /**
+  * Symptom: {@code <center>}.
+  * <p>
+  * Action: replace {@code <center>} by {@code <div style="text-align: center">}. When
+  * <code>lexer.configuration.dropFontTags</code> is set the element is instead removed with
+  * <code>discardContainer</code> and an inferred {@code <br>} is linked into the sibling chain.
+  * </p>
+  * @param lexer Lexer
+  * @param node center tag
+  * @param pnode pnode[0] is the same as node, passed in as an array to allow modification
+  * @return <code>true</code> if node was a center tag (replaced by a div, or dropped in favour of a br)
+  */
+ private boolean center2Div(Lexer lexer, Node node, Node[] pnode)
+ {
+ if (node.tag == this.tt.tagCenter)
+ {
+ if (lexer.configuration.dropFontTags)
+ {
+ if (node.content != null)
+ {
+ // remember the last child and the parent before the container goes away
+ Node last = node.last;
+ Node parent = node.parent;
+
+ discardContainer(node, pnode);
+
+ // from here on "node" is the new br, linked in right after the former last child
+ node = lexer.inferredTag("br");
+
+ if (last.next != null)
+ {
+ last.next.prev = node;
+ }
+
+ node.next = last.next;
+ last.next = node;
+ node.prev = last;
+
+ if (parent.last == last)
+ {
+ parent.last = node;
+ }
+
+ node.parent = parent;
+ }
+ else
+ {
+ // empty center: remember its neighbours so the br can be linked between them
+ Node prev = node.prev;
+ Node next = node.next;
+ Node parent = node.parent;
+ discardContainer(node, pnode);
+
+ node = lexer.inferredTag("br");
+ node.next = next;
+ node.prev = prev;
+ node.parent = parent;
+
+ if (next != null)
+ {
+ next.prev = node;
+ }
+ else
+ {
+ // no following sibling: the br becomes the parent's last child
+ parent.last = node;
+ }
+
+ if (prev != null)
+ {
+ prev.next = node;
+ }
+ else
+ {
+ // no preceding sibling: the br becomes the parent's first child
+ parent.content = node;
+ }
+ }
+
+ return true;
+ }
+ // default action: turn the center element itself into a centered div
+ node.tag = this.tt.tagDiv;
+ node.element = "div";
+ addStyleProperty(node, "text-align: center");
+ return true;
+ }
+
+ return false;
+ }
+
+ /**
+  * Symptom: {@code <div><div>...</div></div>}. Action: merge the two divs. This is useful after nested
+  * {@code <dir>}s used by Word for indenting have been converted to {@code <div>}s. The inner div's class and
+  * style are merged into the outer div before the inner div is stripped.
+  * @param lexer Lexer
+  * @param node first div
+  * @return true if the divs have been merged
+  */
+ private boolean mergeDivs(Lexer lexer, Node node)
+ {
+     if (node.tag != this.tt.tagDiv)
+     {
+         return false;
+     }
+
+     Node inner = node.content;
+
+     // the outer div must contain exactly one child, which must itself be a div
+     if (inner == null || inner.tag != this.tt.tagDiv || inner.next != null)
+     {
+         return false;
+     }
+
+     mergeStyles(node, inner);
+     stripOnlyChild(node);
+     return true;
+ }
+
+ /**
+  * Symptom: a list (ul or ol) whose only child has, as its first child, a list of the same kind:
+  *
+  * <pre>
+  * &lt;ul&gt;
+  *   &lt;li&gt;
+  *     &lt;ul&gt;
+  *       ...
+  *     &lt;/ul&gt;
+  *   &lt;/li&gt;
+  * &lt;/ul&gt;
+  * </pre>
+  *
+  * Action: discard outer list. If the node preceding the outer list is itself a ul or ol, the inner list is
+  * moved to the end of that list's last child.
+  * @param lexer Lexer
+  * @param node Node
+  * @param pnode passed in as array to allow modifications; pnode[0] is set to the inner list
+  * @return <code>true</code> if nested lists have been found and replaced
+  */
+ private boolean nestedList(Lexer lexer, Node node, Node[] pnode)
+ {
+ Node child, list;
+
+ if (node.tag == this.tt.tagUl || node.tag == this.tt.tagOl)
+ {
+ child = node.content;
+
+ if (child == null)
+ {
+ return false;
+ }
+
+ // check child has no peers
+
+ if (child.next != null)
+ {
+ return false;
+ }
+
+ list = child.content;
+
+ if (list == null)
+ {
+ return false;
+ }
+
+ // the inner node must be the same kind of list as the outer one
+ if (list.tag != node.tag)
+ {
+ return false;
+ }
+
+ pnode[0] = list; // Set node to resume iteration
+
+ // move inner list node into position of outer node
+ list.prev = node.prev;
+ list.next = node.next;
+ list.parent = node.parent;
+ fixNodeLinks(list);
+
+ // get rid of outer ul and its li
+ // XXX: Are we leaking the child node? -creitzel 7 Jun, 01
+ child.content = null;
+ node.content = null;
+ node.next = null;
+ node = null;
+
+ // If prev node was a list the chances are this node should be appended to that list. Word has no way of
+ // recognizing nested lists and just uses indents
+ if (list.prev != null)
+ {
+ if (list.prev.tag == this.tt.tagUl || list.prev.tag == this.tt.tagOl)
+ {
+
+ // from here on "node" is the inner list and "list" is the preceding list
+ node = list;
+ list = node.prev;
+
+ // unlink node from the sibling chain
+ list.next = node.next;
+
+ if (list.next != null)
+ {
+ list.next.prev = list;
+ }
+
+ child = list.last; /* <li> */
+
+ // re-attach node as the last child of that li
+ node.parent = child;
+ node.next = null;
+ node.prev = child.last;
+ fixNodeLinks(node);
+ cleanNode(lexer, node);
+ }
+ }
+
+ return true;
+ }
+
+ return false;
+ }
+
+ /**
+  * Symptom: the only child of a block-level element is a presentation element such as B, I or FONT. Action:
+  * move the presentation onto the block as style properties (e.g. "font-weight: bold" for B) and strip the
+  * presentation element, leaving its children. Example:
+  *
+  * <pre>
+  * &lt;p&gt;
+  *   &lt;b&gt;&lt;font face="Arial" size="6"&gt;Draft Recommended Practice&lt;/font&gt;&lt;/b&gt;
+  * &lt;/p&gt;
+  * </pre>
+  *
+  * becomes:
+  *
+  * <pre>
+  * &lt;p style="font-weight: bold; font-family: Arial; font-size: 6"&gt;
+  *   Draft Recommended Practice
+  * &lt;/p&gt;
+  * </pre>
+  *
+  * <p>
+  * This code also replaces the align attribute by a style attribute, except on caption. Table, tr and li
+  * elements are left untouched altogether.
+  * </p>
+  * @param lexer Lexer
+  * @param node parent node
+  * @return <code>true</code> if the child node has been removed
+  */
+ private boolean blockStyle(Lexer lexer, Node node)
+ {
+     int blockModels = Dict.CM_BLOCK | Dict.CM_LIST | Dict.CM_DEFLIST | Dict.CM_TABLE;
+
+     if ((node.tag.model & blockModels) == 0)
+     {
+         return false;
+     }
+
+     if (node.tag == this.tt.tagTable || node.tag == this.tt.tagTr || node.tag == this.tt.tagLi)
+     {
+         return false;
+     }
+
+     // check for align attribute
+     if (node.tag != this.tt.tagCaption)
+     {
+         textAlign(lexer, node);
+     }
+
+     Node only = node.content;
+
+     // the block must have exactly one child
+     if (only == null || only.next != null)
+     {
+         return false;
+     }
+
+     if (only.tag == this.tt.tagB || only.tag == this.tt.tagI)
+     {
+         mergeStyles(node, only);
+         addStyleProperty(node, only.tag == this.tt.tagB ? "font-weight: bold" : "font-style: italic");
+         stripOnlyChild(node);
+         return true;
+     }
+
+     if (only.tag == this.tt.tagFont)
+     {
+         mergeStyles(node, only);
+         addFontStyles(node, only.attributes);
+         stripOnlyChild(node);
+         return true;
+     }
+
+     return false;
+ }
+
+ /**
+  * If an inline or row element (other than font) has a single font child, or - when
+  * <code>logicalEmphasis</code> is configured - a single b or i child, remove the child node and add the
+  * appropriate style properties to the parent.
+  * @param lexer Lexer
+  * @param node parent node
+  * @param pnode passed as an array to allow modifications
+  * @return <code>true</code> if child node has been stripped, replaced by style attributes.
+  */
+ private boolean inlineStyle(Lexer lexer, Node node, Node[] pnode)
+ {
+     if (node.tag == this.tt.tagFont || (node.tag.model & (Dict.CM_INLINE | Dict.CM_ROW)) == 0)
+     {
+         return false;
+     }
+
+     Node only = node.content;
+
+     // the element must have exactly one child
+     if (only == null || only.next != null)
+     {
+         return false;
+     }
+
+     if ((only.tag == this.tt.tagB || only.tag == this.tt.tagI) && lexer.configuration.logicalEmphasis)
+     {
+         mergeStyles(node, only);
+         addStyleProperty(node, only.tag == this.tt.tagB ? "font-weight: bold" : "font-style: italic");
+         stripOnlyChild(node);
+         return true;
+     }
+
+     if (only.tag == this.tt.tagFont)
+     {
+         mergeStyles(node, only);
+         addFontStyles(node, only.attributes);
+         stripOnlyChild(node);
+         return true;
+     }
+
+     return false;
+ }
+
+ /**
+  * Replace font elements by span elements, deleting the font element's attributes and replacing them by a single
+  * style attribute. When <code>dropFontTags</code> is configured the font element is discarded instead; a font
+  * element that is the only child of its parent is left alone.
+  * @param lexer Lexer
+  * @param node font tag
+  * @param pnode passed as an array to allow modifications
+  * @return <code>true</code> if a font tag has been replaced by a span carrying style attributes
+  */
+ private boolean font2Span(Lexer lexer, Node node, Node[] pnode)
+ {
+     AttVal av, style, next;
+
+     if (node.tag == this.tt.tagFont)
+     {
+         if (lexer.configuration.dropFontTags)
+         {
+             discardContainer(node, pnode);
+             return false;
+         }
+
+         // if FONT is only child of parent element then leave alone
+         if (node.parent.content == node && node.next == null)
+         {
+             return false;
+         }
+
+         addFontStyles(node, node.attributes);
+
+         // extract style attribute and free the rest
+         av = node.attributes;
+         style = null;
+
+         while (av != null)
+         {
+             next = av.next;
+
+             // literal-first comparison: safe for attributes whose name is null
+             if ("style".equals(av.attribute))
+             {
+                 av.next = null;
+                 style = av;
+             }
+
+             av = next;
+         }
+
+         node.attributes = style;
+
+         node.tag = this.tt.tagSpan;
+         node.element = "span";
+
+         return true;
+     }
+
+     return false;
+ }
+
+ /**
+  * Applies all matching rules to a node. The rules are tried in a fixed order (dir2Div, nestedList, center2Div,
+  * mergeDivs, blockStyle, inlineStyle, font2Span); whenever one of them reports a change the whole sequence is
+  * started again on the node to continue with, until no rule applies or that node is not an element.
+  * @param lexer Lexer
+  * @param node original node
+  * @return cleaned up node
+  */
+ private Node cleanNode(Lexer lexer, Node node)
+ {
+ Node next = null;
+ // one-element holder through which rules taking a Node[] can replace the node to continue with
+ Node[] o = new Node[1];
+ boolean b = false;
+
+ for (next = node; node != null && node.isElement(); node = next)
+ {
+ o[0] = next;
+
+ b = dir2Div(lexer, node);
+ next = o[0];
+ if (b)
+ {
+ continue;
+ }
+
+ // Special case: true result means that arg node and its parent no longer exist.
+ // So we must jump back up the CreateStyleProperties() call stack until we have a valid node reference.
+ b = nestedList(lexer, node, o);
+ next = o[0];
+ if (b)
+ {
+ return next;
+ }
+
+ b = center2Div(lexer, node, o);
+ next = o[0];
+ if (b)
+ {
+ continue;
+ }
+
+ b = mergeDivs(lexer, node);
+ next = o[0];
+ if (b)
+ {
+ continue;
+ }
+
+ b = blockStyle(lexer, node);
+ next = o[0];
+ if (b)
+ {
+ continue;
+ }
+
+ b = inlineStyle(lexer, node, o);
+ next = o[0];
+ if (b)
+ {
+ continue;
+ }
+
+ b = font2Span(lexer, node, o);
+ next = o[0];
+ if (b)
+ {
+ continue;
+ }
+
+ // no rule matched: done with this node
+ break;
+ }
+
+ return next;
+ }
+
+ /**
+  * Cleans a subtree bottom-up: every child is processed recursively first, then the node itself is handed to
+  * <code>cleanNode</code>.
+  * <p>
+  * Special case: if the current node is destroyed by CleanNode() lower in the tree, this node and its parent no
+  * longer exist. So we must jump back up the CreateStyleProperties() call stack until we have a valid node
+  * reference. This is detected through the replacement holder given to the recursive calls: as soon as it no
+  * longer refers to node, its content is returned immediately.
+  * </p>
+  * @param lexer Lexer
+  * @param node Node
+  * @param prepl passed in as array to allow modifications
+  * @return cleaned Node
+  */
+ private Node createStyleProperties(Lexer lexer, Node node, Node[] prepl)
+ {
+     Node[] replacement = {node};
+     Node current = node.content;
+
+     while (current != null)
+     {
+         // the recursive call returns the node from which the sibling walk continues
+         current = createStyleProperties(lexer, current, replacement);
+
+         if (replacement[0] != node)
+         {
+             return replacement[0];
+         }
+
+         current = current.next;
+     }
+
+     return cleanNode(lexer, node);
+ }
+
+ /**
+  * Walks the subtree below node depth-first and applies <code>style2Rule</code> to every node, children
+  * before their parent, so that style attributes get replaced by corresponding class attributes.
+  * @param lexer Lexer
+  * @param node parent node
+  */
+ private void defineStyleRules(Lexer lexer, Node node)
+ {
+     for (Node kid = node.content; kid != null; kid = kid.next)
+     {
+         defineStyleRules(lexer, kid);
+     }
+
+     style2Rule(lexer, node);
+ }
+
+ /**
+  * Clean an html tree: presentational markup is turned into style properties first; unless
+  * <code>makeClean</code> is configured, style rules and a style element are then generated for the result.
+  * @param lexer Lexer
+  * @param doc root node
+  */
+ public void cleanTree(Lexer lexer, Node doc)
+ {
+     Node[] holder = {doc};
+     Node cleaned = createStyleProperties(lexer, doc, holder);
+
+     if (lexer.configuration.makeClean)
+     {
+         return;
+     }
+
+     defineStyleRules(lexer, cleaned);
+     createStyleElement(lexer, cleaned);
+ }
+
+ /**
+  * Simplifies {@code <b><b>... </b> ... </b>} etc.: a b or i element whose parent has the same tag is
+  * discarded (its container only); all other nodes are searched recursively.
+  * @param node root Node
+  */
+ public void nestedEmphasis(Node node)
+ {
+     Node[] holder = new Node[1];
+     Node current = node;
+
+     while (current != null)
+     {
+         Node following = current.next;
+         boolean emphasis = current.tag == this.tt.tagB || current.tag == this.tt.tagI;
+
+         if (emphasis && current.parent != null && current.parent.tag == current.tag)
+         {
+             // strip redundant inner element; the holder tells us where to carry on
+             holder[0] = following;
+             discardContainer(current, holder);
+             following = holder[0];
+         }
+         else if (current.content != null)
+         {
+             nestedEmphasis(current.content);
+         }
+
+         current = following;
+     }
+ }
+
+ /**
+  * Replaces physical emphasis by logical emphasis in node, its following siblings and all of their
+  * descendants: i becomes em and b becomes strong.
+  * @param node root Node
+  */
+ public void emFromI(Node node)
+ {
+     for (Node current = node; current != null; current = current.next)
+     {
+         Dict logical = null;
+
+         if (current.tag == this.tt.tagI)
+         {
+             logical = this.tt.tagEm;
+         }
+         else if (current.tag == this.tt.tagB)
+         {
+             logical = this.tt.tagStrong;
+         }
+
+         if (logical != null)
+         {
+             current.element = logical.name;
+             current.tag = logical;
+         }
+
+         if (current.content != null)
+         {
+             emFromI(current.content);
+         }
+     }
+ }
+
+ /**
+ * Some people use dir or ul without an li to indent the content. The pattern to look
for is a list with a single
+ * implicit li. This is recursively replaced by an implicit blockquote.
+ * @param node root Node
+ */
+ public void list2BQ(Node node)
+ {
+ while (node != null)
+ {
+ if (node.content != null)
+ {
+ list2BQ(node.content);
+ }
+
+ if (node.tag != null
+ && node.tag.getParser() == ParserImpl.LIST
+ && node.hasOneChild()
+ && node.content.implicit)
+ {
+ stripOnlyChild(node);
+ node.element = this.tt.tagBlockquote.name;
+ node.tag = this.tt.tagBlockquote;
+ node.implicit = true;
+ }
+
+ node = node.next;
+ }
+ }
+
+ /**
+  * Replace implicit blockquote by div with an indent taking care to reduce nested blockquotes to a single div
+  * with the indent set to match the nesting depth (2em per level). The margin is put in front of an existing
+  * style value, or added as a new style attribute.
+  * @param node root Node
+  */
+ public void bQ2Div(Node node)
+ {
+     for (Node current = node; current != null; current = current.next)
+     {
+         if (!(current.tag == this.tt.tagBlockquote && current.implicit))
+         {
+             if (current.content != null)
+             {
+                 bQ2Div(current.content);
+             }
+             continue;
+         }
+
+         // collapse directly nested blockquotes, counting the levels
+         int depth = 1;
+
+         while (current.hasOneChild() && current.content.tag == this.tt.tagBlockquote && current.implicit)
+         {
+             ++depth;
+             stripOnlyChild(current);
+         }
+
+         if (current.content != null)
+         {
+             bQ2Div(current.content);
+         }
+
+         String margin = "margin-left: " + (2 * depth) + "em";
+
+         current.element = this.tt.tagDiv.name;
+         current.tag = this.tt.tagDiv;
+
+         AttVal style = current.getAttrByName("style");
+
+         if (style == null || style.value == null)
+         {
+             current.addAttribute("style", margin);
+         }
+         else
+         {
+             style.value = margin + "; " + style.value;
+         }
+     }
+ }
+
+ /**
+  * Find the enclosing table cell for the given node by walking up the parent chain, starting with the node
+  * itself, until a td is met.
+  * @param node Node
+  * @return enclosing cell node, or <code>null</code> if there is none
+  */
+ Node findEnclosingCell(Node node)
+ {
+     Node candidate = node;
+
+     while (candidate != null && candidate.tag != tt.tagTd)
+     {
+         candidate = candidate.parent;
+     }
+
+     return candidate;
+ }
+
+ /**
+  * node is {@code <![if ...]>}; prune up to the matching {@code <![endif]>}. Nodes are discarded one by one
+  * with <code>Node.discardElement</code>; a nested "if" section met on the way is pruned recursively.
+  * @param lexer Lexer
+  * @param node Node opening the section
+  * @return the node returned by discarding the endif, or <code>null</code> if the nodes ran out before an endif
+  * was found
+  */
+ public Node pruneSection(Lexer lexer, Node node)
+ {
+ for (;;)
+ {
+
+ // FG: commented out - don't add to empty cells
+
+ // if ((Lexer.getString(node.textarray, node.start, 21)).equals("if
+ // !supportEmptyParas"))
+ // {
+ // Node cell = findEnclosingCell(node);
+ // if (cell != null)
+ // {
+ // // Need to put into cell so it doesn't look weird
+ // char onesixty[] = {(char) 160, (char) 0};
+ // Node nbsp = lexer.newLiteralTextNode(lexer, onesixty);
+ // Node.insertNodeBeforeElement(node, nbsp);
+ // }
+ // }
+
+ // discard node and returns next
+ node = Node.discardElement(node);
+
+ if (node == null)
+ {
+ return null;
+ }
+
+ if (node.type == Node.SECTION_TAG)
+ {
+ // nested section: prune it as a whole, then carry on with what follows it
+ if ((TidyUtils.getString(node.textarray, node.start,
2)).equals("if"))
+ {
+ node = pruneSection(lexer, node);
+ continue;
+ }
+
+ // matching endif: drop it as well and stop
+ if ((TidyUtils.getString(node.textarray, node.start,
5)).equals("endif"))
+ {
+ node = Node.discardElement(node);
+ break;
+ }
+ }
+ }
+
+ return node;
+ }
+
+ /**
+  * Drop if/endif sections inserted by word2000. An "if" section is pruned up to its matching endif, unless it
+  * is an "if !vml" section; that one, like any other section tag, is only discarded itself. Nodes that are
+  * not section tags are searched recursively.
+  * @param lexer Lexer
+  * @param node Node root node
+  */
+ public void dropSections(Lexer lexer, Node node)
+ {
+     Node current = node;
+
+     while (current != null)
+     {
+         if (current.type != Node.SECTION_TAG)
+         {
+             if (current.content != null)
+             {
+                 dropSections(lexer, current.content);
+             }
+
+             current = current.next;
+             continue;
+         }
+
+         boolean opensIf = TidyUtils.getString(current.textarray, current.start, 2).equals("if");
+
+         // #444394 - fix 13 Sep 01: "if !vml" sections keep their content
+         if (opensIf && !TidyUtils.getString(current.textarray, current.start, 7).equals("if !vml"))
+         {
+             // prune up to matching endif
+             current = pruneSection(lexer, current);
+         }
+         else
+         {
+             // discard others as well
+             current = Node.discardElement(current);
+         }
+     }
+ }
+
+ /**
+  * Remove word2000 attributes from node. Removed are: class (unless its value is "Code" or does not start
+  * with "Mso"), style, lang, every attribute whose name starts with "x:", and height/width on td, tr and th.
+  * Attributes without a name are kept.
+  * @param node node to cleanup
+  */
+ public void purgeWord2000Attributes(Node node)
+ {
+     AttVal kept = null; // last attribute that stayed in the list
+     AttVal following;
+
+     for (AttVal attr = node.attributes; attr != null; attr = following)
+     {
+         following = attr.next;
+
+         String name = attr.attribute;
+         boolean keep;
+
+         if (name == null)
+         {
+             keep = true;
+         }
+         else if (name.equals("class"))
+         {
+             // special check for class="Code" denoting pre text
+             // Pass thru user defined styles as HTML class names
+             keep = attr.value != null && (attr.value.equals("Code") || !attr.value.startsWith("Mso"));
+         }
+         else
+         {
+             boolean cell = node.tag == this.tt.tagTd || node.tag == this.tt.tagTr || node.tag == this.tt.tagTh;
+             boolean dimension = name.equals("height") || name.equals("width");
+
+             keep = !(name.equals("style") || name.equals("lang") || name.startsWith("x:") || (dimension && cell));
+         }
+
+         if (keep)
+         {
+             kept = attr;
+         }
+         else if (kept != null)
+         {
+             kept.next = following;
+         }
+         else
+         {
+             node.attributes = following;
+         }
+     }
+ }
+
+ /**
+  * Word2000 uses span excessively, so we strip span out: the span's content is cleaned with
+  * <code>cleanWord2000</code>, spliced into the place of the span, and the span itself is discarded.
+  * @param lexer Lexer
+  * @param span Node span
+  * @return the node that followed the span before it was discarded (may be <code>null</code>)
+  */
+ public Node stripSpan(Lexer lexer, Node span)
+ {
+ Node node;
+ Node prev = null;
+ Node content;
+
+ // deal with span elements that have content by splicing the content in place of the span after having
+ // processed it
+
+ cleanWord2000(lexer, span.content);
+ content = span.content;
+
+ // pick the node after which the content is inserted: the span's previous sibling if there is one,
+ // otherwise the first content node, which is moved in front of the span
+ if (span.prev != null)
+ {
+ prev = span.prev;
+ }
+ else if (content != null)
+ {
+ node = content;
+ content = content.next;
+ node.removeNode();
+ Node.insertNodeBeforeElement(span, node);
+ prev = node;
+ }
+
+ // move the remaining content nodes, one after the other, behind prev
+ while (content != null)
+ {
+ node = content;
+ content = content.next;
+ node.removeNode();
+ prev.insertNodeAfterElement(node);
+ prev = node;
+ }
+
+ // the span was the last child: the parent's last pointer must now refer to the last node moved
+ if (span.next == null)
+ {
+ span.parent.last = prev;
+ }
+
+ node = span.next;
+ span.content = null;
+ Node.discardElement(span);
+ return node;
+ }
+
+ /**
+  * Map non-breaking spaces to regular spaces. Handles node, its following siblings and all of their
+  * descendants; the text of every text node is rewritten in place.
+  * @param lexer Lexer
+  * @param node Node
+  */
+ private void normalizeSpaces(Lexer lexer, Node node)
+ {
+ while (node != null)
+ {
+ if (node.content != null)
+ {
+ normalizeSpaces(lexer, node.content);
+ }
+
+ if (node.type == Node.TEXT_NODE)
+ {
+ int i;
+ // one-element holder so that PPrint.getUTF8 can hand back the decoded character
+ int[] c = new int[1];
+ // p is the write position, i the read position
+ int p = node.start;
+
+ for (i = node.start; i < node.end; ++i)
+ {
+ c[0] = node.textarray[i];
+
+ // look for UTF-8 multibyte character
+ // NOTE(review): if textarray is a byte[] the value is sign-extended here and this comparison can
+ // never be true - confirm the element type
+ if (c[0] > 0x7F)
+ {
+ // presumably getUTF8 returns the number of additional bytes consumed - verify against PPrint
+ i += PPrint.getUTF8(node.textarray, i, c);
+ }
+
+ // 160 is the non-breaking space
+ if (c[0] == 160)
+ {
+ c[0] = ' ';
+ }
+
+ p = PPrint.putUTF8(node.textarray, p, c[0]);
+ }
+ // NOTE(review): node.end is not adjusted to p after the loop, although the rewritten text may be
+ // shorter than the original - confirm whether that is intended
+ }
+
+ node = node.next;
+ }
+ }
+
+ /**
+  * Used to hunt for hidden preformatted sections.
+  * @param node checked node
+  * @return <code>true</code> if the value of the node's style attribute contains both "margin-top: 0" and
+  * "margin-bottom: 0"
+  */
+ boolean noMargins(Node node)
+ {
+     AttVal style = node.getAttrByName("style");
+
+     if (style == null || style.value == null)
+     {
+         return false;
+     }
+
+     String css = style.value;
+
+     // both substrings have to be present
+     return css.indexOf("margin-top: 0") != -1 && css.indexOf("margin-bottom: 0") != -1;
+ }
+
+ /**
+  * Does element have a single space as its content? That is the case when its only child is a text node
+  * which is either one character long and a blank, or two characters long and decodes (UTF-8) to the
+  * character 160.
+  * @param lexer Lexer
+  * @param node checked node
+  * @return <code>true</code> if the element has a single space as its content
+  */
+ boolean singleSpace(Lexer lexer, Node node)
+ {
+     Node text = node.content;
+
+     if (text == null || text.next != null || text.type != Node.TEXT_NODE)
+     {
+         return false;
+     }
+
+     int length = text.end - text.start;
+
+     if (length == 1 && lexer.lexbuf[text.start] == ' ')
+     {
+         return true;
+     }
+
+     if (length == 2)
+     {
+         int[] decoded = new int[1];
+
+         PPrint.getUTF8(lexer.lexbuf, text.start, decoded);
+
+         return decoded[0] == 160;
+     }
+
+     return false;
+ }
+
+ /**
+  * This is a major clean up to strip out all the extra stuff you get when you save as web page from Word 2000.
+  * It doesn't yet know what to do with VML tags, but these will appear as errors unless you declare them as new
+  * tags, such as o:p which needs to be declared as inline.
+  * <p>
+  * The method walks node and its following siblings and recurses into their content. It returns immediately
+  * when it meets an html element without an <code>xmlns:o</code> attribute, i.e. a document that was not
+  * written by Word 2000.
+  * </p>
+  * @param lexer Lexer
+  * @param node node to clean up, may be <code>null</code>
+  */
+ public void cleanWord2000(Lexer lexer, Node node)
+ {
+     // used to build a list from a sequence of bulleted p's
+     Node list = null;
+
+     while (node != null)
+     {
+
+         // get rid of Word's xmlns attributes
+         if (node.tag == tt.tagHtml)
+         {
+             // check that it's a Word 2000 document
+             if ((node.getAttrByName("xmlns:o") == null))
+             {
+                 return;
+             }
+             lexer.configuration.tt.freeAttrs(node);
+         }
+
+         // fix up preformatted sections by looking for a sequence of paragraphs with zero top/bottom margin
+         if (node.tag == tt.tagP)
+         {
+             if (noMargins(node))
+             {
+                 Node pre;
+                 Node next;
+                 Node.coerceNode(lexer, node, tt.tagPre);
+
+                 purgeWord2000Attributes(node);
+
+                 if (node.content != null)
+                 {
+                     cleanWord2000(lexer, node.content);
+                 }
+
+                 pre = node;
+                 node = node.next;
+
+                 // continue to strip p's; the null check covers the case where the paragraph just converted
+                 // was the last sibling
+                 while (node != null && node.tag == tt.tagP && noMargins(node))
+                 {
+                     next = node.next;
+                     node.removeNode();
+                     pre.insertNodeAtEnd(lexer.newLineNode());
+                     pre.insertNodeAtEnd(node);
+                     stripSpan(lexer, node);
+                     node = next;
+                 }
+
+                 if (node == null)
+                 {
+                     break;
+                 }
+             }
+         }
+
+         if (node.tag != null && TidyUtils.toBoolean(node.tag.model & Dict.CM_BLOCK) && singleSpace(lexer, node))
+         {
+             node = stripSpan(lexer, node);
+             continue;
+         }
+
+         // discard Word's style verbiage
+         if (node.tag == this.tt.tagStyle || node.tag == this.tt.tagMeta || node.type == Node.COMMENT_TAG)
+         {
+             node = Node.discardElement(node);
+             continue;
+         }
+
+         // strip out all span and font tags Word scatters so liberally!
+         if (node.tag == this.tt.tagSpan || node.tag == this.tt.tagFont)
+         {
+             node = stripSpan(lexer, node);
+             continue;
+         }
+
+         if (node.tag == this.tt.tagLink)
+         {
+             AttVal attr = node.getAttrByName("rel");
+
+             if (attr != null && attr.value != null && attr.value.equals("File-List"))
+             {
+                 node = Node.discardElement(node);
+                 continue;
+             }
+         }
+
+         // discard empty paragraphs
+         if (node.content == null && node.tag == this.tt.tagP)
+         {
+             node = Node.discardElement(node);
+             continue;
+         }
+
+         if (node.tag == this.tt.tagP)
+         {
+             AttVal attr = node.getAttrByName("class");
+             AttVal atrStyle = node.getAttrByName("style");
+
+             // (JES) Sometimes Word marks a list item with the following hokie syntax
+             // <p class="MsoNormal" style="...;mso-list:l1 level1 lfo1;
+             // translate these into <li>
+
+             // map sequence of <p class="MsoListBullet"> to <ul> ... </ul>
+             // map <p class="MsoListNumber"> to <ol>...</ol>
+             // 463066 - fix by Joel Shafer 19 Sep 01 (mso-list test); a style attribute without a value is
+             // treated like a missing one
+             if (attr != null
+                 && attr.value != null
+                 && ((attr.value.equals("MsoListBullet") || attr.value.equals("MsoListNumber")) //
+                 || (atrStyle != null && atrStyle.value != null && (atrStyle.value.indexOf("mso-list:") != -1))))
+             {
+                 Dict listType = tt.tagUl;
+
+                 if (attr.value.equals("MsoListNumber"))
+                 {
+                     listType = tt.tagOl;
+                 }
+
+                 Node.coerceNode(lexer, node, this.tt.tagLi);
+
+                 if (list == null || list.tag != listType)
+                 {
+                     list = lexer.inferredTag(listType.name);
+                     Node.insertNodeBeforeElement(node, list);
+                 }
+
+                 purgeWord2000Attributes(node);
+
+                 if (node.content != null)
+                 {
+                     cleanWord2000(lexer, node.content);
+                 }
+
+                 // remove node and append to contents of list
+                 node.removeNode();
+                 list.insertNodeAtEnd(node);
+                 node = list;
+             }
+             // map sequence of <p class="Code"> to <pre> ... </pre>
+             else if (attr != null && attr.value != null && attr.value.equals("Code"))
+             {
+                 Node br = lexer.newLineNode();
+                 normalizeSpaces(lexer, node);
+
+                 if (list == null || list.tag != this.tt.tagPre)
+                 {
+                     list = lexer.inferredTag("pre");
+                     Node.insertNodeBeforeElement(node, list);
+                 }
+
+                 // remove node and append to contents of list
+                 node.removeNode();
+                 list.insertNodeAtEnd(node);
+                 stripSpan(lexer, node);
+                 list.insertNodeAtEnd(br);
+                 node = list.next;
+             }
+             else
+             {
+                 list = null;
+             }
+         }
+         else
+         {
+             list = null;
+         }
+
+         // the pre built above may have been the last sibling, in which case there is nothing left to do
+         if (node == null)
+         {
+             break;
+         }
+
+         // strip out style and class attributes
+         if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+         {
+             purgeWord2000Attributes(node);
+         }
+
+         if (node.content != null)
+         {
+             cleanWord2000(lexer, node.content);
+         }
+
+         node = node.next;
+     }
+ }
+
+ /**
+  * Check if the current document is a converted Word document. That is assumed when the html element has an
+  * <code>xmlns:o</code> attribute, or when the head contains a generator meta element whose content mentions
+  * "Microsoft".
+  * @param root root Node
+  * @return <code>true</code> if the document has been generated by Microsoft Word.
+  */
+ public boolean isWord2000(Node root)
+ {
+     AttVal attval;
+     Node node;
+     Node head;
+     Node html = root.findHTML(this.tt);
+
+     if (html != null && html.getAttrByName("xmlns:o") != null)
+     {
+         return true;
+     }
+
+     // search for <meta name="GENERATOR" content="Microsoft ...">
+     head = root.findHEAD(tt);
+
+     if (head != null)
+     {
+         for (node = head.content; node != null; node = node.next)
+         {
+             if (node.tag != tt.tagMeta)
+             {
+                 continue;
+             }
+
+             attval = node.getAttrByName("name");
+
+             if (attval == null || attval.value == null)
+             {
+                 continue;
+             }
+
+             // the value is written as GENERATOR, Generator or generator, so the case must be ignored
+             if (!"generator".equalsIgnoreCase(attval.value))
+             {
+                 continue;
+             }
+
+             attval = node.getAttrByName("content");
+
+             if (attval == null || attval.value == null)
+             {
+                 continue;
+             }
+
+             if (attval.value.indexOf("Microsoft") != -1)
+             {
+                 return true;
+             }
+         }
+     }
+
+     return false;
+ }
+
+ /**
+  * Where appropriate move object elements from head to body. An object found in the head is moved to the start
+  * of the body as soon as one of its children is something other than a param element.
+  * @param lexer Lexer
+  * @param html html node, may be <code>null</code>
+  */
+ static void bumpObject(Lexer lexer, Node html)
+ {
+     if (html == null)
+     {
+         return;
+     }
+
+     Node node, next, head = null, body = null;
+     TagTable tt = lexer.configuration.tt;
+     for (node = html.content; node != null; node = node.next)
+     {
+         if (node.tag == tt.tagHead)
+         {
+             head = node;
+         }
+
+         if (node.tag == tt.tagBody)
+         {
+             body = node;
+         }
+     }
+
+     if (head != null && body != null)
+     {
+         for (node = head.content; node != null; node = next)
+         {
+             // remember the successor now, node may be moved below
+             next = node.next;
+
+             if (node.tag == tt.tagObject)
+             {
+                 Node child;
+                 boolean bump = false;
+
+                 for (child = node.content; child != null; child = child.next)
+                 {
+                     // bump to body unless content is param; the blank test is made on the text child being
+                     // looked at, not on the enclosing object element
+                     // NOTE(review): the second operand is also true for every child whose tag is not param,
+                     // which presumably includes blank text - confirm whether blank text should bump
+                     if ((child.type == Node.TEXT_NODE && !child.isBlank(lexer)) || child.tag != tt.tagParam)
+                     {
+                         bump = true;
+                         break;
+                     }
+                 }
+
+                 if (bump)
+                 {
+                     node.removeNode();
+                     body.insertNodeAtStart(node);
+                 }
+             }
+         }
+     }
+ }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Configuration.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Configuration.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Configuration.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,1259 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.io.Writer;
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.ajax4jsf.Messages;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+
+/**
+ * Read configuration file and manage configuration properties. Configuration files associate a property name with a
+ * value. The format is that of a Java .properties file.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class Configuration implements Serializable
+{
+
+ private static final Log log = LogFactory.getLog(Configuration.class);
+
+ /**
+ * character encoding = RAW.
+ * @deprecated use <code>Tidy.setRawOut(true)</code> for raw output
+ */
+ public static final int RAW = 0;
+
+ /**
+ * character encoding = ASCII.
+ * @deprecated
+ */
+ public static final int ASCII = 1;
+
+ /**
+ * character encoding = LATIN1.
+ * @deprecated
+ */
+ public static final int LATIN1 = 2;
+
+ /**
+ * character encoding = UTF8.
+ * @deprecated
+ */
+ public static final int UTF8 = 3;
+
+ /**
+ * character encoding = ISO2022.
+ * @deprecated
+ */
+ public static final int ISO2022 = 4;
+
+ /**
+ * character encoding = MACROMAN.
+ * @deprecated
+ */
+ public static final int MACROMAN = 5;
+
+ /**
+ * character encoding = UTF16LE.
+ * @deprecated
+ */
+ public static final int UTF16LE = 6;
+
+ /**
+ * character encoding = UTF16BE.
+ * @deprecated
+ */
+ public static final int UTF16BE = 7;
+
+ /**
+ * character encoding = UTF16.
+ * @deprecated
+ */
+ public static final int UTF16 = 8;
+
+ /**
+ * character encoding = WIN1252.
+ * @deprecated
+ */
+ public static final int WIN1252 = 9;
+
+ /**
+ * character encoding = BIG5.
+ * @deprecated
+ */
+ public static final int BIG5 = 10;
+
+ /**
+ * character encoding = SHIFTJIS.
+ * @deprecated
+ */
+ public static final int SHIFTJIS = 11;
+
+ /**
+ * Convert from deprecated tidy encoding constant to standard java encoding name.
+ */
+ private final String[] ENCODING_NAMES = new String[]{
+ "raw", // rawOut, it will not be mapped to a java encoding
+ "ASCII",
+ "ISO8859_1",
+ "UTF8",
+ "JIS",
+ "MacRoman",
+ "UnicodeLittle",
+ "UnicodeBig",
+ "Unicode",
+ "Cp1252",
+ "Big5",
+ "SJIS"};
+
+ /**
+ * treatment of doctype: omit.
+ * @todo should be an enumeration DocTypeMode
+ */
+ public static final int DOCTYPE_OMIT = 0;
+
+ /**
+ * treatment of doctype: auto.
+ */
+ public static final int DOCTYPE_AUTO = 1;
+
+ /**
+ * treatment of doctype: strict.
+ */
+ public static final int DOCTYPE_STRICT = 2;
+
+ /**
+ * treatment of doctype: loose.
+ */
+ public static final int DOCTYPE_LOOSE = 3;
+
+ /**
+ * treatment of doctype: user.
+ */
+ public static final int DOCTYPE_USER = 4;
+
+ /**
+ * treatment of doctype: ignore doctype ( keep existing, if any ).
+ */
+ public static final int DOCTYPE_IGNORE = 5;
+ /**
+ * Keep last duplicate attribute.
+ * @todo should be an enumeration DupAttrMode
+ */
+ public static final int KEEP_LAST = 0;
+
+ /**
+ * Keep first duplicate attribute.
+ */
+ public static final int KEEP_FIRST = 1;
+
+ /**
+ * Map containing all the valid configuration options and the related parser. Each entry maps a String (option
+ * name) to a Flag instance.
+ */
+ private static final Map<String, Flag> OPTIONS = new HashMap<String,
Flag>();
+
+ /**
+ * serial version UID for this class.
+ */
+ private static final long serialVersionUID = -4955155037138560842L;
+
+ static
+ {
+ addConfigOption(new Flag("indent-spaces", "spaces",
ParsePropertyImpl.INT));
+ addConfigOption(new Flag("wrap", "wraplen",
ParsePropertyImpl.INT));
+ addConfigOption(new Flag("show-errors", "showErrors",
ParsePropertyImpl.INT));
+ addConfigOption(new Flag("tab-size", "tabsize",
ParsePropertyImpl.INT));
+
+ addConfigOption(new Flag("wrap-attributes", "wrapAttVals",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("wrap-script-literals",
"wrapScriptlets", ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("wrap-sections", "wrapSection",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("wrap-asp", "wrapAsp",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("wrap-jste", "wrapJste",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("wrap-php", "wrapPhp",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("literal-attributes",
"literalAttribs", ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("show-body-only", "bodyOnly",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("fix-uri", "fixUri",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("lower-literals", "lowerLiterals",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("hide-comments", "hideComments",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("indent-cdata", "indentCdata",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("force-output", "forceOutput",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("ascii-chars", "asciiChars",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("join-classes", "joinClasses",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("join-styles", "joinStyles",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("escape-cdata", "escapeCdata",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("replace-color", "replaceColor",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("quiet", "quiet",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("tidy-mark", "tidyMark",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("indent-attributes",
"indentAttributes", ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("hide-endtags", "hideEndTags",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("input-xml", "xmlTags",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("output-xml", "xmlOut",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("output-html", "htmlOut",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("output-xhtml", "xHTML",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("add-xml-pi", "xmlPi",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("add-xml-decl", "xmlPi",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("assume-xml-procins", "xmlPIs",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("uppercase-tags", "upperCaseTags",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("uppercase-attributes",
"upperCaseAttrs", ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("bare", "makeBare",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("clean", "makeClean",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("logical-emphasis",
"logicalEmphasis", ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("word-2000", "word2000",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("drop-empty-paras",
"dropEmptyParas", ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("drop-font-tags", "dropFontTags",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("drop-proprietary-attributes",
"dropProprietaryAttributes", ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("enclose-text", "encloseBodyText",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("enclose-block-text",
"encloseBlockText", ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("add-xml-space", "xmlSpace",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("fix-bad-comments", "fixComments",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("split", "burstSlides",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("break-before-br", "breakBeforeBR",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("numeric-entities", "numEntities",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("quote-marks", "quoteMarks",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("quote-nbsp", "quoteNbsp",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("quote-ampersand", "quoteAmpersand",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("write-back", "writeback",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("keep-time", "keepFileTimes",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("show-warnings", "showWarnings",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("ncr", "ncr",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("fix-backslash", "fixBackslash",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("gnu-emacs", "emacs",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("only-errors", "onlyErrors",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("output-raw", "rawOut",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("trim-empty-elements", "trimEmpty",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("move-elements", "moveElements",
ParsePropertyImpl.BOOL));
+ addConfigOption(new Flag("smart-indent", "smartIndent",
ParsePropertyImpl.BOOL));
+
+ addConfigOption(new Flag("markup", "onlyErrors",
ParsePropertyImpl.INVBOOL));
+
+ addConfigOption(new Flag("char-encoding", null,
ParsePropertyImpl.CHAR_ENCODING));
+ addConfigOption(new Flag("input-encoding", null,
ParsePropertyImpl.CHAR_ENCODING));
+ addConfigOption(new Flag("output-encoding", null,
ParsePropertyImpl.CHAR_ENCODING));
+
+ addConfigOption(new Flag("error-file", "errfile",
ParsePropertyImpl.NAME));
+ addConfigOption(new Flag("slide-style", "slidestyle",
ParsePropertyImpl.NAME));
+ addConfigOption(new Flag("language", "language",
ParsePropertyImpl.NAME));
+
+ addConfigOption(new Flag("new-inline-tags", null,
ParsePropertyImpl.TAGNAMES));
+ addConfigOption(new Flag("new-blocklevel-tags", null,
ParsePropertyImpl.TAGNAMES));
+ addConfigOption(new Flag("new-empty-tags", null,
ParsePropertyImpl.TAGNAMES));
+ addConfigOption(new Flag("new-pre-tags", null,
ParsePropertyImpl.TAGNAMES));
+
+ addConfigOption(new Flag("doctype", "docTypeStr",
ParsePropertyImpl.DOCTYPE));
+
+ addConfigOption(new Flag("repeated-attributes",
"duplicateAttrs", ParsePropertyImpl.REPEATED_ATTRIBUTES));
+
+ addConfigOption(new Flag("alt-text", "altText",
ParsePropertyImpl.STRING));
+
+ addConfigOption(new Flag("indent", "indentContent",
ParsePropertyImpl.INDENT));
+
+ addConfigOption(new Flag("css-prefix", "cssPrefix",
ParsePropertyImpl.CSS1SELECTOR));
+
+ addConfigOption(new Flag("newline", null, ParsePropertyImpl.NEWLINE));
+ }
+
+ /**
+ * default indentation.
+ */
+ protected int spaces = 2;
+
+ /**
+ * default wrap margin (68).
+ */
+ protected int wraplen = 68;
+
+ /**
+ * default tab size (8).
+ */
+ protected int tabsize = 8;
+
+ /**
+ * see doctype property.
+ */
+ protected int docTypeMode = DOCTYPE_AUTO;
+
+ /**
+ * Keep first or last duplicate attribute.
+ */
+ protected int duplicateAttrs = KEEP_LAST;
+
+ /**
+ * default text for alt attribute.
+ */
+ protected String altText;
+
+ /**
+ * style sheet for slides.
+ * @deprecated does nothing
+ */
+ protected String slidestyle;
+
+ /**
+ * RJ language property.
+ */
+ protected String language; // #431953
+
+ /**
+ * user specified doctype.
+ */
+ protected String docTypeStr;
+
+ /**
+ * file name to write errors to.
+ */
+ protected String errfile;
+
+ /**
+ * if true then output tidied markup.
+ */
+ protected boolean writeback;
+
+ /**
+ * if true normal output is suppressed.
+ */
+ protected boolean onlyErrors;
+
+ /**
+ * however errors are always shown.
+ */
+ protected boolean showWarnings = true;
+
+ /**
+ * no 'Parsing X', guessed DTD or summary.
+ */
+ protected boolean quiet;
+
+ /**
+ * indent content of appropriate tags.
+ */
+ protected boolean indentContent;
+
+ /**
+ * does text/block level content affect indentation.
+ */
+ protected boolean smartIndent;
+
+ /**
+ * suppress optional end tags.
+ */
+ protected boolean hideEndTags;
+
+ /**
+ * treat input as XML.
+ */
+ protected boolean xmlTags;
+
+ /**
+ * create output as XML.
+ */
+ protected boolean xmlOut;
+
+ /**
+ * output extensible HTML.
+ */
+ protected boolean xHTML;
+
+ /**
+ * output plain-old HTML, even for XHTML input. Yes means set explicitly.
+ */
+ protected boolean htmlOut;
+
+ /**
+ * add <code>&lt;?xml?&gt;</code> for XML docs.
+ */
+ protected boolean xmlPi;
+
+ /**
+ * output tags in upper not lower case.
+ */
+ protected boolean upperCaseTags;
+
+ /**
+ * output attributes in upper not lower case.
+ */
+ protected boolean upperCaseAttrs;
+
+ /**
+ * remove presentational clutter.
+ */
+ protected boolean makeClean;
+
+ /**
+ * Make bare HTML: remove Microsoft cruft.
+ */
+ protected boolean makeBare;
+
+ /**
+ * replace i by em and b by strong.
+ */
+ protected boolean logicalEmphasis;
+
+ /**
+ * discard presentation tags.
+ */
+ protected boolean dropFontTags;
+
+ /**
+ * discard proprietary attributes.
+ */
+ protected boolean dropProprietaryAttributes;
+
+ /**
+ * discard empty p elements.
+ */
+ protected boolean dropEmptyParas = true;
+
+ /**
+ * fix comments with adjacent hyphens.
+ */
+ protected boolean fixComments = true;
+
+ /**
+ * trim empty elements.
+ */
+ protected boolean trimEmpty = true;
+
+ /**
+ * o/p newline before br or not?
+ */
+ protected boolean breakBeforeBR;
+
+ /**
+ * create slides on each h2 element.
+ */
+ protected boolean burstSlides;
+
+ /**
+ * use numeric entities.
+ */
+ protected boolean numEntities;
+
+ /**
+ * output " marks as &quot;.
+ */
+ protected boolean quoteMarks;
+
+ /**
+ * output non-breaking space as entity.
+ */
+ protected boolean quoteNbsp = true;
+
+ /**
+ * output naked ampersand as &amp;.
+ */
+ protected boolean quoteAmpersand = true;
+
+ /**
+ * wrap within attribute values.
+ */
+ protected boolean wrapAttVals;
+
+ /**
+ * wrap within JavaScript string literals.
+ */
+ protected boolean wrapScriptlets;
+
+ /**
+ * wrap within CDATA section tags.
+ */
+ protected boolean wrapSection = true;
+
+ /**
+ * wrap within ASP pseudo elements.
+ */
+ protected boolean wrapAsp = true;
+
+ /**
+ * wrap within JSTE pseudo elements.
+ */
+ protected boolean wrapJste = true;
+
+ /**
+ * wrap within PHP pseudo elements.
+ */
+ protected boolean wrapPhp = true;
+
+ /**
+ * fix URLs by replacing \ with /.
+ */
+ protected boolean fixBackslash = true;
+
+ /**
+ * newline+indent before each attribute.
+ */
+ protected boolean indentAttributes;
+
+ /**
+ * If set to yes PIs must end with <code>?&gt;</code>.
+ */
+ protected boolean xmlPIs;
+
+ /**
+ * if set to yes adds xml:space attr as needed.
+ */
+ protected boolean xmlSpace;
+
+ /**
+ * if yes text at body is wrapped in p's.
+ */
+ protected boolean encloseBodyText;
+
+ /**
+ * if yes text in blocks is wrapped in p's.
+ */
+ protected boolean encloseBlockText;
+
+ /**
+ * if yes last modified time is preserved.
+ */
+ protected boolean keepFileTimes = true;
+
+ /**
+ * draconian cleaning for Word2000.
+ */
+ protected boolean word2000;
+
+ /**
+ * add meta element indicating tidied doc.
+ */
+ protected boolean tidyMark = true;
+
+ /**
+ * if true format error output for GNU Emacs.
+ */
+ protected boolean emacs;
+
+ /**
+ * if true attributes may use newlines.
+ */
+ protected boolean literalAttribs;
+
+ /**
+ * output BODY content only.
+ */
+ protected boolean bodyOnly;
+
+ /**
+ * properly escape URLs.
+ */
+ protected boolean fixUri = true;
+
+ /**
+ * folds known attribute values to lower case.
+ */
+ protected boolean lowerLiterals = true;
+
+ /**
+ * replace hex color attribute values with names.
+ */
+ protected boolean replaceColor;
+
+ /**
+ * hides all (real) comments in output.
+ */
+ protected boolean hideComments;
+
+ /**
+ * indent CDATA sections.
+ */
+ protected boolean indentCdata;
+
+ /**
+ * output document even if errors were found.
+ */
+ protected boolean forceOutput;
+
+ /**
+ * number of errors to put out.
+ */
+ protected int showErrors = 6;
+
+ /**
+ * convert quotes and dashes to nearest ASCII char.
+ */
+ protected boolean asciiChars = true;
+
+ /**
+ * join multiple class attributes.
+ */
+ protected boolean joinClasses;
+
+ /**
+ * join multiple style attributes.
+ */
+ protected boolean joinStyles = true;
+
+ /**
+ * replace CDATA sections with escaped text.
+ */
+ protected boolean escapeCdata = true;
+
+
+ /**
+ * Move elements ( style to head, to body, reformat tables )
+ */
+ protected boolean moveElements = true;
+ /**
+ * allow numeric character references.
+ */
+ protected boolean ncr = true; // #431953
+
+ /**
+ * CSS class naming for -clean option.
+ */
+ protected String cssPrefix;
+
+ /**
+ * char encoding used when replacing illegal SGML chars, regardless of specified
encoding.
+ */
+ protected int replacementCharEncoding = WIN1252; // by default
+
+ /**
+ * TagTable associated with this Configuration.
+ */
+ protected TagTable tt;
+
+ /**
+ * Report instance. Used for messages.
+ */
+ protected Report report;
+
+ /**
+ * track what types of tags user has defined to eliminate unnecessary searches.
+ */
+ protected int definedTags;
+
+ /**
+ * characters of the newline marker.
+ */
+ protected char[] newline =
(System.getProperty("line.separator")).toCharArray();
+
+ /**
+ * Input character encoding (defaults to LATIN1).
+ */
+ private int inCharEncoding = LATIN1;
+
+ /**
+ * Input character encoding (defaults to "ISO8859_1").
+ */
+ private String inCharEncodingName = "ISO8859_1";
+
+ /**
+ * Output character encoding (defaults to ASCII).
+ */
+ private int outCharEncoding = ASCII;
+
+ /**
+ * Output character encoding (defaults to "ASCII").
+ */
+ private String outCharEncodingName = "ASCII";
+
+ /**
+ * Avoid mapping values > 127 to entities.
+ */
+ protected boolean rawOut;
+
+ /**
+ * configuration properties.
+ */
+ private transient Properties properties = new Properties();
+
+ /**
+ * Instantiates a new Configuration that reports through the given Report instance. Only the
+ * <code>report</code> field is assigned here; all other settings keep the defaults given by their field
+ * initializers. This constructor should be called by Tidy only.
+ * @param report Report instance stored in this configuration
+ */
+ protected Configuration(Report report)
+ {
+ this.report = report;
+ }
+
+    /**
+     * Registers a configuration option in the static option map, keyed by the option name.
+     * An option registered earlier under the same name is replaced.
+     * @param flag configuration option to register
+     */
+    private static void addConfigOption(Flag flag)
+    {
+        String optionName = flag.getName();
+        OPTIONS.put(optionName, flag);
+    }
+
+    /**
+     * Copies the given properties into the stored configuration properties and then re-parses every stored
+     * property. Keys come from <code>propertyNames()</code> and values from <code>getProperty(key)</code>.
+     * Entries for which <code>getProperty</code> yields <code>null</code> are skipped.
+     * @param p Properties to add
+     */
+    public void addProps(Properties p)
+    {
+        Enumeration<?> propEnum = p.propertyNames();
+        while (propEnum.hasMoreElements())
+        {
+            String key = (String) propEnum.nextElement();
+            String value = p.getProperty(key);
+            if (value == null)
+            {
+                // getProperty() returns null when the stored value is not a String; putting a null value
+                // into a Properties object would throw NullPointerException, so the entry is ignored
+                continue;
+            }
+            properties.put(key, value);
+        }
+        parseProps();
+    }
+
+    /**
+     * Loads a property file into the stored configuration properties and then re-parses every stored property.
+     * If the file cannot be opened or read, the failure is logged (with the exception attached) and the method
+     * returns without parsing. The input stream is always closed.
+     * @param filename file name
+     */
+    public void parseFile(String filename)
+    {
+        FileInputStream input = null;
+        try
+        {
+            input = new FileInputStream(filename);
+            properties.load(input);
+        }
+        catch (IOException e)
+        {
+            // pass the exception along so the log keeps the stack trace
+            log.error(filename + " " + e.toString(), e);
+            return;
+        }
+        finally
+        {
+            if (input != null)
+            {
+                try
+                {
+                    input.close();
+                }
+                catch (IOException ignored)
+                {
+                    // best effort: nothing useful can be done if closing a read-only stream fails
+                }
+            }
+        }
+        parseProps();
+    }
+
+    /**
+     * Tells whether the given name is registered as a configuration option.
+     * @param name configuration parameter name, may be <code>null</code>
+     * @return <code>true</code> if the name is non-null and present in the option map
+     */
+    public static boolean isKnownOption(String name)
+    {
+        if (name == null)
+        {
+            return false;
+        }
+        return OPTIONS.containsKey(name);
+    }
+
+    /**
+     * Applies every stored configuration property. For each key the matching option is looked up; unknown
+     * keys are reported through <code>report.unknownOption</code> and skipped. The option's parser converts
+     * the string value and, when the option has a backing field, the result is written to that field.
+     * @throws RuntimeException if the parsed value cannot be stored in the backing field; the reflective
+     * exception is kept as the cause
+     */
+    private void parseProps()
+    {
+        for (Object rawKey : properties.keySet())
+        {
+            String key = (String) rawKey;
+            Flag flag = OPTIONS.get(key);
+            if (flag == null)
+            {
+                report.unknownOption(key);
+                continue;
+            }
+
+            String stringValue = properties.getProperty(key);
+            Object value = flag.getParser().parse(stringValue, key, this);
+
+            // options without a backing field are handled entirely by their parser
+            Field location = flag.getLocation();
+            if (location != null)
+            {
+                try
+                {
+                    location.set(this, value);
+                }
+                catch (IllegalArgumentException e)
+                {
+                    throw new RuntimeException(
+                        Messages.getMessage(Messages.CONFIG_INITIALIZATION_ERROR,
+                            new Object[]{"IllegalArgumentException", key, value, e.getMessage()}), e);
+                }
+                catch (IllegalAccessException e)
+                {
+                    throw new RuntimeException(
+                        Messages.getMessage(Messages.CONFIG_INITIALIZATION_ERROR,
+                            new Object[]{"IllegalAccessException", key, value, e.getMessage()}), e);
+                }
+            }
+        }
+    }
+
+    /**
+     * Makes the option values mutually consistent by deriving dependent flags from the ones already set.
+     */
+    public void adjust()
+    {
+        // enclosing block text implies enclosing body text
+        if (encloseBlockText)
+        {
+            encloseBodyText = true;
+        }
+
+        // smart indent implies indenting content
+        if (smartIndent)
+        {
+            indentContent = true;
+        }
+
+        // a wrap length of zero means "never wrap"
+        if (wraplen == 0)
+        {
+            wraplen = Integer.MAX_VALUE;
+        }
+
+        // Word 2000 needs o:p to be declared as inline
+        if (word2000)
+        {
+            definedTags |= Dict.TAGTYPE_INLINE;
+            tt.defineTag(Dict.TAGTYPE_INLINE, "o:p");
+        }
+
+        if (xmlTags)
+        {
+            // #480701 XML input wins over the XHTML output flag; XML in means XML out
+            xHTML = false;
+            xmlOut = true;
+            xmlPIs = true;
+        }
+        else if (xHTML)
+        {
+            // XHTML is written in lower case
+            xmlOut = true;
+            upperCaseTags = false;
+            upperCaseAttrs = false;
+        }
+
+        // #427837 - fix by Dave Raggett 02 Jun 01
+        // generate an XML declaration when the output encoding is neither UTF8 nor ASCII
+        boolean needsDeclaredEncoding = getOutCharEncoding() != UTF8 && getOutCharEncoding() != ASCII;
+        if (needsDeclaredEncoding && xmlOut)
+        {
+            xmlPi = true;
+        }
+
+        // XML requires end tags and quoted ampersands
+        if (xmlOut)
+        {
+            quoteAmpersand = true;
+            hideEndTags = false;
+        }
+    }
+
+ /**
+ * Writes a table of all registered configuration options, sorted by option name, to the given writer.
+ * Each row holds the option name, the type reported by the option's parser and either the current value
+ * (read reflectively from the option's backing field, if any) or the allowable values reported by the
+ * parser. The writer is flushed at the end but not closed.
+ * @param errout where to write
+ * @param showActualConfiguration <code>true</code> to print current values, <code>false</code> to print
+ * allowable values
+ * @throws RuntimeException if a backing field cannot be read or if writing fails; for write failures only
+ * the message of the IOException is carried over
+ */
+ void printConfigOptions(Writer errout, boolean showActualConfiguration)
+ {
+ // run of spaces used to pad the name and type columns
+ String pad = "
";
+ try
+ {
+ errout.write("\nConfiguration File Settings:\n\n");
+
+ if (showActualConfiguration)
+ {
+ errout.write("Name Type Current
Value\n");
+ }
+ else
+ {
+ errout.write("Name Type Allowable
values\n");
+ }
+
+ errout.write("=========================== =========
========================================\n");
+
+ Flag configItem;
+
+ // sort configuration options
+ List<Flag> values = new ArrayList<Flag>(OPTIONS.values());
+ Collections.sort(values);
+
+ Iterator<Flag> iterator = values.iterator();
+
+ while (iterator.hasNext())
+ {
+ configItem = (Flag) iterator.next();
+
+ // NOTE(review): the padding lengths assume names shorter than 28 and type names shorter than 11
+ // characters, and a pad string at least that long - confirm
+ errout.write(configItem.getName());
+ errout.write(pad, 0, 28 - configItem.getName().length());
+
+ errout.write(configItem.getParser().getType());
+ errout.write(pad, 0, 11 - configItem.getParser().getType().length());
+
+ if (showActualConfiguration)
+ {
+ Field field = configItem.getLocation();
+ // stays null for options that have no backing field
+ Object actualValue = null;
+
+ if (field != null)
+ {
+ try
+ {
+ actualValue = field.get(this);
+ }
+ catch (IllegalArgumentException e1)
+ {
+ // should never happen
+ throw new RuntimeException(
+ Messages.getMessage(Messages.FIELD_READING_ERROR,
"IllegalArgumentException", field.getName()));
+ }
+ catch (IllegalAccessException e1)
+ {
+ // should never happen
+ throw new RuntimeException(
+ Messages.getMessage(Messages.FIELD_READING_ERROR,
"IllegalAccessException", field.getName()));
+ }
+ }
+
+
errout.write(configItem.getParser().getFriendlyName(configItem.getName(), actualValue,
this));
+ }
+ else
+ {
+ errout.write(configItem.getParser().getOptionValues());
+ }
+
+ errout.write("\n");
+
+ }
+ errout.flush();
+ }
+ catch (IOException e)
+ {
+ throw new RuntimeException(e.getMessage());
+ }
+
+ }
+
+    /**
+     * A configuration option: an option name, the parser that converts its string value, and optionally the
+     * name of the Configuration field that stores the parsed value. Flags are identified, hashed and ordered
+     * by option name only.
+     */
+    static class Flag implements Comparable<Flag>
+    {
+
+        /**
+         * option name.
+         */
+        private final String name;
+
+        /**
+         * name of the backing field in Configuration (can be null).
+         */
+        private final String fieldName;
+
+        /**
+         * Field where the evaluated value is saved; resolved lazily by {@link #getLocation()}.
+         */
+        private Field location;
+
+        /**
+         * Parser for the configuration property.
+         */
+        private final ParseProperty parser;
+
+        /**
+         * Instantiates a new Flag.
+         * @param name option name
+         * @param fieldName field name (can be null)
+         * @param parser parser for property
+         */
+        Flag(String name, String fieldName, ParseProperty parser)
+        {
+            this.fieldName = fieldName;
+            this.name = name;
+            this.parser = parser;
+        }
+
+        /**
+         * Getter for <code>location</code>. The field is looked up by name on the first call and cached.
+         * @return the backing field, or <code>null</code> when this option has no field name
+         * @throws RuntimeException if the field does not exist or cannot be accessed; the original
+         * exception is kept as the cause
+         */
+        public Field getLocation()
+        {
+            // lazy initialization to speed up loading
+            if (fieldName != null && this.location == null)
+            {
+                try
+                {
+                    this.location = Configuration.class.getDeclaredField(fieldName);
+                }
+                catch (NoSuchFieldException e)
+                {
+                    throw new RuntimeException(
+                        Messages.getMessage(Messages.CONFIG_INITIALIZATION_ERROR_2,
+                            "NoSuchFieldException", fieldName), e);
+                }
+                catch (SecurityException e)
+                {
+                    throw new RuntimeException(
+                        Messages.getMessage(Messages.CONFIG_INITIALIZATION_ERROR_2,
+                            "SecurityException", fieldName) + ": " + e.getMessage(), e);
+                }
+            }
+
+            return this.location;
+        }
+
+        /**
+         * Getter for <code>name</code>.
+         * @return Returns the name.
+         */
+        public String getName()
+        {
+            return this.name;
+        }
+
+        /**
+         * Getter for <code>parser</code>.
+         * @return Returns the parser.
+         */
+        public ParseProperty getParser()
+        {
+            return this.parser;
+        }
+
+        /**
+         * Two flags are equal when their option names are equal. Returns <code>false</code> for
+         * <code>null</code> and for objects that are not flags.
+         * @see java.lang.Object#equals(java.lang.Object)
+         */
+        @Override
+        public boolean equals(Object obj)
+        {
+            if (this == obj)
+            {
+                return true;
+            }
+            if (!(obj instanceof Flag))
+            {
+                // also covers null; an unchecked cast here would throw instead of answering false
+                return false;
+            }
+            return this.name.equals(((Flag) obj).name);
+        }
+
+        /**
+         * @see java.lang.Object#hashCode()
+         */
+        @Override
+        public int hashCode()
+        {
+            // returning the hashCode of the name, to be consistent with equals and compareTo
+            return this.name.hashCode();
+        }
+
+        /**
+         * Orders flags by option name.
+         * @see java.lang.Comparable#compareTo(java.lang.Object)
+         */
+        public int compareTo(Flag o)
+        {
+            return this.name.compareTo(o.name);
+        }
+
+    }
+
+    /**
+     * Returns the numeric code of the input character encoding.
+     * @return the current value of <code>inCharEncoding</code>
+     * @deprecated use getInCharEncodingName()
+     */
+    protected int getInCharEncoding()
+    {
+        return inCharEncoding;
+    }
+
+    /**
+     * Sets the input character encoding from a numeric code. Passing <code>RAW</code> switches raw output on
+     * and leaves the stored input encoding untouched; any other code switches raw output off and is stored.
+     * @param encoding The inCharEncoding to set.
+     * @deprecated use setInCharEncodingName(String)
+     */
+    protected void setInCharEncoding(int encoding)
+    {
+        this.rawOut = (encoding == RAW);
+        if (!this.rawOut)
+        {
+            this.inCharEncoding = encoding;
+        }
+    }
+
+    /**
+     * Returns the name of the input character encoding.
+     * @return the current value of <code>inCharEncodingName</code>
+     */
+    protected String getInCharEncodingName()
+    {
+        return inCharEncodingName;
+    }
+
+    /**
+     * Sets the input character encoding by name. The name is first mapped with
+     * <code>EncodingNameMapper.toJava</code>; when that yields <code>null</code> nothing changes, otherwise
+     * both the encoding name and the matching numeric code are updated.
+     * @param encoding The inCharEncodingName to set.
+     */
+    protected void setInCharEncodingName(String encoding)
+    {
+        String mapped = EncodingNameMapper.toJava(encoding);
+        if (mapped == null)
+        {
+            return;
+        }
+
+        this.inCharEncodingName = mapped;
+        this.inCharEncoding = convertCharEncoding(mapped);
+    }
+
+    /**
+     * Returns the numeric code of the output character encoding.
+     * @return the current value of <code>outCharEncoding</code>
+     * @deprecated use getOutCharEncodingName()
+     */
+    protected int getOutCharEncoding()
+    {
+        return outCharEncoding;
+    }
+
+    /**
+     * Sets the output character encoding from a numeric code. <code>RAW</code> switches raw output on and
+     * leaves the stored output encoding untouched. Every other code switches raw output off;
+     * <code>MACROMAN</code> and <code>WIN1252</code> are stored as <code>ASCII</code>, all remaining codes
+     * are stored unchanged.
+     * @param encoding The outCharEncoding to set.
+     * @deprecated use setOutCharEncodingName(String)
+     */
+    protected void setOutCharEncoding(int encoding)
+    {
+        if (encoding == RAW)
+        {
+            this.rawOut = true;
+            return;
+        }
+
+        this.rawOut = false;
+        boolean storedAsAscii = (encoding == MACROMAN || encoding == WIN1252);
+        this.outCharEncoding = storedAsAscii ? ASCII : encoding;
+    }
+
+    /**
+     * Returns the name of the output character encoding.
+     * @return the current value of <code>outCharEncodingName</code>
+     */
+    protected String getOutCharEncodingName()
+    {
+        return outCharEncodingName;
+    }
+
+    /**
+     * Sets the output character encoding by name. The name is first mapped with
+     * <code>EncodingNameMapper.toJava</code>; when that yields <code>null</code> nothing changes, otherwise
+     * both the encoding name and the matching numeric code are updated.
+     * @param encoding The outCharEncodingName to set.
+     */
+    protected void setOutCharEncodingName(String encoding)
+    {
+        String mapped = EncodingNameMapper.toJava(encoding);
+        if (mapped == null)
+        {
+            return;
+        }
+
+        this.outCharEncodingName = mapped;
+        this.outCharEncoding = convertCharEncoding(mapped);
+    }
+
+    /**
+     * Sets the same encoding name for input and output by delegating to
+     * <code>setInCharEncodingName</code> and then <code>setOutCharEncodingName</code>.
+     * @param encoding The CharEncodingName to set.
+     */
+    protected void setInOutEncodingName(String encoding)
+    {
+        this.setInCharEncodingName(encoding);
+        this.setOutCharEncodingName(encoding);
+    }
+
+    /**
+     * Convert a char encoding from the deprecated tidy constant to a standard java encoding name.
+     * @param code encoding code
+     * @return encoding name, or <code>null</code> when the code is 0 (raw), negative or beyond the table of
+     * known encodings
+     */
+    protected String convertCharEncoding(int code)
+    {
+        // code must be strictly positive: index 0 is the unmapped "raw" entry, and a negative code (such as
+        // the -1 returned by convertCharEncoding(String) for unknown names) would index outside the array
+        if (code > 0 && code < ENCODING_NAMES.length)
+        {
+            return ENCODING_NAMES[code];
+        }
+        return null;
+    }
+
+    /**
+     * Convert a char encoding from a standard java encoding name to the deprecated tidy constant. The
+     * comparison is exact (case sensitive) and never matches the entry at index 0.
+     * @param name encoding name, may be <code>null</code>
+     * @return encoding code, or -1 when the name is <code>null</code> or not in the table
+     */
+    protected int convertCharEncoding(String name)
+    {
+        int code = -1;
+
+        if (name != null)
+        {
+            // index 0 is skipped on purpose; stop at the first match
+            for (int index = 1; index < ENCODING_NAMES.length && code < 0; index++)
+            {
+                if (ENCODING_NAMES[index].equals(name))
+                {
+                    code = index;
+                }
+            }
+        }
+
+        return code;
+    }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMAttrImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMAttrImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMAttrImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,333 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import org.ajax4jsf.Messages;
+import org.w3c.dom.DOMException;
+import org.w3c.dom.TypeInfo;
+
+
+/**
+ * Tidy implementation of org.w3c.dom.Attr.
+ * <p>
+ * Adapts a tidy {@link AttVal} to the DOM attribute interface. The value can be changed through
+ * {@link #setValue(String)}; every child-manipulation method throws
+ * <code>NO_MODIFICATION_ALLOWED_ERR</code>, and the owner document / owner element are not tracked.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class DOMAttrImpl extends DOMNodeImpl implements org.w3c.dom.Attr, Cloneable
+{
+
+    /**
+     * wrapped org.w3c.tidy.AttVal.
+     */
+    protected AttVal avAdaptee;
+
+    /**
+     * instantiates a new DOMAttrImpl which wraps the given AttVal.
+     * @param adaptee wrapped AttVal
+     */
+    protected DOMAttrImpl(AttVal adaptee)
+    {
+        super(null); // must override all methods of DOMNodeImpl
+        this.avAdaptee = adaptee;
+    }
+
+    /**
+     * Same as {@link #getValue()}.
+     * @see org.w3c.dom.Node#getNodeValue()
+     */
+    public String getNodeValue() throws DOMException
+    {
+        return getValue();
+    }
+
+    /**
+     * Same as {@link #setValue(String)}.
+     * @see org.w3c.dom.Node#setNodeValue(java.lang.String)
+     */
+    public void setNodeValue(String nodeValue) throws DOMException
+    {
+        setValue(nodeValue);
+    }
+
+    /**
+     * Same as {@link #getName()}.
+     * @see org.w3c.dom.Node#getNodeName()
+     */
+    public String getNodeName()
+    {
+        return getName();
+    }
+
+    /**
+     * @return always <code>ATTRIBUTE_NODE</code>
+     * @see org.w3c.dom.Node#getNodeType()
+     */
+    public short getNodeType()
+    {
+        return org.w3c.dom.Node.ATTRIBUTE_NODE;
+    }
+
+    /**
+     * @return the attribute name held by the wrapped AttVal
+     * @see org.w3c.dom.Attr#getName
+     */
+    public String getName()
+    {
+        return avAdaptee.attribute;
+    }
+
+    /**
+     * @return <code>true</code> when the wrapped AttVal carries a non-null value
+     * @see org.w3c.dom.Attr#getSpecified
+     */
+    public boolean getSpecified()
+    {
+        return avAdaptee.value != null;
+    }
+
+    /**
+     * @return the attribute value, or the attribute name when no value is set
+     * @see org.w3c.dom.Attr#getValue
+     */
+    public String getValue()
+    {
+        // Thanks to Brett Knights brett(a)knightsofthenet.com for this fix.
+        return (avAdaptee.value == null) ? avAdaptee.attribute : avAdaptee.value;
+    }
+
+    /**
+     * Stores the new value directly in the wrapped AttVal.
+     * @see org.w3c.dom.Attr#setValue(java.lang.String)
+     */
+    public void setValue(String value)
+    {
+        avAdaptee.value = value;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getParentNode()
+     */
+    public org.w3c.dom.Node getParentNode()
+    {
+        // Attr.getParentNode() should always return null
+        // http://www.w3.org/TR/DOM-Level-2-Core/core.html#ID-637646024
+        return null;
+    }
+
+    /**
+     * @todo DOM level 2 getChildNodes() Not implemented. Returns an empty NodeList.
+     * @see org.w3c.dom.Node#getChildNodes()
+     */
+    public org.w3c.dom.NodeList getChildNodes()
+    {
+        // Calling getChildNodes on a DOM Attr node does return the children of the Attr, which are the
+        // text and EntityReference nodes that make up the Attr's content.
+        return new DOMNodeListImpl(null);
+    }
+
+    /**
+     * @todo DOM level 2 getFirstChild() Not implemented. Returns null.
+     * @see org.w3c.dom.Node#getFirstChild()
+     */
+    public org.w3c.dom.Node getFirstChild()
+    {
+        return null;
+    }
+
+    /**
+     * @todo DOM level 2 getLastChild() Not implemented. Returns null.
+     * @see org.w3c.dom.Node#getLastChild()
+     */
+    public org.w3c.dom.Node getLastChild()
+    {
+        return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getPreviousSibling()
+     */
+    public org.w3c.dom.Node getPreviousSibling()
+    {
+        // Attr.getPreviousSibling() should always return null
+        return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getNextSibling()
+     */
+    public org.w3c.dom.Node getNextSibling()
+    {
+        // Attr.getNextSibling() should always return null
+        return null;
+    }
+
+    /**
+     * @return always <code>null</code>
+     * @see org.w3c.dom.Node#getAttributes()
+     */
+    public org.w3c.dom.NamedNodeMap getAttributes()
+    {
+        return null;
+    }
+
+    /**
+     * @todo DOM level 2 getOwnerDocument() Not implemented. Returns null.
+     * @see org.w3c.dom.Node#getOwnerDocument()
+     */
+    public org.w3c.dom.Document getOwnerDocument()
+    {
+        return null;
+    }
+
+    /**
+     * Not supported.
+     * @throws DOMException always, with code <code>NO_MODIFICATION_ALLOWED_ERR</code>
+     * @see org.w3c.dom.Node#insertBefore(org.w3c.dom.Node, org.w3c.dom.Node)
+     */
+    public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild, org.w3c.dom.Node refChild)
+        throws DOMException
+    {
+        throw new DOMException(
+            DOMException.NO_MODIFICATION_ALLOWED_ERR,
+            Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+    }
+
+    /**
+     * Not supported.
+     * @throws DOMException always, with code <code>NO_MODIFICATION_ALLOWED_ERR</code>
+     * @see org.w3c.dom.Node#replaceChild(org.w3c.dom.Node, org.w3c.dom.Node)
+     */
+    public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild, org.w3c.dom.Node oldChild)
+        throws DOMException
+    {
+        throw new DOMException(
+            DOMException.NO_MODIFICATION_ALLOWED_ERR,
+            Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+    }
+
+    /**
+     * Not supported.
+     * @throws DOMException always, with code <code>NO_MODIFICATION_ALLOWED_ERR</code>
+     * @see org.w3c.dom.Node#removeChild(org.w3c.dom.Node)
+     */
+    public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild) throws DOMException
+    {
+        throw new DOMException(
+            DOMException.NO_MODIFICATION_ALLOWED_ERR,
+            Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+    }
+
+    /**
+     * Not supported.
+     * @throws DOMException always, with code <code>NO_MODIFICATION_ALLOWED_ERR</code>
+     * @see org.w3c.dom.Node#appendChild(org.w3c.dom.Node)
+     */
+    public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild) throws DOMException
+    {
+        throw new DOMException(
+            DOMException.NO_MODIFICATION_ALLOWED_ERR,
+            Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+    }
+
+    /**
+     * @return always <code>false</code>
+     * @see org.w3c.dom.Node#hasChildNodes()
+     */
+    public boolean hasChildNodes()
+    {
+        return false;
+    }
+
+    /**
+     * Returns a copy of this attribute; the <code>deep</code> flag is ignored.
+     * @see org.w3c.dom.Node#cloneNode(boolean)
+     */
+    public org.w3c.dom.Node cloneNode(boolean deep)
+    {
+        // http://java.sun.com/j2se/1.5.0/docs/api/index.html?org/w3c/dom/Attr.html
+        // Cloning an Attr always clones its children, since they represent its value, no matter whether
+        // this is a deep clone or not.
+        return (org.w3c.dom.Node) clone();
+    }
+
+    /**
+     * @todo DOM level 2 getOwnerElement() Not implemented. Returns null.
+     * @see org.w3c.dom.Attr#getOwnerElement()
+     */
+    public org.w3c.dom.Element getOwnerElement()
+    {
+        return null;
+    }
+
+    /**
+     * @todo DOM level 3 getSchemaTypeInfo() Not implemented. Returns null.
+     * @see org.w3c.dom.Attr#getSchemaTypeInfo()
+     */
+    public TypeInfo getSchemaTypeInfo()
+    {
+        return null;
+    }
+
+    /**
+     * @return <code>true</code> when the wrapped attribute is named exactly <code>id</code>
+     * @see org.w3c.dom.Attr#isId()
+     */
+    public boolean isId()
+    {
+        return "id".equals(this.avAdaptee.getAttribute());
+    }
+
+    /**
+     * Copies this adapter and gives the copy its own clone of the wrapped AttVal.
+     * @see java.lang.Object#clone()
+     */
+    protected Object clone()
+    {
+        DOMAttrImpl clone;
+        try
+        {
+            clone = (DOMAttrImpl) super.clone();
+        }
+        catch (CloneNotSupportedException e)
+        {
+            // should never happen (the class is Cloneable); keep the cause for diagnosis if it does
+            throw new RuntimeException("Clone not supported", e);
+        }
+        clone.avAdaptee = (AttVal) this.avAdaptee.clone();
+        return clone;
+    }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMAttrMapImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMAttrMapImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMAttrMapImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,222 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import org.ajax4jsf.Messages;
+import org.w3c.dom.DOMException;
+
+
+/**
+ * Tidy implementation of org.w3c.dom.NamedNodeMap.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org
</a>
+ * @author Andy Quick <a
href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a>
(translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class DOMAttrMapImpl implements org.w3c.dom.NamedNodeMap
+{
+
+ /**
+ * wrapped org.w3c.tidy.AttVal.
+ */
+ private AttVal first;
+
+ /**
+ * instantiates a new DOMAttrMapImpl for the given AttVal.
+ * @param firstAttVal wrapped AttVal
+ */
+ protected DOMAttrMapImpl(AttVal firstAttVal)
+ {
+ this.first = firstAttVal;
+ }
+
+ /**
+ * @see org.w3c.dom.NamedNodeMap#getNamedItem(java.lang.String)
+ */
+ public org.w3c.dom.Node getNamedItem(String name)
+ {
+ AttVal att = this.first;
+ while (att != null)
+ {
+ if (att.attribute.equals(name))
+ {
+ break;
+ }
+ att = att.next;
+ }
+ if (att != null)
+ {
+ return att.getAdapter();
+ }
+
+ return null;
+ }
+
+ /**
+ * @see org.w3c.dom.NamedNodeMap#item
+ */
+ public org.w3c.dom.Node item(int index)
+ {
+ int i = 0;
+ AttVal att = this.first;
+ while (att != null)
+ {
+ if (i >= index)
+ {
+ break;
+ }
+ i++;
+ att = att.next;
+ }
+ if (att != null)
+ {
+ return att.getAdapter();
+ }
+
+ return null;
+ }
+
+ /**
+ * @see org.w3c.dom.NamedNodeMap#getLength
+ */
+ public int getLength()
+ {
+ int len = 0;
+ AttVal att = this.first;
+ while (att != null)
+ {
+ len++;
+ att = att.next;
+ }
+ return len;
+ }
+
+ /**
+ * @todo DOM level 2 setNamedItem() Not implemented. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.NamedNodeMap#setNamedItem
+ */
+ public org.w3c.dom.Node setNamedItem(org.w3c.dom.Node arg) throws DOMException
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * @see org.w3c.dom.NamedNodeMap#removeNamedItem
+ */
+ public org.w3c.dom.Node removeNamedItem(String name) throws DOMException
+ {
+ AttVal att = this.first;
+ AttVal previous = null;
+
+ while (att != null)
+ {
+ if (att.attribute.equals(name))
+ {
+ if (previous == null)
+ {
+ this.first = att.getNext();
+ }
+ else
+ {
+ previous.setNext(att.getNext());
+ }
+
+ break;
+ }
+ previous = att;
+ att = att.next;
+ }
+
+ if (att != null)
+ {
+ return att.getAdapter();
+ }
+
+ throw new DOMException(DOMException.NOT_FOUND_ERR,
Messages.getMessage(Messages.NAMED_ITEM_NOT_FOUND_ERROR, name));
+ }
+
+ /**
+ * Not supported, returns <code>DOMException.NOT_SUPPORTED_ERR</code>.
+ * @see org.w3c.dom.NamedNodeMap#getNamedItemNS(java.lang.String, java.lang.String)
+ */
+ public org.w3c.dom.Node getNamedItemNS(String namespaceURI, String localName)
+ {
+ // NOT_SUPPORTED_ERR: May be raised if the implementation does not support the
feature "XML" and the language
+ // exposed through the Document does not support XML Namespaces (such as HTML
4.01).
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * Not supported, returns <code>DOMException.NOT_SUPPORTED_ERR</code>.
+ * @see org.w3c.dom.NamedNodeMap#setNamedItemNS(org.w3c.dom.Node)
+ */
+ public org.w3c.dom.Node setNamedItemNS(org.w3c.dom.Node arg) throws
org.w3c.dom.DOMException
+ {
+ // NOT_SUPPORTED_ERR: May be raised if the implementation does not support the
feature "XML" and the language
+ // exposed through the Document does not support XML Namespaces (such as HTML
4.01).
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * Not supported, returns <code>DOMException.NOT_SUPPORTED_ERR</code>.
+ * @see org.w3c.dom.NamedNodeMap#removeNamedItemNS(java.lang.String,
java.lang.String)
+ */
+ public org.w3c.dom.Node removeNamedItemNS(String namespaceURI, String localName)
throws org.w3c.dom.DOMException
+ {
+ // NOT_SUPPORTED_ERR: May be raised if the implementation does not support the
feature "XML" and the language
+ // exposed through the Document does not support XML Namespaces (such as HTML
4.01).
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMCDATASectionImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMCDATASectionImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMCDATASectionImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,90 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Tidy implementation of org.w3c.dom.CDATASection.
+ * <p>
+ * Inherits everything from DOMTextImpl and only overrides the node name and node type.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class DOMCDATASectionImpl extends DOMTextImpl implements org.w3c.dom.CDATASection
+{
+
+ /**
+ * Instantiates a new DOMCDATASectionImpl which wraps the given Node.
+ * @param adaptee wrapped node.
+ */
+ protected DOMCDATASectionImpl(Node adaptee)
+ {
+ super(adaptee);
+ }
+
+ /**
+ * @return always the fixed name <code>#cdata-section</code>
+ * @see org.w3c.dom.Node#getNodeName
+ */
+ public String getNodeName()
+ {
+ return "#cdata-section";
+ }
+
+ /**
+ * @return always <code>CDATA_SECTION_NODE</code>
+ * @see org.w3c.dom.Node#getNodeType
+ */
+ public short getNodeType()
+ {
+ return org.w3c.dom.Node.CDATA_SECTION_NODE;
+ }
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMCharacterDataImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMCharacterDataImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMCharacterDataImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,177 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import org.w3c.dom.DOMException;
+
+
+/**
+ * Tidy implementation of org.w3c.dom.CharacterData.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org
</a>
+ * @author Andy Quick <a
href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a>
(translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class DOMCharacterDataImpl extends DOMNodeImpl implements
org.w3c.dom.CharacterData
+{
+
+ /**
+ * Instantiates a new DOMCharacterDataImpl which wraps the given Node.
+ * @param adaptee wrapped node.
+ */
+ protected DOMCharacterDataImpl(Node adaptee)
+ {
+ super(adaptee);
+ }
+
+ /**
+ * @see org.w3c.dom.CharacterData#getData
+ */
+ public String getData() throws DOMException
+ {
+ return getNodeValue();
+ }
+
+ /**
+ * @see org.w3c.dom.CharacterData#getLength
+ */
+ public int getLength()
+ {
+ int len = 0;
+ if (adaptee.textarray != null && adaptee.start < adaptee.end)
+ {
+ len = adaptee.end - adaptee.start;
+ }
+ return len;
+ }
+
+ /**
+ * @see org.w3c.dom.CharacterData#substringData
+ */
+ public String substringData(int offset, int count) throws DOMException
+ {
+ int len;
+ String value = null;
+ if (count < 0)
+ {
+ throw new DOMException(DOMException.INDEX_SIZE_ERR, "Invalid
length");
+ }
+ if (adaptee.textarray != null && adaptee.start < adaptee.end)
+ {
+ if (adaptee.start + offset >= adaptee.end)
+ {
+ throw new DOMException(DOMException.INDEX_SIZE_ERR, "Invalid
offset");
+ }
+ len = count;
+ if (adaptee.start + offset + len - 1 >= adaptee.end)
+ {
+ len = adaptee.end - adaptee.start - offset;
+ }
+
+ value = TidyUtils.getString(adaptee.textarray, adaptee.start + offset, len);
+ }
+ return value;
+ }
+
+ /**
+ * Not supported.
+ * @see org.w3c.dom.CharacterData#setData
+ */
+ public void setData(String data) throws DOMException
+ {
+ // NOT SUPPORTED
+ throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "Not
supported");
+ }
+
+ /**
+ * Not supported.
+ * @see org.w3c.dom.CharacterData#appendData
+ */
+ public void appendData(String arg) throws DOMException
+ {
+ // NOT SUPPORTED
+ throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "Not
supported");
+ }
+
+ /**
+ * Not supported.
+ * @see org.w3c.dom.CharacterData#insertData
+ */
+ public void insertData(int offset, String arg) throws DOMException
+ {
+ // NOT SUPPORTED
+ throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "Not
supported");
+ }
+
+ /**
+ * Not supported.
+ * @see org.w3c.dom.CharacterData#deleteData
+ */
+ public void deleteData(int offset, int count) throws DOMException
+ {
+ // NOT SUPPORTED
+ throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "Not
supported");
+ }
+
+ /**
+ * Not supported.
+ * @see org.w3c.dom.CharacterData#replaceData
+ */
+ public void replaceData(int offset, int count, String arg) throws DOMException
+ {
+ // NOT SUPPORTED
+ throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "Not
supported");
+ }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMCommentImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMCommentImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMCommentImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,91 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Tidy implementation of org.w3c.dom.Comment.
+ * <p>
+ * Inherits everything from DOMCharacterDataImpl and only overrides the node name and node type.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class DOMCommentImpl extends DOMCharacterDataImpl implements org.w3c.dom.Comment
+{
+
+ /**
+ * Instantiates a new DOMCommentImpl which wraps the given Node.
+ * @param adaptee wrapped node.
+ */
+ protected DOMCommentImpl(Node adaptee)
+ {
+ super(adaptee);
+ }
+
+ /**
+ * @return always the fixed name <code>#comment</code>
+ * @see org.w3c.dom.Node#getNodeName
+ */
+ public String getNodeName()
+ {
+ return "#comment";
+ }
+
+ /**
+ * @return always <code>COMMENT_NODE</code>
+ * @see org.w3c.dom.Node#getNodeType
+ */
+ public short getNodeType()
+ {
+ return org.w3c.dom.Node.COMMENT_NODE;
+ }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMDocumentImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMDocumentImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMDocumentImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,460 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import org.ajax4jsf.Messages;
+import org.w3c.dom.DOMConfiguration;
+import org.w3c.dom.DOMException;
+
+
+/**
+ * DOMDocumentImpl.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org
</a>
+ * @author Andy Quick <a
href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a>
(translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class DOMDocumentImpl extends DOMNodeImpl implements org.w3c.dom.Document
+{
+
+ /**
+ * A DOM Document has its own TagTable.
+ */
+ private TagTable tt;
+
+ /**
+ * Instantiates a new Dom document with a default tag table.
+ * @param adaptee tidy Node
+ */
+ protected DOMDocumentImpl(Node adaptee)
+ {
+ super(adaptee);
+ this.tt = new TagTable();
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getNodeName
+ */
+ public String getNodeName()
+ {
+ return "#document";
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getNodeType
+ */
+ public short getNodeType()
+ {
+ return org.w3c.dom.Node.DOCUMENT_NODE;
+ }
+
+    /**
+     * Returns the adapter of the first DOCTYPE node found by following the <code>next</code> links that start at
+     * the wrapped node's <code>content</code>.
+     * @return the document type node, or <code>null</code> when no node of type DOCTYPE_TAG is found
+     * @see org.w3c.dom.Document#getDoctype
+     */
+    public org.w3c.dom.DocumentType getDoctype()
+    {
+        for (Node child = this.adaptee.content; child != null; child = child.next)
+        {
+            if (child.type == Node.DOCTYPE_TAG)
+            {
+                return (org.w3c.dom.DocumentType) child.getAdapter();
+            }
+        }
+
+        return null;
+    }
+
+ /**
+ * @todo DOM level 2 getImplementation() Not implemented. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Document#getImplementation
+ */
+ public org.w3c.dom.DOMImplementation getImplementation()
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * @see org.w3c.dom.Document#getDocumentElement
+ */
+ public org.w3c.dom.Element getDocumentElement()
+ {
+ Node node = this.adaptee.content;
+ while (node != null)
+ {
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+ {
+ break;
+ }
+ node = node.next;
+ }
+ if (node != null)
+ {
+ return (org.w3c.dom.Element) node.getAdapter();
+ }
+
+ return null;
+ }
+
+    /**
+     * Creates a new element with the given tag name, built as a START_END_TAG Tidy node using this document's
+     * tag table.
+     * @param tagName name of the element to create
+     * @return the DOM adapter of the newly created node
+     * @see org.w3c.dom.Document#createElement
+     */
+    public org.w3c.dom.Element createElement(String tagName) throws DOMException
+    {
+        // "new" never yields null, so no null check is needed on the created node.
+        Node node = new Node(Node.START_END_TAG, null, 0, 0, tagName, this.tt);
+        if (node.tag == null) // Fix Bug 121206
+        {
+            // the new node carries no tag definition: fall back to the generic XML one
+            node.tag = TagTable.XML_TAGS;
+        }
+        return (org.w3c.dom.Element) node.getAdapter();
+    }
+
+ /**
+ * @todo DOM level 2 createDocumentFragment() Not implemented. Throws
NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Document#createDocumentFragment
+ */
+ public org.w3c.dom.DocumentFragment createDocumentFragment()
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+    /**
+     * Creates a text node whose content is the byte form of <code>data</code> as produced by
+     * <code>TidyUtils.getBytes</code>.
+     * @param data text content of the new node
+     * @return the DOM adapter of the newly created TEXT_NODE
+     * @see org.w3c.dom.Document#createTextNode
+     */
+    public org.w3c.dom.Text createTextNode(String data)
+    {
+        byte[] textarray = TidyUtils.getBytes(data);
+        // "new" never yields null, so the adapter can be returned unconditionally.
+        Node node = new Node(Node.TEXT_NODE, textarray, 0, textarray.length);
+        return (org.w3c.dom.Text) node.getAdapter();
+    }
+
+    /**
+     * Creates a comment node whose content is the byte form of <code>data</code> as produced by
+     * <code>TidyUtils.getBytes</code>.
+     * @param data content of the new comment
+     * @return the DOM adapter of the newly created COMMENT_TAG node
+     * @see org.w3c.dom.Document#createComment
+     */
+    public org.w3c.dom.Comment createComment(String data)
+    {
+        byte[] textarray = TidyUtils.getBytes(data);
+        // "new" never yields null, so the adapter can be returned unconditionally.
+        Node node = new Node(Node.COMMENT_TAG, textarray, 0, textarray.length);
+        return (org.w3c.dom.Comment) node.getAdapter();
+    }
+
+ /**
+ * @todo DOM level 2 createCDATASection() Not supported. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Document#createCDATASection
+ */
+ public org.w3c.dom.CDATASection createCDATASection(String data) throws DOMException
+ {
+ // NOT_SUPPORTED_ERR: Raised if this document is an HTML document.
+// throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "HTML
document");
+ // HACK - create raw section, do not escape xml symbols.
+ byte[] textarray = TidyUtils.getBytes(data);
+ Node node = new Node(Node.CDATA_TEXT, textarray, 0, textarray.length);
+ if (node != null)
+ {
+ return (org.w3c.dom.CDATASection) node.getAdapter();
+ }
+
+ return null;
+ }
+
+ /**
+ * Always fails: processing instructions cannot be created through this document.
+ * @param target ignored
+ * @param data ignored
+ * @return never returns normally
+ * @throws DOMException always, with code NOT_SUPPORTED_ERR
+ * @todo DOM level 2 createProcessingInstruction() Not supported. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Document#createProcessingInstruction
+ */
+ public org.w3c.dom.ProcessingInstruction createProcessingInstruction(String target,
String data)
+ throws DOMException
+ {
+ // NOT_SUPPORTED_ERR: Raised if this document is an HTML document.
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "HTML
document");
+ }
+
+    /**
+     * Creates a new attribute with the given name and no value, quoted with a double quote. Its dictionary entry
+     * is looked up in the default attribute table.
+     * @param name name of the attribute to create
+     * @return the DOM adapter of the newly created attribute
+     * @see org.w3c.dom.Document#createAttribute
+     */
+    public org.w3c.dom.Attr createAttribute(String name) throws DOMException
+    {
+        // "new" never yields null, so no null check is needed on the created attribute.
+        AttVal av = new AttVal(null, null, '"', name, null);
+        av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
+        return av.getAdapter();
+    }
+
+ /**
+ * @todo DOM level 2 createEntityReference() Not supported. Throws
NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Document#createEntityReference
+ */
+ public org.w3c.dom.EntityReference createEntityReference(String name) throws
DOMException
+ {
+ // NOT_SUPPORTED_ERR: Raised if this document is an HTML document
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.METHOD_NOT_SUPPORTED, "createEntityReference"));
+ }
+
+ /**
+ * @see org.w3c.dom.Document#getElementsByTagName
+ */
+ public org.w3c.dom.NodeList getElementsByTagName(String tagname)
+ {
+ return new DOMNodeListByTagNameImpl(this.adaptee, tagname);
+ }
+
+ /**
+ * @todo DOM level 2 importNode() Not supported. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean)
+ */
+ public org.w3c.dom.Node importNode(org.w3c.dom.Node importedNode, boolean deep)
throws org.w3c.dom.DOMException
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.METHOD_NOT_SUPPORTED, "importNode"));
+ }
+
+ /**
+ * @todo DOM level 2 createAttributeNS() Not supported. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Document#createAttributeNS(java.lang.String, java.lang.String)
+ */
+ public org.w3c.dom.Attr createAttributeNS(String namespaceURI, String qualifiedName)
+ throws org.w3c.dom.DOMException
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.METHOD_NOT_SUPPORTED, "createAttributeNS"));
+ }
+
+ /**
+ * @todo DOM level 2 createElementNS() Not supported. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Document#createElementNS(java.lang.String, java.lang.String)
+ */
+ public org.w3c.dom.Element createElementNS(String namespaceURI, String
qualifiedName)
+ throws org.w3c.dom.DOMException
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.METHOD_NOT_SUPPORTED, "createElementNS"));
+ }
+
+ /**
+ * @todo DOM level 2 getElementsByTagNameNS() Not supported. Throws
NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Document#getElementsByTagNameNS(java.lang.String,
java.lang.String)
+ */
+ public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI, String
localName)
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.METHOD_NOT_SUPPORTED, "getElementsByTagNameNS"));
+ }
+
+ /**
+ * @todo DOM level 2 getElementById() Not implemented. Returns null.
+ * @see org.w3c.dom.Document#getElementById(java.lang.String)
+ */
+ public org.w3c.dom.Element getElementById(String elementId)
+ {
+ return null;
+ }
+
+ /**
+ * @todo DOM level 3 adoptNode() Not implemented.
+ * @see org.w3c.dom.Document#adoptNode(org.w3c.dom.Node)
+ */
+ public org.w3c.dom.Node adoptNode(org.w3c.dom.Node source) throws DOMException
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * @todo DOM level 3 getDocumentURI() Not implemented. Returns null.
+ * @see org.w3c.dom.Document#getDocumentURI()
+ */
+ public String getDocumentURI()
+ {
+ return null;
+ }
+
+ /**
+ * @todo DOM level 3 getDomConfig() Not implemented. Returns null.
+ * @see org.w3c.dom.Document#getDomConfig()
+ */
+/* public DOMConfiguration getDomConfig()
+ {
+ return null;
+ }
+*/
+ /**
+ * @todo DOM level 3 getInputEncoding() Not implemented. Returns null.
+ * @see org.w3c.dom.Document#getInputEncoding()
+ */
+ public String getInputEncoding()
+ {
+ return null;
+ }
+
+ /**
+ * @todo DOM level 3 getStrictErrorChecking() Not implemented. Returns true.
+ * @see org.w3c.dom.Document#getStrictErrorChecking()
+ */
+ public boolean getStrictErrorChecking()
+ {
+ return true;
+ }
+
+ /**
+ * @todo DOM level 3 getXmlEncoding() Not implemented. Returns null.
+ * @see org.w3c.dom.Document#getXmlEncoding()
+ */
+ public String getXmlEncoding()
+ {
+ return null;
+ }
+
+ /**
+ * @todo DOM level 3 getXmlStandalone() Not implemented. Returns false.
+ * @see org.w3c.dom.Document#getXmlStandalone()
+ */
+ public boolean getXmlStandalone()
+ {
+ return false;
+ }
+
+ /**
+ * Returns the XML version of this document, which is hard-coded.
+ * @return always <code>"1.0"</code>
+ * @todo DOM level 3 getXmlVersion() Not implemented. Always returns "1.0".
+ * @see org.w3c.dom.Document#getXmlVersion()
+ */
+ public String getXmlVersion()
+ {
+ // An attribute specifying, as part of the XML declaration, the version number of this document.
+ // If there is no declaration and if this document supports the "XML" feature, the value is "1.0".
+ return "1.0";
+ }
+
+ /**
+ * @todo DOM level 3 normalizeDocument() Not implemented. Do nothing.
+ * @see org.w3c.dom.Document#normalizeDocument()
+ */
+ public void normalizeDocument()
+ {
+ // do nothing
+ }
+
+ /**
+ * @todo DOM level 3 renameNode() Not implemented. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Document#renameNode(org.w3c.dom.Node, java.lang.String,
java.lang.String)
+ */
+ public org.w3c.dom.Node renameNode(org.w3c.dom.Node n, String namespaceURI, String
qualifiedName)
+ throws DOMException
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * @todo DOM level 3 setDocumentURI() Not implemented. Do nothing.
+ * @see org.w3c.dom.Document#setDocumentURI(java.lang.String)
+ */
+ public void setDocumentURI(String documentURI)
+ {
+ // do nothing
+ }
+
+ /**
+ * @todo DOM level 3 setStrictErrorChecking() Not implemented. Do nothing.
+ * @see org.w3c.dom.Document#setStrictErrorChecking(boolean)
+ */
+ public void setStrictErrorChecking(boolean strictErrorChecking)
+ {
+ // do nothing
+ }
+
+ /**
+ * @todo DOM level 3 setXmlStandalone() Not implemented. Do nothing.
+ * @see org.w3c.dom.Document#setXmlStandalone(boolean)
+ */
+ public void setXmlStandalone(boolean xmlStandalone) throws DOMException
+ {
+ // do nothing
+ }
+
+ /**
+ * @todo DOM level 3 setXmlVersion() Not implemented. Do nothing.
+ * @see org.w3c.dom.Document#setXmlVersion(java.lang.String)
+ */
+ public void setXmlVersion(String xmlVersion) throws DOMException
+ {
+ // do nothing
+ }
+
+ /**
+ * Returns the DOM configuration of this document. Not implemented.
+ * @return always <code>null</code>
+ * @todo DOM level 3 getDomConfig() Not implemented. Returns null.
+ * @see org.w3c.dom.Document#getDomConfig()
+ */
+ public DOMConfiguration getDomConfig() {
+ // not implemented: no configuration object is available
+ return null;
+ }
+
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMDocumentTypeImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMDocumentTypeImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMDocumentTypeImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,154 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * DOMDocumentTypeImpl.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org
</a>
+ * @author Andy Quick <a
href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a>
(translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class DOMDocumentTypeImpl extends DOMNodeImpl implements org.w3c.dom.DocumentType
+{
+
+ /**
+ * Instantiates a new DOM document type.
+ * @param adaptee Tidy Node
+ */
+ protected DOMDocumentTypeImpl(Node adaptee)
+ {
+ super(adaptee);
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getNodeType
+ */
+ public short getNodeType()
+ {
+ return org.w3c.dom.Node.DOCUMENT_TYPE_NODE;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getNodeName
+ */
+ public String getNodeName()
+ {
+ return getName();
+ }
+
+ /**
+ * @see org.w3c.dom.DocumentType#getName
+ */
+ public String getName()
+ {
+ String value = null;
+ if (adaptee.type == Node.DOCTYPE_TAG)
+ {
+
+ if (adaptee.textarray != null && adaptee.start < adaptee.end)
+ {
+ value = TidyUtils.getString(adaptee.textarray, adaptee.start, adaptee.end
- adaptee.start);
+ }
+ }
+ return value;
+ }
+
+ /**
+ * @todo DOM level 2 getEntities() Not implemented. Returns null.
+ * @see org.w3c.dom.DocumentType#getEntities()
+ */
+ public org.w3c.dom.NamedNodeMap getEntities()
+ {
+ return null;
+ }
+
+ /**
+ * @todo DOM level 2 getNotations() Not implemented. Returns null.
+ * @see org.w3c.dom.DocumentType#getNotations()
+ */
+ public org.w3c.dom.NamedNodeMap getNotations()
+ {
+ return null;
+ }
+
+ /**
+ * @todo DOM level 2 getPublicId() Not implemented. Returns null.
+ * @see org.w3c.dom.DocumentType#getPublicId()
+ */
+ public String getPublicId()
+ {
+ return null;
+ }
+
+ /**
+ * @todo DOM level 2 getSystemId() Not implemented. Returns null.
+ * @see org.w3c.dom.DocumentType#getSystemId()
+ */
+ public String getSystemId()
+ {
+ return null;
+ }
+
+ /**
+ * @todo DOM level 2 getInternalSubset() Not implemented. Returns null.
+ * @see org.w3c.dom.DocumentType#getInternalSubset()
+ */
+ public String getInternalSubset()
+ {
+ return null;
+ }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMElementImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMElementImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMElementImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,448 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import org.ajax4jsf.Messages;
+import org.w3c.dom.Attr;
+import org.w3c.dom.DOMException;
+import org.w3c.dom.TypeInfo;
+
+
+/**
+ * DOMElementImpl.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org
</a>
+ * @author Andy Quick <a
href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a>
(translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class DOMElementImpl extends DOMNodeImpl implements org.w3c.dom.Element
+{
+
+ /**
+ * Instantiates a new DOM element.
+ * @param adaptee Tidy Node.
+ */
+ protected DOMElementImpl(Node adaptee)
+ {
+ super(adaptee);
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getNodeType
+ */
+ public short getNodeType()
+ {
+ return org.w3c.dom.Node.ELEMENT_NODE;
+ }
+
+ /**
+ * @see org.w3c.dom.Element#getTagName
+ */
+ public String getTagName()
+ {
+ return super.getNodeName();
+ }
+
+    /**
+     * Looks up the value of the attribute with the given name. Names are compared with
+     * <code>String.equals</code>, so the match is case-sensitive.
+     * @param name attribute name to look for
+     * @return the stored value of the first matching attribute, an empty string when no attribute matches, or
+     * <code>null</code> when this element wraps no node
+     * @see org.w3c.dom.Element#getAttribute(java.lang.String)
+     */
+    public String getAttribute(String name)
+    {
+        if (this.adaptee == null)
+        {
+            return null;
+        }
+
+        for (AttVal candidate = this.adaptee.attributes; candidate != null; candidate = candidate.next)
+        {
+            if (candidate.attribute.equals(name))
+            {
+                return candidate.value;
+            }
+        }
+
+        return "";
+    }
+
+ /**
+ * @see org.w3c.dom.Element#setAttribute(java.lang.String, java.lang.String)
+ */
+ public void setAttribute(String name, String value) throws DOMException
+ {
+ if (this.adaptee == null)
+ {
+ return;
+ }
+
+ AttVal att = this.adaptee.attributes;
+ while (att != null)
+ {
+ if (att.attribute.equals(name))
+ {
+ break;
+ }
+ att = att.next;
+ }
+ if (att != null)
+ {
+ att.value = value;
+ }
+ else
+ {
+ att = new AttVal(null, null, '"', name, value);
+ att.dict = AttributeTable.getDefaultAttributeTable().findAttribute(att);
+ if (this.adaptee.attributes == null)
+ {
+ this.adaptee.attributes = att;
+ }
+ else
+ {
+ att.next = this.adaptee.attributes;
+ this.adaptee.attributes = att;
+ }
+ }
+ }
+
+    /**
+     * Unlinks the first attribute with the given name from the attribute list. Does nothing when this element
+     * wraps no node or when no attribute matches.
+     * @param name attribute name (compared with <code>String.equals</code>)
+     * @see org.w3c.dom.Element#removeAttribute(java.lang.String)
+     */
+    public void removeAttribute(String name) throws DOMException
+    {
+        if (this.adaptee == null)
+        {
+            return;
+        }
+
+        AttVal previous = null;
+        AttVal current = this.adaptee.attributes;
+        while (current != null && !current.attribute.equals(name))
+        {
+            previous = current;
+            current = current.next;
+        }
+
+        if (current == null)
+        {
+            return;
+        }
+
+        if (previous == null)
+        {
+            // the match is the head of the list
+            this.adaptee.attributes = current.next;
+        }
+        else
+        {
+            previous.next = current.next;
+        }
+    }
+
+    /**
+     * Returns the DOM adapter of the first attribute with the given name.
+     * @param name attribute name (compared with <code>String.equals</code>)
+     * @return the matching attribute node, or <code>null</code> when this element wraps no node or no attribute
+     * matches
+     * @see org.w3c.dom.Element#getAttributeNode(java.lang.String)
+     */
+    public org.w3c.dom.Attr getAttributeNode(String name)
+    {
+        if (this.adaptee == null)
+        {
+            return null;
+        }
+
+        for (AttVal candidate = this.adaptee.attributes; candidate != null; candidate = candidate.next)
+        {
+            if (candidate.attribute.equals(name))
+            {
+                return candidate.getAdapter();
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Adds an attribute node to this element. When an attribute with the same name already exists, the new
+     * attribute takes its place in the attribute list and the replaced node is returned; otherwise the new
+     * attribute is inserted at the head of the list.
+     * <p>
+     * Earlier code only swapped the adapter of the existing attribute and never linked the new attribute in, so
+     * the element kept the old value.
+     * @param newAttr attribute node to add; must be a <code>DOMAttrImpl</code>
+     * @return the replaced attribute node, <code>newAttr</code> itself when it is already attached under that
+     * name, or <code>null</code> when nothing was replaced or <code>newAttr</code> is <code>null</code>
+     * @throws DOMException WRONG_DOCUMENT_ERR when <code>newAttr</code> is not a <code>DOMAttrImpl</code>
+     * @see org.w3c.dom.Element#setAttributeNode(org.w3c.dom.Attr)
+     */
+    public org.w3c.dom.Attr setAttributeNode(org.w3c.dom.Attr newAttr) throws DOMException
+    {
+        if (newAttr == null)
+        {
+            return null;
+        }
+        if (!(newAttr instanceof DOMAttrImpl))
+        {
+            throw new DOMException(DOMException.WRONG_DOCUMENT_ERR,
+                Messages.getMessage(Messages.NOT_INSTANCE_OF_ERROR, "newAttr", "DOMAttrImpl"));
+        }
+
+        AttVal incoming = ((DOMAttrImpl) newAttr).avAdaptee;
+        String name = incoming.attribute;
+
+        AttVal previous = null;
+        AttVal existing = this.adaptee.attributes;
+        while (existing != null && !existing.attribute.equals(name))
+        {
+            previous = existing;
+            existing = existing.next;
+        }
+
+        if (existing == null)
+        {
+            // nothing to replace: the new attribute becomes the head of the list
+            if (this.adaptee.attributes != null)
+            {
+                incoming.next = this.adaptee.attributes;
+            }
+            this.adaptee.attributes = incoming;
+            return null;
+        }
+
+        if (existing == incoming)
+        {
+            // replacing an attribute node by itself has no effect
+            return newAttr;
+        }
+
+        org.w3c.dom.Attr replaced = existing.getAdapter();
+
+        // splice the new attribute into the position held by the old one
+        incoming.next = existing.next;
+        if (previous == null)
+        {
+            this.adaptee.attributes = incoming;
+        }
+        else
+        {
+            previous.next = incoming;
+        }
+        // keep handing out the caller's node for this attribute, as the earlier code did
+        incoming.adapter = newAttr;
+        // detach the replaced attribute so it no longer points into this element's list
+        existing.next = null;
+
+        return replaced;
+    }
+
+    /**
+     * Unlinks the attribute whose DOM adapter is the very same object as <code>oldAttr</code>.
+     * @param oldAttr attribute node to remove
+     * @return <code>oldAttr</code> once removed, or <code>null</code> when <code>oldAttr</code> is <code>null</code>
+     * @throws DOMException NOT_FOUND_ERR when no attribute of this element is backed by <code>oldAttr</code>
+     * @see org.w3c.dom.Element#removeAttributeNode(org.w3c.dom.Attr)
+     */
+    public org.w3c.dom.Attr removeAttributeNode(org.w3c.dom.Attr oldAttr) throws DOMException
+    {
+        if (oldAttr == null)
+        {
+            return null;
+        }
+
+        AttVal previous = null;
+        AttVal current = this.adaptee.attributes;
+        while (current != null && current.getAdapter() != oldAttr)
+        {
+            previous = current;
+            current = current.next;
+        }
+
+        if (current == null)
+        {
+            throw new DOMException(DOMException.NOT_FOUND_ERR, "oldAttr not found");
+        }
+
+        if (previous == null)
+        {
+            // the match is the head of the list
+            this.adaptee.attributes = current.next;
+        }
+        else
+        {
+            previous.next = current.next;
+        }
+        return oldAttr;
+    }
+
+ /**
+ * @see org.w3c.dom.Element#getElementsByTagName(java.lang.String)
+ */
+ public org.w3c.dom.NodeList getElementsByTagName(String name)
+ {
+ return new DOMNodeListByTagNameImpl(this.adaptee, name);
+ }
+
+ /**
+ * Does nothing: the subtree below this element is left exactly as it is.
+ * @todo DOM level 2 normalize() Not supported. Do nothing.
+ * @see org.w3c.dom.Element#normalize
+ */
+ public void normalize()
+ {
+ // do nothing
+ }
+
+ /**
+ * @todo DOM level 2 getAttributeNS() Not supported. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Element#getAttributeNS(java.lang.String, java.lang.String)
+ */
+ public String getAttributeNS(String namespaceURI, String localName)
+ {
+ // DOMException - NOT_SUPPORTED_ERR: May be raised if the implementation does not
support the feature "XML" and
+ // the language exposed through the Document does not support XML Namespaces
(such as HTML 4.01).
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * @todo DOM level 2 setAttributeNS() Not supported. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Element#setAttributeNS(java.lang.String, java.lang.String,
java.lang.String)
+ */
+ public void setAttributeNS(String namespaceURI, String qualifiedName, String value)
throws org.w3c.dom.DOMException
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * @todo DOM level 2 removeAttributeNS() Not supported. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Element#removeAttributeNS(java.lang.String, java.lang.String)
+ */
+ public void removeAttributeNS(String namespaceURI, String localName) throws
org.w3c.dom.DOMException
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * @todo DOM level 2 getAttributeNodeNS() Not supported. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Element#getAttributeNodeNS(java.lang.String, java.lang.String)
+ */
+ public org.w3c.dom.Attr getAttributeNodeNS(String namespaceURI, String localName)
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * @todo DOM level 2 setAttributeNodeNS() Not supported. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Element#setAttributeNodeNS(org.w3c.dom.Attr)
+ */
+ public org.w3c.dom.Attr setAttributeNodeNS(org.w3c.dom.Attr newAttr) throws
org.w3c.dom.DOMException
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * @todo DOM level 2 getElementsByTagNameNS() Not supported. Throws
NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Element#getElementsByTagNameNS(java.lang.String,
java.lang.String)
+ */
+ public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI, String
localName)
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+    /**
+     * Tells whether this element carries an attribute with the given name. Names are compared with
+     * <code>String.equals</code>, so the match is case-sensitive.
+     * @param name attribute name to look for
+     * @return <code>true</code> when an attribute with that name is present; <code>false</code> otherwise, including
+     * when this element wraps no node or <code>name</code> is <code>null</code>
+     * @see org.w3c.dom.Element#hasAttribute(java.lang.String)
+     */
+    public boolean hasAttribute(String name)
+    {
+        if (this.adaptee == null || name == null)
+        {
+            return false;
+        }
+
+        for (AttVal candidate = this.adaptee.attributes; candidate != null; candidate = candidate.next)
+        {
+            if (name.equals(candidate.attribute))
+            {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+ /**
+ * Namespace-aware attribute test. Not supported: the arguments are ignored.
+ * @param namespaceURI ignored
+ * @param localName ignored
+ * @return always <code>false</code>
+ * @todo DOM level 2 hasAttributeNS() Not supported. Returns false.
+ * @see org.w3c.dom.Element#hasAttributeNS(java.lang.String, java.lang.String)
+ */
+ public boolean hasAttributeNS(String namespaceURI, String localName)
+ {
+ return false;
+ }
+
+ /**
+ * @todo DOM level 3 getSchemaTypeInfo() Not supported. Returns null.
+ * @see org.w3c.dom.Element#getSchemaTypeInfo()
+ */
+/* public TypeInfo getSchemaTypeInfo()
+ {
+ return null;
+ }
+*/
+ /**
+ * @todo DOM level 3 setIdAttribute() Not supported. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Element#setIdAttribute(java.lang.String, boolean)
+ */
+ public void setIdAttribute(String name, boolean isId) throws DOMException
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * @todo DOM level 3 setIdAttributeNode() Not supported. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Element#setIdAttributeNode(org.w3c.dom.Attr, boolean)
+ */
+ public void setIdAttributeNode(Attr idAttr, boolean isId) throws DOMException
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * @todo DOM level 3 setIdAttributeNS() Not supported. Throws NOT_SUPPORTED_ERR.
+ * @see org.w3c.dom.Element#setIdAttributeNS(java.lang.String, java.lang.String,
boolean)
+ */
+ public void setIdAttributeNS(String namespaceURI, String localName, boolean isId)
throws DOMException
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * Returns the schema type information of this element. Not supported.
+ * @return always <code>null</code>
+ * @todo DOM level 3 getSchemaTypeInfo() Not supported. Returns null.
+ * @see org.w3c.dom.Element#getSchemaTypeInfo()
+ */
+ public TypeInfo getSchemaTypeInfo() {
+ // not supported: no type information is available
+ return null;
+ }
+
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMNodeImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMNodeImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMNodeImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,692 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+
+package org.ajax4jsf.org.w3c.tidy;
+
+import org.ajax4jsf.Messages;
+import org.w3c.dom.DOMException;
+import org.w3c.dom.UserDataHandler;
+
+
+/**
+ * DOMNodeImpl.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org
</a>
+ * @author Andy Quick <a
href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a>
(translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class DOMNodeImpl implements org.w3c.dom.Node
+{
+
+ /**
+ * Wrapped tidy node.
+ */
+ protected Node adaptee;
+
+ /**
+ * Instantiates a new DOM node.
+ * @param adaptee wrapped Tidy node
+ */
+ protected DOMNodeImpl(Node adaptee)
+ {
+ this.adaptee = adaptee;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getNodeValue
+ */
+ public String getNodeValue()
+ {
+ String value = ""; // BAK 10/10/2000 replaced null
+ if (adaptee.type == Node.TEXT_NODE
+ || adaptee.type == Node.CDATA_TAG
+ || adaptee.type == Node.COMMENT_TAG
+ || adaptee.type == Node.CDATA_TEXT
+ || adaptee.type == Node.PROC_INS_TAG)
+ {
+
+ if (adaptee.textarray != null && adaptee.start < adaptee.end)
+ {
+ value = TidyUtils.getString(adaptee.textarray, adaptee.start, adaptee.end
- adaptee.start);
+ }
+ }
+ return value;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#setNodeValue
+ */
+ public void setNodeValue(String nodeValue)
+ {
+ if (adaptee.type == Node.TEXT_NODE
+ || adaptee.type == Node.CDATA_TAG
+ || adaptee.type == Node.COMMENT_TAG
+ || adaptee.type == Node.CDATA_TEXT
+ || adaptee.type == Node.PROC_INS_TAG)
+ {
+ byte[] textarray = TidyUtils.getBytes(nodeValue);
+ adaptee.textarray = textarray;
+ adaptee.start = 0;
+ adaptee.end = textarray.length;
+ }
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getNodeName
+ */
+ public String getNodeName()
+ {
+ return adaptee.element;
+ }
+
+ /**
+ * Maps the Tidy node type of the wrapped node onto the matching DOM node type constant.
+ * @return one of the org.w3c.dom.Node type constants, or -1 when the Tidy type has no
+ * DOM counterpart in this mapping
+ * @see org.w3c.dom.Node#getNodeType
+ */
+ public short getNodeType()
+ {
+     switch (adaptee.type)
+     {
+         case Node.ROOT_NODE :
+             return org.w3c.dom.Node.DOCUMENT_NODE;
+         case Node.DOCTYPE_TAG :
+             return org.w3c.dom.Node.DOCUMENT_TYPE_NODE;
+         case Node.COMMENT_TAG :
+             return org.w3c.dom.Node.COMMENT_NODE;
+         case Node.PROC_INS_TAG :
+             return org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE;
+         case Node.TEXT_NODE :
+             return org.w3c.dom.Node.TEXT_NODE;
+         case Node.CDATA_TEXT :
+         case Node.CDATA_TAG :
+             return org.w3c.dom.Node.CDATA_SECTION_NODE;
+         case Node.START_TAG :
+         case Node.START_END_TAG :
+             return org.w3c.dom.Node.ELEMENT_NODE;
+         default :
+             return -1;
+     }
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getParentNode
+ */
+ public org.w3c.dom.Node getParentNode()
+ {
+ // Attributes are not children in the DOM, and do not have parents
+ if (adaptee.parent != null)
+ {
+ return adaptee.parent.getAdapter();
+ }
+ return null;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getChildNodes
+ */
+ public org.w3c.dom.NodeList getChildNodes()
+ {
+ return new DOMNodeListImpl(adaptee);
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getFirstChild
+ */
+ public org.w3c.dom.Node getFirstChild()
+ {
+ if (adaptee.content != null)
+ {
+ return adaptee.content.getAdapter();
+ }
+ return null;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getLastChild
+ */
+ public org.w3c.dom.Node getLastChild()
+ {
+ if (adaptee.last != null)
+ {
+ return adaptee.last.getAdapter();
+ }
+ return null;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getPreviousSibling
+ */
+ public org.w3c.dom.Node getPreviousSibling()
+ {
+ if (adaptee.prev != null)
+ {
+ return adaptee.prev.getAdapter();
+ }
+ return null;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getNextSibling
+ */
+ public org.w3c.dom.Node getNextSibling()
+ {
+ if (adaptee.next != null)
+ {
+ return adaptee.next.getAdapter();
+ }
+ return null;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getAttributes
+ */
+ public org.w3c.dom.NamedNodeMap getAttributes()
+ {
+ return new DOMAttrMapImpl(adaptee.attributes);
+ }
+
+ /**
+ * Finds the document this node belongs to by walking up the Tidy parent chain until a
+ * root node is reached.
+ * @return the adapter of the enclosing root node; null when this node is itself the
+ * root, has no wrapped node, or is not attached below a root
+ * @see org.w3c.dom.Node#getOwnerDocument
+ */
+ public org.w3c.dom.Document getOwnerDocument()
+ {
+     Node cursor = this.adaptee;
+     if (cursor == null || cursor.type == Node.ROOT_NODE)
+     {
+         // a document has no owner document
+         return null;
+     }
+
+     for (cursor = cursor.parent; cursor != null; cursor = cursor.parent)
+     {
+         if (cursor.type == Node.ROOT_NODE)
+         {
+             return (org.w3c.dom.Document) cursor.getAdapter();
+         }
+     }
+     return null;
+ }
+
+ /**
+ * Inserts newChild in front of refChild, or at the end of the child list when refChild
+ * is null.
+ * @param newChild node to insert; must be a DOMNodeImpl
+ * @param refChild direct child of this node to insert before, or null to append
+ * @return newChild, or null when newChild is null
+ * @throws DOMException WRONG_DOCUMENT_ERR when newChild is not a DOMNodeImpl,
+ * HIERARCHY_REQUEST_ERR when this node may not contain a node of newChild's type,
+ * NOT_FOUND_ERR when refChild is not a direct child of this node
+ * @see org.w3c.dom.Node#insertBefore
+ */
+ public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild, org.w3c.dom.Node refChild)
+ {
+     // TODO - handle newChild already in tree
+
+     if (newChild == null)
+     {
+         return null;
+     }
+     if (!(newChild instanceof DOMNodeImpl))
+     {
+         throw new DOMException(DOMException.WRONG_DOCUMENT_ERR,
+             Messages.getMessage(Messages.NOT_INSTANCE_OF_ERROR, "newChild", "DOMNodeImpl"));
+     }
+     Node incoming = ((DOMNodeImpl) newChild).adaptee;
+
+     // the root accepts only doctype / processing instruction nodes; a start tag accepts
+     // elements, comments, text and CDATA; other parent types are not checked
+     boolean allowed = true;
+     if (this.adaptee.type == Node.ROOT_NODE)
+     {
+         allowed = incoming.type == Node.DOCTYPE_TAG || incoming.type == Node.PROC_INS_TAG;
+     }
+     else if (this.adaptee.type == Node.START_TAG)
+     {
+         switch (incoming.type)
+         {
+             case Node.START_TAG :
+             case Node.START_END_TAG :
+             case Node.COMMENT_TAG :
+             case Node.TEXT_NODE :
+             case Node.CDATA_TEXT :
+             case Node.CDATA_TAG :
+                 break;
+             default :
+                 allowed = false;
+                 break;
+         }
+     }
+     if (!allowed)
+     {
+         throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR, "newChild cannot be a child of this node");
+     }
+
+     if (refChild == null)
+     {
+         this.adaptee.insertNodeAtEnd(incoming);
+         // a start-end tag that now has content is re-typed as a start tag
+         if (this.adaptee.type == Node.START_END_TAG)
+         {
+             this.adaptee.setType(Node.START_TAG);
+         }
+         return newChild;
+     }
+
+     // locate refChild among the direct children by adapter identity
+     Node anchor = this.adaptee.content;
+     while (anchor != null && anchor.getAdapter() != refChild)
+     {
+         anchor = anchor.next;
+     }
+     if (anchor == null)
+     {
+         throw new DOMException(DOMException.NOT_FOUND_ERR, "refChild not found");
+     }
+     Node.insertNodeBeforeElement(anchor, incoming);
+     return newChild;
+ }
+
+ /**
+ * Puts newChild into the position currently held by oldChild among the direct children
+ * of this node.
+ * <p>
+ * NOTE(review): as written, newChild also takes over oldChild's child list (content/last),
+ * replacing whatever children newChild had, and oldChild's own parent/sibling/child links are
+ * left untouched. This differs from what the DOM replaceChild contract suggests - confirm
+ * whether callers depend on it before changing.
+ * @param newChild replacement node; must be a DOMNodeImpl
+ * @param oldChild direct child of this node that is to be replaced
+ * @return oldChild, or null when newChild is null
+ * @throws DOMException WRONG_DOCUMENT_ERR when newChild is not a DOMNodeImpl,
+ * HIERARCHY_REQUEST_ERR when this node may not contain a node of newChild's type,
+ * NOT_FOUND_ERR when oldChild is null or not a direct child of this node
+ * @see org.w3c.dom.Node#replaceChild
+ */
+ public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild, org.w3c.dom.Node
oldChild)
+ {
+ // TODO - handle newChild already in tree
+
+ if (newChild == null)
+ {
+ return null;
+ }
+ if (!(newChild instanceof DOMNodeImpl))
+ {
+ throw new DOMException(DOMException.WRONG_DOCUMENT_ERR,
Messages.getMessage(Messages.NOT_INSTANCE_OF_ERROR, "newChild",
"DOMNodeImpl"));
+ }
+ DOMNodeImpl newCh = (DOMNodeImpl) newChild;
+
+ // the root accepts only doctype / processing instruction children
+ if (this.adaptee.type == Node.ROOT_NODE)
+ {
+ if (newCh.adaptee.type != Node.DOCTYPE_TAG && newCh.adaptee.type !=
Node.PROC_INS_TAG)
+ {
+ throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR, "newChild
cannot be a child of this node");
+ }
+ }
+ // a start tag accepts elements, comments, text and CDATA children
+ else if (this.adaptee.type == Node.START_TAG)
+ {
+ if (newCh.adaptee.type != Node.START_TAG
+ && newCh.adaptee.type != Node.START_END_TAG
+ && newCh.adaptee.type != Node.COMMENT_TAG
+ && newCh.adaptee.type != Node.TEXT_NODE
+ && newCh.adaptee.type != Node.CDATA_TEXT
+ && newCh.adaptee.type != Node.CDATA_TAG)
+ {
+ throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR, "newChild
cannot be a child of this node");
+ }
+ }
+ if (oldChild == null)
+ {
+ throw new DOMException(DOMException.NOT_FOUND_ERR, "oldChild not
found");
+ }
+
+ Node n;
+ // locate oldChild among the direct children by adapter identity
+ Node ref = this.adaptee.content;
+ while (ref != null)
+ {
+ if (ref.getAdapter() == oldChild)
+ {
+ break;
+ }
+ ref = ref.next;
+ }
+ if (ref == null)
+ {
+ throw new DOMException(DOMException.NOT_FOUND_ERR, "oldChild not
found");
+ }
+ // newChild copies every link of the old node, including its child list
+ newCh.adaptee.next = ref.next;
+ newCh.adaptee.prev = ref.prev;
+ newCh.adaptee.last = ref.last;
+ newCh.adaptee.parent = ref.parent;
+ newCh.adaptee.content = ref.content;
+ // repoint the parent's first/last child references if they were the old node
+ if (ref.parent != null)
+ {
+ if (ref.parent.content == ref)
+ {
+ ref.parent.content = newCh.adaptee;
+ }
+ if (ref.parent.last == ref)
+ {
+ ref.parent.last = newCh.adaptee;
+ }
+ }
+ // repoint the neighbouring siblings
+ if (ref.prev != null)
+ {
+ ref.prev.next = newCh.adaptee;
+ }
+ if (ref.next != null)
+ {
+ ref.next.prev = newCh.adaptee;
+ }
+ // the old node's children now report newChild as their parent
+ for (n = ref.content; n != null; n = n.next)
+ {
+ if (n.parent == ref)
+ {
+ n.parent = newCh.adaptee;
+ }
+ }
+
+ return oldChild;
+ }
+
+ /**
+ * Removes oldChild from the direct children of this node.
+ * @param oldChild direct child of this node to remove
+ * @return oldChild, or null when oldChild is null
+ * @throws DOMException NOT_FOUND_ERR when oldChild is not a direct child of this node
+ * @see org.w3c.dom.Node#removeChild
+ */
+ public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild)
+ {
+     if (oldChild == null)
+     {
+         return null;
+     }
+
+     // locate oldChild among the direct children by adapter identity
+     Node victim = this.adaptee.content;
+     while (victim != null && victim.getAdapter() != oldChild)
+     {
+         victim = victim.next;
+     }
+     if (victim == null)
+     {
+         // the message names the actual parameter (it used to say "refChild")
+         throw new DOMException(DOMException.NOT_FOUND_ERR, "oldChild not found");
+     }
+     Node.discardElement(victim);
+
+     // a start tag left without content is re-typed as a start-end tag
+     if (this.adaptee.content == null && this.adaptee.type == Node.START_TAG)
+     {
+         this.adaptee.setType(Node.START_END_TAG);
+     }
+
+     return oldChild;
+ }
+
+ /**
+ * Appends newChild to the end of this node's child list.
+ * @param newChild node to append; must be a DOMNodeImpl
+ * @return newChild, or null when newChild is null
+ * @throws DOMException WRONG_DOCUMENT_ERR when newChild is not a DOMNodeImpl,
+ * HIERARCHY_REQUEST_ERR when this node may not contain a node of newChild's type
+ * @see org.w3c.dom.Node#appendChild
+ */
+ public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild)
+ {
+     // TODO - handle newChild already in tree
+
+     if (newChild == null)
+     {
+         return null;
+     }
+     if (!(newChild instanceof DOMNodeImpl))
+     {
+         throw new DOMException(DOMException.WRONG_DOCUMENT_ERR,
+             Messages.getMessage(Messages.NOT_INSTANCE_OF_ERROR, "newChild", "DOMNodeImpl"));
+     }
+     Node incoming = ((DOMNodeImpl) newChild).adaptee;
+
+     // the root accepts only doctype / processing instruction nodes; a start tag accepts
+     // elements, comments, text and CDATA; other parent types are not checked
+     boolean rejected = false;
+     if (this.adaptee.type == Node.ROOT_NODE)
+     {
+         rejected = incoming.type != Node.DOCTYPE_TAG && incoming.type != Node.PROC_INS_TAG;
+     }
+     else if (this.adaptee.type == Node.START_TAG)
+     {
+         switch (incoming.type)
+         {
+             case Node.START_TAG :
+             case Node.START_END_TAG :
+             case Node.COMMENT_TAG :
+             case Node.TEXT_NODE :
+             case Node.CDATA_TEXT :
+             case Node.CDATA_TAG :
+                 break;
+             default :
+                 rejected = true;
+                 break;
+         }
+     }
+     if (rejected)
+     {
+         throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR, "newChild cannot be a child of this node");
+     }
+
+     this.adaptee.insertNodeAtEnd(incoming);
+
+     // a start-end tag that now has content is re-typed as a start tag
+     if (this.adaptee.type == Node.START_END_TAG)
+     {
+         this.adaptee.setType(Node.START_TAG);
+     }
+
+     return newChild;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#hasChildNodes
+ */
+ public boolean hasChildNodes()
+ {
+ return (adaptee.content != null);
+ }
+
+ /**
+ * @see org.w3c.dom.Node#cloneNode(boolean)
+ */
+ public org.w3c.dom.Node cloneNode(boolean deep)
+ {
+ Node node = adaptee.cloneNode(deep);
+ node.parent = null;
+ return node.getAdapter();
+ }
+
+ /**
+ * Do nothing: text nodes in html documents are important and jtidy already removes
useless text during parsing.
+ * @see org.w3c.dom.Node#normalize()
+ */
+ public void normalize()
+ {
+ // do nothing
+ }
+
+ /**
+ * DOM2 - not implemented.
+ * @see #isSupported(java.lang.String, java.lang.String)
+ */
+ public boolean supports(String feature, String version)
+ {
+ return isSupported(feature, version);
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getNamespaceURI()
+ */
+ public String getNamespaceURI()
+ {
+ return null;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getPrefix()
+ */
+ public String getPrefix()
+ {
+ return null;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#setPrefix(java.lang.String)
+ */
+ public void setPrefix(String prefix) throws DOMException
+ {
+ // The namespace prefix of this node, or null if it is unspecified. When it is
defined to be null, setting it
+ // has no effect, including if the node is read-only.
+ // do nothing
+ }
+
+ /**
+ * @see org.w3c.dom.Node#getLocalName()
+ */
+ public String getLocalName()
+ {
+ return getNodeName();
+ }
+
+ /**
+ * @see org.w3c.dom.Node#isSupported(java.lang.String, java.lang.String)
+ */
+ public boolean isSupported(String feature, String version)
+ {
+ return false;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#hasAttributes
+ */
+ public boolean hasAttributes()
+ {
+ // contributed by dlp(a)users.sourceforge.net
+ return this.adaptee.attributes != null;
+ }
+
+ /**
+ * @todo DOM level 3 compareDocumentPosition() Not implemented.
+ * @see org.w3c.dom.Node#compareDocumentPosition(org.w3c.dom.Node)
+ */
+ public short compareDocumentPosition(org.w3c.dom.Node other) throws DOMException
+ {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED));
+ }
+
+ /**
+ * @todo DOM level 3 getBaseURI() Not implemented. Returns null.
+ * @see org.w3c.dom.Node#getBaseURI()
+ */
+ public String getBaseURI()
+ {
+ return null;
+ }
+
+ /**
+ * @todo DOM level 3 getFeature() Not implemented. Returns null.
+ * @see org.w3c.dom.Node#getFeature(java.lang.String, java.lang.String)
+ */
+ public Object getFeature(String feature, String version)
+ {
+ return null;
+ }
+
+ /**
+ * @todo DOM level 3 getTextContent() Not implemented. Returns null.
+ * @see org.w3c.dom.Node#getTextContent()
+ */
+ public String getTextContent() throws DOMException
+ {
+ return null;
+ }
+
+ /**
+ * @todo DOM level 3 getUserData() Not implemented. Returns null.
+ * @see org.w3c.dom.Node#getUserData(java.lang.String)
+ */
+ public Object getUserData(String key)
+ {
+ return null;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#isDefaultNamespace(java.lang.String)
+ */
+ public boolean isDefaultNamespace(String namespaceURI)
+ {
+ return false;
+ }
+
+ /**
+ * @todo DOM level 3 isEqualNode() Not implemented. Returns false.
+ * @see org.w3c.dom.Node#isEqualNode(org.w3c.dom.Node)
+ */
+ public boolean isEqualNode(org.w3c.dom.Node arg)
+ {
+ return false;
+ }
+
+ /**
+ * Tells whether the given reference denotes this very node. Two references are the same
+ * node when they are the same adapter object; adapter identity is also what the child
+ * lookups in this class compare against.
+ * @param other node to compare with, may be null
+ * @return true when other is this object, false otherwise (including for null)
+ * @see org.w3c.dom.Node#isSameNode(org.w3c.dom.Node)
+ */
+ public boolean isSameNode(org.w3c.dom.Node other)
+ {
+     // previously always false, so even node.isSameNode(node) failed
+     return this == other;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#lookupNamespaceURI(java.lang.String)
+ */
+ public String lookupNamespaceURI(String prefix)
+ {
+ return null;
+ }
+
+ /**
+ * @see org.w3c.dom.Node#lookupPrefix(java.lang.String)
+ */
+ public String lookupPrefix(String namespaceURI)
+ {
+ return null;
+ }
+
+ /**
+ * @todo DOM level 3 setTextContent() Not implemented. Throws
NO_MODIFICATION_ALLOWED_ERR
+ * @see org.w3c.dom.Node#setTextContent(java.lang.String)
+ */
+ public void setTextContent(String textContent) throws DOMException
+ {
+ throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "Node is
read only");
+ }
+
+ /**
+ * @todo DOM level 3 setUserData() Not implemented. Returns null.
+ * @see org.w3c.dom.Node#setUserData(java.lang.String, java.lang.Object,
org.w3c.dom.UserDataHandler)
+ */
+ public Object setUserData(String key, Object data, UserDataHandler handler)
+ {
+ return null;
+ }
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMNodeListByTagNameImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMNodeListByTagNameImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMNodeListByTagNameImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,164 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * DOMNodeListByTagNameImpl. The items in the <code>NodeList</code> are accessible via an integral index, starting
+ * from 0. The list is not materialised: every call to {@link #item(int)} or {@link #getLength()} walks the Tidy
+ * tree again in pre-order, starting at (and including) the first node, and keeps its traversal state in instance
+ * fields.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class DOMNodeListByTagNameImpl implements org.w3c.dom.NodeList
+{
+
+    /**
+     * First node.
+     */
+    private Node first;
+
+    /**
+     * Tag name; "*" matches every element.
+     */
+    private String tagName;
+
+    /**
+     * Current index (number of matching elements seen so far in the running traversal).
+     */
+    private int currIndex;
+
+    /**
+     * Max index: the traversal stops once more than this many matches have been seen.
+     */
+    private int maxIndex;
+
+    /**
+     * Current node (last matching element seen by the running traversal).
+     */
+    private Node currNode;
+
+    /**
+     * Instantiates a new DOMNodeListByTagName.
+     * @param first first node.
+     * @param tagName tag name
+     */
+    protected DOMNodeListByTagNameImpl(Node first, String tagName)
+    {
+        this.first = first;
+        this.tagName = tagName;
+    }
+
+    /**
+     * Returns the index-th matching element in pre-order.
+     * @param index zero-based position in the list
+     * @return the matching node, or null when index is negative or not smaller than the number of matches
+     * @see org.w3c.dom.NodeList#item
+     */
+    public org.w3c.dom.Node item(int index)
+    {
+        // A negative index can never match. Without this guard the traversal stopped at once
+        // and the node remembered from an earlier call was handed out.
+        if (index < 0)
+        {
+            return null;
+        }
+
+        currIndex = 0;
+        maxIndex = index;
+        currNode = null; // never report a match left over from a previous traversal
+        preTraverse(first);
+
+        if (currIndex > maxIndex && currNode != null)
+        {
+            return currNode.getAdapter();
+        }
+
+        return null;
+    }
+
+    /**
+     * Counts the matching elements by traversing the whole tree.
+     * @return number of matching elements
+     * @see org.w3c.dom.NodeList#getLength
+     */
+    public int getLength()
+    {
+        currIndex = 0;
+        maxIndex = Integer.MAX_VALUE;
+        preTraverse(first);
+        return currIndex;
+    }
+
+    /**
+     * Traverse the node list in pre-order, counting elements whose name matches the tag name (or any element when
+     * the tag name is "*") until more than <code>maxIndex</code> matches have been seen.
+     * @param node Node to start from; null is ignored
+     */
+    protected void preTraverse(Node node)
+    {
+        if (node == null)
+        {
+            return;
+        }
+
+        if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+        {
+            if (currIndex <= maxIndex && (tagName.equals("*") || tagName.equals(node.element)))
+            {
+                currIndex += 1;
+                currNode = node;
+            }
+        }
+        if (currIndex > maxIndex)
+        {
+            return;
+        }
+
+        // stop visiting siblings as soon as the requested item has been found
+        for (Node child = node.content; child != null && currIndex <= maxIndex; child = child.next)
+        {
+            preTraverse(child);
+        }
+    }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMNodeListImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMNodeListImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMNodeListImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,129 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * DOMNodeListImpl. The items in the <code>NodeList</code> are accessible via an integral index, starting from 0.
+ * The list is a live view over the direct children of the wrapped parent node: every call walks the parent's
+ * child chain again.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class DOMNodeListImpl implements org.w3c.dom.NodeList
+{
+
+    /**
+     * Parent Node.
+     */
+    private Node parent;
+
+    /**
+     * Instantiates a new DOM node list.
+     * @param parent parent Node
+     */
+    protected DOMNodeListImpl(Node parent)
+    {
+        this.parent = parent;
+    }
+
+    /**
+     * Returns the index-th direct child of the parent node.
+     * @param index zero-based position in the list
+     * @return the child node, or null when there is no parent, index is negative, or index is not smaller than
+     * the number of children
+     * @see org.w3c.dom.NodeList#item(int)
+     */
+    public org.w3c.dom.Node item(int index)
+    {
+        // a negative index used to fall through to the first child; it is not a valid position
+        if (parent == null || index < 0)
+        {
+            return null;
+        }
+
+        Node node = this.parent.content;
+        for (int i = 0; node != null && i < index; i++)
+        {
+            node = node.next;
+        }
+
+        if (node != null)
+        {
+            return node.getAdapter();
+        }
+        return null;
+    }
+
+    /**
+     * Counts the direct children of the parent node.
+     * @return number of children, 0 when there is no parent
+     * @see org.w3c.dom.NodeList#getLength
+     */
+    public int getLength()
+    {
+        if (parent == null)
+        {
+            return 0;
+        }
+
+        int len = 0;
+        for (Node node = this.parent.content; node != null; node = node.next)
+        {
+            len++;
+        }
+        return len;
+    }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMProcessingInstructionImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMProcessingInstructionImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMProcessingInstructionImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,112 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import org.ajax4jsf.Messages;
+import org.w3c.dom.DOMException;
+
+
+/**
+ * DOMProcessingInstructionImpl. DOM view of a Tidy processing instruction node: the data is read through
+ * {@link #getNodeValue()}, the target is not available, and {@link #setData(String)} always refuses the change.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class DOMProcessingInstructionImpl extends DOMNodeImpl implements
org.w3c.dom.ProcessingInstruction
+{
+
+ /**
+ * Instantiates a new DOM processing instruction.
+ * @param adaptee wrapped Tidy node
+ */
+ protected DOMProcessingInstructionImpl(Node adaptee)
+ {
+ super(adaptee);
+ }
+
+ /**
+ * Always reports a processing instruction, regardless of the wrapped node's Tidy type.
+ * @return org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE
+ * @see org.w3c.dom.Node#getNodeType
+ */
+ public short getNodeType()
+ {
+ return org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE;
+ }
+
+ /**
+ * @todo DOM level 2 getTarget() Not implemented. Returns null.
+ * @return always null
+ * @see org.w3c.dom.ProcessingInstruction#getTarget
+ */
+ public String getTarget()
+ {
+ return null;
+ }
+
+ /**
+ * Returns the content of the processing instruction; same value as getNodeValue().
+ * @return the node value
+ * @see org.w3c.dom.ProcessingInstruction#getData
+ */
+ public String getData()
+ {
+ return getNodeValue();
+ }
+
+ /**
+ * Not supported: the data is treated as read-only here.
+ * NOTE(review): setNodeValue() is not overridden in this class, so the inherited
+ * implementation may still allow the text to be changed - confirm whether that is intended.
+ * @param data ignored
+ * @throws DOMException always, with code NO_MODIFICATION_ALLOWED_ERR
+ * @see org.w3c.dom.ProcessingInstruction#setData(java.lang.String)
+ */
+ public void setData(String data) throws DOMException
+ {
+ throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR,
Messages.getMessage(Messages.READ_ONLY_NODE_ERROR));
+ }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMTextImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMTextImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/DOMTextImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,131 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import org.ajax4jsf.Messages;
+import org.w3c.dom.DOMException;
+import org.w3c.dom.Text;
+
+
+/**
+ * DOM {@link Text} view over a Tidy text node. Splitting is rejected; the DOM level 3 additions are only stubbed.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class DOMTextImpl extends DOMCharacterDataImpl implements Text {
+
+    /**
+     * Creates a text view for the given Tidy node.
+     * @param adaptee Tidy node passed on to the superclass constructor
+     */
+    protected DOMTextImpl(Node adaptee) {
+        super(adaptee);
+    }
+
+    /**
+     * Identifies this node as a text node.
+     * @return always {@link org.w3c.dom.Node#TEXT_NODE}
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType() {
+        return org.w3c.dom.Node.TEXT_NODE;
+    }
+
+    /**
+     * Returns the fixed node name used for text nodes.
+     * @return always <code>"#text"</code>
+     * @see org.w3c.dom.Node#getNodeName
+     */
+    public String getNodeName() {
+        return "#text";
+    }
+
+    /**
+     * Not supported.
+     * @todo DOM level 2 splitText() Not supported. Throws NO_MODIFICATION_ALLOWED_ERR.
+     * @param offset ignored
+     * @return never returns normally
+     * @throws DOMException always, with code {@link DOMException#NO_MODIFICATION_ALLOWED_ERR}
+     * @see org.w3c.dom.Text#splitText(int)
+     */
+    public Text splitText(int offset) throws DOMException {
+        String reason = Messages.getMessage(Messages.DOM_METHOD_NOT_SUPPORTED);
+        throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, reason);
+    }
+
+    /**
+     * Not implemented.
+     * @todo DOM level 3 getWholeText() Not implemented. Returns null.
+     * @return always <code>null</code>
+     * @see org.w3c.dom.Text#getWholeText()
+     */
+    public String getWholeText() {
+        return null;
+    }
+
+    /**
+     * Not implemented.
+     * @todo DOM level 3 isElementContentWhitespace() Not implemented. Returns false.
+     * @return always <code>false</code>
+     * @see org.w3c.dom.Text#isElementContentWhitespace()
+     */
+    public boolean isElementContentWhitespace() {
+        return false;
+    }
+
+    /**
+     * Not implemented: the content is left untouched.
+     * @todo DOM level 3 replaceWholeText() Not implemented. Returns the same node.
+     * @param content ignored
+     * @return this node, unchanged
+     * @see org.w3c.dom.Text#replaceWholeText(java.lang.String)
+     */
+    public Text replaceWholeText(String content) throws DOMException {
+        return this;
+    }
+}
\ No newline at end of file
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Dict.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Dict.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Dict.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,406 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Tag dictionary node. If the document uses just HTML 2.0 tags and attributes, describe it as HTML 2.0. Similarly for
+ * HTML 3.2 and the 3 flavors of HTML 4.0. If there are proprietary tags and attributes, then describe it as HTML
+ * Proprietary. If it includes the xml-lang or xmlns attributes but is otherwise HTML 2.0, 3.2 or 4.0, then describe
+ * it as one of the flavors of Voyager (strict, loose or frameset).
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class Dict {
+
+    // ---- content model flags (one bit each, except CM_UNKNOWN) ----
+
+    /** Content model: unknown. */
+    public static final int CM_UNKNOWN = 0;
+
+    /** Content model: empty. */
+    public static final int CM_EMPTY = 1 << 0;
+
+    /** Content model: html. */
+    public static final int CM_HTML = 1 << 1;
+
+    /** Content model: head. */
+    public static final int CM_HEAD = 1 << 2;
+
+    /** Content model: block. */
+    public static final int CM_BLOCK = 1 << 3;
+
+    /** Content model: inline. */
+    public static final int CM_INLINE = 1 << 4;
+
+    /** Content model: list. */
+    public static final int CM_LIST = 1 << 5;
+
+    /** Content model: definition list. */
+    public static final int CM_DEFLIST = 1 << 6;
+
+    /** Content model: table. */
+    public static final int CM_TABLE = 1 << 7;
+
+    /** Content model: rowgroup. */
+    public static final int CM_ROWGRP = 1 << 8;
+
+    /** Content model: row. */
+    public static final int CM_ROW = 1 << 9;
+
+    /** Content model: field. */
+    public static final int CM_FIELD = 1 << 10;
+
+    /** Content model: object. */
+    public static final int CM_OBJECT = 1 << 11;
+
+    /** Content model: param. */
+    public static final int CM_PARAM = 1 << 12;
+
+    /** Content model: frames. */
+    public static final int CM_FRAMES = 1 << 13;
+
+    /** Content model: heading. */
+    public static final int CM_HEADING = 1 << 14;
+
+    /** Content model: opt. */
+    public static final int CM_OPT = 1 << 15;
+
+    /** Content model: img. */
+    public static final int CM_IMG = 1 << 16;
+
+    /** Content model: mixed. */
+    public static final int CM_MIXED = 1 << 17;
+
+    /** Content model: no indent. */
+    public static final int CM_NO_INDENT = 1 << 18;
+
+    /** Content model: obsolete. */
+    public static final int CM_OBSOLETE = 1 << 19;
+
+    /** Content model: new. */
+    public static final int CM_NEW = 1 << 20;
+
+    /** Content model: omitst. */
+    public static final int CM_OMITST = 1 << 21;
+
+    // ---- version flags (one bit each, except VERS_UNKNOWN) ----
+
+    /** Version: unknown. */
+    public static final short VERS_UNKNOWN = 0;
+
+    /** Version: html 2.0. */
+    public static final short VERS_HTML20 = 1 << 0;
+
+    /** Version: html 3.2. */
+    public static final short VERS_HTML32 = 1 << 1;
+
+    /** Version: html 4.0 strict. */
+    public static final short VERS_HTML40_STRICT = 1 << 2;
+
+    /** Version: html 4.0 transitional. */
+    public static final short VERS_HTML40_LOOSE = 1 << 3;
+
+    /** Version: html 4.0 frameset. */
+    public static final short VERS_FRAMESET = 1 << 4;
+
+    /** Version: xml. */
+    public static final short VERS_XML = 1 << 5;
+
+    /** Version: netscape. */
+    public static final short VERS_NETSCAPE = 1 << 6;
+
+    /** Version: microsoft. */
+    public static final short VERS_MICROSOFT = 1 << 7;
+
+    /** Version: sun. */
+    public static final short VERS_SUN = 1 << 8;
+
+    /** Version: malformed. */
+    public static final short VERS_MALFORMED = 1 << 9;
+
+    /** Version: xhtml 1.1. */
+    public static final short VERS_XHTML11 = 1 << 10;
+
+    /** Version: xhtml basic. */
+    public static final short VERS_BASIC = 1 << 11;
+
+    // ---- combined version masks ----
+
+    /** All tags and attributes are ok in proprietary version of HTML. */
+    public static final short VERS_PROPRIETARY = VERS_NETSCAPE | VERS_MICROSOFT | VERS_SUN;
+
+    /** Tags/attrs in HTML4 but not in earlier version. */
+    public static final short VERS_HTML40 = VERS_HTML40_STRICT | VERS_HTML40_LOOSE | VERS_FRAMESET;
+
+    /** Tags/attrs which are in all versions of HTML except strict. */
+    public static final short VERS_LOOSE = VERS_HTML32 | VERS_HTML40_LOOSE | VERS_FRAMESET;
+
+    /** Tags/attrs in HTML 4 loose and frameset. */
+    public static final short VERS_IFRAME = VERS_HTML40_LOOSE | VERS_FRAMESET;
+
+    /** Tags/attrs in all versions from HTML 3.2 onwards. */
+    public static final short VERS_FROM32 = VERS_HTML40_STRICT | VERS_LOOSE;
+
+    /** Versions with on... attributes. */
+    public static final short VERS_EVENTS = VERS_HTML40 | VERS_XHTML11;
+
+    /** Tags/attrs in any version. */
+    public static final short VERS_ALL = VERS_HTML20 | VERS_HTML32 | VERS_HTML40 | VERS_XHTML11 | VERS_BASIC;
+
+    // ---- kinds of user-defined tags ----
+
+    /** Types of tags that the user can define: empty tag. */
+    public static final short TAGTYPE_EMPTY = 1;
+
+    /** Types of tags that the user can define: inline tag. */
+    public static final short TAGTYPE_INLINE = 2;
+
+    /** Types of tags that the user can define: block tag. */
+    public static final short TAGTYPE_BLOCK = 4;
+
+    /** Types of tags that the user can define: pre tag. */
+    public static final short TAGTYPE_PRE = 8;
+
+    /** Tag name. */
+    protected String name;
+
+    /** Version in which this tag is defined (presumably a combination of the VERS_* flags). */
+    protected short versions;
+
+    /** Content model (CM_* constants). */
+    protected int model;
+
+    /** Parser for this tag. */
+    private Parser parser;
+
+    /** Validator for this tag. */
+    private TagCheck chkattrs;
+
+    /**
+     * Creates a tag definition from its five properties; the values are stored as given.
+     * @param name tag name
+     * @param versions version in which this tag is defined
+     * @param model model (CM_* constants)
+     * @param parser parser for this tag
+     * @param chkattrs validator for this tag (can be null)
+     */
+    public Dict(String name, short versions, int model, Parser parser, TagCheck chkattrs) {
+        this.parser = parser;
+        this.chkattrs = chkattrs;
+        this.name = name;
+        this.versions = versions;
+        this.model = model;
+    }
+
+    /**
+     * Returns the tag name.
+     * @return the name given at construction
+     */
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * Returns the versions in which this tag is defined.
+     * @return the versions value given at construction
+     */
+    public short getVersions() {
+        return versions;
+    }
+
+    /**
+     * Returns the content model.
+     * @return the model value given at construction (CM_* constants)
+     */
+    public int getModel() {
+        return model;
+    }
+
+    /**
+     * Returns the parser for this tag.
+     * @return the current parser
+     */
+    public Parser getParser() {
+        return parser;
+    }
+
+    /**
+     * Replaces the parser for this tag.
+     * @param parser The parser to set.
+     */
+    public void setParser(Parser parser) {
+        this.parser = parser;
+    }
+
+    /**
+     * Returns the validator for this tag.
+     * @return the current validator, as last set (the constructor documents that it can be null)
+     */
+    public TagCheck getChkattrs() {
+        return chkattrs;
+    }
+
+    /**
+     * Replaces the validator for this tag.
+     * @param chkattrs The chkattrs to set.
+     */
+    public void setChkattrs(TagCheck chkattrs) {
+        this.chkattrs = chkattrs;
+    }
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/EncodingNameMapper.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/EncodingNameMapper.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/EncodingNameMapper.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,356 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.util.HashMap;
+import java.util.Map;
+
+
+/**
+ * Maps between Java and IANA character encoding names. Also handles the encoding aliases used in tidy c.
+ * <p>
+ * Lookups are case-insensitive. A number of vendor prefixes (see {@link #handlecommonAlias(String)}) are stripped
+ * before the lookup so that, for example, <code>cp1252</code>, <code>windows-1252</code> and <code>1252</code> all
+ * resolve to the same entry.
+ * </p>
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ * @see <a href="http://www.iana.org/assignments/character-sets">IANA character sets</a>
+ */
+public abstract class EncodingNameMapper
+{
+
+    /**
+     * Map containing uppercase alias - {standard iana, standard java}. Filled once by the static initializer and
+     * only read afterwards.
+     */
+    private static final Map<String, String[]> encodingNameMap = new HashMap<String, String[]>();
+
+    static
+    {
+        register("ISO-8859-1", "ISO8859_1", "ISO-8859-1", "ISO8859_1", "ISO-IR-100", "LATIN1", "CSISOLATIN1", "L1",
+            "819");
+        register("US-ASCII", "ASCII", "US-ASCII", "ASCII", "ISO-IR-6", "CSASCII", "ISO646-US", "US", "367");
+
+        register("UTF-8", "UTF8", "UTF-8", "UTF8");
+        register("UTF-16", "Unicode", "UTF-16", "UNICODE", "UTF16"); // UTF16: tidy
+        register("UTF-16BE", "UnicodeBig", "UTF-16BE", "UNICODEBIG", "UTF16-BE", "UTF16BE"); // UTF16BE: tidy
+        register("UTF-16LE", "UnicodeLittle", "UTF-16LE", "UNICODELITTLE", "UTF16-LE", "UTF16LE"); // UTF16LE: tidy
+
+        register("BIG5", "Big5", "BIG5", "CSBIG5");
+
+        register("SHIFT_JIS", "SJIS", "SJIS", "SHIFT_JIS", "SHIFTJIS"); // SHIFTJIS: tidy
+        register("CSSHIFTJIS", "SJIS", "CSSHIFTJIS");
+        register("MS_KANJI", "SJIS", "MS_KANJI");
+
+        register("ISO-2022-JP", "JIS", "JIS", "ISO-2022-JP", "ISO2022"); // ISO2022: tidy
+        register("CSISO2022JP", "JIS", "CSISO2022JP");
+
+        register("ISO-2022-KR", "ISO2022KR", "ISO2022KR", "ISO-2022-KR");
+        register("CSISO2022KR", "ISO2022KR", "CSISO2022KR");
+        register("ISO-2022-CN", "ISO2022CN", "ISO-2022-CN", "ISO2022CN");
+
+        register("macintosh", "MacRoman", "MACROMAN", "MACINTOSH", "MACINTOSH ROMAN"); // MACROMAN: tidy
+
+        // Code pages are keyed by their bare number: handlecommonAlias() strips the IBM/CP/WINDOWS- prefixes.
+        register("IBM037", "CP037", "37");
+        register("IBM273", "CP273", "273");
+        register("IBM277", "CP277", "277");
+        register("IBM278", "CP278", "278");
+        register("IBM280", "CP280", "280");
+        register("IBM284", "CP284", "284");
+        register("IBM285", "CP285", "285");
+        register("IBM290", "CP290", "290");
+        register("IBM297", "CP297", "297");
+        register("IBM420", "CP420", "420");
+        register("IBM424", "CP424", "424");
+        register("IBM437", "CP437", "437");
+        register("IBM500", "CP500", "500");
+        register("IBM775", "CP775", "775");
+        register("IBM850", "CP850", "850");
+        register("IBM852", "CP852", "852", "CSPCP852");
+        register("IBM855", "CP855", "855");
+        register("IBM857", "CP857", "857");
+        register("IBM00858", "Cp858", "858", "0858");
+        register("IBM860", "CP860", "860");
+        register("IBM861", "CP861", "861", "IS");
+        register("IBM862", "CP862", "862");
+        register("IBM863", "CP863", "863");
+        register("IBM864", "CP864", "864");
+        register("IBM865", "CP865", "865");
+        register("IBM866", "CP866", "866");
+        register("IBM868", "CP868", "868", "AR");
+        register("IBM869", "CP869", "869", "GR");
+        register("IBM870", "CP870", "870");
+        register("IBM871", "CP871", "871", "EBCDIC-CP-IS");
+        register("CP918", "CP918", "918");
+        register("IBM00924", "CP924", "924", "0924");
+        register("IBM1026", "CP1026", "1026");
+        register("IBM1047", "Cp1047", "1047");
+        register("IBM01140", "Cp1140", "1140");
+        register("IBM01141", "Cp1141", "1141");
+        register("IBM01142", "Cp1142", "1142");
+        register("IBM01143", "Cp1143", "1143");
+        register("IBM01144", "Cp1144", "1144");
+        register("IBM01145", "Cp1145", "1145");
+        register("IBM01146", "Cp1146", "1146");
+        register("IBM01147", "Cp1147", "1147");
+        register("IBM01148", "Cp1148", "1148");
+        register("IBM01149", "Cp1149", "1149");
+        register("WINDOWS-1250", "Cp1250", "1250");
+        register("WINDOWS-1251", "Cp1251", "1251");
+        register("WINDOWS-1252", "Cp1252", "1252", "WIN1252"); // WIN1252: tidy
+        register("WINDOWS-1253", "Cp1253", "1253");
+        register("WINDOWS-1254", "Cp1254", "1254");
+        register("WINDOWS-1255", "Cp1255", "1255");
+        register("WINDOWS-1256", "Cp1256", "1256");
+        register("WINDOWS-1257", "Cp1257", "1257");
+        register("WINDOWS-1258", "Cp1258", "1258");
+
+        register("EUC-JP", "EUCJIS", "EUC-JP", "EUCJIS");
+        register("EUC-KR", "KSC5601", "EUC-KR", "KSC5601");
+        register("GB2312", "GB2312", "GB2312", "CSGB2312");
+        register("X0201", "JIS0201", "X0201", "JIS0201");
+        register("X0208", "JIS0208", "X0208");
+        // The Java name JIS0208 has always resolved to ISO-IR-87: it used to be registered twice and the
+        // ISO-IR-87 entry was the one that took effect. Kept that way for compatibility.
+        register("ISO-IR-87", "JIS0208", "ISO-IR-87", "JIS0208");
+        register("X0212", "JIS0212", "X0212", "JIS0212", "ISO-IR-159");
+        register("GB18030", "GB18030", "GB18030");
+
+        register("GBK", "GBK", "936", "MS936");
+
+        // "WINDOWS-31J" loses its prefix in handlecommonAlias(); lookup() falls back to the unstripped name.
+        register("WINDOWS-31J", "MS932", "MS932", "WINDOWS-31J", "CSWINDOWS31J");
+        register("TIS-620", "TIS620", "TIS-620", "TIS620");
+
+        register("ISO-8859-2", "ISO8859_2", "ISO-8859-2", "ISO8859_2", "ISO-IR-101", "LATIN2", "L2");
+        register("ISO-8859-3", "ISO8859_3", "ISO-8859-3", "ISO8859_3", "ISO-IR-109", "LATIN3", "L3");
+        register("ISO-8859-4", "ISO8859_4", "ISO-8859-4", "ISO8859_4", "ISO-IR-110", "LATIN4", "L4");
+        register("ISO-8859-5", "ISO8859_5", "ISO-8859-5", "ISO8859_5", "ISO-IR-144", "CYRILLIC");
+        register("ISO-8859-6", "ISO8859_6", "ISO-8859-6", "ISO8859_6", "ISO-IR-127", "ARABIC");
+        register("ISO-8859-7", "ISO8859_7", "ISO-8859-7", "ISO8859_7", "ISO-IR-126", "GREEK");
+        // csISOLatinHebrew is the IANA alias of ISO-8859-8 (Hebrew), not of ISO-8859-9 (Latin 5).
+        register("ISO-8859-8", "ISO8859_8", "ISO-8859-8", "ISO8859_8", "ISO-8859-8-I", "ISO-IR-138", "HEBREW",
+            "CSISOLATINHEBREW");
+        register("ISO-8859-9", "ISO8859_9", "ISO-8859-9", "ISO8859_9", "ISO-IR-148", "LATIN5", "CSISOLATIN5", "L5");
+        register("ISO-8859-15", "ISO8859_15", "ISO-8859-15", "ISO8859_15");
+
+        register("KOI8-R", "KOI8_R", "KOI8-R");
+        register("CSKOI8R", "KOI8_R", "KOI8_R", "CSKOI8R");
+    }
+
+    /**
+     * Registers one {iana, java} name pair under each of the given lookup keys.
+     * @param iana IANA name returned by {@link #toIana(String)}
+     * @param java Java name returned by {@link #toJava(String)}
+     * @param aliases uppercase lookup keys, in the form produced by {@link #handlecommonAlias(String)}
+     */
+    private static void register(String iana, String java, String... aliases)
+    {
+        String[] names = new String[]{iana, java};
+        for (String alias : aliases)
+        {
+            encodingNameMap.put(alias, names);
+        }
+    }
+
+    /**
+     * Finds the {iana, java} pair for an encoding name.
+     * @param encoding encoding name or alias, must not be null
+     * @return the registered pair or null if no match is found
+     */
+    private static String[] lookup(String encoding)
+    {
+        String[] names = encodingNameMap.get(handlecommonAlias(encoding));
+        if (names == null)
+        {
+            // Some registered names (e.g. WINDOWS-31J) start with a prefix that handlecommonAlias() strips;
+            // retry with the name as given so they stay reachable.
+            names = encodingNameMap.get(encoding.toUpperCase(java.util.Locale.ENGLISH));
+        }
+        return names;
+    }
+
+    /**
+     * Convert a Java character encoding name to its IANA equivalent.
+     * @param encoding java encoding name or alias (may be null)
+     * @return iana equivalent or null if the argument is null or no match is found.
+     */
+    public static String toIana(String encoding)
+    {
+        if (encoding == null)
+        {
+            return null;
+        }
+
+        String[] values = lookup(encoding);
+        return values != null ? values[0] : null;
+    }
+
+    /**
+     * "Fix" the name for common alias to reduce the number of entries needed in the hashmap. It upper-cases the
+     * name and removes the CSIBM, CCSID, IBM-, IBM0, CP-0, IBM, CP0, CP-, CP, WINDOWS- prefixes from it; a leading
+     * ISO_ is rewritten to ISO-. At most one of these rules is applied.
+     * @param encoding encoding name
+     * @return "fixed" encoding.
+     */
+    private static String handlecommonAlias(String encoding)
+    {
+        // Fixed locale: with the default locale a Turkish JVM would turn "iso" into "\u0130SO" and miss every key.
+        String key = encoding.toUpperCase(java.util.Locale.ENGLISH);
+
+        // handle common alias
+        if (key.startsWith("CSIBM") || key.startsWith("CCSID"))
+        {
+            key = key.substring(5);
+        }
+        else if (key.startsWith("IBM-") || key.startsWith("IBM0") || key.startsWith("CP-0"))
+        {
+            key = key.substring(4);
+        }
+        else if (key.startsWith("IBM") || key.startsWith("CP0") || key.startsWith("CP-"))
+        {
+            key = key.substring(3);
+        }
+        else if (key.startsWith("CP"))
+        {
+            key = key.substring(2);
+        }
+        else if (key.startsWith("WINDOWS-"))
+        {
+            key = key.substring(8);
+        }
+        else if (key.startsWith("ISO_"))
+        {
+            key = "ISO-" + key.substring(4);
+        }
+
+        return key;
+    }
+
+    /**
+     * Converts an encoding name to the standard java name. Handles IANA names, legacy names used in tidy and
+     * different java encoding alias. See http://www.iana.org/assignments/character-sets.
+     * @param encoding IANA encoding name or alias (may be null)
+     * @return java equivalent or null if the argument is null or no match is found.
+     */
+    public static String toJava(String encoding)
+    {
+        if (encoding == null)
+        {
+            return null;
+        }
+
+        String[] values = lookup(encoding);
+        return values != null ? values[1] : null;
+    }
+}
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/EncodingUtils.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/EncodingUtils.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/EncodingUtils.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,1007 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public final class EncodingUtils
+{
+
+ /**
+ * the big-endian (default) UNICODE BOM.
+ */
+ public static final int UNICODE_BOM_BE = 0xFEFF;
+
+ /**
+ * the default (big-endian) UNICODE BOM.
+ */
+ public static final int UNICODE_BOM = UNICODE_BOM_BE;
+
+ /**
+ * the little-endian UNICODE BOM.
+ */
+ public static final int UNICODE_BOM_LE = 0xFFFE;
+
+ /**
+ * the UTF-8 UNICODE BOM.
+ */
+ public static final int UNICODE_BOM_UTF8 = 0xEFBBBF;
+
+ /**
+ * States for ISO 2022. A document in an ISO-2022 based encoding uses ESC sequences called "designators" to
+ * switch character sets. The designators defined and used in ISO-2022-JP are: "ESC" + "(" + ? for ISO646
+ * variants; "ESC" + "$" + ? and "ESC" + "$" + "(" + ? for multibyte character sets.
+ * State ASCII.
+ */
+ public static final int FSM_ASCII = 0;
+
+ /**
+ * state ESC.
+ */
+ public static final int FSM_ESC = 1;
+
+ /**
+ * state ESCD.
+ */
+ public static final int FSM_ESCD = 2;
+
+ /**
+ * state ESCDP.
+ */
+ public static final int FSM_ESCDP = 3;
+
+ /**
+ * state ESCP.
+ */
+ public static final int FSM_ESCP = 4;
+
+ /**
+ * state NONASCII.
+ */
+ public static final int FSM_NONASCII = 5;
+
+ /**
+ * Max UTF-8 valid char value.
+ */
+ public static final int MAX_UTF8_FROM_UCS4 = 0x10FFFF;
+
+ /**
+ * Max UTF-16 value.
+ */
+ public static final int MAX_UTF16_FROM_UCS4 = 0x10FFFF;
+
+ /**
+ * utf16 low surrogate.
+ */
+ public static final int LOW_UTF16_SURROGATE = 0xD800;
+
+ /**
+ * UTF-16 surrogates begin.
+ */
+ public static final int UTF16_SURROGATES_BEGIN = 0x10000;
+
+ /**
+ * UTF-16 surrogate pair areas: low surrogates begin.
+ */
+ public static final int UTF16_LOW_SURROGATE_BEGIN = 0xD800;
+
+ /**
+ * UTF-16 surrogate pair areas: low surrogates end.
+ */
+ public static final int UTF16_LOW_SURROGATE_END = 0xDBFF;
+
+ /**
+ * UTF-16 surrogate pair areas: high surrogates begin.
+ */
+ public static final int UTF16_HIGH_SURROGATE_BEGIN = 0xDC00;
+
+ /**
+ * UTF-16 surrogate pair areas: high surrogates end.
+ */
+ public static final int UTF16_HIGH_SURROGATE_END = 0xDFFF;
+
+ /**
+ * UTF-16 high surrogate.
+ */
+ public static final int HIGH_UTF16_SURROGATE = 0xDFFF;
+
+ /**
+ * UTF-8 byte swap: invalid char.
+ */
+ private static final int UTF8_BYTE_SWAP_NOT_A_CHAR = 0xFFFE;
+
+ /**
+ * UTF-8 invalid char.
+ */
+ private static final int UTF8_NOT_A_CHAR = 0xFFFF;
+
+ /**
+ * Mapping for Windows Western character set (128-159) to Unicode.
+ */
+ private static final int[] WIN2UNICODE = {
+ 0x20AC,
+ 0x0000,
+ 0x201A,
+ 0x0192,
+ 0x201E,
+ 0x2026,
+ 0x2020,
+ 0x2021,
+ 0x02C6,
+ 0x2030,
+ 0x0160,
+ 0x2039,
+ 0x0152,
+ 0x0000,
+ 0x017D,
+ 0x0000,
+ 0x0000,
+ 0x2018,
+ 0x2019,
+ 0x201C,
+ 0x201D,
+ 0x2022,
+ 0x2013,
+ 0x2014,
+ 0x02DC,
+ 0x2122,
+ 0x0161,
+ 0x203A,
+ 0x0153,
+ 0x0000,
+ 0x017E,
+ 0x0178};
+
+ /**
+ * John Love-Jensen contributed this table for mapping MacRoman character set to
Unicode.
+ */
+ private static final int[] MAC2UNICODE = { // modified to only need chars
128-255/U+0080-U+00FF Terry T 19 Aug 01
+ // x7F = DEL
+ 0x00C4,
+ 0x00C5,
+ 0x00C7,
+ 0x00C9,
+ 0x00D1,
+ 0x00D6,
+ 0x00DC,
+ 0x00E1,
+ 0x00E0,
+ 0x00E2,
+ 0x00E4,
+ 0x00E3,
+ 0x00E5,
+ 0x00E7,
+ 0x00E9,
+ 0x00E8,
+ 0x00EA,
+ 0x00EB,
+ 0x00ED,
+ 0x00EC,
+ 0x00EE,
+ 0x00EF,
+ 0x00F1,
+ 0x00F3,
+ 0x00F2,
+ 0x00F4,
+ 0x00F6,
+ 0x00F5,
+ 0x00FA,
+ 0x00F9,
+ 0x00FB,
+ 0x00FC,
+ 0x2020,
+ 0x00B0,
+ 0x00A2,
+ 0x00A3,
+ 0x00A7,
+ 0x2022,
+ 0x00B6,
+ 0x00DF,
+ 0x00AE,
+ 0x00A9,
+ 0x2122,
+ 0x00B4,
+ 0x00A8,
+ 0x2260,
+ 0x00C6,
+ 0x00D8,
+ 0x221E,
+ 0x00B1,
+ 0x2264,
+ 0x2265,
+ 0x00A5,
+ 0x00B5,
+ 0x2202,
+ 0x2211,
+ // =BD U+2126 OHM SIGN
+ 0x220F,
+ 0x03C0,
+ 0x222B,
+ 0x00AA,
+ 0x00BA,
+ 0x03A9,
+ 0x00E6,
+ 0x00F8,
+ 0x00BF,
+ 0x00A1,
+ 0x00AC,
+ 0x221A,
+ 0x0192,
+ 0x2248,
+ 0x2206,
+ 0x00AB,
+ 0x00BB,
+ 0x2026,
+ 0x00A0,
+ 0x00C0,
+ 0x00C3,
+ 0x00D5,
+ 0x0152,
+ 0x0153,
+ 0x2013,
+ 0x2014,
+ 0x201C,
+ 0x201D,
+ 0x2018,
+ 0x2019,
+ 0x00F7,
+ 0x25CA,
+ // =DB U+00A4 CURRENCY SIGN
+ 0x00FF,
+ 0x0178,
+ 0x2044,
+ 0x20AC,
+ 0x2039,
+ 0x203A,
+ 0xFB01,
+ 0xFB02,
+ 0x2021,
+ 0x00B7,
+ 0x201A,
+ 0x201E,
+ 0x2030,
+ 0x00C2,
+ 0x00CA,
+ 0x00C1,
+ 0x00CB,
+ 0x00C8,
+ 0x00CD,
+ 0x00CE,
+ 0x00CF,
+ 0x00CC,
+ 0x00D3,
+ 0x00D4,
+ // xF0 = Apple Logo
+ // =F0 U+2665 BLACK HEART SUIT
+ 0xF8FF,
+ 0x00D2,
+ 0x00DA,
+ 0x00DB,
+ 0x00D9,
+ 0x0131,
+ 0x02C6,
+ 0x02DC,
+ 0x00AF,
+ 0x02D8,
+ 0x02D9,
+ 0x02DA,
+ 0x00B8,
+ 0x02DD,
+ 0x02DB,
+ 0x02C7};
+
+ /**
+ * table to map symbol font characters to Unicode; undefined characters are mapped to
0x0000 and characters without
+ * any unicode equivalent are mapped to '?'. Is this appropriate?
+ */
+ private static final int[] SYMBOL2UNICODE = {
+ 0x0000,
+ 0x0001,
+ 0x0002,
+ 0x0003,
+ 0x0004,
+ 0x0005,
+ 0x0006,
+ 0x0007,
+ 0x0008,
+ 0x0009,
+ 0x000A,
+ 0x000B,
+ 0x000C,
+ 0x000D,
+ 0x000E,
+ 0x000F,
+
+ 0x0010,
+ 0x0011,
+ 0x0012,
+ 0x0013,
+ 0x0014,
+ 0x0015,
+ 0x0016,
+ 0x0017,
+ 0x0018,
+ 0x0019,
+ 0x001A,
+ 0x001B,
+ 0x001C,
+ 0x001D,
+ 0x001E,
+ 0x001F,
+
+ 0x0020,
+ 0x0021,
+ 0x2200,
+ 0x0023,
+ 0x2203,
+ 0x0025,
+ 0x0026,
+ 0x220D,
+ 0x0028,
+ 0x0029,
+ 0x2217,
+ 0x002B,
+ 0x002C,
+ 0x2212,
+ 0x002E,
+ 0x002F,
+
+ 0x0030,
+ 0x0031,
+ 0x0032,
+ 0x0033,
+ 0x0034,
+ 0x0035,
+ 0x0036,
+ 0x0037,
+ 0x0038,
+ 0x0039,
+ 0x003A,
+ 0x003B,
+ 0x003C,
+ 0x003D,
+ 0x003E,
+ 0x003F,
+
+ 0x2245,
+ 0x0391,
+ 0x0392,
+ 0x03A7,
+ 0x0394,
+ 0x0395,
+ 0x03A6,
+ 0x0393,
+ 0x0397,
+ 0x0399,
+ 0x03D1,
+ 0x039A,
+ 0x039B,
+ 0x039C,
+ 0x039D,
+ 0x039F,
+
+ 0x03A0,
+ 0x0398,
+ 0x03A1,
+ 0x03A3,
+ 0x03A4,
+ 0x03A5,
+ 0x03C2,
+ 0x03A9,
+ 0x039E,
+ 0x03A8,
+ 0x0396,
+ 0x005B,
+ 0x2234,
+ 0x005D,
+ 0x22A5,
+ 0x005F,
+
+ 0x00AF,
+ 0x03B1,
+ 0x03B2,
+ 0x03C7,
+ 0x03B4,
+ 0x03B5,
+ 0x03C6,
+ 0x03B3,
+ 0x03B7,
+ 0x03B9,
+ 0x03D5,
+ 0x03BA,
+ 0x03BB,
+ 0x03BC,
+ 0x03BD,
+ 0x03BF,
+
+ 0x03C0,
+ 0x03B8,
+ 0x03C1,
+ 0x03C3,
+ 0x03C4,
+ 0x03C5,
+ 0x03D6,
+ 0x03C9,
+ 0x03BE,
+ 0x03C8,
+ 0x03B6,
+ 0x007B,
+ 0x007C,
+ 0x007D,
+ 0x223C,
+ 0x003F,
+
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+
+ 0x00A0,
+ 0x03D2,
+ 0x2032,
+ 0x2264,
+ 0x2044,
+ 0x221E,
+ 0x0192,
+ 0x2663,
+ 0x2666,
+ 0x2665,
+ 0x2660,
+ 0x2194,
+ 0x2190,
+ 0x2191,
+ 0x2192,
+ 0x2193,
+
+ 0x00B0,
+ 0x00B1,
+ 0x2033,
+ 0x2265,
+ 0x00D7,
+ 0x221D,
+ 0x2202,
+ 0x00B7,
+ 0x00F7,
+ 0x2260,
+ 0x2261,
+ 0x2248,
+ 0x2026,
+ 0x003F,
+ 0x003F,
+ 0x21B5,
+
+ 0x2135,
+ 0x2111,
+ 0x211C,
+ 0x2118,
+ 0x2297,
+ 0x2295,
+ 0x2205,
+ 0x2229,
+ 0x222A,
+ 0x2283,
+ 0x2287,
+ 0x2284,
+ 0x2282,
+ 0x2286,
+ 0x2208,
+ 0x2209,
+
+ 0x2220,
+ 0x2207,
+ 0x00AE,
+ 0x00A9,
+ 0x2122,
+ 0x220F,
+ 0x221A,
+ 0x22C5,
+ 0x00AC,
+ 0x2227,
+ 0x2228,
+ 0x21D4,
+ 0x21D0,
+ 0x21D1,
+ 0x21D2,
+ 0x21D3,
+
+ 0x25CA,
+ 0x2329,
+ 0x00AE,
+ 0x00A9,
+ 0x2122,
+ 0x2211,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+
+ 0x20AC,
+ 0x232A,
+ 0x222B,
+ 0x2320,
+ 0x003F,
+ 0x2321,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F,
+ 0x003F};
+
+ /**
+ * Array of valid UTF8 sequences.
+ */
+ private static final ValidUTF8Sequence[] VALID_UTF8 = {
+ new ValidUTF8Sequence(0x0000, 0x007F, 1, new char[]{0x00, 0x7F, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00}),
+ new ValidUTF8Sequence(0x0080, 0x07FF, 2, new char[]{0xC2, 0xDF, 0x80, 0xBF, 0x00,
0x00, 0x00, 0x00}),
+ new ValidUTF8Sequence(0x0800, 0x0FFF, 3, new char[]{0xE0, 0xE0, 0xA0, 0xBF, 0x80,
0xBF, 0x00, 0x00}),
+ new ValidUTF8Sequence(0x1000, 0xFFFF, 3, new char[]{0xE1, 0xEF, 0x80, 0xBF, 0x80,
0xBF, 0x00, 0x00}),
+ new ValidUTF8Sequence(0x10000, 0x3FFFF, 4, new char[]{0xF0, 0xF0, 0x90, 0xBF,
0x80, 0xBF, 0x80, 0xBF}),
+ new ValidUTF8Sequence(0x40000, 0xFFFFF, 4, new char[]{0xF1, 0xF3, 0x80, 0xBF,
0x80, 0xBF, 0x80, 0xBF}),
+ new ValidUTF8Sequence(0x100000, 0x10FFFF, 4, new char[]{0xF4, 0xF4, 0x80, 0x8F,
0x80, 0xBF, 0x80, 0xBF})};
+
+ /**
+ * number of valid utf8 sequences.
+ */
+ private static final int NUM_UTF8_SEQUENCES = VALID_UTF8.length;
+
+ /**
+ * Offset for utf8 sequences.
+ */
+ private static final int[] OFFSET_UTF8_SEQUENCES = {0, // 1 byte
+ 1, // 2 bytes
+ 2, // 3 bytes
+ 4, // 4 bytes
+ NUM_UTF8_SEQUENCES}; // must be last
+
+ /**
+ * Private constructor: this final class only offers static constants and helpers, so it is never instantiated.
+ */
+ private EncodingUtils()
+ {
+ // intentionally empty
+ }
+
+ /**
+  * Function for conversion from Windows-1252 to Unicode. Only the positions 128-159 are covered by the
+  * WIN2UNICODE table; any other value is handed back unchanged instead of failing with an
+  * ArrayIndexOutOfBoundsException, in line with the other decode helpers of this class.
+  * @param c char to decode
+  * @return decoded char (the table holds 0 for some positions), or <code>c</code> itself when it is outside
+  * 128-159
+  */
+ protected static int decodeWin1252(int c)
+ {
+     int index = c - 128;
+
+     // guard both ends: the table has exactly one slot per position 128..159
+     if (index < 0 || index >= WIN2UNICODE.length)
+     {
+         return c;
+     }
+
+     return WIN2UNICODE[index];
+ }
+
+ /**
+  * Function to convert from MacRoman to Unicode. Values 128-255 are looked up in the MAC2UNICODE table; every
+  * other value (below 128, or beyond the end of the table) is returned unchanged.
+  * @param c char to decode
+  * @return decoded char
+  */
+ protected static int decodeMacRoman(int c)
+ {
+     int index = c - 128;
+
+     // the upper bound keeps values above 255 from running past the end of the table
+     if (index >= 0 && index < MAC2UNICODE.length)
+     {
+         return MAC2UNICODE[index];
+     }
+
+     return c;
+ }
+
+ /**
+  * Function to convert from Symbol Font chars to Unicode. Values covered by the SYMBOL2UNICODE table (0-255) are
+  * mapped through it; anything else, including negative values, is returned unchanged.
+  * @param c char to decode
+  * @return decoded char
+  */
+ static int decodeSymbolFont(int c)
+ {
+     // a negative value would otherwise be used as an array index
+     if (c < 0 || c >= SYMBOL2UNICODE.length)
+     {
+         return c;
+     }
+
+     return SYMBOL2UNICODE[c];
+ }
+
+ /**
+  * Decodes a UTF-8 byte sequence (first byte plus successor bytes) to a char.
+  * @param c will contain the decoded char in <code>c[0]</code>
+  * @param firstByte first input byte
+  * @param successorBytes array containing successor bytes (can be null or empty if a getter is provided)
+  * @param getter callback used to fetch the successor bytes one at a time, and to unget a byte that turns out not
+  * to be a valid successor; if null the bytes are taken from successorBytes
+  * @param count will contain the number of bytes read in <code>count[0]</code>
+  * @param startInSuccessorBytesArray starting offset for bytes in successorBytes
+  * @return <code>true</code> if error
+  */
+ static boolean decodeUTF8BytesToChar(int[] c, int firstByte, byte[] successorBytes, GetBytes getter, int[] count,
+     int startInSuccessorBytesArray)
+ {
+     // special check if we have been passed an EOF char
+     if (firstByte == StreamIn.END_OF_STREAM) //uint
+     {
+         // at present
+         c[0] = firstByte;
+         count[0] = 1;
+         return false;
+     }
+
+     // Work on the caller's array when there is one, otherwise on scratch space. The null test matters: the
+     // contract above allows successorBytes to be null when a getter is supplied.
+     byte[] buf = (successorBytes != null && successorBytes.length != 0) ? successorBytes : new byte[10];
+
+     // first byte is passed in separately (presumably toUnsigned yields 0-255 - confirm against TidyUtils)
+     int ch = TidyUtils.toUnsigned(firstByte);
+     int n;
+     int bytes;
+     boolean hasError = false;
+
+     if (ch <= 0x7F) // 0XXX XXXX one byte
+     {
+         n = ch;
+         bytes = 1;
+     }
+     else if ((ch & 0xE0) == 0xC0) /* 110X XXXX two bytes */
+     {
+         n = ch & 31;
+         bytes = 2;
+     }
+     else if ((ch & 0xF0) == 0xE0) /* 1110 XXXX three bytes */
+     {
+         n = ch & 15;
+         bytes = 3;
+     }
+     else if ((ch & 0xF8) == 0xF0) /* 1111 0XXX four bytes */
+     {
+         n = ch & 7;
+         bytes = 4;
+     }
+     else if ((ch & 0xFC) == 0xF8) /* 1111 10XX five bytes */
+     {
+         n = ch & 3;
+         bytes = 5;
+         hasError = true;
+     }
+     else if ((ch & 0xFE) == 0xFC) /* 1111 110X six bytes */
+     {
+         n = ch & 1;
+         bytes = 6;
+         hasError = true;
+     }
+     else
+     {
+         // not a valid first byte of a UTF-8 sequence
+         n = ch;
+         bytes = 1;
+         hasError = true;
+     }
+
+     for (int i = 1; i < bytes; ++i)
+     {
+         int position = startInSuccessorBytesArray + i - 1;
+         int[] tempCount = new int[1]; // no. of additional bytes to get
+         int successor = buf[position];
+
+         if (getter != null)
+         {
+             tempCount[0] = 1; // to simplify things, get 1 byte at a time
+             int[] fetched = new int[]{successor};
+
+             getter.doGet(fetched, tempCount, false);
+             if (tempCount[0] <= 0) // EOF
+             {
+                 hasError = true;
+                 bytes = i;
+                 break;
+             }
+
+             // keep what the getter delivered; without this the checks below would look at stale buffer content
+             successor = fetched[0];
+             buf[position] = (byte) successor;
+         }
+
+         // successor bytes should have the form 10XX XXXX
+         if ((successor & 0xC0) != 0x80)
+         {
+             // illegal successor byte value
+             hasError = true;
+             bytes = i;
+             if (getter != null)
+             {
+                 tempCount[0] = 1; // to simplify things, unget 1 byte at a time
+                 getter.doGet(new int[]{successor}, tempCount, true);
+             }
+             break;
+         }
+
+         n = (n << 6) | (successor & 0x3F);
+     }
+
+     if (!hasError && ((n == UTF8_BYTE_SWAP_NOT_A_CHAR) || (n == UTF8_NOT_A_CHAR)))
+     {
+         hasError = true;
+     }
+
+     if (!hasError && (n > MAX_UTF8_FROM_UCS4))
+     {
+         hasError = true;
+     }
+
+     if (!hasError && (n >= UTF16_LOW_SURROGATE_BEGIN) && (n <= UTF16_HIGH_SURROGATE_END))
+     {
+         // unpaired surrogates not allowed
+         hasError = true;
+     }
+
+     if (!hasError)
+     {
+         // table rows that describe sequences of this length (bytes is 1-4 here, longer forms are errors already)
+         int lo = OFFSET_UTF8_SEQUENCES[bytes - 1];
+         int hi = OFFSET_UTF8_SEQUENCES[bytes] - 1;
+
+         // check for overlong sequences
+         if ((n < VALID_UTF8[lo].lowChar) || (n > VALID_UTF8[hi].highChar))
+         {
+             hasError = true;
+         }
+         else
+         {
+             hasError = true; // assume error until proven otherwise
+
+             // valid as soon as one row accepts every byte of the sequence
+             for (int row = lo; hasError && row <= hi; row++)
+             {
+                 hasError = !matchesValidSequence(VALID_UTF8[row], ch, buf, startInSuccessorBytesArray, bytes);
+             }
+         }
+     }
+
+     count[0] = bytes;
+
+     c[0] = n;
+
+     // n = 0xFFFD;
+     // replacement char - do this in the caller
+     return hasError;
+ }
+
+ /**
+  * Tells whether every byte of a sequence lies inside the per-position byte ranges of one VALID_UTF8 row.
+  * @param sequence table row to test against
+  * @param leadByte first byte of the sequence as an unsigned value
+  * @param tail array holding the successor bytes
+  * @param tailStart offset of the first successor byte in <code>tail</code>
+  * @param length total number of bytes in the sequence, lead byte included
+  * @return <code>true</code> if all bytes are in range
+  */
+ private static boolean matchesValidSequence(ValidUTF8Sequence sequence, int leadByte, byte[] tail, int tailStart,
+     int length)
+ {
+     for (int pos = 0; pos < length; pos++)
+     {
+         // Java bytes are signed: mask to 0-255 before comparing with the unsigned bounds of the table
+         int value = (pos == 0) ? leadByte : (tail[tailStart + pos - 1] & 0xFF);
+
+         if (value < sequence.validBytes[pos * 2] || value > sequence.validBytes[(pos * 2) + 1])
+         {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ /**
+  * Encode a char to an array of bytes.
+  * @param c char to encode
+  * @param encodebuf will contain the encoded bytes; if null an internal scratch buffer is used
+  * @param putter if not null it will be called to write the bytes out, but only when the sequence is valid
+  * @param count number of bytes written, stored in <code>count[0]</code> (0 when <code>c</code> is negative)
+  * @return <code>false</code>= ok, <code>true</code>= error
+  */
+ static boolean encodeCharToUTF8Bytes(int c, byte[] encodebuf, PutBytes putter, int[] count)
+ {
+     // use the caller's buffer when given, otherwise scratch space big enough for the longest (six byte) form
+     byte[] buf = (encodebuf != null) ? encodebuf : new byte[10];
+
+     int bytes = 0;
+     boolean hasError = false;
+
+     if (c < 0)
+     {
+         // Not a character value at all. Without this test a negative int satisfies "c <= 0x7F" and would be
+         // emitted as a single raw byte and reported as valid.
+         hasError = true;
+     }
+     else if (c <= 0x7F) // 0XXX XXXX one byte
+     {
+         buf[0] = (byte) c;
+         bytes = 1;
+     }
+     else if (c <= 0x7FF) // 110X XXXX two bytes
+     {
+         buf[0] = (byte) (0xC0 | (c >> 6));
+         buf[1] = (byte) (0x80 | (c & 0x3F));
+         bytes = 2;
+     }
+     else if (c <= 0xFFFF) // 1110 XXXX three bytes
+     {
+         buf[0] = (byte) (0xE0 | (c >> 12));
+         buf[1] = (byte) (0x80 | ((c >> 6) & 0x3F));
+         buf[2] = (byte) (0x80 | (c & 0x3F));
+         bytes = 3;
+         if ((c == UTF8_BYTE_SWAP_NOT_A_CHAR) || (c == UTF8_NOT_A_CHAR))
+         {
+             hasError = true;
+         }
+         else if ((c >= UTF16_LOW_SURROGATE_BEGIN) && (c <= UTF16_HIGH_SURROGATE_END))
+         {
+             // unpaired surrogates not allowed
+             hasError = true;
+         }
+     }
+     else if (c <= 0x1FFFFF) // 1111 0XXX four bytes
+     {
+         buf[0] = (byte) (0xF0 | (c >> 18));
+         buf[1] = (byte) (0x80 | ((c >> 12) & 0x3F));
+         buf[2] = (byte) (0x80 | ((c >> 6) & 0x3F));
+         buf[3] = (byte) (0x80 | (c & 0x3F));
+         bytes = 4;
+         if (c > MAX_UTF8_FROM_UCS4)
+         {
+             hasError = true;
+         }
+     }
+     else if (c <= 0x3FFFFFF) // 1111 10XX five bytes
+     {
+         buf[0] = (byte) (0xF8 | (c >> 24));
+         // masked like every other continuation byte (the mask was missing here)
+         buf[1] = (byte) (0x80 | ((c >> 18) & 0x3F));
+         buf[2] = (byte) (0x80 | ((c >> 12) & 0x3F));
+         buf[3] = (byte) (0x80 | ((c >> 6) & 0x3F));
+         buf[4] = (byte) (0x80 | (c & 0x3F));
+         bytes = 5;
+         hasError = true;
+     }
+     else // 1111 110X six bytes: every remaining non-negative int
+     {
+         buf[0] = (byte) (0xFC | (c >> 30));
+         buf[1] = (byte) (0x80 | ((c >> 24) & 0x3F));
+         buf[2] = (byte) (0x80 | ((c >> 18) & 0x3F));
+         buf[3] = (byte) (0x80 | ((c >> 12) & 0x3F));
+         buf[4] = (byte) (0x80 | ((c >> 6) & 0x3F));
+         buf[5] = (byte) (0x80 | (c & 0x3F));
+         bytes = 6;
+         hasError = true;
+     }
+
+     if (!hasError && putter != null) // don't output invalid UTF-8 byte sequence to a stream
+     {
+         int[] tempCount = new int[]{bytes};
+         putter.doPut(buf, tempCount);
+
+         // the putter reports back how many bytes it stored; fewer than requested is an error
+         if (tempCount[0] < bytes)
+         {
+             hasError = true;
+         }
+     }
+
+     count[0] = bytes;
+     return hasError;
+ }
+
+ /**
+ * Getter callback: called to retrieve 1 or more additional UTF-8 bytes. The Getter callback can also unget
+ * bytes if necessary to re-synchronize the input stream.
+ */
+ static interface GetBytes
+ {
+
+ /**
+ * Get (or unget) one or more bytes.
+ * @param buf will contain the bytes read; when ungetting it holds the bytes to push back.
+ * @param count on entry the number of bytes requested; on return the number of bytes actually stored in "buf",
+ * &lt;= 0 if error or EOF
+ * @param unget <code>true</code> to unget the bytes in "buf" instead of reading
+ */
+ void doGet(int[] buf, int[] count, boolean unget);
+ }
+
+ /**
+ * Putter callbacks: called to store 1 or more additional UTF-8 bytes.
+ */
+ static interface PutBytes
+ {
+
+ /**
+ * Store one or more bytes.
+ * @param buf holds the bytes to store.
+ * @param count on entry the number of bytes in "buf" to store; on return the number of bytes actually stored
+ * (the caller in this class treats a smaller value than requested as an error)
+ */
+ void doPut(byte[] buf, int[] count);
+ }
+}
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Entity.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Entity.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Entity.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,104 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * HTML ISO entity.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org
</a>
+ * @author Andy Quick <a
href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a>
(translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class Entity
+{
+
+ /**
+ * entity name.
+ */
+ private String name;
+
+ /**
+ * entity code.
+ */
+ private short code;
+
+ /**
+ * instantiates a new entity.
+ * @param name entity name
+ * @param code entity code (will be casted to short)
+ */
+ public Entity(String name, int code)
+ {
+ this.name = name;
+ this.code = (short) code;
+ }
+
+ /**
+ * Getter for <code>code</code>.
+ * @return Returns the code.
+ */
+ public short getCode()
+ {
+ return this.code;
+ }
+
+ /**
+ * Getter for <code>name</code>.
+ * @return Returns the name.
+ */
+ public String getName()
+ {
+ return this.name;
+ }
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/EntityTable.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/EntityTable.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/EntityTable.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,455 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.Map;
+
+
+/**
+ * Entity hash table.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org
</a>
+ * @author Andy Quick <a
href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a>
(translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public final class EntityTable
+{
+
+ /**
+ * the default entity table.
+ */
+ private static EntityTable defaultEntityTable;
+
+ /**
+ * Known entities.
+ */
+ private static Entity[] entities = {
+ new Entity("nbsp", 160),
+ new Entity("iexcl", 161),
+ new Entity("cent", 162),
+ new Entity("pound", 163),
+ new Entity("curren", 164),
+ new Entity("yen", 165),
+ new Entity("brvbar", 166),
+ new Entity("sect", 167),
+ new Entity("uml", 168),
+ new Entity("copy", 169),
+ new Entity("ordf", 170),
+ new Entity("laquo", 171),
+ new Entity("not", 172),
+ new Entity("shy", 173),
+ new Entity("reg", 174),
+ new Entity("macr", 175),
+ new Entity("deg", 176),
+ new Entity("plusmn", 177),
+ new Entity("sup2", 178),
+ new Entity("sup3", 179),
+ new Entity("acute", 180),
+ new Entity("micro", 181),
+ new Entity("para", 182),
+ new Entity("middot", 183),
+ new Entity("cedil", 184),
+ new Entity("sup1", 185),
+ new Entity("ordm", 186),
+ new Entity("raquo", 187),
+ new Entity("frac14", 188),
+ new Entity("frac12", 189),
+ new Entity("frac34", 190),
+ new Entity("iquest", 191),
+ new Entity("Agrave", 192),
+ new Entity("Aacute", 193),
+ new Entity("Acirc", 194),
+ new Entity("Atilde", 195),
+ new Entity("Auml", 196),
+ new Entity("Aring", 197),
+ new Entity("AElig", 198),
+ new Entity("Ccedil", 199),
+ new Entity("Egrave", 200),
+ new Entity("Eacute", 201),
+ new Entity("Ecirc", 202),
+ new Entity("Euml", 203),
+ new Entity("Igrave", 204),
+ new Entity("Iacute", 205),
+ new Entity("Icirc", 206),
+ new Entity("Iuml", 207),
+ new Entity("ETH", 208),
+ new Entity("Ntilde", 209),
+ new Entity("Ograve", 210),
+ new Entity("Oacute", 211),
+ new Entity("Ocirc", 212),
+ new Entity("Otilde", 213),
+ new Entity("Ouml", 214),
+ new Entity("times", 215),
+ new Entity("Oslash", 216),
+ new Entity("Ugrave", 217),
+ new Entity("Uacute", 218),
+ new Entity("Ucirc", 219),
+ new Entity("Uuml", 220),
+ new Entity("Yacute", 221),
+ new Entity("THORN", 222),
+ new Entity("szlig", 223),
+ new Entity("agrave", 224),
+ new Entity("aacute", 225),
+ new Entity("acirc", 226),
+ new Entity("atilde", 227),
+ new Entity("auml", 228),
+ new Entity("aring", 229),
+ new Entity("aelig", 230),
+ new Entity("ccedil", 231),
+ new Entity("egrave", 232),
+ new Entity("eacute", 233),
+ new Entity("ecirc", 234),
+ new Entity("euml", 235),
+ new Entity("igrave", 236),
+ new Entity("iacute", 237),
+ new Entity("icirc", 238),
+ new Entity("iuml", 239),
+ new Entity("eth", 240),
+ new Entity("ntilde", 241),
+ new Entity("ograve", 242),
+ new Entity("oacute", 243),
+ new Entity("ocirc", 244),
+ new Entity("otilde", 245),
+ new Entity("ouml", 246),
+ new Entity("divide", 247),
+ new Entity("oslash", 248),
+ new Entity("ugrave", 249),
+ new Entity("uacute", 250),
+ new Entity("ucirc", 251),
+ new Entity("uuml", 252),
+ new Entity("yacute", 253),
+ new Entity("thorn", 254),
+ new Entity("yuml", 255),
+ new Entity("fnof", 402),
+ new Entity("Alpha", 913),
+ new Entity("Beta", 914),
+ new Entity("Gamma", 915),
+ new Entity("Delta", 916),
+ new Entity("Epsilon", 917),
+ new Entity("Zeta", 918),
+ new Entity("Eta", 919),
+ new Entity("Theta", 920),
+ new Entity("Iota", 921),
+ new Entity("Kappa", 922),
+ new Entity("Lambda", 923),
+ new Entity("Mu", 924),
+ new Entity("Nu", 925),
+ new Entity("Xi", 926),
+ new Entity("Omicron", 927),
+ new Entity("Pi", 928),
+ new Entity("Rho", 929),
+ new Entity("Sigma", 931),
+ new Entity("Tau", 932),
+ new Entity("Upsilon", 933),
+ new Entity("Phi", 934),
+ new Entity("Chi", 935),
+ new Entity("Psi", 936),
+ new Entity("Omega", 937),
+ new Entity("alpha", 945),
+ new Entity("beta", 946),
+ new Entity("gamma", 947),
+ new Entity("delta", 948),
+ new Entity("epsilon", 949),
+ new Entity("zeta", 950),
+ new Entity("eta", 951),
+ new Entity("theta", 952),
+ new Entity("iota", 953),
+ new Entity("kappa", 954),
+ new Entity("lambda", 955),
+ new Entity("mu", 956),
+ new Entity("nu", 957),
+ new Entity("xi", 958),
+ new Entity("omicron", 959),
+ new Entity("pi", 960),
+ new Entity("rho", 961),
+ new Entity("sigmaf", 962),
+ new Entity("sigma", 963),
+ new Entity("tau", 964),
+ new Entity("upsilon", 965),
+ new Entity("phi", 966),
+ new Entity("chi", 967),
+ new Entity("psi", 968),
+ new Entity("omega", 969),
+ new Entity("thetasym", 977),
+ new Entity("upsih", 978),
+ new Entity("piv", 982),
+ new Entity("bull", 8226),
+ new Entity("hellip", 8230),
+ new Entity("prime", 8242),
+ new Entity("Prime", 8243),
+ new Entity("oline", 8254),
+ new Entity("frasl", 8260),
+ new Entity("weierp", 8472),
+ new Entity("image", 8465),
+ new Entity("real", 8476),
+ new Entity("trade", 8482),
+ new Entity("alefsym", 8501),
+ new Entity("larr", 8592),
+ new Entity("uarr", 8593),
+ new Entity("rarr", 8594),
+ new Entity("darr", 8595),
+ new Entity("harr", 8596),
+ new Entity("crarr", 8629),
+ new Entity("lArr", 8656),
+ new Entity("uArr", 8657),
+ new Entity("rArr", 8658),
+ new Entity("dArr", 8659),
+ new Entity("hArr", 8660),
+ new Entity("forall", 8704),
+ new Entity("part", 8706),
+ new Entity("exist", 8707),
+ new Entity("empty", 8709),
+ new Entity("nabla", 8711),
+ new Entity("isin", 8712),
+ new Entity("notin", 8713),
+ new Entity("ni", 8715),
+ new Entity("prod", 8719),
+ new Entity("sum", 8721),
+ new Entity("minus", 8722),
+ new Entity("lowast", 8727),
+ new Entity("radic", 8730),
+ new Entity("prop", 8733),
+ new Entity("infin", 8734),
+ new Entity("ang", 8736),
+ new Entity("and", 8743),
+ new Entity("or", 8744),
+ new Entity("cap", 8745),
+ new Entity("cup", 8746),
+ new Entity("int", 8747),
+ new Entity("there4", 8756),
+ new Entity("sim", 8764),
+ new Entity("cong", 8773),
+ new Entity("asymp", 8776),
+ new Entity("ne", 8800),
+ new Entity("equiv", 8801),
+ new Entity("le", 8804),
+ new Entity("ge", 8805),
+ new Entity("sub", 8834),
+ new Entity("sup", 8835),
+ new Entity("nsub", 8836),
+ new Entity("sube", 8838),
+ new Entity("supe", 8839),
+ new Entity("oplus", 8853),
+ new Entity("otimes", 8855),
+ new Entity("perp", 8869),
+ new Entity("sdot", 8901),
+ new Entity("lceil", 8968),
+ new Entity("rceil", 8969),
+ new Entity("lfloor", 8970),
+ new Entity("rfloor", 8971),
+ new Entity("lang", 9001),
+ new Entity("rang", 9002),
+ new Entity("loz", 9674),
+ new Entity("spades", 9824),
+ new Entity("clubs", 9827),
+ new Entity("hearts", 9829),
+ new Entity("diams", 9830),
+ new Entity("quot", 34),
+ new Entity("amp", 38),
+ new Entity("lt", 60),
+ new Entity("gt", 62),
+ new Entity("OElig", 338),
+ new Entity("oelig", 339),
+ new Entity("Scaron", 352),
+ new Entity("scaron", 353),
+ new Entity("Yuml", 376),
+ new Entity("circ", 710),
+ new Entity("tilde", 732),
+ new Entity("ensp", 8194),
+ new Entity("emsp", 8195),
+ new Entity("thinsp", 8201),
+ new Entity("zwnj", 8204),
+ new Entity("zwj", 8205),
+ new Entity("lrm", 8206),
+ new Entity("rlm", 8207),
+ new Entity("ndash", 8211),
+ new Entity("mdash", 8212),
+ new Entity("lsquo", 8216),
+ new Entity("rsquo", 8217),
+ new Entity("sbquo", 8218),
+ new Entity("ldquo", 8220),
+ new Entity("rdquo", 8221),
+ new Entity("bdquo", 8222),
+ new Entity("dagger", 8224),
+ new Entity("Dagger", 8225),
+ new Entity("permil", 8240),
+ new Entity("lsaquo", 8249),
+ new Entity("rsaquo", 8250),
+ new Entity("euro", 8364)};
+
+ /**
+ * Entity map.
+ */
+ private Map<String, Entity> entityHashtable = new Hashtable<String,
Entity>();
+
+ /**
+ * Private constructor: use getDefaultEntityTable to get an entity table instance.
+ */
+ private EntityTable()
+ {
+ super();
+ }
+
+ /**
+  * Registers an entity under its name, replacing any entity already stored under that name.
+  * @param ent entity
+  * @return the entity previously stored under the same name, or <code>null</code> if there was none
+  */
+ private Entity install(Entity ent)
+ {
+     final String key = ent.getName();
+     return this.entityHashtable.put(key, ent);
+ }
+
+ /**
+  * Finds the entity registered under the given name.
+  * @param name entity name
+  * @return entity, or <code>null</code> if no entity is registered under that name
+  */
+ public Entity lookup(String name)
+ {
+     final Entity match = this.entityHashtable.get(name);
+     return match;
+ }
+
+ /**
+  * Returns the entity code for the given entity reference. The argument still carries its leading "&amp;":
+  * "&amp;#" followed by a decimal number, "&amp;#x" followed by a hexadecimal number, or "&amp;" followed by an
+  * entity name.
+  * @param name entity name
+  * @return entity code or 0 for unknown entity names and for missing or malformed numbers
+  */
+ public int entityCode(String name)
+ {
+     // too short to hold anything after the leading "&"
+     if (name.length() <= 1)
+     {
+         return 0;
+     }
+
+     // Named entity: name = "&" followed by a name
+     if (name.charAt(1) != '#')
+     {
+         Entity ent = lookup(name.substring(1));
+         if (ent == null)
+         {
+             return 0; // zero signifies unknown entity name
+         }
+         return ent.getCode();
+     }
+
+     // numeric entity: name = "&#" followed by number; 'x' prefix denotes hexadecimal number format
+     final boolean hexadecimal = name.length() >= 4 && name.charAt(2) == 'x';
+     try
+     {
+         if (hexadecimal)
+         {
+             return Integer.parseInt(name.substring(3), 16);
+         }
+         if (name.length() >= 3)
+         {
+             return Integer.parseInt(name.substring(2));
+         }
+     }
+     catch (NumberFormatException e)
+     {
+         // ignore: falls through to the "bad number" result below
+     }
+
+     return 0; // zero on missing/bad number
+ }
+
+ /**
+  * Returns the entity name for the given entity code, found by scanning the registered entities.
+  * @param code entity code
+  * @return entity name or null for unknown entity codes
+  */
+ public String entityName(short code)
+ {
+     for (Entity candidate : this.entityHashtable.values())
+     {
+         if (candidate.getCode() == code)
+         {
+             // first hit wins
+             return candidate.getName();
+         }
+     }
+     return null;
+ }
+
+ /**
+  * Returns the default entity table instance, creating and filling it with the known entities on first use. The
+  * method is synchronized so that concurrent first calls create only one table and no caller can see it before
+  * it has been filled.
+  * @return entity table instance
+  */
+ public static synchronized EntityTable getDefaultEntityTable()
+ {
+     if (defaultEntityTable == null)
+     {
+         // fill a local instance first: the shared field must never refer to a half-populated table
+         EntityTable table = new EntityTable();
+         for (int i = 0; i < entities.length; i++)
+         {
+             table.install(entities[i]);
+         }
+         defaultEntityTable = table;
+     }
+     return defaultEntityTable;
+ }
+
+}
\ No newline at end of file
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/IStack.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/IStack.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/IStack.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,92 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Inline stack node.
+ * <p>
+ * Mosaic handles inlines via a separate stack from other elements. We duplicate this to recover from inline markup
+ * errors such as: <code>&lt;i&gt;italic text &lt;p&gt; more italic text&lt;/b&gt; normal text</code> which, for
+ * compatibility with Mosaic, is mapped to:
+ * <code>&lt;i&gt;italic text&lt;/i&gt; &lt;p&gt; &lt;i&gt;more italic text&lt;/i&gt; normal text</code>.
+ * Note that any inline end tag pops the effect of the current inline start tag, so that <code>&lt;/b&gt;</code>
+ * pops <code>&lt;i&gt;</code> in the above example.
+ * </p>
+ * <p>
+ * The class is a plain data holder: it declares only fields and no behaviour.
+ * </p>
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class IStack
+{
+
+ /**
+ * Next element in the stack.
+ */
+ protected IStack next;
+
+ /**
+ * Dictionary definition of the tag held by this stack entry.
+ */
+ protected Dict tag;
+
+ /**
+ * Element name (null for text nodes).
+ */
+ protected String element;
+
+ /**
+ * Attributes of the element; <code>AttVal</code> is a linked list, so this is its first entry.
+ */
+ protected AttVal attributes;
+
+}
\ No newline at end of file
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Lexer.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Lexer.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Lexer.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,4116 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.io.PrintWriter;
+import java.util.List;
+import java.util.Stack;
+import java.util.Vector;
+
+
+/**
+ * Lexer for html parser.
+ * <p>
+ * Given a file stream fp it returns a sequence of tokens. GetToken(fp) gets the next token; UngetToken(fp) provides
+ * one level of undo.
+ * </p>
+ * <p>
+ * The tags include an attribute list:
+ * </p>
+ * <ul>
+ * <li>linked list of attribute/value nodes</li>
+ * <li>each node has 2 null-terminated strings</li>
+ * <li>entities are replaced in attribute values</li>
+ * </ul>
+ * <p>
+ * White space is compacted if not in preformatted mode: leading white space is discarded and subsequent white space
+ * sequences are compacted to single space chars.
+ * </p>
+ * <p>
+ * If XmlTags is no then tag names are folded to lower case (see <code>parseTagName()</code>) and attribute names to
+ * lower case.
+ * </p>
+ * <p>
+ * Not yet done: Doctype subset and marked sections.
+ * </p>
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class Lexer
+{
+
+ /**
+ * Lexer mode: ignore whitespace.
+ * <p>
+ * NOTE(review): the four mode constants are the consecutive values 0..3, not disjoint bit flags, yet
+ * <code>parseEntity</code> tests its mode argument with <code>mode &amp; PREFORMATTED</code>. That test is therefore
+ * also true for <code>IGNORE_MARKUP</code> (3) - confirm this is intended.
+ * </p>
+ */
+ public static final short IGNORE_WHITESPACE = 0;
+
+ /**
+ * Lexer mode: mixed content.
+ */
+ public static final short MIXED_CONTENT = 1;
+
+ /**
+ * Lexer mode: preformatted. <code>parseEntity</code> replaces a non-breaking space (160) by a plain space when this
+ * bit is set in its mode argument.
+ */
+ public static final short PREFORMATTED = 2;
+
+ /**
+ * Lexer mode: ignore markup.
+ */
+ public static final short IGNORE_MARKUP = 3;
+
+ /**
+ * URI for XHTML 1.0 transitional DTD.
+ */
+ private static final String VOYAGER_LOOSE =
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
+
+ /**
+ * URI for XHTML 1.0 strict DTD.
+ */
+ private static final String VOYAGER_STRICT =
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
+
+ /**
+ * URI for XHTML 1.0 frameset DTD.
+ */
+ private static final String VOYAGER_FRAMESET =
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd";
+
+ /**
+ * URI for XHTML 1.1.
+ */
+ private static final String VOYAGER_11 =
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd";
+
+ /**
+ * URI for XHTML Basic 1.0.
+ */
+ // private static final String VOYAGER_BASIC =
"http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd";
+ /**
+ * xhtml namespace.
+ */
+ private static final String XHTML_NAMESPACE =
"http://www.w3.org/1999/xhtml";
+
+ /**
+ * Lists all the known versions. Each entry is constructed from an HTML version name, the matching XHTML
+ * ("voyager") name, a DTD URI and a <code>Dict</code> version code. <code>htmlVersionName()</code> scans the table by
+ * code; <code>findGivenVersion()</code> scans it by HTML name.
+ * <p>
+ * NOTE(review): the name "HTML 4.01" is used by both the first entry (HTML 4.01 Strict) and the last entry
+ * (XHTML 1.1), so a lookup by name depends on the scan order. The XHTML 1.1 entry also reuses
+ * <code>VOYAGER_STRICT</code> rather than <code>VOYAGER_11</code> - confirm both are intended.
+ * </p>
+ */
+ private static final Lexer.W3CVersionInfo[] W3CVERSION = {
+ new W3CVersionInfo("HTML 4.01", "XHTML 1.0 Strict",
VOYAGER_STRICT, Dict.VERS_HTML40_STRICT),
+ new W3CVersionInfo("HTML 4.01 Transitional", "XHTML 1.0
Transitional", VOYAGER_LOOSE, Dict.VERS_HTML40_LOOSE),
+ new W3CVersionInfo("HTML 4.01 Frameset", "XHTML 1.0
Frameset", VOYAGER_FRAMESET, Dict.VERS_FRAMESET),
+ new W3CVersionInfo("HTML 4.0", "XHTML 1.0 Strict",
VOYAGER_STRICT, Dict.VERS_HTML40_STRICT),
+ new W3CVersionInfo("HTML 4.0 Transitional", "XHTML 1.0
Transitional", VOYAGER_LOOSE, Dict.VERS_HTML40_LOOSE),
+ new W3CVersionInfo("HTML 4.0 Frameset", "XHTML 1.0 Frameset",
VOYAGER_FRAMESET, Dict.VERS_FRAMESET),
+ new W3CVersionInfo("HTML 3.2", "XHTML 1.0 Transitional",
VOYAGER_LOOSE, Dict.VERS_HTML32),
+ new W3CVersionInfo("HTML 3.2 Final", "XHTML 1.0
Transitional", VOYAGER_LOOSE, Dict.VERS_HTML32),
+ new W3CVersionInfo("HTML 3.2 Draft", "XHTML 1.0
Transitional", VOYAGER_LOOSE, Dict.VERS_HTML32),
+ new W3CVersionInfo("HTML 2.0", "XHTML 1.0 Strict",
VOYAGER_STRICT, Dict.VERS_HTML20),
+ new W3CVersionInfo("HTML 4.01", "XHTML 1.1", VOYAGER_STRICT,
Dict.VERS_XHTML11)};
+
+ /**
+ * getToken state: content. This is the state a new Lexer starts in (set by the constructor).
+ * <p>
+ * The LEX_* values are stored in the <code>state</code> field. Note that the numbering skips 7.
+ * </p>
+ */
+ private static final short LEX_CONTENT = 0;
+
+ /**
+ * getToken state: gt.
+ */
+ private static final short LEX_GT = 1;
+
+ /**
+ * getToken state: endtag.
+ */
+ private static final short LEX_ENDTAG = 2;
+
+ /**
+ * getToken state: start tag.
+ */
+ private static final short LEX_STARTTAG = 3;
+
+ /**
+ * getToken state: comment.
+ */
+ private static final short LEX_COMMENT = 4;
+
+ /**
+ * getToken state: doctype.
+ */
+ private static final short LEX_DOCTYPE = 5;
+
+ /**
+ * getToken state: processing instruction.
+ */
+ private static final short LEX_PROCINSTR = 6;
+
+ /**
+ * getToken state: cdata.
+ */
+ private static final short LEX_CDATA = 8;
+
+ /**
+ * getToken state: section.
+ */
+ private static final short LEX_SECTION = 9;
+
+ /**
+ * getToken state: asp.
+ */
+ private static final short LEX_ASP = 10;
+
+ /**
+ * getToken state: jste.
+ */
+ private static final short LEX_JSTE = 11;
+
+ /**
+ * getToken state: php.
+ */
+ private static final short LEX_PHP = 12;
+
+ /**
+ * getToken state: xml declaration.
+ */
+ private static final short LEX_XMLDECL = 13;
+
+ /**
+ * file stream.
+ */
+ protected StreamIn in;
+
+ /**
+ * error output stream.
+ */
+ protected PrintWriter errout;
+
+ /**
+ * for accessibility errors.
+ */
+ protected short badAccess;
+
+ /**
+ * for bad style errors.
+ */
+ protected short badLayout;
+
+ /**
+ * for bad char encodings.
+ */
+ protected short badChars;
+
+ /**
+ * for mismatched/mispositioned form tags.
+ */
+ protected short badForm;
+
+ /**
+ * count of warnings in this document.
+ */
+ protected short warnings;
+
+ /**
+ * count of errors.
+ */
+ protected short errors;
+
+ /**
+ * lines seen.
+ */
+ protected int lines;
+
+ /**
+ * at start of current token.
+ */
+ protected int columns;
+
+ /**
+ * used to collapse contiguous white space.
+ */
+ protected boolean waswhite;
+
+ /**
+ * true after token has been pushed back.
+ */
+ protected boolean pushed;
+
+ /**
+ * when space is moved after end tag.
+ */
+ protected boolean insertspace;
+
+ /**
+ * Netscape compatibility.
+ */
+ protected boolean excludeBlocks;
+
+ /**
+ * true if moved out of table.
+ */
+ protected boolean exiled;
+
+ /**
+ * true if xmlns attribute on html element.
+ */
+ protected boolean isvoyager;
+
+ /**
+ * bit vector of HTML versions.
+ */
+ protected short versions;
+
+ /**
+ * version as given by doctype (if any).
+ */
+ protected int doctype;
+
+ /**
+ * set if html or PUBLIC is missing.
+ */
+ protected boolean badDoctype;
+
+ /**
+ * start of current node.
+ */
+ protected int txtstart;
+
+ /**
+ * end of current node.
+ */
+ protected int txtend;
+
+ /**
+ * state of lexer's finite state machine.
+ */
+ protected short state;
+
+ /**
+ * current node.
+ */
+ protected Node token;
+
+ /**
+ * Lexer character buffer parse tree nodes span onto this buffer which contains the
concatenated text contents of
+ * all of the elements. Lexsize must be reset for each file. Byte buffer of UTF-8
chars.
+ */
+ protected byte[] lexbuf;
+
+ /**
+ * allocated.
+ */
+ protected int lexlength;
+
+ /**
+ * used.
+ */
+ protected int lexsize;
+
+ /**
+ * Inline stack for compatibility with Mosaic. For deferring text node.
+ */
+ protected Node inode;
+
+ /**
+ * for inferring inline tags.
+ */
+ protected int insert;
+
+ /**
+ * stack.
+ */
+ protected Stack<IStack> istack;
+
+ /**
+ * start of frame.
+ */
+ protected int istackbase;
+
+ /**
+ * used for cleaning up presentation markup.
+ */
+ protected Style styles;
+
+ /**
+ * configuration.
+ */
+ protected Configuration configuration;
+
+ /**
+ * already seen end body tag?
+ */
+ protected boolean seenEndBody;
+
+ /**
+ * already seen end html tag?
+ */
+ protected boolean seenEndHtml;
+
+ /**
+ * report.
+ */
+ protected Report report;
+
+ /**
+ * Root node is saved here.
+ */
+ protected Node root;
+
+ /**
+ * node list.
+ */
+ private List<Node> nodeList;
+
+    /**
+     * Instantiates a new Lexer. The lexer starts at line 1, column 1 in the content state, with every HTML version
+     * (including proprietary ones) still considered possible, no doctype seen, an empty inline stack and an empty
+     * node list.
+     * @param in StreamIn the input is read from
+     * @param configuration configuration instance
+     * @param report report instance, for reporting errors
+     */
+    public Lexer(StreamIn in, Configuration configuration, Report report)
+    {
+        this.report = report;
+        this.in = in;
+        this.lines = 1;
+        this.columns = 1;
+        this.state = LEX_CONTENT;
+        this.versions = (Dict.VERS_ALL | Dict.VERS_PROPRIETARY);
+        this.doctype = Dict.VERS_UNKNOWN;
+        this.insert = -1;
+        // parameterized instead of raw types, matching the generic field declarations
+        this.istack = new Stack<IStack>();
+        this.configuration = configuration;
+        this.nodeList = new Vector<Node>();
+    }
+
+    /**
+     * Creates an empty node and registers it in the node list.
+     * @return the new Node
+     */
+    public Node newNode()
+    {
+        final Node created = new Node();
+        this.nodeList.add(created);
+        return created;
+    }
+
+    /**
+     * Creates a node spanning a region of a text array and registers it in the node list.
+     * @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG |
+     * Node.TEXT_NODE | Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG |
+     * Node.ASP_TAG | Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL
+     * @param textarray array of bytes contained in the Node
+     * @param start start position
+     * @param end end position
+     * @return the new Node
+     */
+    public Node newNode(short type, byte[] textarray, int start, int end)
+    {
+        final Node created = new Node(type, textarray, start, end);
+        this.nodeList.add(created);
+        return created;
+    }
+
+    /**
+     * Creates a named node spanning a region of a text array and registers it in the node list. The node is built
+     * with the tag table of the current configuration.
+     * @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG |
+     * Node.TEXT_NODE | Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG |
+     * Node.ASP_TAG | Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL
+     * @param textarray array of bytes contained in the Node
+     * @param start start position
+     * @param end end position
+     * @param element tag name
+     * @return the new Node
+     */
+    public Node newNode(short type, byte[] textarray, int start, int end, String element)
+    {
+        final Node created = new Node(type, textarray, start, end, element, this.configuration.tt);
+        this.nodeList.add(created);
+        return created;
+    }
+
+    /**
+     * Clones a node and registers the clone in the node list, together with any asp or php node attached to one of
+     * the clone's attributes.
+     * @param node Node to clone
+     * @return cloned Node
+     */
+    public Node cloneNode(Node node)
+    {
+        Node copy = (Node) node.clone();
+        this.nodeList.add(copy);
+
+        AttVal current = copy.attributes;
+        while (current != null)
+        {
+            if (current.asp != null)
+            {
+                this.nodeList.add(current.asp);
+            }
+            if (current.php != null)
+            {
+                this.nodeList.add(current.php);
+            }
+            current = current.next;
+        }
+
+        return copy;
+    }
+
+    /**
+     * Clones an attribute list and registers any asp or php node found on the cloned attributes in the node list.
+     * @param attrs original AttVal (first entry of the list)
+     * @return cloned AttVal
+     */
+    public AttVal cloneAttributes(AttVal attrs)
+    {
+        AttVal copy = (AttVal) attrs.clone();
+
+        AttVal current = copy;
+        while (current != null)
+        {
+            if (current.asp != null)
+            {
+                this.nodeList.add(current.asp);
+            }
+            if (current.php != null)
+            {
+                this.nodeList.add(current.php);
+            }
+            current = current.next;
+        }
+
+        return copy;
+    }
+
+    /**
+     * Re-points every registered node that still references <code>oldtextarray</code> to
+     * <code>newtextarray</code>. Arrays are compared by identity, not by content.
+     * @param oldtextarray previous text array
+     * @param newtextarray new text array
+     */
+    protected void updateNodeTextArrays(byte[] oldtextarray, byte[] newtextarray)
+    {
+        for (Node registered : this.nodeList)
+        {
+            if (registered.textarray == oldtextarray)
+            {
+                registered.textarray = newtextarray;
+            }
+        }
+    }
+
+    /**
+     * Adds a new line node. Used for creating preformatted text from Word2000.
+     * <p>
+     * The node's text array is assigned only after the newline has been appended. Appending may allocate the lexer
+     * buffer for the first time, in which case a reference captured beforehand would be <code>null</code> and would
+     * never be updated.
+     * </p>
+     * @return new line node spanning the single '\n' appended to the lexer buffer
+     */
+    public Node newLineNode()
+    {
+        Node node = newNode();
+
+        node.start = this.lexsize;
+        addCharToLexer('\n');
+        // read the buffer reference after the append so it is the (possibly re-allocated) current one
+        node.textarray = this.lexbuf;
+        node.end = this.lexsize;
+        return node;
+    }
+
+    /**
+     * Tells whether the underlying input stream has been exhausted.
+     * @return <code>true</code> if the end of the input stream has been reached
+     */
+    public boolean endOfInput()
+    {
+        final boolean exhausted = this.in.isEndOfStream();
+        return exhausted;
+    }
+
+ /**
+ * Adds a byte to lexer buffer.
+ * @param c byte to add
+ */
+ public void addByte(int c)
+ {
+ if (this.lexsize + 1 >= this.lexlength)
+ {
+ while (this.lexsize + 1 >= this.lexlength)
+ {
+ if (this.lexlength == 0)
+ {
+ this.lexlength = 8192;
+ }
+ else
+ {
+ this.lexlength = this.lexlength * 2;
+ }
+ }
+
+ byte[] temp = this.lexbuf;
+ this.lexbuf = new byte[this.lexlength];
+ if (temp != null)
+ {
+ System.arraycopy(temp, 0, this.lexbuf, 0, temp.length);
+ updateNodeTextArrays(temp, this.lexbuf);
+ }
+ }
+
+ this.lexbuf[this.lexsize++] = (byte) c;
+ this.lexbuf[this.lexsize] = (byte) '\0'; // debug
+ }
+
+    /**
+     * Overwrites the last byte stored in the lexer buffer. Does nothing while the buffer is empty.
+     * @param c new char
+     */
+    public void changeChar(byte c)
+    {
+        if (this.lexsize <= 0)
+        {
+            return;
+        }
+        this.lexbuf[this.lexsize - 1] = c;
+    }
+
+    /**
+     * Stores char c in the lexer buffer as a UTF-8 encoded byte sequence. For XML or XHTML output, characters outside
+     * the XML <code>Char</code> production are silently dropped. A character that cannot be encoded is stored as the
+     * replacement char U+FFFD.
+     * @param c char to store
+     */
+    public void addCharToLexer(int c)
+    {
+        // Allow only valid XML characters. See: http://www.w3.org/TR/2004/REC-xml-20040204/#NT-Char
+        // Fix by Pablo Mayrgundter 17-08-2004
+        if (this.configuration.xmlOut || this.configuration.xHTML) // only for xml output
+        {
+            boolean validXmlChar = (c >= 0x20 && c <= 0xD7FF) // Check the common-case first.
+                || c == 0x9 || c == 0xA || c == 0xD // Then white-space.
+                || (c >= 0xE000 && c <= 0xFFFD) // Then high-range unicode.
+                || (c >= 0x10000 && c <= 0x10FFFF);
+            if (!validXmlChar)
+            {
+                return;
+            }
+        }
+
+        int[] count = new int[]{0};
+        byte[] buf = new byte[10]; // unsigned char
+
+        if (EncodingUtils.encodeCharToUTF8Bytes(c, buf, null, count))
+        {
+            // encoder signalled an error: use replacement char 0xFFFD encoded as UTF-8
+            buf[0] = (byte) 0xEF;
+            buf[1] = (byte) 0xBF;
+            buf[2] = (byte) 0xBD;
+            count[0] = 3;
+        }
+
+        for (int i = 0; i < count[0]; i++)
+        {
+            addByte(buf[i]); // uint
+        }
+    }
+
+ /**
+ * Adds a string to lexer buffer.
+ * @param str String to add
+ */
+ public void addStringToLexer(String str)
+ {
+ for (int i = 0; i < str.length(); i++)
+ {
+ addCharToLexer(str.charAt(i));
+ }
+ }
+
+ /**
+ * Parses an html entity or numeric character reference whose leading "&amp;" is already the last byte in the
+ * lexer buffer.
+ * <p>
+ * Characters are read from the input and appended to the buffer until a ';' (consumed, not appended), a character
+ * that is not a name character (pushed back) or the end of the stream. The collected text is looked up with
+ * <code>EntityTable.entityCode</code>. A recognised reference is removed from the buffer and replaced by the
+ * character it denotes; an unrecognised one is reported and left in the buffer as text.
+ * </p>
+ * <p>
+ * If the first character is '#' while numeric character references are disabled (<code>configuration.ncr</code>) or
+ * the input encoding is BIG5 or SHIFTJIS, the '#' is pushed back and the method returns without further processing.
+ * </p>
+ * @param mode lexer mode; when the <code>PREFORMATTED</code> bit is set a non-breaking space (160) is stored as a
+ * plain space
+ */
+ public void parseEntity(short mode)
+ {
+ // No longer attempts to insert missing ';' for unknown
+ // entities unless one was present already, since this
+ // gives unexpected results.
+ //
+ // For example: <a href="something.htm?foo&bar&fred">
+ // was tidied to: <a href="something.htm?foo&amp;bar;&amp;fred;">
+ // rather than: <a href="something.htm?foo&amp;bar&amp;fred">
+ //
+ // My thanks for Maurice Buxton for spotting this.
+ //
+ // Also Randy Waki pointed out the following case for the
+ // 04 Aug 00 version (bug #433012):
+ //
+ // For example: <a href="something.htm?id=1&lang=en">
+ // was tidied to: <a href="something.htm?id=1&lang;=en">
+ // rather than: <a href="something.htm?id=1&amp;lang=en">
+ //
+ // where "lang" is a known entity (#9001), but browsers would
+ // misinterpret "&lang;" because it had a value > 256.
+ //
+ // So the case of an apparently known entity with a value > 256 and
+ // missing a semicolon is handled specially.
+ //
+ // "ParseEntity" is also a bit of a misnomer - it handles entities and
+ // numeric character references. Invalid NCR's are now reported.
+
+ int start;
+ boolean first = true;
+ boolean semicolon = false;
+ int c, ch, startcol;
+ String str;
+
+ start = this.lexsize - 1; // to start at "&"
+ startcol = this.in.getCurcol() - 1;
+
+ // collect the entity text; on exit c holds the terminating character (or END_OF_STREAM)
+ while ((c = this.in.readChar()) != StreamIn.END_OF_STREAM)
+ {
+ if (c == ';')
+ {
+ semicolon = true;
+ break;
+ }
+
+ if (first && c == '#')
+ {
+ // #431953 - start RJ
+ if (!this.configuration.ncr
+ || this.configuration.getInCharEncoding() == Configuration.BIG5
+ || this.configuration.getInCharEncoding() == Configuration.SHIFTJIS)
+ {
+ this.in.ungetChar(c);
+ return;
+ }
+ // #431953 - end RJ
+
+ addCharToLexer(c);
+ first = false;
+ continue;
+ }
+
+ first = false;
+
+ if (TidyUtils.isNamechar((char) c))
+ {
+ addCharToLexer(c);
+ continue;
+ }
+
+ // otherwise put it back
+ this.in.ungetChar(c);
+ break;
+ }
+
+ // text from the "&" up to (not including) the terminator
+ str = TidyUtils.getString(this.lexbuf, start, this.lexsize - start);
+
+ // &apos is reported as undefined unless the document is treated as XML/XHTML (39 is the code of the apostrophe)
+ if ("&apos".equals(str) && !configuration.xmlOut &&
!this.isvoyager && !configuration.xHTML)
+ {
+ report.entityError(this, Report.APOS_UNDEFINED, str, 39);
+ }
+
+ ch = EntityTable.getDefaultEntityTable().entityCode(str);
+
+ // drops invalid numeric entities from XML mode. Fix by Pablo Mayrgundter 17-08-2004
+ // if ((this.configuration.xmlOut || this.configuration.xHTML) // only for xml output
+ // && !((ch >= 0x20 && ch <= 0xD7FF) // Check the common-case first.
+ // || ch == 0x9 || ch == 0xA || ch == 0xD // Then white-space.
+ // || (ch >= 0xE000 && ch <= 0xFFFD)))
+ // {
+ // this.lexsize = start;
+ // return;
+ // }
+
+ // deal with unrecognized or invalid entities
+ // #433012 - fix by Randy Waki 17 Feb 01
+ // report invalid NCR's - Terry Teague 01 Sep 01
+ if (ch <= 0 || (ch >= 256 && c != ';'))
+ {
+ // set error position just before offending character
+ this.lines = this.in.getCurline();
+ this.columns = startcol;
+
+ if (this.lexsize > start + 1)
+ {
+ // NOTE(review): this branch looks unreachable - the enclosing condition only admits ch <= 0 or ch >= 256,
+ // so ch can never be within 128..159 here. Confirm before relying on the Win1252/MacRoman replacement.
+ if (ch >= 128 && ch <= 159)
+ {
+ // invalid numeric character reference
+ int c1 = 0;
+
+ if (configuration.replacementCharEncoding == Configuration.WIN1252)
+ {
+ c1 = EncodingUtils.decodeWin1252(ch);
+ }
+ else if (configuration.replacementCharEncoding ==
Configuration.MACROMAN)
+ {
+ c1 = EncodingUtils.decodeMacRoman(ch);
+ }
+
+ // "or" DISCARDED_CHAR with the other errors if discarding char; otherwise default is replacing
+
+ int replaceMode = c1 != 0 ? Report.REPLACED_CHAR :
Report.DISCARDED_CHAR;
+
+ if (c != ';') /* issue warning if not terminated by ';' */
+ {
+ report.entityError(this, Report.MISSING_SEMICOLON_NCR, str, c);
+ }
+
+ report.encodingError(this, (short) (Report.INVALID_NCR |
replaceMode), ch);
+
+ if (c1 != 0)
+ {
+ // make the replacement
+ this.lexsize = start;
+ addCharToLexer(c1);
+ semicolon = false;
+ }
+ else
+ {
+ /* discard */
+ this.lexsize = start;
+ semicolon = false;
+ }
+
+ }
+ else
+ {
+ report.entityError(this, Report.UNKNOWN_ENTITY, str, ch);
+ }
+
+ // the entity text stays in the buffer; restore the ';' that was consumed from the input
+ if (semicolon)
+ {
+ addCharToLexer(';');
+ }
+ }
+ else
+ {
+ // naked &
+ report.entityError(this, Report.UNESCAPED_AMPERSAND, str, ch);
+ }
+ }
+ else
+ {
+ // issue warning if not terminated by ';'
+ if (c != ';')
+ {
+ // set error position just before offending character
+ this.lines = this.in.getCurline();
+ this.columns = startcol;
+ report.entityError(this, Report.MISSING_SEMICOLON, str, c);
+ }
+
+ // drop the entity text (including the "&") and store the decoded character instead
+ this.lexsize = start;
+
+ if (ch == 160 && TidyUtils.toBoolean(mode & PREFORMATTED))
+ {
+ ch = ' ';
+ }
+
+ addCharToLexer(ch);
+
+ // a decoded ampersand is written back in escaped form ("&" followed by "amp;") unless quoteAmpersand is set
+ if (ch == '&' && !this.configuration.quoteAmpersand)
+ {
+ addCharToLexer('a');
+ addCharToLexer('m');
+ addCharToLexer('p');
+ addCharToLexer(';');
+ }
+ }
+ }
+
+ /**
+ * Parses a tag name.
+ * @return first char after the tag name
+ */
+ public char parseTagName()
+ {
+ int c;
+
+ // fold case of first char in buffer
+ c = this.lexbuf[this.txtstart];
+
+ if (!this.configuration.xmlTags && TidyUtils.isUpper((char) c))
+ {
+ c = TidyUtils.toLower((char) c);
+ this.lexbuf[this.txtstart] = (byte) c;
+ }
+
+ while ((c = this.in.readChar()) != StreamIn.END_OF_STREAM)
+ {
+ if (!TidyUtils.isNamechar((char) c))
+ {
+ break;
+ }
+
+ // fold case of subsequent chars
+ if (!this.configuration.xmlTags && TidyUtils.isUpper((char) c))
+ {
+ c = TidyUtils.toLower((char) c);
+ }
+
+ addCharToLexer(c);
+ }
+
+ this.txtend = this.lexsize;
+ return (char) c;
+ }
+
+    /**
+     * Adds every char of the string to the lexer buffer through <code>addCharToLexer</code>.
+     * @param str input String
+     */
+    public void addStringLiteral(String str)
+    {
+        for (char ch : str.toCharArray())
+        {
+            addCharToLexer(ch);
+        }
+    }
+
+ /**
+ * calls addCharToLexer for any char in the string till len is reached.
+ * @param str input String
+ * @param len length of the substring to be added
+ */
+ void addStringLiteralLen(String str, int len)
+ {
+ int strlen = str.length();
+ if (strlen < len)
+ {
+ len = strlen;
+ }
+ for (int i = 0; i < len; i++)
+ {
+ addCharToLexer(str.charAt(i));
+ }
+ }
+
+    /**
+     * Chooses what version to use for a new doctype from the bit vector of still-possible versions. Candidates are
+     * tried in this order: HTML 2.0, HTML 3.2 (only when neither XML output, XML tags nor an XHTML namespace is in
+     * effect), XHTML 1.1, HTML 4 strict, HTML 4 loose, frameset.
+     * @return html version constant, <code>Dict.VERS_UNKNOWN</code> when no candidate applies
+     */
+    public short htmlVersion()
+    {
+        final boolean xmlDocument = this.configuration.xmlOut | this.configuration.xmlTags | this.isvoyager;
+
+        if (TidyUtils.toBoolean(versions & Dict.VERS_HTML20))
+        {
+            return Dict.VERS_HTML20;
+        }
+        else if (!xmlDocument && TidyUtils.toBoolean(versions & Dict.VERS_HTML32))
+        {
+            return Dict.VERS_HTML32;
+        }
+        else if (TidyUtils.toBoolean(versions & Dict.VERS_XHTML11))
+        {
+            return Dict.VERS_XHTML11;
+        }
+        else if (TidyUtils.toBoolean(versions & Dict.VERS_HTML40_STRICT))
+        {
+            return Dict.VERS_HTML40_STRICT;
+        }
+        else if (TidyUtils.toBoolean(versions & Dict.VERS_HTML40_LOOSE))
+        {
+            return Dict.VERS_HTML40_LOOSE;
+        }
+        else if (TidyUtils.toBoolean(versions & Dict.VERS_FRAMESET))
+        {
+            return Dict.VERS_FRAMESET;
+        }
+        else
+        {
+            return Dict.VERS_UNKNOWN;
+        }
+    }
+
+    /**
+     * Returns the name of the version reported by <code>apparentVersion()</code>, taken from the first matching
+     * entry of the table of known versions. The XHTML name is used when the document carries an XHTML namespace.
+     * @return html version name, or <code>null</code> when the version is not in the table
+     */
+    public String htmlVersionName()
+    {
+        final short guessed = apparentVersion();
+
+        for (Lexer.W3CVersionInfo known : W3CVERSION)
+        {
+            if (guessed != known.code)
+            {
+                continue;
+            }
+            return this.isvoyager ? known.voyagerName : known.name;
+        }
+
+        return null;
+    }
+
+    /**
+     * Adds the Tidy "generator" meta element to the document head. If a generator meta element whose content starts
+     * with "HTML Tidy" is already present, only its content is refreshed.
+     * @param root root node
+     * @return <code>true</code> if the tag has been added; <code>false</code> if an existing tag was updated or the
+     * document has no head
+     */
+    public boolean addGenerator(Node root)
+    {
+        Node head = root.findHEAD(this.configuration.tt);
+        if (head == null)
+        {
+            return false;
+        }
+
+        String meta = "HTML Tidy for Java (vers. " + Report.RELEASE_DATE_STRING + "), see www.w3.org";
+
+        for (Node child = head.content; child != null; child = child.next)
+        {
+            if (child.tag != this.configuration.tt.tagMeta)
+            {
+                continue;
+            }
+
+            AttVal nameAttr = child.getAttrByName("name");
+            if (nameAttr == null || nameAttr.value == null || !"generator".equalsIgnoreCase(nameAttr.value))
+            {
+                continue;
+            }
+
+            AttVal contentAttr = child.getAttrByName("content");
+            if (contentAttr != null
+                && contentAttr.value != null
+                && contentAttr.value.length() >= 9
+                && "HTML Tidy".equalsIgnoreCase(contentAttr.value.substring(0, 9)))
+            {
+                // already generated by Tidy: just refresh the content
+                contentAttr.value = meta;
+                return false;
+            }
+        }
+
+        Node generator = this.inferredTag("meta");
+        generator.addAttribute("content", meta);
+        generator.addAttribute("name", "generator");
+        head.insertNodeAtStart(generator);
+        return true;
+    }
+
+    /**
+     * Checks the doctype keywords SYSTEM, PUBLIC, //DTD, //W3C and //EN (keywords should be uppercase). Each keyword
+     * is handed to <code>TidyUtils.findBadSubString</code> together with the doctype text; checking stops at the
+     * first keyword reported as bad.
+     * @param doctype doctype node
+     * @return true if no keyword is reported as bad, i.e. doctype keywords are all uppercase
+     */
+    public boolean checkDocTypeKeyWords(Node doctype)
+    {
+        int len = doctype.end - doctype.start;
+        String s = TidyUtils.getString(this.lexbuf, doctype.start, len);
+
+        String[] keywords = {"SYSTEM", "PUBLIC", "//DTD", "//W3C", "//EN"};
+        for (String keyword : keywords)
+        {
+            if (TidyUtils.findBadSubString(keyword, s, len))
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Examines the DOCTYPE to identify the declared version.
+     * <p>
+     * The doctype must start with "html ". A doctype with only a SYSTEM identifier is not recognized (its keyword case
+     * is corrected in the buffer). Otherwise the first quoted string is inspected: a public identifier starting with
+     * "-//W3C//DTD " is matched by name against every entry of the version table, one starting with "-//IETF//DTD "
+     * is matched against the HTML 2.0 entry.
+     * </p>
+     * <p>
+     * The W3C scan starts at the first table entry so that "HTML 4.01" resolves to HTML 4.01 Strict rather than to
+     * the later XHTML 1.1 entry that shares the name. The IETF branch looks the HTML 2.0 entry up by its code
+     * instead of assuming it is the first entry of the table.
+     * </p>
+     * @param doctype doctype node
+     * @return version code, or 0 when the version is not recognized
+     */
+    public short findGivenVersion(Node doctype)
+    {
+        String p, s;
+        int i, j;
+        int len;
+        String str1;
+        String str2;
+
+        // if root tag for doctype isn't html give up now
+        str1 = TidyUtils.getString(this.lexbuf, doctype.start, 5);
+        if (!"html ".equalsIgnoreCase(str1))
+        {
+            return 0;
+        }
+
+        if (!checkDocTypeKeyWords(doctype))
+        {
+            report.warning(this, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
+        }
+
+        // give up if all we are given is the system id for the doctype
+        str1 = TidyUtils.getString(this.lexbuf, doctype.start + 5, 7);
+        if ("SYSTEM ".equalsIgnoreCase(str1))
+        {
+            // but at least ensure the case is correct
+            if (!str1.substring(0, 6).equals("SYSTEM"))
+            {
+                System.arraycopy(TidyUtils.getBytes("SYSTEM"), 0, this.lexbuf, doctype.start + 5, 6);
+            }
+            return 0; // unrecognized
+        }
+
+        if ("PUBLIC ".equalsIgnoreCase(str1))
+        {
+            if (!str1.substring(0, 6).equals("PUBLIC"))
+            {
+                System.arraycopy(TidyUtils.getBytes("PUBLIC "), 0, this.lexbuf, doctype.start + 5, 6);
+            }
+        }
+        else
+        {
+            this.badDoctype = true;
+        }
+
+        for (i = doctype.start; i < doctype.end; ++i)
+        {
+            if (this.lexbuf[i] == (byte) '"')
+            {
+                str1 = TidyUtils.getString(this.lexbuf, i + 1, 12);
+                str2 = TidyUtils.getString(this.lexbuf, i + 1, 13);
+                if (str1.equals("-//W3C//DTD "))
+                {
+                    // compute length of identifier e.g. "HTML 4.0 Transitional"
+                    for (j = i + 13; j < doctype.end && this.lexbuf[j] != (byte) '/'; ++j)
+                    {
+                        //
+                    }
+                    len = j - i - 13;
+                    p = TidyUtils.getString(this.lexbuf, i + 13, len);
+
+                    // scan the whole table: the first entry ("HTML 4.01" strict) must not be skipped
+                    for (j = 0; j < W3CVERSION.length; ++j)
+                    {
+                        s = W3CVERSION[j].name;
+                        if (len == s.length() && s.equals(p))
+                        {
+                            return W3CVERSION[j].code;
+                        }
+                    }
+
+                    // else unrecognized version
+                }
+                else if (str2.equals("-//IETF//DTD "))
+                {
+                    // compute length of identifier e.g. "HTML 2.0"
+                    for (j = i + 14; j < doctype.end && this.lexbuf[j] != (byte) '/'; ++j)
+                    {
+                        //
+                    }
+                    len = j - i - 14;
+
+                    p = TidyUtils.getString(this.lexbuf, i + 14, len);
+
+                    // the only IETF version in the table is HTML 2.0; locate it by code, not by position
+                    for (j = 0; j < W3CVERSION.length; ++j)
+                    {
+                        if (W3CVERSION[j].code != Dict.VERS_HTML20)
+                        {
+                            continue;
+                        }
+                        s = W3CVERSION[j].name;
+                        if (len == s.length() && s.equals(p))
+                        {
+                            return W3CVERSION[j].code;
+                        }
+                    }
+
+                    // else unrecognized version
+                }
+                break;
+            }
+        }
+
+        return 0;
+    }
+
+    /**
+     * Fixes the xhtml namespace: makes sure the <code>html</code> element found among the children of root carries
+     * an <code>xmlns</code> attribute whose value is the given profile. An existing attribute with a different (or
+     * missing) value is reported as <code>INCONSISTENT_NAMESPACE</code> and overwritten; a missing attribute is
+     * inserted at the head of the attribute list. Nothing happens when there is no <code>html</code> element.
+     * <p>
+     * Comparisons are null-safe, so an attribute entry without a name or an <code>xmlns</code> attribute without a
+     * value no longer causes a NullPointerException.
+     * </p>
+     * @param root root Node
+     * @param profile current profile
+     */
+    public void fixHTMLNameSpace(Node root, String profile)
+    {
+        Node node = root.content;
+        while (node != null && node.tag != this.configuration.tt.tagHtml)
+        {
+            node = node.next;
+        }
+
+        if (node == null)
+        {
+            return;
+        }
+
+        AttVal attr = node.attributes;
+        while (attr != null && !"xmlns".equals(attr.attribute))
+        {
+            attr = attr.next;
+        }
+
+        if (attr != null)
+        {
+            boolean sameNamespace = (attr.value == null) ? (profile == null) : attr.value.equals(profile);
+            if (!sameNamespace)
+            {
+                report.warning(this, node, null, Report.INCONSISTENT_NAMESPACE);
+                attr.value = profile;
+            }
+        }
+        else
+        {
+            attr = new AttVal(node.attributes, null, '"', "xmlns", profile);
+            attr.dict = AttributeTable.getDefaultAttributeTable().findAttribute(attr);
+            node.attributes = attr;
+        }
+    }
+
+    /**
+     * Puts a new DOCTYPE declaration between the <code>&lt;?xml version="1.0" ... ?&gt;</code> declaration, if any,
+     * and the <code>html</code> tag. Should also work for any comments, etc. that may precede the <code>html</code>
+     * tag.
+     * @param root root node
+     * @return new doctype node, or <code>null</code> when the document has no <code>html</code> element
+     */
+    Node newXhtmlDocTypeNode(Node root)
+    {
+        Node html = root.findHTML(this.configuration.tt);
+        if (html == null)
+        {
+            return null;
+        }
+
+        Node newdoctype = newNode();
+        newdoctype.setType(Node.DOCTYPE_TAG);
+        newdoctype.parent = root;
+        newdoctype.next = html;
+
+        if (html == root.content)
+        {
+            // No <?xml ... ?> declaration: the doctype becomes the first child of root.
+            newdoctype.prev = null;
+            root.content = newdoctype;
+        }
+        else
+        {
+            // we have an <?xml ... ?> declaration: link the doctype after whatever precedes html.
+            Node predecessor = html.prev;
+            newdoctype.prev = predecessor;
+            predecessor.next = newdoctype;
+        }
+
+        html.prev = newdoctype;
+        return newdoctype;
+    }
+
+ /**
+  * Fixes the html namespace and rewrites (or creates) the DOCTYPE declaration as an XHTML doctype, according to
+  * the configured doctype mode.
+  * <p>
+  * In auto mode the public identifier is chosen from the versions the document still matches; in strict/loose
+  * mode it is fixed; in user mode a non-empty <code>docTypeStr</code> is used as the public identifier. An
+  * internal DTD subset (<code>[ ... ]</code>) found in an existing doctype is carried over when producing
+  * XHTML or XML output.
+  * </p>
+  * @param root root node
+  * @return <code>true</code> when the doctype mode is ignore or omit; <code>false</code> in every other case,
+  * including after a doctype declaration has been written
+  */
+ public boolean setXHTMLDocType(Node root)
+ {
+     if (this.configuration.docTypeMode == Configuration.DOCTYPE_IGNORE)
+     {
+         return true;
+     }
+
+     Node doctype = root.findDocType();
+
+     fixHTMLNameSpace(root, XHTML_NAMESPACE); // #427839 - fix by Evan Lenz 05 Sep 00
+
+     if (this.configuration.docTypeMode == Configuration.DOCTYPE_OMIT)
+     {
+         if (doctype != null)
+         {
+             Node.discardElement(doctype);
+         }
+         return true;
+     }
+
+     String fpi = " ";
+     String sysid = "";
+
+     if (this.configuration.docTypeMode == Configuration.DOCTYPE_AUTO)
+     {
+         // see what flavor of XHTML this document matches
+         if (TidyUtils.toBoolean(this.versions & Dict.VERS_HTML40_STRICT))
+         {
+             // use XHTML strict
+             fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
+             sysid = VOYAGER_STRICT;
+         }
+         else if (TidyUtils.toBoolean(this.versions & Dict.VERS_FRAMESET))
+         {
+             // use XHTML frames
+             fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN";
+             sysid = VOYAGER_FRAMESET;
+         }
+         else if (TidyUtils.toBoolean(this.versions & Dict.VERS_LOOSE))
+         {
+             fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
+             sysid = VOYAGER_LOOSE;
+         }
+         else if (TidyUtils.toBoolean(this.versions & Dict.VERS_XHTML11))
+         {
+             // use XHTML 1.1
+             fpi = "-//W3C//DTD XHTML 1.1//EN";
+             sysid = VOYAGER_11;
+         }
+         else
+         {
+             // proprietary: no suitable doctype, drop any existing one
+             fpi = null;
+             sysid = "";
+             if (doctype != null) // #473490 - fix by Bjoern Hoehrmann 10 Oct 01
+             {
+                 Node.discardElement(doctype);
+             }
+         }
+     }
+     else if (this.configuration.docTypeMode == Configuration.DOCTYPE_STRICT)
+     {
+         fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
+         sysid = VOYAGER_STRICT;
+     }
+     else if (this.configuration.docTypeMode == Configuration.DOCTYPE_LOOSE)
+     {
+         fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
+         sysid = VOYAGER_LOOSE;
+     }
+
+     // An empty user string is ignored, exactly as fixDocType() does; without the length check
+     // the fpi.charAt(0) test below would throw StringIndexOutOfBoundsException.
+     if (this.configuration.docTypeMode == Configuration.DOCTYPE_USER
+         && this.configuration.docTypeStr != null
+         && this.configuration.docTypeStr.length() > 0)
+     {
+         fpi = this.configuration.docTypeStr;
+         sysid = "";
+     }
+
+     if (fpi == null)
+     {
+         return false;
+     }
+
+     String dtdsub = null;
+     int dtdlen = 0;
+
+     if (doctype != null)
+     {
+         // Look for internal DTD subset
+         if (configuration.xHTML || configuration.xmlOut)
+         {
+             int len = doctype.end - doctype.start + 1;
+             String start = TidyUtils.getString(this.lexbuf, doctype.start, len);
+
+             int dtdbeg = start.indexOf('[');
+             if (dtdbeg >= 0)
+             {
+                 int dtdend = start.substring(dtdbeg).indexOf(']');
+                 if (dtdend >= 0)
+                 {
+                     // keep everything from '[' up to and including the first ']'
+                     dtdlen = dtdend + 1;
+                     dtdsub = start.substring(dtdbeg);
+                 }
+             }
+         }
+     }
+     else
+     {
+         doctype = newXhtmlDocTypeNode(root);
+         if (doctype == null)
+         {
+             return false;
+         }
+     }
+
+     // the declaration text is assembled at the end of the lexer buffer
+     this.txtstart = this.lexsize;
+     this.txtend = this.lexsize;
+
+     // add public identifier
+     addStringLiteral("html PUBLIC ");
+
+     // check if the fpi is quoted or not
+     if (fpi.charAt(0) == '"')
+     {
+         addStringLiteral(fpi);
+     }
+     else
+     {
+         addStringLiteral("\"");
+         addStringLiteral(fpi);
+         addStringLiteral("\"");
+     }
+
+     if (this.configuration.wraplen != 0 && sysid.length() + 6 >= this.configuration.wraplen)
+     {
+         addStringLiteral("\n\"");
+     }
+     else
+     {
+         // FG: don't wrap
+         addStringLiteral(" \"");
+     }
+
+     // add system identifier
+     addStringLiteral(sysid);
+     addStringLiteral("\"");
+
+     if (dtdlen > 0 && dtdsub != null)
+     {
+         addCharToLexer(' ');
+         addStringLiteralLen(dtdsub, dtdlen);
+     }
+
+     this.txtend = this.lexsize;
+
+     // copy the assembled text into the doctype node's own array
+     int length = this.txtend - this.txtstart;
+     doctype.textarray = new byte[length];
+
+     System.arraycopy(this.lexbuf, this.txtstart, doctype.textarray, 0, length);
+     doctype.start = 0;
+     doctype.end = length;
+
+     return false;
+ }
+
+ /**
+  * Return the html version used in document.
+  * <p>
+  * When no doctype version was recorded the result of <code>htmlVersion()</code> is returned. When the recorded
+  * doctype version is one of the known versions and is still among the versions the document matches, that
+  * version is returned. Otherwise an <code>INCONSISTENT_VERSION</code> warning is reported (positioned at line
+  * 1, column 1) and <code>htmlVersion()</code> is returned.
+  * </p>
+  * @return version code
+  */
+ public short apparentVersion()
+ {
+     final int declared = this.doctype;
+
+     if (declared == Dict.VERS_UNKNOWN)
+     {
+         return htmlVersion();
+     }
+
+     final boolean recognised = declared == Dict.VERS_HTML20
+         || declared == Dict.VERS_HTML32
+         || declared == Dict.VERS_HTML40_STRICT
+         || declared == Dict.VERS_HTML40_LOOSE
+         || declared == Dict.VERS_FRAMESET
+         || declared == Dict.VERS_XHTML11;
+
+     // the declared version stands only if the content seen so far is still compatible with it
+     if (recognised && TidyUtils.toBoolean(this.versions & declared))
+     {
+         return (short) declared;
+     }
+
+     // kludge to avoid error appearing at end of file
+     // it would be better to note the actual position
+     // when first encountering the doctype declaration
+     this.lines = 1;
+     this.columns = 1;
+
+     report.warning(this, null, null, Report.INCONSISTENT_VERSION);
+     return this.htmlVersion();
+ }
+
+ /**
+  * Fixup doctype if missing.
+  * <p>
+  * Depending on the configured doctype mode the existing doctype is removed, kept, or replaced by a
+  * declaration whose public identifier is the user string, the HTML 2.0 identifier, or the W3C identifier of
+  * the guessed version.
+  * </p>
+  * @param root root node
+  * @return <code>false</code> if current version has not been identified (or no doctype node could be
+  * created); <code>true</code> otherwise
+  */
+ public boolean fixDocType(Node root)
+ {
+     if (this.badDoctype)
+     {
+         report.warning(this, null, null, Report.MALFORMED_DOCTYPE);
+     }
+
+     Node doctype = root.findDocType();
+
+     if (this.configuration.docTypeMode == Configuration.DOCTYPE_OMIT)
+     {
+         if (doctype != null)
+         {
+             Node.discardElement(doctype);
+         }
+         return true;
+     }
+
+     if (this.configuration.xmlOut || this.configuration.docTypeMode == Configuration.DOCTYPE_IGNORE)
+     {
+         return true;
+     }
+
+     int guessed = Dict.VERS_HTML40_STRICT;
+
+     if (this.configuration.docTypeMode == Configuration.DOCTYPE_STRICT)
+     {
+         Node.discardElement(doctype);
+         doctype = null;
+         guessed = Dict.VERS_HTML40_STRICT;
+     }
+     else if (this.configuration.docTypeMode == Configuration.DOCTYPE_LOOSE)
+     {
+         Node.discardElement(doctype);
+         doctype = null;
+         guessed = Dict.VERS_HTML40_LOOSE;
+     }
+     else if (this.configuration.docTypeMode == Configuration.DOCTYPE_AUTO)
+     {
+         if (doctype != null)
+         {
+             final int declared = this.doctype;
+
+             if (declared == Dict.VERS_UNKNOWN)
+             {
+                 return false;
+             }
+
+             final boolean recognised = declared == Dict.VERS_HTML20
+                 || declared == Dict.VERS_HTML32
+                 || declared == Dict.VERS_HTML40_STRICT
+                 || declared == Dict.VERS_HTML40_LOOSE
+                 || declared == Dict.VERS_FRAMESET
+                 || declared == Dict.VERS_XHTML11;
+
+             // keep the existing doctype when the document still matches the version it names;
+             // otherwise fall through to replace old version by new
+             if (recognised && TidyUtils.toBoolean(this.versions & declared))
+             {
+                 return true;
+             }
+
+             // INCONSISTENT_VERSION warning is now issued by ApparentVersion()
+         }
+
+         // choose new doctype
+         guessed = htmlVersion();
+     }
+
+     if (guessed == Dict.VERS_UNKNOWN)
+     {
+         return false;
+     }
+
+     // for XML use the Voyager system identifier
+     if (this.configuration.xmlOut || this.configuration.xmlTags || this.isvoyager)
+     {
+         // NOTE(review): the local doctype reference is not cleared after the discard, so the
+         // declaration built below is stored in this discarded node - confirm that is intended.
+         if (doctype != null)
+         {
+             Node.discardElement(doctype);
+         }
+
+         // Namespace is the same for all XHTML variants.
+         // Also, don't return yet. Still need to add DOCTYPE declaration.
+         fixHTMLNameSpace(root, XHTML_NAMESPACE);
+     }
+
+     if (doctype == null)
+     {
+         doctype = newXhtmlDocTypeNode(root);
+         if (doctype == null)
+         {
+             return false;
+         }
+     }
+
+     // the declaration text is assembled at the end of the lexer buffer
+     this.txtstart = this.lexsize;
+     this.txtend = this.lexsize;
+
+     // use the appropriate public identifier
+     addStringLiteral("html PUBLIC ");
+
+     final boolean userSupplied = this.configuration.docTypeMode == Configuration.DOCTYPE_USER
+         && this.configuration.docTypeStr != null
+         && this.configuration.docTypeStr.length() > 0;
+
+     if (userSupplied)
+     {
+         // check if the fpi is quoted or not
+         if (this.configuration.docTypeStr.charAt(0) == '"')
+         {
+             addStringLiteral(this.configuration.docTypeStr);
+         }
+         else
+         {
+             // #431889 - fix by Dave Bryan 04 Jan 2001
+             addStringLiteral("\"");
+             addStringLiteral(this.configuration.docTypeStr);
+             addStringLiteral("\"");
+         }
+     }
+     else if (guessed == Dict.VERS_HTML20)
+     {
+         addStringLiteral("\"-//IETF//DTD HTML 2.0//EN\"");
+     }
+     else
+     {
+         addStringLiteral("\"-//W3C//DTD ");
+
+         // append the name of the first table entry matching the guessed version, if any
+         int idx = 0;
+         while (idx < W3CVERSION.length)
+         {
+             if (guessed == W3CVERSION[idx].code)
+             {
+                 addStringLiteral(W3CVERSION[idx].name);
+                 break;
+             }
+             ++idx;
+         }
+
+         addStringLiteral("//EN\"");
+     }
+
+     this.txtend = this.lexsize;
+
+     // copy the assembled text into the doctype node's own array
+     int length = this.txtend - this.txtstart;
+     doctype.textarray = new byte[length];
+
+     System.arraycopy(this.lexbuf, this.txtstart, doctype.textarray, 0, length);
+     doctype.start = 0;
+     doctype.end = length;
+
+     return true;
+ }
+
+ /**
+ * Ensure XML document starts with <code><?XML
version="1.0"?></code>. Add encoding attribute if not using
+ * ASCII or UTF-8 output.
+ * @param root root node
+ * @return always true
+ */
+ public boolean fixXmlDecl(Node root)
+ {
+ Node xml;
+ AttVal version;
+ AttVal encoding;
+
+ if (root.content != null && root.content.type == Node.XML_DECL)
+ {
+ xml = root.content;
+ }
+ else
+ {
+ xml = newNode(Node.XML_DECL, this.lexbuf, 0, 0);
+ xml.next = root.content;
+
+ if (root.content != null)
+ {
+ root.content.prev = xml;
+ xml.next = root.content;
+ }
+
+ root.content = xml;
+ }
+
+ version = xml.getAttrByName("version");
+ encoding = xml.getAttrByName("encoding");
+
+ // We need to insert a check if declared encoding and output encoding mismatch
+ // and fix the Xml declaration accordingly!!!
+ if (encoding == null && this.configuration.getOutCharEncoding() !=
Configuration.UTF8)
+ {
+ if (this.configuration.getOutCharEncoding() == Configuration.LATIN1)
+ {
+ xml.addAttribute("encoding", "iso-8859-1");
+ }
+ if (this.configuration.getOutCharEncoding() == Configuration.ISO2022)
+ {
+ xml.addAttribute("encoding", "iso-2022");
+ }
+ }
+
+ if (version == null)
+ {
+ xml.addAttribute("version", "1.0");
+ }
+
+ return true;
+ }
+
+
+ /**
+  * Creates a start-tag node for an element that was not present in the input and marks it as implicit. The node
+  * refers to the lexer's current text range.
+  * @param name tag name
+  * @return generated node
+  */
+ public Node inferredTag(String name)
+ {
+     Node inferred = newNode(Node.START_TAG, this.lexbuf, this.txtstart, this.txtend, name);
+     inferred.implicit = true;
+     return inferred;
+ }
+
+ /**
+ * Create a text node for the contents of a CDATA element like style or script which
+ * ends with </foo> for some foo.
+ * <p>
+ * Characters are read from the input and appended to the lexer buffer until an end tag whose name matches
+ * the container (ignoring case) is seen, until other unquoted markup is seen (reported as
+ * <code>BAD_CDATA_CONTENT</code>), or until the end of the stream (reported as
+ * <code>MISSING_ENDTAG_FOR</code>). Quoted strings, <code>&lt;![CDATA[ ... ]]&gt;</code> sections and
+ * <code>&lt;!-- ... --&gt;</code> comments are tracked so that markup inside them does not end the element.
+ * </p>
+ * @param container container node
+ * @return text node holding the collected content, or <code>null</code> if nothing was collected
+ */
+ public Node getCDATA(Node container)
+ {
+ int c, lastc, prelastc, start, len, i;
+ // qt: the quote character of the string currently open, 0 when outside a quoted string
+ int qt = 0;
+ // esc: escape character honoured inside the content, 0 when there is none
+ int esc = 0;
+ // str: name of the end tag being read, built up one name character at a time
+ String str="";
+ boolean endtag = false;
+ boolean begtag = false;
+ boolean cdata = false;
+ boolean comment = false;
+
+ if (container.isJavaScript())
+ {
+ esc = '\\';
+ }
+
+ this.lines = this.in.getCurline();
+ this.columns = this.in.getCurcol();
+ this.waswhite = false;
+ this.txtstart = this.lexsize;
+ this.txtend = this.lexsize;
+
+ lastc = '\0';
+ prelastc = '\0';
+ // start: buffer index of the first letter of a candidate tag name, -1 when no tag is open
+ start = -1;
+
+ while ((c = this.in.readChar()) != StreamIn.END_OF_STREAM)
+ {
+ // treat \r\n as \n and \r as \n
+ if (qt > 0)
+ {
+ // #598860 script parsing fails with quote chars
+ // A quoted string is ended by the quotation character, or end of line
+ if ((c == '\r' || c == '\n' || c == qt) &&
(!TidyUtils.toBoolean(esc) || lastc != esc))
+ {
+ qt = 0;
+ }
+ else if (c == '/' && lastc == '<')
+ {
+ start = this.lexsize + 1; // to first letter
+ }
+
+ else if (c == '>' && start >= 0)
+ {
+ // an end tag inside a quoted string: warn, and for javascript escape its '/'
+ len = this.lexsize - start;
+
+ this.lines = this.in.getCurline();
+ this.columns = this.in.getCurcol() - 3;
+
+ report.warning(this, null, null, Report.BAD_CDATA_CONTENT);
+
+ // if javascript insert backslash before /
+ if (TidyUtils.toBoolean(esc))
+ {
+ // shift the buffer right by one from the '/' onwards to make room for the escape
+ for (i = this.lexsize; i > start - 1; --i)
+ {
+ this.lexbuf[i] = this.lexbuf[i - 1];
+ }
+
+ this.lexbuf[start - 1] = (byte) esc;
+ this.lexsize++;
+ }
+
+ start = -1;
+ }
+ }
+ else if (TidyUtils.isQuote(c) && (!TidyUtils.toBoolean(esc) || lastc
!= esc))
+ {
+ qt = c;
+ }
+ else if (c == '<' && !cdata && !comment)
+ {
+ start = this.lexsize + 1; // to first letter
+ endtag = false;
+ begtag = true;
+ }
+// else if (c == '!' && lastc == '<') // Cancel
start tag
+// {
+// start = -1;
+// endtag = false;
+// begtag = false;
+// }
+ // Fix CDATA and comments.
+ else if( c == '[' && this.lexsize >= 8 &&
TidyUtils.getString(this.lexbuf, this.lexsize-8, 8).equals("<![CDATA")){
+ cdata = true;
+ }
+ else if( c == '-' && lastc == '-' &&
this.lexsize >= 3 && TidyUtils.getString(this.lexbuf, this.lexsize-3,
3).equals("<!-")){
+ comment = true;
+ if(cdata || this.configuration.xHTML ){
+ // drop the buffered "<!-" and skip this '-' so the comment opener is not emitted
+ this.lexsize -= 3;
+ continue;
+ }
+ }
+ else if (c == '/' && lastc == '<' &&
!cdata && !comment)
+ {
+ start = this.lexsize + 1; // to first letter
+ endtag = true;
+ begtag = false;
+ str = "";
+ }
+ else if( c == '>' && cdata && lastc == ']'
&& prelastc == ']'){
+ cdata = false;
+ }
+ else if( c == '>' && comment && lastc ==
'-' && prelastc == '-'){
+ comment = false;
+ if(cdata || this.configuration.xHTML ){
+ // drop the buffered "--" and skip this '>' so the comment closer is not emitted
+ this.lexsize -= 2;
+ continue;
+ }
+ }
+ else if (c == '>' && start >= 0) // End of begin or end
tag
+ {
+ // decr: how far before 'start' the content is cut (2 covers "</", 1 covers "<")
+ int decr = 2;
+
+ if (endtag)
+ {
+
+// str = TidyUtils.getString(this.lexbuf, start, len);
+ if (container.element.equalsIgnoreCase(str))
+ {
+ this.txtend = start - decr;
+ this.lexsize = start - decr; // #433857 - fix by Huajun Zeng 26
Apr 01
+ break;
+ }
+ }
+
+ // Unquoted markup will end SCRIPT or STYLE elements
+
+ this.lines = this.in.getCurline();
+ this.columns = this.in.getCurcol() - 3;
+
+ report.warning(this, null, null, Report.BAD_CDATA_CONTENT);
+ if (begtag)
+ {
+ decr = 1;
+ }
+ this.txtend = start - decr;
+ this.lexsize = start - decr;
+ break;
+ }
+ // #427844 - fix by Markus Hoenicka 21 Oct 00
+ else if (c == '\r')
+ {
+// if (begtag || endtag)
+// {
+// continue; // discard whitespace in endtag
+// }
+
+ c = this.in.readChar();
+
+ if (c != '\n')
+ {
+ this.in.ungetChar(c);
+ }
+
+ c = '\n';
+
+ }
+// else if ((c == '\n' || c == '\t' || c == ' ')
&& (endtag))
+// {
+// continue; // discard whitespace in endtag
+// }
+ // collect the end tag name for the comparison with the container element
+ if (endtag && TidyUtils.isNamechar((char) c) ) {
+ str = str + (char) c;
+ }
+ // a '<' not followed by something that looks like a tag is ordinary content: cancel the candidate
+ if(begtag && !TidyUtils.isNamechar((char) c)){
+ if(lastc == '<' || ( qt == 0 && c !='='
&& c !=';' && !TidyUtils.isWhite((char) c)) ){
+ start = -1;
+ endtag = false;
+ begtag = false;
+ }
+ }
+
+ addCharToLexer(c);
+ this.txtend = this.lexsize;
+ prelastc = lastc;
+ lastc = c;
+ }
+
+ if (c == StreamIn.END_OF_STREAM)
+ {
+ report.warning(this, container, null, Report.MISSING_ENDTAG_FOR);
+ }
+
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TEXT_NODE, this.lexbuf, this.txtstart,
this.txtend);
+ return this.token;
+ }
+
+ return null;
+ }
+
+ /**
+  * Marks the current token as pushed back. The flag is checked at the start of <code>getToken</code>, which
+  * may then hand out the same token again instead of reading a new one.
+  */
+ public void ungetToken()
+ {
+     pushed = true;
+ }
+
+ /**
+ * Gets a token.
+ * @param mode one of the following:
+ * <ul>
+ * <li><code>MixedContent</code>-- for elements which don't
accept PCDATA</li>
+ * <li><code>Preformatted</code>-- white space preserved as
is</li>
+ * <li><code>IgnoreMarkup</code>-- for CDATA elements such as
script, style</li>
+ * </ul>
+ * @return next Node
+ */
+ public Node getToken(short mode)
+ {
+ int c = 0;
+ int badcomment = 0;
+ // pass by reference
+ boolean[] isempty = new boolean[1];
+ boolean inDTDSubset = false;
+ AttVal attributes = null;
+ short basemode = mode;
+
+ if (this.pushed)
+ {
+ // duplicate inlines in preference to pushed text nodes when appropriate
+ if (this.token.type != Node.TEXT_NODE || (this.insert == -1 &&
this.inode == null))
+ {
+ this.pushed = false;
+ return this.token;
+ }
+ }
+
+ // at start of block elements, unclosed inline
+ if (this.insert != -1 || this.inode != null)
+ {
+ return insertedToken();
+ }
+
+ this.lines = this.in.getCurline();
+ this.columns = this.in.getCurcol();
+ this.waswhite = false;
+
+ this.txtstart = this.lexsize;
+ this.txtend = this.lexsize;
+
+ while ((c = this.in.readChar()) != StreamIn.END_OF_STREAM)
+ {
+ // FG fix for [427846] different from tidy
+ // if (this.insertspace && (!TidyUtils.toBoolean(mode &
IGNORE_WHITESPACE)))
+ if (this.insertspace && mode != IGNORE_WHITESPACE)
+ {
+ addCharToLexer(' ');
+ }
+ if (this.insertspace && (!TidyUtils.toBoolean(mode &
IGNORE_WHITESPACE)))
+ {
+ this.waswhite = true;
+ this.insertspace = false;
+ }
+
+ // treat \r\n as \n and \r as \n
+ if (c == '\r')
+ {
+ c = this.in.readChar();
+
+ if (c != '\n')
+ {
+ this.in.ungetChar(c);
+ }
+
+ c = '\n';
+ }
+
+ addCharToLexer(c);
+
+ switch (this.state)
+ {
+ case LEX_CONTENT :
+ // element content
+
+ // Discard white space if appropriate.
+ // Its cheaper to do this here rather than in parser methods for
elements that
+ // don't have mixed content.
+ if (TidyUtils.isWhite((char) c) && (mode ==
IGNORE_WHITESPACE) && this.lexsize == this.txtstart + 1)
+ {
+ --this.lexsize;
+ this.waswhite = false;
+ this.lines = this.in.getCurline();
+ this.columns = this.in.getCurcol();
+ continue;
+ }
+
+ if (c == '<')
+ {
+ this.state = LEX_GT;
+ continue;
+ }
+
+ if (TidyUtils.isWhite((char) c))
+ {
+ // was previous char white?
+ if (this.waswhite)
+ {
+ if (mode != PREFORMATTED && mode != IGNORE_MARKUP)
+ {
+ --this.lexsize;
+ this.lines = this.in.getCurline();
+ this.columns = this.in.getCurcol();
+ }
+ }
+ else
+ {
+ // prev char wasn't white
+ this.waswhite = true;
+
+ if (mode != PREFORMATTED && mode != IGNORE_MARKUP
&& c != ' ')
+ {
+ changeChar((byte) ' ');
+ }
+ }
+
+ continue;
+ }
+ else if (c == '&' && mode != IGNORE_MARKUP)
+ {
+ parseEntity(mode);
+ }
+
+ // this is needed to avoid trimming trailing whitespace
+ if (mode == IGNORE_WHITESPACE)
+ {
+ mode = MIXED_CONTENT;
+ }
+
+ this.waswhite = false;
+ continue;
+
+ case LEX_GT :
+ // <
+
+ // check for endtag
+ if (c == '/')
+ {
+ c = this.in.readChar();
+ if (c == StreamIn.END_OF_STREAM)
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ addCharToLexer(c);
+
+ if (TidyUtils.isLetter((char) c))
+ {
+ this.lexsize -= 3;
+ this.txtend = this.lexsize;
+ this.in.ungetChar(c);
+ this.state = LEX_ENDTAG;
+ this.lexbuf[this.lexsize] = (byte) '\0'; // debug
+
+ // changed from
+ // this.in.curcol -= 2;
+ this.columns -= 2;
+
+ // if some text before the </ return it now
+ if (this.txtend > this.txtstart)
+ {
+ // trim space char before end tag
+ if (mode == IGNORE_WHITESPACE &&
this.lexbuf[this.lexsize - 1] == (byte) ' ')
+ {
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ }
+
+ this.token = newNode(Node.TEXT_NODE, this.lexbuf,
this.txtstart, this.txtend);
+ return this.token;
+ }
+
+ continue; // no text so keep going
+ }
+
+ // otherwise treat as CDATA
+ this.waswhite = false;
+ this.state = LEX_CONTENT;
+ continue;
+ }
+
+ if (mode == IGNORE_MARKUP)
+ {
+ // otherwise treat as CDATA
+ this.waswhite = false;
+ this.state = LEX_CONTENT;
+ continue;
+ }
+
+ // look out for comments, doctype or marked sections this isn't
quite right, but its getting there
+ if (c == '!')
+ {
+ c = this.in.readChar();
+
+ if (c == '-')
+ {
+ c = this.in.readChar();
+
+ if (c == '-')
+ {
+ this.state = LEX_COMMENT; // comment
+ this.lexsize -= 2;
+ this.txtend = this.lexsize;
+
+ // if some text before < return it now
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TEXT_NODE, this.lexbuf,
this.txtstart, this.txtend);
+ return this.token;
+ }
+
+ this.txtstart = this.lexsize;
+ continue;
+ }
+
+ report.warning(this, null, null, Report.MALFORMED_COMMENT);
+ }
+ else if (c == 'd' || c == 'D')
+ {
+ this.state = LEX_DOCTYPE; // doctype
+ this.lexsize -= 2;
+ this.txtend = this.lexsize;
+ mode = IGNORE_WHITESPACE;
+
+ // skip until white space or '>'
+
+ for (;;)
+ {
+ c = this.in.readChar();
+
+ if (c == StreamIn.END_OF_STREAM || c == '>')
+ {
+ this.in.ungetChar(c);
+ break;
+ }
+
+ if (!TidyUtils.isWhite((char) c))
+ {
+ continue;
+ }
+
+ // and skip to end of whitespace
+
+ for (;;)
+ {
+ c = this.in.readChar();
+
+ if (c == StreamIn.END_OF_STREAM || c ==
'>')
+ {
+ this.in.ungetChar(c);
+ break;
+ }
+
+ if (TidyUtils.isWhite((char) c))
+ {
+ continue;
+ }
+
+ this.in.ungetChar(c);
+ break;
+ }
+
+ break;
+ }
+
+ // if some text before < return it now
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TEXT_NODE, this.lexbuf,
this.txtstart, this.txtend);
+ return this.token;
+ }
+
+ this.txtstart = this.lexsize;
+ continue;
+ }
+ else if (c == '[')
+ {
+ // Word 2000 embeds <![if ...]> ... <![endif]>
sequences
+ this.lexsize -= 2;
+ this.state = LEX_SECTION;
+ this.txtend = this.lexsize;
+
+ // if some text before < return it now
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TEXT_NODE, this.lexbuf,
this.txtstart, this.txtend);
+ return this.token;
+ }
+
+ this.txtstart = this.lexsize;
+ continue;
+ }
+
+ // otherwise swallow chars up to and including next
'>'
+ while (true)
+ {
+ c = this.in.readChar();
+ if (c == '>')
+ {
+ break;
+ }
+ if (c == -1)
+ {
+ this.in.ungetChar(c);
+ break;
+ }
+ }
+
+ this.lexsize -= 2;
+ this.lexbuf[this.lexsize] = (byte) '\0';
+ this.state = LEX_CONTENT;
+ continue;
+ }
+
+ // processing instructions
+
+ if (c == '?')
+ {
+ this.lexsize -= 2;
+ this.state = LEX_PROCINSTR;
+ this.txtend = this.lexsize;
+
+ // if some text before < return it now
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TEXT_NODE, this.lexbuf,
this.txtstart, this.txtend);
+ return this.token;
+ }
+
+ this.txtstart = this.lexsize;
+ continue;
+ }
+
+ // Microsoft ASP's e.g. <% ... server-code ... %>
+ if (c == '%')
+ {
+ this.lexsize -= 2;
+ this.state = LEX_ASP;
+ this.txtend = this.lexsize;
+
+ // if some text before < return it now
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TEXT_NODE, this.lexbuf,
this.txtstart, this.txtend);
+ return this.token;
+ }
+
+ this.txtstart = this.lexsize;
+ continue;
+ }
+
+ // Netscapes JSTE e.g. <# ... server-code ... #>
+ if (c == '#')
+ {
+ this.lexsize -= 2;
+ this.state = LEX_JSTE;
+ this.txtend = this.lexsize;
+
+ // if some text before < return it now
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TEXT_NODE, this.lexbuf,
this.txtstart, this.txtend);
+ return this.token;
+ }
+
+ this.txtstart = this.lexsize;
+ continue;
+ }
+
+ // check for start tag
+ if (TidyUtils.isLetter((char) c))
+ {
+ this.in.ungetChar(c); // push back letter
+ this.lexsize -= 2; // discard " <" + letter
+ this.txtend = this.lexsize;
+ this.state = LEX_STARTTAG; // ready to read tag name
+
+ // if some text before < return it now
+ if (this.txtend > this.txtstart)
+ {
+ this.token = newNode(Node.TEXT_NODE, this.lexbuf,
this.txtstart, this.txtend);
+ return this.token;
+ }
+
+ continue; // no text so keep going
+ }
+
+ // otherwise treat as CDATA
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ continue;
+
+ case LEX_ENDTAG :
+ // </letter
+ this.txtstart = this.lexsize - 1;
+
+ // changed from
+ // this.in.curcol -= 2;
+ this.columns -= 2;
+
+ c = parseTagName();
+ this.token = newNode(Node.END_TAG, // create endtag token
+ this.lexbuf, this.txtstart, this.txtend, TidyUtils.getString(
+ this.lexbuf,
+ this.txtstart,
+ this.txtend - this.txtstart));
+ this.lexsize = this.txtstart;
+ this.txtend = this.txtstart;
+
+ // skip to '>'
+ while (c != '>')
+ {
+ c = this.in.readChar();
+
+ if (c == StreamIn.END_OF_STREAM)
+ {
+ break;
+ }
+ }
+
+ if (c == StreamIn.END_OF_STREAM)
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ return this.token; // the endtag token
+
+ case LEX_STARTTAG :
+ // first letter of tagname
+ this.txtstart = this.lexsize - 1; // set txtstart to first letter
+ c = parseTagName();
+ isempty[0] = false;
+ attributes = null;
+ this.token = newNode(
+ (isempty[0] ? Node.START_END_TAG : Node.START_TAG),
+ this.lexbuf,
+ this.txtstart,
+ this.txtend,
+ TidyUtils.getString(this.lexbuf, this.txtstart, this.txtend -
this.txtstart));
+
+ // parse attributes, consuming closing ">"
+ if (c != '>')
+ {
+ if (c == '/')
+ {
+ this.in.ungetChar(c);
+ }
+
+ attributes = parseAttrs(isempty);
+ }
+
+ if (isempty[0])
+ {
+ this.token.type = Node.START_END_TAG;
+ }
+
+ this.token.attributes = attributes;
+ this.lexsize = this.txtstart;
+ this.txtend = this.txtstart;
+
+ // swallow newline following start tag
+ // special check needed for CRLF sequence
+ // this doesn't apply to empty elements
+ // nor to preformatted content that needs escaping
+
+ if (
+
+ (mode != PREFORMATTED || preContent(this.token))
+ && (this.token.expectsContent() || this.token.tag ==
this.configuration.tt.tagBr))
+ {
+
+ c = this.in.readChar();
+
+ if (c == '\r')
+ {
+ c = this.in.readChar();
+
+ if (c != '\n')
+ {
+ this.in.ungetChar(c);
+ }
+ }
+ else if (c != '\n' && c != '\f')
+ {
+ this.in.ungetChar(c);
+ }
+
+ this.waswhite = true; // to swallow leading whitespace
+ }
+ else
+ {
+ this.waswhite = false;
+ }
+
+ this.state = LEX_CONTENT;
+
+ if (this.token.tag == null)
+ {
+ report.error(this, null, this.token, Report.UNKNOWN_ELEMENT);
+ }
+ else if (!this.configuration.xmlTags)
+ {
+ constrainVersion(this.token.tag.versions);
+
+ if (TidyUtils.toBoolean(this.token.tag.versions &
Dict.VERS_PROPRIETARY))
+ {
+ // #427810 - fix by Gary Deschaines 24 May 00
+ if (this.configuration.makeClean && (this.token.tag
!= this.configuration.tt.tagNobr && //
+ this.token.tag != this.configuration.tt.tagWbr))
+ {
+ report.warning(this, null, this.token,
Report.PROPRIETARY_ELEMENT);
+ }
+ // #427810 - fix by Terry Teague 2 Jul 01
+ else if (!this.configuration.makeClean)
+ {
+ report.warning(this, null, this.token,
Report.PROPRIETARY_ELEMENT);
+ }
+ }
+
+ if (this.token.tag.getChkattrs() != null)
+ {
+ this.token.tag.getChkattrs().check(this, this.token);
+ }
+ else
+ {
+ this.token.checkAttributes(this);
+ }
+
+ // should this be called before attribute checks?
+ this.token.repairDuplicateAttributes(this);
+
+ }
+
+ return this.token; // return start tag
+
+ case LEX_COMMENT :
+ // seen <!-- so look for -->
+
+ if (c != '-')
+ {
+ continue;
+ }
+
+ c = this.in.readChar();
+ addCharToLexer(c);
+
+ if (c != '-')
+ {
+ continue;
+ }
+
+ end_comment : while (true)
+ {
+ c = this.in.readChar();
+
+ if (c == '>')
+ {
+ if (badcomment != 0)
+ {
+ report.warning(this, null, null,
Report.MALFORMED_COMMENT);
+ }
+
+ this.txtend = this.lexsize - 2; // AQ 8Jul2000
+ this.lexbuf[this.lexsize] = (byte) '\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.COMMENT_TAG, this.lexbuf,
this.txtstart, this.txtend);
+
+ // now look for a line break
+
+ c = this.in.readChar();
+
+ if (c == '\r')
+ {
+ c = this.in.readChar();
+
+ if (c != '\n')
+ {
+ this.token.linebreak = true;
+ }
+ }
+
+ if (c == '\n')
+ {
+ this.token.linebreak = true;
+ }
+ else
+ {
+ this.in.ungetChar(c);
+ }
+
+ return this.token;
+ }
+
+ // note position of first such error in the comment
+ if (badcomment == 0)
+ {
+ this.lines = this.in.getCurline();
+ this.columns = this.in.getCurcol() - 3;
+ }
+
+ badcomment++;
+ if (this.configuration.fixComments)
+ {
+ this.lexbuf[this.lexsize - 2] = (byte) '=';
+ }
+
+ addCharToLexer(c);
+
+ // if '-' then look for '>' to end the
comment
+ if (c != '-')
+ {
+ break end_comment;
+ }
+
+ }
+ // otherwise continue to look for -->
+ this.lexbuf[this.lexsize - 2] = (byte) '=';
+ continue;
+
+ case LEX_DOCTYPE :
+ // seen <!d so look for '> ' munging whitespace
+
+ if (TidyUtils.isWhite((char) c))
+ {
+ if (this.waswhite)
+ {
+ this.lexsize -= 1;
+ }
+
+ this.waswhite = true;
+ }
+ else
+ {
+ this.waswhite = false;
+ }
+
+ if (inDTDSubset)
+ {
+ if (c == ']')
+ {
+ inDTDSubset = false;
+ }
+ }
+ else if (c == '[')
+ {
+ inDTDSubset = true;
+ }
+ if (inDTDSubset || c != '>')
+ {
+ continue;
+ }
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte) '\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.DOCTYPE_TAG, this.lexbuf, this.txtstart,
this.txtend);
+ // make a note of the version named by the doctype
+ this.doctype = findGivenVersion(this.token);
+ return this.token;
+
+ case LEX_PROCINSTR :
+ // seen <? so look for '> '
+ // check for PHP preprocessor instructions <?php ... ?>
+
+ if (this.lexsize - this.txtstart == 3)
+ {
+ if ((TidyUtils.getString(this.lexbuf, this.txtstart,
3)).equals("php"))
+ {
+ this.state = LEX_PHP;
+ continue;
+ }
+ }
+
+ if (this.lexsize - this.txtstart == 4)
+ {
+ if ((TidyUtils.getString(this.lexbuf, this.txtstart,
3)).equals("xml")
+ && TidyUtils.isWhite((char) this.lexbuf[this.txtstart
+ 3]))
+ {
+ this.state = LEX_XMLDECL;
+ attributes = null;
+ continue;
+ }
+ }
+
+ if (this.configuration.xmlPIs) // insist on ?> as terminator
+ {
+ if (c != '?')
+ {
+ continue;
+ }
+
+ // now look for '>'
+ c = this.in.readChar();
+
+ if (c == StreamIn.END_OF_STREAM)
+ {
+ report.warning(this, null, null,
Report.UNEXPECTED_END_OF_FILE);
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ addCharToLexer(c);
+ }
+
+ if (c != '>')
+ {
+ continue;
+ }
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte) '\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.PROC_INS_TAG, this.lexbuf, this.txtstart,
this.txtend);
+ return this.token;
+
+ case LEX_ASP :
+ // seen <% so look for "%> "
+ if (c != '%')
+ {
+ continue;
+ }
+
+ // now look for '>'
+ c = this.in.readChar();
+
+ if (c != '>')
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte) '\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.ASP_TAG, this.lexbuf, this.txtstart,
this.txtend);
+ return this.token;
+
+ case LEX_JSTE :
+ // seen <# so look for "#> "
+ if (c != '#')
+ {
+ continue;
+ }
+
+ // now look for '>'
+ c = this.in.readChar();
+
+ if (c != '>')
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte) '\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.JSTE_TAG, this.lexbuf, this.txtstart,
this.txtend);
+ return this.token;
+
+ case LEX_PHP :
+ // seen " <?php" so look for "?> "
+ if (c != '?')
+ {
+ continue;
+ }
+
+ // now look for '>'
+ c = this.in.readChar();
+
+ if (c != '>')
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte) '\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.PHP_TAG, this.lexbuf, this.txtstart,
this.txtend);
+ return this.token;
+
+ case LEX_XMLDECL : // seen "<?xml" so look for
"?>"
+
+ if (TidyUtils.isWhite((char) c) && c != '?')
+ {
+ continue;
+ }
+
+ // get pseudo-attribute
+ if (c != '?')
+ {
+ String name;
+ Node[] asp = new Node[1];
+ Node[] php = new Node[1];
+ AttVal av = new AttVal();
+ int[] pdelim = new int[1];
+ isempty[0] = false;
+
+ this.in.ungetChar(c);
+
+ name = this.parseAttribute(isempty, asp, php);
+ av.attribute = name;
+
+ av.value = this.parseValue(name, true, isempty, pdelim);
+ av.delim = pdelim[0];
+ av.next = attributes;
+
+ attributes = av;
+ // continue;
+ }
+
+ // now look for '>'
+ c = this.in.readChar();
+
+ if (c != '>')
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+ this.lexsize -= 1;
+ this.txtend = this.txtstart;
+ this.lexbuf[this.txtend] = '\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.XML_DECL, this.lexbuf, this.txtstart,
this.txtend);
+ this.token.attributes = attributes;
+ return this.token;
+
+ case LEX_SECTION :
+ // seen " <![" so look for "]> "
+ if (c == '[')
+ {
+ if (this.lexsize == (this.txtstart + 6)
+ && (TidyUtils.getString(this.lexbuf, this.txtstart,
6)).equals("CDATA["))
+ {
+ this.state = LEX_CDATA;
+ this.lexsize -= 6;
+ mode = IGNORE_MARKUP;
+ continue;
+ }
+ }
+
+ if (c != ']')
+ {
+ continue;
+ }
+
+ // now look for '>'
+ c = this.in.readChar();
+
+ if (c != '>')
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte) '\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.SECTION_TAG, this.lexbuf, this.txtstart,
this.txtend);
+ return this.token;
+
+ case LEX_CDATA :
+ // seen " <![CDATA[" so look for "]]> "
+ if (c != ']')
+ {
+ continue;
+ }
+
+ // now look for ']'
+ c = this.in.readChar();
+
+ if (c != ']')
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ // now look for '>'
+ c = this.in.readChar();
+
+ if (c != '>')
+ {
+ this.in.ungetChar(c);
+ continue;
+ }
+
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte) '\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.CDATA_TAG, this.lexbuf, this.txtstart,
this.txtend);
+ return this.token;
+
+ default :
+ // should never reach here
+ break;
+ }
+ }
+
+ if (this.state == LEX_CONTENT) // text string
+ {
+ this.txtend = this.lexsize;
+
+ if (this.txtend > this.txtstart)
+ {
+ this.in.ungetChar(c);
+
+ if (this.lexbuf[this.lexsize - 1] == (byte) ' ')
+ {
+ this.lexsize -= 1;
+ this.txtend = this.lexsize;
+ }
+
+ this.token = newNode(Node.TEXT_NODE, this.lexbuf, this.txtstart,
this.txtend);
+ return this.token;
+ }
+ }
+ else if (this.state == LEX_COMMENT) // comment
+ {
+ if (c == StreamIn.END_OF_STREAM)
+ {
+ report.warning(this, null, null, Report.MALFORMED_COMMENT);
+ }
+
+ this.txtend = this.lexsize;
+ this.lexbuf[this.lexsize] = (byte) '\0';
+ this.state = LEX_CONTENT;
+ this.waswhite = false;
+ this.token = newNode(Node.COMMENT_TAG, this.lexbuf, this.txtstart,
this.txtend);
+ return this.token;
+ }
+
+ return null;
+ }
+
+ /**
+  * Parses an ASP block that appears inside a start tag, i.e. the text following <code>&lt;%</code> up to and
+  * including the closing <code>%&gt;</code>.
+  * <p>
+  * Some people use ASP to customize attributes. Tidy is not really well suited to dealing with ASP; this is a
+  * workaround for whole attributes, but it does not deal with the case where ASP is used to tailor an attribute
+  * value. A workaround for ASP inside attribute values is to quote the value, e.g.
+  * <code>href='&lt;%=rsSchool.Fields("ID").Value%&gt;'</code>, so that the ASP is masked from Tidy by the quote
+  * marks.
+  * <p>
+  * The characters read are appended to the lexer buffer. The closing <code>%&gt;</code> is removed from the buffer
+  * only when it was actually read; if the input ends first, everything that was read is kept.
+  * @return an ASP node covering the text that was read, or <code>null</code> if the block is empty
+  */
+ public Node parseAsp()
+ {
+     int c;
+     Node asp = null;
+     // true once the closing "%>" has been read and appended to the buffer
+     boolean terminated = false;
+
+     this.txtstart = this.lexsize;
+
+     while ((c = this.in.readChar()) != StreamIn.END_OF_STREAM)
+     {
+         addCharToLexer(c);
+
+         if (c != '%')
+         {
+             continue;
+         }
+
+         if ((c = this.in.readChar()) == StreamIn.END_OF_STREAM)
+         {
+             break;
+         }
+         addCharToLexer(c);
+
+         if (c == '>')
+         {
+             terminated = true;
+             break;
+         }
+     }
+
+     // Drop the two terminator characters only if they are really there. Doing it unconditionally would cut
+     // off content (or move lexsize below txtstart) when the input ends before "%>" is seen.
+     if (terminated)
+     {
+         this.lexsize -= 2;
+     }
+     this.txtend = this.lexsize;
+
+     if (this.txtend > this.txtstart)
+     {
+         asp = newNode(Node.ASP_TAG, this.lexbuf, this.txtstart, this.txtend);
+     }
+
+     this.txtstart = this.txtend;
+     return asp;
+ }
+
+ /**
+  * Parses a PHP block that appears inside a start tag. PHP is like ASP but is based upon XML processing
+  * instructions, e.g. <code>&lt;?php ... ?&gt;</code>; this method reads the text following <code>&lt;?</code> up
+  * to and including the closing <code>?&gt;</code>.
+  * <p>
+  * The characters read are appended to the lexer buffer. The closing <code>?&gt;</code> is removed from the buffer
+  * only when it was actually read; if the input ends first, everything that was read is kept.
+  * @return a PHP node covering the text that was read, or <code>null</code> if the block is empty
+  */
+ public Node parsePhp()
+ {
+     int c;
+     Node php = null;
+     // true once the closing "?>" has been read and appended to the buffer
+     boolean terminated = false;
+
+     this.txtstart = this.lexsize;
+
+     while ((c = this.in.readChar()) != StreamIn.END_OF_STREAM)
+     {
+         addCharToLexer(c);
+
+         if (c != '?')
+         {
+             continue;
+         }
+
+         if ((c = this.in.readChar()) == StreamIn.END_OF_STREAM)
+         {
+             break;
+         }
+         addCharToLexer(c);
+
+         if (c == '>')
+         {
+             terminated = true;
+             break;
+         }
+     }
+
+     // Drop the two terminator characters only if they are really there. Doing it unconditionally would cut
+     // off content (or move lexsize below txtstart) when the input ends before "?>" is seen.
+     if (terminated)
+     {
+         this.lexsize -= 2;
+     }
+     this.txtend = this.lexsize;
+
+     if (this.txtend > this.txtstart)
+     {
+         php = newNode(Node.PHP_TAG, this.lexbuf, this.txtstart, this.txtend);
+     }
+
+     this.txtstart = this.txtend;
+     return php;
+ }
+
+ /**
+  * Reads the next attribute name of the start tag being lexed.
+  * <p>
+  * Leading white space is skipped. A <code>&gt;</code> or <code>/&gt;</code> met before any name is consumed and
+  * <code>null</code> is returned (for <code>/&gt;</code>, <code>isempty[0]</code> is set to <code>true</code>).
+  * A <code>&lt;%</code> or <code>&lt;?</code> met before any name is handed to {@link #parseAsp()} or
+  * {@link #parsePhp()}; the result is stored in <code>asp[0]</code> or <code>php[0]</code> and <code>null</code>
+  * is returned. Stray <code>=</code> and quote characters are reported and skipped.
+  * <p>
+  * The name itself ends at <code>=</code>, <code>&gt;</code>, <code>&lt;</code> or end of stream (all pushed back
+  * onto the input) or at white space. Upper case characters are folded to lower case unless
+  * <code>configuration.xmlTags</code> is set. The lexer buffer is only used as scratch space: <code>lexsize</code>
+  * is restored before returning.
+  * @param isempty flag is passed as array so it can be modified
+  * @param asp asp Node, passed as array so it can be modified
+  * @param php php Node, passed as array so it can be modified
+  * @return parsed attribute name, or <code>null</code> if no name was read
+  */
+ public String parseAttribute(boolean[] isempty, Node[] asp, Node[] php)
+ {
+ int start = 0;
+ String attr;
+ int c = 0;
+ int lastc = 0;
+
+ asp[0] = null; // clear asp pointer
+ php[0] = null; // clear php pointer
+ // skip white space before the attribute
+
+ for (;;)
+ {
+ c = this.in.readChar();
+
+ if (c == '/')
+ {
+ // "/>" closes an empty element; a lone '/' is treated as the first character of a name
+ c = this.in.readChar();
+
+ if (c == '>')
+ {
+ isempty[0] = true;
+ return null;
+ }
+
+ this.in.ungetChar(c);
+ c = '/';
+ break;
+ }
+
+ if (c == '>')
+ {
+ return null;
+ }
+
+ if (c == '<')
+ {
+ c = this.in.readChar();
+
+ if (c == '%')
+ {
+ asp[0] = parseAsp();
+ return null;
+ }
+ else if (c == '?')
+ {
+ php[0] = parsePhp();
+ return null;
+ }
+
+ // any other '<' is pushed back (except inside an XML declaration) and reported
+ this.in.ungetChar(c);
+ if (this.state != LEX_XMLDECL) // FG fix for 532535
+ {
+ this.in.ungetChar('<'); // fix for 433360
+ }
+ report.attrError(this, this.token, null, Report.UNEXPECTED_GT);
+ return null;
+ }
+
+ if (c == '=')
+ {
+ report.attrError(this, this.token, null, Report.UNEXPECTED_EQUALSIGN);
+ continue;
+ }
+
+ if (c == '"' || c == '\'')
+ {
+ report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK);
+ continue;
+ }
+
+ if (c == StreamIn.END_OF_STREAM)
+ {
+ report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE);
+ this.in.ungetChar(c);
+ return null;
+ }
+
+ if (!TidyUtils.isWhite((char) c))
+ {
+ break;
+ }
+ }
+
+ // the name is accumulated in the lexer buffer starting at the current end
+ start = this.lexsize;
+ lastc = c;
+
+ for (;;)
+ {
+ // but push back '=' for parseValue()
+ if (c == '=' || c == '>')
+ {
+ this.in.ungetChar(c);
+ break;
+ }
+
+ if (c == '<' || c == StreamIn.END_OF_STREAM)
+ {
+ this.in.ungetChar(c);
+ break;
+ }
+ // a quote directly after '-' ends the name: the '-' is removed from the buffer and the quote pushed back
+ if (lastc == '-' && (c == '"' || c ==
'\''))
+ {
+ this.lexsize--;
+ this.in.ungetChar(c);
+ break;
+ }
+ if (TidyUtils.isWhite((char) c))
+ {
+ break;
+ }
+
+ // what should be done about non-namechar characters?
+ // currently these are incorporated into the attr name
+
+ if (!this.configuration.xmlTags && TidyUtils.isUpper((char) c))
+ {
+ c = TidyUtils.toLower((char) c);
+ }
+
+ // ++len; #427672 - handle attribute names with multibyte chars - fix by Randy Waki - 10 Aug 00
+ addCharToLexer(c);
+
+ lastc = c;
+ c = this.in.readChar();
+ }
+
+ // #427672 - handle attribute names with multibyte chars - fix by Randy Waki - 10 Aug 00
+ // the length is measured in buffer positions rather than in characters read
+ int len = this.lexsize - start;
+ attr = (len > 0 ? TidyUtils.getString(this.lexbuf, start, len) : null);
+ // release the scratch space used for the name
+ this.lexsize = start;
+
+ return attr;
+ }
+
+ /**
+  * Invoked when <code>&lt;</code> is seen in place of an attribute value. The characters read are appended to the
+  * lexer buffer. If the first character marks ASP, PHP or Tango (<code>%</code>, <code>?</code> or
+  * <code>@</code>) the instruction runs up to and including the next <code>&gt;</code> outside a quoted string;
+  * otherwise it also terminates on white space and a closing <code>&gt;</code> is pushed back onto the input.
+  * This routine recognizes ' and " quoted strings and copies them as a whole.
+  * @return delimiter to use for the value: <code>"</code> by default, <code>'</code> once a double quoted string
+  * has been copied, or <code>0</code> if end of input or <code>&gt;</code> is met inside a quoted string
+  */
+ public int parseServerInstruction()
+ {
+ int c, delim = '"';
+ boolean isrule = false;
+
+ // NOTE(review): this first character is added without an end-of-stream check - confirm this is intended
+ c = this.in.readChar();
+ addCharToLexer(c);
+
+ // check for ASP, PHP or Tango
+ if (c == '%' || c == '?' || c == '@')
+ {
+ isrule = true;
+ }
+
+ for (;;)
+ {
+ c = this.in.readChar();
+
+ if (c == StreamIn.END_OF_STREAM)
+ {
+ break;
+ }
+
+ if (c == '>')
+ {
+ // '>' belongs to a recognized instruction, otherwise it is left for the caller
+ if (isrule)
+ {
+ addCharToLexer(c);
+ }
+ else
+ {
+ this.in.ungetChar(c);
+ }
+
+ break;
+ }
+
+ // if not recognized as ASP, PHP or Tango
+ // then also finish value on whitespace
+ if (!isrule)
+ {
+ if (TidyUtils.isWhite((char) c))
+ {
+ break;
+ }
+ }
+
+ addCharToLexer(c);
+
+ if (c == '"')
+ {
+ // copy the double quoted string up to and including its closing quote
+ do
+ {
+ c = this.in.readChar();
+
+ if (endOfInput()) // #427840 - fix by Terry Teague 30 Jun 01
+ {
+ report.attrError(this, this.token, null,
Report.UNEXPECTED_END_OF_FILE);
+ this.in.ungetChar(c);
+ return 0;
+ }
+ if (c == '>') // #427840 - fix by Terry Teague 30 Jun 01
+ {
+ this.in.ungetChar(c);
+ report.attrError(this, this.token, null, Report.UNEXPECTED_GT);
+ return 0;
+ }
+
+ addCharToLexer(c);
+ }
+ while (c != '"');
+ // the value contains double quotes, so report the single quote as delimiter
+ delim = '\'';
+ continue;
+ }
+
+ if (c == '\'')
+ {
+ // copy the single quoted string up to and including its closing quote
+ do
+ {
+ c = this.in.readChar();
+
+ if (endOfInput()) // #427840 - fix by Terry Teague 30 Jun 01
+ {
+ report.attrError(this, this.token, null,
Report.UNEXPECTED_END_OF_FILE);
+ this.in.ungetChar(c);
+ return 0;
+ }
+ if (c == '>') // #427840 - fix by Terry Teague 30 Jun 01
+ {
+ this.in.ungetChar(c);
+ report.attrError(this, this.token, null, Report.UNEXPECTED_GT);
+ return 0;
+ }
+
+ addCharToLexer(c);
+ }
+ while (c != '\'');
+ }
+ }
+
+ return delim;
+ }
+
+ /**
+  * Parse an attribute value. Values start with "=" or " = " etc.; the "&gt;" at the end of the start tag is not
+  * consumed. The lexer buffer is only used as scratch space: <code>lexsize</code> is restored before returning.
+  * <p>
+  * Unless <code>configuration.literalAttribs</code> is set, white space inside quoted values is normalized to
+  * single spaces, and leading and trailing white space is removed for all attributes except <code>alt</code>,
+  * <code>title</code>, <code>value</code> and <code>prompt</code>.
+  * @param name attribute name
+  * @param foldCase fold case?
+  * @param isempty is attribute empty? Passed as an array reference to allow modification; set to
+  * <code>true</code> when an unquoted value of a non-URL attribute is directly followed by <code>/&gt;</code>
+  * @param pdelim delimiter, passed as an array reference to allow modification
+  * @return parsed value, or <code>null</code> if there is no value
+  */
+ public String parseValue(String name, boolean foldCase, boolean[] isempty, int[] pdelim)
+ {
+     int len = 0;
+     int start;
+     boolean seenGt = false;
+     boolean munge = true;
+     int c = 0;
+     int lastc, delim, quotewarning;
+     String value;
+
+     delim = 0;
+     pdelim[0] = '"';
+
+     // Henry Zrepa reports that some folk are using the embed element with script attributes where newlines are
+     // significant and must be preserved
+
+     if (this.configuration.literalAttribs)
+     {
+         munge = false;
+     }
+
+     // skip white space before the '='
+     while (true)
+     {
+         c = this.in.readChar();
+
+         if (c == StreamIn.END_OF_STREAM)
+         {
+             this.in.ungetChar(c);
+             break;
+         }
+
+         if (!TidyUtils.isWhite((char) c))
+         {
+             break;
+         }
+     }
+
+     // c should be '=' if there is a value; other legal possibilities are white space, '/' and '>'
+
+     if (c != '=' && c != '"' && c != '\'')
+     {
+         this.in.ungetChar(c);
+         return null;
+     }
+
+     // skip white space after '='
+
+     while (true)
+     {
+         c = this.in.readChar();
+
+         if (c == StreamIn.END_OF_STREAM)
+         {
+             this.in.ungetChar(c);
+             break;
+         }
+
+         if (!TidyUtils.isWhite((char) c))
+         {
+             break;
+         }
+     }
+
+     // check for quote marks
+
+     if (c == '"' || c == '\'')
+     {
+         delim = c;
+     }
+     else if (c == '<')
+     {
+         // server side instruction in place of the value: delegate and return what was collected
+         start = this.lexsize;
+         addCharToLexer(c);
+         pdelim[0] = parseServerInstruction();
+         len = this.lexsize - start;
+         this.lexsize = start;
+         return (len > 0 ? TidyUtils.getString(this.lexbuf, start, len) : null);
+     }
+     else
+     {
+         this.in.ungetChar(c);
+     }
+
+     // and read the value string; check for quote mark if needed
+
+     quotewarning = 0;
+     start = this.lexsize;
+     c = '\0';
+
+     while (true)
+     {
+         lastc = c; // track last character
+         c = this.in.readChar();
+
+         if (c == StreamIn.END_OF_STREAM)
+         {
+             report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE);
+             this.in.ungetChar(c);
+             break;
+         }
+
+         if (delim == (char) 0)
+         {
+             if (c == '>')
+             {
+                 this.in.ungetChar(c);
+                 break;
+             }
+
+             if (c == '"' || c == '\'')
+             {
+                 report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK);
+                 break;
+             }
+
+             if (c == '<')
+             {
+                 this.in.ungetChar(c); // fix for 433360
+                 c = '>';
+                 this.in.ungetChar(c);
+                 report.attrError(this, this.token, null, Report.UNEXPECTED_GT);
+                 break;
+             }
+
+             // For cases like <br clear=all/> need to avoid treating /> as part of the attribute value, however
+             // care is needed to avoid so treating <a href=http://www.acme.com /> in this way, which would map the
+             // <a> tag to <a href="http://www.acme.com"/>
+
+             if (c == '/')
+             {
+                 // peek ahead in case of />
+                 c = this.in.readChar();
+
+                 if (c == '>' && !AttributeTable.getDefaultAttributeTable().isUrl(name))
+                 {
+                     isempty[0] = true;
+                     this.in.ungetChar(c);
+                     break;
+                 }
+
+                 // unget peeked char
+                 this.in.ungetChar(c);
+                 c = '/';
+             }
+         }
+         else
+         {
+             // delim is '\'' or '"'
+             if (c == delim)
+             {
+                 break;
+             }
+
+             // treat CRLF, CR and LF as single line break
+
+             if (c == '\r')
+             {
+                 c = this.in.readChar();
+                 if (c != '\n')
+                 {
+                     this.in.ungetChar(c);
+                 }
+
+                 c = '\n';
+             }
+
+             if (c == '\n' || c == '<' || c == '>')
+             {
+                 ++quotewarning;
+             }
+
+             if (c == '>')
+             {
+                 seenGt = true;
+             }
+         }
+
+         if (c == '&')
+         {
+             // no entities in ID attributes
+             if ("id".equalsIgnoreCase(name))
+             {
+                 report.attrError(this, null, null, Report.ENTITY_IN_ID);
+                 continue;
+             }
+
+             addCharToLexer(c);
+             parseEntity((short) 0);
+             continue;
+
+         }
+
+         // kludge for JavaScript attribute values with line continuations in string literals
+
+         if (c == '\\')
+         {
+             c = this.in.readChar();
+
+             if (c != '\n')
+             {
+                 this.in.ungetChar(c);
+                 c = '\\';
+             }
+         }
+
+         if (TidyUtils.isWhite((char) c))
+         {
+             if (delim == (char) 0)
+             {
+                 break;
+             }
+
+             if (munge)
+             {
+                 // discard line breaks in quoted URLs
+                 // #438650 - fix by Randy Waki
+                 if (c == '\n' && AttributeTable.getDefaultAttributeTable().isUrl(name))
+                 {
+                     // warn that we discard this newline
+                     report.attrError(this, this.token, null, Report.NEWLINE_IN_URI);
+                     continue;
+                 }
+
+                 c = ' ';
+
+                 if (lastc == ' ')
+                 {
+                     continue;
+                 }
+             }
+         }
+         else if (foldCase && TidyUtils.isUpper((char) c))
+         {
+             c = TidyUtils.toLower((char) c);
+         }
+
+         addCharToLexer(c);
+     }
+
+     if (quotewarning > 10 && seenGt && munge)
+     {
+         // there is almost certainly a missing trailing quote mark as we have seen too many newlines, < or >
+         // characters. an exception is made for Javascript attributes and the javascript URL scheme which may
+         // legitimately include < and >, and for attributes starting with "<xml " as generated by Microsoft Office.
+
+         if (!AttributeTable.getDefaultAttributeTable().isScript(name)
+             && !(AttributeTable.getDefaultAttributeTable().isUrl(name) && "javascript:".equals(TidyUtils.getString(
+                 this.lexbuf,
+                 start,
+                 11)))
+             && !"<xml ".equals(TidyUtils.getString(this.lexbuf, start, 5))) // #500236 - fix by Klaus Johannes Rusch
+         // 06 Jan 02
+         {
+             report.error(this, null, null, Report.SUSPECTED_MISSING_QUOTE);
+         }
+     }
+
+     len = this.lexsize - start;
+     this.lexsize = start;
+
+     if (len > 0 || delim != 0)
+     {
+         // ignore leading and trailing white space for all but title, alt, value and prompts attributes unless
+         // --literal-attributes is set to yes
+         // #994841 - Whitespace is removed from value attributes
+
+         if (munge && !TidyUtils.isInValuesIgnoreCase(new String[]{"alt", "title", "value", "prompt"}, name))
+         {
+             // Both loops are bounded by the remaining length: an empty quoted value (len == 0) must not read
+             // the byte in front of the value, and a value made of white space only must not run past its start.
+             while (len > 0 && TidyUtils.isWhite((char) this.lexbuf[start + len - 1]))
+             {
+                 --len;
+             }
+
+             // "start" is an offset into lexbuf, so the loop condition has to test the remaining length
+             // instead of comparing the offset with the length.
+             while (len > 0 && TidyUtils.isWhite((char) this.lexbuf[start]))
+             {
+                 ++start;
+                 --len;
+             }
+         }
+
+         value = TidyUtils.getString(this.lexbuf, start, len);
+     }
+     else
+     {
+         value = null;
+     }
+
+     // note delimiter if given
+     if (delim != 0)
+     {
+         pdelim[0] = delim;
+     }
+     else
+     {
+         pdelim[0] = '"';
+     }
+
+     return value;
+ }
+
+ /**
+  * Check if attr is a valid name: the first character must be a letter and every following character must be a
+  * name character.
+  * @param attr String to check; <code>null</code> and the empty string are not valid names
+  * @return <code>true</code> if attr is a valid name.
+  */
+ public static boolean isValidAttrName(String attr)
+ {
+     // without this guard charAt(0) throws for an empty string
+     if (attr == null || attr.length() == 0)
+     {
+         return false;
+     }
+
+     // first character should be a letter
+     if (!TidyUtils.isLetter(attr.charAt(0)))
+     {
+         return false;
+     }
+
+     // remaining characters should be namechars
+     for (int i = 1; i < attr.length(); i++)
+     {
+         if (!TidyUtils.isNamechar(attr.charAt(i)))
+         {
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+ /**
+  * Checks whether a string can be used as a CSS1 selector name.
+  * <p>
+  * In CSS1, selectors can contain only the characters A-Z, 0-9, and Unicode characters 161-255, plus dash (-);
+  * they cannot start with a dash or a digit; they can also contain escaped characters and any Unicode character as
+  * a numeric code. The backslash followed by at most four hexadecimal digits (0..9A..F) stands for the Unicode
+  * character with that number. Any character except a hexadecimal digit can be escaped to remove its special
+  * meaning, by putting a backslash in front.
+  * @param buf css selector name
+  * @return <code>true</code> if the given string is a valid css1 selector name
+  */
+ public static boolean isCSS1Selector(String buf)
+ {
+     if (buf == null)
+     {
+         return false;
+     }
+
+     // #508936 - CSS class naming for -clean option
+     // 0 while outside an escape; otherwise 1 for the backslash plus the number of digits seen after it
+     int escapeRun = 0;
+     final int length = buf.length();
+
+     for (int index = 0; index < length; index++)
+     {
+         final char ch = buf.charAt(index);
+
+         if (ch == '\\')
+         {
+             // ab\555\444 is 4 chars {'a', 'b', \555, \444}
+             escapeRun = 1;
+             continue;
+         }
+
+         if (Character.isDigit(ch))
+         {
+             // an escape holds at most four digits (max length "\112F")
+             if (escapeRun > 0)
+             {
+                 escapeRun++;
+                 if (escapeRun >= 6)
+                 {
+                     return false;
+                 }
+             }
+
+             // a digit cannot be the first character unless it is escaped
+             if (index == 0 && escapeRun == 0)
+             {
+                 return false;
+             }
+             continue;
+         }
+
+         final boolean acceptable = escapeRun > 0 // escaped? anything goes
+             || (index > 0 && ch == '-') // dash cannot be the first character
+             || Character.isLetter(ch) // letters anywhere
+             || (ch >= 161 && ch <= 255); // Unicode 161-255 anywhere
+
+         if (!acceptable)
+         {
+             return false;
+         }
+
+         // any non-digit ends the current escape
+         escapeRun = 0;
+     }
+
+     return true;
+ }
+
+ /**
+  * Parse tag attributes. Attributes are read one by one until the input ends or no further attribute name is
+  * found. Each accepted attribute is put in front of the list built so far; ASP and PHP blocks found between
+  * attributes are stored as list entries of their own. Attributes with an invalid name are reported and left out
+  * of the list.
+  * @param isempty is tag empty?
+  * @return parsed attribute/value list
+  */
+ public AttVal parseAttrs(boolean[] isempty)
+ {
+     final int[] quote = new int[1];
+     final Node[] aspHolder = new Node[1];
+     final Node[] phpHolder = new Node[1];
+     AttVal head = null;
+
+     while (!endOfInput())
+     {
+         final String attrName = parseAttribute(isempty, aspHolder, phpHolder);
+
+         if (attrName == null)
+         {
+             // no name: either ASP/PHP markup stands in for an attribute, or the list is finished
+             if (aspHolder[0] != null)
+             {
+                 head = new AttVal(head, null, aspHolder[0], null, '\0', null, null);
+                 continue;
+             }
+
+             if (phpHolder[0] != null)
+             {
+                 head = new AttVal(head, null, null, phpHolder[0], '\0', null, null);
+                 continue;
+             }
+
+             break;
+         }
+
+         final String attrValue = parseValue(attrName, false, isempty, quote);
+
+         if (isValidAttrName(attrName))
+         {
+             final AttVal accepted = new AttVal(head, null, null, null, quote[0], attrName, attrValue);
+             accepted.dict = AttributeTable.getDefaultAttributeTable().findAttribute(accepted);
+             head = accepted;
+             continue;
+         }
+
+         // invalid name: build a detached entry that is only used for the error report
+         final AttVal rejected = new AttVal(null, null, null, null, 0, attrName, attrValue);
+
+         // #427664 - fix by Gary Peskin 04 Aug 00; other fixes by Dave Raggett
+         if (attrValue != null)
+         {
+             report.attrError(this, this.token, rejected, Report.BAD_ATTRIBUTE_VALUE);
+         }
+         else if (TidyUtils.lastChar(attrName) == '"')
+         {
+             report.attrError(this, this.token, rejected, Report.MISSING_QUOTEMARK);
+         }
+         else
+         {
+             report.attrError(this, this.token, rejected, Report.UNKNOWN_ATTRIBUTE);
+         }
+     }
+
+     return head;
+ }
+
+ /**
+  * Push a copy of an inline node onto the inline stack. Nothing is pushed if the node is implicit (implicit tags
+  * are the ones generated from the istack), has no tag, is not an inline element, or is an OBJECT/APPLET like
+  * element. One issue arises with pushing inlines when the tag is already pushed. For instance:
+  * <code>&lt;p&gt;&lt;em&gt; text &lt;p&gt;&lt;em&gt; more text</code> shouldn't be mapped to
+  * <code>&lt;p&gt;&lt;em&gt; text &lt;/em&gt;&lt;/p&gt;&lt;p&gt;&lt;em&gt;&lt;em&gt; more text
+  * &lt;/em&gt;&lt;/em&gt;</code>. Therefore a tag that is already on the stack is not pushed again, except for
+  * FONT.
+  * @param node Node to be pushed
+  */
+ public void pushInline(Node node)
+ {
+     if (node.implicit || node.tag == null)
+     {
+         return;
+     }
+
+     // only inline elements that are not object-like are tracked
+     if (!TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE)
+         || TidyUtils.toBoolean(node.tag.model & Dict.CM_OBJECT))
+     {
+         return;
+     }
+
+     // avoid duplicates of the same tag, FONT being the exception
+     if (node.tag != this.configuration.tt.tagFont && isPushed(node))
+     {
+         return;
+     }
+
+     final IStack entry = new IStack();
+     entry.tag = node.tag;
+     entry.element = node.element;
+     if (node.attributes != null)
+     {
+         entry.attributes = cloneAttributes(node.attributes);
+     }
+     this.istack.push(entry);
+ }
+
+ /**
+  * Pop a copy of an inline node from the stack. If a node is given, nothing happens unless it is an inline,
+  * non object-like element. For <code>&lt;/a&gt;</code> entries are popped until an <code>&lt;a&gt;</code> has
+  * been removed (or the stack is empty); in every other case, including a <code>null</code> node, the topmost
+  * entry is popped. The insert position is reset when it no longer points into the stack.
+  * @param node Node to be popped
+  */
+ public void popInline(Node node)
+ {
+     if (node != null)
+     {
+         if (node.tag == null
+             || !TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE)
+             || TidyUtils.toBoolean(node.tag.model & Dict.CM_OBJECT))
+         {
+             return;
+         }
+
+         // if node is </a> then pop until we find an <a>
+         if (node.tag == this.configuration.tt.tagA)
+         {
+             boolean anchorFound = false;
+             while (!anchorFound && this.istack.size() > 0)
+             {
+                 final IStack popped = (IStack) this.istack.pop();
+                 anchorFound = (popped.tag == this.configuration.tt.tagA);
+             }
+
+             if (this.insert >= this.istack.size())
+             {
+                 this.insert = -1;
+             }
+             return;
+         }
+     }
+
+     if (this.istack.size() <= 0)
+     {
+         return;
+     }
+
+     // the popped entry itself is not needed
+     final IStack discarded = (IStack) this.istack.pop();
+     if (this.insert >= this.istack.size())
+     {
+         this.insert = -1;
+     }
+ }
+
+ /**
+  * Is the node in the stack? Entries are compared by tag identity.
+  * @param node Node
+  * @return <code>true</code> if an entry with the tag of the node is found in the stack
+  */
+ public boolean isPushed(Node node)
+ {
+     // walk from the top of the stack down to the bottom
+     int index = this.istack.size();
+
+     while (--index >= 0)
+     {
+         final IStack entry = (IStack) this.istack.elementAt(index);
+         if (entry.tag == node.tag)
+         {
+             return true;
+         }
+     }
+
+     return false;
+ }
+
+ /**
+  * This has the effect of inserting "missing" inline elements around the contents of blocklevel elements such as
+  * P, TD, TH, DIV, PRE etc. This procedure is called at the start of ParseBlock. When the inline stack is not
+  * empty, as will be the case in: <code>&lt;i&gt;&lt;h1&gt;italic heading&lt;/h1&gt;&lt;/i&gt;</code> which is then
+  * treated as equivalent to <code>&lt;h1&gt;&lt;i&gt;italic heading&lt;/i&gt;&lt;/h1&gt;</code>. This is
+  * implemented by setting the lexer into a mode where it gets tokens from the inline stack rather than from the
+  * input stream.
+  * @param node original node
+  * @return number of stack entries above the current stack base
+  */
+ public int inlineDup(Node node)
+ {
+     final int pending = this.istack.size() - this.istackbase;
+
+     if (pending <= 0)
+     {
+         return pending;
+     }
+
+     // start replaying the stack from its base and remember the node that triggered it
+     this.insert = this.istackbase;
+     this.inode = node;
+     return pending;
+ }
+
+ /**
+  * Returns the next token while the lexer is replaying the inline stack. When no stack position is pending
+  * (<code>insert == -1</code>) the deferred node is handed out and cleared. Otherwise an implicit start tag is
+  * created from the stack entry at the insert position and the position is advanced, or reset to
+  * <code>-1</code> once the end of the stack is reached.
+  * @return the deferred node or a newly created implicit start tag
+  */
+ public Node insertedToken()
+ {
+     if (this.insert == -1)
+     {
+         final Node deferred = this.inode;
+         this.inode = null;
+         return deferred;
+     }
+
+     // without a deferred node the position is taken from the input stream, otherwise current values are used
+     if (this.inode == null)
+     {
+         this.lines = this.in.getCurline();
+         this.columns = this.in.getCurcol();
+     }
+
+     final Node implied = newNode(Node.START_TAG, this.lexbuf, this.txtstart, this.txtend);
+
+     // GLP: Bugfix 126261. Remove when this change is fixed in istack.c in the original Tidy
+     implied.implicit = true;
+
+     final IStack entry = (IStack) this.istack.elementAt(this.insert);
+     implied.element = entry.element;
+     implied.tag = entry.tag;
+     if (entry.attributes != null)
+     {
+         implied.attributes = cloneAttributes(entry.attributes);
+     }
+
+     // advance to the next item on the stack, or leave replay mode at the end
+     final int next = this.insert + 1;
+     this.insert = (next < this.istack.size()) ? next : -1;
+
+     return implied;
+ }
+
+ /**
+  * Can the given element be removed? Text nodes can always be removed. Other nodes are kept if they have
+  * content, an id or name attribute, no tag, or are one of the elements that are meaningful when empty.
+  * @param element node
+  * @return <code>true</code> if the element can be removed
+  */
+ public boolean canPrune(Node element)
+ {
+     if (element.type == Node.TEXT_NODE)
+     {
+         return true;
+     }
+
+     // anything with content stays; so do anchors with attributes and (optionally) empty paragraphs
+     if (element.content != null
+         || (element.tag == this.configuration.tt.tagA && element.attributes != null)
+         || (element.tag == this.configuration.tt.tagP && !this.configuration.dropEmptyParas))
+     {
+         return false;
+     }
+
+     // unknown tags, table rows and elements declared as empty are never pruned
+     if (element.tag == null
+         || TidyUtils.toBoolean(element.tag.model & Dict.CM_ROW)
+         || TidyUtils.toBoolean(element.tag.model & Dict.CM_EMPTY))
+     {
+         return false;
+     }
+
+     // elements that are meaningful without content
+     // #540555 Empty title tag is trimmed
+     // #433359 - fix by Randy Waki 12 Mar 01 - Empty iframe is trimmed
+     if (element.tag == this.configuration.tt.tagApplet
+         || element.tag == this.configuration.tt.tagObject
+         || (element.tag == this.configuration.tt.tagScript && element.getAttrByName("src") != null)
+         || element.tag == this.configuration.tt.tagTitle
+         || element.tag == this.configuration.tt.tagIframe)
+     {
+         return false;
+     }
+
+     // an element with an id or name attribute is kept
+     final boolean identified = element.getAttrByName("id") != null || element.getAttrByName("name") != null;
+     return !identified;
+ }
+
+ /**
+  * Duplicate the name attribute as an id and check if id and name match. A mismatch between an existing id value
+  * and the name value is reported; a missing id is only added when <code>configuration.xmlOut</code> is set.
+  * @param node Node to check for name/id attributes
+  */
+ public void fixId(Node node)
+ {
+     final AttVal nameAttr = node.getAttrByName("name");
+     final AttVal idAttr = node.getAttrByName("id");
+
+     if (nameAttr == null)
+     {
+         return;
+     }
+
+     if (idAttr == null)
+     {
+         if (this.configuration.xmlOut)
+         {
+             node.addAttribute("id", nameAttr.value);
+         }
+         return;
+     }
+
+     final boolean mismatch = idAttr.value != null && !idAttr.value.equals(nameAttr.value);
+     if (mismatch)
+     {
+         report.attrError(this, node, nameAttr, Report.ID_NAME_MISMATCH);
+     }
+ }
+
+ /**
+  * Defer duplicates when entering a table or other element where the inlines shouldn't be duplicated. Clears
+  * both the pending insert position and the deferred node.
+  */
+ public void deferDup()
+ {
+     this.inode = null;
+     this.insert = -1;
+ }
+
+ /**
+  * Finds the last inserted element, so that an error message can be attached to the proper place. Starting at the
+  * root, the <code>last</code> link is followed until a node without one is reached.
+  * @return the node reached by following the <code>last</code> links from the root
+  */
+ public Node getLastNode()
+ {
+     Node current = root;
+     Node deeper = current.last;
+
+     while (deeper != null)
+     {
+         current = deeper;
+         deeper = current.last;
+     }
+
+     return current;
+ }
+
+ /**
+  * Constrain the html version in the document to the given one. Everything is allowed in the proprietary version
+  * of HTML; this is handled here rather than in the tag/attr dicts.
+  * @param vers html version code
+  */
+ void constrainVersion(int vers)
+ {
+     // the proprietary bit always survives the masking
+     final int allowed = vers | Dict.VERS_PROPRIETARY;
+     this.versions &= allowed;
+ }
+
+ /**
+  * Is content acceptable for pre elements? P is accepted (it is coerced to br's); apart from that only nodes
+  * whose tag has the <code>CM_INLINE</code> or <code>CM_NEW</code> model bit set are accepted.
+  * @param node content
+  * @return <code>true</code> if node is acceptable in pre elements
+  */
+ protected boolean preContent(Node node)
+ {
+     // p is coerced to br's
+     if (node.tag == this.configuration.tt.tagP)
+     {
+         return true;
+     }
+
+     if (node.tag == null)
+     {
+         return false;
+     }
+
+     return TidyUtils.toBoolean(node.tag.model & (Dict.CM_INLINE | Dict.CM_NEW));
+ }
+
+ /**
+  * Describes one W3C document type: its names, its profile and the code identifying it.
+  */
+ private static class W3CVersionInfo
+ {
+
+     /**
+      * Version name.
+      */
+     String name;
+
+     /**
+      * Voyager (xhtml) name.
+      */
+     String voyagerName;
+
+     /**
+      * Profile: VOYAGER_STRICT | VOYAGER_LOOSE | VOYAGER_FRAMESET.
+      */
+     String profile;
+
+     /**
+      * Unique code for this version info.
+      */
+     short code;
+
+     /**
+      * Creates a version info holding the given values.
+      * @param name version name
+      * @param voyagerName voyager (xhtml) name
+      * @param profile VOYAGER_STRICT | VOYAGER_LOOSE | VOYAGER_FRAMESET
+      * @param code unique code for this version info
+      */
+     public W3CVersionInfo(String name, String voyagerName, String profile, short code)
+     {
+         this.code = code;
+         this.profile = profile;
+         this.voyagerName = voyagerName;
+         this.name = name;
+     }
+ }
+
+}
\ No newline at end of file
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Node.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Node.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Node.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,1597 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Used for elements and text nodes. The element name is null for text nodes. Start and end are offsets into lexbuf,
+ * which contains the textual content of all elements in the parse tree. Parent and content allow traversal of the
+ * parse tree in any direction. Attributes are represented as a linked list of AttVal nodes which hold the strings
+ * for attribute/value pairs.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org</a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca</a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class Node implements Cloneable
+{
+
+ /**
+ * node type: root.
+ */
+ public static final short ROOT_NODE = 0;
+
+ /**
+ * node type: doctype.
+ */
+ public static final short DOCTYPE_TAG = 1;
+
+ /**
+ * node type: comment.
+ */
+ public static final short COMMENT_TAG = 2;
+
+ /**
+ * node type: processing instruction.
+ */
+ public static final short PROC_INS_TAG = 3;
+
+ /**
+ * node type: text.
+ */
+ public static final short TEXT_NODE = 4;
+
+ /**
+ * Start tag.
+ */
+ public static final short START_TAG = 5;
+
+ /**
+ * End tag.
+ */
+ public static final short END_TAG = 6;
+
+ /**
+ * Start-end tag: a start tag that is closed in place (written with a trailing slash).
+ */
+ public static final short START_END_TAG = 7;
+
+ /**
+ * node type: CDATA.
+ */
+ public static final short CDATA_TAG = 8;
+
+ /**
+ * node type: section tag.
+ */
+ public static final short SECTION_TAG = 9;
+
+ /**
+ * node type: asp tag.
+ */
+ public static final short ASP_TAG = 10;
+
+ /**
+ * node type: jste tag.
+ */
+ public static final short JSTE_TAG = 11;
+
+ /**
+ * node type: php tag.
+ */
+ public static final short PHP_TAG = 12;
+
+ /**
+ * node type: xml declaration.
+ */
+ public static final short XML_DECL = 13;
+
+ /**
+ * node type: CDATA text.
+ */
+ public static final short CDATA_TEXT = 14;
+
+ /**
+  * Description for all the node types, indexed by the numeric node type constant (ROOT_NODE = 0 up to
+  * CDATA_TEXT = 14). Used in toString. The entries must stay in exactly the same order as the constants and there
+  * must be one entry per constant, otherwise the labels are shifted or the lookup runs past the end of the array.
+  */
+ private static final String[] NODETYPE_STRING = {
+     "RootNode",
+     "DocTypeTag",
+     "CommentTag",
+     "ProcInsTag",
+     "TextNode",
+     "StartTag",
+     "EndTag",
+     "StartEndTag",
+     "CDATATag",
+     "SectionTag",
+     "AspTag",
+     "JsteTag",
+     "PhpTag",
+     "XmlDecl",
+     "CDATAText"};
+
+ /**
+ * parent node.
+ */
+ protected Node parent;
+
+ /**
+ * previous node.
+ */
+ protected Node prev;
+
+ /**
+ * next node.
+ */
+ protected Node next;
+
+ /**
+ * last node.
+ */
+ protected Node last;
+
+ /**
+ * start of span onto text array.
+ */
+ protected int start;
+
+ /**
+ * end of span onto text array.
+ */
+ protected int end;
+
+ /**
+ * the text array.
+ */
+ protected byte[] textarray;
+
+ /**
+ * TextNode, StartTag, EndTag etc.
+ */
+ protected short type;
+
+ /**
+ * true if closed by explicit end tag.
+ */
+ protected boolean closed;
+
+ /**
+ * true if inferred.
+ */
+ protected boolean implicit;
+
+ /**
+ * true if followed by a line break.
+ */
+ protected boolean linebreak;
+
+ /**
+ * old tag when it was changed.
+ */
+ protected Dict was;
+
+ /**
+ * tag's dictionary definition.
+ */
+ protected Dict tag;
+
+ /**
+ * Tag name.
+ */
+ protected String element;
+
+ /**
+ * Attribute/Value linked list.
+ */
+ protected AttVal attributes;
+
+ /**
+ * Contained node.
+ */
+ protected Node content;
+
+ /**
+ * DOM adapter.
+ */
+ protected org.w3c.dom.Node adapter;
+
+ /**
+  * Creates a node of type TEXT_NODE with no text array and an empty (0, 0) span.
+  */
+ public Node()
+ {
+     this(TEXT_NODE, null, 0, 0);
+ }
+
+ /**
+  * Creates an unlinked node of the given type spanning <code>start</code> to <code>end</code> of
+  * <code>textarray</code>.
+  * @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG |
+  * Node.TEXT_NODE | Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG |
+  * Node.ASP_TAG | Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL
+  * @param textarray array of bytes contained in the Node
+  * @param start start position
+  * @param end end position
+  */
+ public Node(short type, byte[] textarray, int start, int end)
+ {
+     // Links (parent, prev, next, last, content), was, tag, element and attributes are left at null and the
+     // closed/implicit/linebreak flags at false: those are the field defaults, so only the arguments are stored.
+     this.type = type;
+     this.textarray = textarray;
+     this.start = start;
+     this.end = end;
+ }
+
+ /**
+  * Creates an unlinked node with an element name. For start, start-end and end tags the tag table is asked to look
+  * up the node through <code>findTag</code>.
+  * @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG |
+  * Node.TEXT_NODE | Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG |
+  * Node.ASP_TAG | Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL
+  * @param textarray array of bytes contained in the Node
+  * @param start start position
+  * @param end end position
+  * @param element tag name
+  * @param tt tag table instance
+  */
+ public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt)
+ {
+     this(type, textarray, start, end);
+     this.element = element;
+
+     switch (type)
+     {
+         case START_TAG :
+         case START_END_TAG :
+         case END_TAG :
+             tt.findTag(this);
+             break;
+         default :
+             // other node types are not looked up in the tag table
+             break;
+     }
+ }
+
+ /**
+  * Used to clone heading nodes when split by an hr. The copy gets its own text array holding just the bytes of this
+  * node's span (re-based to start at 0) and its own copy of the attribute list. All other fields, including the
+  * links to parent, siblings and children, are copied as they are by <code>Object.clone()</code>. The cached DOM
+  * adapter is not carried over, because it wraps the original node.
+  * @return the copied node
+  * @see java.lang.Object#clone()
+  */
+ protected Object clone()
+ {
+     Node node;
+     try
+     {
+         node = (Node) super.clone();
+     }
+     catch (CloneNotSupportedException e)
+     {
+         // should never happen since Node implements Cloneable; keep the cause in case it ever does
+         throw new RuntimeException("CloneNotSupportedException " + e.getMessage(), e);
+     }
+
+     // the adapter cached on the original wraps the original: the copy has to create its own on demand
+     node.adapter = null;
+
+     if (this.textarray != null)
+     {
+         int length = this.end - this.start;
+         node.textarray = new byte[length];
+         node.start = 0;
+         node.end = length;
+         if (length > 0)
+         {
+             System.arraycopy(this.textarray, this.start, node.textarray, 0, length);
+         }
+     }
+     if (this.attributes != null)
+     {
+         node.attributes = (AttVal) this.attributes.clone();
+     }
+     return node;
+ }
+
+ /**
+  * Looks up the first attribute of this node whose name equals the given one (case sensitive comparison).
+  * @param name attribute name.
+  * @return AttVal instance or null if <code>name</code> is null or no attribute with the given name is found
+  */
+ public AttVal getAttrByName(String name)
+ {
+     if (name == null)
+     {
+         return null;
+     }
+
+     AttVal candidate = this.attributes;
+     while (candidate != null && !name.equals(candidate.attribute))
+     {
+         candidate = candidate.next;
+     }
+     return candidate;
+ }
+
+ /**
+  * Default method for checking an element's attributes: every attribute in the list is asked to check itself
+  * against this node.
+  * @param lexer Lexer
+  */
+ public void checkAttributes(Lexer lexer)
+ {
+     AttVal current = this.attributes;
+     while (current != null)
+     {
+         current.checkAttribute(lexer, this);
+         // the link is read only after the check has run
+         current = current.next;
+     }
+ }
+
+ /**
+ * The same attribute name can't be used more than once in each element. Discard or join attributes according to
+ * configuration.
+ * <p>
+ * Each attribute without asp/php content is compared (ignoring case) with the attributes that follow it. For a
+ * duplicate pair the outcome depends on the configuration: duplicate <code>class</code> attributes are joined when
+ * <code>joinClasses</code> is set, duplicate <code>style</code> attributes are joined when <code>joinStyles</code>
+ * is set, otherwise one of the two attributes is removed depending on <code>duplicateAttrs</code>. Every join or
+ * removal is reported through <code>lexer.report.attrError</code>.
+ * </p>
+ * @param lexer Lexer
+ */
+ public void repairDuplicateAttributes(Lexer lexer)
+ {
+ AttVal attval;
+
+ // outer loop: attval is the earlier attribute of a potential duplicate pair
+ for (attval = this.attributes; attval != null;)
+ {
+ // attributes carrying asp or php content are never treated as duplicates
+ if (attval.asp == null && attval.php == null)
+ {
+ AttVal current;
+
+ // inner loop: current walks over the attributes that follow attval
+ for (current = attval.next; current != null;)
+ {
+ if (current.asp == null
+ && current.php == null
+ && attval.attribute != null
+ && attval.attribute.equalsIgnoreCase(current.attribute))
+ {
+ AttVal temp;
+
+ if ("class".equalsIgnoreCase(current.attribute)
&& lexer.configuration.joinClasses)
+ {
+ // concatenate classes
+ // the earlier value is appended to the later attribute, then the earlier attribute is removed
+ current.value = current.value + " " +
attval.value;
+
+ temp = attval.next;
+
+ // temp is not null here: current is reachable from attval.next
+ if (temp.next == null)
+ {
+ current = null;
+ }
+ else
+ {
+ current = current.next;
+ }
+
+ lexer.report.attrError(lexer, this, attval,
Report.JOINING_ATTRIBUTE);
+
+ removeAttribute(attval);
+ attval = temp;
+ }
+ else if ("style".equalsIgnoreCase(current.attribute)
&& lexer.configuration.joinStyles)
+ {
+ // concatenate styles
+
+ // this doesn't handle CSS comments and leading/trailing
white-space very well see
+ //
http://www.w3.org/TR/css-style-attr
+
+ // NOTE(review): current.value is dereferenced without a null or empty check; for an empty value
+ // end is -1 and charAt(end) throws - confirm style values can never be empty or null here
+ int end = current.value.length() - 1;
+
+ if (current.value.charAt(end) == ';')
+ {
+ // attribute ends with declaration seperator
+ current.value = current.value + " " +
attval.value;
+ }
+ else if (current.value.charAt(end) == '}')
+ {
+ // attribute ends with rule set
+ current.value = current.value + " { " +
attval.value + " }";
+ }
+ else
+ {
+ // attribute ends with property value
+ current.value = current.value + "; " +
attval.value;
+ }
+
+ temp = attval.next;
+
+ if (temp.next == null)
+ {
+ current = null;
+ }
+ else
+ {
+ current = current.next;
+ }
+
+ lexer.report.attrError(lexer, this, attval,
Report.JOINING_ATTRIBUTE);
+
+ removeAttribute(attval);
+ attval = temp;
+
+ }
+ else if (lexer.configuration.duplicateAttrs ==
Configuration.KEEP_LAST)
+ {
+ // NOTE(review): this branch removes "current", i.e. the later occurrence in list order, although the
+ // constant is named KEEP_LAST - confirm against the order in which the lexer links attributes
+ temp = current.next;
+
+ lexer.report.attrError(lexer, this, current,
Report.REPEATED_ATTRIBUTE);
+
+ removeAttribute(current);
+ current = temp;
+ }
+ else
+ {
+ // any other duplicateAttrs setting: the earlier attribute (attval) is reported and removed
+ temp = attval.next;
+
+ if (attval.next == null)
+ {
+ current = null;
+ }
+ else
+ {
+ current = current.next;
+ }
+
+ lexer.report.attrError(lexer, this, attval,
Report.REPEATED_ATTRIBUTE);
+
+ removeAttribute(attval);
+ attval = temp;
+ }
+ }
+ else
+ {
+ // not a duplicate of attval: try the next candidate
+ current = current.next;
+ }
+ }
+ // NOTE(review): when attval was replaced by temp inside the inner loop, this step moves on from the
+ // replacement, so the replacement itself is not used as the left-hand side of a comparison - confirm intended
+ attval = attval.next;
+ }
+ else
+ {
+ attval = attval.next;
+ }
+ }
+ }
+
+ /**
+ * Adds an attribute to the node.
+ * @param name attribute name
+ * @param value attribute value
+ */
+ public void addAttribute(String name, String value)
+ {
+ AttVal av = new AttVal(null, null, null, null, '"', name, value);
+ av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
+
+ if (this.attributes == null)
+ {
+ this.attributes = av;
+ }
+ else
+ {
+ // append to end of attributes
+ AttVal here = this.attributes;
+
+ while (here.next != null)
+ {
+ here = here.next;
+ }
+
+ here.next = av;
+ }
+ }
+
+ /**
+  * Unlinks the given attribute from this node's attribute list. The comparison is by reference; the list is
+  * always walked to its end, and nothing happens if the attribute is not part of it.
+  * @param attr attribute to remove
+  */
+ public void removeAttribute(AttVal attr)
+ {
+     AttVal previous = null;
+     AttVal cursor = this.attributes;
+
+     while (cursor != null)
+     {
+         AttVal following = cursor.next;
+
+         if (cursor != attr)
+         {
+             previous = cursor;
+         }
+         else if (previous == null)
+         {
+             // removing the head of the list
+             this.attributes = following;
+         }
+         else
+         {
+             previous.next = following;
+         }
+
+         cursor = following;
+     }
+ }
+
+ /**
+  * Searches the direct children of this node for the first one of type DOCTYPE_TAG.
+  * @return doctype node or null if not found
+  */
+ public Node findDocType()
+ {
+     for (Node child = this.content; child != null; child = child.next)
+     {
+         if (child.type == DOCTYPE_TAG)
+         {
+             return child;
+         }
+     }
+     return null;
+ }
+
+ /**
+  * Discard the doctype node: the first doctype child of this node, if any, is unlinked from its siblings and
+  * from its parent's first/last child references. The removed node keeps its own parent and prev references;
+  * only its next reference is cleared.
+  */
+ public void discardDocType()
+ {
+     Node node = findDocType();
+     if (node == null)
+     {
+         return;
+     }
+
+     // the doctype was found among this node's children, so fall back to this node if its parent link is unset
+     Node owner = (node.parent != null) ? node.parent : this;
+
+     if (node.prev != null)
+     {
+         node.prev.next = node.next;
+     }
+     else
+     {
+         owner.content = node.next;
+     }
+
+     if (node.next != null)
+     {
+         node.next.prev = node.prev;
+     }
+
+     // keep the last child reference consistent when the doctype was the last child
+     // (checkNodeIntegrity expects a node without next to be its parent's last)
+     if (owner.last == node)
+     {
+         owner.last = node.prev;
+     }
+
+     node.next = null;
+ }
+
+ /**
+  * Remove node from markup tree and discard it.
+  * @param element discarded node, may be null
+  * @return the node that followed <code>element</code> before removal, or null if there was none or
+  * <code>element</code> is null
+  */
+ public static Node discardElement(Node element)
+ {
+     if (element == null)
+     {
+         return null;
+     }
+
+     // remember the successor first: removeNode() clears the link
+     Node following = element.next;
+     element.removeNode();
+     return following;
+ }
+
+ /**
+  * Inserts a node into the markup tree as the first child of this node.
+  * @param node to insert
+  */
+ public void insertNodeAtStart(Node node)
+ {
+     Node formerFirst = this.content;
+     node.parent = this;
+
+     if (formerFirst != null)
+     {
+         formerFirst.prev = node; // AQ added 13 Apr 2000
+     }
+     else
+     {
+         // no children yet: the new node is also the last one
+         this.last = node;
+     }
+
+     node.next = formerFirst;
+     node.prev = null;
+     this.content = node;
+ }
+
+ /**
+  * Inserts a node into the markup tree as the last child of this node. The <code>next</code> reference of the
+  * inserted node is left as it is.
+  * @param node Node to insert
+  */
+ public void insertNodeAtEnd(Node node)
+ {
+     Node formerLast = this.last;
+
+     node.parent = this;
+     node.prev = formerLast;
+
+     if (formerLast == null)
+     {
+         // no children yet: the new node is also the first one
+         this.content = node;
+     }
+     else
+     {
+         formerLast.next = node;
+     }
+
+     this.last = node;
+ }
+
+ /**
+  * Inserts <code>node</code> into the markup tree in place of <code>element</code>, which is moved to become the
+  * only child of <code>node</code>.
+  * @param element node currently in the tree; becomes the child of <code>node</code>
+  * @param node new parent; takes over the position of <code>element</code> among its former siblings
+  */
+ public static void insertNodeAsParent(Node element, Node node)
+ {
+     // element becomes the single child of node
+     node.content = element;
+     node.last = element;
+     node.parent = element.parent;
+     element.parent = node;
+
+     // redirect the first/last child references of the former parent
+     Node holder = node.parent;
+     if (holder.content == element)
+     {
+         holder.content = node;
+     }
+     if (holder.last == element)
+     {
+         holder.last = node;
+     }
+
+     // take over the left neighbour
+     node.prev = element.prev;
+     element.prev = null;
+     if (node.prev != null)
+     {
+         node.prev.next = node;
+     }
+
+     // take over the right neighbour
+     node.next = element.next;
+     element.next = null;
+     if (node.next != null)
+     {
+         node.next.prev = node;
+     }
+ }
+
+ /**
+ * Insert node into markup tree before element.
+ * @param element child node. Will be insertedbefore element
+ * @param node following node
+ */
+ public static void insertNodeBeforeElement(Node element, Node node)
+ {
+ Node parent;
+
+ parent = element.parent;
+ node.parent = parent;
+ node.next = element;
+ node.prev = element.prev;
+ element.prev = node;
+
+ if (node.prev != null)
+ {
+ node.prev.next = node;
+ }
+
+ if (parent != null && parent.content == element)
+ {
+ parent.content = node;
+ }
+ }
+
+ /**
+  * Inserts a node into the markup tree immediately after this node.
+  * @param node new node to insert
+  */
+ public void insertNodeAfterElement(Node node)
+ {
+     Node owner = this.parent;
+     node.parent = owner;
+
+     // AQ - 13Jan2000 fix for parent == null
+     boolean thisWasLast = owner != null && owner.last == this;
+     if (thisWasLast)
+     {
+         owner.last = node;
+     }
+     else
+     {
+         node.next = this.next;
+         // AQ - 13Jan2000 fix for node.next == null
+         if (node.next != null)
+         {
+             node.next.prev = node;
+         }
+     }
+
+     this.next = node;
+     node.prev = this;
+ }
+
+ /**
+ * Trim an empty element.
+ * @param lexer Lexer
+ * @param element empty node to be removed
+ */
+ public static void trimEmptyElement(Lexer lexer, Node element)
+ {
+ // don't trim if user explicitely set trim-empty-elements to false
+ // empty element can be needed in css sites
+ if (lexer.configuration.trimEmpty)
+ {
+ TagTable tt = lexer.configuration.tt;
+
+ if (lexer.canPrune(element))
+ {
+ if (element.type != TEXT_NODE)
+ {
+ lexer.report.warning(lexer, element, null,
Report.TRIM_EMPTY_ELEMENT);
+ }
+
+ discardElement(element);
+ }
+ else if (element.tag == tt.tagP && element.content == null)
+ {
+ // replace <p></p> by <br><br> to preserve
formatting
+ Node node = lexer.inferredTag("br");
+ Node.coerceNode(lexer, element, tt.tagBr);
+ element.insertNodeAfterElement(node);
+ }
+ }
+ }
+
+ /**
+ * This maps <em> hello </em> <strong>world </strong> to
<em> hello </em> <strong>world </strong>. If last child of
+ * element is a text node then trim trailing white space character moving it to after
element's end tag.
+ * @param lexer Lexer
+ * @param element node
+ * @param last last child of element
+ */
+ public static void trimTrailingSpace(Lexer lexer, Node element, Node last)
+ {
+ byte c;
+ TagTable tt = lexer.configuration.tt;
+
+ if (last != null && last.type == Node.TEXT_NODE)
+ {
+ if (last.end > last.start)
+
+ {
+ c = lexer.lexbuf[last.end - 1];
+
+ if (c == 160 || c == (byte) ' ')
+ {
+ // take care with <td> </td>
+ // fix for [435920]
+ if (c == 160 && (element.tag == tt.tagTd || element.tag ==
tt.tagTh))
+ {
+ if (last.end > last.start + 1)
+ {
+ last.end -= 1;
+ }
+ }
+ else
+ {
+ last.end -= 1;
+
+ if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE)
+ && !TidyUtils.toBoolean(element.tag.model &
Dict.CM_FIELD))
+ {
+ lexer.insertspace = true;
+ }
+ }
+ }
+ }
+ // if empty string then delete from parse tree
+ if (last.start == last.end) // COMMENT_NBSP_FIX: && tag != tag_td
&& tag != tag_th
+ {
+ trimEmptyElement(lexer, last);
+ }
+ }
+ }
+
+ /**
+  * Escapes the given tag: the textual form of the tag is appended to the lexer buffer and a new node spanning
+  * the appended bytes is returned.
+  * @param lexer Lexer
+  * @param element node to be escaped
+  * @return escaped node
+  */
+ protected static Node escapeTag(Lexer lexer, Node element)
+ {
+     Node escaped = lexer.newNode();
+     escaped.start = lexer.lexsize;
+     escaped.textarray = element.textarray; // @todo check it
+     lexer.addByte('<');
+
+     if (element.type == END_TAG)
+     {
+         lexer.addByte('/');
+     }
+
+     if (element.element != null)
+     {
+         lexer.addStringLiteral(element.element);
+     }
+     else if (element.type == DOCTYPE_TAG)
+     {
+         // doctype nodes carry no element name: emit the keyword followed by the node's own text
+         String keyword = "!DOCTYPE ";
+         for (int k = 0; k < keyword.length(); k++)
+         {
+             lexer.addByte(keyword.charAt(k));
+         }
+
+         int position = element.start;
+         while (position < element.end)
+         {
+             lexer.addByte(lexer.lexbuf[position]);
+             ++position;
+         }
+     }
+
+     if (element.type == START_END_TAG)
+     {
+         lexer.addByte('/');
+     }
+
+     lexer.addByte('>');
+     escaped.end = lexer.lexsize;
+
+     return escaped;
+ }
+
+ /**
+ * Is the node content empty or blank? Assumes node is a text node.
+ * @param lexer Lexer
+ * @return <code>true</code> if the node content empty or blank
+ */
+ public boolean isBlank(Lexer lexer)
+ {
+ if (this.type == TEXT_NODE)
+ {
+ if (this.end == this.start)
+ {
+ return true;
+ }
+ if (this.end == this.start + 1 && lexer.lexbuf[this.end - 1] == '
')
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+  * This maps <code>&lt;p&gt;hello&lt;em&gt; world&lt;/em&gt;</code> to
+  * <code>&lt;p&gt;hello &lt;em&gt;world&lt;/em&gt;</code>. Trims initial space, by moving it before the start
+  * tag, or if this element is the first in parent's content, then by discarding the space.
+  * @param lexer Lexer
+  * @param element parent node
+  * @param text text node
+  */
+ public static void trimInitialSpace(Lexer lexer, Node element, Node text)
+ {
+     Node prev, node;
+
+     // #427677 - fix by Gary Peskin 31 Oct 00
+     // The span is checked for being non-empty BEFORE its first byte is read: the former order read
+     // textarray[start] even for an empty span, which can index past the end of the array.
+     if (text.type == TEXT_NODE && text.start < text.end && text.textarray[text.start] == (byte) ' ')
+     {
+         if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE)
+             && !TidyUtils.toBoolean(element.tag.model & Dict.CM_FIELD)
+             && element.parent.content != element)
+         {
+             prev = element.prev;
+
+             if (prev != null && prev.type == TEXT_NODE)
+             {
+                 // make sure the preceding text ends with a space
+                 if (prev.textarray[prev.end - 1] != (byte) ' ')
+                 {
+                     prev.textarray[prev.end++] = (byte) ' ';
+                 }
+
+                 ++element.start;
+             }
+             else
+             {
+                 // create new node
+                 node = lexer.newNode();
+                 // Local fix for bug 228486 (GLP). This handles the case
+                 // where we need to create a preceeding text node but there are
+                 // no "slots" in textarray that we can steal from the current
+                 // element. Therefore, we create a new textarray containing
+                 // just the blank. When Tidy is fixed, this should be removed.
+                 if (element.start >= element.end)
+                 {
+                     node.start = 0;
+                     node.end = 1;
+                     node.textarray = new byte[1];
+                 }
+                 else
+                 {
+                     node.start = element.start++;
+                     node.end = element.start;
+                     node.textarray = element.textarray;
+                 }
+                 node.textarray[node.start] = (byte) ' ';
+                 node.prev = prev;
+                 if (prev != null)
+                 {
+                     prev.next = node;
+                 }
+                 node.next = element;
+                 element.prev = node;
+                 node.parent = element.parent;
+             }
+         }
+
+         // discard the space in current node
+         ++text.start;
+     }
+ }
+
+ /**
+ * Move initial and trailing space out. This routine maps: hello <em> world
</em> to hello <em> world </em> and
+ * <em> hello </em> <strong>world </strong> to <em>
hello </em> <strong>world </strong>.
+ * @param lexer Lexer
+ * @param element Node
+ */
+ public static void trimSpaces(Lexer lexer, Node element)
+ {
+ Node text = element.content;
+ TagTable tt = lexer.configuration.tt;
+
+ if (text != null && text.type == Node.TEXT_NODE && element.tag !=
tt.tagPre)
+ {
+ trimInitialSpace(lexer, element, text);
+ }
+
+ text = element.last;
+
+ if (text != null && text.type == Node.TEXT_NODE)
+ {
+ trimTrailingSpace(lexer, element, text);
+ }
+ }
+
+ /**
+  * Is this node contained in a given tag? Walks up the parent chain comparing each ancestor's dictionary entry
+  * with <code>tag</code> by reference.
+  * @param tag tag of the ancestor to look for
+  * @return <code>true</code> if node is contained in tag
+  */
+ public boolean isDescendantOf(Dict tag)
+ {
+     Node ancestor = this.parent;
+     while (ancestor != null)
+     {
+         if (ancestor.tag == tag)
+         {
+             return true;
+         }
+         ancestor = ancestor.parent;
+     }
+     return false;
+ }
+
+ /**
+  * The doctype has been found after other tags, and needs moving to before the html element. A warning is
+  * reported, then the html element is searched for starting at <code>element</code> and going up the parent
+  * chain. If there is no html ancestor (which used to end in a NullPointerException) the doctype is inserted as
+  * first child of the topmost ancestor, provided that ancestor is the root node; otherwise it is left
+  * uninserted.
+  * @param lexer Lexer
+  * @param element document
+  * @param doctype doctype node to insert at the beginning of element
+  */
+ public static void insertDocType(Lexer lexer, Node element, Node doctype)
+ {
+     TagTable tt = lexer.configuration.tt;
+
+     lexer.report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);
+
+     Node topmost = element;
+     Node html = element;
+     while (html != null && html.tag != tt.tagHtml)
+     {
+         topmost = html;
+         html = html.parent;
+     }
+
+     if (html != null)
+     {
+         insertNodeBeforeElement(html, doctype);
+     }
+     else if (topmost != null && topmost.type == ROOT_NODE)
+     {
+         // no html element above: the doctype still has to come before all other content of the document
+         topmost.insertNodeAtStart(doctype);
+     }
+ }
+
+ /**
+  * Find the body node. The html element is looked up among this node's children, then body or frameset among
+  * the children of html. For a frameset the body inside its noframes child is returned.
+  * @param tt tag table
+  * @return body node, or null if there is no html element, no body/frameset inside it, or no noframes/body
+  * inside the frameset
+  */
+ public Node findBody(TagTable tt)
+ {
+     Node node = this.content;
+
+     while (node != null && node.tag != tt.tagHtml)
+     {
+         node = node.next;
+     }
+
+     if (node == null)
+     {
+         return null;
+     }
+
+     node = node.content;
+
+     while (node != null && node.tag != tt.tagBody && node.tag != tt.tagFrameset)
+     {
+         node = node.next;
+     }
+
+     // node is null when html contains neither body nor frameset: it must not be dereferenced in that case
+     if (node != null && node.tag == tt.tagFrameset)
+     {
+         node = node.content;
+
+         while (node != null && node.tag != tt.tagNoframes)
+         {
+             node = node.next;
+         }
+
+         if (node != null)
+         {
+             node = node.content;
+             while (node != null && node.tag != tt.tagBody)
+             {
+                 node = node.next;
+             }
+         }
+     }
+
+     return node;
+ }
+
+ /**
+  * Is the node an element?
+  * @return <code>true</code> if type is START_TAG | START_END_TAG
+  */
+ public boolean isElement()
+ {
+     return this.type == START_TAG || this.type == START_END_TAG;
+ }
+
+ /**
+  * Unexpected content in table row is moved to just before the table in accordance with Netscape and IE. This
+  * code assumes that node hasn't been inserted into the row. Nothing happens if the row has no table ancestor.
+  * @param row Row node
+  * @param node Node which should be moved before the table
+  * @param tt tag table
+  */
+ public static void moveBeforeTable(Node row, Node node, TagTable tt)
+ {
+     // first find the table element among the ancestors of the row
+     Node table = row.parent;
+     while (table != null && table.tag != tt.tagTable)
+     {
+         table = table.parent;
+     }
+
+     if (table == null)
+     {
+         return;
+     }
+
+     // the table was the first child: node takes over that role
+     if (table.parent.content == table)
+     {
+         table.parent.content = node;
+     }
+
+     node.prev = table.prev;
+     node.next = table;
+     table.prev = node;
+     node.parent = table.parent;
+
+     if (node.prev != null)
+     {
+         node.prev.next = node;
+     }
+ }
+
+ /**
+  * If a table row is empty then insert an empty cell. This practice is consistent with browser behavior and
+  * avoids potential problems with row spanning cells. The inserted td is reported as a missing start tag.
+  * @param lexer Lexer
+  * @param row row node
+  */
+ public static void fixEmptyRow(Lexer lexer, Node row)
+ {
+     if (row.content != null)
+     {
+         // the row already has content
+         return;
+     }
+
+     Node inferredCell = lexer.inferredTag("td");
+     row.insertNodeAtEnd(inferredCell);
+     lexer.report.warning(lexer, row, inferredCell, Report.MISSING_STARTTAG);
+ }
+
+ /**
+  * Coerce a node: turns it into an implicit start tag of the given dictionary entry, remembering the former
+  * entry in <code>was</code>. An OBSOLETE_ELEMENT warning is reported before the node is changed.
+  * @param lexer Lexer
+  * @param node Node
+  * @param tag tag dictionary reference
+  */
+ public static void coerceNode(Lexer lexer, Node node, Dict tag)
+ {
+     // an inferred tag with the new name is created only to be passed to the warning
+     Node replacement = lexer.inferredTag(tag.name);
+     lexer.report.warning(lexer, node, replacement, Report.OBSOLETE_ELEMENT);
+
+     node.was = node.tag;
+     node.tag = tag;
+     node.element = tag.name;
+     node.type = START_TAG;
+     node.implicit = true;
+ }
+
+ /**
+  * Extract this node and its children from a markup tree: siblings and parent are re-linked around the node and
+  * its own parent, prev and next references are cleared. The content of the node is left attached to it.
+  */
+ public void removeNode()
+ {
+     // close the gap between the neighbours
+     if (this.prev != null)
+     {
+         this.prev.next = this.next;
+     }
+     if (this.next != null)
+     {
+         this.next.prev = this.prev;
+     }
+
+     // fix the first/last child references of the parent
+     Node owner = this.parent;
+     if (owner != null)
+     {
+         if (owner.content == this)
+         {
+             owner.content = this.next;
+         }
+         if (owner.last == this)
+         {
+             owner.last = this.prev;
+         }
+     }
+
+     this.next = null;
+     this.prev = null;
+     this.parent = null;
+ }
+
+ /**
+ * Insert a node at the end.
+ * @param element parent node
+ * @param node will be inserted at the end of element
+ * @return <code>true</code> if the node has been inserted
+ */
+ public static boolean insertMisc(Node element, Node node)
+ {
+ if (node.type == COMMENT_TAG
+ || node.type == PROC_INS_TAG
+ || node.type == CDATA_TAG
+ || node.type == SECTION_TAG
+ || node.type == ASP_TAG
+ || node.type == JSTE_TAG
+ || node.type == PHP_TAG
+ || node.type == XML_DECL)
+ {
+ element.insertNodeAtEnd(node);
+ return true;
+ }
+
+ return false;
+ }
+
+ /**
+  * Is this a new (user defined) node? Used to determine how attributes without values should be printed. This
+  * was introduced to deal with user defined tags e.g. Cold Fusion. A node without a dictionary entry counts as
+  * new.
+  * @return <code>true</code> if this node represents a user-defined tag.
+  */
+ public boolean isNewNode()
+ {
+     return this.tag == null || TidyUtils.toBoolean(this.tag.model & Dict.CM_NEW);
+ }
+
+ /**
+  * Does the node have one (and only one) child?
+  * @return <code>true</code> if the node has one child
+  */
+ public boolean hasOneChild()
+ {
+     Node first = this.content;
+     return first != null && first.next == null;
+ }
+
+ /**
+  * Find the "html" element among the direct children of this node.
+  * @param tt tag table
+  * @return html node, or null if there is none
+  */
+ public Node findHTML(TagTable tt)
+ {
+     Node child = this.content;
+     while (child != null && child.tag != tt.tagHtml)
+     {
+         child = child.next;
+     }
+     return child;
+ }
+
+ /**
+  * Find the head tag among the direct children of the html element.
+  * @param tt tag table
+  * @return head node, or null if there is no html element or no head inside it
+  */
+ public Node findHEAD(TagTable tt)
+ {
+     Node html = this.findHTML(tt);
+     if (html == null)
+     {
+         return null;
+     }
+
+     Node child = html.content;
+     while (child != null && child.tag != tt.tagHead)
+     {
+         child = child.next;
+     }
+     return child;
+ }
+
+ /**
+  * Checks for node integrity: the sibling links around this node must point back at it, a node without
+  * previous/next sibling must be its parent's first/last child, the node must be reachable from its parent's
+  * child list, and all of this must hold recursively for every child.
+  * @return false if node is not consistent
+  */
+ public boolean checkNodeIntegrity()
+ {
+     // sibling links have to be symmetric
+     if (this.prev != null && this.prev.next != this)
+     {
+         return false;
+     }
+     if (this.next != null && this.next.prev != this)
+     {
+         return false;
+     }
+
+     if (this.parent != null)
+     {
+         if (this.prev == null && this.parent.content != this)
+         {
+             return false;
+         }
+         if (this.next == null && this.parent.last != this)
+         {
+             return false;
+         }
+
+         // this node has to show up in the child list of its parent
+         Node sibling = this.parent.content;
+         while (sibling != null && sibling != this)
+         {
+             sibling = sibling.next;
+         }
+         if (sibling == null)
+         {
+             return false;
+         }
+     }
+
+     // recurse into the children
+     Node child = this.content;
+     while (child != null)
+     {
+         if (!child.checkNodeIntegrity())
+         {
+             return false;
+         }
+         child = child.next;
+     }
+     return true;
+ }
+
+ /**
+ * Add a css class to the node. If a class attribute already exists adds the value to
the existing attribute.
+ * @param classname css class name
+ */
+ public void addClass(String classname)
+ {
+ AttVal classattr = this.getAttrByName("class");
+
+ // if there already is a class attribute then append class name after a space
+ if (classattr != null)
+ {
+ classattr.value = classattr.value + " " + classname;
+ }
+ else
+ {
+ // create new class attribute
+ this.addAttribute("class", classname);
+ }
+ }
+
+ /**
+  * Describes this node and all the siblings that follow it, each as
+  * <code>[Node type=...,element=...,text="...",content=...]</code>, separated by commas. The text part is only
+  * present for text, comment and processing instruction nodes; the content part is the same description applied
+  * to the node's children. A type without an entry in the description table is printed as
+  * <code>Unknown(n)</code> instead of failing with an ArrayIndexOutOfBoundsException.
+  * @return textual description of this node and its following siblings
+  * @see java.lang.Object#toString()
+  */
+ public String toString()
+ {
+     // a buffer is used so that long sibling chains are not copied over and over again
+     StringBuffer buffer = new StringBuffer();
+
+     for (Node n = this; n != null; n = n.next)
+     {
+         buffer.append("[Node type=");
+         if (n.type >= 0 && n.type < NODETYPE_STRING.length)
+         {
+             buffer.append(NODETYPE_STRING[n.type]);
+         }
+         else
+         {
+             // toString is a debugging aid and must not throw for a type the table does not know
+             buffer.append("Unknown(").append(n.type).append(')');
+         }
+
+         buffer.append(",element=");
+         buffer.append(n.element != null ? n.element : "null");
+
+         if (n.type == TEXT_NODE || n.type == COMMENT_TAG || n.type == PROC_INS_TAG)
+         {
+             buffer.append(",text=");
+             if (n.textarray != null && n.start <= n.end)
+             {
+                 buffer.append("\"");
+                 buffer.append(TidyUtils.getString(n.textarray, n.start, n.end - n.start));
+                 buffer.append("\"");
+             }
+             else
+             {
+                 buffer.append("null");
+             }
+         }
+
+         buffer.append(",content=");
+         buffer.append(n.content != null ? n.content.toString() : "null");
+         buffer.append("]");
+
+         if (n.next != null)
+         {
+             buffer.append(",");
+         }
+     }
+
+     return buffer.toString();
+ }
+
+ /**
+  * Returns a DOM Node which wraps the current tidy Node. The wrapper is created on first use, according to the
+  * node type, and cached for later calls.
+  * @return org.w3c.dom.Node instance
+  */
+ protected org.w3c.dom.Node getAdapter()
+ {
+     if (adapter != null)
+     {
+         // already created by an earlier call
+         return adapter;
+     }
+
+     switch (this.type)
+     {
+         case ROOT_NODE :
+             adapter = new DOMDocumentImpl(this);
+             break;
+         case DOCTYPE_TAG :
+             adapter = new DOMDocumentTypeImpl(this);
+             break;
+         case COMMENT_TAG :
+             adapter = new DOMCommentImpl(this);
+             break;
+         case PROC_INS_TAG :
+             adapter = new DOMProcessingInstructionImpl(this);
+             break;
+         case TEXT_NODE :
+             adapter = new DOMTextImpl(this);
+             break;
+         case START_TAG :
+         case START_END_TAG :
+             adapter = new DOMElementImpl(this);
+             break;
+         case CDATA_TAG :
+         case CDATA_TEXT :
+             adapter = new DOMCDATASectionImpl(this);
+             break;
+         default :
+             adapter = new DOMNodeImpl(this);
+     }
+
+     return adapter;
+ }
+
+ /**
+  * Clone this node. The copy is detached from the original's children, siblings and DOM adapter; it keeps the
+  * parent reference of the original. With <code>deep</code> set, every child is cloned recursively and appended
+  * to the copy.
+  * <p>
+  * <code>clone()</code> copies all link fields as they are. Without resetting them, the copy would share the
+  * original's <code>content</code>/<code>last</code> references and appending the cloned children would link
+  * them into the child list of the original node.
+  * </p>
+  * @param deep if true deep clone the node (also clones all the contained nodes)
+  * @return cloned node
+  */
+ protected Node cloneNode(boolean deep)
+ {
+     Node node = (Node) this.clone();
+
+     // start from an empty child list and no siblings: these links still belong to the original's tree
+     node.content = null;
+     node.last = null;
+     node.prev = null;
+     node.next = null;
+     // a cached adapter would wrap the original node
+     node.adapter = null;
+
+     if (deep)
+     {
+         for (Node child = this.content; child != null; child = child.next)
+         {
+             node.insertNodeAtEnd(child.cloneNode(true));
+         }
+     }
+     return node;
+ }
+
+ /**
+  * Replaces the type of this node. The value is stored as given, without validation.
+  * @param newType a valid node type constant
+  */
+ protected void setType(short newType)
+ {
+     type = newType;
+ }
+
+ /**
+  * Used to check script node for script language. A node without any attribute counts as javascript; otherwise
+  * a <code>language</code> or <code>type</code> attribute with the value <code>javascript</code> or
+  * <code>text/javascript</code> (ignoring case) is required.
+  * @return <code>true</code> if the script node contains javascript
+  */
+ public boolean isJavaScript()
+ {
+     if (this.attributes == null)
+     {
+         return true;
+     }
+
+     for (AttVal attr = this.attributes; attr != null; attr = attr.next)
+     {
+         boolean languageAttribute = "language".equalsIgnoreCase(attr.attribute)
+             || "type".equalsIgnoreCase(attr.attribute);
+
+         if (languageAttribute
+             && ("javascript".equalsIgnoreCase(attr.value) || "text/javascript".equalsIgnoreCase(attr.value)))
+         {
+             return true;
+         }
+     }
+
+     return false;
+ }
+
+ /**
+  * Does the node expect contents? Only start tags do; among them, unknown elements (no dictionary entry) always
+  * do, and known ones unless their content model has the CM_EMPTY bit set.
+  * @return <code>false</code> if this node should be empty
+  */
+ public boolean expectsContent()
+ {
+     if (this.type != Node.START_TAG)
+     {
+         return false;
+     }
+
+     // unknown element, or a known one that is not declared empty
+     return this.tag == null || !TidyUtils.toBoolean(this.tag.model & Dict.CM_EMPTY);
+ }
+
+ /**
+ * @return Returns the content.
+ */
+ public Node getContent() {
+ return content;
+ }
+
+ /**
+ * @return Returns the element.
+ */
+ public String getElement() {
+ return element;
+ }
+
+ /**
+ * @return Returns the tag.
+ */
+ public Dict getTag() {
+ return tag;
+ }
+
+ /**
+ * @return Returns the type.
+ */
+ public short getType() {
+ return type;
+ }
+}
\ No newline at end of file
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Out.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Out.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Out.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,90 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.io.IOException;
+
+/**
+ * Tidy Output interface: a sink accepting single characters, single bytes and newlines.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public interface Out
+{
+
+ /**
+ * writes a char.
+ * @param c char to write, passed as an int
+ * @throws IOException if the implementation fails to write
+ */
+ void outc(int c) throws IOException;
+
+ /**
+ * writes a byte.
+ * @param c byte to write
+ * @throws IOException if the implementation fails to write
+ */
+ void outc(byte c) throws IOException;
+
+ /**
+ * writes a newline.
+ * @throws IOException if the implementation fails to write
+ */
+ void newline() throws IOException;
+
+ /**
+ * Flush and close the stream.
+ * @throws IOException if the implementation fails to flush or close
+ */
+ void close() throws IOException;
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/OutFactory.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/OutFactory.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/OutFactory.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,100 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+
+import org.ajax4jsf.Messages;
+
+
+/**
+ * Tidy Output factory. Both factory methods create an {@link OutJavaImpl} configured with the output encoding
+ * name taken from the given configuration.
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public final class OutFactory
+{
+
+    /**
+     * Don't instantiate.
+     */
+    private OutFactory()
+    {
+        // unused
+    }
+
+    /**
+     * Returns the appropriate Out implementation for a byte stream.
+     * @param config configuration instance
+     * @param stream output stream
+     * @return out instance
+     * @throws RuntimeException if the configured output encoding is not supported; the original
+     * <code>UnsupportedEncodingException</code> is attached as the cause
+     */
+    public static Out getOut(Configuration config, OutputStream stream)
+    {
+        try
+        {
+            return new OutJavaImpl(config, config.getOutCharEncodingName(), stream);
+        }
+        catch (UnsupportedEncodingException e)
+        {
+            // keep the original exception as cause so its stack trace is not lost
+            throw new RuntimeException(
+                Messages.getMessage(Messages.UNSUPPORTED_ENCODING_ERROR, e.getMessage()),
+                e);
+        }
+    }
+
+    /**
+     * Returns the appropriate Out implementation for a character writer.
+     * @param config configuration instance
+     * @param out writer receiving the output
+     * @return out instance
+     */
+    public static Out getOut(Configuration config, Writer out)
+    {
+        return new OutJavaImpl(config, config.getOutCharEncodingName(), out);
+    }
+}
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/OutImpl.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/OutImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/OutImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,439 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.ajax4jsf.org.w3c.tidy.EncodingUtils.PutBytes;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+
+
+/**
+ * Output implementation. This implementation is from the c version of tidy and it doesn't take advantage of java
+ * writers. I/O failures are logged and not propagated to the caller.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class OutImpl implements Out
+{
+
+    private static final Log log = LogFactory.getLog(OutImpl.class);
+
+    /**
+     * output encoding.
+     */
+    private int encoding;
+
+    /**
+     * actual state for ISO 2022.
+     */
+    private int state;
+
+    /**
+     * output stream.
+     */
+    private OutputStream out;
+
+    /**
+     * putter callback.
+     */
+    private PutBytes putBytes;
+
+    /**
+     * newline bytes.
+     */
+    private byte[] newline;
+
+    /**
+     * Constructor.
+     * @param configuration actual configuration instance (needed for newline configuration)
+     * @param encoding encoding constant
+     * @param out output stream
+     */
+    public OutImpl(Configuration configuration, int encoding, OutputStream out)
+    {
+        this.encoding = encoding;
+        this.state = EncodingUtils.FSM_ASCII;
+        this.out = out;
+
+        // copy configured newline in bytes
+        this.newline = new byte[configuration.newline.length];
+        for (int j = 0; j < configuration.newline.length; j++)
+        {
+            this.newline[j] = (byte) configuration.newline[j];
+        }
+
+        this.putBytes = new PutBytes()
+        {
+
+            private OutImpl impl;
+
+            PutBytes setOut(OutImpl out)
+            {
+                this.impl = out;
+                return this;
+            }
+
+            public void doPut(byte[] buf, int[] count)
+            {
+                impl.outcUTF8Bytes(buf, count);
+            }
+        } // set the out instance directly
+            .setOut(this);
+    }
+
+    /**
+     * output UTF-8 bytes to output stream.
+     * @param buf array of bytes
+     * @param count one-element array holding the number of bytes in buf to write
+     */
+    void outcUTF8Bytes(byte[] buf, int[] count)
+    {
+        try
+        {
+            for (int i = 0; i < count[0]; i++)
+            {
+                out.write(buf[i]);
+            }
+        }
+        catch (IOException e)
+        {
+            log.error("OutImpl.outcUTF8Bytes: " + e.toString(), e);
+        }
+    }
+
+    /**
+     * Writes a byte, treating it as an unsigned value.
+     * @param c byte to write
+     * @see org.ajax4jsf.org.w3c.tidy.Out#outc(byte)
+     */
+    public void outc(byte c)
+    {
+        outc(c & 0xFF); // Convert to unsigned.
+    }
+
+    /**
+     * Writes one character to the output stream using the encoding given at construction time.
+     * @param c char to write
+     * @see org.ajax4jsf.org.w3c.tidy.Out#outc(int)
+     */
+    public void outc(int c)
+    {
+        int ch;
+
+        try
+        {
+
+            if (this.encoding == Configuration.MACROMAN)
+            {
+                if (c < 128)
+                {
+                    out.write(c);
+                }
+                else
+                {
+                    // reverse lookup of the character in the upper half of the table
+                    for (int i = 128; i < 256; i++)
+                    {
+                        if (EncodingUtils.decodeMacRoman(i - 128) == c)
+                        {
+                            out.write(i);
+                            break;
+                        }
+                    }
+                }
+            }
+            else if (this.encoding == Configuration.WIN1252)
+            {
+                if (c < 128 || (c > 159 && c < 256))
+                {
+                    out.write(c);
+                }
+                else
+                {
+                    // reverse lookup of the character in the 128..159 range of the table
+                    for (int i = 128; i < 160; i++)
+                    {
+                        if (EncodingUtils.decodeWin1252(i - 128) == c)
+                        {
+                            out.write(i);
+                            break;
+                        }
+                    }
+                }
+            }
+            else if (this.encoding == Configuration.UTF8)
+            {
+                int[] count = new int[]{0};
+
+                EncodingUtils.encodeCharToUTF8Bytes(c, null, this.putBytes, count);
+                if (count[0] <= 0)
+                {
+                    /* ReportEncodingError(in->lexer, INVALID_UTF8 | REPLACED_CHAR, c); */
+                    /* replacement char 0xFFFD encoded as UTF-8 is EF BF BD */
+                    out.write(0xEF);
+                    out.write(0xBF);
+                    out.write(0xBD);
+                }
+            }
+            else if (this.encoding == Configuration.ISO2022)
+            {
+                if (c == 0x1b) /* ESC */
+                {
+                    this.state = EncodingUtils.FSM_ESC;
+                }
+                else
+                {
+                    switch (this.state)
+                    {
+                        case EncodingUtils.FSM_ESC :
+                            if (c == '$')
+                            {
+                                this.state = EncodingUtils.FSM_ESCD;
+                            }
+                            else if (c == '(')
+                            {
+                                this.state = EncodingUtils.FSM_ESCP;
+                            }
+                            else
+                            {
+                                this.state = EncodingUtils.FSM_ASCII;
+                            }
+                            break;
+
+                        case EncodingUtils.FSM_ESCD :
+                            if (c == '(')
+                            {
+                                this.state = EncodingUtils.FSM_ESCDP;
+                            }
+                            else
+                            {
+                                this.state = EncodingUtils.FSM_NONASCII;
+                            }
+                            break;
+
+                        case EncodingUtils.FSM_ESCDP :
+                            this.state = EncodingUtils.FSM_NONASCII;
+                            break;
+
+                        case EncodingUtils.FSM_ESCP :
+                            this.state = EncodingUtils.FSM_ASCII;
+                            break;
+
+                        case EncodingUtils.FSM_NONASCII :
+                            c &= 0x7F;
+                            break;
+
+                        default :
+                            // should not reach here
+                            break;
+                    }
+                }
+
+                this.out.write(c);
+            }
+            else if (this.encoding == Configuration.UTF16LE
+                || this.encoding == Configuration.UTF16BE
+                || this.encoding == Configuration.UTF16)
+            {
+                int numChars = 1;
+                int[] theChars = new int[2];
+
+                if (c > EncodingUtils.MAX_UTF16_FROM_UCS4)
+                {
+                    // invalid UTF-16 value
+                    /* ReportEncodingError(in.lexer, INVALID_UTF16 | DISCARDED_CHAR, c); */
+                    c = 0;
+                    numChars = 0;
+                }
+                else if (c >= EncodingUtils.UTF16_SURROGATES_BEGIN)
+                {
+                    // encode surrogate pairs
+
+                    // check for invalid pairs
+                    if (((c & 0x0000FFFE) == 0x0000FFFE) || ((c & 0x0000FFFF) == 0x0000FFFF))
+                    {
+                        /* ReportEncodingError(in.lexer, INVALID_UTF16 | DISCARDED_CHAR, c); */
+                        c = 0;
+                        numChars = 0;
+                    }
+                    else
+                    {
+                        theChars[0] = (c - EncodingUtils.UTF16_SURROGATES_BEGIN)
+                            / 0x400
+                            + EncodingUtils.UTF16_LOW_SURROGATE_BEGIN;
+                        theChars[1] = (c - EncodingUtils.UTF16_SURROGATES_BEGIN)
+                            % 0x400
+                            + EncodingUtils.UTF16_HIGH_SURROGATE_BEGIN;
+
+                        // output both
+                        numChars = 2;
+                    }
+                }
+                else
+                {
+                    // just put the char out
+                    theChars[0] = c;
+                }
+
+                for (int i = 0; i < numChars; i++)
+                {
+                    c = theChars[i];
+
+                    if (this.encoding == Configuration.UTF16LE)
+                    {
+                        // low byte first
+                        ch = c & 0xFF;
+                        out.write(ch);
+                        ch = (c >> 8) & 0xFF;
+                        out.write(ch);
+                    }
+                    else if (this.encoding == Configuration.UTF16BE || this.encoding == Configuration.UTF16)
+                    {
+                        // high byte first
+                        ch = (c >> 8) & 0xFF;
+                        out.write(ch);
+                        ch = c & 0xFF;
+                        out.write(ch);
+                    }
+                }
+            }
+            // #431953 - start RJ
+            else if (this.encoding == Configuration.BIG5 || this.encoding == Configuration.SHIFTJIS)
+            {
+                if (c < 128)
+                {
+                    this.out.write(c);
+                }
+                else
+                {
+                    // two bytes, high byte first
+                    ch = (c >> 8) & 0xFF;
+                    this.out.write(ch);
+                    ch = c & 0xFF;
+                    this.out.write(ch);
+                }
+            }
+            // #431953 - end RJ
+            else
+            {
+                this.out.write(c);
+            }
+        }
+        catch (IOException e)
+        {
+            log.error("OutImpl.outc: " + e.toString(), e);
+        }
+    }
+
+    /**
+     * Writes the configured newline bytes and flushes the stream.
+     * @see org.ajax4jsf.org.w3c.tidy.Out#newline()
+     */
+    public void newline()
+    {
+        try
+        {
+            this.out.write(this.newline);
+            this.out.flush();
+        }
+        catch (IOException e)
+        {
+            log.error("OutImpl.newline: " + e.toString(), e);
+        }
+    }
+
+    /**
+     * Setter for <code>out</code>.
+     * @param out The out to set.
+     */
+    public void setOut(OutputStream out)
+    {
+        this.out = out;
+    }
+
+    /**
+     * Output a Byte Order Mark. Nothing is written unless the encoding is UTF-8 or one of the UTF-16 variants.
+     */
+    public void outBOM()
+    {
+        if (this.encoding == Configuration.UTF8
+            || this.encoding == Configuration.UTF16LE
+            || this.encoding == Configuration.UTF16BE
+            || this.encoding == Configuration.UTF16)
+        {
+            outc(EncodingUtils.UNICODE_BOM); // this will take care of encoding the BOM correctly
+        }
+    }
+
+    /**
+     * Flushes and closes the output stream.
+     * @see org.ajax4jsf.org.w3c.tidy.Out#close()
+     */
+    public void close()
+    {
+        try
+        {
+            this.out.flush();
+            this.out.close();
+        }
+        catch (IOException e)
+        {
+            log.error("OutImpl.close: " + e.toString(), e);
+        }
+    }
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/OutJavaImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/OutJavaImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/OutJavaImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,180 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+
+/**
+ * Output implementation using java writers. Write failures are logged and then rethrown to the caller.
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class OutJavaImpl implements Out
+{
+
+    private static final Log log = LogFactory.getLog(OutJavaImpl.class);
+
+    /**
+     * Java writer receiving the output.
+     */
+    private Writer writer;
+
+    /**
+     * Newline string.
+     */
+    private char[] newline;
+
+    /**
+     * Constructor wrapping an output stream in an <code>OutputStreamWriter</code>.
+     * @param configuration actual configuration instance (needed for newline configuration)
+     * @param encoding encoding name
+     * @param out output stream
+     * @throws UnsupportedEncodingException if the underlying OutputStreamWriter doesn't support the requested
+     * encoding.
+     */
+    public OutJavaImpl(Configuration configuration, String encoding, OutputStream out)
+        throws UnsupportedEncodingException
+    {
+        this.writer = new OutputStreamWriter(out, encoding);
+        this.newline = configuration.newline;
+    }
+
+    /**
+     * Constructor writing to an existing writer.
+     * @param config actual configuration instance (needed for newline configuration)
+     * @param outCharEncodingName encoding name; not used by this constructor
+     * @param out writer receiving the output
+     */
+    public OutJavaImpl(Configuration config, String outCharEncodingName, Writer out)
+    {
+        this.writer = out;
+        this.newline = config.newline;
+    }
+
+    /**
+     * Writes a single character to the writer.
+     * @param c char to write
+     * @throws IOException if the writer fails
+     * @see org.ajax4jsf.org.w3c.tidy.Out#outc(int)
+     */
+    public void outc(int c) throws IOException
+    {
+        try
+        {
+            writer.write(c);
+        }
+        catch (IOException e)
+        {
+            if (log.isErrorEnabled())
+            {
+                log.error("OutJavaImpl.outc: " + e.getMessage());
+            }
+            throw e;
+        }
+    }
+
+    /**
+     * Writes a single byte to the writer as the character with the same unsigned value.
+     * @param c byte to write
+     * @throws IOException if the writer fails
+     * @see org.ajax4jsf.org.w3c.tidy.Out#outc(byte)
+     */
+    public void outc(byte c) throws IOException
+    {
+        try
+        {
+            // Mask to an unsigned value: a negative byte would otherwise be sign-extended to int and
+            // Writer.write(int) would emit its low 16 bits (0xFF80..0xFFFF) instead of 0x80..0xFF.
+            writer.write(c & 0xFF);
+        }
+        catch (IOException e)
+        {
+            if (log.isErrorEnabled())
+            {
+                log.error("OutJavaImpl.outc: " + e.getMessage());
+            }
+            throw e;
+        }
+    }
+
+    /**
+     * Writes the configured newline characters.
+     * @throws IOException if the writer fails
+     * @see org.ajax4jsf.org.w3c.tidy.Out#newline()
+     */
+    public void newline() throws IOException
+    {
+        try
+        {
+            writer.write(this.newline);
+        }
+        catch (IOException e)
+        {
+            if (log.isErrorEnabled())
+            {
+                log.error("OutJavaImpl.newline: " + e.getMessage());
+            }
+            throw e;
+        }
+    }
+
+    /**
+     * Closes the writer.
+     * @throws IOException if the writer fails to close
+     * @see org.ajax4jsf.org.w3c.tidy.Out#close()
+     */
+    public void close() throws IOException
+    {
+        try
+        {
+            writer.close();
+        }
+        catch (IOException e)
+        {
+            if (log.isErrorEnabled())
+            {
+                log.error("OutJavaImpl.close: " + e.getMessage());
+            }
+            throw e;
+        }
+    }
+
+}
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/PPrint.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/PPrint.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/PPrint.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,2840 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.text.NumberFormat;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+
+/**
+ * Pretty print parse tree. Block-level and unknown elements are printed on new lines and their contents indented 2
+ * spaces. Inline elements are printed inline. Inline content is wrapped on spaces (except in attribute values or
+ * preformatted text), after start tags and before end tags.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org
</a>
+ * @author Andy Quick <a
href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a>
(translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class PPrint
+{
+ private static final Log log = LogFactory.getLog(PPrint.class);
+
+ /**
+ * position: normal.
+ */
+ private static final short NORMAL = 0;
+
+ /**
+ * position: preformatted text.
+ */
+ private static final short PREFORMATTED = 1;
+
+ /**
+ * position: comment.
+ */
+ private static final short COMMENT = 2;
+
+ /**
+ * position: attribute value.
+ */
+ private static final short ATTRIBVALUE = 4;
+
+ /**
+ * position: nowrap.
+ */
+ private static final short NOWRAP = 8;
+
+ /**
+ * position: cdata.
+ */
+ private static final short CDATA = 16;
+
+ /**
+ * Start cdata token.
+ */
+ private static final String CDATA_START = "<![CDATA[";
+
+ /**
+ * End cdata token.
+ */
+ private static final String CDATA_END = "]]>";
+
+ /**
+ * Javascript comment start.
+ */
+ private static final String JS_COMMENT_START = "//";
+
+ /**
+ * Javascript comment end.
+ */
+ private static final String JS_COMMENT_END = "";
+
+ /**
+ * VB comment start.
+ */
+ private static final String VB_COMMENT_START = "\'";
+
+ /**
+ * VB comment end.
+ */
+ private static final String VB_COMMENT_END = "";
+
+ /**
+ * CSS comment start.
+ */
+ private static final String CSS_COMMENT_START = "/*";
+
+ /**
+ * CSS comment end.
+ */
+ private static final String CSS_COMMENT_END = "*/";
+
+ /**
+ * Default comment start.
+ */
+ private static final String DEFAULT_COMMENT_START = "";
+
+ /**
+ * Default comment end.
+ */
+ private static final String DEFAULT_COMMENT_END = "";
+
+ private int[] linebuf;
+
+ private int lbufsize;
+
+ private int linelen;
+
+ private int wraphere;
+
+ private boolean inAttVal;
+
+ private boolean inString;
+
+ /**
+ * Current slide number.
+ */
+ private int slide;
+
+ /**
+ * Total slides count.
+ */
+ private int count;
+
+ private Node slidecontent;
+
+ /**
+ * current configuration.
+ */
+ private Configuration configuration;
+
+ /**
+ * Instantiates a new PPrint.
+ * @param configuration configuration stored in this printer; the wrapping and flushing methods read their
+ * settings (such as <code>wraplen</code>) from it
+ */
+ public PPrint(Configuration configuration)
+ {
+ this.configuration = configuration;
+ }
+
+ /**
+ * @param ind
+ * @return
+ */
+ int cWrapLen(int ind)
+ {
+ /* #431953 - start RJ Wraplen adjusted for smooth international ride */
+ if ("zh".equals(this.configuration.language))
+ {
+ // Chinese characters take two positions on a fixed-width screen
+ // It would be more accurate to keep a parallel linelen and wraphere
incremented by 2 for Chinese characters
+ // and 1 otherwise, but this is way simpler.
+ return (ind + ((this.configuration.wraplen - ind) / 2));
+ }
+ if ("ja".equals(this.configuration.language))
+ {
+ /* average Japanese text is 30% kanji */
+ return (ind + (((this.configuration.wraplen - ind) * 7) / 10));
+ }
+ return (this.configuration.wraplen);
+ /* #431953 - end RJ */
+ }
+
+    /**
+     * return one less than the number of bytes used by the UTF-8 byte sequence. The Unicode char is returned in
+     * ch; when decoding reports an error the replacement char 0xFFFD is stored instead.
+     * @param str points to the UTF-8 byte sequence
+     * @param start starting offset in str
+     * @param ch one-element array receiving the decoded char
+     * @return one less than the number of bytes used by UTF-8 char
+     */
+    public static int getUTF8(byte[] str, int start, int[] ch)
+    {
+        int[] decoded = new int[1];
+        int[] consumed = new int[]{0};
+
+        // the first byte is handed over separately (as unsigned value); the successor bytes are read from the
+        // same array beginning at start + 1
+        int firstByte = TidyUtils.toUnsigned(str[start]);
+        boolean failed = EncodingUtils.decodeUTF8BytesToChar(decoded, firstByte, str, null, consumed, start + 1);
+
+        ch[0] = failed ? 0xFFFD : decoded[0]; // 0xFFFD: replacement char
+        return consumed[0] - 1;
+    }
+
+    /**
+     * store char c as UTF-8 encoded byte stream.
+     * @param buf buffer handed to the encoder
+     * @param start offset the returned position is computed from
+     * @param c char to encode
+     * @return <code>start</code> plus the number of bytes produced
+     */
+    public static int putUTF8(byte[] buf, int start, int c)
+    {
+        int[] written = new int[]{0};
+
+        if (EncodingUtils.encodeCharToUTF8Bytes(c, buf, null, written))
+        {
+            // encoder reported an error: store replacement char 0xFFFD encoded as UTF-8
+            // NOTE(review): the replacement bytes go to buf[0..2] regardless of start - confirm this is intended
+            buf[0] = (byte) 0xEF;
+            buf[1] = (byte) 0xBF;
+            buf[2] = (byte) 0xBD;
+            written[0] = 3;
+        }
+
+        return start + written[0];
+    }
+
+ private void addC(int c, int index)
+ {
+ if (index + 1 >= lbufsize)
+ {
+ while (index + 1 >= lbufsize)
+ {
+ if (lbufsize == 0)
+ {
+ lbufsize = 256;
+ }
+ else
+ {
+ lbufsize = lbufsize * 2;
+ }
+ }
+
+ int[] temp = new int[lbufsize];
+ if (linebuf != null)
+ {
+ System.arraycopy(linebuf, 0, temp, 0, index);
+ }
+ linebuf = temp;
+ }
+
+ linebuf[index] = c;
+ }
+
+ /**
+ * Adds an ascii String.
+ * @param str String to be added
+ * @param index actual line lenght
+ * @return final line length
+ */
+ private int addAsciiString(String str, int index)
+ {
+
+ int len = str.length();
+ if (index + len >= lbufsize)
+ {
+ while (index + len >= lbufsize)
+ {
+ if (lbufsize == 0)
+ {
+ lbufsize = 256;
+ }
+ else
+ {
+ lbufsize = lbufsize * 2;
+ }
+ }
+
+ int[] temp = new int[lbufsize];
+ if (linebuf != null)
+ {
+ System.arraycopy(linebuf, 0, temp, 0, index);
+ }
+ linebuf = temp;
+ }
+
+ for (int ix = 0; ix < len; ++ix)
+ {
+ linebuf[index + ix] = str.charAt(ix);
+ }
+ return index + len;
+ }
+
+    /**
+     * Writes the buffered line up to the current wrap point, followed by a newline, and moves the remainder of
+     * the buffer to its beginning. Does nothing when no wrap point is set.
+     * @param fout output the line is written to
+     * @param indent number of spaces written before the line
+     * @throws IOException if writing to fout fails
+     */
+    private void wrapLine(Out fout, int indent) throws IOException
+    {
+        if (wraphere == 0)
+        {
+            return;
+        }
+
+        for (int pad = 0; pad < indent; pad++)
+        {
+            fout.outc(' ');
+        }
+
+        for (int pos = 0; pos < wraphere; pos++)
+        {
+            fout.outc(linebuf[pos]);
+        }
+
+        if (inString)
+        {
+            // inside a string: a space and a backslash end the written line
+            fout.outc(' ');
+            fout.outc('\\');
+        }
+
+        fout.newline();
+
+        if (linelen <= wraphere)
+        {
+            linelen = 0;
+        }
+        else
+        {
+            // drop a single space sitting right at the wrap point
+            if (linebuf[wraphere] == ' ')
+            {
+                ++wraphere;
+            }
+
+            // zero-terminate the buffer, then shift everything from the wrap point (terminator included) to the
+            // start of the buffer
+            addC('\0', linelen);
+
+            int target = 0;
+            int source = wraphere;
+            int moved;
+            do
+            {
+                moved = linebuf[source++];
+                linebuf[target++] = moved;
+            }
+            while (moved != 0);
+
+            linelen -= wraphere;
+        }
+
+        wraphere = 0;
+    }
+
+    /**
+     * Writes the buffered line up to the current wrap point followed by a space (plus a backslash when inside a
+     * string) and a newline, then moves the remainder of the buffer to its beginning.
+     * @param fout output the line is written to
+     * @param indent number of spaces written before the line
+     * @param inString whether a backslash has to be written before the newline
+     * @throws IOException if writing to fout fails
+     */
+    private void wrapAttrVal(Out fout, int indent, boolean inString) throws IOException
+    {
+        for (int pad = 0; pad < indent; pad++)
+        {
+            fout.outc(' ');
+        }
+
+        for (int pos = 0; pos < wraphere; pos++)
+        {
+            fout.outc(linebuf[pos]);
+        }
+
+        fout.outc(' ');
+
+        if (inString)
+        {
+            fout.outc('\\');
+        }
+
+        fout.newline();
+
+        if (linelen <= wraphere)
+        {
+            linelen = 0;
+        }
+        else
+        {
+            // drop a single space sitting right at the wrap point
+            if (linebuf[wraphere] == ' ')
+            {
+                ++wraphere;
+            }
+
+            // zero-terminate the buffer, then shift everything from the wrap point (terminator included) to the
+            // start of the buffer
+            addC('\0', linelen);
+
+            int target = 0;
+            int source = wraphere;
+            int moved;
+            do
+            {
+                moved = linebuf[source++];
+                linebuf[target++] = moved;
+            }
+            while (moved != 0);
+
+            linelen -= wraphere;
+        }
+
+        wraphere = 0;
+    }
+
+ /**
+ * @param fout
+ * @param indent
+ * @throws IOException
+ */
+ public void flushLine(Out fout, int indent) throws IOException
+ {
+ int i;
+
+ if (linelen > 0)
+ {
+ if (indent + linelen >= this.configuration.wraplen)
+ {
+ wrapLine(fout, indent);
+ }
+
+ if (!inAttVal || this.configuration.indentAttributes)
+ {
+ for (i = 0; i < indent; ++i)
+ {
+ fout.outc(' ');
+ }
+ }
+
+ for (i = 0; i < linelen; ++i)
+ {
+ fout.outc(linebuf[i]);
+ }
+ }
+
+// fout.newline();
+ linelen = 0;
+ wraphere = 0;
+ inAttVal = false;
+ }
+
+ /**
+ * @param fout
+ * @param indent
+ * @throws IOException
+ */
+ public void condFlushLine(Out fout, int indent) throws IOException
+ {
+ int i;
+
+ if (linelen > 0)
+ {
+ if (indent + linelen >= this.configuration.wraplen)
+ {
+ wrapLine(fout, indent);
+ }
+
+ if (!inAttVal || this.configuration.indentAttributes)
+ {
+ for (i = 0; i < indent; ++i)
+ {
+ fout.outc(' ');
+ }
+ }
+
+ for (i = 0; i < linelen; ++i)
+ {
+ fout.outc(linebuf[i]);
+ }
+
+// fout.newline();
+ linelen = 0;
+ wraphere = 0;
+ inAttVal = false;
+ }
+ }
+
+ /**
+ * Appends one character to the line buffer, escaping it as required by the print mode, the
+ * configuration flags and the output character encoding. Every character stored goes through
+ * <code>addC</code> at position <code>linelen</code>, which is advanced accordingly; a blank (and, for
+ * UTF-8 and Big5 output, certain punctuation) additionally records a wrap position in
+ * <code>wraphere</code>.
+ * @param c character code to append
+ * @param mode bit mask of print mode flags; PREFORMATTED, COMMENT, ATTRIBVALUE, CDATA and NOWRAP are
+ * tested here
+ */
+ private void printChar(int c, short mode)
+ {
+ String entity;
+ boolean breakable = false; // #431953 - RJ
+
+ if (c == ' ' && !TidyUtils.toBoolean(mode & (PREFORMATTED |
COMMENT | ATTRIBVALUE | CDATA)))
+ {
+ // coerce a space character to a non-breaking space
+ if (TidyUtils.toBoolean(mode & NOWRAP))
+ {
+ // by default XML doesn't define &nbsp;
+ if (this.configuration.numEntities || this.configuration.xmlTags)
+ {
+ addC('&', linelen++);
+ addC('#', linelen++);
+ addC('1', linelen++);
+ addC('6', linelen++);
+ addC('0', linelen++);
+ addC(';', linelen++);
+ }
+ else
+ {
+ // otherwise use named entity
+ addC('&', linelen++);
+ addC('n', linelen++);
+ addC('b', linelen++);
+ addC('s', linelen++);
+ addC('p', linelen++);
+ addC(';', linelen++);
+ }
+ return;
+ }
+ // an ordinary blank: remember it as a possible wrap position, then fall through to emit it
+ wraphere = linelen;
+ }
+
+ // comment characters are passed raw
+ if (TidyUtils.toBoolean(mode & (COMMENT | CDATA)))
+ {
+ addC(c, linelen++);
+ return;
+ }
+
+ // except in CDATA map < to &lt; etc.
+ // (CDATA mode has already returned above, so this guard is always true at this point)
+ if (!TidyUtils.toBoolean(mode & CDATA))
+ {
+ if (c == '<')
+ {
+ addC('&', linelen++);
+ addC('l', linelen++);
+ addC('t', linelen++);
+ addC(';', linelen++);
+ return;
+ }
+
+ if (c == '>')
+ {
+ addC('&', linelen++);
+ addC('g', linelen++);
+ addC('t', linelen++);
+ addC(';', linelen++);
+ return;
+ }
+
+ // naked '&' chars can be left alone or quoted as &amp;
+ // The latter is required for XML where naked '&' are illegal.
+ if (c == '&' && this.configuration.quoteAmpersand)
+ {
+ addC('&', linelen++);
+ addC('a', linelen++);
+ addC('m', linelen++);
+ addC('p', linelen++);
+ addC(';', linelen++);
+ return;
+ }
+
+ if (c == '"' && this.configuration.quoteMarks)
+ {
+ addC('&', linelen++);
+ addC('q', linelen++);
+ addC('u', linelen++);
+ addC('o', linelen++);
+ addC('t', linelen++);
+ addC(';', linelen++);
+ return;
+ }
+
+ if (c == '\'' && this.configuration.quoteMarks)
+ {
+ addC('&', linelen++);
+ addC('#', linelen++);
+ addC('3', linelen++);
+ addC('9', linelen++);
+ addC(';', linelen++);
+ return;
+ }
+
+ // 160 is the non-breaking space: replaced by a blank, written as an entity, or passed through
+ if (c == 160 && !this.configuration.rawOut)
+ {
+ if (this.configuration.makeBare)
+ {
+ addC(' ', linelen++);
+ }
+ else if (this.configuration.quoteNbsp)
+ {
+ addC('&', linelen++);
+
+ if (this.configuration.numEntities || this.configuration.xmlTags)
+ {
+ addC('#', linelen++);
+ addC('1', linelen++);
+ addC('6', linelen++);
+ addC('0', linelen++);
+ }
+ else
+ {
+ addC('n', linelen++);
+ addC('b', linelen++);
+ addC('s', linelen++);
+ addC('p', linelen++);
+ }
+
+ addC(';', linelen++);
+ }
+ else
+ {
+ addC(c, linelen++);
+ }
+
+ return;
+ }
+ }
+
+ // #431953 - start RJ
+ // Handle encoding-specific issues
+
+ switch (this.configuration.getOutCharEncoding())
+ {
+ case Configuration.UTF8 :
+ // Chinese doesn't have spaces, so it needs other kinds of breaks
+ // This will also help documents using nice Unicode punctuation
+ // But we leave the ASCII range punctuation untouched
+
+ // Break after any punctuation or spaces characters
+ if ((c >= 0x2000) && !TidyUtils.toBoolean(mode &
PREFORMATTED))
+ {
+ if (((c >= 0x2000) && (c <= 0x2006))
+ || ((c >= 0x2008) && (c <= 0x2010))
+ || ((c >= 0x2011) && (c <= 0x2046))
+ || ((c >= 0x207D) && (c <= 0x207E))
+ || ((c >= 0x208D) && (c <= 0x208E))
+ || ((c >= 0x2329) && (c <= 0x232A))
+ || ((c >= 0x3001) && (c <= 0x3003))
+ || ((c >= 0x3008) && (c <= 0x3011))
+ || ((c >= 0x3014) && (c <= 0x301F))
+ || ((c >= 0xFD3E) && (c <= 0xFD3F))
+ || ((c >= 0xFE30) && (c <= 0xFE44))
+ || ((c >= 0xFE49) && (c <= 0xFE52))
+ || ((c >= 0xFE54) && (c <= 0xFE61))
+ || ((c >= 0xFE6A) && (c <= 0xFE6B))
+ || ((c >= 0xFF01) && (c <= 0xFF03))
+ || ((c >= 0xFF05) && (c <= 0xFF0A))
+ || ((c >= 0xFF0C) && (c <= 0xFF0F))
+ || ((c >= 0xFF1A) && (c <= 0xFF1B))
+ || ((c >= 0xFF1F) && (c <= 0xFF20))
+ || ((c >= 0xFF3B) && (c <= 0xFF3D))
+ || ((c >= 0xFF61) && (c <= 0xFF65)))
+ {
+ wraphere = linelen + 2; // 2, because AddChar is not till later
+ breakable = true;
+ }
+ else
+ {
+ switch (c)
+ {
+ case 0xFE63 :
+ case 0xFE68 :
+ case 0x3030 :
+ case 0x30FB :
+ case 0xFF3F :
+ case 0xFF5B :
+ case 0xFF5D :
+ wraphere = linelen + 2;
+ breakable = true;
+ }
+ }
+ // but break before a left punctuation
+ if (breakable)
+ {
+ if (((c >= 0x201A) && (c <= 0x201C)) || ((c >=
0x201E) && (c <= 0x201F)))
+ {
+ wraphere--;
+ }
+ else
+ {
+ switch (c)
+ {
+ case 0x2018 :
+ case 0x2039 :
+ case 0x2045 :
+ case 0x207D :
+ case 0x208D :
+ case 0x2329 :
+ case 0x3008 :
+ case 0x300A :
+ case 0x300C :
+ case 0x300E :
+ case 0x3010 :
+ case 0x3014 :
+ case 0x3016 :
+ case 0x3018 :
+ case 0x301A :
+ case 0x301D :
+ case 0xFD3E :
+ case 0xFE35 :
+ case 0xFE37 :
+ case 0xFE39 :
+ case 0xFE3B :
+ case 0xFE3D :
+ case 0xFE3F :
+ case 0xFE41 :
+ case 0xFE43 :
+ case 0xFE59 :
+ case 0xFE5B :
+ case 0xFE5D :
+ case 0xFF08 :
+ case 0xFF3B :
+ case 0xFF5B :
+ case 0xFF62 :
+ wraphere--;
+ }
+ }
+ }
+ }
+ // the character itself is not stored here; the UTF-8 case falls out of the switch
+ break;
+ case Configuration.BIG5 :
+ // Allow linebreak at Chinese punctuation characters
+ // There are not many spaces in Chinese
+ addC(c, linelen++);
+ if (((c & 0xFF00) == 0xA100) && !TidyUtils.toBoolean(mode
& PREFORMATTED))
+ {
+ wraphere = linelen;
+ // opening brackets have odd codes: break before them
+ if ((c > 0x5C) && (c < 0xAD) && ((c & 1) ==
1))
+ {
+ wraphere--;
+ }
+ }
+ return;
+ case Configuration.SHIFTJIS :
+ case Configuration.ISO2022 : // ISO 2022 characters are passed raw
+ addC(c, linelen++);
+ return;
+ default :
+ if (this.configuration.rawOut)
+ {
+ addC(c, linelen++);
+ return;
+ }
+ // #431953 - end RJ
+ }
+
+ // if preformatted text, map &nbsp; to space
+ if (c == 160 && TidyUtils.toBoolean(mode & PREFORMATTED))
+ {
+ addC(' ', linelen++);
+ return;
+ }
+
+ // Filters from Word and PowerPoint often use smart quotes resulting in character
codes between 128 and 159.
+ // Unfortunately, the corresponding HTML 4.0 entities for these are not widely
supported.
+ // The following converts dashes and quotation marks to the nearest ASCII
equivalent.
+ // My thanks to Andrzej Novosiolov for his help with this code.
+
+ if (this.configuration.makeClean && this.configuration.asciiChars ||
this.configuration.makeBare)
+ {
+ if (c >= 0x2013 && c <= 0x201E)
+ {
+ switch (c)
+ {
+ case 0x2013 : // en dash
+ case 0x2014 : // em dash
+ c = '-';
+ break;
+ case 0x2018 : // left single quotation mark
+ case 0x2019 : // right single quotation mark
+ case 0x201A : // single low-9 quotation mark
+ c = '\'';
+ break;
+ case 0x201C : // left double quotation mark
+ case 0x201D : // right double quotation mark
+ case 0x201E : // double low-9 quotation mark
+ c = '"';
+ break;
+ }
+ }
+ }
+
+ // don't map latin-1 chars to entities
+ if (this.configuration.getOutCharEncoding() == Configuration.LATIN1)
+ {
+ if (c > 255) /* multi byte chars */
+ {
+ // named entity when one is known and numeric entities are not forced, else &#NNN;
+ if (!this.configuration.numEntities)
+ {
+ entity = EntityTable.getDefaultEntityTable().entityName((short) c);
+ if (entity != null)
+ {
+ entity = "&" + entity + ";";
+ }
+ else
+ {
+ entity = "&#" + c + ";";
+ }
+ }
+ else
+ {
+ entity = "&#" + c + ";";
+ }
+
+ for (int i = 0; i < entity.length(); i++)
+ {
+ addC(entity.charAt(i), linelen++);
+ }
+
+ return;
+ }
+
+ // codes 127..159 are always written as numeric references
+ if (c > 126 && c < 160)
+ {
+ entity = "&#" + c + ";";
+
+ for (int i = 0; i < entity.length(); i++)
+ {
+ addC(entity.charAt(i), linelen++);
+ }
+
+ return;
+ }
+
+ addC(c, linelen++);
+ return;
+ }
+
+ // don't map utf8 or utf16 chars to entities
+ if (this.configuration.getOutCharEncoding() == Configuration.UTF8
+ || this.configuration.getOutCharEncoding() == Configuration.UTF16
+ || this.configuration.getOutCharEncoding() == Configuration.UTF16LE
+ || this.configuration.getOutCharEncoding() == Configuration.UTF16BE)
+ {
+ addC(c, linelen++);
+ return;
+ }
+
+ // use numeric entities only for XML
+ if (this.configuration.xmlTags)
+ {
+ // if ASCII use numeric entities for chars > 127
+ if (c > 127 && this.configuration.getOutCharEncoding() ==
Configuration.ASCII)
+ {
+ entity = "&#" + c + ";";
+
+ for (int i = 0; i < entity.length(); i++)
+ {
+ addC(entity.charAt(i), linelen++);
+ }
+
+ return;
+ }
+
+ // otherwise output char raw
+ addC(c, linelen++);
+ return;
+ }
+
+ // default treatment for ASCII
+ if (this.configuration.getOutCharEncoding() == Configuration.ASCII && (c
> 126 || (c < ' ' && c != '\t')))
+ {
+ if (!this.configuration.numEntities)
+ {
+ entity = EntityTable.getDefaultEntityTable().entityName((short) c);
+ if (entity != null)
+ {
+ entity = "&" + entity + ";";
+ }
+ else
+ {
+ entity = "&#" + c + ";";
+ }
+ }
+ else
+ {
+ entity = "&#" + c + ";";
+ }
+
+ for (int i = 0; i < entity.length(); i++)
+ {
+ addC(entity.charAt(i), linelen++);
+ }
+
+ return;
+ }
+
+ // everything else is stored unchanged
+ addC(c, linelen++);
+ }
+
+ /**
+ * The line buffer is uint not char so we can hold Unicode values unencoded. The
translation to UTF-8 is deferred to
+ * the outc routine called to flush the line buffer.
+ * @param fout
+ * @param mode
+ * @param indent
+ * @param textarray
+ * @param start
+ * @param end
+ * @throws IOException
+ */
+ private void printText(Out fout, short mode, int indent, byte[] textarray, int start,
int end) throws IOException
+ {
+ int i, c;
+ int[] ci = new int[1];
+
+ for (i = start; i < end; ++i)
+ {
+ if (indent + linelen >= this.configuration.wraplen)
+ {
+ wrapLine(fout, indent);
+ }
+
+ c = (textarray[i]) & 0xFF; // Convert to unsigned.
+
+ // look for UTF-8 multibyte character
+ if (c > 0x7F)
+ {
+ i += getUTF8(textarray, i, ci);
+ c = ci[0];
+ }
+
+ if (c == '\n')
+ {
+ flushLine(fout, indent);
+ fout.newline();
+ continue;
+ }
+
+ printChar(c, mode);
+ }
+ }
+
+ /**
+ * @param str
+ */
+ private void printString(String str)
+ {
+ for (int i = 0; i < str.length(); i++)
+ {
+ addC(str.charAt(i), linelen++);
+ }
+ }
+
+ /**
+  * Prints <code>=</code>, the opening delimiter, the escaped attribute value and the closing delimiter
+  * into the line buffer.
+  * <p>
+  * An occurrence of the delimiter inside the value is always written as a character reference
+  * (<code>&amp;quot;</code> for a double quote, <code>&amp;#39;</code> for a single quote) so it cannot
+  * terminate the value early. The other kind of quote is escaped only when
+  * <code>configuration.quoteMarks</code> is set and toggles the "inside a string" state that is passed
+  * to <code>wrapAttrVal</code>.
+  * </p>
+  * @param fout output sink
+  * @param indent current indentation
+  * @param value attribute value; <code>null</code> prints an empty pair of delimiters
+  * @param delim delimiter character; 0 selects the double quote
+  * @param wrappable <code>true</code> if the value may be wrapped at blanks
+  * @throws IOException propagated from the output sink
+  */
+ private void printAttrValue(Out fout, int indent, String value, int delim, boolean wrappable) throws IOException
+ {
+     int c;
+     int[] ci = new int[1];
+     boolean wasinstring = false;
+     byte[] valueChars = null;
+     int i;
+     short mode = (wrappable ? (short) (NORMAL | ATTRIBVALUE) : (short) (PREFORMATTED | ATTRIBVALUE));
+
+     if (value != null)
+     {
+         valueChars = TidyUtils.getBytes(value);
+     }
+
+     // Note: the former check for ASP, Tango or PHP instructions at the start of the value (which
+     // switched the mode to CDATA) is disabled in this version; such values are escaped like any other.
+
+     if (delim == 0)
+     {
+         delim = '"';
+     }
+
+     addC('=', linelen++);
+
+     // don't wrap after "=" for xml documents
+     if (!this.configuration.xmlOut)
+     {
+         if (indent + linelen < this.configuration.wraplen)
+         {
+             wraphere = linelen;
+         }
+
+         if (indent + linelen >= this.configuration.wraplen)
+         {
+             wrapLine(fout, indent);
+         }
+
+         if (indent + linelen < this.configuration.wraplen)
+         {
+             wraphere = linelen;
+         }
+         else
+         {
+             condFlushLine(fout, indent);
+         }
+     }
+
+     addC(delim, linelen++);
+
+     if (value != null)
+     {
+         inString = false;
+
+         i = 0;
+         while (i < valueChars.length)
+         {
+             c = (valueChars[i]) & 0xFF; // Convert to unsigned.
+
+             if (wrappable && c == ' ' && indent + linelen < this.configuration.wraplen)
+             {
+                 wraphere = linelen;
+                 wasinstring = inString;
+             }
+
+             if (wrappable && wraphere > 0 && indent + linelen >= this.configuration.wraplen)
+             {
+                 wrapAttrVal(fout, indent, wasinstring);
+             }
+
+             if (c == delim)
+             {
+                 // the delimiter itself must never appear raw inside the value
+                 String entity = (c == '"' ? "&quot;" : "&#39;");
+
+                 for (int j = 0; j < entity.length(); j++)
+                 {
+                     addC(entity.charAt(j), linelen++);
+                 }
+
+                 ++i;
+                 continue;
+             }
+             else if (c == '"')
+             {
+                 if (this.configuration.quoteMarks)
+                 {
+                     addC('&', linelen++);
+                     addC('q', linelen++);
+                     addC('u', linelen++);
+                     addC('o', linelen++);
+                     addC('t', linelen++);
+                     addC(';', linelen++);
+                 }
+                 else
+                 {
+                     addC('"', linelen++);
+                 }
+
+                 // a double quote inside a single-quoted value opens or closes a string
+                 if (delim == '\'')
+                 {
+                     inString = !inString;
+                 }
+
+                 ++i;
+                 continue;
+             }
+             else if (c == '\'')
+             {
+                 if (this.configuration.quoteMarks)
+                 {
+                     addC('&', linelen++);
+                     addC('#', linelen++);
+                     addC('3', linelen++);
+                     addC('9', linelen++);
+                     addC(';', linelen++);
+                 }
+                 else
+                 {
+                     addC('\'', linelen++);
+                 }
+
+                 // a single quote inside a double-quoted value opens or closes a string
+                 if (delim == '"')
+                 {
+                     inString = !inString;
+                 }
+
+                 ++i;
+                 continue;
+             }
+
+             // look for UTF-8 multibyte character
+             if (c > 0x7F)
+             {
+                 i += getUTF8(valueChars, i, ci);
+                 c = ci[0];
+             }
+
+             ++i;
+
+             if (c == '\n')
+             {
+                 // a line feed in the value flushes the line; no newline is written here
+                 flushLine(fout, indent);
+                 continue;
+             }
+
+             printChar(c, mode);
+         }
+     }
+
+     inString = false;
+     addC(delim, linelen++);
+ }
+
+ /**
+ * @param fout
+ * @param indent
+ * @param node
+ * @param attr
+ * @throws IOException
+ */
+ private void printAttribute(Out fout, int indent, Node node, AttVal attr) throws
IOException
+ {
+ String name;
+ boolean wrappable = false;
+
+ if (this.configuration.indentAttributes)
+ {
+ flushLine(fout, indent);
+ indent += this.configuration.spaces;
+ }
+
+ name = attr.attribute;
+
+ if (indent + linelen >= this.configuration.wraplen)
+ {
+ wrapLine(fout, indent);
+ }
+
+ if (!this.configuration.xmlTags && !this.configuration.xmlOut &&
attr.dict != null)
+ {
+ if (AttributeTable.getDefaultAttributeTable().isScript(name))
+ {
+ wrappable = this.configuration.wrapScriptlets;
+ }
+ else if (!attr.dict.isNowrap() && this.configuration.wrapAttVals)
+ {
+ wrappable = true;
+ }
+ }
+
+ if (indent + linelen < this.configuration.wraplen)
+ {
+ wraphere = linelen;
+ addC(' ', linelen++);
+ }
+ else
+ {
+ condFlushLine(fout, indent);
+ addC(' ', linelen++);
+ }
+
+ for (int i = 0; i < name.length(); i++)
+ {
+ addC(
+ TidyUtils.foldCase(name.charAt(i), this.configuration.upperCaseAttrs,
this.configuration.xmlTags),
+ linelen++);
+ }
+
+ if (indent + linelen >= this.configuration.wraplen)
+ {
+ wrapLine(fout, indent);
+ }
+
+ if (attr.value == null)
+ {
+ if (this.configuration.xmlTags || this.configuration.xmlOut)
+ {
+ printAttrValue(fout, indent, (attr.isBoolAttribute() ? attr.attribute :
""), attr.delim, true);
+ }
+ else if (!attr.isBoolAttribute() && node != null &&
!node.isNewNode())
+ {
+ printAttrValue(fout, indent, "", attr.delim, true);
+ }
+ else if (indent + linelen < this.configuration.wraplen)
+ {
+ wraphere = linelen;
+ }
+
+ }
+ else
+ {
+ printAttrValue(fout, indent, attr.value, attr.delim, wrappable);
+ }
+ }
+
+ /**
+ * @param fout
+ * @param indent
+ * @param node
+ * @param attr
+ * @throws IOException
+ */
+ private void printAttrs(Out fout, int indent, Node node, AttVal attr) throws
IOException
+ {
+ // add xml:space attribute to pre and other elements
+ if (configuration.xmlOut
+ && configuration.xmlSpace
+ && ParserImpl.XMLPreserveWhiteSpace(node, configuration.tt)
+ && node.getAttrByName("xml:space") == null)
+ {
+ node.addAttribute("xml:space", "preserve");
+ if (attr != null)
+ {
+ attr = node.attributes;
+ }
+ }
+
+ if (attr != null)
+ {
+ if (attr.next != null)
+ {
+ printAttrs(fout, indent, node, attr.next);
+ }
+
+ if (attr.attribute != null)
+ {
+ Attribute attribute = attr.dict;
+
+ if (!this.configuration.dropProprietaryAttributes
+ || !(attribute == null || TidyUtils.toBoolean(attribute.getVersions()
& Dict.VERS_PROPRIETARY)))
+ {
+ printAttribute(fout, indent, node, attr);
+ }
+ }
+ else if (attr.asp != null)
+ {
+ addC(' ', linelen++);
+ printAsp(fout, indent, attr.asp);
+ }
+ else if (attr.php != null)
+ {
+ addC(' ', linelen++);
+ printPhp(fout, indent, attr.php);
+ }
+ }
+
+ }
+
+ /**
+ * Line can be wrapped immediately after inline start tag provided if follows a text
node ending in a space, or it
+ * parent is an inline element that that rule applies to. This behaviour was reverse
engineered from Netscape 3.0
+ * @param node current Node
+ * @return <code>true</code> if the current char follows a space
+ */
+ private static boolean afterSpace(Node node)
+ {
+ Node prev;
+ int c;
+
+ if (node == null || node.tag == null || !TidyUtils.toBoolean(node.tag.model &
Dict.CM_INLINE))
+ {
+ return true;
+ }
+
+ prev = node.prev;
+
+ if (prev != null)
+ {
+ if (prev.type == Node.TEXT_NODE && prev.end > prev.start)
+ {
+ c = (prev.textarray[prev.end - 1]) & 0xFF; // Convert to unsigned.
+
+ if (c == 160 || c == ' ' || c == '\n')
+ {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ return afterSpace(node.parent);
+ }
+
+ /**
+ * @param lexer
+ * @param fout
+ * @param mode
+ * @param indent
+ * @param node
+ * @throws IOException
+ */
+ private void printTag(Lexer lexer, Out fout, short mode, int indent, Node node)
throws IOException
+ {
+ String p;
+ TagTable tt = this.configuration.tt;
+
+ addC('<', linelen++);
+
+ if (node.type == Node.END_TAG)
+ {
+ addC('/', linelen++);
+ }
+
+ p = node.element;
+ for (int i = 0; i < p.length(); i++)
+ {
+ addC(
+ TidyUtils.foldCase(p.charAt(i), this.configuration.upperCaseTags,
this.configuration.xmlTags),
+ linelen++);
+ }
+
+ printAttrs(fout, indent, node, node.attributes);
+
+ if (node.type == Node.START_END_TAG || TidyUtils.toBoolean(node.tag.model &
Dict.CM_EMPTY)) {
+ if ((this.configuration.xmlOut || this.configuration.xHTML) ||
+ ! TidyUtils.toBoolean(node.tag.model & Dict.CM_EMPTY) )
+ {
+ addC(' ', linelen++); // Space is NS compatibility hack <br />
+ addC('/', linelen++); // Required end tag marker
+ }
+ }
+
+ addC('>', linelen++);
+
+ if ((node.type != Node.START_END_TAG || configuration.xHTML) &&
!TidyUtils.toBoolean(mode & PREFORMATTED))
+ {
+ if (indent + linelen >= this.configuration.wraplen)
+ {
+ wrapLine(fout, indent);
+ }
+
+ if (indent + linelen < this.configuration.wraplen)
+ {
+
+ // wrap after start tag if is <br/> or if it's not inline
+ // fix for [514348]
+ if (!TidyUtils.toBoolean(mode & NOWRAP)
+ && (node.tag == tt.tagBr)
+// && (!TidyUtils.toBoolean(node.tag.model &
Dict.CM_INLINE) || (node.tag == tt.tagBr))
+ && afterSpace(node))
+ {
+ wraphere = linelen;
+ }
+
+ }
+ }
+ else
+ {
+ condFlushLine(fout, indent);
+ }
+
+ }
+
+ /**
+ * @param mode
+ * @param indent
+ * @param node
+ */
+ private void printEndTag(short mode, int indent, Node node)
+ {
+ String p;
+
+ // Netscape ignores SGML standard by not ignoring a line break before </A>
or </U> etc.
+ // To avoid rendering this as an underlined space, I disable line wrapping before
inline end tags
+
+ // if (indent + linelen < this.configuration.wraplen &&
!TidyUtils.toBoolean(mode & NOWRAP))
+ // {
+ // wraphere = linelen;
+ // }
+
+ addC('<', linelen++);
+ addC('/', linelen++);
+
+ p = node.element;
+ for (int i = 0; i < p.length(); i++)
+ {
+ addC(
+ TidyUtils.foldCase(p.charAt(i), this.configuration.upperCaseTags,
this.configuration.xmlTags),
+ linelen++);
+ }
+
+ addC('>', linelen++);
+ }
+
+ /**
+  * Prints a comment node, unless comments are hidden by the configuration. The comment text is passed
+  * through unescaped (COMMENT mode).
+  * @param fout output sink
+  * @param indent current indentation
+  * @param node comment node
+  * @throws IOException propagated from the output sink
+  */
+ private void printComment(Out fout, int indent, Node node) throws IOException
+ {
+     if (!this.configuration.hideComments)
+     {
+         // the line may be broken in front of the comment
+         if (indent + linelen < this.configuration.wraplen)
+         {
+             wraphere = linelen;
+         }
+
+         printString("<!--");
+         printText(fout, COMMENT, indent, node.textarray, node.start, node.end);
+         // See Lexer.java: AQ 8Jul2000
+         printString("-->");
+
+         if (node.linebreak)
+         {
+             flushLine(fout, indent);
+             fout.newline();
+         }
+     }
+ }
+
+ /**
+  * Prints a DOCTYPE declaration on a line of its own.
+  * <p>
+  * Quote escaping (<code>configuration.quoteMarks</code>) is switched off while the declaration is
+  * printed; the previous setting is restored in a <code>finally</code> block so that it is not lost if
+  * printing fails. Text between <code>[</code> and <code>]</code> (the internal DTD subset) is printed
+  * in CDATA mode, i.e. unescaped.
+  * </p>
+  * @param fout output sink
+  * @param indent current indentation
+  * @param lexer Lexer (not used by this method)
+  * @param node doctype node
+  * @throws IOException propagated from the output sink
+  */
+ private void printDocType(Out fout, int indent, Lexer lexer, Node node) throws IOException
+ {
+     int i, c = 0;
+     short mode = 0;
+     int[] ci = new int[1]; // receives decoded UTF-8 characters; allocated once for the whole loop
+     boolean q = this.configuration.quoteMarks;
+
+     this.configuration.quoteMarks = false;
+     try
+     {
+         if (indent + linelen < this.configuration.wraplen)
+         {
+             wraphere = linelen;
+         }
+
+         condFlushLine(fout, indent);
+
+         printString("<!DOCTYPE ");
+
+         if (indent + linelen < this.configuration.wraplen)
+         {
+             wraphere = linelen;
+         }
+
+         for (i = node.start; i < node.end; ++i)
+         {
+             if (indent + linelen >= this.configuration.wraplen)
+             {
+                 wrapLine(fout, indent);
+             }
+
+             c = node.textarray[i] & 0xFF; // Convert to unsigned.
+
+             // inDTDSubset?
+             if (TidyUtils.toBoolean(mode & CDATA))
+             {
+                 if (c == ']')
+                 {
+                     mode &= ~CDATA;
+                 }
+             }
+             else if (c == '[')
+             {
+                 mode |= CDATA;
+             }
+
+             // look for UTF-8 multibyte character
+             if (c > 0x7F)
+             {
+                 i += getUTF8(node.textarray, i, ci);
+                 c = ci[0];
+             }
+
+             if (c == '\n')
+             {
+                 flushLine(fout, indent);
+                 fout.newline();
+                 continue;
+             }
+
+             printChar(c, mode);
+         }
+
+         if (linelen < this.configuration.wraplen)
+         {
+             wraphere = linelen;
+         }
+
+         addC('>', linelen++);
+     }
+     finally
+     {
+         this.configuration.quoteMarks = q;
+     }
+
+     condFlushLine(fout, indent);
+     fout.newline();
+ }
+
+ /**
+ * @param fout
+ * @param indent
+ * @param node
+ * @throws IOException
+ */
+ private void printPI(Out fout, int indent, Node node) throws IOException
+ {
+ if (indent + linelen < this.configuration.wraplen)
+ {
+ wraphere = linelen;
+ }
+
+ addC('<', linelen++);
+ addC('?', linelen++);
+
+ // set CDATA to pass < and > unescaped
+ printText(fout, CDATA, indent, node.textarray, node.start, node.end);
+
+ if (node.end <= 0 || node.textarray[node.end - 1] != '?') // #542029 -
fix by Terry Teague 10 Apr 02
+ {
+ addC('?', linelen++);
+ }
+
+ addC('>', linelen++);
+ condFlushLine(fout, indent);
+ fout.newline();
+ }
+
+ /**
+ * Pretty print the xml declaration.
+ * @param fout
+ * @param indent
+ * @param node
+ * @throws IOException
+ */
+ private void printXmlDecl(Out fout, int indent, Node node) throws IOException
+ {
+ if (indent + linelen < this.configuration.wraplen)
+ {
+ wraphere = linelen;
+ }
+
+ addC('<', linelen++);
+ addC('?', linelen++);
+ addC('x', linelen++);
+ addC('m', linelen++);
+ addC('l', linelen++);
+
+ printAttrs(fout, indent, node, node.attributes);
+
+ if (node.end <= 0 || node.textarray[node.end - 1] != '?') // #542029 -
fix by Terry Teague 10 Apr 02
+ {
+ addC('?', linelen++);
+ }
+
+ addC('>', linelen++);
+
+ condFlushLine(fout, indent);
+ fout.newline();
+ }
+
+ /**
+  * Prints an ASP section. Note ASP and JSTE share the <code>&lt;% ... %&gt;</code> syntax.
+  * <p>
+  * Wrapping is disabled for the duration of the call (by temporarily raising
+  * <code>configuration.wraplen</code>) unless both <code>wrapAsp</code> and <code>wrapJste</code> are
+  * set. The original wrap length is restored in a <code>finally</code> block so a failed write cannot
+  * leave the configuration modified. The line is not flushed afterwards.
+  * </p>
+  * @param fout output sink
+  * @param indent current indentation
+  * @param node ASP node
+  * @throws IOException propagated from the output sink
+  */
+ private void printAsp(Out fout, int indent, Node node) throws IOException
+ {
+     int savewraplen = this.configuration.wraplen;
+
+     // disable wrapping if so requested
+     if (!this.configuration.wrapAsp || !this.configuration.wrapJste)
+     {
+         this.configuration.wraplen = 0xFFFFFF; // a very large number
+     }
+
+     try
+     {
+         addC('<', linelen++);
+         addC('%', linelen++);
+
+         printText(fout, (this.configuration.wrapAsp ? CDATA : COMMENT), indent, node.textarray, node.start, node.end);
+
+         addC('%', linelen++);
+         addC('>', linelen++);
+     }
+     finally
+     {
+         this.configuration.wraplen = savewraplen;
+     }
+ }
+
+ /**
+  * Prints a JSTE section using the <code>&lt;# ... #&gt;</code> syntax.
+  * <p>
+  * Wrapping is disabled for the duration of the call (by temporarily raising
+  * <code>configuration.wraplen</code>) unless <code>wrapJste</code> is set. The original wrap length is
+  * restored in a <code>finally</code> block so a failed write cannot leave the configuration modified.
+  * The line is not flushed afterwards.
+  * </p>
+  * @param fout output sink
+  * @param indent current indentation
+  * @param node JSTE node
+  * @throws IOException propagated from the output sink
+  */
+ private void printJste(Out fout, int indent, Node node) throws IOException
+ {
+     int savewraplen = this.configuration.wraplen;
+
+     // disable wrapping if so requested
+     if (!this.configuration.wrapJste)
+     {
+         this.configuration.wraplen = 0xFFFFFF; // a very large number
+     }
+
+     try
+     {
+         addC('<', linelen++);
+         addC('#', linelen++);
+
+         printText(fout, (this.configuration.wrapJste ? CDATA : COMMENT), indent, node.textarray, node.start, node.end);
+
+         addC('#', linelen++);
+         addC('>', linelen++);
+     }
+     finally
+     {
+         this.configuration.wraplen = savewraplen;
+     }
+ }
+
+ /**
+  * Prints a PHP section. PHP is based on XML processing instructions
+  * (<code>&lt;? ... ?&gt;</code>).
+  * <p>
+  * Wrapping is disabled for the duration of the call (by temporarily raising
+  * <code>configuration.wraplen</code>) unless <code>wrapPhp</code> is set. The original wrap length is
+  * restored in a <code>finally</code> block so a failed write cannot leave the configuration modified.
+  * The line is not flushed afterwards.
+  * </p>
+  * @param fout output sink
+  * @param indent current indentation
+  * @param node PHP node
+  * @throws IOException propagated from the output sink
+  */
+ private void printPhp(Out fout, int indent, Node node) throws IOException
+ {
+     int savewraplen = this.configuration.wraplen;
+
+     // disable wrapping if so requested
+     if (!this.configuration.wrapPhp)
+     {
+         this.configuration.wraplen = 0xFFFFFF; // a very large number
+     }
+
+     try
+     {
+         addC('<', linelen++);
+         addC('?', linelen++);
+
+         printText(fout, (this.configuration.wrapPhp ? CDATA : COMMENT), indent, node.textarray, node.start, node.end);
+
+         addC('?', linelen++);
+         addC('>', linelen++);
+     }
+     finally
+     {
+         this.configuration.wraplen = savewraplen;
+     }
+ }
+
+ /**
+  * Prints a CDATA section (<code>&lt;![CDATA[ ... ]]&gt;</code>) with its content passed through
+  * unescaped. The pending line is flushed before and after the section.
+  * <p>
+  * Wrapping is disabled while the section is printed by temporarily raising
+  * <code>configuration.wraplen</code>; the original value is restored in a <code>finally</code> block so
+  * a failed write cannot leave the configuration modified.
+  * </p>
+  * @param fout output sink
+  * @param indent current indentation; ignored (treated as 0) unless <code>indentCdata</code> is set
+  * @param node CDATA node
+  * @throws IOException propagated from the output sink
+  */
+ private void printCDATA(Out fout, int indent, Node node) throws IOException
+ {
+     int savewraplen = this.configuration.wraplen;
+
+     if (!this.configuration.indentCdata)
+     {
+         indent = 0;
+     }
+
+     condFlushLine(fout, indent);
+
+     // disable wrapping
+     this.configuration.wraplen = 0xFFFFFF; // a very large number
+
+     try
+     {
+         printString("<![CDATA[");
+
+         printText(fout, COMMENT, indent, node.textarray, node.start, node.end);
+
+         printString("]]>");
+         condFlushLine(fout, indent);
+     }
+     finally
+     {
+         this.configuration.wraplen = savewraplen;
+     }
+ }
+
+ /**
+  * Prints a marked section (<code>&lt;![ ... ]&gt;</code>).
+  * <p>
+  * Wrapping is disabled for the duration of the call (by temporarily raising
+  * <code>configuration.wraplen</code>) unless <code>wrapSection</code> is set. The original wrap length
+  * is restored in a <code>finally</code> block so a failed write cannot leave the configuration
+  * modified. The line is not flushed afterwards.
+  * </p>
+  * @param fout output sink
+  * @param indent current indentation
+  * @param node section node
+  * @throws IOException propagated from the output sink
+  */
+ private void printSection(Out fout, int indent, Node node) throws IOException
+ {
+     int savewraplen = this.configuration.wraplen;
+
+     // disable wrapping if so requested
+     if (!this.configuration.wrapSection)
+     {
+         this.configuration.wraplen = 0xFFFFFF; // a very large number
+     }
+
+     try
+     {
+         addC('<', linelen++);
+         addC('!', linelen++);
+         addC('[', linelen++);
+
+         printText(
+             fout,
+             (this.configuration.wrapSection ? CDATA : COMMENT),
+             indent,
+             node.textarray,
+             node.start,
+             node.end);
+
+         addC(']', linelen++);
+         addC('>', linelen++);
+     }
+     finally
+     {
+         this.configuration.wraplen = savewraplen;
+     }
+ }
+
+ /**
+  * Checks whether a node is the HEAD element or is nested (at any depth) inside it.
+  * @param node Node
+  * @return <code>true</code> if node is inside an HEAD tag
+  */
+ private boolean insideHead(Node node)
+ {
+     Node current = node;
+
+     // climb the parent chain until HEAD is found or the root is reached
+     while (current.tag != this.configuration.tt.tagHead)
+     {
+         if (current.parent == null)
+         {
+             return false;
+         }
+         current = current.parent;
+     }
+     return true;
+ }
+
+ /**
+  * Is text node and already ends w/ a newline? Used to pretty print CDATA/PRE text content. If it
+  * already ends on a newline, it is not necessary to print another before printing end tag.
+  * <p>
+  * Trailing blanks, tabs and carriage returns are skipped when looking for the newline. The search never
+  * leaves the node's own range <code>[node.start, node.end)</code>, so a newline that belongs to
+  * whatever precedes the node in the shared text buffer is not mistaken for the node's own.
+  * </p>
+  * @param lexer Lexer (not used by this method)
+  * @param node text node
+  * @return number of characters that follow the last newline (the text indent), or -1 if the node is not
+  * a non-empty text node ending in a newline
+  */
+ private int textEndsWithNewline(Lexer lexer, Node node)
+ {
+     if (node.type == Node.TEXT_NODE && node.end > node.start)
+     {
+         int ch, ix = node.end - 1;
+         // Skip non-newline whitespace
+         while (ix >= node.start
+             && TidyUtils.toBoolean(ch = (node.textarray[ix] & 0xff))
+             && (ch == ' ' || ch == '\t' || ch == '\r'))
+         {
+             --ix;
+         }
+
+         // ix may have moved in front of the node when it holds only whitespace: stay inside the node
+         if (ix >= node.start && node.textarray[ix] == '\n')
+         {
+             return node.end - ix - 1; // #543262 tidy eats all memory
+         }
+     }
+     return -1;
+ }
+
+ /**
+ * Does the current node contain a CDATA section?
+ * @param lexer Lexer
+ * @param node Node
+ * @return <code>true</code> if node contains a CDATA section
+ */
+ static boolean hasCDATA(Lexer lexer, Node node)
+ {
+ // Scan forward through the textarray. Since the characters we're
+ // looking for are < 0x7f, we don't have to do any UTF-8 decoding.
+
+ if (node.type != Node.TEXT_NODE)
+ {
+ return false;
+ }
+
+ int len = node.end - node.start;
+ String start = TidyUtils.getString(node.textarray, node.start, len);
+
+ int indexOfCData = start.indexOf(CDATA_START);
+ return indexOfCData > -1 && indexOfCData <= len;
+ }
+
+ /**
+ * Print script and style elements. For XHTML, wrap the content as follows:
+ *
+ * <pre>
+ * JavaScript:
+ * //<![CDATA[
+ * content
+ * //]]>
+ * VBScript:
+ * '<![CDATA[
+ * content
+ * ']]>
+ * CSS:
+ * /*<![CDATA[* /
+ * content
+ * /*]]>* /
+ * other:
+ * <![CDATA[
+ * content
+ * ]]>
+ * </pre>
+ *
+ * The CDATA wrapper is only added for XHTML or XML output, when the element has content and that
+ * content does not already contain a CDATA section. The element is always printed with an indent of 0.
+ *
+ * @param fout output sink
+ * @param mode print mode flags; PREFORMATTED, NOWRAP and CDATA are added for the content
+ * @param indent ignored: overwritten with 0 before anything is printed
+ * @param lexer Lexer whose configuration selects the output flavour
+ * @param node script or style element
+ * @throws IOException propagated from the output sink
+ */
+ private void printScriptStyle(Out fout, short mode, int indent, Lexer lexer, Node
node) throws IOException
+ {
+ Node content;
+ String commentStart = DEFAULT_COMMENT_START;
+ String commentEnd = DEFAULT_COMMENT_END;
+ boolean hasCData = false;
+ int contentIndent = -1;
+
+ // the only statement in this branch is commented out, so the check currently has no effect
+ if (insideHead(node))
+ {
+ // flushLine(fout, indent);
+ }
+
+ indent = 0;
+
+ // start script
+ printTag(lexer, fout, mode, indent, node);
+ // flushLine(fout, indent); // extra newline
+
+ if ((lexer.configuration.xHTML || lexer.configuration.xmlOut )&&
node.content != null)
+ {
+ // pick the comment syntax of the embedded language from the type attribute
+ AttVal type = node.getAttrByName("type");
+ if (type != null)
+ {
+ if ("text/javascript".equalsIgnoreCase(type.value))
+ {
+ commentStart = JS_COMMENT_START;
+ commentEnd = JS_COMMENT_END;
+ }
+ else if ("text/css".equalsIgnoreCase(type.value))
+ {
+ commentStart = CSS_COMMENT_START;
+ commentEnd = CSS_COMMENT_END;
+ }
+ else if ("text/vbscript".equalsIgnoreCase(type.value))
+ {
+ commentStart = VB_COMMENT_START;
+ commentEnd = VB_COMMENT_END;
+ }
+ }
+
+ // only the first content node is checked for an existing CDATA section
+ hasCData = hasCDATA(lexer, node.content);
+ if (!hasCData)
+ {
+ // disable wrapping
+ int savewraplen = lexer.configuration.wraplen;
+ lexer.configuration.wraplen = 0xFFFFFF; // a very large number
+
+ linelen = addAsciiString(commentStart, linelen);
+ linelen = addAsciiString(CDATA_START, linelen);
+ linelen = addAsciiString(commentEnd, linelen);
+ condFlushLine(fout, indent);
+ fout.newline();
+ // restore wrapping
+ lexer.configuration.wraplen = savewraplen;
+ }
+ }
+
+ for (content = node.content; content != null; content = content.next)
+ {
+ printTree(fout, (short) (mode | PREFORMATTED | NOWRAP | CDATA), 0, lexer,
content);
+
+ // remember whether the last content node already ends with a newline
+ if (content.next == null)
+ {
+ contentIndent = textEndsWithNewline(lexer, content);
+ }
+
+ }
+
+ // content did not end with a newline (or there was no content): end the line here
+ if (contentIndent < 0)
+ {
+ condFlushLine(fout, indent);
+ fout.newline();
+ contentIndent = 0;
+ }
+
+ if ((lexer.configuration.xHTML || lexer.configuration.xmlOut ) &&
node.content != null)
+ {
+ if (!hasCData)
+ {
+ // disable wrapping
+ int ix, savewraplen = lexer.configuration.wraplen;
+ lexer.configuration.wraplen = 0xFFFFFF; // a very large number
+
+ // Add spaces to last text node to align w/ indent
+ if (contentIndent > 0 && linelen < contentIndent)
+ {
+ linelen = contentIndent;
+ }
+ // indent is 0 at this point, so this loop body can only run if contentIndent is negative,
+ // which the block above has already ruled out
+ for (ix = 0; contentIndent < indent && ix < indent -
contentIndent; ++ix)
+ {
+ addC(' ', linelen++);
+ }
+
+ linelen = addAsciiString(commentStart, linelen);
+ linelen = addAsciiString(CDATA_END, linelen);
+ linelen = addAsciiString(commentEnd, linelen);
+
+ // restore wrapping
+ lexer.configuration.wraplen = savewraplen;
+ condFlushLine(fout, 0);
+ fout.newline();
+ }
+ }
+
+ printEndTag(mode, indent, node);
+
+ // NOTE(review): the negated clause is false whenever node.type != Node.TEXT_NODE, so this extra
+ // flush can only run for a text node - confirm that this is intended for script/style elements.
+ if (!lexer.configuration.indentContent && node.next != null
+
+ && !((node.tag != null && TidyUtils.toBoolean(node.tag.model
& Dict.CM_INLINE))
+
+ || node.type != Node.TEXT_NODE
+
+ ))
+ {
+ flushLine(fout, indent);
+ }
+
+ flushLine(fout, indent);
+ }
+
+ /**
+ * Should tidy indent the give tag?
+ * @param node actual node
+ * @return <code>true</code> if line should be indented
+ */
+ private boolean shouldIndent(Node node)
+ {
+ TagTable tt = this.configuration.tt;
+
+ if (!this.configuration.indentContent)
+ {
+ return false;
+ }
+
+ if (this.configuration.smartIndent)
+ {
+ if (node.content != null && TidyUtils.toBoolean(node.tag.model &
Dict.CM_NO_INDENT))
+ {
+ for (node = node.content; node != null; node = node.next)
+ {
+ if (node.tag != null && TidyUtils.toBoolean(node.tag.model
& Dict.CM_BLOCK))
+ {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ if (TidyUtils.toBoolean(node.tag.model & Dict.CM_HEADING))
+ {
+ return false;
+ }
+
+ if (node.tag == tt.tagP)
+ {
+ return false;
+ }
+
+ if (node.tag == tt.tagTitle)
+ {
+ return false;
+ }
+ }
+
+ if (TidyUtils.toBoolean(node.tag.model & (Dict.CM_FIELD | Dict.CM_OBJECT)))
+ {
+ return true;
+ }
+
+ if (node.tag == tt.tagMap)
+ {
+ return true;
+ }
+
+ return !TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE);
+ }
+
+ /**
+  * Prints only the children of the body element, which is useful for reusing material from other documents. Does
+  * nothing when <code>root</code> is <code>null</code> or no body element is found.
+  * @param fout output passed on to the tree printers
+  * @param lexer lexer whose configuration supplies the tag table used to locate the body
+  * @param root root node of the document, may be <code>null</code>
+  * @param xml <code>true</code> to print with {@link #printXMLTree}, <code>false</code> to use {@link #printTree}
+  * @throws IOException propagated from the tree printers
+  */
+ void printBody(Out fout, Lexer lexer, Node root, boolean xml) throws IOException
+ {
+     // Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01
+     // Sebastiano Vigna <vigna(a)dsi.unimi.it>
+     Node body = (root == null) ? null : root.findBody(lexer.configuration.tt);
+
+     if (body == null)
+     {
+         return;
+     }
+
+     Node child = body.content;
+     while (child != null)
+     {
+         if (xml)
+         {
+             printXMLTree(fout, (short) 0, 0, lexer, child);
+         }
+         else
+         {
+             printTree(fout, (short) 0, 0, lexer, child);
+         }
+         child = child.next;
+     }
+ }
+
+ /**
+ * Pretty prints <code>node</code> and, recursively, its content as HTML. Non-element nodes (text, comment, doctype,
+ * processing instruction, XML declaration, CDATA, section, ASP, JSTE, PHP) are handed to their dedicated print
+ * methods; elements are handled according to the content model of their tag (empty, PRE-parsed, style/script,
+ * inline, or any other container).
+ * @param fout output passed to the flush and print helpers
+ * @param mode print mode flags; PREFORMATTED, NOWRAP and CDATA are or'ed in for nested content where required
+ * @param indent current indentation passed to the flush and print helpers
+ * @param lexer lexer whose configuration is consulted
+ * @param node node to print; <code>null</code> is ignored
+ * @throws IOException presumably raised by the output helpers - confirm against their declarations
+ */
+ public void printTree(Out fout, short mode, int indent, Lexer lexer, Node node)
throws IOException
+ {
+ Node content, last;
+ TagTable tt = this.configuration.tt;
+
+ if (node == null)
+ {
+ return;
+ }
+
+ if (node.type == Node.TEXT_NODE || (node.type == Node.CDATA_TAG &&
lexer.configuration.escapeCdata))
+ {
+ printText(fout, mode, indent, node.textarray, node.start, node.end);
+ } else if (node.type == Node.CDATA_TEXT )
+ {
+ printText(fout, CDATA, indent, node.textarray, node.start, node.end);
+ }
+ else if (node.type == Node.COMMENT_TAG)
+ {
+ printComment(fout, indent, node);
+ }
+ else if (node.type == Node.ROOT_NODE)
+ {
+ for (content = node.content; content != null; content = content.next)
+ {
+ printTree(fout, mode, indent, lexer, content);
+ }
+ }
+ else if (node.type == Node.DOCTYPE_TAG)
+ {
+ printDocType(fout, indent, lexer, node);
+ }
+ else if (node.type == Node.PROC_INS_TAG)
+ {
+ printPI(fout, indent, node);
+ }
+ else if (node.type == Node.XML_DECL)
+ {
+ printXmlDecl(fout, indent, node);
+ }
+ else if (node.type == Node.CDATA_TAG)
+ {
+ printCDATA(fout, indent, node);
+ }
+ else if (node.type == Node.SECTION_TAG)
+ {
+ printSection(fout, indent, node);
+ }
+ else if (node.type == Node.ASP_TAG)
+ {
+ printAsp(fout, indent, node);
+ }
+ else if (node.type == Node.JSTE_TAG)
+ {
+ printJste(fout, indent, node);
+ }
+ else if (node.type == Node.PHP_TAG)
+ {
+ printPhp(fout, indent, node);
+ }
+ else if (TidyUtils.toBoolean(node.tag.model & Dict.CM_EMPTY)) // NOTE(review): node.tag is dereferenced without a null check - confirm it is always set here
+// || (node.type == Node.START_END_TAG && !configuration.xHTML))
+ {
+ if (!TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE))
+ {
+ condFlushLine(fout, indent);
+ }
+
+ if (node.tag == tt.tagBr
+ && node.prev != null
+ && node.prev.tag != tt.tagBr
+ && this.configuration.breakBeforeBR)
+ {
+ flushLine(fout, indent);
+ }
+
+ if (this.configuration.makeClean && node.tag == tt.tagWbr)
+ {
+ printString(" "); // with makeClean a <wbr> is replaced by a plain space
+ }
+ else
+ {
+ printTag(lexer, fout, mode, indent, node);
+ }
+
+ if (node.tag == tt.tagParam || node.tag == tt.tagArea)
+ {
+ condFlushLine(fout, indent);
+ }
+ else if (node.tag == tt.tagBr || node.tag == tt.tagHr)
+ {
+ flushLine(fout, indent);
+ }
+ }
+ else
+ {
+ if (node.type == Node.START_END_TAG)
+ {
+ node.type = Node.START_TAG; // note: modifies the node so that a separate end tag is printed below
+ }
+
+ // some kind of container element
+ if (node.tag != null && node.tag.getParser() == ParserImpl.PRE)
+ {
+ condFlushLine(fout, indent);
+
+ indent = 0; // tag, content and end tag of a PRE-parsed element are printed without indentation
+ condFlushLine(fout, indent);
+ printTag(lexer, fout, mode, indent, node);
+ flushLine(fout, indent);
+
+ for (content = node.content; content != null; content = content.next)
+ {
+ printTree(fout, (short) (mode | PREFORMATTED | NOWRAP), indent,
lexer, content);
+ }
+
+ condFlushLine(fout, indent);
+ printEndTag(mode, indent, node);
+ flushLine(fout, indent);
+
+ if (!this.configuration.indentContent && node.next != null)
+ {
+ flushLine(fout, indent);
+ }
+ }
+ else if (node.tag == tt.tagStyle || node.tag == tt.tagScript)
+ {
+ printScriptStyle(fout, (short) (mode | PREFORMATTED | NOWRAP | CDATA),
indent, lexer, node);
+ }
+ else if (TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE))
+ {
+ if (this.configuration.makeClean)
+ {
+ // discards <font> and </font> tags
+ if (node.tag == tt.tagFont)
+ {
+ for (content = node.content; content != null; content =
content.next)
+ {
+ printTree(fout, mode, indent, lexer, content);
+ }
+ return;
+ }
+
+ // replace <nobr> ... </nobr> by &nbsp; or &#160;
etc.
+ if (node.tag == tt.tagNobr)
+ {
+ for (content = node.content; content != null; content =
content.next)
+ {
+ printTree(fout, (short) (mode | NOWRAP), indent, lexer,
content);
+ }
+ return;
+ }
+ }
+
+ // otherwise a normal inline element
+
+ printTag(lexer, fout, mode, indent, node);
+
+ // indent content for SELECT, TEXTAREA, MAP, OBJECT and APPLET
+
+ if (shouldIndent(node))
+ {
+ condFlushLine(fout, indent);
+ indent += this.configuration.spaces;
+
+ for (content = node.content; content != null; content =
content.next)
+ {
+ printTree(fout, mode, indent, lexer, content);
+ }
+
+ condFlushLine(fout, indent);
+ indent -= this.configuration.spaces;
+ condFlushLine(fout, indent);
+ }
+ else
+ {
+
+ for (content = node.content; content != null; content =
content.next)
+ {
+ printTree(fout, mode, indent, lexer, content);
+ }
+ }
+
+ printEndTag(mode, indent, node);
+ }
+ else
+ {
+ // other tags
+ condFlushLine(fout, indent);
+
+ if (this.configuration.smartIndent && node.prev != null)
+ {
+ flushLine(fout, indent);
+ }
+
+ // do not omit elements with attributes
+ if (!this.configuration.hideEndTags
+ || !(node.tag != null && TidyUtils.toBoolean(node.tag.model
& Dict.CM_OMITST))
+ || node.attributes != null)
+ {
+ printTag(lexer, fout, mode, indent, node);
+
+ if (shouldIndent(node))
+ {
+ condFlushLine(fout, indent);
+ }
+ else if (TidyUtils.toBoolean(node.tag.model & Dict.CM_HTML)
+ || node.tag == tt.tagNoframes
+ || (TidyUtils.toBoolean(node.tag.model & Dict.CM_HEAD)
&& !(node.tag == tt.tagTitle)))
+ {
+ flushLine(fout, indent);
+ }
+ }
+
+ if (node.tag == tt.tagBody && this.configuration.burstSlides)
+ {
+ printSlide(fout, mode, (this.configuration.indentContent
+ ? indent + this.configuration.spaces
+ : indent), lexer);
+ }
+ else
+ {
+ last = null;
+
+ for (content = node.content; content != null; content =
content.next)
+ {
+ // kludge for naked text before block level tag
+ if (last != null
+ && !this.configuration.indentContent
+ && last.type == Node.TEXT_NODE
+ && content.tag != null
+ && !TidyUtils.toBoolean(content.tag.model &
Dict.CM_INLINE))
+ {
+ flushLine(fout, indent);
+ }
+
+ printTree(
+ fout,
+ mode,
+ (shouldIndent(node) ? indent + this.configuration.spaces :
indent),
+ lexer,
+ content);
+
+ last = content;
+ }
+ }
+
+ // don't flush line for td and th
+ if (shouldIndent(node)
+ || ((TidyUtils.toBoolean(node.tag.model & Dict.CM_HTML) ||
node.tag == tt.tagNoframes || //
+ (TidyUtils.toBoolean(node.tag.model & Dict.CM_HEAD) &&
!(node.tag == tt.tagTitle))) && //
+ !this.configuration.hideEndTags))
+ {
+ condFlushLine(
+ fout,
+ (this.configuration.indentContent ? indent +
this.configuration.spaces : indent));
+
+ if (!this.configuration.hideEndTags ||
!TidyUtils.toBoolean(node.tag.model & Dict.CM_OPT))
+ {
+ printEndTag(mode, indent, node);
+
+ // #603128 tidy adds newlines after </html> tag
+ // Fix by Fabrizio Giustina 12-02-2004
+ // fix is different from the one in original tidy
+ if (!lexer.seenEndHtml)
+ {
+ flushLine(fout, indent);
+ }
+ }
+ }
+ else
+ {
+ if (!this.configuration.hideEndTags ||
!TidyUtils.toBoolean(node.tag.model & Dict.CM_OPT))
+ {
+ printEndTag(mode, indent, node);
+ }
+
+ flushLine(fout, indent);
+ }
+
+ // FG commented out: double newlines
+ // if (!this.configuration.indentContent
+ // && node.next != null
+ // && !this.configuration.hideEndTags
+ // && (node.tag.model
+ // & TidyUtils.toBoolean(Dict.CM_BLOCK | Dict.CM_TABLE |
Dict.CM_LIST | Dict.CM_DEFLIST)))
+ // {
+ // flushLine(fout, indent);
+ // }
+ }
+ }
+ }
+
+ /**
+ * Pretty prints <code>node</code> and, recursively, its content as XML. Non-element nodes are handed to their
+ * dedicated print methods. Container elements that have at least one direct text child ("mixed" content) print
+ * their children at the element's own indentation and without extra line breaks; other containers put their
+ * children on separate lines, one indentation level deeper.
+ * @param fout output passed to the flush and print helpers
+ * @param mode print mode flags, passed through unchanged to nested calls
+ * @param indent current indentation passed to the flush and print helpers
+ * @param lexer lexer whose configuration is consulted
+ * @param node node to print; <code>null</code> is ignored
+ * @throws IOException presumably raised by the output helpers - confirm against their declarations
+ */
+ public void printXMLTree(Out fout, short mode, int indent, Lexer lexer, Node node)
throws IOException
+ {
+ TagTable tt = this.configuration.tt;
+
+ if (node == null)
+ {
+ return;
+ }
+
+ if (node.type == Node.TEXT_NODE || (node.type == Node.CDATA_TAG &&
lexer.configuration.escapeCdata))
+ {
+ printText(fout, mode, indent, node.textarray, node.start, node.end);
+ } else if (node.type == Node.CDATA_TEXT )
+ {
+ printText(fout, CDATA, indent, node.textarray, node.start, node.end);
+ }
+ else if (node.type == Node.COMMENT_TAG)
+ {
+ condFlushLine(fout, indent);
+ printComment(fout, 0, node);
+ condFlushLine(fout, 0);
+ }
+ else if (node.type == Node.ROOT_NODE)
+ {
+ Node content;
+
+ for (content = node.content; content != null; content = content.next)
+ {
+ printXMLTree(fout, mode, indent, lexer, content);
+ }
+ }
+ else if (node.type == Node.DOCTYPE_TAG)
+ {
+ printDocType(fout, indent, lexer, node);
+ }
+ else if (node.type == Node.PROC_INS_TAG)
+ {
+ printPI(fout, indent, node);
+ }
+ else if (node.type == Node.XML_DECL)
+ {
+ printXmlDecl(fout, indent, node);
+ }
+ else if (node.type == Node.CDATA_TAG)
+ {
+ printCDATA(fout, indent, node);
+ }
+ else if (node.type == Node.SECTION_TAG)
+ {
+ printSection(fout, indent, node);
+ }
+ else if (node.type == Node.ASP_TAG)
+ {
+ printAsp(fout, indent, node);
+ }
+ else if (node.type == Node.JSTE_TAG)
+ {
+ printJste(fout, indent, node);
+ }
+ else if (node.type == Node.PHP_TAG)
+ {
+ printPhp(fout, indent, node);
+ }
+ else if (TidyUtils.toBoolean(node.tag.model & Dict.CM_EMPTY) // NOTE(review): node.tag is dereferenced without a null check - confirm it is always set here
+ || node.type == Node.START_END_TAG
+ && !configuration.xHTML) // && binds tighter than ||: CM_EMPTY || (START_END_TAG && !xHTML)
+ {
+ condFlushLine(fout, indent);
+ printTag(lexer, fout, mode, indent, node);
+ // fgiust: Remove empty lines between tags in XML.
+ //flushLine(fout, indent);
+
+ // CPR: folks don't want so much vertical spacing in XML
+ // if (node.next != null) { flushLine(fout, indent); }
+
+ }
+ else
+ {
+ // some kind of container element
+ Node content;
+ boolean mixed = false;
+ int cindent;
+
+ for (content = node.content; content != null; content = content.next)
+ {
+ if (content.type == Node.TEXT_NODE)
+ {
+ mixed = true; // at least one direct text child: treat as mixed content
+ break;
+ }
+ }
+
+ condFlushLine(fout, indent);
+
+ if (ParserImpl.XMLPreserveWhiteSpace(node, tt))
+ {
+ indent = 0; // presumably whitespace is significant for this element, so nothing is indented - confirm
+ cindent = 0;
+ mixed = false;
+ }
+ else if (mixed)
+ {
+ cindent = indent;
+ }
+ else
+ {
+ cindent = indent + this.configuration.spaces;
+ }
+
+ printTag(lexer, fout, mode, indent, node);
+
+ if (!mixed && node.content != null)
+ {
+ flushLine(fout, indent);
+ }
+
+ for (content = node.content; content != null; content = content.next)
+ {
+ printXMLTree(fout, mode, cindent, lexer, content);
+ }
+
+ if (!mixed && node.content != null)
+ {
+ condFlushLine(fout, cindent);
+ }
+ printEndTag(mode, indent, node);
+ //condFlushLine(fout, indent);
+
+ // CPR: folks don't want so much vertical spacing in XML
+ // if (node.next != null) { flushLine(fout, indent); }
+
+ }
+ }
+
+ /**
+  * Counts the slides a node would be split into: one slide per direct <code>h2</code> child, plus a leading slide
+  * for any content that precedes the first <code>h2</code>. This method only counts; it does not write anything.
+  * @param node node whose direct children are inspected (normally the body), may be <code>null</code>
+  * @return number of slides; 1 when <code>node</code> is <code>null</code> or has no <code>h2</code> children
+  */
+ public int countSlides(Node node)
+ {
+     TagTable tags = this.configuration.tt;
+
+     if (node == null)
+     {
+         // assume minimum of 1 slide
+         return 1;
+     }
+
+     Node first = node.content;
+
+     // fix for [431716] avoid empty slides: when the content starts with an h2 the "first" slide would be
+     // empty, so it is not counted
+     int slides = (first != null && first.tag == tags.tagH2) ? 0 : 1;
+
+     for (Node child = first; child != null; child = child.next)
+     {
+         if (child.tag == tags.tagH2)
+         {
+             slides++;
+         }
+     }
+
+     return slides;
+ }
+
+ /**
+  * Prints the navigation bar of the current slide: a "previous" and a "start" link when this is not the first
+  * slide and a "next" link when it is not the last one. The fields <code>slide</code> and <code>count</code> hold
+  * the current slide number and the total number of slides.
+  * @param fout output passed to the flush helper
+  * @param indent indentation passed to the flush helper
+  * @throws IOException propagated from the output helpers
+  */
+ private void printNavBar(Out fout, int indent) throws IOException
+ {
+     NumberFormat threeDigits = NumberFormat.getInstance();
+     threeDigits.setMinimumIntegerDigits(3);
+
+     condFlushLine(fout, indent);
+     printString("<center><small>");
+
+     if (slide > 1)
+     {
+         // #427666 - fix by Eric Rossen 02 Aug 00
+         printString("<a href=\"slide" + threeDigits.format(slide - 1) + ".html\">previous</a> | ");
+         condFlushLine(fout, indent);
+
+         // #427666 - fix by Eric Rossen 02 Aug 00
+         // the trailing separator is only needed when a "next" link follows
+         printString(slide < count
+             ? "<a href=\"slide001.html\">start</a> | "
+             : "<a href=\"slide001.html\">start</a>");
+         condFlushLine(fout, indent);
+     }
+
+     if (slide < count)
+     {
+         // #427666 - fix by Eric Rossen 02 Aug 00
+         printString("<a href=\"slide" + threeDigits.format(slide + 1) + ".html\">next</a>");
+     }
+
+     printString("</small></center>");
+     condFlushLine(fout, indent);
+ }
+
+ /**
+ * Called from printTree to print the content of a slide from the node slidecontent.
On return slidecontent points
+ * to the node starting the next slide or null. The variables slide and count are
used to customise the navigation
+ * bar. The slide is wrapped in a div whose onclick handler navigates to the next slide (or to slide 1 from the
+ * last slide); a navigation bar is printed after the closing hr and, when the slide starts with an h2, also
+ * before the h2.
+ * @param fout output passed to the flush and print helpers
+ * @param mode print mode flags passed on to printTree
+ * @param indent indentation of the wrapping div; content is printed one level deeper when indentContent is set
+ * @param lexer lexer passed on to printTree
+ * @throws IOException propagated from the output helpers
+ */
+ public void printSlide(Out fout, short mode, int indent, Lexer lexer) throws
IOException
+ {
+ Node content, last;
+ TagTable tt = this.configuration.tt;
+
+ NumberFormat numberFormat = NumberFormat.getInstance();
+ numberFormat.setMinimumIntegerDigits(3);
+
+ /* insert div for onclick handler */
+ String s;
+ s = "<div onclick=\"document.location='slide"
+ + numberFormat.format(slide < count ? slide + 1 : 1)
+ + ".html'\">";
+ // #427666 - fix by Eric Rossen 02 Aug 00
+ printString(s);
+ condFlushLine(fout, indent);
+
+ /* first print the h2 element and navbar */
+ if (slidecontent != null && slidecontent.tag == tt.tagH2)
+ {
+ printNavBar(fout, indent);
+
+ /* now print an hr after h2 */
+
+ addC('<', linelen++);
+
+ addC(TidyUtils.foldCase('h', this.configuration.upperCaseTags,
this.configuration.xmlTags), linelen++);
+ addC(TidyUtils.foldCase('r', this.configuration.upperCaseTags,
this.configuration.xmlTags), linelen++);
+
+ if (this.configuration.xmlOut)
+ {
+ printString(" />");
+ }
+ else
+ {
+ addC('>', linelen++);
+ }
+
+ if (this.configuration.indentContent)
+ {
+ condFlushLine(fout, indent);
+ }
+
+ // PrintVertSpacer(fout, indent);
+
+ // condFlushLine(fout, indent);
+
+ // print the h2 element
+ printTree(
+ fout,
+ mode,
+ (this.configuration.indentContent ? indent + this.configuration.spaces :
indent),
+ lexer,
+ slidecontent);
+
+ slidecontent = slidecontent.next;
+ }
+
+ // now continue until we reach the next h2
+
+ last = null;
+ content = slidecontent;
+
+ for (; content != null; content = content.next)
+ {
+ if (content.tag == tt.tagH2)
+ {
+ break;
+ }
+
+ // kludge for naked text before block level tag
+ if (last != null
+ && !this.configuration.indentContent
+ && last.type == Node.TEXT_NODE
+ && content.tag != null
+ && TidyUtils.toBoolean(content.tag.model & Dict.CM_BLOCK))
+ {
+ flushLine(fout, indent);
+ flushLine(fout, indent);
+ }
+
+ printTree(
+ fout,
+ mode,
+ (this.configuration.indentContent ? indent + this.configuration.spaces :
indent),
+ lexer,
+ content);
+
+ last = content;
+ }
+
+ slidecontent = content; // the h2 starting the next slide, or null when the content is exhausted
+
+ // now print epilog
+
+ condFlushLine(fout, indent);
+
+ printString("<br clear=\"all\">"); // NOTE(review): printed unclosed even when xmlOut is set, unlike the hr below - confirm intended
+ condFlushLine(fout, indent);
+
+ addC('<', linelen++);
+
+ addC(TidyUtils.foldCase('h', this.configuration.upperCaseTags,
this.configuration.xmlTags), linelen++);
+ addC(TidyUtils.foldCase('r', this.configuration.upperCaseTags,
this.configuration.xmlTags), linelen++);
+
+ if (this.configuration.xmlOut)
+ {
+ printString(" />");
+ }
+ else
+ {
+ addC('>', linelen++);
+ }
+
+ if (this.configuration.indentContent)
+ {
+ condFlushLine(fout, indent);
+ }
+
+ printNavBar(fout, indent);
+
+ // end tag for div
+ printString("</div>");
+ condFlushLine(fout, indent);
+ }
+
+ /**
+  * Adds a <code>meta http-equiv="Page-Enter"</code> element with a <code>blendTrans</code> page transition as the
+  * first child of the head element; this works on IE but not NS. Nothing is added when the document has no head.
+  * @param lexer lexer used to create the meta element and to look up the tag table
+  * @param root root node of the document
+  * @param duration value written into <code>blendTrans(Duration=...)</code>
+  */
+ public void addTransitionEffect(Lexer lexer, Node root, double duration)
+ {
+     Node head = root.findHEAD(lexer.configuration.tt);
+
+     if (head == null)
+     {
+         return;
+     }
+
+     // Double.toString produces the same text as the deprecated new Double(duration).toString()
+     String transition = "blendTrans(Duration=" + Double.toString(duration) + ")";
+
+     Node meta = lexer.inferredTag("meta");
+     meta.addAttribute("http-equiv", "Page-Enter");
+     meta.addAttribute("content", transition);
+     head.insertNodeAtStart(meta);
+ }
+
+ /**
+  * Creates slides from h2: writes the document once per slide into <code>slideNNN.html</code> in the current
+  * working directory, then deletes left-over <code>slideNNN.html</code> files with higher numbers. I/O errors are
+  * logged per file and do not stop the remaining slides from being written.
+  * @param lexer Lexer
+  * @param root root node
+  */
+ public void createSlides(Lexer lexer, Node root)
+ {
+     NumberFormat numberFormat = NumberFormat.getInstance();
+     numberFormat.setMinimumIntegerDigits(3);
+
+     Node body = root.findBody(lexer.configuration.tt);
+     count = countSlides(body);
+     // countSlides tolerates a missing body, so do the same here instead of failing with a NullPointerException
+     slidecontent = (body != null) ? body.content : null;
+
+     addTransitionEffect(lexer, root, 3.0);
+
+     for (slide = 1; slide <= count; ++slide)
+     {
+         String fileName = "slide" + numberFormat.format(slide) + ".html";
+
+         try
+         {
+             FileOutputStream stream = new FileOutputStream(fileName);
+             try
+             {
+                 Out out = OutFactory.getOut(configuration, stream);
+
+                 printTree(out, (short) 0, 0, lexer, root);
+                 flushLine(out, 0);
+                 out.close();
+             }
+             finally
+             {
+                 // release the file handle even when printing fails; closing an already closed
+                 // FileOutputStream has no effect
+                 stream.close();
+             }
+         }
+         catch (IOException e)
+         {
+             log.error(fileName + e.toString());
+         }
+     }
+
+     // delete superfluous slides by deleting slideN.html for N = count+1, count+2, etc.
+     // until no such file is found.
+
+     // #427666 - fix by Eric Rossen 02 Aug 00
+     while ((new File("slide" + numberFormat.format(slide) + ".html")).delete())
+     {
+         ++slide;
+     }
+ }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ParseProperty.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ParseProperty.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ParseProperty.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,94 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Interface for configuration property parser. An implementation converts the textual value of an option into the
+ * parsed value and provides the descriptive texts (type, valid values, friendly value) for that option.
+ * @author Fabrizio Giustina
+ * @version $Revision $ ($Author $)
+ */
+public interface ParseProperty
+{
+
+ /**
+ * Parse a configuration option.
+ * @param value option value
+ * @param option option name
+ * @param configuration actual configuration instance
+ * @return parsed configuration value
+ */
+ Object parse(String value, String option, Configuration configuration);
+
+ /**
+ * Returns the option type.
+ * @return option type
+ */
+ String getType();
+
+ /**
+ * Returns the valid values.
+ * @return valid values (text)
+ */
+ String getOptionValues();
+
+ /**
+ * Returns the "friendly name" for the passed value. Needed to print actual
configuration setting.
+ * @param option option name
+ * @param value actual value
+ * @param configuration actual configuration
+ * @return "friendly" actual value
+ */
+ String getFriendlyName(String option, Object value, Configuration configuration);
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ParsePropertyImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ParsePropertyImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ParsePropertyImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,929 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.StringTokenizer;
+
+
+/**
+ * Property parser instances.
+ * @author Fabrizio Giustina
+ * @version $Revision $ ($Author $)
+ */
+public final class ParsePropertyImpl
+{
+
+ /**
+ * configuration parser for int values.
+ */
+ static final ParseProperty INT = new ParseInt();
+
+ /**
+ * configuration parser for boolean values.
+ */
+ static final ParseProperty BOOL = new ParseBoolean();
+
+ /**
+ * configuration parser for inverted boolean values.
+ */
+ static final ParseProperty INVBOOL = new ParseInvBoolean();
+
+ /**
+ * configuration parser for char encoding values.
+ */
+ static final ParseProperty CHAR_ENCODING = new ParseCharEncoding();
+
+ /**
+ * configuration parser for name values.
+ */
+ static final ParseProperty NAME = new ParseName();
+
+ /**
+ * configuration parser for tag names.
+ */
+ static final ParseProperty TAGNAMES = new ParseTagNames();
+
+ /**
+ * configuration parser for doctype property.
+ */
+ static final ParseProperty DOCTYPE = new ParseDocType();
+
+ /**
+ * configuration parser for repeated attribute property.
+ */
+ static final ParseProperty REPEATED_ATTRIBUTES = new ParseRepeatedAttribute();
+
+ /**
+ * configuration parser for String values.
+ */
+ static final ParseProperty STRING = new ParseString();
+
+ /**
+ * configuration parser for indent property.
+ */
+ static final ParseProperty INDENT = new ParseIndent();
+
+ /**
+ * configuration parser for css selectors.
+ */
+ static final ParseProperty CSS1SELECTOR = new ParseCSS1Selector();
+
+ /**
+ * configuration parser for new line bytes.
+ */
+ static final ParseProperty NEWLINE = new ParseNewLine();
+
+ /**
+ * don't instantiate: this class only holds the shared parser instances above.
+ */
+ private ParsePropertyImpl()
+ {
+ // unused
+ }
+
+ /**
+  * parser for integer values.
+  */
+ static class ParseInt implements ParseProperty
+ {
+
+     /**
+      * Parses a decimal integer. A value that is not a valid integer (including <code>null</code>) is reported as
+      * a bad argument and yields -1.
+      * @param value option value
+      * @param option option name, used for error reporting
+      * @param configuration configuration whose report receives the bad argument message
+      * @return the parsed value as an <code>Integer</code>, or -1 if the value could not be parsed
+      * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String,
+      * org.ajax4jsf.org.w3c.tidy.Configuration)
+      */
+     public Object parse(String value, String option, Configuration configuration)
+     {
+         try
+         {
+             // Integer.valueOf replaces the deprecated Integer constructor
+             return Integer.valueOf(value);
+         }
+         catch (NumberFormatException e)
+         {
+             configuration.report.badArgument(value, option);
+             return Integer.valueOf(-1);
+         }
+     }
+
+     /**
+      * @return the type label "Integer"
+      * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getType()
+      */
+     public String getType()
+     {
+         return "Integer";
+     }
+
+     /**
+      * @return textual description of the valid values
+      * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getOptionValues()
+      */
+     public String getOptionValues()
+     {
+         return "0, 1, 2, ...";
+     }
+
+     /**
+      * @param option option name (not used)
+      * @param value actual value, may be <code>null</code>
+      * @param configuration actual configuration (not used)
+      * @return string form of the value, or an empty string for <code>null</code>
+      * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object,
+      * Configuration)
+      */
+     public String getFriendlyName(String option, Object value, Configuration configuration)
+     {
+         return value == null ? "" : value.toString();
+     }
+ }
+
+ /**
+ * parser for boolean values.
+ */
+ static class ParseBoolean implements ParseProperty
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#parse(java.lang.String,
java.lang.String, org.ajax4jsf.org.w3c.tidy.Configuration)
+ */
+ public Object parse(String value, String option, Configuration configuration)
+ {
+ Boolean b = Boolean.TRUE;
+ if (value != null && value.length() > 0)
+ {
+ char c = value.charAt(0);
+ if ((c == 't') || (c == 'T') || (c == 'Y') || (c
== 'y') || (c == '1'))
+ {
+ b = Boolean.TRUE;
+ }
+ else if ((c == 'f') || (c == 'F') || (c == 'N')
|| (c == 'n') || (c == '0'))
+ {
+ b = Boolean.FALSE;
+ }
+ else
+ {
+ configuration.report.badArgument(value, option);
+ }
+ }
+ return b;
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getType()
+ */
+ public String getType()
+ {
+ return "Boolean";
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getOptionValues()
+ */
+ public String getOptionValues()
+ {
+ return "y/n, yes/no, t/f, true/false, 1/0";
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String,
java.lang.Object, Configuration)
+ */
+ public String getFriendlyName(String option, Object value, Configuration
configuration)
+ {
+ if (value == null)
+ {
+ return "";
+ }
+
+ return ((Boolean) value).booleanValue() ? "yes" : "no";
+ }
+ }
+
+ /**
+ * parser for boolean values.
+ */
+ static class ParseInvBoolean implements ParseProperty
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#parse(java.lang.String,
java.lang.String, org.ajax4jsf.org.w3c.tidy.Configuration)
+ */
+ public Object parse(String value, String option, Configuration configuration)
+ {
+ return (((Boolean) BOOL.parse(value, option, configuration)).booleanValue() ?
Boolean.FALSE : Boolean.TRUE);
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getType()
+ */
+ public String getType()
+ {
+ return "Boolean";
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getOptionValues()
+ */
+ public String getOptionValues()
+ {
+ return "yes, no, true, false";
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String,
java.lang.Object, Configuration)
+ */
+ public String getFriendlyName(String option, Object value, Configuration
configuration)
+ {
+ if (value == null)
+ {
+ return "";
+ }
+
+ return ((Boolean) value).booleanValue() ? "no" : "yes";
+ }
+ }
+
+ /**
+ * parse character encoding option. Can be RAW, ASCII, LATIN1, UTF8, ISO2022,
MACROMAN, UTF16LE, UTF16BE, UTF16,
+ * WIN1252, BIG5, SHIFTJIS
+ */
+ static class ParseCharEncoding implements ParseProperty
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#parse(java.lang.String,
java.lang.String, org.ajax4jsf.org.w3c.tidy.Configuration)
+ */
+ public Object parse(String value, String option, Configuration configuration)
+ {
+
+ if ("raw".equalsIgnoreCase(value))
+ {
+ // special value for compatibility with tidy c
+ configuration.rawOut = true;
+ }
+ else if (!TidyUtils.isCharEncodingSupported(value))
+ {
+ configuration.report.badArgument(value, option);
+ }
+ else if ("input-encoding".equalsIgnoreCase(option))
+ {
+ configuration.setInCharEncodingName(value);
+ }
+ else if ("output-encoding".equalsIgnoreCase(option))
+ {
+ configuration.setOutCharEncodingName(value);
+ }
+ else if ("char-encoding".equalsIgnoreCase(option))
+ {
+ configuration.setInCharEncodingName(value);
+ configuration.setOutCharEncodingName(value);
+ }
+
+ return null;
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getType()
+ */
+ public String getType()
+ {
+ return "Encoding";
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getOptionValues()
+ */
+ public String getOptionValues()
+ {
+ // ascii, latin1, raw, utf-8, iso2022, mac, utf-16, utf-16be, utf-16le, big5,
shiftjis
+ return "Any valid java char encoding name";
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String,
java.lang.Object, Configuration)
+ */
+ public String getFriendlyName(String option, Object value, Configuration
configuration)
+ {
+ if ("output-encoding".equalsIgnoreCase(option))
+ {
+ return configuration.getOutCharEncodingName();
+ }
+
+ // for input-encoding or char-encoding
+ return configuration.getInCharEncodingName();
+ }
+ }
+
+ /**
+  * parser for name values (a string excluding whitespace).
+  */
+ static class ParseName implements ParseProperty
+ {
+
+     /**
+      * Returns the first whitespace-delimited token of the value. A value without any token is reported as a bad
+      * argument.
+      * @param value option value, must not be <code>null</code>
+      * @param option option name, used for error reporting
+      * @param configuration configuration whose report receives the bad argument message
+      * @return the first token, or <code>null</code> if the value contains none
+      * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String,
+      * org.ajax4jsf.org.w3c.tidy.Configuration)
+      */
+     public Object parse(String value, String option, Configuration configuration)
+     {
+         StringTokenizer tokens = new StringTokenizer(value);
+
+         if (tokens.hasMoreTokens())
+         {
+             return tokens.nextToken();
+         }
+
+         configuration.report.badArgument(value, option);
+         return null;
+     }
+
+     /**
+      * @return the type label "Name"
+      * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getType()
+      */
+     public String getType()
+     {
+         return "Name";
+     }
+
+     /**
+      * @return "-", no list of valid values is given
+      * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getOptionValues()
+      */
+     public String getOptionValues()
+     {
+         return "-";
+     }
+
+     /**
+      * @param option option name (not used)
+      * @param value actual value, may be <code>null</code>
+      * @param configuration actual configuration (not used)
+      * @return string form of the value, or an empty string for <code>null</code>
+      * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object,
+      * Configuration)
+      */
+     public String getFriendlyName(String option, Object value, Configuration configuration)
+     {
+         if (value == null)
+         {
+             return "";
+         }
+         return value.toString();
+     }
+ }
+
+    /**
+     * Parser for lists of user-defined tag names (the <code>new-*-tags</code> options).
+     */
+    static class ParseTagNames implements ParseProperty
+    {
+
+        /**
+         * Registers every tag name found in <code>value</code> with the configuration's tag table. Names may be
+         * separated by spaces, tabs, line breaks or commas. The tag type is chosen from the option name; any
+         * option other than the block-level, empty or pre variants is treated as inline.
+         * @param value list of tag names
+         * @param option option name selecting the tag type
+         * @param configuration configuration whose tag table and <code>definedTags</code> mask are updated
+         * @return always <code>null</code>; the effect is the registration itself
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.ajax4jsf.org.w3c.tidy.Configuration)
+         */
+        public Object parse(String value, String option, Configuration configuration)
+        {
+            short tagType;
+            if ("new-blocklevel-tags".equals(option))
+            {
+                tagType = Dict.TAGTYPE_BLOCK;
+            }
+            else if ("new-empty-tags".equals(option))
+            {
+                tagType = Dict.TAGTYPE_EMPTY;
+            }
+            else if ("new-pre-tags".equals(option))
+            {
+                tagType = Dict.TAGTYPE_PRE;
+            }
+            else
+            {
+                // "new-inline-tags" and every unrecognised option name
+                tagType = Dict.TAGTYPE_INLINE;
+            }
+
+            StringTokenizer names = new StringTokenizer(value, " \t\n\r,");
+            while (names.hasMoreTokens())
+            {
+                configuration.definedTags |= tagType;
+                configuration.tt.defineTag(tagType, names.nextToken());
+            }
+            return null;
+        }
+
+        /**
+         * @return the type label "Tag names"
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getType()
+         */
+        public String getType()
+        {
+            return "Tag names";
+        }
+
+        /**
+         * @return a sample of the expected syntax, "tagX, tagY, ..."
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getOptionValues()
+         */
+        public String getOptionValues()
+        {
+            return "tagX, tagY, ...";
+        }
+
+        /**
+         * Lists the tags currently defined in the tag table for the type selected by <code>option</code>. Each
+         * name is followed by a single space. The <code>value</code> argument is not consulted.
+         * @return the space-terminated tag names, or the empty string for an unknown option or when no tag of
+         * that type is defined
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
+         */
+        public String getFriendlyName(String option, Object value, Configuration configuration)
+        {
+            short tagType;
+            if ("new-inline-tags".equals(option))
+            {
+                tagType = Dict.TAGTYPE_INLINE;
+            }
+            else if ("new-blocklevel-tags".equals(option))
+            {
+                tagType = Dict.TAGTYPE_BLOCK;
+            }
+            else if ("new-empty-tags".equals(option))
+            {
+                tagType = Dict.TAGTYPE_EMPTY;
+            }
+            else if ("new-pre-tags".equals(option))
+            {
+                tagType = Dict.TAGTYPE_PRE;
+            }
+            else
+            {
+                return "";
+            }
+
+            List<String> definedNames = configuration.tt.findAllDefinedTag(tagType);
+            if (definedNames.isEmpty())
+            {
+                return "";
+            }
+
+            StringBuilder joined = new StringBuilder();
+            for (String tagName : definedNames)
+            {
+                joined.append(tagName).append(" ");
+            }
+            return joined.toString();
+        }
+    }
+
+    /**
+     * Parse doctype preference. doctype: <code>omit | auto | strict | loose | [fpi]</code> where the fpi is a string
+     * similar to <code>"-//ACME//DTD HTML 3.14159//EN"</code>.
+     */
+    static class ParseDocType implements ParseProperty
+    {
+
+        /**
+         * Sets <code>configuration.docTypeMode</code> from the option text. A value that starts with a double
+         * quote is taken as a user supplied identifier: the mode becomes <code>DOCTYPE_USER</code> and the trimmed
+         * value is returned. Otherwise the first word is matched case-insensitively against the known keywords
+         * (<code>transitional</code> is accepted as a synonym of <code>loose</code>); an unknown word is reported
+         * as a bad argument and the mode is left untouched.
+         * @return the trimmed value for a quoted identifier, <code>null</code> in every other case
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.ajax4jsf.org.w3c.tidy.Configuration)
+         */
+        public Object parse(String value, String option, Configuration configuration)
+        {
+            String trimmed = value.trim();
+
+            /* "-//ACME//DTD HTML 3.14159//EN" or similar */
+            if (trimmed.startsWith("\""))
+            {
+                configuration.docTypeMode = Configuration.DOCTYPE_USER;
+                return trimmed;
+            }
+
+            /* read first word */
+            StringTokenizer words = new StringTokenizer(trimmed, " \t\n\r,");
+            String keyword = words.hasMoreTokens() ? words.nextToken() : "";
+
+            // #443663 - fix by Terry Teague 23 Jul 01
+            if ("auto".equalsIgnoreCase(keyword))
+            {
+                configuration.docTypeMode = Configuration.DOCTYPE_AUTO;
+                return null;
+            }
+            if ("omit".equalsIgnoreCase(keyword))
+            {
+                configuration.docTypeMode = Configuration.DOCTYPE_OMIT;
+                return null;
+            }
+            if ("strict".equalsIgnoreCase(keyword))
+            {
+                configuration.docTypeMode = Configuration.DOCTYPE_STRICT;
+                return null;
+            }
+            if ("loose".equalsIgnoreCase(keyword) || "transitional".equalsIgnoreCase(keyword))
+            {
+                configuration.docTypeMode = Configuration.DOCTYPE_LOOSE;
+                return null;
+            }
+            if ("ignore".equalsIgnoreCase(keyword))
+            {
+                configuration.docTypeMode = Configuration.DOCTYPE_IGNORE;
+                return null;
+            }
+
+            configuration.report.badArgument(trimmed, option);
+            return null;
+        }
+
+        /**
+         * @return the type label "DocType"
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getType()
+         */
+        public String getType()
+        {
+            return "DocType";
+        }
+
+        /**
+         * @return the list of accepted keywords, "omit | auto | strict | loose | ignore | [fpi]"
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getOptionValues()
+         */
+        public String getOptionValues()
+        {
+            return "omit | auto | strict | loose | ignore | [fpi]";
+        }
+
+        /**
+         * Describes the doctype mode currently stored in the configuration. The <code>option</code> and
+         * <code>value</code> arguments are not consulted.
+         * @return the keyword for the current mode, the configured <code>docTypeStr</code> for a user supplied
+         * doctype, or "unknown" for any other mode value
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
+         */
+        public String getFriendlyName(String option, Object value, Configuration configuration)
+        {
+            switch (configuration.docTypeMode)
+            {
+                case Configuration.DOCTYPE_AUTO :
+                    return "auto";
+
+                case Configuration.DOCTYPE_OMIT :
+                    return "omit";
+
+                case Configuration.DOCTYPE_STRICT :
+                    return "strict";
+
+                case Configuration.DOCTYPE_LOOSE :
+                    return "transitional";
+
+                case Configuration.DOCTYPE_IGNORE :
+                    return "ignore";
+
+                case Configuration.DOCTYPE_USER :
+                    return configuration.docTypeStr;
+
+                default :
+                    return "unknown";
+            }
+        }
+    }
+
+    /**
+     * Parser for the repeated-attribute policy: keep-first or keep-last?
+     */
+    static class ParseRepeatedAttribute implements ParseProperty
+    {
+
+        /**
+         * Translates <code>keep-first</code> / <code>keep-last</code> (case-insensitive) into the matching
+         * <code>Configuration</code> constant. Any other text is reported as a bad argument and yields -1.
+         * @param value option text
+         * @param option option name, used only for the error report
+         * @param configuration configuration whose report receives the error
+         * @return an <code>Integer</code> holding <code>KEEP_FIRST</code>, <code>KEEP_LAST</code> or -1
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.ajax4jsf.org.w3c.tidy.Configuration)
+         */
+        public Object parse(String value, String option, Configuration configuration)
+        {
+            int dupAttr;
+
+            if ("keep-first".equalsIgnoreCase(value))
+            {
+                dupAttr = Configuration.KEEP_FIRST;
+            }
+            else if ("keep-last".equalsIgnoreCase(value))
+            {
+                dupAttr = Configuration.KEEP_LAST;
+            }
+            else
+            {
+                configuration.report.badArgument(value, option);
+                dupAttr = -1;
+            }
+
+            // valueOf instead of the deprecated Integer constructor; the numeric value is unchanged
+            return Integer.valueOf(dupAttr);
+        }
+
+        /**
+         * @return the type label "Enum"
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getType()
+         */
+        public String getType()
+        {
+            return "Enum";
+        }
+
+        /**
+         * @return the accepted keywords, "keep-first, keep-last"
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getOptionValues()
+         */
+        public String getOptionValues()
+        {
+            return "keep-first, keep-last";
+        }
+
+        /**
+         * Renders a stored policy value for display.
+         * @param value an <code>Integer</code> as produced by <code>parse</code>, or <code>null</code>
+         * @return "keep-first", "keep-last", "unknown" for any other number, or the empty string for
+         * <code>null</code>
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
+         */
+        public String getFriendlyName(String option, Object value, Configuration configuration)
+        {
+            if (value == null)
+            {
+                return "";
+            }
+
+            int intValue = ((Integer) value).intValue();
+            String stringValue;
+
+            switch (intValue)
+            {
+                case Configuration.KEEP_FIRST :
+                    stringValue = "keep-first";
+                    break;
+
+                case Configuration.KEEP_LAST :
+                    stringValue = "keep-last";
+                    break;
+
+                default :
+                    stringValue = "unknown";
+                    break;
+            }
+
+            return stringValue;
+        }
+    }
+
+    /**
+     * Parser for free-form String values; the text is stored exactly as given.
+     */
+    static class ParseString implements ParseProperty
+    {
+
+        /**
+         * Returns <code>value</code> unchanged; nothing is validated or reported.
+         * @return the same <code>value</code> reference that was passed in
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.ajax4jsf.org.w3c.tidy.Configuration)
+         */
+        public Object parse(String value, String option, Configuration configuration)
+        {
+            return value;
+        }
+
+        /**
+         * @return the type label "String"
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getType()
+         */
+        public String getType()
+        {
+            return "String";
+        }
+
+        /**
+         * @return "-", since there is no fixed list of accepted values
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getOptionValues()
+         */
+        public String getOptionValues()
+        {
+            return "-";
+        }
+
+        /**
+         * Renders the stored value for display.
+         * @param value the stored value; must be a <code>String</code> or <code>null</code>
+         * @return the value itself, or the empty string when it is <code>null</code>
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
+         */
+        public String getFriendlyName(String option, Object value, Configuration configuration)
+        {
+            if (value == null)
+            {
+                return "";
+            }
+            return (String) value;
+        }
+    }
+
+    /**
+     * Parser for indent values.
+     */
+    static class ParseIndent implements ParseProperty
+    {
+
+        /**
+         * Spellings that switch indentation on; kept in line with {@link #getOptionValues()}.
+         */
+        private static final String[] ENABLE_WORDS = {"yes", "y", "true", "t", "1"};
+
+        /**
+         * Spellings that switch indentation off; kept in line with {@link #getOptionValues()}.
+         */
+        private static final String[] DISABLE_WORDS = {"no", "n", "false", "f", "0"};
+
+        /**
+         * Parses the indent option. <code>auto</code> enables indentation together with
+         * <code>configuration.smartIndent</code>; any of <code>yes/y/true/t/1</code> enables plain indentation;
+         * any of <code>no/n/false/f/0</code> disables it. Matching is case-insensitive. The short forms are
+         * accepted because {@link #getOptionValues()} advertises them; previously they were rejected as bad
+         * arguments. Any other text (including <code>null</code>) is reported as a bad argument, in which case
+         * <code>smartIndent</code> is left alone and the current <code>configuration.indentContent</code> is
+         * returned.
+         * @param value option text
+         * @param option option name, used only for the error report
+         * @param configuration configuration whose <code>smartIndent</code> flag is updated
+         * @return <code>Boolean.TRUE</code> if content is to be indented, <code>Boolean.FALSE</code> otherwise
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.ajax4jsf.org.w3c.tidy.Configuration)
+         */
+        public Object parse(String value, String option, Configuration configuration)
+        {
+            boolean b = configuration.indentContent;
+
+            if ("auto".equalsIgnoreCase(value))
+            {
+                b = true;
+                configuration.smartIndent = true;
+            }
+            else if (isOneOf(value, ENABLE_WORDS))
+            {
+                b = true;
+                configuration.smartIndent = false;
+            }
+            else if (isOneOf(value, DISABLE_WORDS))
+            {
+                b = false;
+                configuration.smartIndent = false;
+            }
+            else
+            {
+                configuration.report.badArgument(value, option);
+            }
+            return b ? Boolean.TRUE : Boolean.FALSE;
+        }
+
+        /**
+         * Tells whether <code>value</code> equals, ignoring case, one of <code>words</code>.
+         * @param value text to test, may be <code>null</code>
+         * @param words candidate spellings
+         * @return <code>true</code> on the first match, <code>false</code> if there is none
+         */
+        private static boolean isOneOf(String value, String[] words)
+        {
+            for (int i = 0; i < words.length; i++)
+            {
+                if (words[i].equalsIgnoreCase(value))
+                {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        /**
+         * @return the type label "Indent"
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getType()
+         */
+        public String getType()
+        {
+            return "Indent";
+        }
+
+        /**
+         * @return the accepted spellings, "auto, y/n, yes/no, t/f, true/false, 1/0"
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getOptionValues()
+         */
+        public String getOptionValues()
+        {
+            return "auto, y/n, yes/no, t/f, true/false, 1/0";
+        }
+
+        /**
+         * Renders the stored value for display.
+         * @return the value's string form, or the empty string when the value is <code>null</code>
+         * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
+         */
+        public String getFriendlyName(String option, Object value, Configuration configuration)
+        {
+            return value == null ? "" : value.toString();
+        }
+    }
+
+ /**
+ * Parser for css selectors.
+ */
+ static class ParseCSS1Selector implements ParseProperty
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#parse(java.lang.String,
java.lang.String, org.ajax4jsf.org.w3c.tidy.Configuration)
+ */
+ public Object parse(String value, String option, Configuration configuration)
+ {
+ StringTokenizer t = new StringTokenizer(value);
+ String buf = null;
+ if (t.countTokens() >= 1)
+ {
+ buf = t.nextToken() + "-"; // Make sure any escaped Unicode is
terminated so valid class names are
+ // generated after Tidy appends last digits.
+ }
+ else
+ {
+ configuration.report.badArgument(value, option);
+ }
+
+ if (!Lexer.isCSS1Selector(value))
+ {
+ configuration.report.badArgument(value, option);
+ }
+
+ return buf;
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getType()
+ */
+ public String getType()
+ {
+ return "Name";
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getOptionValues()
+ */
+ public String getOptionValues()
+ {
+ return "CSS1 selector";
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String,
java.lang.Object, Configuration)
+ */
+ public String getFriendlyName(String option, Object value, Configuration
configuration)
+ {
+ return value == null ? "" : (String) value;
+ }
+ }
+
+ /**
+ * Parser for newline bytes. Allows lf|crlf|cr.
+ */
+ static class ParseNewLine implements ParseProperty
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#parse(java.lang.String,
java.lang.String, org.ajax4jsf.org.w3c.tidy.Configuration)
+ */
+ public Object parse(String value, String option, Configuration configuration)
+ {
+ // lf|crlf|cr
+ if ("lf".equalsIgnoreCase(value))
+ {
+ configuration.newline = new char[]{'\n'};
+ }
+ else if ("cr".equalsIgnoreCase(value))
+ {
+ configuration.newline = new char[]{'\r'};
+ }
+ else if ("crlf".equalsIgnoreCase(value))
+ {
+ configuration.newline = new char[]{'\r', '\n'};
+ }
+ else
+ {
+ configuration.report.badArgument(value, option);
+ }
+ return null;
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getType()
+ */
+ public String getType()
+ {
+ return "Enum";
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getOptionValues()
+ */
+ public String getOptionValues()
+ {
+ return "lf, crlf, cr";
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String,
java.lang.Object, Configuration)
+ */
+ public String getFriendlyName(String option, Object value, Configuration
configuration)
+ {
+ if (configuration.newline.length == 1)
+ {
+ return (configuration.newline[0] == '\n') ? "lf" :
"cr";
+ }
+ return "crlf";
+ }
+ }
+
+}
\ No newline at end of file
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Parser.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Parser.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Parser.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,74 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * HTML Parser. Implementations parse the content of one element, pulling tokens from the lexer.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public interface Parser
+{
+
+ /**
+ * Parse the given node.
+ * @param lexer Lexer
+ * @param node node created by the lexer upon seeing the start tag, or by the parser when the start tag is
+ * inferred
+ * @param mode content mode
+ */
+ void parse(Lexer lexer, Node node, short mode);
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ParserImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ParserImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ParserImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,3624 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * HTML Parser implementation.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org
</a>
+ * @author Andy Quick <a
href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a>
(translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public final class ParserImpl
+{
+
+ /**
+ * Shared parser instance for html.
+ */
+ public static final Parser HTML = new ParseHTML();
+
+ /**
+ * Shared parser instance for head.
+ */
+ public static final Parser HEAD = new ParseHead();
+
+ /**
+ * Shared parser instance for title.
+ */
+ public static final Parser TITLE = new ParseTitle();
+
+ /**
+ * Shared parser instance for script.
+ */
+ public static final Parser SCRIPT = new ParseScript();
+
+ /**
+ * Shared parser instance for body.
+ */
+ public static final Parser BODY = new ParseBody();
+
+ /**
+ * Shared parser instance for frameset.
+ */
+ public static final Parser FRAMESET = new ParseFrameSet();
+
+ /**
+ * Shared parser instance for inline.
+ */
+ public static final Parser INLINE = new ParseInline();
+
+ /**
+ * Shared parser instance for list.
+ */
+ public static final Parser LIST = new ParseList();
+
+ /**
+ * Shared parser instance for definition lists.
+ */
+ public static final Parser DEFLIST = new ParseDefList();
+
+ /**
+ * Shared parser instance for pre.
+ */
+ public static final Parser PRE = new ParsePre();
+
+ /**
+ * Shared parser instance for block elements.
+ */
+ public static final Parser BLOCK = new ParseBlock();
+
+ /**
+ * Shared parser instance for table.
+ */
+ public static final Parser TABLETAG = new ParseTableTag();
+
+ /**
+ * Shared parser instance for colgroup.
+ */
+ public static final Parser COLGROUP = new ParseColGroup();
+
+ /**
+ * Shared parser instance for rowgroup.
+ */
+ public static final Parser ROWGROUP = new ParseRowGroup();
+
+ /**
+ * Shared parser instance for row.
+ */
+ public static final Parser ROW = new ParseRow();
+
+ /**
+ * Shared parser instance for noframes.
+ */
+ public static final Parser NOFRAMES = new ParseNoFrames();
+
+ /**
+ * Shared parser instance for select.
+ */
+ public static final Parser SELECT = new ParseSelect();
+
+ /**
+ * Shared parser instance for text.
+ */
+ public static final Parser TEXT = new ParseText();
+
+ /**
+ * Shared parser instance for empty elements.
+ */
+ public static final Parser EMPTY = new ParseEmpty();
+
+ /**
+ * Shared parser instance for optgroup.
+ */
+ public static final Parser OPTGROUP = new ParseOptGroup();
+
+ /**
+ * Private constructor: ParserImpl should not be instantiated, its parsers are reached through the
+ * static constants above.
+ */
+ private ParserImpl()
+ {
+ // unused
+ }
+
+    /**
+     * Hands <code>node</code> over to the parser registered for its tag. Before dispatching, the lexer's
+     * whitespace state is adjusted from the tag's content model. Nothing more happens when the tag has no
+     * parser, and a start-end tag is only passed to <code>Node.trimEmptyElement</code> instead of being parsed.
+     * @param lexer lexer supplying the tokens
+     * @param node node to parse; its <code>tag</code> must not be <code>null</code>
+     * @param mode content mode forwarded to the tag's parser
+     */
+    protected static void parseTag(Lexer lexer, Node node, short mode)
+    {
+        // Fix by GLP 2000-12-21. Need to reset insertspace if this
+        // is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
+        boolean emptyModel = (node.tag.model & Dict.CM_EMPTY) != 0;
+        boolean inlineModel = (node.tag.model & Dict.CM_INLINE) != 0;
+
+        if (emptyModel)
+        {
+            lexer.waswhite = false;
+        }
+        else if (!inlineModel)
+        {
+            lexer.insertspace = false;
+        }
+
+        Parser tagParser = node.tag.getParser();
+        if (tagParser == null)
+        {
+            return;
+        }
+
+        if (node.type == Node.START_END_TAG)
+        {
+            Node.trimEmptyElement(lexer, node);
+            return;
+        }
+
+        tagParser.parse(lexer, node, mode);
+    }
+
+    /**
+     * Move node to the head, where element is used as starting point in hunt for head. Normally called during
+     * parsing.
+     * <p>
+     * The node is first detached from wherever it currently sits. Anything that is not a start tag or
+     * start-end tag is discarded with a warning. Otherwise a "not allowed in" warning is issued, the node is
+     * parsed if its tag has a parser, and it is then appended either to the <code>head</code> child of the
+     * closest <code>html</code> ancestor of <code>element</code> (when <code>moveElements</code> is enabled) or
+     * to <code>element</code> itself. If no <code>html</code> ancestor or no <code>head</code> child can be
+     * found the node is appended to <code>element</code> as well, so that it is never silently lost and the
+     * ancestor walk cannot run off the top of the tree.
+     * </p>
+     * @param lexer lexer in use; supplies the report and the configuration
+     * @param element element in which <code>node</code> was encountered
+     * @param node node to relocate
+     */
+    protected static void moveToHead(Lexer lexer, Node element, Node node)
+    {
+        node.removeNode(); // make sure that node is isolated
+
+        if (node.type != Node.START_TAG && node.type != Node.START_END_TAG)
+        {
+            lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+            return;
+        }
+
+        lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
+
+        // Parse node (an unknown tag has nothing to parse with)
+        if (node.tag != null && node.tag.getParser() != null)
+        {
+            parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+        }
+
+        Node target = element;
+        if (lexer.configuration.moveElements)
+        {
+            Node head = locateHeadFor(lexer.configuration.tt, element);
+            if (head != null)
+            {
+                target = head;
+            }
+        }
+        target.insertNodeAtEnd(node);
+    }
+
+    /**
+     * Finds the head child of the closest html ancestor-or-self of <code>start</code>.
+     * @param tt tag table providing the html and head tag definitions
+     * @param start node the upward search begins at
+     * @return the head node, or <code>null</code> when there is no html ancestor or it has no head child
+     */
+    private static Node locateHeadFor(TagTable tt, Node start)
+    {
+        Node html = start;
+        while (html != null && html.tag != tt.tagHtml)
+        {
+            html = html.parent;
+        }
+        if (html == null)
+        {
+            return null;
+        }
+
+        for (Node child = html.content; child != null; child = child.next)
+        {
+            if (child.tag == tt.tagHead)
+            {
+                return child;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Moves given node to end of body element. Does nothing unless <code>moveElements</code> is enabled.
+     * <p>
+     * The body is looked up before the node is detached, so that a document without a body leaves the node
+     * where it is instead of detaching it and then failing on a <code>null</code> body. The lookup is repeated
+     * after detaching because removing the node may itself have removed the body from the tree.
+     * NOTE(review): assumes <code>findBody</code> answers <code>null</code> when no body exists - confirm.
+     * </p>
+     * @param lexer Lexer
+     * @param node Node to insert
+     */
+    static void moveNodeToBody(Lexer lexer, Node node)
+    {
+        if (!lexer.configuration.moveElements)
+        {
+            return;
+        }
+
+        if (lexer.root.findBody(lexer.configuration.tt) == null)
+        {
+            // nowhere to move to: keep the node in its current position
+            return;
+        }
+
+        node.removeNode();
+        Node body = lexer.root.findBody(lexer.configuration.tt);
+        if (body != null)
+        {
+            body.insertNodeAtEnd(node);
+        }
+    }
+
+ /**
+ * Parser for HTML. Parses the content of the html element: first the head (inferred when absent), then
+ * either a body or a frameset with its noframes content.
+ */
+ public static class ParseHTML implements Parser
+ {
+
+ /**
+ * Parses the children of <code>html</code>.
+ * <p>
+ * The first loop reads tokens until a head tag is found; stray html end tags are discarded with a warning,
+ * comments and similar nodes are inserted via <code>Node.insertMisc</code>, and any other token is pushed
+ * back and a head element is inferred. The head is appended to <code>html</code> and parsed.
+ * </p>
+ * <p>
+ * The second loop decides between body and frameset. Repeated html tags are skipped, elements whose content
+ * model includes <code>CM_HEAD</code> are handed to <code>moveToHead</code>, a second frameset is reported
+ * as an error, and once a frameset has been seen a body start tag or other content is routed into a
+ * noframes element (inferred if necessary). Without a frameset, a body start tag or any other content ends
+ * the loop and a body (inferred if necessary) is appended and parsed. When the input ends first, a body is
+ * inferred only if no frameset was seen.
+ * </p>
+ * @param lexer lexer supplying the tokens; its <code>xmlTags</code> configuration flag and
+ * <code>seenEndBody</code> are reset on entry
+ * @param html the html element whose content is parsed
+ * @param mode content mode, forwarded to the head, body, frameset and noframes parsers
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
+ * org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node html, short mode)
+ {
+ Node node, head;
+ Node frameset = null;
+ Node noframes = null;
+
+ lexer.configuration.xmlTags = false;
+ lexer.seenEndBody = false;
+ TagTable tt = lexer.configuration.tt;
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.IGNORE_WHITESPACE);
+
+ if (node == null)
+ {
+ node = lexer.inferredTag("head");
+ break;
+ }
+
+ if (node.tag == tt.tagHead)
+ {
+ break;
+ }
+
+ if (node.tag == html.tag && node.type == Node.END_TAG)
+ {
+ lexer.report.warning(lexer, html, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(html, node))
+ {
+ continue;
+ }
+
+ lexer.ungetToken();
+ node = lexer.inferredTag("head");
+ break;
+ }
+
+ head = node;
+ html.insertNodeAtEnd(head);
+ HEAD.parse(lexer, head, mode);
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.IGNORE_WHITESPACE);
+
+ if (node == null)
+ {
+ if (frameset == null)
+ {
+ // implied body
+ node = lexer.inferredTag("body");
+ html.insertNodeAtEnd(node);
+ BODY.parse(lexer, node, mode);
+ }
+
+ return;
+ }
+
+ // robustly handle html tags
+ if (node.tag == html.tag)
+ {
+ if (node.type != Node.START_TAG && frameset == null)
+ {
+ lexer.report.warning(lexer, html, node,
Report.DISCARDING_UNEXPECTED);
+ }
+ else if (node.type == Node.END_TAG)
+ {
+ lexer.seenEndHtml = true;
+ }
+
+ continue;
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(html, node))
+ {
+ continue;
+ }
+
+ // if frameset document coerce <body> to <noframes>
+ if (node.tag == tt.tagBody)
+ {
+ if (node.type != Node.START_TAG)
+ {
+ lexer.report.warning(lexer, html, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (frameset != null)
+ {
+ lexer.ungetToken();
+
+ if (noframes == null)
+ {
+ noframes = lexer.inferredTag("noframes");
+ frameset.insertNodeAtEnd(noframes);
+ lexer.report.warning(lexer, html, noframes,
Report.INSERTING_TAG);
+ }
+
+ parseTag(lexer, noframes, mode);
+ continue;
+ }
+
+ lexer.constrainVersion(~Dict.VERS_FRAMESET);
+ break; // to parse body
+ }
+
+ // flag an error if we see more than one frameset
+ if (node.tag == tt.tagFrameset)
+ {
+ if (node.type != Node.START_TAG)
+ {
+ lexer.report.warning(lexer, html, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (frameset != null)
+ {
+ lexer.report.error(lexer, html, node,
Report.DUPLICATE_FRAMESET);
+ }
+ else
+ {
+ frameset = node;
+ }
+
+ html.insertNodeAtEnd(node);
+ parseTag(lexer, node, mode);
+
+ // see if it includes a noframes element so that we can merge
subsequent noframes elements
+
+ for (node = frameset.content; node != null; node = node.next)
+ {
+ if (node.tag == tt.tagNoframes)
+ {
+ noframes = node;
+ }
+ }
+ continue;
+ }
+
+ // if not a frameset document coerce <noframes> to <body>
+ if (node.tag == tt.tagNoframes)
+ {
+ if (node.type != Node.START_TAG)
+ {
+ lexer.report.warning(lexer, html, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (frameset == null)
+ {
+ lexer.report.warning(lexer, html, node,
Report.DISCARDING_UNEXPECTED);
+ node = lexer.inferredTag("body");
+ break;
+ }
+
+ if (noframes == null)
+ {
+ noframes = node;
+ frameset.insertNodeAtEnd(noframes);
+ }
+
+ parseTag(lexer, noframes, mode);
+ continue;
+ }
+
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+ {
+ if (node.tag != null && (node.tag.model & Dict.CM_HEAD)
!= 0)
+ {
+ moveToHead(lexer, html, node);
+ continue;
+ }
+
+ // #427675 - discard illegal frame element following a frameset - fix
by Randy Waki 11 Oct 00
+ if (frameset != null && node.tag == tt.tagFrame)
+ {
+ lexer.report.warning(lexer, html, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+ }
+
+ lexer.ungetToken();
+
+ // insert other content into noframes element
+ if (frameset != null)
+ {
+ if (noframes == null)
+ {
+ noframes = lexer.inferredTag("noframes");
+ frameset.insertNodeAtEnd(noframes);
+ }
+ else
+ {
+ lexer.report.warning(lexer, html, node,
Report.NOFRAMES_CONTENT);
+ }
+
+ lexer.constrainVersion(Dict.VERS_FRAMESET);
+ parseTag(lexer, noframes, mode);
+ continue;
+ }
+
+ node = lexer.inferredTag("body");
+ lexer.constrainVersion(~Dict.VERS_FRAMESET);
+ break;
+ }
+
+ // node must be body
+ html.insertNodeAtEnd(node);
+ parseTag(lexer, node, mode);
+ lexer.seenEndHtml = true;
+ }
+
+ }
+
+ /**
+ * Parser for HEAD. Collects the elements that belong in the head and makes sure a title element exists.
+ */
+ public static class ParseHead implements Parser
+ {
+
+ /**
+ * Parses the children of <code>head</code>.
+ * <p>
+ * Tokens are read, ignoring whitespace, until the head end tag is seen (which marks the head as closed), a
+ * text node is seen, or an element whose content model lacks <code>CM_HEAD</code> is seen; in the latter
+ * two cases the token is pushed back so the caller can handle it. Comments and similar nodes are inserted
+ * via <code>Node.insertMisc</code>, doctype tokens via <code>Node.insertDocType</code>, and tokens with an
+ * unknown tag are discarded with a warning. Start and start-end tags are appended to the head and parsed;
+ * a second title or base element and any noscript element draw a warning but are still inserted. Remaining
+ * tokens are discarded with a warning.
+ * </p>
+ * <p>
+ * If no title was seen, an inferred title element is appended; the missing-title warning is suppressed when
+ * the <code>bodyOnly</code> configuration flag is set.
+ * </p>
+ * @param lexer lexer supplying the tokens
+ * @param head the head element whose content is parsed
+ * @param mode content mode; not consulted here, children are always parsed with
+ * <code>Lexer.IGNORE_WHITESPACE</code>
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
+ * org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node head, short mode)
+ {
+ Node node;
+ int hasTitle = 0;
+ int hasBase = 0;
+ TagTable tt = lexer.configuration.tt;
+
+ while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
+ {
+ if (node.tag == head.tag && node.type == Node.END_TAG)
+ {
+ head.closed = true;
+ break;
+ }
+
+ if (node.type == Node.TEXT_NODE)
+ {
+ lexer.report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
+ lexer.ungetToken();
+ break;
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(head, node))
+ {
+ continue;
+ }
+
+ if (node.type == Node.DOCTYPE_TAG)
+ {
+ Node.insertDocType(lexer, head, node);
+ continue;
+ }
+
+ // discard unknown tags
+ if (node.tag == null)
+ {
+ lexer.report.warning(lexer, head, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (!TidyUtils.toBoolean(node.tag.model & Dict.CM_HEAD))
+ {
+ // #545067 Implicit closing of head broken - warn only for XHTML
input
+ if (lexer.isvoyager)
+ {
+ lexer.report.warning(lexer, head, node,
Report.TAG_NOT_ALLOWED_IN);
+ }
+ lexer.ungetToken();
+ break;
+ }
+
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+ {
+ if (node.tag == tt.tagTitle)
+ {
+ ++hasTitle;
+
+ if (hasTitle > 1)
+ {
+ lexer.report.warning(lexer, head, node,
Report.TOO_MANY_ELEMENTS);
+ }
+ }
+ else if (node.tag == tt.tagBase)
+ {
+ ++hasBase;
+
+ if (hasBase > 1)
+ {
+ lexer.report.warning(lexer, head, node,
Report.TOO_MANY_ELEMENTS);
+ }
+ }
+ else if (node.tag == tt.tagNoscript)
+ {
+ lexer.report.warning(lexer, head, node,
Report.TAG_NOT_ALLOWED_IN);
+ }
+
+ head.insertNodeAtEnd(node);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ continue;
+ }
+
+ // discard unexpected text nodes and end tags
+ lexer.report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
+ }
+
+ if (hasTitle == 0)
+ {
+ if (!lexer.configuration.bodyOnly)
+ {
+ lexer.report.warning(lexer, head, null,
Report.MISSING_TITLE_ELEMENT);
+ }
+ head.insertNodeAtEnd(lexer.inferredTag("title"));
+ }
+ }
+
+ }
+
+    /**
+     * Parser for TITLE.
+     */
+    public static class ParseTitle implements Parser
+    {
+
+        /**
+         * Parses the content of <code>title</code> in mixed-content mode.
+         * <p>
+         * Text nodes are appended (the first one with its leading space trimmed, empty ones dropped), comments
+         * and similar nodes are inserted via <code>Node.insertMisc</code>, and tokens with an unknown tag are
+         * discarded with a warning. The title end tag closes the element. A second title <em>start</em> tag is
+         * treated as a mistyped end tag: it is coerced to an end tag and pushed back to the lexer so that the
+         * next iteration closes the title with it. Without the push-back the coerced token was simply dropped
+         * and the title stayed open. Any other token is pushed back with a "missing end tag before" warning. If
+         * the input ends first, a "missing end tag for" warning is issued.
+         * </p>
+         * NOTE(review): relies on <code>lexer.ungetToken()</code> making the next <code>getToken</code> call
+         * return this same (now end-tag) node - confirm against the Lexer.
+         * @param lexer lexer supplying the tokens
+         * @param title the title element whose content is parsed
+         * @param mode content mode; not consulted, tokens are always read with <code>Lexer.MIXED_CONTENT</code>
+         * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer, org.ajax4jsf.org.w3c.tidy.Node, short)
+         */
+        public void parse(Lexer lexer, Node title, short mode)
+        {
+            Node node;
+
+            while ((node = lexer.getToken(Lexer.MIXED_CONTENT)) != null)
+            {
+                // [438658] : Missing / in title endtag makes 2 titles
+                if (node.tag == title.tag && node.type == Node.START_TAG)
+                {
+                    lexer.report.warning(lexer, title, node, Report.COERCE_TO_ENDTAG);
+                    node.type = Node.END_TAG;
+                    // hand the coerced token back so it is seen again, this time as the end tag
+                    lexer.ungetToken();
+                    continue;
+                }
+                else if (node.tag == title.tag && node.type == Node.END_TAG)
+                {
+                    title.closed = true;
+                    Node.trimSpaces(lexer, title);
+                    return;
+                }
+
+                if (node.type == Node.TEXT_NODE)
+                {
+                    // only called for 1st child
+                    if (title.content == null)
+                    {
+                        Node.trimInitialSpace(lexer, title, node);
+                    }
+
+                    // nothing left after trimming: drop the node
+                    if (node.start >= node.end)
+                    {
+                        continue;
+                    }
+
+                    title.insertNodeAtEnd(node);
+                    continue;
+                }
+
+                // deal with comments etc.
+                if (Node.insertMisc(title, node))
+                {
+                    continue;
+                }
+
+                // discard unknown tags
+                if (node.tag == null)
+                {
+                    lexer.report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                // pushback unexpected tokens
+                lexer.report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE);
+                lexer.ungetToken();
+                Node.trimSpaces(lexer, title);
+                return;
+            }
+
+            // input exhausted; node is null here
+            lexer.report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR);
+        }
+
+    }
+
+    /**
+     * Parser for SCRIPT.
+     */
+    public static class ParseScript implements Parser
+    {
+
+        /**
+         * Reads the element's content as one CDATA node and, when the lexer delivers one, appends it to
+         * <code>script</code>.
+         * @param lexer lexer the CDATA content is read from
+         * @param script the element receiving the content
+         * @param mode content mode; not consulted
+         * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer, org.ajax4jsf.org.w3c.tidy.Node, short)
+         */
+        public void parse(Lexer lexer, Node script, short mode)
+        {
+            // This isn't quite right for CDATA content as it recognises tags within the content and parses them
+            // accordingly. This will unfortunately screw up scripts which include < + letter, < + !, < + ? or
+            // < + / + letter
+
+            Node cdata = lexer.getCDATA(script);
+            if (cdata == null)
+            {
+                return;
+            }
+            script.insertNodeAtEnd(cdata);
+        }
+
+    }
+
+ /**
+ * Parser for BODY.
+ * Reads tokens from the lexer until none are left (or an early exit is taken) and decides, token by token,
+ * whether to insert it under the body element, wrap it in an inferred element, move it to the head, or
+ * discard it with a warning.
+ */
+ public static class ParseBody implements Parser
+ {
+
+ /**
+ * Parses the content of <code>body</code>.
+ * The <code>mode</code> argument is overwritten on entry; the loop itself switches between
+ * <code>Lexer.IGNORE_WHITESPACE</code> and <code>Lexer.MIXED_CONTENT</code> depending on what was last inserted.
+ * The method leaves the loop early (after pushing the current token back) when a noframes end tag, a frame or
+ * a frameset is met while the body's parent is a noframes element, and returns early for a tag that is neither
+ * block nor inline and matches none of the specially handled content models.
+ * @param lexer token source; also carries the seenEndBody / seenEndHtml / excludeBlocks flags updated here
+ * @param body the BODY element receiving the parsed children
+ * @param mode ignored (reset to IGNORE_WHITESPACE before the first token is read)
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node body, short mode)
+ {
+ Node node;
+ boolean checkstack, iswhitenode;
+
+ mode = Lexer.IGNORE_WHITESPACE; // the caller-supplied mode is discarded
+ checkstack = true; // while set, the next text node / inline start tag is offered once to lexer.inlineDup()
+ TagTable tt = lexer.configuration.tt;
+
+ Clean.bumpObject(lexer, body.parent);
+
+ while ((node = lexer.getToken(mode)) != null)
+ {
+
+ // #538536 Extra endtags not detected
+ if (node.tag == tt.tagHtml)
+ {
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG ||
lexer.seenEndHtml)
+ {
+ lexer.report.warning(lexer, body, node,
Report.DISCARDING_UNEXPECTED);
+ }
+ else
+ {
+ lexer.seenEndHtml = true;
+ }
+
+ continue; // html tokens are never inserted under body
+ }
+
+ if (lexer.seenEndBody
+ && (node.type == Node.START_TAG || node.type == Node.END_TAG
|| node.type == Node.START_END_TAG))
+ {
+ lexer.report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
+ }
+
+ if (node.tag == body.tag && node.type == Node.END_TAG)
+ {
+ body.closed = true;
+ Node.trimSpaces(lexer, body);
+ lexer.seenEndBody = true; // tags read from now on trigger the CONTENT_AFTER_BODY warning above
+ mode = Lexer.IGNORE_WHITESPACE;
+
+ if (body.parent.tag == tt.tagNoframes)
+ {
+ break; // body nested in noframes: stop consuming tokens here
+ }
+
+ continue; // otherwise keep reading tokens after </body>
+ }
+
+ if (node.tag == tt.tagNoframes)
+ {
+ if (node.type == Node.START_TAG)
+ {
+ body.insertNodeAtEnd(node);
+ BLOCK.parse(lexer, node, mode);
+ continue;
+ }
+
+ if (node.type == Node.END_TAG && body.parent.tag ==
tt.tagNoframes)
+ {
+ Node.trimSpaces(lexer, body);
+ lexer.ungetToken();
+ break;
+ }
+ }
+
+ if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset) &&
body.parent.tag == tt.tagNoframes)
+ {
+ Node.trimSpaces(lexer, body);
+ lexer.ungetToken();
+ break;
+ }
+
+ iswhitenode = false; // NOTE(review): the test below reads textarray[node.start] even when node.end == node.start - confirm empty text nodes cannot reach here
+
+ if (node.type == Node.TEXT_NODE
+ && node.end <= node.start + 1
+ && node.textarray[node.start] == (byte) ' ')
+ {
+ iswhitenode = true; // text node of at most one byte whose first byte is a space
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(body, node))
+ {
+ continue;
+ }
+
+ // #538536 Extra endtags not detected
+ // if (lexer.seenEndBody && !iswhitenode)
+ // {
+ // lexer.seenEndBody = true;
+ // lexer.report.warning(lexer, body, node,
Report.CONTENT_AFTER_BODY);
+ // }
+
+ // mixed content model permits text
+ if (node.type == Node.TEXT_NODE)
+ {
+ if (iswhitenode && mode == Lexer.IGNORE_WHITESPACE)
+ {
+ continue; // lone whitespace is dropped while whitespace is being ignored
+ }
+
+ if (lexer.configuration.encloseBodyText && !iswhitenode)
+ {
+ Node para;
+
+ lexer.ungetToken(); // push the text back so it is read again while parsing the inferred <p>
+ para = lexer.inferredTag("p");
+ body.insertNodeAtEnd(para);
+ parseTag(lexer, para, mode);
+ mode = Lexer.MIXED_CONTENT;
+ continue;
+ }
+
+ // HTML2 and HTML4 strict doesn't allow text here
+ lexer.constrainVersion(~(Dict.VERS_HTML40_STRICT |
Dict.VERS_HTML20));
+
+ if (checkstack)
+ {
+ checkstack = false;
+
+ if (lexer.inlineDup(node) > 0)
+ {
+ continue;
+ }
+ }
+
+ body.insertNodeAtEnd(node);
+ mode = Lexer.MIXED_CONTENT;
+ continue;
+ }
+
+ if (node.type == Node.DOCTYPE_TAG)
+ {
+ Node.insertDocType(lexer, body, node);
+ continue;
+ }
+ // discard unknown and PARAM tags
+ if (node.tag == null || node.tag == tt.tagParam)
+ {
+ lexer.report.warning(lexer, body, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // Netscape allows LI and DD directly in BODY We infer UL or DL
respectively and use this boolean to
+ // exclude block-level elements so as to match Netscape's observed
behaviour.
+
+ lexer.excludeBlocks = false;
+
+ if ((!((node.tag.model & Dict.CM_BLOCK) != 0) &&
!((node.tag.model & Dict.CM_INLINE) != 0))
+ || node.tag == tt.tagInput)
+ {
+ // avoid this error message being issued twice
+ if (!((node.tag.model & Dict.CM_HEAD) != 0))
+ {
+ lexer.report.warning(lexer, body, node,
Report.TAG_NOT_ALLOWED_IN);
+ }
+
+ if ((node.tag.model & Dict.CM_HTML) != 0)
+ {
+ // copy body attributes if current body was inferred
+ if (node.tag == tt.tagBody && body.implicit &&
body.attributes == null)
+ {
+ body.attributes = node.attributes;
+ node.attributes = null;
+ }
+
+ continue; // the token itself is dropped
+ }
+
+ if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ moveToHead(lexer, body, node);
+ continue;
+ }
+
+ if ((node.tag.model & Dict.CM_LIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("ul");
+ node.addClass("noindent");
+ lexer.excludeBlocks = true;
+ }
+ else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("dl");
+ lexer.excludeBlocks = true;
+ }
+ else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP |
Dict.CM_ROW)) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("table");
+ lexer.excludeBlocks = true;
+ }
+ // Changed by Asmirnov - exclude empty <form> inserting
+ else if (node.tag == tt.tagInput)
+ {
+ // lexer.ungetToken();
+ // node = lexer.inferredTag("form");
+ lexer.excludeBlocks = true; // the input token is kept as is; no form is inferred around it
+ }
+ else
+ {
+ if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) !=
0))
+ {
+ lexer.ungetToken();
+ return; // neither CM_ROW nor CM_FIELD: token pushed back, body parsing ends here
+ }
+
+ // ignore </td></th> <option> etc.
+ continue;
+ }
+ }
+
+ if (node.type == Node.END_TAG)
+ {
+ if (node.tag == tt.tagBr)
+ {
+ node.type = Node.START_TAG; // </br> is handled as <br>
+ }
+ else if (node.tag == tt.tagP)
+ {
+ Node.coerceNode(lexer, node, tt.tagBr);
+ body.insertNodeAtEnd(node);
+ node = lexer.inferredTag("br"); // presumably a start tag, so the branch below inserts this second br - confirm against Lexer.inferredTag
+ }
+ else if ((node.tag.model & Dict.CM_INLINE) != 0)
+ {
+ lexer.popInline(node);
+ }
+ }
+
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+ {
+ if (((node.tag.model & Dict.CM_INLINE) != 0) &&
!((node.tag.model & Dict.CM_MIXED) != 0))
+ {
+ // HTML4 strict doesn't allow inline content here
+ // but HTML2 does allow img elements as children of body
+ if (node.tag == tt.tagImg)
+ {
+ lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
+ }
+ else
+ {
+ lexer.constrainVersion(~(Dict.VERS_HTML40_STRICT |
Dict.VERS_HTML20));
+ }
+
+ if (checkstack && !node.implicit)
+ {
+ checkstack = false;
+
+ if (lexer.inlineDup(node) > 0)
+ {
+ continue;
+ }
+ }
+
+ mode = Lexer.MIXED_CONTENT;
+ }
+ else
+ {
+ checkstack = true; // re-arm the inlineDup check for content following this element
+ mode = Lexer.IGNORE_WHITESPACE;
+ }
+
+ if (node.implicit)
+ {
+ lexer.report.warning(lexer, body, node, Report.INSERTING_TAG);
+ }
+
+ body.insertNodeAtEnd(node);
+ parseTag(lexer, node, mode);
+ continue;
+ }
+
+ // discard unexpected tags
+ lexer.report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
+ }
+ }
+
+ }
+
+ /**
+ * Parser for FRAMESET.
+ */
+ public static class ParseFrameSet implements Parser
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node frameset, short mode)
+ {
+ Node node;
+ TagTable tt = lexer.configuration.tt;
+
+ lexer.badAccess |= Report.USING_FRAMES;
+
+ while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
+ {
+ if (node.tag == frameset.tag && node.type == Node.END_TAG)
+ {
+ frameset.closed = true;
+ Node.trimSpaces(lexer, frameset);
+ return;
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(frameset, node))
+ {
+ continue;
+ }
+
+ if (node.tag == null)
+ {
+ lexer.report.warning(lexer, frameset, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+ {
+ if (node.tag != null && (node.tag.model & Dict.CM_HEAD)
!= 0)
+ {
+ moveToHead(lexer, frameset, node);
+ continue;
+ }
+ }
+
+ if (node.tag == tt.tagBody)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("noframes");
+ lexer.report.warning(lexer, frameset, node, Report.INSERTING_TAG);
+ }
+
+ if (node.type == Node.START_TAG && (node.tag.model &
Dict.CM_FRAMES) != 0)
+ {
+ frameset.insertNodeAtEnd(node);
+ lexer.excludeBlocks = false;
+ parseTag(lexer, node, Lexer.MIXED_CONTENT);
+ continue;
+ }
+ else if (node.type == Node.START_END_TAG && (node.tag.model &
Dict.CM_FRAMES) != 0)
+ {
+ frameset.insertNodeAtEnd(node);
+ continue;
+ }
+
+ // discard unexpected tags
+ lexer.report.warning(lexer, frameset, node,
Report.DISCARDING_UNEXPECTED);
+ }
+
+ lexer.report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR);
+ }
+
+ }
+
+ /**
+ * Parser for INLINE.
+ */
+ public static class ParseInline implements Parser
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node element, short mode)
+ {
+ Node node, parent;
+ TagTable tt = lexer.configuration.tt;
+
+ if (TidyUtils.toBoolean(element.tag.model & Dict.CM_EMPTY))
+ {
+ return;
+ }
+
+ // ParseInline is used for some block level elements like H1 to H6 For such
elements we need to insert
+ // inline emphasis tags currently on the inline stack. For Inline elements,
we normally push them onto the
+ // inline stack provided they aren't implicit or OBJECT/APPLET. This test
is carried out in PushInline and
+ // PopInline, see istack.c We don't push SPAN to replicate current
browser behavior
+
+ if (TidyUtils.toBoolean(element.tag.model & Dict.CM_BLOCK) ||
(element.tag == tt.tagDt))
+ {
+ lexer.inlineDup(null);
+ }
+ else if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE)
&& element.tag != tt.tagSpan )
+ {
+ // && element.tag != tt.tagSpan #540571 Inconsistent behaviour
with span inline element
+ lexer.pushInline(element);
+ }
+
+ if (element.tag == tt.tagNobr)
+ {
+ lexer.badLayout |= Report.USING_NOBR;
+ }
+ else if (element.tag == tt.tagFont)
+ {
+ lexer.badLayout |= Report.USING_FONT;
+ }
+
+ // Inline elements may or may not be within a preformatted element
+ if (mode != Lexer.PREFORMATTED)
+ {
+ mode = Lexer.MIXED_CONTENT;
+ }
+
+ while ((node = lexer.getToken(mode)) != null)
+ {
+ // end tag for current element
+ if (node.tag == element.tag && node.type == Node.END_TAG)
+ {
+ if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE))
+ {
+ lexer.popInline(node);
+ }
+
+ if (!TidyUtils.toBoolean(mode & Lexer.PREFORMATTED))
+ {
+ Node.trimSpaces(lexer, element);
+ }
+
+ // if a font element wraps an anchor and nothing else then move the
font element inside the anchor
+ // since otherwise it won't alter the anchor text color
+
+ if (element.tag == tt.tagFont && element.content != null
&& element.content == element.last)
+ {
+ Node child = element.content;
+
+ if (child.tag == tt.tagA)
+ {
+ child.parent = element.parent;
+ child.next = element.next;
+ child.prev = element.prev;
+
+ if (child.prev != null)
+ {
+ child.prev.next = child;
+ }
+ else
+ {
+ child.parent.content = child;
+ }
+
+ if (child.next != null)
+ {
+ child.next.prev = child;
+ }
+ else
+ {
+ child.parent.last = child;
+ }
+
+ element.next = null;
+ element.prev = null;
+ element.parent = child;
+ element.content = child.content;
+ element.last = child.last;
+ child.content = element;
+ child.last = element;
+ for (child = element.content; child != null; child =
child.next)
+ {
+ child.parent = element;
+ }
+ }
+ }
+ element.closed = true;
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ // <u> ... <u> map 2nd <u> to </u> if 1st is
explicit
+ // otherwise emphasis nesting is probably unintentional
+ // big and small have cumulative effect to leave them alone
+ if (node.type == Node.START_TAG
+ && node.tag == element.tag
+ && lexer.isPushed(node)
+ && !node.implicit
+ && !element.implicit
+ && node.tag != null
+ && ((node.tag.model & Dict.CM_INLINE) != 0)
+ && node.tag != tt.tagA
+ && node.tag != tt.tagFont
+ && node.tag != tt.tagBig
+ && node.tag != tt.tagSmall
+ && node.tag != tt.tagQ)
+ {
+ if (element.content != null && node.attributes == null)
+ {
+ lexer.report.warning(lexer, element, node,
Report.COERCE_TO_ENDTAG);
+ node.type = Node.END_TAG;
+ lexer.ungetToken();
+ continue;
+ }
+
+ lexer.report.warning(lexer, element, node, Report.NESTED_EMPHASIS);
+ }
+ else if (lexer.isPushed(node) && node.type == Node.START_TAG
&& node.tag == tt.tagQ)
+ {
+ lexer.report.warning(lexer, element, node, Report.NESTED_QUOTATION);
+ }
+
+ if (node.type == Node.TEXT_NODE)
+ {
+ // only called for 1st child
+ if (element.content == null && !TidyUtils.toBoolean(mode
& Lexer.PREFORMATTED))
+ {
+ Node.trimSpaces(lexer, element);
+ }
+
+ if (node.start >= node.end)
+ {
+ continue;
+ }
+
+ element.insertNodeAtEnd(node);
+ continue;
+ }
+
+ // mixed content model so allow text
+ if (Node.insertMisc(element, node))
+ {
+ continue;
+ }
+
+ // deal with HTML tags
+ if (node.tag == tt.tagHtml)
+ {
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+ {
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // otherwise infer end of inline element
+ lexer.ungetToken();
+ if (!((mode & Lexer.PREFORMATTED) != 0))
+ {
+ Node.trimSpaces(lexer, element);
+ }
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ // within <dt> or <pre> map <p> to <br>
+ if (node.tag == tt.tagP
+ && node.type == Node.START_TAG
+ && ((mode & Lexer.PREFORMATTED) != 0 || element.tag ==
tt.tagDt || element.isDescendantOf(tt.tagDt)))
+ {
+ node.tag = tt.tagBr;
+ node.element = "br";
+ Node.trimSpaces(lexer, element);
+ element.insertNodeAtEnd(node);
+ continue;
+ }
+
+ // ignore unknown and PARAM tags
+ if (node.tag == null || node.tag == tt.tagParam)
+ {
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.tag == tt.tagBr && node.type == Node.END_TAG)
+ {
+ node.type = Node.START_TAG;
+ }
+
+ if (node.type == Node.END_TAG)
+ {
+ // coerce </br> to <br>
+ if (node.tag == tt.tagBr)
+ {
+ node.type = Node.START_TAG;
+ }
+ else if (node.tag == tt.tagP)
+ {
+ // coerce unmatched </p> to <br><br>
+ if (!element.isDescendantOf(tt.tagP))
+ {
+ Node.coerceNode(lexer, node, tt.tagBr);
+ Node.trimSpaces(lexer, element);
+ element.insertNodeAtEnd(node);
+ node = lexer.inferredTag("br");
+ continue;
+ }
+ }
+ else if ((node.tag.model & Dict.CM_INLINE) != 0
+ && node.tag != tt.tagA
+ && !((node.tag.model & Dict.CM_OBJECT) != 0)
+ && (element.tag.model & Dict.CM_INLINE) != 0)
+ {
+ // allow any inline end tag to end current element
+ lexer.popInline(element);
+
+ if (element.tag != tt.tagA)
+ {
+ if (node.tag == tt.tagA && node.tag != element.tag)
+ {
+ lexer.report.warning(lexer, element, node,
Report.MISSING_ENDTAG_BEFORE);
+ lexer.ungetToken();
+ }
+ else
+ {
+ lexer.report.warning(lexer, element, node,
Report.NON_MATCHING_ENDTAG);
+ }
+
+ if (!((mode & Lexer.PREFORMATTED) != 0))
+ {
+ Node.trimSpaces(lexer, element);
+ }
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ // if parent is <a> then discard unexpected inline end tag
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ } // special case </tr> etc. for stuff moved in front of table
+ else if (lexer.exiled && node.tag.model != 0 &&
(node.tag.model & Dict.CM_TABLE) != 0)
+ {
+ lexer.ungetToken();
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+
+ // allow any header tag to end current header
+ if ((node.tag.model & Dict.CM_HEADING) != 0 &&
(element.tag.model & Dict.CM_HEADING) != 0)
+ {
+ if (node.tag == element.tag)
+ {
+ lexer.report.warning(lexer, element, node,
Report.NON_MATCHING_ENDTAG);
+ }
+ else
+ {
+ lexer.report.warning(lexer, element, node,
Report.MISSING_ENDTAG_BEFORE);
+ lexer.ungetToken();
+ }
+ if (!((mode & Lexer.PREFORMATTED) != 0))
+ {
+ Node.trimSpaces(lexer, element);
+ }
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ // an <A> tag to ends any open <A> element but <A
href=...> is mapped to </A><A href=...>
+
+ // #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00
+ // if (node.tag == tt.tagA && !node.implicit &&
lexer.isPushed(node))
+ if (node.tag == tt.tagA
+ && !node.implicit
+ && (element.tag == tt.tagA ||
element.isDescendantOf(tt.tagA)))
+ {
+ // coerce <a> to </a> unless it has some attributes
+ // #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00
+ // other fixes by Dave Raggett
+ // if (node.attributes == null)
+ if (node.type != Node.END_TAG && node.attributes == null)
+ {
+ node.type = Node.END_TAG;
+ lexer.report.warning(lexer, element, node,
Report.COERCE_TO_ENDTAG);
+ // lexer.popInline(node);
+ lexer.ungetToken();
+ continue;
+ }
+
+ lexer.ungetToken();
+ lexer.report.warning(lexer, element, node,
Report.MISSING_ENDTAG_BEFORE);
+ // lexer.popInline(element);
+ if (!((mode & Lexer.PREFORMATTED) != 0))
+ {
+ Node.trimSpaces(lexer, element);
+ }
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ if ((element.tag.model & Dict.CM_HEADING) != 0)
+ {
+ if (node.tag == tt.tagCenter || node.tag == tt.tagDiv)
+ {
+ if (node.type != Node.START_TAG && node.type !=
Node.START_END_TAG)
+ {
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ lexer.report.warning(lexer, element, node,
Report.TAG_NOT_ALLOWED_IN);
+
+ // insert center as parent if heading is empty
+ if (element.content == null)
+ {
+ Node.insertNodeAsParent(element, node);
+ continue;
+ }
+
+ // split heading and make center parent of 2nd part
+ element.insertNodeAfterElement(node);
+
+ if (!((mode & Lexer.PREFORMATTED) != 0))
+ {
+ Node.trimSpaces(lexer, element);
+ }
+
+ element = lexer.cloneNode(element);
+ element.start = lexer.lexsize;
+ element.end = lexer.lexsize;
+ node.insertNodeAtEnd(element);
+ continue;
+ }
+
+ if (node.tag == tt.tagHr)
+ {
+ if (node.type != Node.START_TAG && node.type !=
Node.START_END_TAG)
+ {
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ lexer.report.warning(lexer, element, node,
Report.TAG_NOT_ALLOWED_IN);
+
+ // insert hr before heading if heading is empty
+ if (element.content == null)
+ {
+ Node.insertNodeBeforeElement(element, node);
+ continue;
+ }
+
+ // split heading and insert hr before 2nd part
+ element.insertNodeAfterElement(node);
+
+ if (!((mode & Lexer.PREFORMATTED) != 0))
+ {
+ Node.trimSpaces(lexer, element);
+ }
+
+ element = lexer.cloneNode(element);
+ element.start = lexer.lexsize;
+ element.end = lexer.lexsize;
+ node.insertNodeAfterElement(element);
+ continue;
+ }
+ }
+
+ if (element.tag == tt.tagDt)
+ {
+ if (node.tag == tt.tagHr)
+ {
+ Node dd;
+
+ if (node.type != Node.START_TAG && node.type !=
Node.START_END_TAG)
+ {
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ lexer.report.warning(lexer, element, node,
Report.TAG_NOT_ALLOWED_IN);
+ dd = lexer.inferredTag("dd");
+
+ // insert hr within dd before dt if dt is empty
+ if (element.content == null)
+ {
+ Node.insertNodeBeforeElement(element, dd);
+ dd.insertNodeAtEnd(node);
+ continue;
+ }
+
+ // split dt and insert hr within dd before 2nd part
+ element.insertNodeAfterElement(dd);
+ dd.insertNodeAtEnd(node);
+
+ if (!((mode & Lexer.PREFORMATTED) != 0))
+ {
+ Node.trimSpaces(lexer, element);
+ }
+
+ element = lexer.cloneNode(element);
+ element.start = lexer.lexsize;
+ element.end = lexer.lexsize;
+ dd.insertNodeAfterElement(element);
+ continue;
+ }
+ }
+
+ // if this is the end tag for an ancestor element then infer end tag for
this element
+
+ if (node.type == Node.END_TAG)
+ {
+ for (parent = element.parent; parent != null; parent =
parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ if (!((element.tag.model & Dict.CM_OPT) != 0) &&
!element.implicit)
+ {
+ lexer.report.warning(lexer, element, node,
Report.MISSING_ENDTAG_BEFORE);
+ }
+
+ if (element.tag == tt.tagA)
+ {
+ lexer.popInline(element);
+ }
+
+ lexer.ungetToken();
+
+ if (!((mode & Lexer.PREFORMATTED) != 0))
+ {
+ Node.trimSpaces(lexer, element);
+ }
+
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ }
+
+ // block level tags end this element, expect span - used jsf for mark
facets and groups.
+ if (!((node.tag.model & Dict.CM_INLINE) != 0) && element.tag
!= tt.tagSpan)
+ {
+ if (node.type != Node.START_TAG)
+ {
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (!((element.tag.model & Dict.CM_OPT) != 0))
+ {
+ lexer.report.warning(lexer, element, node,
Report.MISSING_ENDTAG_BEFORE);
+ }
+
+ if ((node.tag.model & Dict.CM_HEAD) != 0 &&
!((node.tag.model & Dict.CM_BLOCK) != 0))
+ {
+ moveToHead(lexer, element, node);
+ continue;
+ }
+
+ // prevent anchors from propagating into block tags except for
headings h1 to h6
+
+ if (element.tag == tt.tagA)
+ {
+ if (node.tag != null && !((node.tag.model &
Dict.CM_HEADING) != 0))
+ {
+ lexer.popInline(element);
+ }
+ else if (!(element.content != null))
+ {
+ Node.discardElement(element);
+ lexer.ungetToken();
+ return;
+ }
+ }
+
+ lexer.ungetToken();
+
+ if (!((mode & Lexer.PREFORMATTED) != 0))
+ {
+ Node.trimSpaces(lexer, element);
+ }
+
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ // parse inline element
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+ {
+ if (node.implicit)
+ {
+ lexer.report.warning(lexer, element, node,
Report.INSERTING_TAG);
+ }
+
+ // trim white space before <br>
+ if (node.tag == tt.tagBr)
+ {
+ Node.trimSpaces(lexer, element);
+ }
+
+ element.insertNodeAtEnd(node);
+ parseTag(lexer, node, mode);
+ continue;
+ }
+
+ // discard unexpected tags
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (!((element.tag.model & Dict.CM_OPT) != 0))
+ {
+ lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
+ }
+
+ Node.trimEmptyElement(lexer, element);
+ }
+ }
+
+ /**
+  * Parser for LIST.
+  */
+ public static class ParseList implements Parser
+ {
+
+     /**
+      * Parses the children of a list element. Every child that is not an <code>li</code> is pushed back and
+      * wrapped in an inferred <code>li</code> carrying <code>style="list-style: none"</code>, unless it is a
+      * block-level tag while <code>lexer.excludeBlocks</code> is set, in which case the list ends there.
+      * Lists whose model includes <code>Dict.CM_OBSOLETE</code> are coerced to <code>ul</code> whenever the
+      * list is finished.
+      * @param lexer token source
+      * @param list the list element being parsed
+      * @param mode not used; tokens are always read with <code>Lexer.IGNORE_WHITESPACE</code>
+      */
+     public void parse(Lexer lexer, Node list, short mode)
+     {
+         Node node;
+         TagTable tt = lexer.configuration.tt;
+
+         if ((list.tag.model & Dict.CM_EMPTY) != 0)
+         {
+             return;
+         }
+
+         lexer.insert = -1; // defer implicit inline start tags
+
+         for (node = lexer.getToken(Lexer.IGNORE_WHITESPACE);
+             node != null;
+             node = lexer.getToken(Lexer.IGNORE_WHITESPACE))
+         {
+             // explicit end tag of this list
+             if (node.type == Node.END_TAG && node.tag == list.tag)
+             {
+                 coerceIfObsolete(lexer, list, tt);
+                 list.closed = true;
+                 Node.trimEmptyElement(lexer, list);
+                 return;
+             }
+
+             // deal with comments etc.
+             if (Node.insertMisc(list, node))
+             {
+                 continue;
+             }
+
+             // unknown tags (anything but text without a tag definition) are dropped
+             if (node.tag == null && node.type != Node.TEXT_NODE)
+             {
+                 lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             if (node.type == Node.END_TAG)
+             {
+                 if (node.tag == tt.tagForm)
+                 {
+                     badForm(lexer);
+                     lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0)
+                 {
+                     lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                     lexer.popInline(node);
+                     continue;
+                 }
+
+                 // if this is the end tag for an ancestor element then infer end tag for this element
+                 Node ancestor = list.parent;
+                 while (ancestor != null && node.tag != ancestor.tag)
+                 {
+                     ancestor = ancestor.parent;
+                 }
+
+                 if (ancestor != null)
+                 {
+                     lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
+                     lexer.ungetToken();
+                     coerceIfObsolete(lexer, list, tt);
+                     Node.trimEmptyElement(lexer, list);
+                     return;
+                 }
+
+                 lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             if (node.tag != tt.tagLi)
+             {
+                 lexer.ungetToken();
+
+                 boolean excludedBlock = node.tag != null
+                     && (node.tag.model & Dict.CM_BLOCK) != 0
+                     && lexer.excludeBlocks;
+
+                 if (excludedBlock)
+                 {
+                     lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
+                     Node.trimEmptyElement(lexer, list);
+                     return;
+                 }
+
+                 // wrap the pushed-back token in an inferred, bullet-less list item
+                 node = lexer.inferredTag("li");
+                 node.addAttribute("style", "list-style: none");
+                 lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG);
+             }
+
+             // node should be <LI>
+             list.insertNodeAtEnd(node);
+             parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+         }
+
+         // tokens ran out before the end tag was seen (node is null here)
+         coerceIfObsolete(lexer, list, tt);
+         lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
+         Node.trimEmptyElement(lexer, list);
+     }
+
+     /**
+      * Coerces <code>list</code> to <code>ul</code> when its tag model includes <code>Dict.CM_OBSOLETE</code>.
+      */
+     private static void coerceIfObsolete(Lexer lexer, Node list, TagTable tt)
+     {
+         if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
+         {
+             Node.coerceNode(lexer, list, tt.tagUl);
+         }
+     }
+
+ }
+
+ /**
+ * Parser for empty elements.
+ */
+ public static class ParseEmpty implements Parser
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node element, short mode)
+ {
+ if (lexer.isvoyager)
+ {
+ Node node = lexer.getToken(mode);
+ if (node != null && !(node.type == Node.END_TAG &&
node.tag == element.tag))
+ {
+ lexer.report.warning(lexer, element, node,
Report.ELEMENT_NOT_EMPTY);
+ lexer.ungetToken();
+ }
+ }
+ }
+ }
+
+ /**
+ * Parser for DEFLIST.
+ */
+ public static class ParseDefList implements Parser
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node list, short mode)
+ {
+ Node node, parent;
+ TagTable tt = lexer.configuration.tt;
+
+ if ((list.tag.model & Dict.CM_EMPTY) != 0)
+ {
+ return;
+ }
+
+ lexer.insert = -1; // defer implicit inline start tags
+
+ while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
+ {
+ if (node.tag == list.tag && node.type == Node.END_TAG)
+ {
+ list.closed = true;
+ Node.trimEmptyElement(lexer, list);
+ return;
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(list, node))
+ {
+ continue;
+ }
+
+ if (node.type == Node.TEXT_NODE)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("dt");
+ lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG);
+ }
+
+ if (node.tag == null)
+ {
+ lexer.report.warning(lexer, list, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // if this is the end tag for an ancestor element then infer end tag for
this element
+
+ if (node.type == Node.END_TAG)
+ {
+ if (node.tag == tt.tagForm)
+ {
+ badForm(lexer);
+ lexer.report.warning(lexer, list, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ for (parent = list.parent; parent != null; parent = parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ lexer.report.warning(lexer, list, node,
Report.MISSING_ENDTAG_BEFORE);
+
+ lexer.ungetToken();
+ Node.trimEmptyElement(lexer, list);
+ return;
+ }
+ }
+ }
+
+ // center in a dt or a dl breaks the dl list in two
+ if (node.tag == tt.tagCenter)
+ {
+ if (list.content != null)
+ {
+ list.insertNodeAfterElement(node);
+ }
+ else
+ {
+ // trim empty dl list
+ Node.insertNodeBeforeElement(list, node);
+
+ // #540296 tidy dumps with empty definition list
+ Node.discardElement(list);
+ }
+
+ // and parse contents of center
+ parseTag(lexer, node, mode);
+
+ // now create a new dl element
+ list = lexer.inferredTag("dl");
+ node.insertNodeAfterElement(list);
+ continue;
+ }
+
+ if (!(node.tag == tt.tagDt || node.tag == tt.tagDd))
+ {
+ lexer.ungetToken();
+
+ if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
+ {
+ lexer.report.warning(lexer, list, node,
Report.TAG_NOT_ALLOWED_IN);
+ Node.trimEmptyElement(lexer, list);
+ return;
+ }
+
+ // if DD appeared directly in BODY then exclude blocks
+ if (!((node.tag.model & Dict.CM_INLINE) != 0) &&
lexer.excludeBlocks)
+ {
+ Node.trimEmptyElement(lexer, list);
+ return;
+ }
+
+ node = lexer.inferredTag("dd");
+ lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG);
+ }
+
+ if (node.type == Node.END_TAG)
+ {
+ lexer.report.warning(lexer, list, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // node should be <DT> or <DD>
+ list.insertNodeAtEnd(node);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ }
+
+ lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
+ Node.trimEmptyElement(lexer, list);
+ }
+
+ }
+
+ /**
+ * Parser for PRE.
+ */
+ public static class ParsePre implements Parser
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node pre, short mode)
+ {
+ Node node;
+ TagTable tt = lexer.configuration.tt;
+
+ if ((pre.tag.model & Dict.CM_EMPTY) != 0)
+ {
+ return;
+ }
+
+ if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
+ {
+ Node.coerceNode(lexer, pre, tt.tagPre);
+ }
+
+ lexer.inlineDup(null); // tell lexer to insert inlines if needed
+
+ while ((node = lexer.getToken(Lexer.PREFORMATTED)) != null)
+ {
+ if (node.tag == pre.tag && node.type == Node.END_TAG)
+ {
+ Node.trimSpaces(lexer, pre);
+ pre.closed = true;
+ Node.trimEmptyElement(lexer, pre);
+ return;
+ }
+
+ if (node.tag == tt.tagHtml)
+ {
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+ {
+ lexer.report.warning(lexer, pre, node,
Report.DISCARDING_UNEXPECTED);
+ }
+
+ continue;
+ }
+
+ if (node.type == Node.TEXT_NODE)
+ {
+ // if first check for inital newline
+ if (pre.content == null)
+ {
+ if (node.textarray[node.start] == (byte) '\n')
+ {
+ ++node.start;
+ }
+
+ if (node.start >= node.end)
+ {
+ continue;
+ }
+ }
+
+ pre.insertNodeAtEnd(node);
+ continue;
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(pre, node))
+ {
+ continue;
+ }
+
+ // strip unexpected tags
+ if (!lexer.preContent(node))
+ {
+ Node newnode;
+
+ lexer.report.warning(lexer, pre, node, Report.UNESCAPED_ELEMENT);
+ newnode = Node.escapeTag(lexer, node);
+ pre.insertNodeAtEnd(newnode);
+ continue;
+ }
+
+ if (node.tag == tt.tagP)
+ {
+ if (node.type == Node.START_TAG)
+ {
+ lexer.report.warning(lexer, pre, node,
Report.USING_BR_INPLACE_OF);
+
+ // trim white space before <p> in <pre>
+ Node.trimSpaces(lexer, pre);
+
+ // coerce both <p> and </p> to <br>
+ Node.coerceNode(lexer, node, tt.tagBr);
+ pre.insertNodeAtEnd(node);
+ }
+ else
+ {
+ lexer.report.warning(lexer, pre, node,
Report.DISCARDING_UNEXPECTED);
+ }
+ continue;
+ }
+
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+ {
+ // trim white space before <br>
+ if (node.tag == tt.tagBr)
+ {
+ Node.trimSpaces(lexer, pre);
+ }
+
+ pre.insertNodeAtEnd(node);
+ parseTag(lexer, node, Lexer.PREFORMATTED);
+ continue;
+ }
+
+ // discard unexpected tags
+ lexer.report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+ }
+
+ lexer.report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
+ Node.trimEmptyElement(lexer, pre);
+ }
+
+ }
+
+ /**
+ * Parser for block elements.
+ */
+ public static class ParseBlock implements Parser
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node element, short mode)
+ {
+ // element is node created by the lexer upon seeing the start tag, or by the
parser when the start tag is
+ // inferred.
+ Node node, parent;
+ boolean checkstack;
+ int istackbase = 0;
+ TagTable tt = lexer.configuration.tt;
+
+ checkstack = true;
+
+ if ((element.tag.model & Dict.CM_EMPTY) != 0)
+ {
+ return;
+ }
+
+ if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm))
+ {
+ lexer.report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
+ }
+
+ // InlineDup() asks the lexer to insert inline emphasis tags currently pushed
on the istack, but take care
+ // to avoid propagating inline emphasis inside OBJECT or APPLET. For these
elements a fresh inline stack
+ // context is created and disposed of upon reaching the end of the element.
They thus behave like table
+ // cells in this respect.
+
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ istackbase = lexer.istackbase;
+ lexer.istackbase = lexer.istack.size();
+ }
+
+ if (!((element.tag.model & Dict.CM_MIXED) != 0))
+ {
+ lexer.inlineDup(null);
+ }
+
+ mode = Lexer.IGNORE_WHITESPACE;
+
+ while ((node = lexer.getToken(mode)) != null)
+ {
+ // end tag for this element
+ if (node.type == Node.END_TAG
+ && node.tag != null
+ && (node.tag == element.tag || element.was == node.tag))
+ {
+
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ // pop inline stack
+ while (lexer.istack.size() > lexer.istackbase)
+ {
+ lexer.popInline(null);
+ }
+ lexer.istackbase = istackbase;
+ }
+
+ element.closed = true;
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ if (node.tag == tt.tagHtml || node.tag == tt.tagHead || node.tag ==
tt.tagBody)
+ {
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+ {
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ }
+
+ continue;
+ }
+
+ if (node.type == Node.END_TAG)
+ {
+ if (node.tag == null)
+ {
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+
+ continue;
+ }
+ else if (node.tag == tt.tagBr)
+ {
+ node.type = Node.START_TAG;
+ }
+ else if (node.tag == tt.tagP)
+ {
+ Node.coerceNode(lexer, node, tt.tagBr);
+ element.insertNodeAtEnd(node);
+ node = lexer.inferredTag("br");
+ }
+ else
+ {
+ // if this is the end tag for an ancestor element then infer end
tag for this element
+
+ for (parent = element.parent; parent != null; parent =
parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ if (!((element.tag.model & Dict.CM_OPT) != 0))
+ {
+ lexer.report.warning(lexer, element, node,
Report.MISSING_ENDTAG_BEFORE);
+ }
+
+ lexer.ungetToken();
+
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ // pop inline stack
+ while (lexer.istack.size() > lexer.istackbase)
+ {
+ lexer.popInline(null);
+ }
+ lexer.istackbase = istackbase;
+ }
+
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ // special case </tr> etc. for stuff moved in front of
table
+ if (lexer.exiled && node.tag.model != 0 &&
(node.tag.model & Dict.CM_TABLE) != 0)
+ {
+ lexer.ungetToken();
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ }
+
+ // mixed content model permits text
+ if (node.type == Node.TEXT_NODE)
+ {
+ boolean iswhitenode = false;
+
+ if (node.type == Node.TEXT_NODE
+ && node.end <= node.start + 1
+ && lexer.lexbuf[node.start] == (byte) ' ')
+ {
+ iswhitenode = true;
+ }
+
+ if (lexer.configuration.encloseBlockText && !iswhitenode)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("p");
+ element.insertNodeAtEnd(node);
+ parseTag(lexer, node, Lexer.MIXED_CONTENT);
+ continue;
+ }
+
+ if (checkstack)
+ {
+ checkstack = false;
+
+ if (!((element.tag.model & Dict.CM_MIXED) != 0))
+ {
+ if (lexer.inlineDup(node) > 0)
+ {
+ continue;
+ }
+ }
+ }
+
+ element.insertNodeAtEnd(node);
+ mode = Lexer.MIXED_CONTENT;
+
+ // HTML4 strict doesn't allow mixed content for elements with
%block; as their content model
+ // But only body, map, blockquote, form and noscript have content
model %block;
+ if (element.tag == tt.tagBody
+ || element.tag == tt.tagMap
+ || element.tag == tt.tagBlockquote
+ || element.tag == tt.tagForm
+ || element.tag == tt.tagNoscript)
+ {
+ lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
+ }
+ continue;
+ }
+
+ if (Node.insertMisc(element, node))
+ {
+ continue;
+ }
+
+ // allow PARAM elements?
+ if (node.tag == tt.tagParam)
+ {
+ if (((element.tag.model & Dict.CM_PARAM) != 0)
+ && (node.type == Node.START_TAG || node.type ==
Node.START_END_TAG))
+ {
+ element.insertNodeAtEnd(node);
+ continue;
+ }
+
+ // otherwise discard it
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // allow AREA elements?
+ if (node.tag == tt.tagArea)
+ {
+ if ((element.tag == tt.tagMap) && (node.type ==
Node.START_TAG || node.type == Node.START_END_TAG))
+ {
+ element.insertNodeAtEnd(node);
+ continue;
+ }
+
+ // otherwise discard it
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // ignore unknown start/end tags
+ if (node.tag == null)
+ {
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // Allow Dict.CM_INLINE elements here. Allow Dict.CM_BLOCK elements here
unless lexer.excludeBlocks is
+ // yes. LI and DD are special cased. Otherwise infer end tag for this
element.
+
+ if (!((node.tag.model & Dict.CM_INLINE) != 0))
+ {
+ if (node.type != Node.START_TAG && node.type !=
Node.START_END_TAG)
+ {
+ if (node.tag == tt.tagForm)
+ {
+ badForm(lexer);
+ }
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // #427671 - Fix by Randy Waki - 10 Aug 00
+ // If an LI contains an illegal FRAME, FRAMESET, OPTGROUP, or OPTION
start tag, discard the start
+ // tag and let the subsequent content get parsed as content of the
enclosing LI. This seems to
+ // mimic IE and Netscape, and avoids an infinite loop: without this
check, ParseBlock (which is
+ // parsing the LI's content) and ParseList (which is parsing the
LI's parent's content) repeatedly
+ // defer to each other to parse the illegal start tag, each time
inferring a missing </li> or <li>
+ // respectively. NOTE: This check is a bit fragile. It specifically
checks for the four tags that
+ // happen to weave their way through the current series of tests
performed by ParseBlock and
+ // ParseList to trigger the infinite loop.
+
+ if (element.tag == tt.tagLi)
+ {
+ if (node.tag == tt.tagFrame
+ || node.tag == tt.tagFrameset
+ || node.tag == tt.tagOptgroup
+ || node.tag == tt.tagOption)
+ {
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+ }
+
+ if (element.tag == tt.tagTd || element.tag == tt.tagTh)
+ {
+ // if parent is a table cell, avoid inferring the end of the
cell
+
+ if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ moveToHead(lexer, element, node);
+ continue;
+ }
+
+ if ((node.tag.model & Dict.CM_LIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("ul");
+ node.addClass("noindent");
+ lexer.excludeBlocks = true;
+ }
+ else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("dl");
+ lexer.excludeBlocks = true;
+ }
+
+ // infer end of current table cell
+ if (!((node.tag.model & Dict.CM_BLOCK) != 0))
+ {
+ lexer.ungetToken();
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ else if ((node.tag.model & Dict.CM_BLOCK) != 0)
+ {
+ if (lexer.excludeBlocks)
+ {
+ if (!((element.tag.model & Dict.CM_OPT) != 0))
+ {
+ lexer.report.warning(lexer, element, node,
Report.MISSING_ENDTAG_BEFORE);
+ }
+
+ lexer.ungetToken();
+
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ lexer.istackbase = istackbase;
+ }
+
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ else
+ {
+ // things like list items
+
+ if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ moveToHead(lexer, element, node);
+ continue;
+ }
+
+ // special case where a form start tag occurs in a tr and is
followed by td or th
+ if (element.tag == tt.tagForm && element.parent.tag ==
tt.tagTd && element.parent.implicit)
+ {
+ if (node.tag == tt.tagTd)
+ {
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.tag == tt.tagTh)
+ {
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ node = element.parent;
+ node.element = "th";
+ node.tag = tt.tagTh;
+ continue;
+ }
+ }
+
+ if (!((element.tag.model & Dict.CM_OPT) != 0) &&
!element.implicit)
+ {
+ lexer.report.warning(lexer, element, node,
Report.MISSING_ENDTAG_BEFORE);
+ }
+
+ lexer.ungetToken();
+
+ if ((node.tag.model & Dict.CM_LIST) != 0)
+ {
+ if (element.parent != null
+ && element.parent.tag != null
+ && element.parent.tag.getParser() == LIST)
+ {
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ node = lexer.inferredTag("ul");
+ node.addClass("noindent");
+ }
+ else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+ {
+ if (element.parent.tag == tt.tagDl)
+ {
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ node = lexer.inferredTag("dl");
+ }
+ else if ((node.tag.model & Dict.CM_TABLE) != 0 ||
(node.tag.model & Dict.CM_ROW) != 0)
+ {
+ node = lexer.inferredTag("table");
+ }
+ else if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ // pop inline stack
+ while (lexer.istack.size() > lexer.istackbase)
+ {
+ lexer.popInline(null);
+ }
+ lexer.istackbase = istackbase;
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+
+ }
+ else
+ {
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ }
+
+ // parse known element
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+ {
+ if (TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE))
+ {
+ // DSR - 27Apr02 ensure we wrap anchors and other inline content
+ if (lexer.configuration.encloseBlockText)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("p");
+ element.insertNodeAtEnd(node);
+ parseTag(lexer, node, Lexer.MIXED_CONTENT);
+ continue;
+ }
+
+ if (checkstack && !node.implicit)
+ {
+ checkstack = false;
+
+ // #431731 - fix by Randy Waki 25 Dec 00
+ if (!TidyUtils.toBoolean(element.tag.model &
Dict.CM_MIXED))
+ {
+ if (lexer.inlineDup(node) > 0)
+ {
+ continue;
+ }
+ }
+ }
+
+ mode = Lexer.MIXED_CONTENT;
+ }
+ else
+ {
+ checkstack = true;
+ mode = Lexer.IGNORE_WHITESPACE;
+ }
+
+ // trim white space before <br>
+ if (node.tag == tt.tagBr)
+ {
+ Node.trimSpaces(lexer, element);
+ }
+
+ element.insertNodeAtEnd(node);
+
+ if (node.implicit)
+ {
+ lexer.report.warning(lexer, element, node,
Report.INSERTING_TAG);
+ }
+
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE // Lexer.MixedContent
+ );
+ continue;
+ }
+
+ // discard unexpected tags
+ if (node.type == Node.END_TAG)
+ {
+ lexer.popInline(node); // if inline end tag
+ }
+
+ lexer.report.warning(lexer, element, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (!((element.tag.model & Dict.CM_OPT) != 0))
+ {
+ lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
+ }
+
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ // pop inline stack
+ while (lexer.istack.size() > lexer.istackbase)
+ {
+ lexer.popInline(null);
+ }
+ lexer.istackbase = istackbase;
+ }
+
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ }
+
+ }
+
+ /**
+ * Parser for TABLE.
+ */
+ public static class ParseTableTag implements Parser
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node table, short mode)
+ {
+ Node node, parent;
+ int istackbase;
+ TagTable tt = lexer.configuration.tt;
+
+ lexer.deferDup();
+ istackbase = lexer.istackbase;
+ lexer.istackbase = lexer.istack.size();
+
+ while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
+ {
+ if (node.tag == table.tag && node.type == Node.END_TAG)
+ {
+ lexer.istackbase = istackbase;
+ table.closed = true;
+ Node.trimEmptyElement(lexer, table);
+ return;
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(table, node))
+ {
+ continue;
+ }
+
+ // discard unknown tags
+ if (node.tag == null && node.type != Node.TEXT_NODE)
+ {
+ lexer.report.warning(lexer, table, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // if TD or TH or text or inline or block then infer <TR>
+
+ if (node.type != Node.END_TAG)
+ {
+ // Modified by Alexander Smirnov
+ // TODO - correct parse table with <thead> but withowt
<tbody>
+ if (node.tag == tt.tagTr )
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("tbody");
+ lexer.report.warning(lexer, table, node,
Report.MISSING_STARTTAG);
+ }
+ else if (node.tag == tt.tagTd || node.tag == tt.tagTh || node.tag ==
tt.tagTable)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("tr");
+ lexer.report.warning(lexer, table, node,
Report.MISSING_STARTTAG);
+ }
+ else if (node.type == Node.TEXT_NODE || (node.tag.model &
(Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
+ {
+ Node.insertNodeBeforeElement(table, node);
+ lexer.report.warning(lexer, table, node,
Report.TAG_NOT_ALLOWED_IN);
+ lexer.exiled = true;
+
+ if (!(node.type == Node.TEXT_NODE)) // #427662 - was (!node.type
== TextNode) - fix by Young
+ {
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ }
+
+ lexer.exiled = false;
+ continue;
+ }
+ else if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ moveToHead(lexer, table, node);
+ continue;
+ }
+ }
+
+ // if this is the end tag for an ancestor element then infer end tag for
this element
+
+ if (node.type == Node.END_TAG)
+ {
+ if (node.tag == tt.tagForm
+ || (node.tag != null && ((node.tag.model &
(Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)))
+ {
+ badForm(lexer);
+ lexer.report.warning(lexer, table, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if ((node.tag != null && (node.tag.model & (Dict.CM_TABLE
| Dict.CM_ROW)) != 0)
+ || (node.tag != null && (node.tag.model &
(Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
+ {
+ lexer.report.warning(lexer, table, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ for (parent = table.parent; parent != null; parent = parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ lexer.report.warning(lexer, table, node,
Report.MISSING_ENDTAG_BEFORE);
+ lexer.ungetToken();
+ lexer.istackbase = istackbase;
+ Node.trimEmptyElement(lexer, table);
+ return;
+ }
+ }
+ }
+
+ if (!((node.tag.model & Dict.CM_TABLE) != 0))
+ {
+ lexer.ungetToken();
+ lexer.report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
+ lexer.istackbase = istackbase;
+ Node.trimEmptyElement(lexer, table);
+ return;
+ }
+
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+ {
+ table.insertNodeAtEnd(node);
+
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ continue;
+ }
+
+ // discard unexpected text nodes and end tags
+ lexer.report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
+ }
+
+ lexer.report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR);
+ Node.trimEmptyElement(lexer, table);
+ lexer.istackbase = istackbase;
+ }
+
+ }
+
+ /**
+  * Parser for COLGROUP.
+  * <p>
+  * Collects COL children. Parsing stops at the matching end tag, at an end tag belonging to an ancestor, at text,
+  * or at any known element other than COL; in the last three cases the token is pushed back for the caller.
+  * </p>
+  */
+ public static class ParseColGroup implements Parser
+ {
+
+     /**
+      * Parses the content of a COLGROUP element.
+      * @param lexer token source and error sink
+      * @param colgroup the COLGROUP node being filled
+      * @param mode not consulted; tokens are always read with <code>Lexer.IGNORE_WHITESPACE</code>
+      * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer, org.ajax4jsf.org.w3c.tidy.Node, short)
+      */
+     public void parse(Lexer lexer, Node colgroup, short mode)
+     {
+         TagTable tt = lexer.configuration.tt;
+
+         // an element declared empty has no content to collect
+         if ((colgroup.tag.model & Dict.CM_EMPTY) != 0)
+         {
+             return;
+         }
+
+         for (Node token = lexer.getToken(Lexer.IGNORE_WHITESPACE);
+             token != null;
+             token = lexer.getToken(Lexer.IGNORE_WHITESPACE))
+         {
+             if (token.type == Node.END_TAG)
+             {
+                 // the element's own end tag closes it
+                 if (token.tag == colgroup.tag)
+                 {
+                     colgroup.closed = true;
+                     return;
+                 }
+
+                 // a misplaced </form> is recorded and dropped
+                 if (token.tag == tt.tagForm)
+                 {
+                     badForm(lexer);
+                     lexer.report.warning(lexer, colgroup, token, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 // an end tag belonging to an ancestor implies the end of this element
+                 Node ancestor = colgroup.parent;
+                 while (ancestor != null && token.tag != ancestor.tag)
+                 {
+                     ancestor = ancestor.parent;
+                 }
+                 if (ancestor != null)
+                 {
+                     lexer.ungetToken();
+                     return;
+                 }
+             }
+
+             // text ends the column group; leave it for the caller
+             if (token.type == Node.TEXT_NODE)
+             {
+                 lexer.ungetToken();
+                 return;
+             }
+
+             // comments and similar nodes are simply attached
+             if (Node.insertMisc(colgroup, token))
+             {
+                 continue;
+             }
+
+             // unknown tags are dropped
+             if (token.tag == null)
+             {
+                 lexer.report.warning(lexer, colgroup, token, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             // anything other than COL ends the column group
+             if (token.tag != tt.tagCol)
+             {
+                 lexer.ungetToken();
+                 return;
+             }
+
+             // a stray </col> is dropped
+             if (token.type == Node.END_TAG)
+             {
+                 lexer.report.warning(lexer, colgroup, token, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             // what remains is a <COL> element
+             colgroup.insertNodeAtEnd(token);
+             parseTag(lexer, token, Lexer.IGNORE_WHITESPACE);
+         }
+     }
+
+ }
+
+ /**
+ * Parser for ROWGROUP.
+ * <p>
+ * Collects the rows of a row group element. Tokens are read until the group's own end tag, a second
+ * start tag of the same element, a &lt;/table&gt; end tag, an end tag matching an ancestor, or another
+ * CM_ROWGRP element is seen. A TR is inferred around stray TD/TH tokens and around any other element
+ * that reaches the end of the loop. Text and block/inline elements are reported as TAG_NOT_ALLOWED_IN
+ * and, when <code>configuration.moveElements</code> is set, moved before the table; CM_HEAD elements
+ * are handed to <code>moveToHead</code>.
+ * </p>
+ */
+ public static class ParseRowGroup implements Parser
+ {
+
+ /**
+ * Parses the content of a row group.
+ * @param lexer token source and error sink
+ * @param rowgroup the row group node whose rows are collected
+ * @param mode not consulted; tokens are always read with <code>Lexer.IGNORE_WHITESPACE</code>
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node rowgroup, short mode)
+ {
+ Node node, parent;
+ TagTable tt = lexer.configuration.tt;
+
+ if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0) // declared empty: nothing to parse
+ {
+ return;
+ }
+
+ while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
+ {
+ if (node.tag == rowgroup.tag)
+ {
+ if (node.type == Node.END_TAG)
+ {
+ rowgroup.closed = true;
+ Node.trimEmptyElement(lexer, rowgroup);
+ return;
+ }
+
+ lexer.ungetToken(); // same element opened again: end this group without marking it closed
+ return;
+ }
+
+ // if </table> infer end tag
+ if (node.tag == tt.tagTable && node.type == Node.END_TAG)
+ {
+ lexer.ungetToken();
+ Node.trimEmptyElement(lexer, rowgroup);
+ return;
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(rowgroup, node))
+ {
+ continue;
+ }
+
+ // discard unknown tags
+ if (node.tag == null && node.type != Node.TEXT_NODE)
+ {
+ lexer.report.warning(lexer, rowgroup, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // if TD or TH then infer <TR> if text or inline or block move
before table if head content move to
+ // head
+
+ if (node.type != Node.END_TAG)
+ {
+ if (node.tag == tt.tagTd || node.tag == tt.tagTh)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("tr");
+ lexer.report.warning(lexer, rowgroup, node,
Report.MISSING_STARTTAG);
+ }
+ else if (node.type == Node.TEXT_NODE || (node.tag.model &
(Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
+ {
+ lexer.report.warning(lexer, rowgroup, node,
Report.TAG_NOT_ALLOWED_IN);
+ if (lexer.configuration.moveElements) { // relocation is optional; the warning is not
+ Node.moveBeforeTable(rowgroup, node, tt);
+ lexer.exiled = true;
+ }
+
+ // #427662 was (!node.type == TextNode) fix by Young 04 Aug 00
+ if (node.type != Node.TEXT_NODE)
+ {
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ }
+
+ lexer.exiled = false; // reset unconditionally, even when nothing was moved
+ continue;
+ }
+ else if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ lexer.report.warning(lexer, rowgroup, node,
Report.TAG_NOT_ALLOWED_IN);
+ moveToHead(lexer, rowgroup, node);
+ continue;
+ }
+ }
+
+ // if this is the end tag for ancestor element then infer end tag for
this element
+
+ if (node.type == Node.END_TAG)
+ {
+
+ if (node.tag == tt.tagForm
+ || (node.tag != null && (node.tag.model &
(Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
+ {
+ if (node.tag == tt.tagForm) // only a stray </form> is counted as a form error
+ {
+ badForm(lexer);
+ }
+ lexer.report.warning(lexer, rowgroup, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag ==
tt.tagTh)
+ {
+ lexer.report.warning(lexer, rowgroup, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ for (parent = rowgroup.parent; parent != null; parent =
parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ lexer.ungetToken();
+ Node.trimEmptyElement(lexer, rowgroup);
+ return;
+ }
+ }
+
+ }
+
+ // if THEAD, TFOOT or TBODY then implied end tag
+
+ if ((node.tag.model & Dict.CM_ROWGRP) != 0)
+ {
+ if (node.type != Node.END_TAG) // a start tag is pushed back; an end tag is consumed here
+ {
+ lexer.ungetToken();
+ }
+
+ Node.trimEmptyElement(lexer, rowgroup);
+ return;
+ }
+
+ if (node.type == Node.END_TAG)
+ {
+ lexer.report.warning(lexer, rowgroup, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (!(node.tag == tt.tagTr))
+ {
+ node = lexer.inferredTag("tr");
+ lexer.report.warning(lexer, rowgroup, node,
Report.MISSING_STARTTAG);
+ lexer.ungetToken();
+ }
+
+ // node should be <TR>
+ rowgroup.insertNodeAtEnd(node);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ }
+ Node.trimEmptyElement(lexer, rowgroup); // input ended without an end tag; no warning is issued here
+ }
+ }
+
+ /**
+ * Parser for ROW.
+ * <p>
+ * Collects the TD/TH cells of a row. Parsing ends at the row's own end tag, at a second start tag of
+ * the same element, at an end tag matching an ancestor, or at a CM_ROWGRP element. A TD is inferred
+ * for a FORM start tag. Text and block/inline elements are reported as TAG_NOT_ALLOWED_IN and, when
+ * <code>configuration.moveElements</code> is set, moved before the table; CM_HEAD elements are handed
+ * to <code>moveToHead</code>. Nested &lt;table&gt; tokens and any other element that is not TD/TH are
+ * reported and dropped.
+ * </p>
+ */
+ public static class ParseRow implements Parser
+ {
+
+ /**
+ * Parses the content of a table row.
+ * @param lexer token source and error sink
+ * @param row the row node whose cells are collected
+ * @param mode not consulted; tokens are always read with <code>Lexer.IGNORE_WHITESPACE</code>
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node row, short mode)
+ {
+ Node node, parent;
+ boolean excludeState;
+ TagTable tt = lexer.configuration.tt;
+
+ if ((row.tag.model & Dict.CM_EMPTY) != 0) // declared empty: nothing to parse
+ {
+ return;
+ }
+
+ while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
+ {
+ if (node.tag == row.tag)
+ {
+ if (node.type == Node.END_TAG)
+ {
+ row.closed = true;
+ Node.fixEmptyRow(lexer, row);
+ return;
+ }
+
+ lexer.ungetToken(); // same element opened again: end this row without marking it closed
+ Node.fixEmptyRow(lexer, row);
+ return;
+ }
+
+ // if this is the end tag for an ancestor element then infer end tag for
this element
+ if (node.type == Node.END_TAG)
+ {
+ if (node.tag == tt.tagForm
+ || (node.tag != null && (node.tag.model &
(Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
+ {
+ if (node.tag == tt.tagForm) // only a stray </form> is counted as a form error
+ {
+ badForm(lexer);
+ }
+ lexer.report.warning(lexer, row, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.tag == tt.tagTd || node.tag == tt.tagTh)
+ {
+ lexer.report.warning(lexer, row, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ for (parent = row.parent; parent != null; parent = parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ lexer.ungetToken();
+ Node.trimEmptyElement(lexer, row); // note: this exit uses trimEmptyElement, not fixEmptyRow
+ return;
+ }
+ }
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(row, node))
+ {
+ continue;
+ }
+
+ // discard unknown tags
+ if (node.tag == null && node.type != Node.TEXT_NODE)
+ {
+ lexer.report.warning(lexer, row, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // discard unexpected <table> element
+ if (node.tag == tt.tagTable)
+ {
+ lexer.report.warning(lexer, row, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // THEAD, TFOOT or TBODY
+ if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) !=
0)
+ {
+ lexer.ungetToken();
+ Node.trimEmptyElement(lexer, row);
+ return;
+ }
+
+ if (node.type == Node.END_TAG)
+ {
+ lexer.report.warning(lexer, row, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // if text or inline or block move before table if head content move to
head
+
+ if (node.type != Node.END_TAG) // always true here: end tags were discarded just above
+ {
+ if (node.tag == tt.tagForm)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("td");
+ lexer.report.warning(lexer, row, node, Report.MISSING_STARTTAG);
+ }
+ else if (node.type == Node.TEXT_NODE || (node.tag.model &
(Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
+ {
+ lexer.report.warning(lexer, row, node,
Report.TAG_NOT_ALLOWED_IN);
+ if (lexer.configuration.moveElements) { // relocation is optional; the warning is not
+ Node.moveBeforeTable(row, node, tt);
+ lexer.exiled = true;
+ }
+
+ if (node.type != Node.TEXT_NODE)
+ {
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ }
+
+ lexer.exiled = false; // reset unconditionally, even when nothing was moved
+ continue;
+ }
+ else if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ lexer.report.warning(lexer, row, node,
Report.TAG_NOT_ALLOWED_IN);
+ moveToHead(lexer, row, node);
+ continue;
+ }
+ }
+
+ if (!(node.tag == tt.tagTd || node.tag == tt.tagTh))
+ {
+ lexer.report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
+ continue;
+ }
+
+ // node should be <TD> or <TH>
+ row.insertNodeAtEnd(node);
+ excludeState = lexer.excludeBlocks; // saved so the cell can be parsed with excludeBlocks cleared
+ lexer.excludeBlocks = false;
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ lexer.excludeBlocks = excludeState; // restore the caller's setting
+
+ // pop inline stack
+
+ while (lexer.istack.size() > lexer.istackbase)
+ {
+ lexer.popInline(null);
+ }
+ }
+
+ Node.trimEmptyElement(lexer, row); // input ended without an end tag; no warning is issued here
+ }
+
+ }
+
+ /**
+ * Parser for NOFRAMES.
+ * <p>
+ * Records <code>Report.USING_NOFRAMES</code> in <code>lexer.badAccess</code> and collects content
+ * until the matching end tag. A FRAME or FRAMESET token ends the element. An explicit BODY start tag
+ * is inserted and parsed; if the end of the body had already been seen before it, the node is then
+ * coerced to DIV and passed to <code>moveNodeToBody</code>. Text or any other known non-end-tag token
+ * either goes into the existing body (when <code>lexer.seenEndBody</code> is set) or causes a BODY to
+ * be inferred.
+ * </p>
+ */
+ public static class ParseNoFrames implements Parser
+ {
+
+ /**
+ * Parses the content of a NOFRAMES element.
+ * @param lexer token source and error sink
+ * @param noframes the NOFRAMES node being filled
+ * @param mode overwritten on entry with <code>Lexer.IGNORE_WHITESPACE</code>
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node noframes, short mode)
+ {
+ Node node;
+ TagTable tt = lexer.configuration.tt;
+
+ lexer.badAccess |= Report.USING_NOFRAMES;
+ mode = Lexer.IGNORE_WHITESPACE; // the caller's mode is ignored
+
+ while ((node = lexer.getToken(mode)) != null)
+ {
+ if (node.tag == noframes.tag && node.type == Node.END_TAG)
+ {
+ noframes.closed = true;
+ Node.trimSpaces(lexer, noframes);
+ return;
+ }
+
+ if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset))
+ {
+
+ Node.trimSpaces(lexer, noframes);
+
+ // fix for [539369]
+ if (node.type == Node.END_TAG)
+ {
+ lexer.report.warning(lexer, noframes, node,
Report.DISCARDING_UNEXPECTED);
+ // Throw it away
+ }
+ else
+ {
+ lexer.report.warning(lexer, noframes, node,
Report.MISSING_ENDTAG_BEFORE);
+
+ lexer.ungetToken();
+ }
+ return; // either way the NOFRAMES element ends here, without being marked closed
+ }
+
+ if (node.tag == tt.tagHtml)
+ {
+ if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
+ {
+ lexer.report.warning(lexer, noframes, node,
Report.DISCARDING_UNEXPECTED);
+ }
+
+ continue; // html tokens are always skipped; only start tags are reported
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(noframes, node))
+ {
+ continue;
+ }
+
+ if (node.tag == tt.tagBody && node.type == Node.START_TAG)
+ {
+ boolean seenbody = lexer.seenEndBody; // sampled before parsing this body
+ noframes.insertNodeAtEnd(node);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); // MixedContent
+
+ if (seenbody)
+ {
+ Node.coerceNode(lexer, node, tt.tagDiv);
+ moveNodeToBody(lexer, node);
+ }
+ continue;
+ }
+
+ // implicit body element inferred
+ if (node.type == Node.TEXT_NODE || (node.tag != null && node.type
!= Node.END_TAG))
+ {
+ if (lexer.seenEndBody)
+ {
+ Node body = lexer.root.findBody(tt); // NOTE(review): not null-checked before use below
+
+ if (node.type == Node.TEXT_NODE)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("p");
+ lexer.report.warning(lexer, noframes, node,
Report.CONTENT_AFTER_BODY);
+ }
+
+ body.insertNodeAtEnd(node);
+ }
+ else
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("body");
+ if (lexer.configuration.xmlOut) // the inserted tag is reported only for XML output
+ {
+ lexer.report.warning(lexer, noframes, node,
Report.INSERTING_TAG);
+ }
+ noframes.insertNodeAtEnd(node);
+ }
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ // MixedContent
+ continue;
+ }
+ // discard unexpected end tags
+ lexer.report.warning(lexer, noframes, node,
Report.DISCARDING_UNEXPECTED);
+ }
+
+ lexer.report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR); // node is null here
+ }
+
+ }
+
+ /**
+ * Parser for SELECT.
+ */
+ public static class ParseSelect implements Parser
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node field, short mode)
+ {
+ Node node;
+ TagTable tt = lexer.configuration.tt;
+
+ lexer.insert = -1; // defer implicit inline start tags
+
+ while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
+ {
+ if (node.tag == field.tag && node.type == Node.END_TAG)
+ {
+ field.closed = true;
+ Node.trimSpaces(lexer, field);
+ return;
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(field, node))
+ {
+ continue;
+ }
+
+ if (node.type == Node.START_TAG
+ && (node.tag == tt.tagOption || node.tag == tt.tagOptgroup ||
node.tag == tt.tagScript))
+ {
+ field.insertNodeAtEnd(node);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ continue;
+ }
+
+ // discard unexpected tags
+ lexer.report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
+ }
+
+ lexer.report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
+ }
+
+ }
+
+ /**
+ * Parser for text nodes.
+ */
+ public static class ParseText implements Parser
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node field, short mode)
+ {
+ Node node;
+ TagTable tt = lexer.configuration.tt;
+
+ lexer.insert = -1; // defer implicit inline start tags
+
+ if (field.tag == tt.tagTextarea)
+ {
+ mode = Lexer.PREFORMATTED;
+ }
+ else
+ {
+ mode = Lexer.MIXED_CONTENT; // kludge for font tags
+ }
+
+ while ((node = lexer.getToken(mode)) != null)
+ {
+ if (node.tag == field.tag && node.type == Node.END_TAG)
+ {
+ field.closed = true;
+ Node.trimSpaces(lexer, field);
+ return;
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(field, node))
+ {
+ continue;
+ }
+
+ if (node.type == Node.TEXT_NODE)
+ {
+ // only called for 1st child
+ if (field.content == null && !((mode &
Lexer.PREFORMATTED) != 0))
+ {
+ Node.trimSpaces(lexer, field);
+ }
+
+ if (node.start >= node.end)
+ {
+ continue;
+ }
+
+ field.insertNodeAtEnd(node);
+ continue;
+ }
+
+ // for textarea should all cases of < and & be escaped?
+ // discard inline tags e.g. font
+ if (node.tag != null
+ && ((node.tag.model & Dict.CM_INLINE) != 0)
+ && (node.tag.model & Dict.CM_FIELD) == 0) // #487283 -
fix by Lee Passey 25 Jan 02
+ {
+ lexer.report.warning(lexer, field, node,
Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ // terminate element on other tags
+ if (!((field.tag.model & Dict.CM_OPT) != 0))
+ {
+ lexer.report.warning(lexer, field, node,
Report.MISSING_ENDTAG_BEFORE);
+ }
+
+ lexer.ungetToken();
+ Node.trimSpaces(lexer, field);
+ return;
+ }
+
+ if (!((field.tag.model & Dict.CM_OPT) != 0))
+ {
+ lexer.report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
+ }
+ }
+
+ }
+
+ /**
+ * Parser for OPTGROUP.
+ */
+ public static class ParseOptGroup implements Parser
+ {
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.Parser#parse(org.ajax4jsf.org.w3c.tidy.Lexer,
org.ajax4jsf.org.w3c.tidy.Node, short)
+ */
+ public void parse(Lexer lexer, Node field, short mode)
+ {
+ Node node;
+ TagTable tt = lexer.configuration.tt;
+
+ lexer.insert = -1; // defer implicit inline start tags
+
+ while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
+ {
+ if (node.tag == field.tag && node.type == Node.END_TAG)
+ {
+ field.closed = true;
+ Node.trimSpaces(lexer, field);
+ return;
+ }
+
+ // deal with comments etc.
+ if (Node.insertMisc(field, node))
+ {
+ continue;
+ }
+
+ if (node.type == Node.START_TAG && (node.tag == tt.tagOption ||
node.tag == tt.tagOptgroup))
+ {
+ if (node.tag == tt.tagOptgroup)
+ {
+ lexer.report.warning(lexer, field, node, Report.CANT_BE_NESTED);
+ }
+
+ field.insertNodeAtEnd(node);
+ parseTag(lexer, node, Lexer.MIXED_CONTENT);
+ continue;
+ }
+
+ // discard unexpected tags
+ lexer.report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
+ }
+ }
+
+ }
+
+ /**
+  * Parses an HTML document; HTML is the top level element.
+  * <p>
+  * Creates the root node, stores it in <code>lexer.root</code>, collects leading comments and the first DOCTYPE,
+  * then hands the explicit or inferred <code>html</code> element to the <code>HTML</code> parser.
+  * </p>
+  * @param lexer token source and error sink
+  * @return the root node of the parsed document
+  */
+ public static Node parseDocument(Lexer lexer)
+ {
+     TagTable tt = lexer.configuration.tt;
+
+     Node document = lexer.newNode();
+     document.type = Node.ROOT_NODE;
+     lexer.root = document;
+
+     Node doctype = null;
+     Node node;
+
+     while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
+     {
+         // deal with comments etc.
+         if (Node.insertMisc(document, node))
+         {
+             continue;
+         }
+
+         // only the first DOCTYPE is kept
+         if (node.type == Node.DOCTYPE_TAG)
+         {
+             if (doctype != null)
+             {
+                 lexer.report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED);
+             }
+             else
+             {
+                 document.insertNodeAtEnd(node);
+                 doctype = node;
+             }
+             continue;
+         }
+
+         // end tags have nothing to close at this level
+         if (node.type == Node.END_TAG)
+         {
+             lexer.report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); //TODO?
+             continue;
+         }
+
+         // anything but an explicit <html> start tag is pushed back and an html element is inferred
+         Node html;
+         if (node.type == Node.START_TAG && node.tag == tt.tagHtml)
+         {
+             html = node;
+         }
+         else
+         {
+             lexer.ungetToken();
+             html = lexer.inferredTag("html");
+         }
+
+         if (document.findDocType() == null && !lexer.configuration.bodyOnly)
+         {
+             lexer.report.warning(lexer, null, null, Report.MISSING_DOCTYPE);
+         }
+
+         document.insertNodeAtEnd(html);
+         HTML.parse(lexer, html, (short) 0); // TODO?
+
+         // the html element consumes the rest of the input
+         return document;
+     }
+
+     return document;
+ }
+
+ /**
+  * Indicates whether or not whitespace should be preserved for this element. If an <code>xml:space</code>
+  * attribute is found, then if the attribute value is <code>preserve</code>, returns <code>true</code>. For
+  * any other value (including a missing value), returns <code>false</code>. If an <code>xml:space</code>
+  * attribute was <em>not</em> found, then the following element names result in a return value of
+  * <code>true: pre, script, style,</code> and <code>xsl:text</code>. Finally, if a <code>TagTable</code> was
+  * passed in and its <code>findParser</code> returns the <code>PRE</code> parser for the element, then
+  * <code>true</code> will be returned. Otherwise, <code>false</code> is returned.
+  * @param element The <code>Node</code> to test to see if whitespace should be preserved.
+  * @param tt The <code>TagTable</code> used for the parser lookup. This may be <code>null</code>, in which case
+  * this test is bypassed.
+  * @return <code>true</code> or <code>false</code>, as explained above.
+  */
+ public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt)
+ {
+     AttVal attribute;
+
+     // search attributes for xml:space
+     for (attribute = element.attributes; attribute != null; attribute = attribute.next)
+     {
+         // constant-first comparisons: an attribute with no name or no value must not raise a NullPointerException
+         if ("xml:space".equals(attribute.attribute))
+         {
+             return "preserve".equals(attribute.value);
+         }
+     }
+
+     // Debian Bug #137124. Fix based on suggestion by Cesar Eduardo Barros 06 Mar 02
+     if (element.element == null)
+     {
+         return false;
+     }
+
+     // kludge for html docs without explicit xml:space attribute
+     if ("pre".equalsIgnoreCase(element.element)
+         || "script".equalsIgnoreCase(element.element)
+         || "style".equalsIgnoreCase(element.element))
+     {
+         return true;
+     }
+
+     if ((tt != null) && (tt.findParser(element) == PRE))
+     {
+         return true;
+     }
+
+     // kludge for XSL docs
+     if ("xsl:text".equalsIgnoreCase(element.element))
+     {
+         return true;
+     }
+
+     return false;
+ }
+
+ /**
+  * Parses the content of an element of an XML document.
+  * <p>
+  * Children are read until the end tag with the same element name; nested start tags are parsed recursively.
+  * Unless whitespace is preserved for the element, one leading space of a first text child and one trailing
+  * space of a last text child are trimmed, and a text node emptied by that is discarded.
+  * </p>
+  * @param lexer token source and error sink
+  * @param element the element being filled
+  * @param mode lexer mode for reading content; replaced by <code>Lexer.PREFORMATTED</code> when
+  * <code>XMLPreserveWhiteSpace</code> returns <code>true</code> for the element
+  */
+ public static void parseXMLElement(Lexer lexer, Node element, short mode)
+ {
+     // if node is pre or has xml:space="preserve" then do so
+     if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
+     {
+         mode = Lexer.PREFORMATTED;
+     }
+
+     Node child;
+     while ((child = lexer.getToken(mode)) != null)
+     {
+         if (child.type == Node.END_TAG)
+         {
+             if (child.element.equals(element.element))
+             {
+                 element.closed = true;
+                 break;
+             }
+
+             // discard unexpected end tags
+             lexer.report.error(lexer, element, child, Report.UNEXPECTED_ENDTAG);
+             continue;
+         }
+
+         // parse content on seeing start tag
+         if (child.type == Node.START_TAG)
+         {
+             parseXMLElement(lexer, child, mode);
+         }
+
+         element.insertNodeAtEnd(child);
+     }
+
+     // no trimming at all when whitespace is preserved
+     if (mode == Lexer.PREFORMATTED)
+     {
+         return;
+     }
+
+     // if first child is text then trim initial space and delete text node if it is empty.
+     Node first = element.content;
+     if (first != null && first.type == Node.TEXT_NODE && first.textarray[first.start] == (byte) ' ')
+     {
+         first.start++;
+
+         if (first.start >= first.end)
+         {
+             Node.discardElement(first);
+         }
+     }
+
+     // if last child is text then trim final space and delete the text node if it is empty
+     // (read only now, because the step above may have removed a node)
+     Node last = element.last;
+     if (last != null && last.type == Node.TEXT_NODE && last.textarray[last.end - 1] == (byte) ' ')
+     {
+         last.end--;
+
+         if (last.start >= last.end)
+         {
+             Node.discardElement(last);
+         }
+     }
+ }
+
+ /**
+  * Parses an XML document.
+  * <p>
+  * Switches the configuration to XML tags, then attaches comments, the first DOCTYPE and every top-level
+  * element to a new root node. Start tags have their content parsed with <code>parseXMLElement</code>.
+  * </p>
+  * @param lexer token source and error sink
+  * @return the root node of the parsed document
+  */
+ public static Node parseXMLDocument(Lexer lexer)
+ {
+     Node document = lexer.newNode();
+     document.type = Node.ROOT_NODE;
+     Node doctype = null;
+     lexer.configuration.xmlTags = true;
+
+     for (Node node = lexer.getToken(Lexer.IGNORE_WHITESPACE);
+         node != null;
+         node = lexer.getToken(Lexer.IGNORE_WHITESPACE))
+     {
+         // discard unexpected end tags
+         if (node.type == Node.END_TAG)
+         {
+             lexer.report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG);
+             continue;
+         }
+
+         // deal with comments etc.
+         if (Node.insertMisc(document, node))
+         {
+             continue;
+         }
+
+         // only the first DOCTYPE is kept
+         if (node.type == Node.DOCTYPE_TAG)
+         {
+             if (doctype != null)
+             {
+                 lexer.report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO
+             }
+             else
+             {
+                 document.insertNodeAtEnd(node);
+                 doctype = node;
+             }
+             continue;
+         }
+
+         // elements are attached; only a start tag has content to parse
+         if (node.type == Node.START_END_TAG || node.type == Node.START_TAG)
+         {
+             document.insertNodeAtEnd(node);
+
+             if (node.type == Node.START_TAG)
+             {
+                 parseXMLElement(lexer, node, Lexer.IGNORE_WHITESPACE);
+             }
+         }
+     }
+
+     if (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
+     {
+         lexer.report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
+     }
+
+     // ensure presence of initial <?XML version="1.0"?>
+     if (lexer.configuration.xmlPi)
+     {
+         lexer.fixXmlDecl(document);
+     }
+
+     return document;
+ }
+
+ /**
+  * Records a misplaced FORM start or end tag: increments <code>lexer.errors</code> and sets
+  * <code>lexer.badForm</code> to 1. Errors of this kind generally require human intervention to fix.
+  * @param lexer the lexer whose error state is updated
+  */
+ static void badForm(Lexer lexer)
+ {
+     lexer.errors++;
+     lexer.badForm = 1;
+ }
+
+}
\ No newline at end of file
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Report.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Report.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Report.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,1823 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.io.PrintWriter;
+import java.text.MessageFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+
+import org.ajax4jsf.org.w3c.tidy.TidyMessage.Level;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+
+
+/**
+ * Error/informational message reporter. You should only need to edit the file TidyMessages.properties to localize HTML
+ * tidy.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public final class Report
+{
+
+ private static final Log log = LogFactory.getLog(Report.class);
+
+ /**
+ * used to point to Web Accessibility Guidelines.
+ */
+ public static final String ACCESS_URL = "http://www.w3.org/WAI/GL";
+
+ /**
+ * Release date.
+ */
+ public static final Date RELEASE_DATE = new Date(1096227718000L);
+
+ /**
+  * Release date String: {@link #RELEASE_DATE} formatted with the pattern <code>dd MMM yyyy</code>.
+  */
+ public static final String RELEASE_DATE_STRING = new SimpleDateFormat("dd MMM yyyy").format(RELEASE_DATE);
+
+ /**
+ * invalid entity: missing semicolon.
+ */
+ public static final short MISSING_SEMICOLON = 1;
+
+ /**
+ * invalid entity: missing semicolon in a numeric character reference.
+ */
+ public static final short MISSING_SEMICOLON_NCR = 2;
+
+ /**
+ * invalid entity: unknown entity.
+ */
+ public static final short UNKNOWN_ENTITY = 3;
+
+ /**
+ * invalid entity: unescaped ampersand.
+ */
+ public static final short UNESCAPED_AMPERSAND = 4;
+
+ /**
+ * invalid entity: apos undefined in current definition.
+ */
+ public static final short APOS_UNDEFINED = 5;
+
+ /**
+ * missing an end tag.
+ */
+ public static final short MISSING_ENDTAG_FOR = 6;
+
+ /**
+ * missing end tag before.
+ */
+ public static final short MISSING_ENDTAG_BEFORE = 7;
+
+ /**
+ * discarding unexpected element.
+ */
+ public static final short DISCARDING_UNEXPECTED = 8;
+
+ /**
+ * nested emphasis.
+ */
+ public static final short NESTED_EMPHASIS = 9;
+
+ /**
+ * non matching end tag.
+ */
+ public static final short NON_MATCHING_ENDTAG = 10;
+
+ /**
+ * tag not allowed in.
+ */
+ public static final short TAG_NOT_ALLOWED_IN = 11;
+
+ /**
+ * missing start tag.
+ */
+ public static final short MISSING_STARTTAG = 12;
+
+ /**
+ * unexpected end tag.
+ */
+ public static final short UNEXPECTED_ENDTAG = 13;
+
+ /**
+ * using br in place of.
+ */
+ public static final short USING_BR_INPLACE_OF = 14;
+
+ /**
+ * inserting tag.
+ */
+ public static final short INSERTING_TAG = 15;
+
+ /**
+ * suspected missing quote.
+ */
+ public static final short SUSPECTED_MISSING_QUOTE = 16;
+
+ /**
+ * missing title element.
+ */
+ public static final short MISSING_TITLE_ELEMENT = 17;
+
+ /**
+ * duplicate frameset.
+ */
+ public static final short DUPLICATE_FRAMESET = 18;
+
+ /**
+ * elements can't be nested.
+ */
+ public static final short CANT_BE_NESTED = 19;
+
+ /**
+ * obsolete element.
+ */
+ public static final short OBSOLETE_ELEMENT = 20;
+
+ /**
+ * proprietary element.
+ */
+ public static final short PROPRIETARY_ELEMENT = 21;
+
+ /**
+ * unknown element.
+ */
+ public static final short UNKNOWN_ELEMENT = 22;
+
+ /**
+ * trim empty element.
+ */
+ public static final short TRIM_EMPTY_ELEMENT = 23;
+
+ /**
+ * coerce to end tag.
+ */
+ public static final short COERCE_TO_ENDTAG = 24;
+
+ /**
+ * illegal nesting.
+ */
+ public static final short ILLEGAL_NESTING = 25;
+
+ /**
+ * noframes content.
+ */
+ public static final short NOFRAMES_CONTENT = 26;
+
+ /**
+ * content after body.
+ */
+ public static final short CONTENT_AFTER_BODY = 27;
+
+ /**
+ * inconsistent version.
+ */
+ public static final short INCONSISTENT_VERSION = 28;
+
+ /**
+ * malformed comment.
+ */
+ public static final short MALFORMED_COMMENT = 29;
+
+ /**
+ * bad comment chars.
+ */
+ public static final short BAD_COMMENT_CHARS = 30;
+
+ /**
+ * bad xml comment.
+ */
+ public static final short BAD_XML_COMMENT = 31;
+
+ /**
+ * bad cdata content.
+ */
+ public static final short BAD_CDATA_CONTENT = 32;
+
+ /**
+ * inconsistent namespace.
+ */
+ public static final short INCONSISTENT_NAMESPACE = 33;
+
+ /**
+ * doctype after tags.
+ */
+ public static final short DOCTYPE_AFTER_TAGS = 34;
+
+ /**
+ * malformed doctype.
+ */
+ public static final short MALFORMED_DOCTYPE = 35;
+
+ /**
+ * unexpected end of file.
+ */
+ public static final short UNEXPECTED_END_OF_FILE = 36;
+
+ /**
+ * doctype not upper case.
+ */
+ public static final short DTYPE_NOT_UPPER_CASE = 37;
+
+ /**
+ * too many elements.
+ */
+ public static final short TOO_MANY_ELEMENTS = 38;
+
+ /**
+ * unescaped element.
+ */
+ public static final short UNESCAPED_ELEMENT = 39;
+
+ /**
+ * nested quotation.
+ */
+ public static final short NESTED_QUOTATION = 40;
+
+ /**
+ * element not empty.
+ */
+ public static final short ELEMENT_NOT_EMPTY = 41;
+
+ /**
+ * encoding IO conflict.
+ */
+ public static final short ENCODING_IO_CONFLICT = 42;
+
+ /**
+ * mixed content in block.
+ */
+ public static final short MIXED_CONTENT_IN_BLOCK = 43;
+
+ /**
+ * missing doctype.
+ */
+ public static final short MISSING_DOCTYPE = 44;
+
+ /**
+ * space preceding xml declaration.
+ */
+ public static final short SPACE_PRECEDING_XMLDECL = 45;
+
+ /**
+ * too many elements in.
+ */
+ public static final short TOO_MANY_ELEMENTS_IN = 46;
+
+ /**
+ * unexpected end tag in.
+ */
+ public static final short UNEXPECTED_ENDTAG_IN = 47;
+
+ /**
+ * replacing element.
+ */
+ public static final short REPLACING_ELEMENT = 83;
+
+ /**
+ * replacing unescaped element.
+ */
+ public static final short REPLACING_UNEX_ELEMENT = 84;
+
+ /**
+ * coerce to endtag.
+ */
+ public static final short COERCE_TO_ENDTAG_WARN = 85;
+
+ /**
+ * attribute: unknown attribute.
+ */
+ public static final short UNKNOWN_ATTRIBUTE = 48;
+
+ /**
+ * attribute: missing attribute.
+ */
+ public static final short MISSING_ATTRIBUTE = 49;
+
+ /**
+ * attribute: missing attribute value.
+ */
+ public static final short MISSING_ATTR_VALUE = 50;
+
+ /**
+ * attribute: bad attribute value.
+ */
+ public static final short BAD_ATTRIBUTE_VALUE = 51;
+
+ /**
+ * attribute: unexpected gt.
+ */
+ public static final short UNEXPECTED_GT = 52;
+
+ /**
+ * attribute: proprietary attribute.
+ */
+ public static final short PROPRIETARY_ATTRIBUTE = 53;
+
+ /**
+ * attribute: proprietary attribute value.
+ */
+ public static final short PROPRIETARY_ATTR_VALUE = 54;
+
+ /**
+ * attribute: repeated attribute.
+ */
+ public static final short REPEATED_ATTRIBUTE = 55;
+
+ /**
+ * attribute: missing image map.
+ */
+ public static final short MISSING_IMAGEMAP = 56;
+
+ /**
+ * attribute: xml attribute value.
+ */
+ public static final short XML_ATTRIBUTE_VALUE = 57;
+
+ /**
+ * attribute: missing quotemark.
+ */
+ public static final short MISSING_QUOTEMARK = 58;
+
+ /**
+ * attribute: unexpected quotemark.
+ */
+ public static final short UNEXPECTED_QUOTEMARK = 59;
+
+ /**
+ * attribute: id and name mismatch.
+ */
+ public static final short ID_NAME_MISMATCH = 60;
+
+ /**
+ * attribute: backslash in URI.
+ */
+ public static final short BACKSLASH_IN_URI = 61;
+
+ /**
+ * attribute: fixed backslash.
+ */
+ public static final short FIXED_BACKSLASH = 62;
+
+ /**
+ * attribute: illegal URI reference.
+ */
+ public static final short ILLEGAL_URI_REFERENCE = 63;
+
+ /**
+ * attribute: escaped illegal URI.
+ */
+ public static final short ESCAPED_ILLEGAL_URI = 64;
+
+ /**
+ * attribute: newline in URI.
+ */
+ public static final short NEWLINE_IN_URI = 65;
+
+ /**
+ * attribute: anchor not unique.
+ */
+ public static final short ANCHOR_NOT_UNIQUE = 66;
+
+ /**
+ * attribute: entity in id.
+ */
+ public static final short ENTITY_IN_ID = 67;
+
+ /**
+ * attribute: joining attribute.
+ */
+ public static final short JOINING_ATTRIBUTE = 68;
+
+ /**
+ * attribute: expected equalsign.
+ */
+ public static final short UNEXPECTED_EQUALSIGN = 69;
+
+ /**
+ * attribute: attribute value not lower case.
+ */
+ public static final short ATTR_VALUE_NOT_LCASE = 70;
+
+ /**
+ * attribute: xml id syntax.
+ */
+ public static final short XML_ID_SYNTAX = 71;
+
+ /**
+ * attribute: invalid attribute.
+ */
+ public static final short INVALID_ATTRIBUTE = 72;
+
+ /**
+ * attribute: bad attribute value replaced.
+ */
+ public static final short BAD_ATTRIBUTE_VALUE_REPLACED = 73;
+
+ /**
+ * attribute: invalid xml id.
+ */
+ public static final short INVALID_XML_ID = 74;
+
+ /**
+ * attribute: unexpected end of file.
+ */
+ public static final short UNEXPECTED_END_OF_FILE_ATTR = 75;
+
+ /**
+ * character encoding: vendor specific chars.
+ */
+ public static final short VENDOR_SPECIFIC_CHARS = 76;
+
+ /**
+ * character encoding: invalid sgml chars.
+ */
+ public static final short INVALID_SGML_CHARS = 77;
+
+ /**
+ * character encoding: invalid utf8.
+ */
+ public static final short INVALID_UTF8 = 78;
+
+ /**
+ * character encoding: invalid utf16.
+ */
+ public static final short INVALID_UTF16 = 79;
+
+ /**
+ * character encoding: encoding mismatch.
+ */
+ public static final short ENCODING_MISMATCH = 80;
+
+ /**
+ * character encoding: invalid URI.
+ */
+ public static final short INVALID_URI = 81;
+
+ /**
+ * character encoding: invalid NCR.
+ */
+ public static final short INVALID_NCR = 82;
+
+ /**
+ * Constant used for reporting of given doctype.
+ */
+ public static final short DOCTYPE_GIVEN_SUMMARY = 110;
+
+ /**
+ * Constant used for reporting of version summary.
+ */
+ public static final short REPORT_VERSION_SUMMARY = 111;
+
+ /**
+ * Constant used for reporting of bad access summary.
+ */
+ public static final short BADACCESS_SUMMARY = 112;
+
+ /**
+ * Constant used for reporting of bad form summary.
+ */
+ public static final short BADFORM_SUMMARY = 113;
+
+ /**
+ * accessibility flaw: missing image alt.
+ */
+ public static final short MISSING_IMAGE_ALT = 1;
+
+ /**
+ * accessibility flaw: missing link alt.
+ */
+ public static final short MISSING_LINK_ALT = 2;
+
+ /**
+ * accessibility flaw: missing summary.
+ */
+ public static final short MISSING_SUMMARY = 4;
+
+ /**
+ * accessibility flaw: missing image map.
+ */
+ public static final short MISSING_IMAGE_MAP = 8;
+
+ /**
+ * accessibility flaw: using frames.
+ */
+ public static final short USING_FRAMES = 16;
+
+ /**
+ * accessibility flaw: using noframes.
+ */
+ public static final short USING_NOFRAMES = 32;
+
+ /**
+ * presentation flaw: using spacer.
+ */
+ public static final short USING_SPACER = 1;
+
+ /**
+ * presentation flaw: using layer.
+ */
+ public static final short USING_LAYER = 2;
+
+ /**
+ * presentation flaw: using nobr.
+ */
+ public static final short USING_NOBR = 4;
+
+ /**
+ * presentation flaw: using font.
+ */
+ public static final short USING_FONT = 8;
+
+ /**
+ * presentation flaw: using body.
+ */
+ public static final short USING_BODY = 16;
+
+ /**
+ * character encoding error: windows chars.
+ */
+ public static final short WINDOWS_CHARS = 1;
+
+ /**
+ * character encoding error: non ascii.
+ */
+ public static final short NON_ASCII = 2;
+
+ /**
+ * character encoding error: found utf16.
+ */
+ public static final short FOUND_UTF16 = 4;
+
+ /**
+ * char has been replaced.
+ */
+ public static final short REPLACED_CHAR = 0;
+
+ /**
+ * char has been discarded.
+ */
+ public static final short DISCARDED_CHAR = 1;
+
+ /**
+ * Resource bundle with messages.
+ */
+ private static ResourceBundle res;
+
+ /**
+ * Printed in GNU Emacs messages.
+ */
+ private String currentFile;
+
+ /**
+ * message listener for error reporting.
+ */
+ private TidyMessageListener listener;
+
+ // Loads the message bundle once when the class is initialized; every message lookup in this class reads from res.
+ static
+ {
+     try
+     {
+         res = ResourceBundle.getBundle("org/ajax4jsf/org/w3c/tidy/TidyMessages");
+     }
+     catch (MissingResourceException e)
+     {
+         // keep the original exception as the cause so its stack trace is not lost
+         throw new Error(e.toString(), e);
+     }
+ }
+
+ /**
+ * Instantiated only in Tidy() constructor.
+ */
+ protected Report()
+ {
+ super();
+ }
+
+ /**
+ * Generates a complete message for the warning/error. The message is composed by:
+ * <ul>
+ * <li>position in file</li>
+ * <li>prefix for the error level (warning: | error:)</li>
+ * <li>message read from ResourceBundle</li>
+ * <li>optional parameters added to message using MessageFormat</li>
+ * </ul>
+ * @param errorCode tidy error code
+ * @param lexer Lexer
+ * @param element last appended element - for insert comment in generated dom tree
with error message.
+ * @param message key for the ResourceBundle
+ * @param params optional parameters added with MessageFormat
+ * @param level message level. One of
<code>TidyMessage.LEVEL_ERROR</code>,
+ *
<code>TidyMessage.LEVEL_WARNING</code>,<code>TidyMessage.LEVEL_INFO</code>
+ * @return formatted message
+ * @throws MissingResourceException if <code>message</code> key is not
available in jtidy resource bundle.
+ * @see TidyMessage
+ */
+ protected String getMessage(int errorCode, Lexer lexer, Node element, String message,
Object[] params, Level level)
+ throws MissingResourceException
+ {
+ String resource;
+ resource = res.getString(message);
+
+ String position;
+
+ if (lexer != null && level != Level.SUMMARY)
+ {
+ position = getPosition(lexer);
+ }
+ else
+ {
+ position = "";
+ }
+
+ String prefix;
+
+ if (level == Level.ERROR)
+ {
+ prefix = res.getString("error");
+ }
+ else if (level == Level.WARNING)
+ {
+ prefix = res.getString("warning");
+ }
+ else
+ {
+ prefix = "";
+ }
+
+ String messageString;
+
+ if (params != null)
+ {
+ messageString = MessageFormat.format(resource, params);
+ }
+ else
+ {
+ messageString = resource;
+ }
+
+ if (listener != null)
+ {
+ TidyMessage msg = new TidyMessage(errorCode, (lexer != null) ? lexer.lines :
0, (lexer != null)
+ ? lexer.columns
+ : 0, level, messageString);
+ // Changed by asmirnov - add reference to lexer for manipulate tree.
+ msg.setLexer(lexer);
+ msg.setElement(element);
+ listener.messageReceived(msg);
+ }
+
+ return position + prefix + messageString;
+ }
+
+ /**
+  * Formats a message with getMessage() and prints it to <code>lexer.errout</code>. If the message key is
+  * missing from the bundle, the text of the resulting exception is printed instead.
+  * @param errorCode tidy error code
+  * @param lexer Lexer
+  * @param element node forwarded to getMessage()
+  * @param message key for the ResourceBundle
+  * @param params optional parameters added with MessageFormat
+  * @param level message level
+  * @see TidyMessage
+  */
+ private void printMessage(int errorCode, Lexer lexer, Node element, String message, Object[] params, Level level)
+ {
+     String line;
+     try
+     {
+         line = getMessage(errorCode, lexer, element, message, params, level);
+     }
+     catch (MissingResourceException e)
+     {
+         line = e.toString();
+     }
+
+     lexer.errout.println(line);
+ }
+
+ /**
+  * Formats a message with getMessage() and prints it to the given writer. Used when lexer is not yet
+  * defined: the message is built with error code -1 and neither lexer nor element. If the message key is
+  * missing from the bundle, the text of the resulting exception is printed instead.
+  * @param errout PrintWriter
+  * @param message key for the ResourceBundle
+  * @param params optional parameters added with MessageFormat
+  * @param level message level
+  * @see TidyMessage
+  */
+ private void printMessage(PrintWriter errout, String message, Object[] params, Level level)
+ {
+     String line;
+     try
+     {
+         line = getMessage(-1, null, null, message, params, level);
+     }
+     catch (MissingResourceException e)
+     {
+         line = e.toString();
+     }
+
+     errout.println(line);
+ }
+
+ /**
+  * Prints the version summary, parameterized with the release date.
+  * @param p writer receiving the version line
+  */
+ public void showVersion(PrintWriter p)
+ {
+     final Object[] releaseInfo = {RELEASE_DATE};
+     printMessage(p, "version_summary", releaseInfo, Level.SUMMARY);
+ }
+
+ /**
+ * Returns a formatted tag name handling start and ent tags, nulls, doctypes, and
text.
+ * @param tag Node
+ * @return formatted tag name
+ */
+ private String getTagName(Node tag)
+ {
+ if (tag != null)
+ {
+ if (tag.type == Node.START_TAG)
+ {
+ return "<" + tag.element + ">";
+ }
+ else if (tag.type == Node.END_TAG)
+ {
+ return "</" + tag.element + ">";
+ }
+ else if (tag.type == Node.DOCTYPE_TAG)
+ {
+ return "<!DOCTYPE>";
+ }
+ else if (tag.type == Node.TEXT_NODE)
+ {
+ return "plain text";
+ }
+ else
+ {
+ return tag.element;
+ }
+ }
+ return "";
+ }
+
+ /**
+  * Logs an "unknown option" error message. Lexer is not defined when this is called, so the message goes
+  * to the class logger. If the message key is missing from the bundle, the exception text is logged instead.
+  * @param option unknown option name
+  */
+ public void unknownOption(String option)
+ {
+     String text;
+     try
+     {
+         text = MessageFormat.format(res.getString("unknown_option"), new Object[]{option});
+     }
+     catch (MissingResourceException e)
+     {
+         text = e.toString();
+     }
+
+     log.error(text);
+ }
+
+ /**
+  * Logs a "bad argument" error message. Lexer is not defined when this is called, so the message goes to
+  * the class logger. The value is passed to the message pattern as parameter 0 and the key as parameter 1.
+  * If the message key is missing from the bundle, the exception text is logged instead.
+  * @param key argument name
+  * @param value bad argument value
+  */
+ public void badArgument(String key, String value)
+ {
+     String text;
+     try
+     {
+         text = MessageFormat.format(res.getString("bad_argument"), new Object[]{value, key});
+     }
+     catch (MissingResourceException e)
+     {
+         text = e.toString();
+     }
+
+     log.error(text);
+ }
+
+ /**
+ * Returns a formatted String describing the current position in file.
+ * @param lexer Lexer
+ * @return String position ("line:column")
+ */
+ private String getPosition(Lexer lexer)
+ {
+ try
+ {
+ // Change formatting to be parsable by GNU Emacs
+ if (lexer.configuration.emacs)
+ {
+ return MessageFormat.format(res.getString("emacs_format"), new
Object[]{
+ this.currentFile,
+ new Integer(lexer.lines),
+ new Integer(lexer.columns)})
+ + " ";
+ }
+ // traditional format
+ return MessageFormat.format(res.getString("line_column"), new
Object[]{
+ new Integer(lexer.lines),
+ new Integer(lexer.columns)});
+
+ }
+ catch (MissingResourceException e)
+ {
+ lexer.errout.println(e.toString());
+ }
+ return "";
+ }
+
+ /**
+ * Prints encoding error messages.
+ * @param lexer Lexer
+ * @param code error code
+ * @param c invalid char
+ */
+ public void encodingError(Lexer lexer, int code, int c)
+ {
+ lexer.warnings++;
+
+ if (lexer.errors > lexer.configuration.showErrors) // keep quiet after
<showErrors> errors
+ {
+ return;
+ }
+
+ if (lexer.configuration.showWarnings)
+ {
+ String buf = Integer.toHexString(c);
+
+ // An encoding mismatch is currently treated as a non-fatal error
+ if ((code & ~DISCARDED_CHAR) == ENCODING_MISMATCH)
+ {
+ // actual encoding passed in "c"
+ lexer.badChars |= ENCODING_MISMATCH;
+ printMessage(
+ code,
+ lexer,
+ null,
+ "encoding_mismatch",
+ new Object[]{
+ lexer.configuration.getInCharEncodingName(),
+ ParsePropertyImpl.CHAR_ENCODING.getFriendlyName(null, new
Integer(c), lexer.configuration)}, Level.WARNING);
+ }
+ else if ((code & ~DISCARDED_CHAR) == VENDOR_SPECIFIC_CHARS)
+ {
+ lexer.badChars |= VENDOR_SPECIFIC_CHARS;
+ printMessage(
+ code,
+ lexer,
+ null,
+ "invalid_char",
+ new Object[]{new Integer(code & DISCARDED_CHAR), buf},
Level.WARNING);
+ }
+ else if ((code & ~DISCARDED_CHAR) == INVALID_SGML_CHARS)
+ {
+ lexer.badChars |= INVALID_SGML_CHARS;
+ printMessage(
+ code,
+ lexer,
+ null,
+ "invalid_char",
+ new Object[]{new Integer(code & DISCARDED_CHAR), buf},
Level.WARNING);
+ }
+ else if ((code & ~DISCARDED_CHAR) == INVALID_UTF8)
+ {
+ lexer.badChars |= INVALID_UTF8;
+ printMessage(
+ code,
+ lexer,
+ null,
+ "invalid_utf8",
+ new Object[]{new Integer(code & DISCARDED_CHAR), buf},
Level.WARNING);
+ }
+
+ else if ((code & ~DISCARDED_CHAR) == INVALID_UTF16)
+ {
+ lexer.badChars |= INVALID_UTF16;
+ printMessage(
+ code,
+ lexer,
+ null,
+ "invalid_utf16",
+ new Object[]{new Integer(code & DISCARDED_CHAR), buf},
Level.WARNING);
+
+ }
+
+ else if ((code & ~DISCARDED_CHAR) == INVALID_NCR)
+ {
+ lexer.badChars |= INVALID_NCR;
+ printMessage(
+ code,
+ lexer,
+ null,
+ "invalid_ncr",
+ new Object[]{new Integer(code & DISCARDED_CHAR), buf},
Level.WARNING);
+ }
+
+ }
+ }
+
+ /**
+  * Prints entity error messages. Always counts one warning; prints only while the error count does not
+  * exceed <code>showErrors</code> and warnings are enabled. Codes other than the five entity codes are
+  * counted but not printed.
+  * @param lexer Lexer
+  * @param code error code
+  * @param entity invalid entity String
+  * @param c invalid char (not used by this method)
+  */
+ public void entityError(Lexer lexer, short code, String entity, int c)
+ {
+     lexer.warnings++;
+
+     // keep quiet after <showErrors> errors
+     if (lexer.errors > lexer.configuration.showErrors)
+     {
+         return;
+     }
+
+     if (!lexer.configuration.showWarnings)
+     {
+         return;
+     }
+
+     final String key;
+     final Object[] args;
+
+     switch (code)
+     {
+         case MISSING_SEMICOLON :
+             key = "missing_semicolon";
+             args = new Object[]{entity};
+             break;
+         case MISSING_SEMICOLON_NCR :
+             key = "missing_semicolon_ncr";
+             args = new Object[]{entity};
+             break;
+         case UNKNOWN_ENTITY :
+             key = "unknown_entity";
+             args = new Object[]{entity};
+             break;
+         case UNESCAPED_AMPERSAND :
+             key = "unescaped_ampersand";
+             args = null;
+             break;
+         case APOS_UNDEFINED :
+             key = "apos_undefined";
+             args = null;
+             break;
+         default :
+             // should not reach here
+             return;
+     }
+
+     printMessage(code, lexer, null, key, args, Level.WARNING);
+ }
+
+ /**
+  * Prints error messages for attributes. <code>UNEXPECTED_GT</code> is counted and printed as an error;
+  * every other code is counted as a warning and printed only when warnings are enabled. Nothing is printed
+  * once the error count exceeds <code>showErrors</code>. Codes without a case below are counted but not
+  * printed.
+  * @param lexer Lexer
+  * @param node current tag
+  * @param attribute attribute; must not be <code>null</code> for codes whose message includes the
+  * attribute name or value
+  * @param code error code
+  */
+ public void attrError(Lexer lexer, Node node, AttVal attribute, short code)
+ {
+     // for insertion message BEFORE node.
+     Node element = (node != null && node.prev != null) ? node.prev : node;
+
+     if (code == UNEXPECTED_GT)
+     {
+         lexer.errors++;
+     }
+     else
+     {
+         lexer.warnings++;
+     }
+
+     // keep quiet after <showErrors> errors
+     if (lexer.errors > lexer.configuration.showErrors)
+     {
+         return;
+     }
+
+     final String tagName = getTagName(node);
+
+     if (code == UNEXPECTED_GT) // error
+     {
+         printMessage(code, lexer, element, "unexpected_gt", new Object[]{tagName}, Level.ERROR);
+     }
+
+     if (!lexer.configuration.showWarnings) // warnings
+     {
+         return;
+     }
+
+     final String key;
+     final Object[] args;
+
+     switch (code)
+     {
+         case UNKNOWN_ATTRIBUTE :
+             key = "unknown_attribute";
+             args = new Object[]{attribute.attribute};
+             break;
+
+         case MISSING_ATTRIBUTE :
+             key = "missing_attribute";
+             args = new Object[]{tagName, attribute.attribute};
+             break;
+
+         case MISSING_ATTR_VALUE :
+             key = "missing_attr_value";
+             args = new Object[]{tagName, attribute.attribute};
+             break;
+
+         case MISSING_IMAGEMAP :
+             // printed here because the accessibility flag is set only after the message went out
+             printMessage(code, lexer, element, "missing_imagemap", new Object[]{tagName}, Level.WARNING);
+             lexer.badAccess |= MISSING_IMAGE_MAP;
+             return;
+
+         case BAD_ATTRIBUTE_VALUE :
+             key = "bad_attribute_value";
+             args = new Object[]{tagName, attribute.attribute, attribute.value};
+             break;
+
+         case XML_ID_SYNTAX :
+             key = "xml_id_sintax";
+             args = new Object[]{tagName, attribute.attribute};
+             break;
+
+         case XML_ATTRIBUTE_VALUE :
+             key = "xml_attribute_value";
+             args = new Object[]{tagName, attribute.attribute};
+             break;
+
+         case UNEXPECTED_QUOTEMARK :
+             key = "unexpected_quotemark";
+             args = new Object[]{tagName};
+             break;
+
+         case MISSING_QUOTEMARK :
+             key = "missing_quotemark";
+             args = new Object[]{tagName};
+             break;
+
+         case REPEATED_ATTRIBUTE :
+             key = "repeated_attribute";
+             args = new Object[]{tagName, attribute.value, attribute.attribute};
+             break;
+
+         case PROPRIETARY_ATTR_VALUE :
+             key = "proprietary_attr_value";
+             args = new Object[]{tagName, attribute.value};
+             break;
+
+         case PROPRIETARY_ATTRIBUTE :
+             key = "proprietary_attribute";
+             args = new Object[]{tagName, attribute.attribute};
+             break;
+
+         case UNEXPECTED_END_OF_FILE :
+         case UNEXPECTED_END_OF_FILE_ATTR : // attribute-specific code, previously dropped silently
+             // on end of file adjust reported position to end of input
+             lexer.lines = lexer.in.getCurline();
+             lexer.columns = lexer.in.getCurcol();
+             key = "unexpected_end_of_file";
+             args = new Object[]{tagName};
+             break;
+
+         case ID_NAME_MISMATCH :
+             key = "id_name_mismatch";
+             args = new Object[]{tagName};
+             break;
+
+         case BACKSLASH_IN_URI :
+             key = "backslash_in_uri";
+             args = new Object[]{tagName};
+             break;
+
+         case FIXED_BACKSLASH :
+             key = "fixed_backslash";
+             args = new Object[]{tagName};
+             break;
+
+         case ILLEGAL_URI_REFERENCE :
+             key = "illegal_uri_reference";
+             args = new Object[]{tagName};
+             break;
+
+         case ESCAPED_ILLEGAL_URI :
+             key = "escaped_illegal_uri";
+             args = new Object[]{tagName};
+             break;
+
+         case NEWLINE_IN_URI :
+             key = "newline_in_uri";
+             args = new Object[]{tagName};
+             break;
+
+         case ANCHOR_NOT_UNIQUE :
+             key = "anchor_not_unique";
+             args = new Object[]{tagName, attribute.value};
+             break;
+
+         case ENTITY_IN_ID :
+             key = "entity_in_id";
+             args = null;
+             break;
+
+         case JOINING_ATTRIBUTE :
+             key = "joining_attribute";
+             args = new Object[]{tagName, attribute.attribute};
+             break;
+
+         case UNEXPECTED_EQUALSIGN :
+             key = "expected_equalsign";
+             args = new Object[]{tagName};
+             break;
+
+         case ATTR_VALUE_NOT_LCASE :
+             key = "attr_value_not_lcase";
+             args = new Object[]{tagName, attribute.value, attribute.attribute};
+             break;
+
+         default :
+             // no message defined for this code
+             return;
+     }
+
+     printMessage(code, lexer, element, key, args, Level.WARNING);
+ }
+
+ /**
+ * Prints warnings.
+ * @param lexer Lexer
+ * @param element parent/missing tag
+ * @param node current tag
+ * @param code error code
+ */
+ public void warning(Lexer lexer, Node element, Node node, short code)
+ {
+
+ TagTable tt = lexer.configuration.tt;
+ if (!((code == DISCARDING_UNEXPECTED) && lexer.badForm != 0)) //
lexer->errors++; already done in BadForm()
+ {
+ lexer.warnings++;
+ }
+
+ // keep quiet after <showErrors> errors
+ if (lexer.errors > lexer.configuration.showErrors)
+ {
+ return;
+ }
+
+ if (lexer.configuration.showWarnings)
+ {
+ switch (code)
+ {
+ case MISSING_ENDTAG_FOR :
+ printMessage(code, lexer, element, "missing_endtag_for",
new Object[]{element.element}, Level.WARNING);
+ break;
+
+ case MISSING_ENDTAG_BEFORE :
+ printMessage(
+ code,
+ lexer,
+ element,
+ "missing_endtag_before",
+ new Object[]{element.element, getTagName(node)}, Level.WARNING);
+ break;
+
+ case DISCARDING_UNEXPECTED :
+ if (lexer.badForm == 0)
+ {
+ // the case for when this is an error not a warning, is handled
later
+ printMessage(
+ code,
+ lexer,
+ element,
+ "discarding_unexpected",
+ new Object[]{getTagName(node)}, Level.WARNING);
+ }
+ break;
+
+ case NESTED_EMPHASIS :
+ printMessage(code, lexer, element, "nested_emphasis", new
Object[]{getTagName(node)}, Level.INFO);
+ break;
+
+ case COERCE_TO_ENDTAG :
+ printMessage(code, lexer, element, "coerce_to_endtag", new
Object[]{element.element}, Level.INFO);
+ break;
+
+ case NON_MATCHING_ENDTAG :
+ printMessage(
+ code,
+ lexer,
+ element,
+ "non_matching_endtag",
+ new Object[]{getTagName(node), element.element}, Level.WARNING);
+ break;
+
+ case TAG_NOT_ALLOWED_IN :
+ printMessage(
+ code,
+ lexer,
+ element,
+ "tag_not_allowed_in",
+ new Object[]{getTagName(node), element.element}, Level.WARNING);
+ break;
+
+ case DOCTYPE_AFTER_TAGS :
+ printMessage(code, lexer, element, "doctype_after_tags",
null, Level.WARNING);
+ break;
+
+ case MISSING_STARTTAG :
+ printMessage(code, lexer, element, "missing_starttag", new
Object[]{node.element}, Level.WARNING);
+ break;
+
+ case UNEXPECTED_ENDTAG :
+ if (element != null)
+ {
+ printMessage(
+ code,
+ lexer,
+ element,
+ "unexpected_endtag_in",
+ new Object[]{node.element, element.element}, Level.WARNING);
+ }
+ else
+ {
+ printMessage(code, lexer, null, "unexpected_endtag",
new Object[]{node.element}, Level.WARNING);
+ }
+ break;
+
+ case TOO_MANY_ELEMENTS :
+ if (element != null)
+ {
+ printMessage(
+ code,
+ lexer,
+ element,
+ "too_many_elements_in",
+ new Object[]{node.element, element.element}, Level.WARNING);
+ }
+ else
+ {
+ printMessage(code, lexer, element, "too_many_elements",
new Object[]{node.element}, Level.WARNING);
+ }
+ break;
+
+ case USING_BR_INPLACE_OF :
+ printMessage(code, lexer, element, "using_br_inplace_of",
new Object[]{getTagName(node)}, Level.WARNING);
+ break;
+
+ case INSERTING_TAG :
+ printMessage(code, lexer, element, "inserting_tag", new
Object[]{node.element}, Level.WARNING);
+ break;
+
+ case CANT_BE_NESTED :
+ printMessage(code, lexer, element, "cant_be_nested", new
Object[]{getTagName(node)}, Level.WARNING);
+ break;
+
+ case PROPRIETARY_ELEMENT :
+ printMessage(code, lexer, element, "proprietary_element",
new Object[]{getTagName(node)}, Level.WARNING);
+
+ if (node.tag == tt.tagLayer)
+ {
+ lexer.badLayout |= USING_LAYER;
+ }
+ else if (node.tag == tt.tagSpacer)
+ {
+ lexer.badLayout |= USING_SPACER;
+ }
+ else if (node.tag == tt.tagNobr)
+ {
+ lexer.badLayout |= USING_NOBR;
+ }
+ break;
+
+ case OBSOLETE_ELEMENT :
+ if (element.tag != null && (element.tag.model &
Dict.CM_OBSOLETE) != 0)
+ {
+ printMessage(code, lexer, element, "obsolete_element",
new Object[]{
+ getTagName(element),
+ getTagName(node)}, Level.WARNING);
+ }
+ else
+ {
+ printMessage(code, lexer, element, "replacing_element",
new Object[]{
+ getTagName(element),
+ getTagName(node)}, Level.WARNING);
+ }
+ break;
+
+ case UNESCAPED_ELEMENT :
+ printMessage(code, lexer, element, "unescaped_element", new
Object[]{getTagName(element)}, Level.WARNING);
+ break;
+
+ case TRIM_EMPTY_ELEMENT :
+ printMessage(code, lexer, element, "trim_empty_element",
new Object[]{getTagName(element)}, Level.WARNING);
+ break;
+
+ case MISSING_TITLE_ELEMENT :
+ printMessage(code, lexer, element, "missing_title_element",
null, Level.WARNING);
+ break;
+
+ case ILLEGAL_NESTING :
+ printMessage(code, lexer, element, "illegal_nesting", new
Object[]{getTagName(element)}, Level.WARNING);
+ break;
+
+ case NOFRAMES_CONTENT :
+ printMessage(code, lexer, element, "noframes_content", new
Object[]{getTagName(node)}, Level.WARNING);
+ break;
+
+ case INCONSISTENT_VERSION :
+ printMessage(code, lexer, element, "inconsistent_version",
null, Level.WARNING);
+ break;
+
+ case MALFORMED_DOCTYPE :
+ printMessage(code, lexer, element, "malformed_doctype",
null, Level.WARNING);
+ break;
+
+ case CONTENT_AFTER_BODY :
+ printMessage(code, lexer, element, "content_after_body",
null, Level.WARNING);
+ break;
+
+ case MALFORMED_COMMENT :
+ printMessage(code, lexer, element, "malformed_comment",
null, Level.WARNING);
+ break;
+
+ case BAD_COMMENT_CHARS :
+ printMessage(code, lexer, element, "bad_comment_chars",
null, Level.WARNING);
+ break;
+
+ case BAD_XML_COMMENT :
+ printMessage(code, lexer, element, "bad_xml_comment", null,
Level.WARNING);
+ break;
+
+ case BAD_CDATA_CONTENT :
+ printMessage(code, lexer, element, "bad_cdata_content",
null, Level.WARNING);
+ break;
+
+ case INCONSISTENT_NAMESPACE :
+ printMessage(code, lexer, element,
"inconsistent_namespace", null, Level.WARNING);
+ break;
+
+ case DTYPE_NOT_UPPER_CASE :
+ printMessage(code, lexer, element, "dtype_not_upper_case",
null, Level.WARNING);
+ break;
+
+ case UNEXPECTED_END_OF_FILE :
+ // on end of file adjust reported position to end of input
+ lexer.lines = lexer.in.getCurline();
+ lexer.columns = lexer.in.getCurcol();
+ printMessage(
+ code,
+ lexer,
+ element,
+ "unexpected_end_of_file",
+ new Object[]{getTagName(element)}, Level.WARNING);
+ break;
+
+ case NESTED_QUOTATION :
+ printMessage(code, lexer, element, "nested_quotation",
null, Level.WARNING);
+ break;
+
+ case ELEMENT_NOT_EMPTY :
+ printMessage(code, lexer, element, "element_not_empty", new
Object[]{getTagName(element)}, Level.WARNING);
+ break;
+
+ case MISSING_DOCTYPE :
+ printMessage(code, lexer, element, "missing_doctype", null,
Level.WARNING);
+ break;
+
+ default :
+ break;
+ }
+ }
+
+ if ((code == DISCARDING_UNEXPECTED) && lexer.badForm != 0)
+ {
+ // the case for when this is a warning not an error, is handled earlier
+ printMessage(code, lexer, element, "discarding_unexpected", new
Object[]{getTagName(node)}, Level.ERROR);
+ }
+
+ }
+
+ /**
+ * Prints errors.
+ * @param lexer Lexer
+ * @param element parent/missing tag
+ * @param node current tag
+ * @param code error code
+ */
+ public void error(Lexer lexer, Node element, Node node, short code)
+ {
+ lexer.errors++;
+
+ // keep quiet after <showErrors> errors
+ if (lexer.errors > lexer.configuration.showErrors)
+ {
+ return;
+ }
+
+ if (code == SUSPECTED_MISSING_QUOTE)
+ {
+ printMessage(code, lexer, element, "suspected_missing_quote", null,
Level.ERROR);
+ }
+ else if (code == DUPLICATE_FRAMESET)
+ {
+ printMessage(code, lexer, element, "duplicate_frameset", null,
Level.ERROR);
+ }
+ else if (code == UNKNOWN_ELEMENT)
+ {
+ printMessage(code, lexer, element, "unknown_element", new
Object[]{getTagName(node)}, Level.ERROR);
+ }
+ else if (code == UNEXPECTED_ENDTAG)
+ {
+ if (element != null)
+ {
+ printMessage(
+ code,
+ lexer,
+ element,
+ "unexpected_endtag_in",
+ new Object[]{node.element, element.element}, Level.ERROR);
+ }
+ else
+ {
+ printMessage(code, lexer, node, "unexpected_endtag", new
Object[]{node.element}, Level.ERROR);
+ }
+ }
+ }
+
+ /**
+ * Prints error summary.
+ * @param lexer Lexer
+ */
+ public void errorSummary(Lexer lexer)
+ {
+ // adjust badAccess to that its null if frames are ok
+ if ((lexer.badAccess & (USING_FRAMES | USING_NOFRAMES)) != 0)
+ {
+ if (!(((lexer.badAccess & USING_FRAMES) != 0) &&
((lexer.badAccess & USING_NOFRAMES) == 0)))
+ {
+ lexer.badAccess &= ~(USING_FRAMES | USING_NOFRAMES);
+ }
+ }
+ if (lexer.badChars != 0)
+ {
+ if ((lexer.badChars & VENDOR_SPECIFIC_CHARS) != 0)
+ {
+ int encodingChoiche = 0;
+
+ if
("Cp1252".equals(lexer.configuration.getInCharEncodingName()))
+ {
+ encodingChoiche = 1;
+ }
+ else if
("MacRoman".equals(lexer.configuration.getInCharEncodingName()))
+ {
+ encodingChoiche = 2;
+ }
+
+ printMessage(VENDOR_SPECIFIC_CHARS, lexer, null,
"vendor_specific_chars_summary", new Object[]{new Integer(
+ encodingChoiche)}, Level.SUMMARY);
+ }
+
+ if ((lexer.badChars & INVALID_SGML_CHARS) != 0 || (lexer.badChars &
INVALID_NCR) != 0)
+ {
+ int encodingChoiche = 0;
+
+ if
("Cp1252".equals(lexer.configuration.getInCharEncodingName()))
+ {
+ encodingChoiche = 1;
+ }
+ else if
("MacRoman".equals(lexer.configuration.getInCharEncodingName()))
+ {
+ encodingChoiche = 2;
+ }
+
+ printMessage(INVALID_SGML_CHARS, lexer, null,
"invalid_sgml_chars_summary", new Object[]{new Integer(
+ encodingChoiche)}, Level.SUMMARY);
+ }
+
+ if ((lexer.badChars & INVALID_UTF8) != 0)
+ {
+ printMessage(INVALID_UTF8, lexer, null, "invalid_utf8_summary",
null, Level.SUMMARY);
+ }
+
+ if ((lexer.badChars & INVALID_UTF16) != 0)
+ {
+ printMessage(INVALID_UTF16, lexer, null,
"invalid_utf16_summary", null, Level.SUMMARY);
+ }
+
+ if ((lexer.badChars & INVALID_URI) != 0)
+ {
+ printMessage(INVALID_URI, lexer, null, "invaliduri_summary",
null, Level.SUMMARY);
+ }
+ }
+
+ if (lexer.badForm != 0)
+ {
+ printMessage(BADFORM_SUMMARY, lexer, null, "badform_summary", null,
Level.SUMMARY);
+ }
+
+ if (lexer.badAccess != 0)
+ {
+ if ((lexer.badAccess & MISSING_SUMMARY) != 0)
+ {
+ printMessage(MISSING_SUMMARY, lexer, null,
"badaccess_missing_summary", null, Level.SUMMARY);
+ }
+
+ if ((lexer.badAccess & MISSING_IMAGE_ALT) != 0)
+ {
+ printMessage(MISSING_IMAGE_ALT, lexer, null,
"badaccess_missing_image_alt", null, Level.SUMMARY);
+ }
+
+ if ((lexer.badAccess & MISSING_IMAGE_MAP) != 0)
+ {
+ printMessage(MISSING_IMAGE_MAP, lexer, null,
"badaccess_missing_image_map", null, Level.SUMMARY);
+ }
+
+ if ((lexer.badAccess & MISSING_LINK_ALT) != 0)
+ {
+ printMessage(MISSING_LINK_ALT, lexer, null,
"badaccess_missing_link_alt", null, Level.SUMMARY);
+ }
+
+ if (((lexer.badAccess & USING_FRAMES) != 0) && ((lexer.badAccess
& USING_NOFRAMES) == 0))
+ {
+ printMessage(USING_FRAMES, lexer, null, "badaccess_frames",
null, Level.SUMMARY);
+ }
+
+ printMessage(BADACCESS_SUMMARY, lexer, null, "badaccess_summary",
new Object[]{ACCESS_URL}, Level.SUMMARY);
+ }
+
+ if (lexer.badLayout != 0)
+ {
+ if ((lexer.badLayout & USING_LAYER) != 0)
+ {
+ printMessage(USING_LAYER, lexer, null, "badlayout_using_layer",
null, Level.SUMMARY);
+ }
+
+ if ((lexer.badLayout & USING_SPACER) != 0)
+ {
+ printMessage(USING_SPACER, lexer, null,
"badlayout_using_spacer", null, Level.SUMMARY);
+ }
+
+ if ((lexer.badLayout & USING_FONT) != 0)
+ {
+ printMessage(USING_FONT, lexer, null, "badlayout_using_font",
null, Level.SUMMARY);
+ }
+
+ if ((lexer.badLayout & USING_NOBR) != 0)
+ {
+ printMessage(USING_NOBR, lexer, null, "badlayout_using_nobr",
null, Level.SUMMARY);
+ }
+
+ if ((lexer.badLayout & USING_BODY) != 0)
+ {
+ printMessage(USING_BODY, lexer, null, "badlayout_using_body",
null, Level.SUMMARY);
+ }
+ }
+ }
+
+ /**
+ * Prints the "unknown option" message.
+ * @param errout PrintWriter
+ * @param c invalid option char
+ */
+ public void unknownOption(PrintWriter errout, char c)
+ {
+ printMessage(errout, "unrecognized_option", new Object[]{new String(new
char[]{c})}, Level.ERROR);
+ }
+
+ /**
+ * Prints the "unknown file" message.
+ * @param errout PrintWriter
+ * @param file invalid file name
+ */
+ public void unknownFile(PrintWriter errout, String file)
+ {
+ printMessage(errout, "unknown_file", new Object[]{"Tidy",
file}, Level.ERROR);
+ }
+
+ /**
+ * Prints the "needs author intervention" message.
+ * @param errout PrintWriter
+ */
+ public void needsAuthorIntervention(PrintWriter errout)
+ {
+     // fixed text, no message parameters
+     final Object[] noParams = null;
+     printMessage(errout, "needs_author_intervention", noParams, Level.SUMMARY);
+ }
+
+ /**
+ * Prints the "missing body" message.
+ * @param errout PrintWriter
+ */
+ public void missingBody(PrintWriter errout)
+ {
+ printMessage(errout, "missing_body", null, Level.ERROR);
+ }
+
+ /**
+ * Prints the number of generated slides.
+ * @param errout PrintWriter
+ * @param count slides count
+ */
+ public void reportNumberOfSlides(PrintWriter errout, int count)
+ {
+ printMessage(errout, "slides_found", new Object[]{new Integer(count)},
Level.SUMMARY);
+ }
+
+ /**
+ * Prints tidy general info.
+ * @param errout PrintWriter
+ */
+ public void generalInfo(PrintWriter errout)
+ {
+     // fixed text, no message parameters
+     final Object[] noParams = null;
+     printMessage(errout, "general_info", noParams, Level.SUMMARY);
+ }
+
+ /**
+ * Prints tidy hello message.
+ * @param errout PrintWriter
+ */
+ public void helloMessage(PrintWriter errout)
+ {
+ printMessage(errout, "hello_message", new Object[]{Report.RELEASE_DATE,
this.currentFile}, Level.SUMMARY);
+ }
+
+ /**
+ * Sets the current file name.
+ * @param filename current file.
+ */
+ public void setFilename(String filename)
+ {
+     // kept for later messages such as the hello message (for use with Gnu Emacs)
+     currentFile = filename;
+ }
+
+ /**
+ * Prints information for html version in input file.
+ * @param errout PrintWriter
+ * @param lexer Lexer
+ * @param filename file name
+ * @param doctype doctype Node
+ */
+ public void reportVersion(PrintWriter errout, Lexer lexer, String filename, Node doctype)
+ {
+     String versionName = lexer.htmlVersionName();
+
+     // adjust reported position to first line
+     lexer.lines = 1;
+     lexer.columns = 1;
+
+     if (doctype != null)
+     {
+         // collects the text found between the first and the second double quote
+         StringBuffer quotedText = new StringBuffer();
+         int[] decoded = new int[1];
+         int quotesSeen = 0;
+         int pos = doctype.start;
+
+         while (pos < doctype.end)
+         {
+             int ch = doctype.textarray[pos];
+
+             // a negative value is treated as the start of a UTF-8 multibyte character;
+             // the decoder's return value is added to the position
+             // (presumably the number of extra bytes consumed - confirm in PPrint.getUTF8)
+             if (ch < 0)
+             {
+                 pos += PPrint.getUTF8(doctype.textarray, pos, decoded);
+                 ch = decoded[0];
+             }
+
+             if (ch == '"')
+             {
+                 quotesSeen++;
+             }
+             else if (quotesSeen == 1)
+             {
+                 quotedText.append((char) ch);
+             }
+
+             pos++;
+         }
+
+         Object[] doctypeParams = new Object[]{filename, quotedText};
+         printMessage(DOCTYPE_GIVEN_SUMMARY, lexer, null, "doctype_given", doctypeParams, Level.SUMMARY);
+     }
+
+     // fall back to a generic label when no version name is available
+     String shownVersion = (versionName != null) ? versionName : "HTML proprietary";
+     printMessage(REPORT_VERSION_SUMMARY, lexer, null, "report_version",
+         new Object[]{filename, shownVersion}, Level.SUMMARY);
+ }
+
+ /**
+ * Prints the number of error/warnings found.
+ * @param errout PrintWriter
+ * @param lexer Lexer
+ */
+ public void reportNumWarnings(PrintWriter errout, Lexer lexer)
+ {
+ if (lexer.warnings > 0 || lexer.errors > 0)
+ {
+ printMessage(
+ errout,
+ "num_warnings",
+ new Object[]{new Integer(lexer.warnings), new Integer(lexer.errors)},
+ Level.SUMMARY);
+ }
+ else
+ {
+ printMessage(errout, "no_warnings", null, Level.SUMMARY);
+ }
+ }
+
+ /**
+ * Prints tidy help.
+ * @param out PrintWriter
+ */
+ public void helpText(PrintWriter out)
+ {
+ printMessage(out, "help_text", new Object[]{"Tidy",
RELEASE_DATE}, Level.SUMMARY);
+ }
+
+ /**
+ * Prints the "bad tree" message.
+ * @param errout PrintWriter
+ */
+ public void badTree(PrintWriter errout)
+ {
+ printMessage(errout, "bad_tree", null, Level.ERROR);
+ }
+
+ /**
+ * Adds a message listener.
+ * @param listener TidyMessageListener
+ */
+ public void addMessageListener(TidyMessageListener listener)
+ {
+     // only one listener is stored: a later call replaces the previous one
+     final TidyMessageListener replacement = listener;
+     this.listener = replacement;
+ }
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamIn.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamIn.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamIn.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,114 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Input Stream.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org
</a>
+ * @author Andy Quick <a
href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a>
(translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+
+public interface StreamIn
+{
+
+ /**
+ * Value returned by the read methods when the end of the stream has been reached.
+ */
+ int END_OF_STREAM = -1;
+
+ /**
+ * Getter for <code>curcol</code>, the current column number.
+ * @return Returns the curcol.
+ */
+ int getCurcol();
+
+ /**
+ * Getter for <code>curline</code>, the current line number.
+ * @return Returns the curline.
+ */
+ int getCurline();
+
+ /**
+ * Reads the next char directly from the underlying stream.
+ * @return the char read, or {@link #END_OF_STREAM} when no more input is available
+ */
+ int readCharFromStream();
+
+ /**
+ * Reads the next char. Chars handed back through {@link #ungetChar(int)} are expected to be
+ * returned before new input is read (this is what <code>StreamInImpl</code> does).
+ * @return the char read, or {@link #END_OF_STREAM} when no more input is available
+ */
+ int readChar();
+
+ /**
+ * Pushes a char back so that it can be read again.
+ * @param c char to push back
+ */
+ void ungetChar(int c);
+
+ /**
+ * Has end of stream been reached?
+ * @return <code>true</code> if end of stream has been reached
+ */
+ boolean isEndOfStream();
+
+ /**
+ * Setter for lexer instance (needed for error reporting).
+ * @param lexer Lexer
+ */
+ void setLexer(Lexer lexer);
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamInFactory.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamInFactory.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamInFactory.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,104 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
+
+import org.ajax4jsf.Messages;
+
+
+/**
+ * Tidy Input factory.
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public final class StreamInFactory
+{
+
+    /**
+     * Don't instantiate.
+     */
+    private StreamInFactory()
+    {
+        // unused
+    }
+
+    /**
+     * Returns the appropriate StreamIn implementation for the given input source.
+     * A <code>Reader</code>, a <code>String</code> or an <code>InputStream</code> is accepted; an
+     * <code>InputStream</code> is read using the input encoding name taken from the configuration.
+     * @param config configuration instance (supplies the tab size and the input encoding name)
+     * @param in input source
+     * @return StreamIn instance
+     * @throws RuntimeException if the configured encoding is not supported, or if <code>in</code> is
+     * <code>null</code> or of an unsupported type
+     */
+    public static StreamIn getStreamIn(Configuration config, Object in)
+    {
+        if (in instanceof Reader)
+        {
+            return new StreamInJavaImpl((Reader) in, config.tabsize);
+        }
+
+        if (in instanceof String)
+        {
+            return new StreamInJavaImpl((String) in, config.tabsize);
+        }
+
+        if (in instanceof InputStream)
+        {
+            try
+            {
+                return new StreamInJavaImpl((InputStream) in, config.getInCharEncodingName(), config.tabsize);
+            }
+            catch (UnsupportedEncodingException e)
+            {
+                // keep the original exception as the cause so its stack trace is not lost
+                throw new RuntimeException(
+                    Messages.getMessage(Messages.UNSUPPORTED_ENCODING_ERROR, e.getMessage()), e);
+            }
+        }
+
+        // a null source has no class to name; report it explicitly instead of failing
+        // with a NullPointerException while building the error message
+        String sourceType = (in == null) ? "null" : in.getClass().getName();
+        throw new RuntimeException(
+            Messages.getMessage(Messages.UNSUPPORTED_INPUT_SOURCE_ERROR, sourceType));
+    }
+}
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamInImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamInImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamInImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,832 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.ajax4jsf.org.w3c.tidy.EncodingUtils.GetBytes;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+
+
+/**
+ * Input Stream Implementation. This implementation is from the c version of tidy and it
doesn't take advantage of java
+ * readers.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org
</a>
+ * @author Andy Quick <a
href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a>
(translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class StreamInImpl implements StreamIn
+{
+
+ private final static Log log = LogFactory.getLog(StreamInImpl.class);
+
+ /**
+ * number of characters kept in buffer.
+ */
+ private static final int CHARBUF_SIZE = 5;
+
+ /**
+ * needed for error reporting.
+ */
+ private Lexer lexer;
+
+ /**
+ * character buffer.
+ */
+ private int[] charbuf = new int[CHARBUF_SIZE];
+
+ /**
+ * actual position in buffer.
+ */
+ private int bufpos;
+
+ /**
+ * Private unget buffer for the raw bytes read from the input stream. Normally this will only be
+ * used by the UTF-8 decoder to resynchronize the input stream after finding an illegal UTF-8
+ * sequence, but it can also be used for other purposes when reading bytes in readCharFromStream.
+ */
+ private char[] rawBytebuf = new char[CHARBUF_SIZE];
+
+ /**
+ * actual position in rawBytebuf.
+ */
+ private int rawBufpos;
+
+ /**
+ * has a raw byte been pushed into stack?
+ */
+ private boolean rawPushed;
+
+ /**
+ * looking for an UTF BOM?
+ */
+ private boolean lookingForBOM = true;
+
+ /**
+ * has end of stream been reached?
+ */
+ private boolean endOfStream;
+
+ private boolean pushed;
+
+ private int tabs;
+
+ /**
+ * tab size in chars.
+ */
+ private int tabsize;
+
+ /**
+ * FSM for ISO2022.
+ */
+ private int state;
+
+ /**
+ * Encoding.
+ */
+ private int encoding;
+
+ /**
+ * current column number.
+ */
+ private int curcol;
+
+ /**
+ * last column.
+ */
+ private int lastcol;
+
+ /**
+ * current line number.
+ */
+ private int curline;
+
+ /**
+ * input stream.
+ */
+ private InputStream stream;
+
+ /**
+ * Getter.
+ */
+ private GetBytes getBytes;
+
+ /**
+ * Avoid mapping values > 127 to entities.
+ */
+ private boolean rawOut;
+
+ /**
+ * Instantiates a new StreamInImpl.
+ * @param stream input stream
+ * @param configuration Configuration
+ */
+ public StreamInImpl(InputStream stream, Configuration configuration)
+ {
+     this.stream = stream;
+     this.charbuf[0] = '\0';
+
+     // settings copied from the configuration
+     this.tabsize = configuration.tabsize;
+
+     // position tracking starts at line 1, column 1
+     this.curline = 1;
+     this.curcol = 1;
+
+     this.encoding = configuration.getInCharEncoding();
+     this.rawOut = configuration.rawOut;
+     this.state = EncodingUtils.FSM_ASCII;
+
+     // GetBytes callback that forwards to this stream's raw byte reader
+     this.getBytes = new GetBytes()
+     {
+
+         public void doGet(int[] buf, int[] count, boolean unget)
+         {
+             StreamInImpl.this.readRawBytesFromStream(buf, count, unget);
+         }
+     };
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.StreamIn#getCurcol()
+ */
+ public int getCurcol()
+ {
+     // current column number
+     return curcol;
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.StreamIn#getCurline()
+ */
+ public int getCurline()
+ {
+     // current line number
+     return curline;
+ }
+
+ /**
+ * Setter for <code>lexer</code>.
+ * @param lexer The lexer to set.
+ */
+ public void setLexer(Lexer lexer)
+ {
+ this.lexer = lexer;
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.StreamIn#readChar()
+ */
+ public int readChar()
+ {
+     int c;
+
+     // chars handed back through ungetChar are served first, most recent first
+     if (this.pushed)
+     {
+         c = this.charbuf[--(this.bufpos)];
+         if (this.bufpos == 0)
+         {
+             this.pushed = false;
+         }
+
+         if (c == '\n')
+         {
+             this.curcol = 1;
+             this.curline++;
+         }
+         else
+         {
+             this.curcol++;
+         }
+
+         return c;
+     }
+
+     // remembered so that ungetChar can restore the column
+     this.lastcol = this.curcol;
+
+     // a tab is expanded to spaces: hand out the ones still pending
+     if (this.tabs > 0)
+     {
+         this.curcol++;
+         this.tabs--;
+         return ' ';
+     }
+
+     while (true)
+     {
+         c = readCharFromStream();
+
+         if (c < 0)
+         {
+             return END_OF_STREAM;
+         }
+
+         if (c == '\n')
+         {
+             this.curcol = 1;
+             this.curline++;
+             break;
+         }
+
+         // #427663 - map '\r' to '\n' - Andy Quick 11 Aug 00
+         if (c == '\r')
+         {
+             c = readCharFromStream();
+             if (c != '\n')
+             {
+                 if (c != END_OF_STREAM) // EOF fix by Terry Teague 12 Aug 01
+                 {
+                     ungetChar(c);
+                 }
+                 c = '\n';
+             }
+             this.curcol = 1;
+             this.curline++;
+             break;
+         }
+
+         if (c == '\t')
+         {
+             // return one space now and remember how many more are needed to reach the next tab stop
+             this.tabs = this.tabsize - ((this.curcol - 1) % this.tabsize) - 1;
+             this.curcol++;
+             c = ' ';
+             break;
+         }
+
+         // strip control characters, except for Esc
+         if (c == '\033')
+         {
+             break;
+         }
+         else if (c == '\f' && !lexer.configuration.xmlTags)
+         {
+             // Form Feed is allowed in HTML. The previous test used '\015', which is carriage
+             // return and can never reach this point, so form feeds were always discarded.
+             break;
+         }
+         else if (0 < c && c < 32)
+         {
+             continue; // discard control char
+         }
+
+         // watch out for chars that have already been decoded such as
+         // IS02022, UTF-8 etc, that don't require further decoding
+         if (rawOut
+             || this.encoding == Configuration.ISO2022
+             || this.encoding == Configuration.UTF8
+             || this.encoding == Configuration.SHIFTJIS // #431953 - RJ
+             || this.encoding == Configuration.BIG5) // #431953 - RJ
+         {
+             this.curcol++;
+             break;
+         }
+
+         // handle surrogate pairs
+         if ((this.encoding == Configuration.UTF16LE)
+             || (this.encoding == Configuration.UTF16)
+             || (this.encoding == Configuration.UTF16BE))
+         {
+             if (c > EncodingUtils.MAX_UTF8_FROM_UCS4)
+             {
+                 // invalid UTF-16 value
+                 this.lexer.report.encodingError(this.lexer, Report.INVALID_UTF16 | Report.DISCARDED_CHAR, c);
+                 c = 0;
+             }
+             // first unit of a pair (the range named UTF16_LOW_SURROGATE_* in EncodingUtils)
+             else if (c >= EncodingUtils.UTF16_LOW_SURROGATE_BEGIN && c <= EncodingUtils.UTF16_LOW_SURROGATE_END)
+             {
+                 int n, m;
+
+                 n = c;
+
+                 m = readCharFromStream();
+                 if (m < 0)
+                 {
+                     return END_OF_STREAM;
+                 }
+                 // second unit of a pair (the range named UTF16_HIGH_SURROGATE_* in EncodingUtils)
+                 if (m >= EncodingUtils.UTF16_HIGH_SURROGATE_BEGIN && m <= EncodingUtils.UTF16_HIGH_SURROGATE_END)
+                 {
+                     // pair found, recombine them
+                     c = (n - EncodingUtils.UTF16_LOW_SURROGATE_BEGIN)
+                         * 0x400
+                         + (m - EncodingUtils.UTF16_HIGH_SURROGATE_BEGIN)
+                         + 0x10000;
+
+                     // check for invalid pairs
+                     if (((c & 0x0000FFFE) == 0x0000FFFE)
+                         || ((c & 0x0000FFFF) == 0x0000FFFF)
+                         || (c < EncodingUtils.UTF16_SURROGATES_BEGIN))
+                     {
+                         this.lexer.report
+                             .encodingError(this.lexer, Report.INVALID_UTF16 | Report.DISCARDED_CHAR, c);
+                         c = 0;
+                     }
+                 }
+                 else
+                 {
+                     // not a valid pair
+                     this.lexer.report.encodingError(this.lexer, Report.INVALID_UTF16 | Report.DISCARDED_CHAR, c);
+                     c = 0;
+                     // should we unget the just read char?
+                 }
+             }
+             // any other value needs no recombination
+         }
+
+         if (this.encoding == Configuration.MACROMAN)
+         {
+             c = EncodingUtils.decodeMacRoman(c);
+         }
+
+         // produced e.g. as a side-effect of smart quotes in Word
+         // but can't happen if using MACROMAN encoding
+         if (127 < c && c < 160)
+         {
+             int c1 = 0;
+             int replaceMode;
+             boolean vendorEncoding = (this.encoding == Configuration.WIN1252)
+                 || (this.encoding == Configuration.MACROMAN);
+
+             // set error position just before offending character
+             this.lexer.lines = this.curline;
+             this.lexer.columns = this.curcol;
+
+             if ((this.encoding == Configuration.WIN1252)
+                 || (this.lexer.configuration.replacementCharEncoding == Configuration.WIN1252))
+             {
+                 c1 = EncodingUtils.decodeWin1252(c);
+             }
+             else if (this.lexer.configuration.replacementCharEncoding == Configuration.MACROMAN)
+             {
+                 c1 = EncodingUtils.decodeMacRoman(c);
+             }
+
+             replaceMode = TidyUtils.toBoolean(c1) ? Report.REPLACED_CHAR : Report.DISCARDED_CHAR;
+
+             // The vendor specific report applies only when no replacement was found for a
+             // vendor encoding. The previous condition lacked parentheses around the two
+             // encoding tests, so MacRoman input was reported even when c1 was non-zero.
+             if ((c1 == 0) && vendorEncoding)
+             {
+                 this.lexer.report.encodingError(this.lexer, Report.VENDOR_SPECIFIC_CHARS | replaceMode, c);
+             }
+             else if (!vendorEncoding)
+             {
+                 this.lexer.report.encodingError(this.lexer, Report.INVALID_SGML_CHARS | replaceMode, c);
+             }
+
+             c = c1;
+         }
+
+         if (c == 0)
+         {
+             continue; // illegal char is discarded
+         }
+
+         this.curcol++;
+         break;
+     }
+
+     return c;
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.StreamIn#ungetChar(int)
+ */
+ public void ungetChar(int c)
+ {
+     this.pushed = true;
+     if (this.bufpos >= CHARBUF_SIZE)
+     {
+         // Buffer full: drop the oldest pushed-back char (index 0) by shifting the others
+         // down one slot, so the most recently pushed chars, which readChar returns first,
+         // stay intact. The previous copy went from index 0 to index 1, which duplicated
+         // the oldest entry and lost the two newest ones.
+         System.arraycopy(this.charbuf, 1, this.charbuf, 0, CHARBUF_SIZE - 1);
+         this.bufpos--;
+     }
+     this.charbuf[(this.bufpos)++] = c;
+
+     // undo the line advance readChar made for this char
+     if (c == '\n')
+     {
+         --this.curline;
+     }
+
+     // restore the column recorded by readChar before its last stream read
+     this.curcol = this.lastcol;
+ }
+
+ /**
+ * @see org.ajax4jsf.org.w3c.tidy.StreamIn#isEndOfStream()
+ */
+ public boolean isEndOfStream()
+ {
+     // set by readCharFromStream once no more raw bytes can be read
+     return endOfStream;
+ }
+
+ /**
+  * Reads the next character from the raw byte input, decoding it according to the current encoding.
+  * <p>
+  * On the first call for a UTF-8/UTF-16 input a byte order mark is looked for; when one is found the
+  * encoding is switched accordingly (reporting a mismatch with the configured encoding) and the decoded
+  * BOM is returned. Bytes read ahead that turn out not to be a BOM are pushed back onto the raw buffer.
+  * @return the decoded character, or <code>END_OF_STREAM</code> when the input is exhausted
+  * @see org.ajax4jsf.org.w3c.tidy.StreamIn#readCharFromStream()
+  */
+ public int readCharFromStream()
+ {
+     int c;
+     int[] n = new int[]{0};
+     int[] tempchar = new int[1];
+     int[] count = new int[]{1};
+
+     readRawBytesFromStream(tempchar, count, false);
+     if (count[0] <= 0)
+     {
+         endOfStream = true;
+         return END_OF_STREAM;
+     }
+
+     c = tempchar[0];
+
+     if (lookingForBOM
+         && (this.encoding == Configuration.UTF16
+             || this.encoding == Configuration.UTF16LE
+             || this.encoding == Configuration.UTF16BE
+             || this.encoding == Configuration.UTF8))
+     {
+         // check for a Byte Order Mark
+         int c1, bom;
+
+         lookingForBOM = false;
+
+         if (c == END_OF_STREAM)
+         {
+             endOfStream = true;
+             return END_OF_STREAM;
+         }
+
+         count[0] = 1;
+         readRawBytesFromStream(tempchar, count, false);
+         // At EOF readRawBytesFromStream leaves tempchar untouched, so remember whether a byte
+         // was really read; a stale value must neither be matched against a BOM nor pushed back.
+         boolean haveSecond = count[0] > 0;
+         c1 = tempchar[0];
+
+         bom = (c << 8) + c1;
+
+         if (haveSecond && bom == EncodingUtils.UNICODE_BOM_BE)
+         {
+             // big-endian UTF-16
+             if (this.encoding != Configuration.UTF16 && this.encoding != Configuration.UTF16BE)
+             {
+                 this.lexer.report.encodingError(this.lexer, Report.ENCODING_MISMATCH, Configuration.UTF16BE);
+                 // non-fatal error
+             }
+             this.encoding = Configuration.UTF16BE;
+             this.lexer.configuration.setInCharEncoding(Configuration.UTF16BE);
+             return EncodingUtils.UNICODE_BOM; // return decoded BOM
+         }
+         else if (haveSecond && bom == EncodingUtils.UNICODE_BOM_LE)
+         {
+             // little-endian UTF-16
+             if (this.encoding != Configuration.UTF16 && this.encoding != Configuration.UTF16LE)
+             {
+                 this.lexer.report.encodingError(this.lexer, Report.ENCODING_MISMATCH, Configuration.UTF16LE);
+                 // non-fatal error
+             }
+             this.encoding = Configuration.UTF16LE;
+             this.lexer.configuration.setInCharEncoding(Configuration.UTF16LE);
+             return EncodingUtils.UNICODE_BOM; // return decoded BOM
+         }
+         else if (haveSecond)
+         {
+             int c2;
+
+             count[0] = 1;
+             readRawBytesFromStream(tempchar, count, false);
+             boolean haveThird = count[0] > 0;
+             c2 = tempchar[0];
+
+             if (haveThird && ((c << 16) + (c1 << 8) + c2) == EncodingUtils.UNICODE_BOM_UTF8)
+             {
+                 // UTF-8: compare against the configured encoding BEFORE overwriting it,
+                 // otherwise the mismatch can never be reported
+                 if (this.encoding != Configuration.UTF8)
+                 {
+                     this.lexer.report.encodingError(this.lexer, Report.ENCODING_MISMATCH, Configuration.UTF8);
+                     // non-fatal error
+                 }
+                 this.encoding = Configuration.UTF8;
+                 this.lexer.configuration.setInCharEncoding(Configuration.UTF8);
+                 return EncodingUtils.UNICODE_BOM; // return decoded BOM
+             }
+
+             // the 2nd and/or 3rd bytes weren't what we were expecting, so unget the extra bytes
+             // that were actually read
+             rawPushed = true;
+
+             if ((rawBufpos + 1) >= CHARBUF_SIZE)
+             {
+                 System.arraycopy(rawBytebuf, 2, rawBytebuf, 0, CHARBUF_SIZE - 2);
+                 rawBufpos -= 2;
+             }
+             // make sure the bytes are pushed in the right order (last read byte first)
+             if (haveThird)
+             {
+                 rawBytebuf[rawBufpos++] = (char) c2;
+             }
+             rawBytebuf[rawBufpos++] = (char) c1;
+             // drop through to code below, with the original char
+         }
+     }
+
+     this.lookingForBOM = false;
+
+     // A document in ISO-2022 based encoding uses some ESC sequences called "designator" to switch
+     // character sets. The designators defined and used in ISO-2022-JP are: "ESC" + "(" + ? for ISO646
+     // variants, "ESC" + "$" + ? and "ESC" + "$" + "(" + ? for multibyte character sets, where ? stands
+     // for a single character used to indicate the character set for multibyte characters. Tidy handles
+     // this by preserving the escape sequence and setting the top bit of each byte for non-ascii chars.
+     // This bit is then cleared on output. The input stream keeps track of the state to determine when
+     // to set/clear the bit.
+
+     if (this.encoding == Configuration.ISO2022)
+     {
+         if (c == 0x1b) // ESC
+         {
+             this.state = EncodingUtils.FSM_ESC;
+             return c;
+         }
+
+         switch (this.state)
+         {
+             case EncodingUtils.FSM_ESC :
+                 if (c == '$')
+                 {
+                     this.state = EncodingUtils.FSM_ESCD;
+                 }
+                 else if (c == '(')
+                 {
+                     this.state = EncodingUtils.FSM_ESCP;
+                 }
+                 else
+                 {
+                     this.state = EncodingUtils.FSM_ASCII;
+                 }
+                 break;
+
+             case EncodingUtils.FSM_ESCD :
+                 if (c == '(')
+                 {
+                     this.state = EncodingUtils.FSM_ESCDP;
+                 }
+                 else
+                 {
+                     this.state = EncodingUtils.FSM_NONASCII;
+                 }
+                 break;
+
+             case EncodingUtils.FSM_ESCDP :
+                 this.state = EncodingUtils.FSM_NONASCII;
+                 break;
+
+             case EncodingUtils.FSM_ESCP :
+                 this.state = EncodingUtils.FSM_ASCII;
+                 break;
+
+             case EncodingUtils.FSM_NONASCII :
+                 c |= 0x80;
+                 break;
+
+             default :
+                 //
+                 break;
+         }
+
+         return c;
+     }
+
+     if (this.encoding == Configuration.UTF16LE)
+     {
+         int c1;
+
+         count[0] = 1;
+         readRawBytesFromStream(tempchar, count, false);
+         if (count[0] <= 0)
+         {
+             endOfStream = true;
+             return END_OF_STREAM;
+         }
+         c1 = tempchar[0];
+
+         // low byte came first
+         return (c1 << 8) + c;
+     }
+
+     // UTF-16 is big-endian by default
+     if ((this.encoding == Configuration.UTF16) || (this.encoding == Configuration.UTF16BE))
+     {
+         int c1;
+
+         count[0] = 1;
+         readRawBytesFromStream(tempchar, count, false);
+         if (count[0] <= 0)
+         {
+             endOfStream = true;
+             return END_OF_STREAM;
+         }
+         c1 = tempchar[0];
+
+         // high byte came first
+         return (c << 8) + c1;
+     }
+
+     if (this.encoding == Configuration.UTF8)
+     {
+         // deal with UTF-8 encoded char
+         int[] count2 = new int[]{0};
+
+         // first byte "c" is passed in separately
+         boolean err = EncodingUtils.decodeUTF8BytesToChar(n, c, new byte[0], this.getBytes, count2, 0);
+         if (!err && (n[0] == END_OF_STREAM) && (count2[0] == 1)) /* EOF */
+         {
+             endOfStream = true;
+             return END_OF_STREAM;
+         }
+         else if (err)
+         {
+             /* set error position just before offending character */
+             this.lexer.lines = this.curline;
+             this.lexer.columns = this.curcol;
+
+             this.lexer.report.encodingError(this.lexer, (short) (Report.INVALID_UTF8 | Report.REPLACED_CHAR), n[0]);
+             n[0] = 0xFFFD; /* replacement char */
+         }
+
+         return n[0];
+     }
+
+     // #431953 - start RJ
+     // This section is suitable for any "multibyte" variable-width character encoding in which a
+     // one-byte code is less than 128, and the first byte of a two-byte code is greater or equal to 128.
+     // Note that Big5 and ShiftJIS fit into this kind, even though their second byte may be less than 128
+
+     if ((this.encoding == Configuration.BIG5) || (this.encoding == Configuration.SHIFTJIS))
+     {
+         if (c < 128)
+         {
+             return c;
+         }
+         else if ((this.encoding == Configuration.SHIFTJIS) && (c >= 0xa1 && c <= 0xdf))
+         {
+             // 461643 - fix suggested by Rick Cameron 14 Sep 01
+             // for Shift_JIS, the values from 0xa1 through 0xdf represent single-byte characters
+             // (U+FF61 to U+FF9F - half-width Katakana)
+             return c;
+         }
+         else
+         {
+             int c1;
+             count[0] = 1;
+             readRawBytesFromStream(tempchar, count, false);
+
+             if (count[0] <= 0)
+             {
+                 endOfStream = true;
+                 return END_OF_STREAM;
+             }
+
+             c1 = tempchar[0];
+             return (c << 8) + c1;
+         }
+     }
+     // #431953 - end RJ
+
+     // any other encoding: the byte is the character
+     return c;
+ }
+
+ /**
+  * Moves up to <code>count[0]</code> raw bytes between <code>buf</code> and the raw input.
+  * <p>
+  * With <code>unget == false</code> every slot of <code>buf</code> is filled from the raw push-back buffer
+  * while that holds bytes, otherwise from the stream. With <code>unget == true</code> every
+  * <code>buf[i]</code> is pushed onto the raw push-back buffer to re-synchronize the input; normally
+  * UTF-8 successor bytes are handled through this routine. When the stream is exhausted,
+  * <code>count[0]</code> is set to <code>-i</code> (hence &lt;= 0) and the transfer stops. An
+  * <code>IOException</code> is logged and otherwise ignored.
+  * <p>
+  * NOTE(review): in unget mode one byte is consumed from the stream just to probe for EOF - confirm
+  * that this is intended.
+  * @param buf character buffer
+  * @param count single-element array holding the number of bytes to transfer; &lt;= 0 afterwards on EOF
+  * @param unget <code>true</code> to push the bytes of <code>buf</code> back instead of reading
+  */
+ protected void readRawBytesFromStream(int[] buf, int[] count, boolean unget)
+ {
+     try
+     {
+         for (int pos = 0; pos < count[0]; pos++)
+         {
+             if (!unget)
+             {
+                 if (rawPushed)
+                 {
+                     // previously pushed-back bytes are served first, newest first
+                     buf[pos] = rawBytebuf[--rawBufpos];
+                     rawPushed = rawBufpos != 0;
+                     continue;
+                 }
+
+                 int next = readByteFromStreamBuffer();
+                 if (next == END_OF_STREAM)
+                 {
+                     count[0] = -pos;
+                     break;
+                 }
+                 buf[pos] = (char) next;
+                 continue;
+             }
+
+             // unget mode
+             int probe = readByteFromStreamBuffer();
+
+             // should never get here; testing for 0xFF, a valid char, is not a good idea
+             if (probe == END_OF_STREAM)
+             {
+                 count[0] = -pos;
+                 return;
+             }
+
+             rawPushed = true;
+
+             if (rawBufpos >= CHARBUF_SIZE)
+             {
+                 // buffer full: discard the oldest byte
+                 System.arraycopy(rawBytebuf, 1, rawBytebuf, 0, CHARBUF_SIZE - 1);
+                 rawBufpos--;
+             }
+             rawBytebuf[rawBufpos++] = (char) buf[pos];
+         }
+     }
+     catch (IOException e)
+     {
+         log.error("StreamInImpl.readRawBytesFromStream: " + e.toString());
+     }
+ }
+
+ private byte[] bytes = new byte[256];
+ private int index = 0;
+ private int length = 0;
+
+ /**
+  * Returns the next byte of the input as an unsigned value (0-255), refilling the <code>bytes</code>
+  * buffer from <code>stream</code> whenever it has been used up.
+  * @return the next byte, or -1 once the stream has reported its end
+  * @throws IOException if refilling the buffer fails
+  */
+ private int readByteFromStreamBuffer() throws IOException {
+     boolean exhausted = index >= length;
+     if (exhausted) {
+         if (length < 0) {
+             // a previous refill already hit the end of the stream
+             return -1;
+         }
+         length = stream.read(bytes);
+         index = 0;
+     }
+     if (index >= length) {
+         return -1;
+     }
+     // mask to undo sign extension of the byte
+     return bytes[index++] & 0xFF;
+ }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamInJavaImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamInJavaImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StreamInJavaImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,342 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+
+
+/**
+ * StreamIn implementation that reads already decoded characters from a {@link Reader}.
+ * <p>
+ * While reading, <code>\r\n</code> and lone <code>\r</code> are turned into <code>\n</code>, tabs are expanded
+ * to spaces and the current line and column are tracked.
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class StreamInJavaImpl implements StreamIn
+{
+
+    /**
+     * number of characters kept in the push-back buffer.
+     */
+    private static final int CHARBUF_SIZE = 10;
+
+    /**
+     * push-back buffer; the most recently pushed character is at <code>bufpos - 1</code>.
+     */
+    private final int[] charbuf = new int[CHARBUF_SIZE];
+
+    /**
+     * number of characters currently held in the push-back buffer.
+     */
+    private int bufpos;
+
+    /**
+     * source of characters.
+     */
+    private final Reader reader;
+
+    /**
+     * has end of stream been reached?
+     */
+    private boolean endOfStream;
+
+    /**
+     * Is char pushed?
+     */
+    private boolean pushed;
+
+    /**
+     * current column number.
+     */
+    private int curcol;
+
+    /**
+     * column before the last character was read from the reader.
+     */
+    private int lastcol;
+
+    /**
+     * current line number.
+     */
+    private int curline;
+
+    /**
+     * tab size in chars.
+     */
+    private final int tabsize;
+
+    /**
+     * spaces still to be returned for the tab being expanded.
+     */
+    private int tabs;
+
+    /**
+     * read-ahead buffer filled from the reader.
+     */
+    private final char[] chars = new char[256];
+
+    /**
+     * position of the next unread character in <code>chars</code>.
+     */
+    private int index = 0;
+
+    /**
+     * number of valid characters in <code>chars</code>; negative once the reader has reported its end.
+     */
+    private int length = 0;
+
+    /**
+     * Instantiates a new StreamInJavaImpl reading from the given reader.
+     * @param in source of characters
+     * @param tabsize tab size in chars
+     */
+    public StreamInJavaImpl(Reader in, int tabsize)
+    {
+        this.reader = in;
+        this.pushed = false;
+        this.tabsize = tabsize;
+        this.curline = 1;
+        this.curcol = 1;
+        this.endOfStream = false;
+    }
+
+    /**
+     * Instantiates a new StreamInJavaImpl reading from an in-memory string.
+     * @param content text to read
+     * @param tabsize tab size in chars
+     */
+    public StreamInJavaImpl(String content, int tabsize)
+    {
+        this(new StringReader(content), tabsize);
+    }
+
+    /**
+     * Instantiates a new StreamInJavaImpl decoding the given byte stream.
+     * @param stream byte input
+     * @param encoding name of the charset used to decode <code>stream</code>
+     * @param tabsize tab size in chars
+     * @throws UnsupportedEncodingException if <code>encoding</code> is not supported
+     */
+    public StreamInJavaImpl(InputStream stream, String encoding, int tabsize) throws UnsupportedEncodingException
+    {
+        this(new InputStreamReader(stream, encoding), tabsize);
+    }
+
+    /**
+     * Reads one character straight from the reader, without push-back, newline or tab handling.
+     * An I/O error is treated as end of stream.
+     * @return the character read, or a negative value at end of stream
+     * @see org.ajax4jsf.org.w3c.tidy.StreamIn#readCharFromStream()
+     */
+    public int readCharFromStream()
+    {
+        int c;
+        try
+        {
+            c = readCharFromStreamBuffer();
+            if (c < 0)
+            {
+                endOfStream = true;
+            }
+        }
+        catch (IOException e)
+        {
+            // @todo how to handle? For now a failing reader simply ends the input.
+            endOfStream = true;
+            return END_OF_STREAM;
+        }
+
+        return c;
+    }
+
+    /**
+     * Returns the next character, serving pushed-back characters first and normalising line ends and tabs.
+     * @return the next character, or <code>END_OF_STREAM</code>
+     * @see org.ajax4jsf.org.w3c.tidy.StreamIn#readChar()
+     */
+    public int readChar()
+    {
+        int c;
+
+        if (this.pushed)
+        {
+            c = this.charbuf[--(this.bufpos)];
+            if ((this.bufpos) == 0)
+            {
+                this.pushed = false;
+            }
+
+            if (c == '\n')
+            {
+                this.curcol = 1;
+                this.curline++;
+                return c;
+            }
+
+            this.curcol++;
+            return c;
+        }
+
+        this.lastcol = this.curcol;
+
+        if (this.tabs > 0)
+        {
+            // still expanding a tab: hand out one more space
+            this.curcol++;
+            this.tabs--;
+            return ' ';
+        }
+
+        c = readCharFromStream();
+
+        if (c < 0)
+        {
+            endOfStream = true;
+            return END_OF_STREAM;
+        }
+
+        if (c == '\n')
+        {
+            this.curcol = 1;
+            this.curline++;
+            return c;
+        }
+        else if (c == '\r') // \r\n
+        {
+            c = readCharFromStream();
+            if (c != '\n')
+            {
+                // lone \r: keep the character that followed it for the next call
+                if (c != END_OF_STREAM)
+                {
+                    ungetChar(c);
+                }
+                c = '\n';
+            }
+            this.curcol = 1;
+            this.curline++;
+            return c;
+        }
+
+        if (c == '\t')
+        {
+            // first space now, the remaining ones up to the next tab stop on later calls
+            this.tabs = this.tabsize - ((this.curcol - 1) % this.tabsize) - 1;
+            this.curcol++;
+            c = ' ';
+            return c;
+        }
+
+        this.curcol++;
+
+        return c;
+    }
+
+    /**
+     * Pushes a character back so that <code>readChar()</code> returns it before reading further input.
+     * When the push-back buffer is full, the oldest pushed character is discarded.
+     * @param c character to push back
+     * @see org.ajax4jsf.org.w3c.tidy.StreamIn#ungetChar(int)
+     */
+    public void ungetChar(int c)
+    {
+        this.pushed = true;
+        if (this.bufpos >= CHARBUF_SIZE)
+        {
+            // Buffer full: drop the oldest entry (index 0) by shifting everything down one slot.
+            // Copying 0 -> 1 instead would duplicate the oldest entry and lose the two newest ones.
+            System.arraycopy(this.charbuf, 1, this.charbuf, 0, CHARBUF_SIZE - 1);
+            this.bufpos--;
+        }
+        this.charbuf[(this.bufpos)++] = c;
+
+        if (c == '\n')
+        {
+            --this.curline;
+        }
+
+        this.curcol = this.lastcol;
+    }
+
+    /**
+     * @return <code>true</code> once a read has hit the end of the input
+     * @see org.ajax4jsf.org.w3c.tidy.StreamIn#isEndOfStream()
+     */
+    public boolean isEndOfStream()
+    {
+        return endOfStream;
+    }
+
+    /**
+     * Getter for <code>curcol</code>.
+     * @return Returns the curcol.
+     */
+    public int getCurcol()
+    {
+        return this.curcol;
+    }
+
+    /**
+     * Getter for <code>curline</code>.
+     * @return Returns the curline.
+     */
+    public int getCurline()
+    {
+        return this.curline;
+    }
+
+    /**
+     * @param lexer ignored
+     * @see org.ajax4jsf.org.w3c.tidy.StreamIn#setLexer(org.ajax4jsf.org.w3c.tidy.Lexer)
+     */
+    public void setLexer(Lexer lexer)
+    {
+        // unused in the java implementation
+    }
+
+    /**
+     * Returns the next character from the read-ahead buffer, refilling it from the reader when used up.
+     * @return the next character, or -1 once the reader has reported its end
+     * @throws IOException if the reader fails
+     */
+    private int readCharFromStreamBuffer() throws IOException
+    {
+        if (index >= length)
+        {
+            if (length < 0)
+            {
+                // a previous refill already hit the end of the reader
+                return -1;
+            }
+            length = reader.read(chars);
+            index = 0;
+        }
+        if (index < length)
+        {
+            return chars[index++];
+        }
+        return -1;
+    }
+
+}
\ No newline at end of file
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Style.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Style.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Style.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,101 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Node of a singly linked list that pairs a tag name and class name with style properties.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class Style
+{
+
+    /** Name of the tag this entry belongs to. */
+    protected String tag;
+
+    /** Class name associated with the tag. */
+    protected String tagClass;
+
+    /** Style properties text. */
+    protected String properties;
+
+    /** Following entry in the list, or <code>null</code> for the last one. */
+    protected Style next;
+
+    /**
+     * Creates a list entry and links it in front of <code>next</code>.
+     * @param tag Tag name
+     * @param tagClass Tag class
+     * @param properties Style properties
+     * @param next Entry that follows this one. Can be null.
+     */
+    public Style(String tag, String tagClass, String properties, Style next)
+    {
+        this.next = next;
+        this.properties = properties;
+        this.tagClass = tagClass;
+        this.tag = tag;
+    }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StyleProp.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StyleProp.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/StyleProp.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,94 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Node of a singly linked list of style properties (name/value pairs).
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class StyleProp
+{
+
+    /** Property name. */
+    protected String name;
+
+    /** Property value. */
+    protected String value;
+
+    /** Following property in the list, or <code>null</code> for the last one. */
+    protected StyleProp next;
+
+    /**
+     * Creates a property entry and links it in front of <code>next</code>.
+     * @param name Style name
+     * @param value Style value
+     * @param next Property that follows this one. Can be null.
+     */
+    public StyleProp(String name, String value, StyleProp next)
+    {
+        this.next = next;
+        this.value = value;
+        this.name = name;
+    }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TagCheck.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TagCheck.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TagCheck.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,74 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Callback interface for a check that is run against a single node, declared as checking the node's
+ * HTML attributes.
+ * <p>
+ * NOTE(review): only the contract is declared here; what exactly is validated depends on the
+ * implementing class - confirm against the implementations.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public interface TagCheck
+{
+
+ /**
+ * Checks attributes in given Node.
+ * @param lexer Lexer passed to the check together with the node
+ * @param node Node to check for valid attributes.
+ */
+ void check(Lexer lexer, Node node);
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TagCheckImpl.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TagCheckImpl.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TagCheckImpl.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,637 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Tag checker implementations: one nested <code>TagCheck</code> class per specially handled HTML element, each
+ * exposed through a shared constant.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public final class TagCheckImpl
+{
+
+ /**
+ * Shared CheckHTML instance; registered in TagTable for the <code>html</code> element.
+ */
+ public static final TagCheck HTML = new CheckHTML();
+
+ /**
+ * Shared CheckSCRIPT instance; registered in TagTable for the <code>script</code> element.
+ */
+ public static final TagCheck SCRIPT = new CheckSCRIPT();
+
+ /**
+ * Shared CheckTABLE instance; registered in TagTable for the <code>table</code> element.
+ */
+ public static final TagCheck TABLE = new CheckTABLE();
+
+ /**
+ * Shared CheckCaption instance; registered in TagTable for the <code>caption</code> element.
+ */
+ public static final TagCheck CAPTION = new CheckCaption();
+
+ /**
+ * Shared CheckIMG instance; registered in TagTable for the <code>img</code> element.
+ */
+ public static final TagCheck IMG = new CheckIMG();
+
+ /**
+ * Shared CheckAREA instance; registered in TagTable for the <code>area</code> element.
+ */
+ public static final TagCheck AREA = new CheckAREA();
+
+ /**
+ * Shared CheckAnchor instance; registered in TagTable for the <code>a</code> element.
+ */
+ public static final TagCheck ANCHOR = new CheckAnchor();
+
+ /**
+ * Shared CheckMap instance; registered in TagTable for the <code>map</code> element.
+ */
+ public static final TagCheck MAP = new CheckMap();
+
+ /**
+ * Shared CheckSTYLE instance; registered in TagTable for the <code>style</code> element.
+ */
+ public static final TagCheck STYLE = new CheckSTYLE();
+
+ /**
+ * Shared CheckTableCell instance; registered in TagTable for the <code>td</code> and <code>th</code> elements.
+ */
+ public static final TagCheck TABLECELL = new CheckTableCell();
+
+ /**
+ * Shared CheckLINK instance; registered in TagTable for the <code>link</code> element.
+ */
+ public static final TagCheck LINK = new CheckLINK();
+
+ /**
+ * Shared CheckHR instance; registered in TagTable for the <code>hr</code> element.
+ */
+ public static final TagCheck HR = new CheckHR();
+
+ /**
+ * Shared CheckForm instance; registered in TagTable for the <code>form</code> element.
+ */
+ public static final TagCheck FORM = new CheckForm();
+
+ /**
+ * Shared CheckMeta instance; registered in TagTable for the <code>meta</code> element.
+ */
+ public static final TagCheck META = new CheckMeta();
+
+ /**
+ * Private constructor: this class is only a holder for the checker constants and nested classes and is not meant
+ * to be instantiated.
+ */
+ private TagCheckImpl()
+ {
+ // unused
+ }
+
+    /**
+     * Tag checker for the <code>html</code> element. Switches the lexer to XHTML handling when the element declares
+     * the XHTML namespace and then checks every attribute of the node.
+     */
+    public static class CheckHTML implements TagCheck
+    {
+
+        /**
+         * XHTML namespace URI.
+         */
+        private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";
+
+        /**
+         * Looks at the <code>xmlns</code> attribute; when its value equals the XHTML namespace the lexer is flagged
+         * as "voyager" and the output options are adjusted. Afterwards each attribute is checked in list order.
+         * @param lexer Lexer whose flags and configuration may be modified
+         * @param node the node to check
+         */
+        public void check(Lexer lexer, Node node)
+        {
+            final AttVal namespaceDecl = node.getAttrByName("xmlns");
+            final boolean isXhtml = namespaceDecl != null && XHTML_NAMESPACE.equals(namespaceDecl.value);
+
+            if (isXhtml)
+            {
+                lexer.isvoyager = true;
+
+                // unless the user has specified plain HTML output, the output format will be XHTML
+                if (!lexer.configuration.htmlOut)
+                {
+                    lexer.configuration.xHTML = true;
+                }
+
+                // adjust other config options, just as in Configuration
+                lexer.configuration.xmlOut = true;
+                lexer.configuration.upperCaseTags = false;
+                lexer.configuration.upperCaseAttrs = false;
+            }
+
+            AttVal current = node.attributes;
+            while (current != null)
+            {
+                current.checkAttribute(lexer, node);
+                current = current.next;
+            }
+        }
+
+    }
+
+    /**
+     * Tag checker for <code>script</code> elements. Reports a missing <code>type</code> attribute and, where
+     * possible, adds one derived from the <code>language</code> attribute.
+     */
+    public static class CheckSCRIPT implements TagCheck
+    {
+
+        /**
+         * Checks all attributes, then handles a missing <code>type</code>: the omission is reported and a type is
+         * added. Without a <code>language</code> attribute the type defaults to <code>text/javascript</code>; with
+         * one, only the javascript/jscript and vbscript languages (compared case-insensitively) produce a type.
+         * @param lexer Lexer used for reporting
+         * @param node the script node to check
+         */
+        public void check(Lexer lexer, Node node)
+        {
+            node.checkAttributes(lexer);
+
+            final AttVal languageAttr = node.getAttrByName("language");
+            if (node.getAttrByName("type") != null)
+            {
+                return;
+            }
+
+            lexer.report.attrError(lexer, node, new AttVal(null, null, '"', "type", ""), Report.MISSING_ATTRIBUTE);
+
+            if (languageAttr == null)
+            {
+                node.addAttribute("type", "text/javascript");
+                return;
+            }
+
+            // check for javascript
+            final String language = languageAttr.value;
+            String mimeType = null;
+            if ("javascript".equalsIgnoreCase(language) || "jscript".equalsIgnoreCase(language))
+            {
+                mimeType = "text/javascript";
+            }
+            else if ("vbscript".equalsIgnoreCase(language))
+            {
+                // per Randy Waki 8/6/01
+                mimeType = "text/vbscript";
+            }
+
+            // any other language value leaves the element without a type attribute
+            if (mimeType != null)
+            {
+                node.addAttribute("type", mimeType);
+            }
+        }
+
+    }
+
+    /**
+     * Tag checker for <code>table</code> elements.
+     */
+    public static class CheckTABLE implements TagCheck
+    {
+
+        /**
+         * Checks every attribute and then applies three table specific rules: a missing <code>summary</code> is
+         * recorded as an accessibility flag, a valueless <code>border</code> gets the value "1" for XML output, and
+         * a <code>height</code> attribute is reported as proprietary.
+         * @param lexer Lexer used for reporting; its badAccess and versions fields may be updated
+         * @param node the table node to check
+         */
+        public void check(Lexer lexer, Node node)
+        {
+            boolean summarySeen = false;
+
+            AttVal current = node.attributes;
+            while (current != null)
+            {
+                if (current.checkAttribute(lexer, node) == AttributeTable.attrSummary)
+                {
+                    summarySeen = true;
+                }
+                current = current.next;
+            }
+
+            // suppress the missing-summary flag for HTML 2.0 and HTML 3.2. The summary is not required, so this is
+            // only an accessibility flag; no MISSING_ATTRIBUTE error is reported for it.
+            if (!(summarySeen || lexer.doctype == Dict.VERS_HTML20 || lexer.doctype == Dict.VERS_HTML32))
+            {
+                lexer.badAccess |= Report.MISSING_SUMMARY;
+            }
+
+            // convert <table border> to <table border="1">
+            if (lexer.configuration.xmlOut)
+            {
+                final AttVal border = node.getAttrByName("border");
+                if (border != null && border.value == null)
+                {
+                    border.value = "1";
+                }
+            }
+
+            // <table height="..."> is proprietary
+            final AttVal height = node.getAttrByName("height");
+            if (height != null)
+            {
+                lexer.report.attrError(lexer, node, height, Report.PROPRIETARY_ATTRIBUTE);
+                lexer.versions &= Dict.VERS_PROPRIETARY;
+            }
+        }
+
+    }
+
+    /**
+     * Tag checker for table <code>caption</code> elements.
+     */
+    public static class CheckCaption implements TagCheck
+    {
+
+        /**
+         * Checks all attributes and then evaluates the first <code>align</code> attribute (name compared
+         * case-insensitively). Depending on its value the document version is constrained, or the value is
+         * reported as bad. An <code>align</code> attribute without a value is ignored.
+         * @param lexer Lexer used for reporting and version constraints
+         * @param node the caption node to check
+         */
+        public void check(Lexer lexer, Node node)
+        {
+            node.checkAttributes(lexer);
+
+            // advance to the first attribute named "align", if any
+            AttVal alignAttr = node.attributes;
+            while (alignAttr != null && !"align".equalsIgnoreCase(alignAttr.attribute))
+            {
+                alignAttr = alignAttr.next;
+            }
+
+            if (alignAttr == null || alignAttr.value == null)
+            {
+                return;
+            }
+
+            final String alignment = alignAttr.value;
+            final boolean horizontal = "left".equalsIgnoreCase(alignment) || "right".equalsIgnoreCase(alignment);
+
+            if (horizontal)
+            {
+                lexer.constrainVersion(Dict.VERS_HTML40_LOOSE);
+            }
+            else if ("top".equalsIgnoreCase(alignment) || "bottom".equalsIgnoreCase(alignment))
+            {
+                lexer.constrainVersion(~(Dict.VERS_HTML20 | Dict.VERS_HTML32));
+            }
+            else
+            {
+                lexer.report.attrError(lexer, node, alignAttr, Report.BAD_ATTRIBUTE_VALUE);
+            }
+        }
+
+    }
+
+    /**
+     * Tag checker for <code>hr</code> elements.
+     */
+    public static class CheckHR implements TagCheck
+    {
+
+        /**
+         * Checks all attributes and reports a <code>src</code> attribute, if the node had one before the
+         * attribute check ran.
+         * @param lexer Lexer used for reporting
+         * @param node the hr node to check
+         */
+        public void check(Lexer lexer, Node node)
+        {
+            // looked up before checkAttributes, exactly as the lookup order has always been
+            final AttVal srcAttr = node.getAttrByName("src");
+
+            node.checkAttributes(lexer);
+
+            if (srcAttr == null)
+            {
+                return;
+            }
+
+            // NOTE(review): reported with the ATTR_VALUE code although the attribute itself is at issue - confirm
+            lexer.report.attrError(lexer, node, srcAttr, Report.PROPRIETARY_ATTR_VALUE);
+        }
+    }
+
+    /**
+     * Tag checker for <code>img</code> elements.
+     */
+    public static class CheckIMG implements TagCheck
+    {
+
+        /**
+         * Checks every attribute while noting which of alt, src, usemap, ismap and datafld are present, then
+         * reports a missing alt (optionally adding the configured alt text), a missing src when there is no
+         * datafld either, and an ismap without usemap.
+         * @param lexer Lexer used for reporting, configuration and version constraints
+         * @param node the img node to check
+         */
+        public void check(Lexer lexer, Node node)
+        {
+            boolean altSeen = false;
+            boolean srcSeen = false;
+            boolean useMapSeen = false;
+            boolean isMapSeen = false;
+            boolean dataFldSeen = false;
+
+            AttVal current = node.attributes;
+            while (current != null)
+            {
+                final Attribute definition = current.checkAttribute(lexer, node);
+
+                if (definition == AttributeTable.attrAlt)
+                {
+                    altSeen = true;
+                }
+                else if (definition == AttributeTable.attrSrc)
+                {
+                    srcSeen = true;
+                }
+                else if (definition == AttributeTable.attrUsemap)
+                {
+                    useMapSeen = true;
+                }
+                else if (definition == AttributeTable.attrIsmap)
+                {
+                    isMapSeen = true;
+                }
+                else if (definition == AttributeTable.attrDatafld)
+                {
+                    dataFldSeen = true;
+                }
+                else if (definition == AttributeTable.attrWidth || definition == AttributeTable.attrHeight)
+                {
+                    // width/height constrain the version with the HTML 2.0 flag masked out
+                    lexer.constrainVersion(~Dict.VERS_HTML20);
+                }
+
+                current = current.next;
+            }
+
+            if (!altSeen)
+            {
+                lexer.badAccess |= Report.MISSING_IMAGE_ALT;
+                lexer.report.attrError(lexer, node, new AttVal(null, null, '"', "alt", ""), Report.MISSING_ATTRIBUTE);
+
+                // supply the configured replacement text, when there is one
+                if (lexer.configuration.altText != null)
+                {
+                    node.addAttribute("alt", lexer.configuration.altText);
+                }
+            }
+
+            if (!(srcSeen || dataFldSeen))
+            {
+                lexer.report.attrError(lexer, node, new AttVal(null, null, '"', "src", ""), Report.MISSING_ATTRIBUTE);
+            }
+
+            if (isMapSeen && !useMapSeen)
+            {
+                lexer.report.attrError(lexer, node, new AttVal(null, null, '"', "ismap", ""), Report.MISSING_IMAGEMAP);
+            }
+        }
+
+    }
+
+    /**
+     * Tag checker for <code>area</code> elements.
+     */
+    public static class CheckAREA implements TagCheck
+    {
+
+        /**
+         * Checks every attribute and reports a missing <code>alt</code> (also recorded as an accessibility flag)
+         * and a missing <code>href</code>.
+         * @param lexer Lexer used for reporting
+         * @param node the area node to check
+         */
+        public void check(Lexer lexer, Node node)
+        {
+            boolean altSeen = false;
+            boolean hrefSeen = false;
+
+            AttVal current = node.attributes;
+            while (current != null)
+            {
+                final Attribute definition = current.checkAttribute(lexer, node);
+
+                if (definition == AttributeTable.attrAlt)
+                {
+                    altSeen = true;
+                }
+                else if (definition == AttributeTable.attrHref)
+                {
+                    hrefSeen = true;
+                }
+
+                current = current.next;
+            }
+
+            if (!altSeen)
+            {
+                lexer.badAccess |= Report.MISSING_LINK_ALT;
+                lexer.report.attrError(lexer, node, new AttVal(null, null, '"', "alt", ""), Report.MISSING_ATTRIBUTE);
+            }
+
+            if (!hrefSeen)
+            {
+                lexer.report.attrError(lexer, node, new AttVal(null, null, '"', "href", ""), Report.MISSING_ATTRIBUTE);
+            }
+        }
+
+    }
+
+ /**
+ * Tag checker for anchor (<code>a</code>) elements.
+ */
+ public static class CheckAnchor implements TagCheck
+ {
+
+ /**
+ * Checks all attributes of the node and then passes the node to <code>Lexer.fixId</code>.
+ * @param lexer Lexer used for the attribute check and the id fix-up
+ * @param node the anchor node to check
+ * @see org.ajax4jsf.org.w3c.tidy.TagCheck#check(org.ajax4jsf.org.w3c.tidy.Lexer, org.ajax4jsf.org.w3c.tidy.Node)
+ */
+ public void check(Lexer lexer, Node node)
+ {
+ node.checkAttributes(lexer);
+
+ lexer.fixId(node);
+ }
+ }
+
+ /**
+ * Tag checker for image map (<code>map</code>) elements.
+ */
+ public static class CheckMap implements TagCheck
+ {
+
+ /**
+ * Checks all attributes of the node and then passes the node to <code>Lexer.fixId</code>.
+ * @param lexer Lexer used for the attribute check and the id fix-up
+ * @param node the map node to check
+ * @see org.ajax4jsf.org.w3c.tidy.TagCheck#check(org.ajax4jsf.org.w3c.tidy.Lexer, org.ajax4jsf.org.w3c.tidy.Node)
+ */
+ public void check(Lexer lexer, Node node)
+ {
+ node.checkAttributes(lexer);
+
+ lexer.fixId(node);
+ }
+ }
+
+    /**
+     * Tag checker for <code>style</code> elements.
+     */
+    public static class CheckSTYLE implements TagCheck
+    {
+
+        /**
+         * Checks all attributes; when the node had no <code>type</code> attribute the omission is reported and
+         * <code>type="text/css"</code> is added.
+         * @param lexer Lexer used for reporting
+         * @param node the style node to check
+         */
+        public void check(Lexer lexer, Node node)
+        {
+            // presence is sampled before the general attribute check runs
+            final boolean typeMissing = node.getAttrByName("type") == null;
+
+            node.checkAttributes(lexer);
+
+            if (!typeMissing)
+            {
+                return;
+            }
+
+            lexer.report.attrError(lexer, node, new AttVal(null, null, '"', "type", ""), Report.MISSING_ATTRIBUTE);
+            node.addAttribute("type", "text/css");
+        }
+    }
+
+    /**
+     * Tag checker for <code>form</code> elements. Reports a missing <code>action</code> attribute.
+     */
+    public static class CheckForm implements TagCheck
+    {
+
+        /**
+         * Checks all attributes and reports the <code>action</code> attribute as missing when the node did not
+         * carry one.
+         * @param lexer Lexer used for reporting
+         * @param node the form node to check
+         */
+        public void check(Lexer lexer, Node node)
+        {
+            // presence is sampled before the general attribute check runs
+            final boolean actionMissing = node.getAttrByName("action") == null;
+
+            node.checkAttributes(lexer);
+
+            if (actionMissing)
+            {
+                lexer.report.attrError(
+                    lexer,
+                    node,
+                    new AttVal(null, null, '"', "action", ""),
+                    Report.MISSING_ATTRIBUTE);
+            }
+        }
+    }
+
+    /**
+     * Tag checker for <code>meta</code> elements. Reports a missing <code>content</code> attribute.
+     */
+    public static class CheckMeta implements TagCheck
+    {
+
+        /**
+         * Checks all attributes and reports the <code>content</code> attribute as missing when the node did not
+         * carry one. The name/http-equiv attributes are not verified here.
+         * @param lexer Lexer used for reporting
+         * @param node the meta node to check
+         */
+        public void check(Lexer lexer, Node node)
+        {
+            // presence is sampled before the general attribute check runs
+            final boolean contentMissing = node.getAttrByName("content") == null;
+
+            node.checkAttributes(lexer);
+
+            if (contentMissing)
+            {
+                lexer.report.attrError(
+                    lexer,
+                    node,
+                    new AttVal(null, null, '"', "content", ""),
+                    Report.MISSING_ATTRIBUTE);
+            }
+
+            // name or http-equiv attribute must also be set
+        }
+    }
+
+    /**
+     * Tag checker for table cells (<code>td</code> and <code>th</code>).
+     */
+    public static class CheckTableCell implements TagCheck
+    {
+
+        /**
+         * Checks all attributes; a cell that has a <code>width</code> or <code>height</code> attribute constrains
+         * the document version with the HTML 4.0 strict flag masked out.
+         * @param lexer Lexer used for the attribute check and the version constraint
+         * @param node the table cell node to check
+         */
+        public void check(Lexer lexer, Node node)
+        {
+            node.checkAttributes(lexer);
+
+            // height is only looked up when width is absent
+            final boolean hasDimension = node.getAttrByName("width") != null || node.getAttrByName("height") != null;
+
+            if (hasDimension)
+            {
+                lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
+            }
+        }
+    }
+
+    /**
+     * Tag checker for <code>link</code> elements; adds a missing <code>type</code> attribute to stylesheet links.
+     */
+    public static class CheckLINK implements TagCheck
+    {
+
+        /**
+         * Checks all attributes. If the node's <code>rel</code> value is exactly "stylesheet" (case-sensitive) and
+         * there is no <code>type</code> attribute, the omission is reported and <code>type="text/css"</code> is
+         * added.
+         * @param lexer Lexer used for reporting
+         * @param node the link node to check
+         */
+        public void check(Lexer lexer, Node node)
+        {
+            // rel is looked up before the general attribute check runs
+            final AttVal relAttr = node.getAttrByName("rel");
+
+            node.checkAttributes(lexer);
+
+            if (relAttr == null || !"stylesheet".equals(relAttr.value))
+            {
+                return;
+            }
+
+            if (node.getAttrByName("type") != null)
+            {
+                return;
+            }
+
+            lexer.report.attrError(lexer, node, new AttVal(null, null, '"', "type", ""), Report.MISSING_ATTRIBUTE);
+            node.addAttribute("type", "text/css");
+        }
+    }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TagTable.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TagTable.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TagTable.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,1069 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+
+/**
+ * Tag dictionary node hash table.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public final class TagTable
+{
+
+ /**
+ * dummy entry for all xml tags.
+ */
+ public static final Dict XML_TAGS = new Dict(null, Dict.VERS_ALL, Dict.CM_BLOCK,
null, null);
+
+ /**
+ * all the known tags.
+ */
+ private static final Dict[] TAGS = {
+ new Dict(
+ "html",
+ Dict.VERS_ALL,
+ (Dict.CM_HTML | Dict.CM_OPT | Dict.CM_OMITST),
+ ParserImpl.HTML,
+ TagCheckImpl.HTML),
+ new Dict("head", Dict.VERS_ALL, (Dict.CM_HTML | Dict.CM_OPT |
Dict.CM_OMITST), ParserImpl.HEAD, null),
+ new Dict("title", Dict.VERS_ALL, Dict.CM_HEAD, ParserImpl.TITLE,
null),
+ new Dict("base", Dict.VERS_ALL, (Dict.CM_HEAD | Dict.CM_EMPTY),
ParserImpl.EMPTY, null),
+ new Dict("link", Dict.VERS_ALL, (Dict.CM_HEAD | Dict.CM_EMPTY),
ParserImpl.EMPTY, TagCheckImpl.LINK),
+ new Dict("meta", Dict.VERS_ALL, (Dict.CM_HEAD | Dict.CM_EMPTY),
ParserImpl.EMPTY, TagCheckImpl.META),
+ new Dict(
+ "style",
+ (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
+ Dict.CM_HEAD,
+ ParserImpl.SCRIPT,
+ TagCheckImpl.STYLE),
+ new Dict(
+ "script",
+ (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
+ (Dict.CM_HEAD | Dict.CM_MIXED | Dict.CM_BLOCK | Dict.CM_INLINE),
+ ParserImpl.SCRIPT,
+ TagCheckImpl.SCRIPT),
+ new Dict(
+ "server",
+ Dict.VERS_NETSCAPE,
+ (Dict.CM_HEAD | Dict.CM_MIXED | Dict.CM_BLOCK | Dict.CM_INLINE),
+ ParserImpl.SCRIPT,
+ null),
+ new Dict("body", Dict.VERS_ALL, (Dict.CM_HTML | Dict.CM_OPT |
Dict.CM_OMITST), ParserImpl.BODY, null),
+ new Dict("frameset", Dict.VERS_FRAMESET, (Dict.CM_HTML |
Dict.CM_FRAMES), ParserImpl.FRAMESET, null),
+ new Dict("p", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OPT),
ParserImpl.INLINE, null),
+ new Dict("h1", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING),
ParserImpl.INLINE, null),
+ new Dict("h2", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING),
ParserImpl.INLINE, null),
+ new Dict("h3", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING),
ParserImpl.INLINE, null),
+ new Dict("h4", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING),
ParserImpl.INLINE, null),
+ new Dict("h5", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING),
ParserImpl.INLINE, null),
+ new Dict("h6", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING),
ParserImpl.INLINE, null),
+ new Dict("ul", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.LIST, null),
+ new Dict("ol", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.LIST, null),
+ new Dict("dl", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.DEFLIST,
null),
+ new Dict("dir", Dict.VERS_LOOSE, (Dict.CM_BLOCK | Dict.CM_OBSOLETE),
ParserImpl.LIST, null),
+ new Dict("menu", Dict.VERS_LOOSE, (Dict.CM_BLOCK | Dict.CM_OBSOLETE),
ParserImpl.LIST, null),
+ new Dict("pre", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.PRE, null),
+ new Dict("listing", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OBSOLETE),
ParserImpl.PRE, null),
+ new Dict("xmp", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OBSOLETE),
ParserImpl.PRE, null),
+ new Dict("plaintext", Dict.VERS_ALL, (Dict.CM_BLOCK |
Dict.CM_OBSOLETE), ParserImpl.PRE, null),
+ new Dict("address", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.BLOCK,
null),
+ new Dict("blockquote", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.BLOCK,
null),
+ new Dict("form", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.BLOCK,
TagCheckImpl.FORM),
+ new Dict("isindex", Dict.VERS_LOOSE, (Dict.CM_BLOCK | Dict.CM_EMPTY),
ParserImpl.EMPTY, null),
+ new Dict("fieldset", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
Dict.CM_BLOCK, ParserImpl.BLOCK, null),
+ new Dict("table", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.TABLETAG,
TagCheckImpl.TABLE),
+ new Dict(
+ "hr",
+ (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
+ (Dict.CM_BLOCK | Dict.CM_EMPTY),
+ ParserImpl.EMPTY,
+ TagCheckImpl.HR),
+ new Dict("div", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.BLOCK,
null),
+ new Dict("multicol", Dict.VERS_NETSCAPE, Dict.CM_BLOCK,
ParserImpl.BLOCK, null),
+ new Dict("nosave", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK,
null),
+ new Dict("layer", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK,
null),
+ new Dict("ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE,
ParserImpl.INLINE, null),
+ new Dict(
+ "nolayer",
+ Dict.VERS_NETSCAPE,
+ (Dict.CM_BLOCK | Dict.CM_INLINE | Dict.CM_MIXED),
+ ParserImpl.BLOCK,
+ null),
+ new Dict("align", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK,
null),
+ new Dict("center", Dict.VERS_LOOSE, Dict.CM_BLOCK, ParserImpl.BLOCK,
null),
+ new Dict(
+ "ins",
+ (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
+ (Dict.CM_INLINE | Dict.CM_BLOCK | Dict.CM_MIXED),
+ ParserImpl.INLINE,
+ null),
+ new Dict(
+ "del",
+ (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
+ (Dict.CM_INLINE | Dict.CM_BLOCK | Dict.CM_MIXED),
+ ParserImpl.INLINE,
+ null),
+ new Dict("li", Dict.VERS_ALL, (Dict.CM_LIST | Dict.CM_OPT |
Dict.CM_NO_INDENT), ParserImpl.BLOCK, null),
+ new Dict("dt", Dict.VERS_ALL, (Dict.CM_DEFLIST | Dict.CM_OPT |
Dict.CM_NO_INDENT), ParserImpl.INLINE, null),
+ new Dict("dd", Dict.VERS_ALL, (Dict.CM_DEFLIST | Dict.CM_OPT |
Dict.CM_NO_INDENT), ParserImpl.BLOCK, null),
+ new Dict("caption", Dict.VERS_FROM32, Dict.CM_TABLE, ParserImpl.INLINE,
TagCheckImpl.CAPTION),
+ new Dict("colgroup", Dict.VERS_HTML40, (Dict.CM_TABLE | Dict.CM_OPT),
ParserImpl.COLGROUP, null),
+ new Dict("col", Dict.VERS_HTML40, (Dict.CM_TABLE | Dict.CM_EMPTY),
ParserImpl.EMPTY, null),
+ new Dict(
+ "thead",
+ (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
+ (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
+ ParserImpl.ROWGROUP,
+ null),
+ new Dict(
+ "tfoot",
+ (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
+ (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
+ ParserImpl.ROWGROUP,
+ null),
+ new Dict(
+ "tbody",
+ (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
+ (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
+ ParserImpl.ROWGROUP,
+ null),
+ new Dict("tr", Dict.VERS_FROM32, (Dict.CM_TABLE | Dict.CM_OPT),
ParserImpl.ROW, null),
+ new Dict(
+ "td",
+ Dict.VERS_FROM32,
+ (Dict.CM_ROW | Dict.CM_OPT | Dict.CM_NO_INDENT),
+ ParserImpl.BLOCK,
+ TagCheckImpl.TABLECELL),
+ new Dict(
+ "th",
+ Dict.VERS_FROM32,
+ (Dict.CM_ROW | Dict.CM_OPT | Dict.CM_NO_INDENT),
+ ParserImpl.BLOCK,
+ TagCheckImpl.TABLECELL),
+ new Dict("q", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("a", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE,
TagCheckImpl.ANCHOR),
+ new Dict("br", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_EMPTY),
ParserImpl.EMPTY, null),
+ new Dict(
+ "img",
+ Dict.VERS_ALL,
+ (Dict.CM_INLINE | Dict.CM_IMG | Dict.CM_EMPTY),
+ ParserImpl.EMPTY,
+ TagCheckImpl.IMG),
+ new Dict(
+ "object",
+ Dict.VERS_HTML40,
+ (Dict.CM_OBJECT | Dict.CM_HEAD | Dict.CM_IMG | Dict.CM_INLINE |
Dict.CM_PARAM),
+ ParserImpl.BLOCK,
+ null),
+ new Dict(
+ "applet",
+ Dict.VERS_LOOSE,
+ (Dict.CM_OBJECT | Dict.CM_IMG | Dict.CM_INLINE | Dict.CM_PARAM),
+ ParserImpl.BLOCK,
+ null),
+ new Dict(
+ "servlet",
+ Dict.VERS_SUN,
+ (Dict.CM_OBJECT | Dict.CM_IMG | Dict.CM_INLINE | Dict.CM_PARAM),
+ ParserImpl.BLOCK,
+ null),
+ new Dict("param", Dict.VERS_FROM32, (Dict.CM_INLINE | Dict.CM_EMPTY),
ParserImpl.EMPTY, null),
+ new Dict("embed", Dict.VERS_NETSCAPE, (Dict.CM_INLINE | Dict.CM_IMG |
Dict.CM_EMPTY), ParserImpl.EMPTY, null),
+ new Dict("noembed", Dict.VERS_NETSCAPE, Dict.CM_INLINE,
ParserImpl.INLINE, null),
+ new Dict("iframe", Dict.VERS_HTML40_LOOSE, Dict.CM_INLINE,
ParserImpl.BLOCK, null),
+ new Dict("frame", Dict.VERS_FRAMESET, (Dict.CM_FRAMES | Dict.CM_EMPTY),
ParserImpl.EMPTY, null),
+ new Dict("noframes", Dict.VERS_IFRAME, (Dict.CM_BLOCK |
Dict.CM_FRAMES), ParserImpl.NOFRAMES, null),
+ new Dict(
+ "noscript",
+ (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
+ (Dict.CM_BLOCK | Dict.CM_INLINE | Dict.CM_MIXED),
+ ParserImpl.BLOCK,
+ null),
+ new Dict("b", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
Dict.CM_INLINE, ParserImpl.INLINE, null),
+ new Dict("i", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
Dict.CM_INLINE, ParserImpl.INLINE, null),
+ new Dict("u", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("tt", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
Dict.CM_INLINE, ParserImpl.INLINE, null),
+ new Dict("s", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("strike", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("big", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
Dict.CM_INLINE, ParserImpl.INLINE, null),
+ new Dict("small", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
Dict.CM_INLINE, ParserImpl.INLINE, null),
+ new Dict("sub", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
Dict.CM_INLINE, ParserImpl.INLINE, null),
+ new Dict("sup", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
Dict.CM_INLINE, ParserImpl.INLINE, null),
+ new Dict("em", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("strong", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("dfn", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("code", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("samp", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("kbd", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("var", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("cite", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("abbr", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("acronym", Dict.VERS_HTML40, Dict.CM_INLINE,
ParserImpl.INLINE, null),
+ new Dict("span", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("blink", Dict.VERS_PROPRIETARY, Dict.CM_INLINE,
ParserImpl.INLINE, null),
+ new Dict("nobr", Dict.VERS_PROPRIETARY, Dict.CM_INLINE,
ParserImpl.INLINE, null),
+ new Dict("wbr", Dict.VERS_PROPRIETARY, (Dict.CM_INLINE |
Dict.CM_EMPTY), ParserImpl.EMPTY, null),
+ new Dict("marquee", Dict.VERS_MICROSOFT, (Dict.CM_INLINE |
Dict.CM_OPT), ParserImpl.INLINE, null),
+ new Dict("bgsound", Dict.VERS_MICROSOFT, (Dict.CM_HEAD |
Dict.CM_EMPTY), ParserImpl.EMPTY, null),
+ new Dict("comment", Dict.VERS_MICROSOFT, Dict.CM_INLINE,
ParserImpl.INLINE, null),
+ new Dict("spacer", Dict.VERS_NETSCAPE, (Dict.CM_INLINE |
Dict.CM_EMPTY), ParserImpl.EMPTY, null),
+ new Dict("keygen", Dict.VERS_NETSCAPE, (Dict.CM_INLINE |
Dict.CM_EMPTY), ParserImpl.EMPTY, null),
+ new Dict(
+ "nolayer",
+ Dict.VERS_NETSCAPE,
+ (Dict.CM_BLOCK | Dict.CM_INLINE | Dict.CM_MIXED),
+ ParserImpl.BLOCK,
+ null),
+ new Dict("ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE,
ParserImpl.INLINE, null),
+ new Dict(
+ "map",
+ (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
+ Dict.CM_INLINE,
+ ParserImpl.BLOCK,
+ TagCheckImpl.MAP),
+ new Dict(
+ "area",
+ (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
+ (Dict.CM_BLOCK | Dict.CM_EMPTY),
+ ParserImpl.EMPTY,
+ TagCheckImpl.AREA),
+ new Dict("input", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_IMG |
Dict.CM_EMPTY), ParserImpl.EMPTY, null),
+ new Dict("select", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_FIELD),
ParserImpl.SELECT, null),
+ new Dict("option", Dict.VERS_ALL, (Dict.CM_FIELD | Dict.CM_OPT),
ParserImpl.TEXT, null),
+ new Dict(
+ "optgroup",
+ (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
+ (Dict.CM_FIELD | Dict.CM_OPT),
+ ParserImpl.OPTGROUP,
+ null),
+ new Dict("textarea", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_FIELD),
ParserImpl.TEXT, null),
+ new Dict("label", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("legend", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
Dict.CM_INLINE, ParserImpl.INLINE, null),
+ new Dict("button", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
Dict.CM_INLINE, ParserImpl.INLINE, null),
+ new Dict("basefont", Dict.VERS_LOOSE, (Dict.CM_INLINE | Dict.CM_EMPTY),
ParserImpl.EMPTY, null),
+ new Dict("font", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("bdo", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
Dict.CM_INLINE, ParserImpl.INLINE, null),
+ // elements for XHTML 1.1
+ new Dict("ruby", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("rbc", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("rtc", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("rb", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("rt", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ new Dict("rp", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE,
null),
+ //
+ };
+
+ /**
+ * Definition of the html tag.
+ */
+ protected Dict tagHtml;
+
+ /**
+ * Definition of the head tag.
+ */
+ protected Dict tagHead;
+
+ /**
+ * Definition of the body tag.
+ */
+ protected Dict tagBody;
+
+ /**
+ * Definition of the frameset tag.
+ */
+ protected Dict tagFrameset;
+
+ /**
+ * Definition of the frame tag.
+ */
+ protected Dict tagFrame;
+
+ /**
+ * Definition of the iframe tag.
+ */
+ protected Dict tagIframe;
+
+ /**
+ * Definition of the noframes tag.
+ */
+ protected Dict tagNoframes;
+
+ /**
+ * Definition of the meta tag.
+ */
+ protected Dict tagMeta;
+
+ /**
+ * Definition of the title tag.
+ */
+ protected Dict tagTitle;
+
+ /**
+ * Definition of the base tag.
+ */
+ protected Dict tagBase;
+
+ /**
+ * Definition of the hr tag.
+ */
+ protected Dict tagHr;
+
+ /**
+ * Definition of the pre tag.
+ */
+ protected Dict tagPre;
+
+ /**
+ * Definition of the listing tag.
+ */
+ protected Dict tagListing;
+
+ /**
+ * Definition of the h1 tag.
+ */
+ protected Dict tagH1;
+
+ /**
+ * Definition of the h2 tag.
+ */
+ protected Dict tagH2;
+
+ /**
+ * Definition of the p tag.
+ */
+ protected Dict tagP;
+
+ /**
+ * Definition of the ul tag.
+ */
+ protected Dict tagUl;
+
+ /**
+ * Definition of the ol tag.
+ */
+ protected Dict tagOl;
+
+ /**
+ * Definition of the dir tag.
+ */
+ protected Dict tagDir;
+
+ /**
+ * Definition of the li tag.
+ */
+ protected Dict tagLi;
+
+ /**
+ * Definition of the dt tag.
+ */
+ protected Dict tagDt;
+
+ /**
+ * Definition of the dd tag.
+ */
+ protected Dict tagDd;
+
+ /**
+ * Definition of the dl tag.
+ */
+ protected Dict tagDl;
+
+ /**
+ * Definition of the td tag.
+ */
+ protected Dict tagTd;
+
+ /**
+ * Definition of the th tag.
+ */
+ protected Dict tagTh;
+
+ /**
+ * Definition of the tr tag.
+ */
+ protected Dict tagTr;
+
+ /**
+ * Definition of the col tag.
+ */
+ protected Dict tagCol;
+
+ /**
+ * Definition of the colgroup tag.
+ */
+ protected Dict tagColgroup;
+
+ /**
+ * Definition of the br tag.
+ */
+ protected Dict tagBr;
+
+ /**
+ * Definition of the a (anchor) tag.
+ */
+ protected Dict tagA;
+
+ /**
+ * Definition of the link tag.
+ */
+ protected Dict tagLink;
+
+ /**
+ * Definition of the b tag.
+ */
+ protected Dict tagB;
+
+ /**
+ * Definition of the i tag.
+ */
+ protected Dict tagI;
+
+ /**
+ * Definition of the strong tag.
+ */
+ protected Dict tagStrong;
+
+ /**
+ * Definition of the em tag.
+ */
+ protected Dict tagEm;
+
+ /**
+ * Definition of the big tag.
+ */
+ protected Dict tagBig;
+
+ /**
+ * Definition of the small tag.
+ */
+ protected Dict tagSmall;
+
+ /**
+ * Definition of the param tag.
+ */
+ protected Dict tagParam;
+
+ /**
+ * Definition of the option tag.
+ */
+ protected Dict tagOption;
+
+ /**
+ * Definition of the optgroup tag.
+ */
+ protected Dict tagOptgroup;
+
+ /**
+ * Definition of the img tag.
+ */
+ protected Dict tagImg;
+
+ /**
+ * Definition of the map tag.
+ */
+ protected Dict tagMap;
+
+ /**
+ * Definition of the area tag.
+ */
+ protected Dict tagArea;
+
+ /**
+ * Definition of the nobr tag.
+ */
+ protected Dict tagNobr;
+
+ /**
+ * Definition of the wbr tag.
+ */
+ protected Dict tagWbr;
+
+ /**
+ * Definition of the font tag.
+ */
+ protected Dict tagFont;
+
+ /**
+ * Definition of the spacer tag.
+ */
+ protected Dict tagSpacer;
+
+ /**
+ * Definition of the layer tag.
+ */
+ protected Dict tagLayer;
+
+ /**
+ * Definition of the center tag.
+ */
+ protected Dict tagCenter;
+
+ /**
+ * Definition of the style tag.
+ */
+ protected Dict tagStyle;
+
+ /**
+ * Definition of the script tag.
+ */
+ protected Dict tagScript;
+
+ /**
+ * Definition of the noscript tag.
+ */
+ protected Dict tagNoscript;
+
+ /**
+ * Definition of the table tag.
+ */
+ protected Dict tagTable;
+
+ /**
+ * Definition of the caption tag.
+ */
+ protected Dict tagCaption;
+
+ /**
+ * Definition of the form tag.
+ */
+ protected Dict tagForm;
+
+ /**
+ * Definition of the textarea tag.
+ */
+ protected Dict tagTextarea;
+
+ /**
+ * Definition of the blockquote tag.
+ */
+ protected Dict tagBlockquote;
+
+ /**
+ * Definition of the applet tag.
+ */
+ protected Dict tagApplet;
+
+ /**
+ * Definition of the object tag.
+ */
+ protected Dict tagObject;
+
+ /**
+ * Definition of the div tag.
+ */
+ protected Dict tagDiv;
+
+ /**
+ * Definition of the span tag.
+ */
+ protected Dict tagSpan;
+
+ /**
+ * Definition of the input tag.
+ */
+ protected Dict tagInput;
+
+ /**
+ * Definition of the q tag.
+ */
+ protected Dict tagQ;
+
+ /**
+ * Definition of the blink tag: a proprietary tag added by Tidy, along with nobr and wbr.
+ */
+ protected Dict tagBlink;
+
+ /**
+ * Head of the singly linked list of registered anchors (each Anchor links to the following one through its
+ * <code>next</code> field); <code>null</code> when no anchor is registered.
+ */
+ protected Anchor anchorList;
+
+ /**
+ * Current configuration; may be <code>null</code> until {@link #setConfiguration(Configuration)} is called.
+ */
+ private Configuration configuration;
+
+ /**
+ * Tag definitions keyed by tag name.
+ */
+ private Map<String, Dict> tagHashtable = new Hashtable<String, Dict>();
+
+ /**
+  * Creates a tag table holding every entry of <code>TAGS</code> and caches the definitions of the
+  * frequently used tags in the corresponding <code>tagXxx</code> fields.
+  */
+ protected TagTable()
+ {
+     // register all built-in tag definitions
+     for (Dict knownTag : TAGS)
+     {
+         install(knownTag);
+     }
+
+     // cache the definitions that the parser and cleaner compare against by identity
+     this.tagHtml = lookup("html");
+     this.tagHead = lookup("head");
+     this.tagBody = lookup("body");
+     this.tagFrameset = lookup("frameset");
+     this.tagFrame = lookup("frame");
+     this.tagIframe = lookup("iframe");
+     this.tagNoframes = lookup("noframes");
+     this.tagMeta = lookup("meta");
+     this.tagTitle = lookup("title");
+     this.tagBase = lookup("base");
+     this.tagHr = lookup("hr");
+     this.tagPre = lookup("pre");
+     this.tagListing = lookup("listing");
+     this.tagH1 = lookup("h1");
+     this.tagH2 = lookup("h2");
+     this.tagP = lookup("p");
+     this.tagUl = lookup("ul");
+     this.tagOl = lookup("ol");
+     this.tagDir = lookup("dir");
+     this.tagLi = lookup("li");
+     this.tagDt = lookup("dt");
+     this.tagDd = lookup("dd");
+     this.tagDl = lookup("dl");
+     this.tagTd = lookup("td");
+     this.tagTh = lookup("th");
+     this.tagTr = lookup("tr");
+     this.tagCol = lookup("col");
+     this.tagColgroup = lookup("colgroup");
+     this.tagBr = lookup("br");
+     this.tagA = lookup("a");
+     this.tagLink = lookup("link");
+     this.tagB = lookup("b");
+     this.tagI = lookup("i");
+     this.tagStrong = lookup("strong");
+     this.tagEm = lookup("em");
+     this.tagBig = lookup("big");
+     this.tagSmall = lookup("small");
+     this.tagParam = lookup("param");
+     this.tagOption = lookup("option");
+     this.tagOptgroup = lookup("optgroup");
+     this.tagImg = lookup("img");
+     this.tagMap = lookup("map");
+     this.tagArea = lookup("area");
+     this.tagNobr = lookup("nobr");
+     this.tagWbr = lookup("wbr");
+     this.tagFont = lookup("font");
+     this.tagSpacer = lookup("spacer");
+     this.tagLayer = lookup("layer");
+     this.tagCenter = lookup("center");
+     this.tagStyle = lookup("style");
+     this.tagScript = lookup("script");
+     this.tagNoscript = lookup("noscript");
+     this.tagTable = lookup("table");
+     this.tagCaption = lookup("caption");
+     this.tagForm = lookup("form");
+     this.tagTextarea = lookup("textarea");
+     this.tagBlockquote = lookup("blockquote");
+     this.tagApplet = lookup("applet");
+     this.tagObject = lookup("object");
+     this.tagDiv = lookup("div");
+     this.tagSpan = lookup("span");
+     this.tagInput = lookup("input");
+     this.tagQ = lookup("q");
+     this.tagBlink = lookup("blink");
+ }
+
+ /**
+ * Sets the configuration instance used by this tag table.
+ * @param configuration configuration instance; stored as-is without validation
+ */
+ public void setConfiguration(Configuration configuration)
+ {
+ this.configuration = configuration;
+ }
+
+ /**
+  * Looks up a tag definition by its name.
+  * @param name tag name, used verbatim as the table key
+  * @return the matching tag definition (Dict), or <code>null</code> if no tag with that name is installed
+  */
+ public Dict lookup(String name)
+ {
+     // the map is typed, so no cast is required
+     return tagHashtable.get(name);
+ }
+
+ /**
+  * Installs a new tag in the tag table, or updates the definition already registered under the same name.
+  * @param dict tag definition
+  * @return the Dict instance held by the table: <code>dict</code> itself for a new name, otherwise the
+  * previously installed (and now updated) instance
+  */
+ public Dict install(Dict dict)
+ {
+     Dict existing = tagHashtable.get(dict.name);
+     if (existing == null)
+     {
+         tagHashtable.put(dict.name, dict);
+         return dict;
+     }
+
+     // keep the installed instance (other code may hold references to it) and merge the new settings in;
+     // versions, parser and attribute checker are replaced, content model flags are OR-ed together
+     existing.versions = dict.versions;
+     existing.model |= dict.model;
+     existing.setParser(dict.getParser());
+     existing.setChkattrs(dict.getChkattrs());
+     return existing;
+ }
+
+ /**
+  * Finds the tag definition for a node by its element name.
+  * @param node Node to resolve. If a definition is found the <code>tag</code> property of the node is set.
+  * @return true if the tag is found, false otherwise
+  */
+ public boolean findTag(Node node)
+ {
+     // in XML mode every element maps to the generic XML tag definition
+     if (configuration != null && configuration.xmlTags)
+     {
+         node.tag = XML_TAGS;
+         return true;
+     }
+
+     if (node.element == null)
+     {
+         return false;
+     }
+
+     Dict definition = lookup(node.element);
+     if (definition == null)
+     {
+         return false;
+     }
+
+     node.tag = definition;
+     return true;
+ }
+
+ /**
+  * Finds a parser for the given node.
+  * @param node Node
+  * @return parser registered for the node's element, or <code>null</code> if the node has no element name or
+  * no tag definition is installed for it
+  */
+ public Parser findParser(Node node)
+ {
+     if (node.element == null)
+     {
+         return null;
+     }
+
+     Dict definition = lookup(node.element);
+     return definition == null ? null : definition.getParser();
+ }
+
+ /**
+  * May id or name serve as anchor?
+  * @param node Node
+  * @return <code>true</code> if the node's tag is one of a, applet, form, frame, iframe, img or map
+  */
+ boolean isAnchorElement(Node node)
+ {
+     // definitions are compared by identity against the cached instances
+     Dict tag = node.tag;
+     return tag == this.tagA
+         || tag == this.tagApplet
+         || tag == this.tagForm
+         || tag == this.tagFrame
+         || tag == this.tagIframe
+         || tag == this.tagImg
+         || tag == this.tagMap;
+ }
+
+ /**
+ * Defines a new tag.
+ * @param tagType tag type. Can be TAGTYPE_BLOCK | TAGTYPE_EMPTY | TAGTYPE_PRE |
TAGTYPE_INLINE
+ * @param name tag name
+ */
+ public void defineTag(short tagType, String name)
+ {
+ Parser tagParser;
+ short model;
+
+ switch (tagType)
+ {
+ case Dict.TAGTYPE_BLOCK :
+ model = (short) (Dict.CM_BLOCK | Dict.CM_NO_INDENT | Dict.CM_NEW);
+ tagParser = ParserImpl.BLOCK;
+ break;
+
+ case Dict.TAGTYPE_EMPTY :
+ model = (short) (Dict.CM_EMPTY | Dict.CM_NO_INDENT | Dict.CM_NEW);
+ tagParser = ParserImpl.BLOCK;
+ break;
+
+ case Dict.TAGTYPE_PRE :
+ model = (short) (Dict.CM_BLOCK | Dict.CM_NO_INDENT | Dict.CM_NEW);
+ tagParser = ParserImpl.PRE;
+ break;
+
+ case Dict.TAGTYPE_INLINE :
+ default :
+ // default to inline tag
+ model = (short) (Dict.CM_INLINE | Dict.CM_NO_INDENT | Dict.CM_NEW);
+ tagParser = ParserImpl.INLINE;
+ break;
+ }
+
+ install(new Dict(name, Dict.VERS_PROPRIETARY, model, tagParser, null));
+ }
+
+ /**
+  * Returns a List containing the names of all user-defined tags of the given type. A tag counts as user-defined
+  * when its versions field equals <code>Dict.VERS_PROPRIETARY</code>; the built-in proprietary tags wbr, blink
+  * and nobr are excluded where they would otherwise match.
+  * @param tagType one of Dict.TAGTYPE_EMPTY | Dict.TAGTYPE_INLINE | Dict.TAGTYPE_BLOCK | Dict.TAGTYPE_PRE; any
+  * other value yields an empty list
+  * @return List containing all the user-defined tag names of that type
+  */
+ List<String> findAllDefinedTag(short tagType)
+ {
+     List<String> tagNames = new ArrayList<String>();
+
+     for (Dict entry : tagHashtable.values())
+     {
+         // every tag type requires a proprietary entry, so test that once up front
+         if (entry == null || entry.versions != Dict.VERS_PROPRIETARY)
+         {
+             continue;
+         }
+
+         boolean matches;
+         switch (tagType)
+         {
+             // user-defined empty tags; the parser is deliberately not checked
+             case Dict.TAGTYPE_EMPTY :
+                 matches = ((entry.model & Dict.CM_EMPTY) == Dict.CM_EMPTY)
+                     && (entry != tagWbr);
+                 break;
+
+             // user-defined inline tags; the parser is deliberately not checked
+             case Dict.TAGTYPE_INLINE :
+                 matches = ((entry.model & Dict.CM_INLINE) == Dict.CM_INLINE)
+                     && (entry != tagBlink)
+                     && (entry != tagNobr)
+                     && (entry != tagWbr);
+                 break;
+
+             // block-model tags handled by the block parser
+             case Dict.TAGTYPE_BLOCK :
+                 matches = ((entry.model & Dict.CM_BLOCK) == Dict.CM_BLOCK)
+                     && (entry.getParser() == ParserImpl.BLOCK);
+                 break;
+
+             // block-model tags handled by the pre parser
+             case Dict.TAGTYPE_PRE :
+                 matches = ((entry.model & Dict.CM_BLOCK) == Dict.CM_BLOCK)
+                     && (entry.getParser() == ParserImpl.PRE);
+                 break;
+
+             default :
+                 matches = false;
+                 break;
+         }
+
+         if (matches)
+         {
+             tagNames.add(entry.name);
+         }
+     }
+
+     return tagNames;
+ }
+
+ /**
+ * Free node's attributes.
+ * @param node Node
+ */
+ public void freeAttrs(Node node)
+ {
+ while (node.attributes != null)
+ {
+ AttVal av = node.attributes;
+ if ("id".equalsIgnoreCase(av.attribute) ||
"name".equalsIgnoreCase(av.attribute) && isAnchorElement(node))
+ {
+ removeAnchorByNode(node);
+ }
+
+ node.attributes = av.next;
+ }
+ }
+
+ /**
+  * Removes every anchor that points at the given node from the anchor list.
+  * @param node Node whose anchors are unlinked
+  */
+ void removeAnchorByNode(Node node)
+ {
+     Anchor previous = null;
+     Anchor current = anchorList;
+
+     while (current != null)
+     {
+         Anchor following = current.next;
+
+         if (current.node == node)
+         {
+             // unlink: either patch the predecessor or move the list head
+             if (previous == null)
+             {
+                 anchorList = following;
+             }
+             else
+             {
+                 previous.next = following;
+             }
+         }
+         else
+         {
+             previous = current;
+         }
+
+         current = following;
+     }
+ }
+
+ /**
+  * Creates a new, empty anchor.
+  * @return a new anchor element
+  */
+ Anchor newAnchor()
+ {
+     return new Anchor();
+ }
+
+ /**
+  * Appends a new anchor to the end of the anchor list.
+  * @param name anchor name
+  * @param node destination for this anchor
+  * @return the head of the anchor list (not necessarily the anchor that was just added)
+  */
+ Anchor addAnchor(String name, Node node)
+ {
+     Anchor added = newAnchor();
+     added.name = name;
+     added.node = node;
+
+     if (anchorList == null)
+     {
+         anchorList = added;
+         return anchorList;
+     }
+
+     // walk to the last element and hook the new anchor behind it
+     Anchor tail = anchorList;
+     while (tail.next != null)
+     {
+         tail = tail.next;
+     }
+     tail.next = added;
+
+     return anchorList;
+ }
+
+ /**
+  * Returns the node associated with an anchor name; names are compared ignoring case.
+  * @param name anchor name
+  * @return node of the first anchor with that name, or <code>null</code> if there is none
+  */
+ Node getNodeByAnchor(String name)
+ {
+     for (Anchor candidate = anchorList; candidate != null; candidate = candidate.next)
+     {
+         if (name.equalsIgnoreCase(candidate.name))
+         {
+             return candidate.node;
+         }
+     }
+
+     return null;
+ }
+
+ /**
+ * Frees all anchors by dropping the reference to the head of the anchor list.
+ */
+ void freeAnchors()
+ {
+ anchorList = null;
+ }
+
+}
\ No newline at end of file
Added: branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Tidy.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Tidy.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/Tidy.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,2380 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.io.Reader;
+import java.io.Serializable;
+import java.io.Writer;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.ajax4jsf.io.FastBufferOutputStream;
+import org.ajax4jsf.io.FastBufferWriter;
+
+
+/**
+ * HTML parser and pretty printer.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org</a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca</a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class Tidy implements Serializable
+{
+
+ /**
+ * Serial Version UID, fixed explicitly to avoid problems during serialization.
+ */
+ static final long serialVersionUID = -2794371560623987718L;
+
+ /**
+  * Aliases for configuration options accepted on the command line: maps the short command line name (without
+  * leading dashes) to the full configuration option name.
+  */
+ private static final Map<String, String> CMDLINE_ALIAS = new HashMap<String, String>();
+
+ static
+ {
+     CMDLINE_ALIAS.put("xml", "input-xml");
+     // was a second put("xml", ...) which silently replaced the "input-xml" mapping above
+     CMDLINE_ALIAS.put("asxhtml", "output-xhtml");
+     CMDLINE_ALIAS.put("asxml", "output-xhtml");
+     CMDLINE_ALIAS.put("ashtml", "output-html");
+     CMDLINE_ALIAS.put("omit", "hide-endtags");
+     CMDLINE_ALIAS.put("upper", "uppercase-tags");
+     CMDLINE_ALIAS.put("raw", "output-raw");
+     CMDLINE_ALIAS.put("numeric", "numeric-entities");
+     CMDLINE_ALIAS.put("change", "write-back");
+     CMDLINE_ALIAS.put("update", "write-back");
+     CMDLINE_ALIAS.put("modify", "write-back");
+     CMDLINE_ALIAS.put("errors", "only-errors");
+     CMDLINE_ALIAS.put("slides", "split");
+     CMDLINE_ALIAS.put("lang", "language");
+     CMDLINE_ALIAS.put("w", "wrap");
+     CMDLINE_ALIAS.put("file", "error-file");
+     CMDLINE_ALIAS.put("f", "error-file");
+ }
+
+ /**
+ * Error output stream; messages produced while parsing are written here.
+ * NOTE(review): java.io.PrintWriter is not Serializable although this class implements Serializable -
+ * confirm whether instances are ever actually serialized.
+ */
+ private PrintWriter errout;
+
+ // writer wrapping System.err, used as the default error output
+ private PrintWriter stderr;
+
+ // configuration used for parsing and pretty printing
+ private Configuration configuration;
+
+ // name of the current input, passed to the report as the file name
+ private String inputStreamName = "InputStream";
+
+ // number of errors recorded by the most recent parse
+ private int parseErrors;
+
+ // number of warnings recorded by the most recent parse
+ private int parseWarnings;
+
+ // message reporter shared with the configuration and the lexers
+ private Report report;
+
+ /**
+  * Instantiates a new Tidy instance. It's recommended that a new instance is used at each parsing.
+  */
+ public Tidy()
+ {
+     this.report = new Report();
+     this.configuration = new Configuration(this.report);
+
+     // without the default attribute table the instance is left without error output
+     if (AttributeTable.getDefaultAttributeTable() == null)
+     {
+         return;
+     }
+
+     TagTable tagTable = new TagTable();
+     tagTable.setConfiguration(this.configuration);
+     this.configuration.tt = tagTable;
+
+     // same for the default entity table
+     if (EntityTable.getDefaultEntityTable() == null)
+     {
+         return;
+     }
+
+     this.configuration.errfile = null;
+     this.stderr = new PrintWriter(System.err, true);
+     this.errout = this.stderr;
+ }
+
+ /**
+ * Returns the configuration used by this instance (the live object, not a copy).
+ * @return tidy configuration
+ */
+ public Configuration getConfiguration()
+ {
+ return configuration;
+ }
+
+ /**
+ * Returns the writer that wraps System.err.
+ * @return writer created over System.err by the constructor
+ */
+ public PrintWriter getStderr()
+ {
+ return stderr;
+ }
+
+ /**
+ * ParseErrors - the number of errors recorded by the most recent parse operation. The counter is reset when a
+ * parse starts and is only updated from the lexer when the configuration is not quiet.
+ * @return number of errors recorded by the most recent parse operation.
+ */
+ public int getParseErrors()
+ {
+ return parseErrors;
+ }
+
+ /**
+ * ParseWarnings - the number of warnings recorded by the most recent parse operation. The counter is reset
+ * when a parse starts and is only updated from the lexer when the configuration is not quiet.
+ * @return number of warnings recorded by the most recent parse operation.
+ */
+ public int getParseWarnings()
+ {
+ return parseWarnings;
+ }
+
+ /**
+  * InputStreamName - the name of the input stream (printed in the header information).
+  * @param name input stream name; a <code>null</code> value is ignored and the current name is kept
+  */
+ public void setInputStreamName(String name)
+ {
+     if (name == null)
+     {
+         return;
+     }
+
+     this.inputStreamName = name;
+ }
+
+ /**
+ * Returns the name of the input stream.
+ * @return current input stream name ("InputStream" unless changed)
+ */
+ public String getInputStreamName()
+ {
+ return inputStreamName;
+ }
+
+ /**
+ * Errout - the error output stream.
+ * @return error output stream; may be <code>null</code> if none has been set.
+ */
+ public PrintWriter getErrout()
+ {
+ return errout;
+ }
+
+ /**
+ * Sets the error output stream.
+ * @param out writer that receives error output; stored as-is
+ */
+ public void setErrout(PrintWriter out)
+ {
+ this.errout = out;
+ }
+
+ /**
+ * Sets the configuration from a configuration file by delegating to the configuration's parseFile method.
+ * @param filename configuration file name/path.
+ */
+ public void setConfigurationFromFile(String filename)
+ {
+ configuration.parseFile(filename);
+ }
+
+ /**
+ * Sets the configuration from a properties object by delegating to the configuration's addProps method.
+ * @param props Properties object
+ */
+ public void setConfigurationFromProps(Properties props)
+ {
+ configuration.addProps(props);
+ }
+
+ /**
+  * Parses the given input and returns the root Node. If out is non-null, pretty prints to OutputStream out.
+  * I/O failures are not reported to the caller; they result in a <code>null</code> return value.
+  * @param in input content. Possible {@link InputStream}, {@link Reader} or {@link String}.
+  * @param out optional output stream
+  * @return parsed org.w3c.tidy.Node, or <code>null</code> if parsing failed
+  */
+ public Node parse(Object in, OutputStream out)
+ {
+     try
+     {
+         return parse(in, null, out);
+     }
+     catch (IOException ignored)
+     {
+         // deliberately swallowed (FileNotFoundException is an IOException too): the contract of this
+         // method is to return null instead of throwing
+         return null;
+     }
+ }
+
+ /**
+  * Internal routine that actually does the parsing. The caller can pass either an input object or a file name.
+  * If both are passed, the file name is preferred. A stream opened here for <code>file</code> is closed before
+  * any output is written and also on early returns and exceptions.
+  * @param in input content (used only if <code>file</code> is null). Possible {@link InputStream},
+  * {@link Reader} or {@link String}. If both <code>in</code> and <code>file</code> are null, System.in is read.
+  * @param file file name
+  * @param out output stream
+  * @return parsed org.w3c.tidy.Node, or <code>null</code> if no error output is set or the parsed tree fails
+  * the integrity check
+  * @throws FileNotFoundException if <code>file</code> is not null but it can't be found
+  * @throws IOException for errors in reading input stream or file
+  */
+ private Node parse(Object in, String file, OutputStream out) throws FileNotFoundException, IOException
+ {
+     if (errout == null)
+     {
+         // nothing can be reported without an error output
+         return null;
+     }
+
+     parseErrors = 0;
+     parseWarnings = 0;
+
+     // ensure config is self-consistent
+     configuration.adjust();
+
+     Object source = in;
+     FileInputStream fileInput = null;
+     if (file != null)
+     {
+         fileInput = new FileInputStream(file);
+         source = fileInput;
+         inputStreamName = file;
+     }
+     else if (source == null)
+     {
+         source = System.in;
+         inputStreamName = "stdin";
+     }
+
+     Lexer lexer;
+     Node document;
+     try
+     {
+         StreamIn streamIn = StreamInFactory.getStreamIn(configuration, source);
+
+         lexer = new Lexer(streamIn, configuration, this.report);
+         lexer.errout = errout;
+
+         // store pointer to lexer in input stream to allow character encoding errors to be reported
+         streamIn.setLexer(lexer);
+
+         this.report.setFilename(inputStreamName); // #431895 - fix by Dave Bryan 04 Jan 01
+
+         if (!configuration.quiet)
+         {
+             this.report.helloMessage(errout);
+         }
+
+         // note: the byte order mark is not skipped here; the code that did so was commented out
+
+         // Tidy doesn't alter the doctype for generic XML docs
+         document = configuration.xmlTags ? parseXmlTree(lexer) : parseAndCleanHtmlTree(lexer);
+         if (document == null)
+         {
+             return null;
+         }
+     }
+     finally
+     {
+         // close the input but only if we created it; must happen before write-back reopens the file
+         if (fileInput != null)
+         {
+             try
+             {
+                 fileInput.close();
+             }
+             catch (IOException ignored)
+             {
+                 // a failure to close the input must not hide the parse result
+             }
+         }
+     }
+
+     if (!configuration.quiet)
+     {
+         // NOTE(review): the counters are only recorded when not quiet, so getParseErrors() and
+         // getParseWarnings() report 0 in quiet mode - confirm this is intended
+         parseWarnings = lexer.warnings;
+         parseErrors = lexer.errors;
+         this.report.reportNumWarnings(errout, lexer);
+     }
+
+     if (!configuration.quiet && lexer.errors > 0 && !configuration.forceOutput)
+     {
+         this.report.needsAuthorIntervention(errout);
+     }
+
+     if (!configuration.onlyErrors && (lexer.errors == 0 || configuration.forceOutput))
+     {
+         if (configuration.burstSlides)
+         {
+             burstIntoSlides(lexer, document);
+         }
+         else if (configuration.writeback && (file != null))
+         {
+             try
+             {
+                 PPrint pprint = new PPrint(configuration);
+                 FileOutputStream fileOutput = new FileOutputStream(file);
+                 try
+                 {
+                     printParsedDocument(pprint, OutFactory.getOut(this.configuration, fileOutput), lexer, document);
+                 }
+                 finally
+                 {
+                     // release the file handle even when printing fails; closing twice is harmless
+                     try
+                     {
+                         fileOutput.close();
+                     }
+                     catch (IOException ignored)
+                     {
+                         // nothing sensible left to do
+                     }
+                 }
+             }
+             catch (IOException e)
+             {
+                 errout.println(file + e.toString());
+             }
+         }
+         else if (out != null)
+         {
+             PPrint pprint = new PPrint(configuration);
+
+             // normal output stream
+             printParsedDocument(pprint, OutFactory.getOut(this.configuration, out), lexer, document);
+         }
+     }
+
+     if (!configuration.quiet)
+     {
+         this.report.errorSummary(lexer);
+     }
+
+     return document;
+ }
+
+ /**
+  * Parses the lexer input as generic XML and checks the integrity of the resulting tree.
+  * @param lexer lexer bound to the input
+  * @return the document root, or <code>null</code> if the tree is inconsistent
+  * @throws IOException if reading the input fails
+  */
+ private Node parseXmlTree(Lexer lexer) throws IOException
+ {
+     Node document = ParserImpl.parseXMLDocument(lexer);
+     if (!document.checkNodeIntegrity())
+     {
+         if (!configuration.quiet)
+         {
+             report.badTree(errout);
+         }
+         return null;
+     }
+     return document;
+ }
+
+ /**
+  * Parses the lexer input as HTML, applies the configured clean-up steps and fixes up doctype, generator
+  * mark and XML declaration.
+  * @param lexer lexer bound to the input
+  * @return the document root, or <code>null</code> if the tree is inconsistent
+  * @throws IOException if reading the input fails
+  */
+ private Node parseAndCleanHtmlTree(Lexer lexer) throws IOException
+ {
+     lexer.warnings = 0;
+
+     Node document = ParserImpl.parseDocument(lexer);
+
+     if (!document.checkNodeIntegrity())
+     {
+         if (!configuration.quiet)
+         {
+             this.report.badTree(errout);
+         }
+         return null;
+     }
+
+     Clean cleaner = new Clean(configuration.tt);
+
+     // simplifies <b><b> ... </b> ... </b> etc.
+     cleaner.nestedEmphasis(document);
+
+     // cleans up <dir> indented text </dir> etc.
+     cleaner.list2BQ(document);
+     cleaner.bQ2Div(document);
+
+     // replaces i by em and b by strong
+     if (configuration.logicalEmphasis)
+     {
+         cleaner.emFromI(document);
+     }
+
+     if (configuration.word2000 && cleaner.isWord2000(document))
+     {
+         // prune Word2000's <![if ...]> ... <![endif]>
+         cleaner.dropSections(lexer, document);
+
+         // drop style & class attributes and empty p, span elements
+         cleaner.cleanWord2000(lexer, document);
+     }
+
+     // replaces presentational markup by style rules
+     if (configuration.makeClean || configuration.dropFontTags)
+     {
+         cleaner.cleanTree(lexer, document);
+     }
+
+     // after cleaning, a broken tree is reported regardless of the quiet setting
+     if (!document.checkNodeIntegrity())
+     {
+         this.report.badTree(errout);
+         return null;
+     }
+
+     // remember given doctype
+     Node doctype = document.findDocType();
+     if (doctype != null)
+     {
+         doctype = (Node) doctype.clone();
+     }
+
+     if (document.content != null)
+     {
+         if (configuration.xHTML)
+         {
+             lexer.setXHTMLDocType(document);
+         }
+         else
+         {
+             lexer.fixDocType(document);
+         }
+
+         if (configuration.tidyMark)
+         {
+             lexer.addGenerator(document);
+         }
+     }
+
+     // ensure presence of initial <?XML version="1.0"?>
+     if (configuration.xmlOut && configuration.xmlPi)
+     {
+         lexer.fixXmlDecl(document);
+     }
+
+     if (!configuration.quiet && document.content != null)
+     {
+         this.report.reportVersion(errout, lexer, inputStreamName, doctype);
+     }
+
+     return document;
+ }
+
+ /**
+  * Bursts the parsed document into slides.
+  * @param lexer lexer used for parsing
+  * @param document parsed document root
+  * @throws IOException if writing the slides fails
+  */
+ private void burstIntoSlides(Lexer lexer, Node document) throws IOException
+ {
+     // remove doctype to avoid potential clash with markup introduced when bursting into slides
+     Node doctype = document.findDocType();
+     if (doctype != null)
+     {
+         Node.discardElement(doctype);
+     }
+
+     /* slides use transitional features */
+     lexer.versions |= Dict.VERS_HTML40_LOOSE;
+
+     // and patch up doctype to match
+     if (configuration.xHTML)
+     {
+         lexer.setXHTMLDocType(document);
+     }
+     else
+     {
+         lexer.fixDocType(document);
+     }
+
+     // find the body element which may be implicit
+     Node body = document.findBody(configuration.tt);
+
+     if (body != null)
+     {
+         PPrint pprint = new PPrint(configuration);
+         if (!configuration.quiet)
+         {
+             this.report.reportNumberOfSlides(errout, pprint.countSlides(body));
+         }
+         pprint.createSlides(lexer, document);
+     }
+     else if (!configuration.quiet)
+     {
+         this.report.missingBody(errout);
+     }
+ }
+
+ /**
+  * Pretty prints a freshly parsed document to the given output and closes that output.
+  * @param pprint pretty printer to use
+  * @param o output to print to; closed when printing is done
+  * @param lexer lexer used for parsing
+  * @param document parsed document root
+  * @throws IOException if writing fails
+  */
+ private void printParsedDocument(PPrint pprint, Out o, Lexer lexer, Node document) throws IOException
+ {
+     if (document.findDocType() == null)
+     {
+         // only use numeric character references if no doctype could be determined (e.g., because
+         // the document contains proprietary features) to ensure well-formedness.
+         configuration.numEntities = true;
+     }
+
+     if (configuration.bodyOnly)
+     {
+         // Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01
+         pprint.printBody(o, lexer, document, configuration.xmlOut);
+     }
+     else if (configuration.xmlOut && !configuration.xHTML)
+     {
+         pprint.printXMLTree(o, (short) 0, 0, lexer, document);
+     }
+     else
+     {
+         pprint.printTree(o, (short) 0, 0, lexer, document);
+     }
+
+     pprint.flushLine(o, 0);
+     o.close();
+ }
+
+ /**
+  * Parses the given input and returns a DOM Document node. If out is non-null, pretty prints to OutputStream
+  * out.
+  * @param in input content. Possible {@link InputStream}, {@link Reader} or {@link String}.
+  * @param out optional output stream
+  * @return parsed org.w3c.dom.Document, or <code>null</code> if parsing produced no document
+  */
+ public org.w3c.dom.Document parseDOM(Object in, OutputStream out)
+ {
+     Node root = parse(in, out);
+     return root == null ? null : (org.w3c.dom.Document) root.getAdapter();
+ }
+
+ /**
+  * Creates an empty DOM Document: a root node whose only child is an html start tag.
+  * @return a new org.w3c.dom.Document
+  */
+ public static org.w3c.dom.Document createEmptyDocument()
+ {
+     Node root = new Node(Node.ROOT_NODE, new byte[0], 0, 0);
+     Node htmlElement = new Node(Node.START_TAG, new byte[0], 0, 0, "html", new TagTable());
+
+     // freshly constructed objects are never null, so no guard is needed here
+     root.insertNodeAtStart(htmlElement);
+     return (org.w3c.dom.Document) root.getAdapter();
+ }
+
+ /**
+  * Pretty-prints a DOM Document. The document must be an instance of org.w3c.tidy.DOMDocumentImpl; any other
+  * implementation, or a <code>null</code> output stream, makes this method do nothing.
+  * @param doc org.w3c.dom.Document
+  * @param out output stream
+  * @throws IOException if writing to the output fails
+  */
+ public void pprint(org.w3c.dom.Document doc, OutputStream out) throws IOException
+ {
+     if (!(doc instanceof DOMDocumentImpl))
+     {
+         // @todo should we inform users that tidy can't print a generic Document or change the method
+         // signature?
+         return;
+     }
+
+     if (null != out)
+     {
+         pprint(((DOMDocumentImpl) doc).adaptee, OutFactory.getOut(this.configuration, out));
+     }
+ }
+
+ /**
+  * Pretty-prints a DOM Document to a writer. The document must be an instance of
+  * org.w3c.tidy.DOMDocumentImpl; any other implementation, or a <code>null</code> writer, makes this method do
+  * nothing. Unless the writer already is a FastBufferWriter, the output is collected in a FastBufferWriter
+  * first and then copied to <code>out</code>.
+  * @param doc org.w3c.dom.Document
+  * @param out writer receiving the output
+  * @throws IOException if writing to the output fails
+  */
+ public void pprint(org.w3c.dom.Document doc, Writer out) throws IOException
+ {
+     if (!(doc instanceof DOMDocumentImpl))
+     {
+         // @todo should we inform users that tidy can't print a generic Document or change the method
+         // signature?
+         return;
+     }
+
+     if (null != out)
+     {
+         if (!(out instanceof FastBufferWriter))
+         {
+             FastBufferWriter bout = new FastBufferWriter();
+             pprint(((DOMDocumentImpl) doc).adaptee, OutFactory.getOut(this.configuration, bout));
+             bout.writeTo(out);
+         }
+         else
+         {
+             pprint(((DOMDocumentImpl) doc).adaptee, OutFactory.getOut(this.configuration, out));
+         }
+     }
+ }
+
+ /**
+  * Pretty-prints a DOM Node. The node must be an instance of org.w3c.tidy.DOMNodeImpl; any other
+  * implementation, or a <code>null</code> output stream, makes this method do nothing. Unless the stream
+  * already is a FastBufferOutputStream, the output is collected in a FastBufferOutputStream first and then
+  * copied to <code>out</code>.
+  * @param node org.w3c.dom.Node. Must be an instance of org.w3c.tidy.DOMNodeImpl.
+  * @param out output stream
+  * @throws IOException if writing to the output fails
+  */
+ public void pprint(org.w3c.dom.Node node, OutputStream out) throws IOException
+ {
+     if (!(node instanceof DOMNodeImpl))
+     {
+         // @todo should we inform users that tidy can't print a generic Node or change the method
+         // signature?
+         return;
+     }
+
+     if (null != out)
+     {
+         if (!(out instanceof FastBufferOutputStream))
+         {
+             FastBufferOutputStream bout = new FastBufferOutputStream();
+             pprint(((DOMNodeImpl) node).adaptee, OutFactory.getOut(this.configuration, bout));
+             bout.writeTo(out);
+         }
+         else
+         {
+             pprint(((DOMNodeImpl) node).adaptee, OutFactory.getOut(this.configuration, out));
+         }
+     }
+ }
+
+ /**
+  * Pretty-prints a tidy Node and closes the output afterwards. The tree is printed as XML when the
+  * configuration's xmlTags flag is set, otherwise as HTML.
+  * @param node org.w3c.tidy.Node
+  * @param o output to print to
+  * @throws IOException if writing to the output fails
+  */
+ private void pprint(Node node, Out o) throws IOException
+ {
+     // the printer needs a lexer, but there is no input to read from
+     Lexer printLexer = new Lexer(null, this.configuration, this.report);
+     PPrint printer = new PPrint(configuration);
+
+     if (!configuration.xmlTags)
+     {
+         printer.printTree(o, (short) 0, 0, printLexer, node);
+     }
+     else
+     {
+         printer.printXMLTree(o, (short) 0, 0, printLexer, node);
+     }
+
+     printer.flushLine(o, 0);
+     o.close();
+ }
+
+ /**
+  * Command line interface to parser and pretty printer. Exits the JVM with the return code of
+  * {@link #mainExec(String[])}.
+  * @param argv command line parameters
+  */
+ public static void main(String[] argv)
+ {
+     System.exit(new Tidy().mainExec(argv));
+ }
+
+ /**
+  * Main method, but returns the return code as an int instead of calling System.exit(code). Needed for testing main
+  * method without shutting down tests.
+  * <p>
+  * Arguments starting with <code>-</code> (or <code>--</code>) are options and are collected until the next
+  * non-option argument, which is parsed as an input file with the options gathered so far. The options
+  * <code>help</code>, <code>help-config</code>, <code>show-config</code> and <code>version</code> print their
+  * information and return 0 immediately.
+  * </p>
+  * @param argv command line parameters
+  * @return 2 if parse errors were recorded, 1 if only warnings were recorded, 0 otherwise
+  */
+ protected int mainExec(String[] argv)
+ {
+     String file;
+     int argCount = argv.length;
+     int argIndex = 0;
+
+     // read command line
+     Properties properties = new Properties();
+
+     while (argCount > 0)
+     {
+         if (argv[argIndex].startsWith("-"))
+         {
+             // support -foo and --foo; lower-case with a fixed locale so that option names do not depend on the
+             // platform default locale (e.g. "I" must map to "i")
+             String argName = argv[argIndex].toLowerCase(java.util.Locale.ENGLISH);
+             while (argName.length() > 0 && argName.charAt(0) == '-')
+             {
+                 argName = argName.substring(1);
+             }
+
+             // "exclusive" options
+             if (argName.equals("help") || argName.equals("h") || argName.equals("?"))
+             {
+                 this.report.helpText(new PrintWriter(System.out, true));
+                 return 0;
+             }
+             else if (argName.equals("help-config"))
+             {
+                 configuration.printConfigOptions(new PrintWriter(System.out, true), false);
+                 return 0;
+             }
+             else if (argName.equals("show-config"))
+             {
+                 configuration.adjust(); // ensure config is self-consistent
+                 configuration.printConfigOptions(errout, true);
+                 return 0;
+             }
+             else if (argName.equals("version") || argName.equals("v"))
+             {
+                 this.report.showVersion(errout);
+                 return 0;
+             }
+
+             // optional value for non boolean options; requires more than two remaining arguments so that the
+             // last argument is never consumed as a value
+             String argValue = null;
+             if (argCount > 2 && !argv[argIndex + 1].startsWith("-"))
+             {
+                 argValue = argv[argIndex + 1];
+                 --argCount;
+                 ++argIndex;
+             }
+
+             // handle "special" aliases
+             String alias = (String) CMDLINE_ALIAS.get(argName);
+             if (alias != null)
+             {
+                 argName = alias;
+             }
+
+             if (Configuration.isKnownOption(argName)) // handle any standard config option
+             {
+                 // store the supplied value (previously the option name itself was stored as the value)
+                 properties.setProperty(argName, (argValue == null ? "" : argValue));
+             }
+             else if (argName.equals("config")) // parse a property file
+             {
+                 if (argValue != null)
+                 {
+                     configuration.parseFile(argValue);
+                 }
+             }
+             else if (TidyUtils.isCharEncodingSupported(argName)) // handle any encoding name
+             {
+                 properties.setProperty("char-encoding", argName);
+             }
+             else
+             {
+                 // anything else is read as a cluster of single-letter flags, e.g. -iq
+                 for (int i = 0; i < argName.length(); i++)
+                 {
+                     switch (argName.charAt(i))
+                     {
+                         case 'i' :
+                             configuration.indentContent = true;
+                             configuration.smartIndent = true;
+                             break;
+
+                         case 'o' :
+                             configuration.hideEndTags = true;
+                             break;
+
+                         case 'u' :
+                             configuration.upperCaseTags = true;
+                             break;
+
+                         case 'c' :
+                             configuration.makeClean = true;
+                             break;
+
+                         case 'b' :
+                             configuration.makeBare = true;
+                             break;
+
+                         case 'n' :
+                             configuration.numEntities = true;
+                             break;
+
+                         case 'm' :
+                             configuration.writeback = true;
+                             break;
+
+                         case 'e' :
+                             configuration.onlyErrors = true;
+                             break;
+
+                         case 'q' :
+                             configuration.quiet = true;
+                             break;
+
+                         default :
+                             this.report.unknownOption(this.errout, argName.charAt(i));
+                             break;
+                     }
+                 }
+             }
+
+             --argCount;
+             ++argIndex;
+             continue;
+         }
+
+         // a non-option argument: apply the options collected so far, then parse it as a file
+         configuration.addProps(properties);
+
+         // ensure config is self-consistent
+         configuration.adjust();
+
+         // user specified error file; the name "stderr" keeps the current stream
+         // NOTE(review): this runs for every input file, so the error file is reopened each time - confirm intent
+         if (configuration.errfile != null && !configuration.errfile.equals("stderr"))
+         {
+             // close previous error file
+             if (this.errout != this.stderr)
+             {
+                 this.errout.close();
+             }
+
+             // and try to open the new error file
+             try
+             {
+                 this.setErrout(new PrintWriter(new FileWriter(configuration.errfile), true));
+             }
+             catch (IOException e)
+             {
+                 // can't be opened so fall back to stderr
+                 this.setErrout(stderr);
+             }
+         }
+
+         // NOTE(review): argCount is always > 0 here (loop condition), so the "stdin" fallback is never taken
+         if (argCount > 0)
+         {
+             file = argv[argIndex];
+         }
+         else
+         {
+             file = "stdin";
+         }
+
+         try
+         {
+             parse(null, file, System.out);
+         }
+         catch (IOException ioe)
+         {
+             // also covers FileNotFoundException, which was reported the same way
+             this.report.unknownFile(this.errout, file);
+         }
+
+         --argCount;
+         ++argIndex;
+
+         if (argCount <= 0)
+         {
+             break;
+         }
+     }
+
+     if (this.parseErrors + this.parseWarnings > 0 && !configuration.quiet)
+     {
+         this.report.generalInfo(this.errout);
+     }
+
+     if (this.errout != this.stderr)
+     {
+         this.errout.close();
+     }
+
+     // return status can be used by scripts
+     if (this.parseErrors > 0)
+     {
+         return 2;
+     }
+
+     if (this.parseWarnings > 0)
+     {
+         return 1;
+     }
+
+     // 0 means all is ok
+     return 0;
+ }
+
+ /**
+  * Registers a TidyMessageListener with this instance's report, so that it is notified of messages and errors.
+  * @param listener TidyMessageListener implementation
+  */
+ public void setMessageListener(TidyMessageListener listener)
+ {
+     report.addMessageListener(listener);
+ }
+
+ /**
+ * <code>indent-spaces</code>- default indentation.
+ * @param spaces number of spaces used for indentation
+ * @see Configuration#spaces
+ */
+ public void setSpaces(int spaces)
+ {
+     this.configuration.spaces = spaces;
+ }
+
+ /**
+ * <code>indent-spaces</code>- default indentation.
+ * @return number of spaces used for indentation
+ * @see Configuration#spaces
+ */
+ public int getSpaces()
+ {
+     return this.configuration.spaces;
+ }
+
+ /**
+ * <code>wrap</code>- default wrap margin.
+ * @param wraplen default wrap margin
+ * @see Configuration#wraplen
+ */
+ public void setWraplen(int wraplen)
+ {
+     this.configuration.wraplen = wraplen;
+ }
+
+ /**
+ * <code>wrap</code>- default wrap margin.
+ * @return default wrap margin
+ * @see Configuration#wraplen
+ */
+ public int getWraplen()
+ {
+     return this.configuration.wraplen;
+ }
+
+ /**
+ * <code>tab-size</code>- tab size in chars.
+ * @param tabsize tab size in chars
+ * @see Configuration#tabsize
+ */
+ public void setTabsize(int tabsize)
+ {
+     this.configuration.tabsize = tabsize;
+ }
+
+ /**
+ * <code>tab-size</code>- tab size in chars.
+ * @return tab size in chars
+ * @see Configuration#tabsize
+ */
+ public int getTabsize()
+ {
+     return this.configuration.tabsize;
+ }
+
+ /**
+ * Errfile - file name to write errors to.
+ * @param errfile file name to write errors to
+ * @see Configuration#errfile
+ */
+ public void setErrfile(String errfile)
+ {
+     this.configuration.errfile = errfile;
+ }
+
+ /**
+ * Errfile - file name to write errors to.
+ * @return error file name
+ * @see Configuration#errfile
+ */
+ public String getErrfile()
+ {
+     return this.configuration.errfile;
+ }
+
+ /**
+ * writeback - if true then output tidied markup. NOTE: this property is ignored when
parsing from an InputStream.
+ * @param writeback <code>true</code>= output tidied markup
+ * @see Configuration#writeback
+ */
+ public void setWriteback(boolean writeback)
+ {
+     this.configuration.writeback = writeback;
+ }
+
+ /**
+ * writeback - if true then output tidied markup. NOTE: this property is ignored when
parsing from an InputStream.
+ * @return <code>true</code> if tidy will output tidied markup in input
file
+ * @see Configuration#writeback
+ */
+ public boolean getWriteback()
+ {
+     return this.configuration.writeback;
+ }
+
+ /**
+ * only-errors - if true normal output is suppressed.
+ * @param onlyErrors if <code>true</code> normal output is suppressed.
+ * @see Configuration#onlyErrors
+ */
+ public void setOnlyErrors(boolean onlyErrors)
+ {
+     this.configuration.onlyErrors = onlyErrors;
+ }
+
+ /**
+ * only-errors - if true normal output is suppressed.
+ * @return <code>true</code> if normal output is suppressed.
+ * @see Configuration#onlyErrors
+ */
+ public boolean getOnlyErrors()
+ {
+     return this.configuration.onlyErrors;
+ }
+
+ /**
+ * show-warnings - show warnings? (errors are always shown).
+ * @param showWarnings if <code>false</code> warnings are not shown
+ * @see Configuration#showWarnings
+ */
+ public void setShowWarnings(boolean showWarnings)
+ {
+     this.configuration.showWarnings = showWarnings;
+ }
+
+ /**
+ * show-warnings - show warnings? (errors are always shown).
+ * @return <code>false</code> if warnings are not shown
+ * @see Configuration#showWarnings
+ */
+ public boolean getShowWarnings()
+ {
+     return this.configuration.showWarnings;
+ }
+
+ /**
+ * quiet - no 'Parsing X', guessed DTD or summary.
+ * @param quiet <code>true</code>= don't output summary, warnings or
errors
+ * @see Configuration#quiet
+ */
+ public void setQuiet(boolean quiet)
+ {
+     this.configuration.quiet = quiet;
+ }
+
+ /**
+ * quiet - no 'Parsing X', guessed DTD or summary.
+ * @return <code>true</code> if tidy will not output summary, warnings or
errors
+ * @see Configuration#quiet
+ */
+ public boolean getQuiet()
+ {
+     return this.configuration.quiet;
+ }
+
+ /**
+ * indent - indent content of appropriate tags.
+ * @param indentContent indent content of appropriate tags
+ * @see Configuration#indentContent
+ */
+ public void setIndentContent(boolean indentContent)
+ {
+     this.configuration.indentContent = indentContent;
+ }
+
+ /**
+ * indent - indent content of appropriate tags.
+ * @return <code>true</code> if tidy will indent content of appropriate
tags
+ * @see Configuration#indentContent
+ */
+ public boolean getIndentContent()
+ {
+     return this.configuration.indentContent;
+ }
+
+ /**
+ * SmartIndent - does text/block level content affect indentation.
+ * @param smartIndent <code>true</code> if text/block level content should affect indentation
+ * @see Configuration#smartIndent
+ */
+ public void setSmartIndent(boolean smartIndent)
+ {
+     this.configuration.smartIndent = smartIndent;
+ }
+
+ /**
+ * SmartIndent - does text/block level content affect indentation.
+ * @return <code>true</code> if text/block level content should affect indentation
+ * @see Configuration#smartIndent
+ */
+ public boolean getSmartIndent()
+ {
+     return this.configuration.smartIndent;
+ }
+
+ /**
+ * hide-endtags - suppress optional end tags.
+ * @param hideEndTags <code>true</code>= suppress optional end tags
+ * @see Configuration#hideEndTags
+ */
+ public void setHideEndTags(boolean hideEndTags)
+ {
+     this.configuration.hideEndTags = hideEndTags;
+ }
+
+ /**
+ * hide-endtags - suppress optional end tags.
+ * @return <code>true</code> if tidy will suppress optional end tags
+ * @see Configuration#hideEndTags
+ */
+ public boolean getHideEndTags()
+ {
+     return this.configuration.hideEndTags;
+ }
+
+ /**
+ * input-xml - treat input as XML.
+ * @param xmlTags <code>true</code> if tidy should treat input as XML
+ * @see Configuration#xmlTags
+ */
+ public void setXmlTags(boolean xmlTags)
+ {
+     this.configuration.xmlTags = xmlTags;
+ }
+
+ /**
+ * input-xml - treat input as XML.
+ * @return <code>true</code> if tidy will treat input as XML
+ * @see Configuration#xmlTags
+ */
+ public boolean getXmlTags()
+ {
+     return this.configuration.xmlTags;
+ }
+
+ /**
+ * output-xml - create output as XML.
+ * @param xmlOut <code>true</code> if tidy should create output as xml
+ * @see Configuration#xmlOut
+ */
+ public void setXmlOut(boolean xmlOut)
+ {
+     this.configuration.xmlOut = xmlOut;
+ }
+
+ /**
+ * output-xml - create output as XML.
+ * @return <code>true</code> if tidy will create output as xml
+ * @see Configuration#xmlOut
+ */
+ public boolean getXmlOut()
+ {
+     return this.configuration.xmlOut;
+ }
+
+ /**
+ * output-xhtml - output extensible HTML.
+ * @param xhtml <code>true</code> if tidy should output XHTML
+ * @see Configuration#xHTML
+ */
+ public void setXHTML(boolean xhtml)
+ {
+     this.configuration.xHTML = xhtml;
+ }
+
+ /**
+ * output-xhtml - output extensible HTML.
+ * @return <code>true</code> if tidy will output XHTML
+ * @see Configuration#xHTML
+ */
+ public boolean getXHTML()
+ {
+     return this.configuration.xHTML;
+ }
+
+ /**
+ * uppercase-tags - output tags in upper case.
+ * @param upperCaseTags <code>true</code> if tidy should output tags in
upper case (default is lowercase)
+ * @see Configuration#upperCaseTags
+ */
+ public void setUpperCaseTags(boolean upperCaseTags)
+ {
+     this.configuration.upperCaseTags = upperCaseTags;
+ }
+
+ /**
+ * uppercase-tags - output tags in upper case.
+ * @return <code>true</code> if tidy will output tags in upper case
+ * @see Configuration#upperCaseTags
+ */
+ public boolean getUpperCaseTags()
+ {
+     return this.configuration.upperCaseTags;
+ }
+
+ /**
+ * uppercase-attributes - output attributes in upper case.
+ * @param upperCaseAttrs <code>true</code> if tidy should output
attributes in upper case (default is lowercase)
+ * @see Configuration#upperCaseAttrs
+ */
+ public void setUpperCaseAttrs(boolean upperCaseAttrs)
+ {
+     this.configuration.upperCaseAttrs = upperCaseAttrs;
+ }
+
+ /**
+ * uppercase-attributes - output attributes in upper case.
+ * @return <code>true</code> if tidy will output attributes in upper case
+ * @see Configuration#upperCaseAttrs
+ */
+ public boolean getUpperCaseAttrs()
+ {
+     return this.configuration.upperCaseAttrs;
+ }
+
+ /**
+ * make-clean - remove presentational clutter.
+ * @param makeClean true to remove presentational clutter
+ * @see Configuration#makeClean
+ */
+ public void setMakeClean(boolean makeClean)
+ {
+     this.configuration.makeClean = makeClean;
+ }
+
+ /**
+ * make-clean - remove presentational clutter.
+ * @return true if tidy will remove presentational clutter
+ * @see Configuration#makeClean
+ */
+ public boolean getMakeClean()
+ {
+     return this.configuration.makeClean;
+ }
+
+ /**
+ * make-bare - remove Microsoft cruft.
+ * @param makeBare true to remove Microsoft cruft
+ * @see Configuration#makeBare
+ */
+ public void setMakeBare(boolean makeBare)
+ {
+     this.configuration.makeBare = makeBare;
+ }
+
+ /**
+ * make-bare - remove Microsoft cruft.
+ * @return true if tidy will remove Microsoft cruft
+ * @see Configuration#makeBare
+ */
+ public boolean getMakeBare()
+ {
+     return this.configuration.makeBare;
+ }
+
+ /**
+ * break-before-br - output newline before &lt;br&gt;.
+ * @param breakBeforeBR <code>true</code> if tidy should output a newline before &lt;br&gt;
+ * @see Configuration#breakBeforeBR
+ */
+ public void setBreakBeforeBR(boolean breakBeforeBR)
+ {
+     this.configuration.breakBeforeBR = breakBeforeBR;
+ }
+
+ /**
+ * break-before-br - output newline before &lt;br&gt;.
+ * @return <code>true</code> if tidy will output a newline before &lt;br&gt;
+ * @see Configuration#breakBeforeBR
+ */
+ public boolean getBreakBeforeBR()
+ {
+     return this.configuration.breakBeforeBR;
+ }
+
+ /**
+ * <code>split</code>- create slides on each h2 element.
+ * @param burstSlides <code>true</code> if tidy should create slides on
each h2 element
+ * @see Configuration#burstSlides
+ */
+ public void setBurstSlides(boolean burstSlides)
+ {
+     this.configuration.burstSlides = burstSlides;
+ }
+
+ /**
+ * <code>split</code>- create slides on each h2 element.
+ * @return <code>true</code> if tidy will create slides on each h2
element
+ * @see Configuration#burstSlides
+ */
+ public boolean getBurstSlides()
+ {
+     return this.configuration.burstSlides;
+ }
+
+ /**
+ * <code>numeric-entities</code>- output entities other than the built-in
HTML entities in the numeric rather
+ * than the named entity form.
+ * @param numEntities <code>true</code> if tidy should output entities in
the numeric form.
+ * @see Configuration#numEntities
+ */
+ public void setNumEntities(boolean numEntities)
+ {
+     this.configuration.numEntities = numEntities;
+ }
+
+ /**
+ * <code>numeric-entities</code>- output entities other than the built-in
HTML entities in the numeric rather
+ * than the named entity form.
+ * @return <code>true</code> if tidy will output entities in the numeric
form.
+ * @see Configuration#numEntities
+ */
+ public boolean getNumEntities()
+ {
+     return this.configuration.numEntities;
+ }
+
+ /**
+ * <code>quote-marks</code>- output " marks as &quot;.
+ * @param quoteMarks <code>true</code> if tidy should output " marks
as &quot;
+ * @see Configuration#quoteMarks
+ */
+ public void setQuoteMarks(boolean quoteMarks)
+ {
+     this.configuration.quoteMarks = quoteMarks;
+ }
+
+ /**
+ * <code>quote-marks</code>- output " marks as &quot;.
+ * @return <code>true</code> if tidy will output " marks as
&quot;
+ * @see Configuration#quoteMarks
+ */
+ public boolean getQuoteMarks()
+ {
+     return this.configuration.quoteMarks;
+ }
+
+ /**
+ * <code>quote-nbsp</code>- output non-breaking space as entity.
+ * @param quoteNbsp <code>true</code> if tidy should output non-breaking
space as entity
+ * @see Configuration#quoteNbsp
+ */
+ public void setQuoteNbsp(boolean quoteNbsp)
+ {
+     this.configuration.quoteNbsp = quoteNbsp;
+ }
+
+ /**
+ * <code>quote-nbsp</code>- output non-breaking space as entity.
+ * @return <code>true</code> if tidy will output non-breaking space as
entity
+ * @see Configuration#quoteNbsp
+ */
+ public boolean getQuoteNbsp()
+ {
+     return this.configuration.quoteNbsp;
+ }
+
+ /**
+ * <code>quote-ampersand</code>- output naked ampersand as &amp;.
+ * @param quoteAmpersand <code>true</code> if tidy should output naked ampersand as &amp;
+ * @see Configuration#quoteAmpersand
+ */
+ public void setQuoteAmpersand(boolean quoteAmpersand)
+ {
+     this.configuration.quoteAmpersand = quoteAmpersand;
+ }
+
+ /**
+ * <code>quote-ampersand</code>- output naked ampersand as &amp;.
+ * @return <code>true</code> if tidy will output naked ampersand as &amp;
+ * @see Configuration#quoteAmpersand
+ */
+ public boolean getQuoteAmpersand()
+ {
+     return this.configuration.quoteAmpersand;
+ }
+
+ /**
+ * <code>wrap-attributes</code>- wrap within attribute values.
+ * @param wrapAttVals <code>true</code> if tidy should wrap within
attribute values
+ * @see Configuration#wrapAttVals
+ */
+ public void setWrapAttVals(boolean wrapAttVals)
+ {
+     this.configuration.wrapAttVals = wrapAttVals;
+ }
+
+ /**
+ * <code>wrap-attributes</code>- wrap within attribute values.
+ * @return <code>true</code> if tidy will wrap within attribute values
+ * @see Configuration#wrapAttVals
+ */
+ public boolean getWrapAttVals()
+ {
+     return this.configuration.wrapAttVals;
+ }
+
+ /**
+ * <code>wrap-script-literals</code>- wrap within JavaScript string
literals.
+ * @param wrapScriptlets <code>true</code> if tidy should wrap within
JavaScript string literals
+ * @see Configuration#wrapScriptlets
+ */
+ public void setWrapScriptlets(boolean wrapScriptlets)
+ {
+     this.configuration.wrapScriptlets = wrapScriptlets;
+ }
+
+ /**
+ * <code>wrap-script-literals</code>- wrap within JavaScript string
literals.
+ * @return <code>true</code> if tidy will wrap within JavaScript string
literals
+ * @see Configuration#wrapScriptlets
+ */
+ public boolean getWrapScriptlets()
+ {
+     return this.configuration.wrapScriptlets;
+ }
+
+ /**
+ * <code>wrap-sections</code>- wrap within <![ ... ]>
section tags
+ * @param wrapSection <code>true</code> if tidy should wrap within
<![ ... ]> section tags
+ * @see Configuration#wrapSection
+ */
+ public void setWrapSection(boolean wrapSection)
+ {
+     this.configuration.wrapSection = wrapSection;
+ }
+
+ /**
+ * <code>wrap-sections</code>- wrap within <![ ... ]>
section tags
+ * @return <code>true</code> if tidy will wrap within <![ ...
]> section tags
+ * @see Configuration#wrapSection
+ */
+ public boolean getWrapSection()
+ {
+     return this.configuration.wrapSection;
+ }
+
+ /**
+ * <code>alt-text</code>- default text for alt attribute.
+ * @param altText default text for alt attribute
+ * @see Configuration#altText
+ */
+ public void setAltText(String altText)
+ {
+     this.configuration.altText = altText;
+ }
+
+ /**
+ * <code>alt-text</code>- default text for alt attribute.
+ * @return default text for alt attribute
+ * @see Configuration#altText
+ */
+ public String getAltText()
+ {
+     return this.configuration.altText;
+ }
+
+ /**
+ * <code>add-xml-pi</code>- add <?xml?> for XML docs.
+ * @param xmlPi <code>true</code> if tidy should add
<?xml?> for XML docs
+ * @see Configuration#xmlPi
+ */
+ public void setXmlPi(boolean xmlPi)
+ {
+     this.configuration.xmlPi = xmlPi;
+ }
+
+ /**
+ * <code>add-xml-pi</code>- add <?xml?> for XML docs.
+ * @return <code>true</code> if tidy will add <?xml?> for
XML docs
+ * @see Configuration#xmlPi
+ */
+ public boolean getXmlPi()
+ {
+     return this.configuration.xmlPi;
+ }
+
+ /**
+ * <code>drop-font-tags</code>- discard presentation tags.
+ * @param dropFontTags <code>true</code> if tidy should discard
presentation tags
+ * @see Configuration#dropFontTags
+ */
+ public void setDropFontTags(boolean dropFontTags)
+ {
+     this.configuration.dropFontTags = dropFontTags;
+ }
+
+ /**
+ * <code>drop-font-tags</code>- discard presentation tags.
+ * @return <code>true</code> if tidy will discard presentation tags
+ * @see Configuration#dropFontTags
+ */
+ public boolean getDropFontTags()
+ {
+     return this.configuration.dropFontTags;
+ }
+
+ /**
+ * <code>drop-proprietary-attributes</code>- discard proprietary
attributes.
+ * @param dropProprietaryAttributes <code>true</code> if tidy should
discard proprietary attributes
+ * @see Configuration#dropProprietaryAttributes
+ */
+ public void setDropProprietaryAttributes(boolean dropProprietaryAttributes)
+ {
+     this.configuration.dropProprietaryAttributes = dropProprietaryAttributes;
+ }
+
+ /**
+ * <code>drop-proprietary-attributes</code>- discard proprietary
attributes.
+ * @return <code>true</code> if tidy will discard proprietary attributes
+ * @see Configuration#dropProprietaryAttributes
+ */
+ public boolean getDropProprietaryAttributes()
+ {
+     return this.configuration.dropProprietaryAttributes;
+ }
+
+ /**
+ * <code>drop-empty-paras</code>- discard empty p elements.
+ * @param dropEmptyParas <code>true</code> if tidy should discard empty p
elements
+ * @see Configuration#dropEmptyParas
+ */
+ public void setDropEmptyParas(boolean dropEmptyParas)
+ {
+     this.configuration.dropEmptyParas = dropEmptyParas;
+ }
+
+ /**
+ * <code>drop-empty-paras</code>- discard empty p elements.
+ * @return <code>true</code> if tidy will discard empty p elements
+ * @see Configuration#dropEmptyParas
+ */
+ public boolean getDropEmptyParas()
+ {
+     return this.configuration.dropEmptyParas;
+ }
+
+ /**
+ * <code>fix-bad-comments</code>- fix comments with adjacent hyphens.
+ * @param fixComments <code>true</code> if tidy should fix comments with
adjacent hyphens
+ * @see Configuration#fixComments
+ */
+ public void setFixComments(boolean fixComments)
+ {
+     this.configuration.fixComments = fixComments;
+ }
+
+ /**
+ * <code>fix-bad-comments</code>- fix comments with adjacent hyphens.
+ * @return <code>true</code> if tidy will fix comments with adjacent
hyphens
+ * @see Configuration#fixComments
+ */
+ public boolean getFixComments()
+ {
+     return this.configuration.fixComments;
+ }
+
+ /**
+ * <code>wrap-asp</code>- wrap within ASP pseudo elements.
+ * @param wrapAsp <code>true</code> if tidy should wrap within ASP pseudo
elements
+ * @see Configuration#wrapAsp
+ */
+ public void setWrapAsp(boolean wrapAsp)
+ {
+     this.configuration.wrapAsp = wrapAsp;
+ }
+
+ /**
+ * <code>wrap-asp</code>- wrap within ASP pseudo elements.
+ * @return <code>true</code> if tidy will wrap within ASP pseudo
elements
+ * @see Configuration#wrapAsp
+ */
+ public boolean getWrapAsp()
+ {
+     return this.configuration.wrapAsp;
+ }
+
+ /**
+ * <code>wrap-jste</code>- wrap within JSTE pseudo elements.
+ * @param wrapJste <code>true</code> if tidy should wrap within JSTE
pseudo elements
+ * @see Configuration#wrapJste
+ */
+ public void setWrapJste(boolean wrapJste)
+ {
+     this.configuration.wrapJste = wrapJste;
+ }
+
+ /**
+ * <code>wrap-jste</code>- wrap within JSTE pseudo elements.
+ * @return <code>true</code> if tidy will wrap within JSTE pseudo
elements
+ * @see Configuration#wrapJste
+ */
+ public boolean getWrapJste()
+ {
+     return this.configuration.wrapJste;
+ }
+
+ /**
+ * <code>wrap-php</code>- wrap within PHP pseudo elements.
+ * @param wrapPhp <code>true</code> if tidy should wrap within PHP pseudo
elements
+ * @see Configuration#wrapPhp
+ */
+ public void setWrapPhp(boolean wrapPhp)
+ {
+     this.configuration.wrapPhp = wrapPhp;
+ }
+
+ /**
+ * <code>wrap-php</code>- wrap within PHP pseudo elements.
+ * @return <code>true</code> if tidy will wrap within PHP pseudo
elements
+ * @see Configuration#wrapPhp
+ */
+ public boolean getWrapPhp()
+ {
+     return this.configuration.wrapPhp;
+ }
+
+ /**
+ * <code>fix-backslash</code>- fix URLs by replacing \ with /.
+ * @param fixBackslash <code>true</code> if tidy should fix URLs by
replacing \ with /
+ * @see Configuration#fixBackslash
+ */
+ public void setFixBackslash(boolean fixBackslash)
+ {
+     this.configuration.fixBackslash = fixBackslash;
+ }
+
+ /**
+ * <code>fix-backslash</code>- fix URLs by replacing \ with /.
+ * @return <code>true</code> if tidy will fix URLs by replacing \ with /
+ * @see Configuration#fixBackslash
+ */
+ public boolean getFixBackslash()
+ {
+     return this.configuration.fixBackslash;
+ }
+
+ /**
+ * <code>indent-attributes</code>- newline+indent before each attribute.
+ * @param indentAttributes <code>true</code> if tidy should output a
newline+indent before each attribute
+ * @see Configuration#indentAttributes
+ */
+ public void setIndentAttributes(boolean indentAttributes)
+ {
+     this.configuration.indentAttributes = indentAttributes;
+ }
+
+ /**
+ * <code>indent-attributes</code>- newline+indent before each attribute.
+ * @return <code>true</code> if tidy will output a newline+indent before
each attribute
+ * @see Configuration#indentAttributes
+ */
+ public boolean getIndentAttributes()
+ {
+     return this.configuration.indentAttributes;
+ }
+
+ /**
+  * <code>doctype</code>- user specified doctype. The value is run through the <code>doctype</code> property parser
+  * and the parser's result is stored as the doctype string. A <code>null</code> argument is ignored.
+  * @param doctype <code>omit | auto | strict | loose | <em>fpi</em></code> where the <em>fpi </em> is a string
+  * similar to "-//ACME//DTD HTML 3.14159//EN" Note: for <em>fpi </em> include the double-quotes in the
+  * string.
+  * @see Configuration#docTypeStr
+  * @see Configuration#docTypeMode
+  */
+ public void setDocType(String doctype)
+ {
+     if (doctype == null)
+     {
+         // nothing to parse, keep the current setting
+         return;
+     }
+
+     Object parsed = ParsePropertyImpl.DOCTYPE.parse(doctype, "doctype", configuration);
+     configuration.docTypeStr = (String) parsed;
+ }
+
+ /**
+  * <code>doctype</code>- user specified doctype, derived from the configured doctype mode.
+  * @return <code>omit | auto | strict | loose | ignore | <em>fpi</em></code> where the <em>fpi </em> is the
+  * stored doctype string, similar to "-//ACME//DTD HTML 3.14159//EN"; <code>null</code> if the mode is none
+  * of the known ones
+  * @see Configuration#docTypeStr
+  * @see Configuration#docTypeMode
+  */
+ public String getDocType()
+ {
+     switch (configuration.docTypeMode)
+     {
+         case Configuration.DOCTYPE_OMIT :
+             return "omit";
+         case Configuration.DOCTYPE_AUTO :
+             return "auto";
+         case Configuration.DOCTYPE_STRICT :
+             return "strict";
+         case Configuration.DOCTYPE_LOOSE :
+             return "loose";
+         case Configuration.DOCTYPE_IGNORE :
+             return "ignore";
+         case Configuration.DOCTYPE_USER :
+             return configuration.docTypeStr;
+         default :
+             return null;
+     }
+ }
+
+ /**
+ * <code>logical-emphasis</code>- replace i by em and b by strong.
+ * @param logicalEmphasis <code>true</code> if tidy should replace i by
em and b by strong
+ * @see Configuration#logicalEmphasis
+ */
+ public void setLogicalEmphasis(boolean logicalEmphasis)
+ {
+     this.configuration.logicalEmphasis = logicalEmphasis;
+ }
+
+ /**
+ * <code>logical-emphasis</code>- replace i by em and b by strong.
+ * @return <code>true</code> if tidy will replace i by em and b by
strong
+ * @see Configuration#logicalEmphasis
+ */
+ public boolean getLogicalEmphasis()
+ {
+     return this.configuration.logicalEmphasis;
+ }
+
+ /**
+ * <code>assume-xml-procins</code> This option specifies if Tidy should
change the parsing of processing
+ * instructions to require ?> as the terminator rather than >. This option is
automatically set if the input is in
+ * XML.
+ * @param xmlPIs <code>true</code> if tidy should expect a ?> at the
end of processing instructions
+ * @see Configuration#xmlPIs
+ */
+ public void setXmlPIs(boolean xmlPIs)
+ {
+     this.configuration.xmlPIs = xmlPIs;
+ }
+
+ /**
+ * <code>assume-xml-procins</code> This option specifies if Tidy should
change the parsing of processing
+ * instructions to require ?> as the terminator rather than >. This option is
automatically set if the input is in
+ * XML.
+ * @return <code>true</code> if tidy will expect a ?> at the end of
processing instructions
+ * @see Configuration#xmlPIs
+ */
+ public boolean getXmlPIs()
+ {
+     return this.configuration.xmlPIs;
+ }
+
+ /**
+ * <code>enclose-text</code>- if true text at body is wrapped in
<p>'s.
+ * @param encloseText <code>true</code> if tidy should wrap text at body
in <p>'s.
+ * @see Configuration#encloseBodyText
+ */
+ public void setEncloseText(boolean encloseText)
+ {
+     this.configuration.encloseBodyText = encloseText;
+ }
+
+ /**
+ * <code>enclose-text</code>- if true text at body is wrapped in
<p>'s.
+ * @return <code>true</code> if tidy will wrap text at body in
<p>'s.
+ * @see Configuration#encloseBodyText
+ */
+ public boolean getEncloseText()
+ {
+     return this.configuration.encloseBodyText;
+ }
+
+ /**
+ * <code>enclose-block-text</code>- if true text in blocks is wrapped in &lt;p&gt;'s.
+ * @param encloseBlockText <code>true</code> if tidy should wrap text in blocks in &lt;p&gt;'s.
+ * @see Configuration#encloseBlockText
+ */
+ public void setEncloseBlockText(boolean encloseBlockText)
+ {
+     this.configuration.encloseBlockText = encloseBlockText;
+ }
+
+ /**
+ * <code>enclose-block-text</code>- if true text in blocks is wrapped in &lt;p&gt;'s.
+ * @return <code>true</code> if tidy will wrap text in blocks in &lt;p&gt;'s.
+ * @see Configuration#encloseBlockText
+ */
+ public boolean getEncloseBlockText()
+ {
+     return this.configuration.encloseBlockText;
+ }
+
+ /**
+ * <code>word-2000</code>- draconian cleaning for Word2000.
+ * @param word2000 <code>true</code> if tidy should clean word2000
documents
+ * @see Configuration#word2000
+ */
+ public void setWord2000(boolean word2000)
+ {
+     this.configuration.word2000 = word2000;
+ }
+
+ /**
+ * <code>word-2000</code>- draconian cleaning for Word2000.
+ * @return <code>true</code> if tidy will clean word2000 documents
+ * @see Configuration#word2000
+ */
+ public boolean getWord2000()
+ {
+     return this.configuration.word2000;
+ }
+
+ /**
+ * <code>tidy-mark</code>- add meta element indicating tidied doc.
+ * @param tidyMark <code>true</code> if tidy should add meta element
indicating tidied doc
+ * @see Configuration#tidyMark
+ */
+ public void setTidyMark(boolean tidyMark)
+ {
+     this.configuration.tidyMark = tidyMark;
+ }
+
+ /**
+ * <code>tidy-mark</code>- add meta element indicating tidied doc.
+ * @return <code>true</code> if tidy will add meta element indicating
tidied doc
+ * @see Configuration#tidyMark
+ */
+ public boolean getTidyMark()
+ {
+     return this.configuration.tidyMark;
+ }
+
+ /**
+ * <code>add-xml-space</code>- if set to yes adds xml:space attr as
needed.
+ * @param xmlSpace <code>true</code> if tidy should add xml:space attr as
needed
+ * @see Configuration#xmlSpace
+ */
+ public void setXmlSpace(boolean xmlSpace)
+ {
+     this.configuration.xmlSpace = xmlSpace;
+ }
+
+ /**
+ * <code>add-xml-space</code>- if set to yes adds xml:space attr as
needed.
+ * @return <code>true</code> if tidy will add xml:space attr as needed
+ * @see Configuration#xmlSpace
+ */
+ public boolean getXmlSpace()
+ {
+     return this.configuration.xmlSpace;
+ }
+
+ /**
+ * <code>gnu-emacs</code>- if true format error output for GNU Emacs.
+ * @param emacs <code>true</code> if tidy should format error output for
GNU Emacs
+ * @see Configuration#emacs
+ */
+ public void setEmacs(boolean emacs)
+ {
+     this.configuration.emacs = emacs;
+ }
+
+ /**
+ * <code>gnu-emacs</code>- if true format error output for GNU Emacs.
+ * @return <code>true</code> if tidy will format error output for GNU
Emacs
+ * @see Configuration#emacs
+ */
+ public boolean getEmacs()
+ {
+     return this.configuration.emacs;
+ }
+
+ /**
+ * <code>literal-attributes</code>- if true attributes may use newlines.
+ * @param literalAttribs <code>true</code> if attributes may use
newlines
+ * @see Configuration#literalAttribs
+ */
+ public void setLiteralAttribs(boolean literalAttribs)
+ {
+     this.configuration.literalAttribs = literalAttribs;
+ }
+
+ /**
+ * <code>literal-attributes</code>- if true attributes may use newlines.
+ * @return <code>true</code> if attributes may use newlines
+ * @see Configuration#literalAttribs
+ */
+ public boolean getLiteralAttribs()
+ {
+     return this.configuration.literalAttribs;
+ }
+
+ /**
+ * <code>print-body-only</code>- output BODY content only.
+ * @param bodyOnly true = print only the document body
+ * @see Configuration#bodyOnly
+ */
+ public void setPrintBodyOnly(boolean bodyOnly)
+ {
+     this.configuration.bodyOnly = bodyOnly;
+ }
+
+ /**
+ * <code>print-body-only</code>- output BODY content only.
+ * @return true if tidy will print only the document body
+ */
+ public boolean getPrintBodyOnly()
+ {
+     return this.configuration.bodyOnly;
+ }
+
+ /**
+ * <code>fix-uri</code>- fix uri references applying URI encoding if
necessary.
+ * @param fixUri true = fix uri references
+ * @see Configuration#fixUri
+ */
+ public void setFixUri(boolean fixUri)
+ {
+     this.configuration.fixUri = fixUri;
+ }
+
+ /**
+ * <code>fix-uri</code>- fix uri references applying URI encoding if necessary.
+ * @return true if tidy will fix uri references
+ */
+ public boolean getFixUri()
+ {
+     return this.configuration.fixUri;
+ }
+
+ /**
+ * <code>lower-literals</code>- folds known attribute values to lower
case.
+ * @param lowerLiterals true = folds known attribute values to lower case
+ * @see Configuration#lowerLiterals
+ */
+ public void setLowerLiterals(boolean lowerLiterals)
+ {
+     this.configuration.lowerLiterals = lowerLiterals;
+ }
+
+ /**
+ * <code>lower-literals</code> - folds known attribute values to lower case.
+ * @return <code>true</code> if tidy will fold known attribute values to lower case
+ * @see Configuration#lowerLiterals
+ */
+ public boolean getLowerLiterals()
+ {
+ return configuration.lowerLiterals;
+ }
+
+ /**
+ * <code>hide-comments</code> - hides all (real) comments in output.
+ * @param hideComments <code>true</code> to hide all comments in output
+ * @see Configuration#hideComments
+ */
+ public void setHideComments(boolean hideComments)
+ {
+ configuration.hideComments = hideComments;
+ }
+
+ /**
+ * <code>hide-comments</code> - hides all (real) comments in output.
+ * @return <code>true</code> if tidy will hide all comments in output
+ * @see Configuration#hideComments
+ */
+ public boolean getHideComments()
+ {
+ return configuration.hideComments;
+ }
+
+ /**
+ * <code>indent-cdata</code> - indent CDATA sections.
+ * @param indentCdata <code>true</code> to indent CDATA sections
+ * @see Configuration#indentCdata
+ */
+ public void setIndentCdata(boolean indentCdata)
+ {
+ configuration.indentCdata = indentCdata;
+ }
+
+ /**
+ * <code>indent-cdata</code> - indent CDATA sections.
+ * @return <code>true</code> if tidy will indent CDATA sections
+ * @see Configuration#indentCdata
+ */
+ public boolean getIndentCdata()
+ {
+ return configuration.indentCdata;
+ }
+
+ /**
+ * <code>force-output</code> - output document even if errors were found.
+ * @param forceOutput <code>true</code> to output the document even if errors were found
+ * @see Configuration#forceOutput
+ */
+ public void setForceOutput(boolean forceOutput)
+ {
+ configuration.forceOutput = forceOutput;
+ }
+
+ /**
+ * <code>force-output</code> - output document even if errors were found.
+ * @return <code>true</code> if tidy will output the document even if errors were found
+ * @see Configuration#forceOutput
+ */
+ public boolean getForceOutput()
+ {
+ return configuration.forceOutput;
+ }
+
+ /**
+ * <code>show-errors</code> - set the number of errors to put out.
+ * The value is stored as given; this setter performs no range check.
+ * @param showErrors number of errors to put out
+ * @see Configuration#showErrors
+ */
+ public void setShowErrors(int showErrors)
+ {
+ configuration.showErrors = showErrors;
+ }
+
+ /**
+ * <code>show-errors</code> - number of errors to put out.
+ * @return the number of errors tidy will put out
+ * @see Configuration#showErrors
+ */
+ public int getShowErrors()
+ {
+ return configuration.showErrors;
+ }
+
+ /**
+ * <code>ascii-chars</code> - convert quotes and dashes to nearest ASCII char.
+ * @param asciiChars <code>true</code> to convert quotes and dashes to nearest ASCII char
+ * @see Configuration#asciiChars
+ */
+ public void setAsciiChars(boolean asciiChars)
+ {
+ configuration.asciiChars = asciiChars;
+ }
+
+ /**
+ * <code>ascii-chars</code> - convert quotes and dashes to nearest ASCII char.
+ * @return <code>true</code> if tidy will convert quotes and dashes to nearest ASCII char
+ * @see Configuration#asciiChars
+ */
+ public boolean getAsciiChars()
+ {
+ return configuration.asciiChars;
+ }
+
+ /**
+ * <code>join-classes</code> - join multiple class attributes.
+ * @param joinClasses <code>true</code> to join multiple class attributes
+ * @see Configuration#joinClasses
+ */
+ public void setJoinClasses(boolean joinClasses)
+ {
+ configuration.joinClasses = joinClasses;
+ }
+
+ /**
+ * <code>join-classes</code> - join multiple class attributes.
+ * @return <code>true</code> if tidy will join multiple class attributes
+ * @see Configuration#joinClasses
+ */
+ public boolean getJoinClasses()
+ {
+ return configuration.joinClasses;
+ }
+
+ /**
+ * <code>join-styles</code> - join multiple style attributes.
+ * @param joinStyles <code>true</code> to join multiple style attributes
+ * @see Configuration#joinStyles
+ */
+ public void setJoinStyles(boolean joinStyles)
+ {
+ configuration.joinStyles = joinStyles;
+ }
+
+ /**
+ * <code>join-styles</code> - join multiple style attributes.
+ * @return <code>true</code> if tidy will join multiple style attributes
+ * @see Configuration#joinStyles
+ */
+ public boolean getJoinStyles()
+ {
+ return configuration.joinStyles;
+ }
+
+ /**
+ * <code>trim-empty-elements</code> - trim empty elements.
+ * @param trimEmpty <code>true</code> to trim empty elements
+ * @see Configuration#trimEmpty
+ */
+ public void setTrimEmptyElements(boolean trimEmpty)
+ {
+ configuration.trimEmpty = trimEmpty;
+ }
+
+ /**
+ * <code>trim-empty-elements</code> - trim empty elements.
+ * @return <code>true</code> if tidy will trim empty elements
+ * @see Configuration#trimEmpty
+ */
+ public boolean getTrimEmptyElements()
+ {
+ return configuration.trimEmpty;
+ }
+
+ /**
+ * <code>replace-color</code> - replace hex color attribute values with names.
+ * @param replaceColor <code>true</code> to replace hex color attribute values with names
+ * @see Configuration#replaceColor
+ */
+ public void setReplaceColor(boolean replaceColor)
+ {
+ configuration.replaceColor = replaceColor;
+ }
+
+ /**
+ * <code>replace-color</code> - replace hex color attribute values with names.
+ * @return <code>true</code> if tidy will replace hex color attribute values with names
+ * @see Configuration#replaceColor
+ */
+ public boolean getReplaceColor()
+ {
+ return configuration.replaceColor;
+ }
+
+ /**
+ * <code>escape-cdata</code> - replace CDATA sections with escaped text.
+ * @param escapeCdata <code>true</code> to replace CDATA sections with escaped text
+ * @see Configuration#escapeCdata
+ */
+ public void setEscapeCdata(boolean escapeCdata)
+ {
+ configuration.escapeCdata = escapeCdata;
+ }
+
+ /**
+ * <code>escape-cdata</code> - replace CDATA sections with escaped text.
+ * @return <code>true</code> if tidy will replace CDATA sections with escaped text
+ * @see Configuration#escapeCdata
+ */
+ public boolean getEscapeCdata()
+ {
+ return configuration.escapeCdata;
+ }
+
+ /**
+ * <code>repeated-attributes</code> - keep first or last duplicate attribute.
+ * The value is stored as given; this setter does not validate it.
+ * @param repeatedAttributes either <code>Configuration.KEEP_FIRST</code> or
+ * <code>Configuration.KEEP_LAST</code>
+ * @see Configuration#duplicateAttrs
+ */
+ public void setRepeatedAttributes(int repeatedAttributes)
+ {
+ configuration.duplicateAttrs = repeatedAttributes;
+ }
+
+ /**
+ * <code>repeated-attributes</code> - keep first or last duplicate attribute.
+ * @return the stored setting, expected to be either <code>Configuration.KEEP_FIRST</code> or
+ * <code>Configuration.KEEP_LAST</code>
+ * @see Configuration#duplicateAttrs
+ */
+ public int getRepeatedAttributes()
+ {
+ return configuration.duplicateAttrs;
+ }
+
+ /**
+ * <code>keep-time</code> - if true, last modified time is preserved.
+ * @param keepFileTimes <code>true</code> if tidy should preserve the last modified time of the input file.
+ * @todo <strong>this is NOT supported at this time. </strong>
+ * @see Configuration#keepFileTimes
+ */
+ public void setKeepFileTimes(boolean keepFileTimes)
+ {
+ configuration.keepFileTimes = keepFileTimes;
+ }
+
+ /**
+ * <code>keep-time</code> - if true, last modified time is preserved.
+ * @return <code>true</code> if tidy will preserve the last modified time of the input file.
+ * @todo <strong>this is NOT supported at this time. </strong>
+ * @see Configuration#keepFileTimes
+ */
+ public boolean getKeepFileTimes()
+ {
+ return configuration.keepFileTimes;
+ }
+
+ /**
+ * Sets the character encoding used both for input and for output.
+ * The constant is translated to an encoding name with
+ * <code>configuration.convertCharEncoding</code>; when that returns <code>null</code>
+ * the call is silently ignored and both encodings are left unchanged.
+ * @param charencoding encoding constant
+ * @deprecated set input/output encoding using java encoding names
+ */
+ public void setCharEncoding(int charencoding)
+ {
+ String ceName = configuration.convertCharEncoding(charencoding);
+ if (ceName != null)
+ {
+ configuration.setInCharEncodingName(ceName);
+ configuration.setOutCharEncodingName(ceName);
+ }
+ }
+
+ /**
+ * Returns the configured character encoding.
+ * @return character encoding constant (input encoding only)
+ * @deprecated from r8 tidy can use different encoding for input and output. This method will only
+ * return the <strong>input</strong> character encoding.
+ */
+ public int getCharEncoding()
+ {
+ return configuration.getInCharEncoding();
+ }
+
+ /**
+ * Stores the given value in <code>configuration.slidestyle</code>.
+ * NOTE(review): the option is documented as having no effect, yet the value is still stored here;
+ * it cannot be read back through {@link #getSlidestyle()}, which always returns <code>null</code>.
+ * @param slidestyle N/A
+ * @deprecated does nothing
+ */
+ public void setSlidestyle(String slidestyle)
+ {
+ configuration.slidestyle = slidestyle;
+ }
+
+ /**
+ * Always returns <code>null</code>; the configuration is not consulted.
+ * @deprecated does nothing
+ * @return <code>null</code>
+ */
+ public String getSlidestyle()
+ {
+ return null;
+ }
+
+ /**
+ * <code>output-raw</code> - avoid mapping values &gt; 127 to entities. This has the same effect of
+ * specifying a "raw" encoding in the original version of tidy.
+ * @param rawOut <code>true</code> to avoid mapping values &gt; 127 to entities
+ * @see Configuration#rawOut
+ */
+ public void setRawOut(boolean rawOut)
+ {
+ configuration.rawOut = rawOut;
+ }
+
+ /**
+ * <code>output-raw</code> - avoid mapping values &gt; 127 to entities.
+ * @return <code>true</code> if tidy will not map values &gt; 127 to entities
+ * @see Configuration#rawOut
+ */
+ public boolean getRawOut()
+ {
+ return configuration.rawOut;
+ }
+
+ /**
+ * <code>input-encoding</code> - the character encoding used for input.
+ * Delegates to <code>configuration.setInCharEncodingName</code>.
+ * @param encoding a valid java encoding name
+ */
+ public void setInputEncoding(String encoding)
+ {
+ configuration.setInCharEncodingName(encoding);
+ }
+
+ /**
+ * <code>input-encoding</code> - the character encoding used for input.
+ * Delegates to <code>configuration.getInCharEncodingName</code>.
+ * @return the java name of the encoding currently used for input
+ */
+ public String getInputEncoding()
+ {
+ return configuration.getInCharEncodingName();
+ }
+
+ /**
+ * <code>output-encoding</code> - the character encoding used for output.
+ * Delegates to <code>configuration.setOutCharEncodingName</code>.
+ * @param encoding a valid java encoding name
+ */
+ public void setOutputEncoding(String encoding)
+ {
+ configuration.setOutCharEncodingName(encoding);
+ }
+
+ /**
+ * <code>output-encoding</code> - the character encoding used for output.
+ * Delegates to <code>configuration.getOutCharEncodingName</code>.
+ * @return the java name of the encoding currently used for output
+ */
+ public String getOutputEncoding()
+ {
+ return configuration.getOutCharEncodingName();
+ }
+ /**
+ * <code>move-elements</code> - move style to head, th to thead etc.
+ * @param move <code>true</code> to allow tidy to move elements in the page
+ * @see Configuration#moveElements
+ */
+ public void setMoveElements(boolean move)
+ {
+ configuration.moveElements = move;
+ }
+
+ /**
+ * <code>move-elements</code> - move style to head, th to thead etc.
+ * @return <code>true</code> if tidy will move elements in page to proper position
+ * @see Configuration#moveElements
+ */
+ public boolean getMoveElements()
+ {
+ return configuration.moveElements;
+ }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyBeanInfo.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyBeanInfo.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyBeanInfo.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,78 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+import java.awt.Image;
+import java.beans.SimpleBeanInfo;
+
+
+/**
+ * BeanInfo for Tidy. Only the icon is customised; everything else is inherited from
+ * {@link SimpleBeanInfo}.
+ * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org</a>
+ * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca</a> (translation to Java)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class TidyBeanInfo extends SimpleBeanInfo
+{
+
+ /**
+ * Returns the bean icon. The same image resource, <code>tidy.gif</code>, is requested for every
+ * icon kind.
+ * @param kind requested icon kind; ignored by this implementation
+ * @return whatever <code>loadImage("tidy.gif")</code> yields
+ * @see java.beans.BeanInfo#getIcon(int)
+ */
+ public Image getIcon(int kind)
+ {
+ return loadImage("tidy.gif");
+ }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyLexerListener.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyLexerListener.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyLexerListener.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,31 @@
+/**
+ * License Agreement.
+ *
+ * Rich Faces - Natural Ajax for Java Server Faces (JSF)
+ *
+ * Copyright (C) 2007 Exadel, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Sub-interface of {@link TidyMessageListener} that declares no methods of its own; implementors
+ * only provide the inherited listener callback.
+ * @author asmirnov(a)exadel.com (latest modification by $Author: alexsmirnov $)
+ * @version $Revision: 1.1.2.1 $ $Date: 2007/01/09 18:56:47 $
+ *
+ */
+public interface TidyLexerListener extends TidyMessageListener {
+
+}
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyMessage.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyMessage.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyMessage.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,314 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Message sent to listeners for validation errors/warnings and info.
+ * The error code, position, level and text are fixed by the constructor; the lexer and element
+ * references are optional and are attached afterwards through their setters.
+ * @see Tidy#setMessageListener(TidyMessageListener)
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public final class TidyMessage
+{
+
+ /**
+ * Reference to the lexer, used to manipulate the output tree. Not set by the constructor;
+ * <code>null</code> until {@link #setLexer(Lexer)} is called.
+ */
+ private Lexer lexer;
+ /**
+ * Line in the source file (can be 0 if the message is not related to a particular line, such as a
+ * summary message).
+ */
+ private int line;
+
+ /**
+ * Column in the source file (can be 0 if the message is not related to a particular column, such as
+ * a summary message).
+ */
+ private int column;
+
+ /**
+ * Level for this message. Can be one of TidyMessage.Level.SUMMARY, TidyMessage.Level.INFO,
+ * TidyMessage.Level.WARNING or TidyMessage.Level.ERROR.
+ */
+ private Level level;
+
+ /**
+ * Formatted text for this message.
+ */
+ private String message;
+
+ /**
+ * Tidy internal error code.
+ */
+ private int errorCode;
+
+ /**
+ * Node attached to this message. Not set by the constructor; <code>null</code> until
+ * {@link #setElement(Node)} is called.
+ */
+ private Node element;
+
+ /**
+ * Instantiates a new message. The lexer and element references are left unset.
+ * @param errorCode Tidy internal error code.
+ * @param line Line number in the source file
+ * @param column Column number in the source file
+ * @param level severity
+ * @param message message text
+ */
+ public TidyMessage(int errorCode, int line, int column, Level level, String message)
+ {
+ this.errorCode = errorCode;
+ this.line = line;
+ this.column = column;
+ this.level = level;
+ this.message = message;
+ }
+
+ /**
+ * Getter for <code>errorCode</code>.
+ * @return Returns the errorCode.
+ */
+ public int getErrorCode()
+ {
+ return this.errorCode;
+ }
+
+ /**
+ * Getter for <code>column</code>.
+ * @return Returns the column.
+ */
+ public int getColumn()
+ {
+ return this.column;
+ }
+
+ /**
+ * Getter for <code>level</code>.
+ * @return Returns the level.
+ */
+ public Level getLevel()
+ {
+ return this.level;
+ }
+
+ /**
+ * Getter for <code>line</code>.
+ * @return Returns the line.
+ */
+ public int getLine()
+ {
+ return this.line;
+ }
+
+ /**
+ * Getter for <code>message</code>.
+ * @return Returns the message.
+ */
+ public String getMessage()
+ {
+ return this.message;
+ }
+
+ /**
+ * Message severity enumeration. The constructor is private, so the four constants declared here
+ * are the only instances; they order by ascending code: SUMMARY, INFO, WARNING, ERROR.
+ * @author fgiust
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+ public static final class Level implements Comparable<Level>
+ {
+
+ /**
+ * level = summary (0).
+ */
+ public static final Level SUMMARY = new Level(0);
+
+ /**
+ * level = info (1).
+ */
+ public static final Level INFO = new Level(1);
+
+ /**
+ * level = warning (2).
+ */
+ public static final Level WARNING = new Level(2);
+
+ /**
+ * level = error (3).
+ */
+ public static final Level ERROR = new Level(3);
+
+ /**
+ * short value for this level.
+ */
+ private short code;
+
+ /**
+ * Instantiates a new level with the given code.
+ * @param code int value for this level (narrowed to short)
+ */
+ private Level(int code)
+ {
+ this.code = (short) code;
+ }
+
+ /**
+ * Returns the numeric code for this level.
+ * @return short value for this level
+ */
+ public short getCode()
+ {
+ return this.code;
+ }
+
+ /**
+ * Returns the Level instance corresponding to the given int value.
+ * @param code int value for the level
+ * @return the matching Level constant, or <code>null</code> if the code is not 0, 1, 2 or 3
+ */
+ public static Level fromCode(int code)
+ {
+ switch (code)
+ {
+ case 0 :
+ return SUMMARY;
+ case 1 :
+ return INFO;
+ case 2 :
+ return WARNING;
+ case 3 :
+ return ERROR;
+
+ default :
+ return null;
+ }
+ }
+
+ /**
+ * Orders levels by their numeric code (lower code sorts first).
+ * @see java.lang.Comparable#compareTo(Object)
+ */
+ public int compareTo(Level object)
+ {
+ return this.code - ((Level) object).code;
+ }
+
+ /**
+ * Two levels are equal when their codes are equal; any non-Level argument (including
+ * <code>null</code>) yields <code>false</code>.
+ * @see java.lang.Object#equals(Object)
+ */
+ public boolean equals(Object object)
+ {
+ if (!(object instanceof Level))
+ {
+ return false;
+ }
+ return this.code == ((Level) object).code;
+ }
+
+ /**
+ * Returns the name of the constant with this code.
+ * @see java.lang.Object#toString()
+ */
+ public String toString()
+ {
+ switch (code)
+ {
+ case 0 :
+ return "SUMMARY";
+ case 1 :
+ return "INFO";
+ case 2 :
+ return "WARNING";
+ case 3 :
+ return "ERROR";
+
+ default :
+ // should not happen: only codes 0-3 are ever constructed
+ return "?";
+ }
+ }
+
+ /**
+ * Uses the identity hash code.
+ * @see java.lang.Object#hashCode()
+ */
+ public int hashCode()
+ {
+ // The identity hash stays consistent with the code-based equals() only because the
+ // private constructor limits instances to the four constants, each with a distinct code.
+ return super.hashCode();
+ }
+ }
+
+ /**
+ * @return Returns the lexer, or <code>null</code> if none has been set.
+ */
+ public Lexer getLexer() {
+ return lexer;
+ }
+
+ /**
+ * @param lexer The lexer to set.
+ */
+ public void setLexer(Lexer lexer) {
+ this.lexer = lexer;
+ }
+
+ /**
+ * @return Returns the element, or <code>null</code> if none has been set.
+ */
+ public Node getElement() {
+ return element;
+ }
+
+ /**
+ * @param element The element to set.
+ */
+ public void setElement(Node element) {
+ this.element = element;
+ }
+
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyMessageListener.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyMessageListener.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyMessageListener.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,71 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Listener interface for validation errors/warnings and info.
+ * @see Tidy#setMessageListener(TidyMessageListener)
+ * @see TidyMessage
+ * @author Fabrizio Giustina
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public interface TidyMessageListener
+{
+
+ /**
+ * Called by tidy when a warning or error occurs.
+ * @param message Tidy message describing the event
+ */
+ void messageReceived(TidyMessage message);
+
+}
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyUtils.java
===================================================================
--- branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyUtils.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/TidyUtils.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,870 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+
+package org.ajax4jsf.org.w3c.tidy;
+
+import org.ajax4jsf.Messages;
+
+/**
+ * Utility class with handy methods, mainly for String handling or for reproducing C behaviours.
+ * @author Fabrizio Giustina
+ * @version $Revision $ ($Author $)
+ */
+public final class TidyUtils
+{
+
+    /**
+     * char type bit: digit.
+     */
+    private static final short DIGIT = 1;
+
+    /**
+     * char type bit: letter.
+     */
+    private static final short LETTER = 2;
+
+    /**
+     * char type bit: namechar.
+     */
+    private static final short NAMECHAR = 4;
+
+    /**
+     * char type bit: whitespace.
+     */
+    private static final short WHITE = 8;
+
+    /**
+     * char type bit: newline.
+     */
+    private static final short NEWLINE = 16;
+
+    /**
+     * char type bit: lowercase.
+     */
+    private static final short LOWERCASE = 32;
+
+    /**
+     * char type bit: uppercase.
+     */
+    private static final short UPPERCASE = 64;
+
+    /**
+     * Table used to classify chars for lexical purposes. It has one entry per char code 0-127; each entry is the
+     * OR of the char type bits above. The array reference is never reassigned, so it is declared final (the
+     * entries themselves are filled in by the static initializer below).
+     */
+    private static final short[] lexmap = new short[128];
+
+    static
+    {
+        // each call ORs the given type bits into the entries of the listed characters
+        mapStr("\r\n\f", (short) (NEWLINE | WHITE));
+        mapStr(" \t", WHITE);
+        mapStr("-.:_", NAMECHAR);
+        mapStr("0123456789", (short) (DIGIT | NAMECHAR));
+        mapStr("abcdefghijklmnopqrstuvwxyz", (short) (LOWERCASE | LETTER | NAMECHAR));
+        mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short) (UPPERCASE | LETTER | NAMECHAR));
+    }
+
+ /**
+ * Private constructor: this is a utility class exposing only static members, don't instantiate.
+ */
+ private TidyUtils()
+ {
+ // intentionally empty
+ }
+
+    /**
+     * Interprets an int as a truth value, C style.
+     * @param value int value to test
+     * @return <code>false</code> if value is 0, <code>true</code> for any other value
+     */
+    static boolean toBoolean(int value)
+    {
+        if (value == 0)
+        {
+            return false;
+        }
+        return true;
+    }
+
+ /**
+ * convert an int to unsigned (& 0xFF).
+ * @param c signed int
+ * @return unsigned int
+ */
+ static int toUnsigned(int c)
+ {
+ return c & 0xFF;
+ }
+
+    /**
+     * Checks whether the first String contains the second one, with the first occurrence starting no later
+     * than index len1.
+     * NOTE(review): only the start index of the match is compared with len1; the match itself may extend
+     * past len1. Confirm that this is what callers expect.
+     * @param s1 full String
+     * @param len1 maximum start position in s1
+     * @param s2 String to search for
+     * @return true if the first occurrence of s2 in s1 starts at an index in the range 0-len1
+     */
+    static boolean wsubstrn(String s1, int len1, String s2)
+    {
+        int firstMatch = s1.indexOf(s2);
+        if (firstMatch < 0)
+        {
+            // not found at all
+            return false;
+        }
+        return firstMatch <= len1;
+    }
+
+    /**
+     * Case-insensitive variant of {@link #wsubstrn(String, int, String)}: both Strings are lower-cased and
+     * then handed to wsubstrn.
+     * @param s1 full String
+     * @param len1 maximum position in String
+     * @param s2 String to search for
+     * @return true if s1 contains s2 in the range 0-len1, ignoring case
+     */
+    static boolean wsubstrncase(String s1, int len1, String s2)
+    {
+        // Fold with a fixed locale. The no-arg toLowerCase() uses the default locale, and under Turkish or
+        // Azeri locales 'I' becomes a dotless i, so e.g. "PUBLIC" would no longer match "public".
+        java.util.Locale fixedLocale = java.util.Locale.ENGLISH;
+        return wsubstrn(s1.toLowerCase(fixedLocale), len1, s2.toLowerCase(fixedLocale));
+    }
+
+ /**
+ * return offset of cc from beginning of s1, -1 if not found.
+ * @param s1 String
+ * @param len1 maximum offset (values > than lenl are ignored and returned as -1)
+ * @param cc character to search for
+ * @return index of cc in s1
+ */
+ static int wstrnchr(String s1, int len1, char cc)
+ {
+ int indexOf = s1.indexOf(cc);
+ if (indexOf < len1)
+ {
+ return indexOf;
+ }
+
+ return -1;
+ }
+
+    /**
+     * Same as wsubstrn, but without a specified length and ignoring case.
+     * @param s1 full String
+     * @param s2 String to search for
+     * @return <code>true</code> if s2 is found anywhere in s1 (case insensitive search)
+     */
+    static boolean wsubstr(String s1, String s2)
+    {
+        int len2 = s2.length();
+        int lastStart = s1.length() - len2;
+
+        for (int i = 0; i <= lastStart; ++i)
+        {
+            // Compare exactly len2 chars starting at i. Comparing s2 against the whole tail of s1
+            // would only ever match when s1 ends with s2.
+            if (s1.regionMatches(true, i, s2, 0, len2))
+            {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Is the character a hex digit? Only the ASCII characters 0-9, a-f and A-F qualify.
+     * @param c char
+     * @return <code>true</code> if the given character is a hex digit
+     */
+    static boolean isxdigit(char c)
+    {
+        // Explicit ASCII ranges: Character.isDigit() also accepts non-ASCII Unicode digits
+        // (e.g. Arabic-Indic or fullwidth digits), which are not hex digits.
+        return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+    }
+
+    /**
+     * Looks for valueToCheck among the entries of validValues, comparing case insensitively.
+     * @param validValues array of valid values
+     * @param valueToCheck value to search for
+     * @return <code>true</code> if valueToCheck is found in validValues
+     */
+    static boolean isInValuesIgnoreCase(String[] validValues, String valueToCheck)
+    {
+        for (String candidate : validValues)
+        {
+            if (candidate.equalsIgnoreCase(valueToCheck))
+            {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Return true if substring s is in p and isn't all in upper case. This is used to check the case of
+     * SYSTEM, PUBLIC, DTD and EN. The search slides a window of s.length() chars over p and stops at the first
+     * window that equals s ignoring case; the result then tells whether that window differs from s exactly.
+     * A window is only examined while s.length() is strictly smaller than the number of chars left to check.
+     * @param s substring
+     * @param p full string
+     * @param len how many chars to check in p
+     * @return true if substring s is in p and isn't all in upper case
+     */
+    public static boolean findBadSubString(String s, String p, int len)
+    {
+        final int width = s.length();
+
+        for (int start = 0; width < len - start; start++)
+        {
+            String window = p.substring(start, start + width);
+            if (s.equalsIgnoreCase(window))
+            {
+                // found: "bad" when the spelling in p is not exactly s
+                return !window.equals(s);
+            }
+        }
+
+        return false;
+    }
+
+ /**
+ * Is the given char a valid xml letter? The check is a fixed table of inclusive code ranges and single
+ * code values that <code>c</code> is compared against; the char is accepted if any entry matches.
+ * NOTE(review): the table presumably mirrors the "Letter" production of the XML 1.0 specification - confirm
+ * against the spec before editing any range.
+ * NOTE: the last two lines of the expression repeat the 0x4e00-0x9fa5, 0x3007 and 0x3021-0x3029 tests that
+ * are already made just above them; the repetition is redundant but harmless.
+ * @param c char
+ * @return <code>true</code> if the char is a valid xml letter
+ */
+ static boolean isXMLLetter(char c)
+ {
+ return ((c >= 0x41 && c <= 0x5a)
+ || (c >= 0x61 && c <= 0x7a)
+ || (c >= 0xc0 && c <= 0xd6)
+ || (c >= 0xd8 && c <= 0xf6)
+ || (c >= 0xf8 && c <= 0xff)
+ || (c >= 0x100 && c <= 0x131)
+ || (c >= 0x134 && c <= 0x13e)
+ || (c >= 0x141 && c <= 0x148)
+ || (c >= 0x14a && c <= 0x17e)
+ || (c >= 0x180 && c <= 0x1c3)
+ || (c >= 0x1cd && c <= 0x1f0)
+ || (c >= 0x1f4 && c <= 0x1f5)
+ || (c >= 0x1fa && c <= 0x217)
+ || (c >= 0x250 && c <= 0x2a8)
+ || (c >= 0x2bb && c <= 0x2c1)
+ || c == 0x386
+ || (c >= 0x388 && c <= 0x38a)
+ || c == 0x38c
+ || (c >= 0x38e && c <= 0x3a1)
+ || (c >= 0x3a3 && c <= 0x3ce)
+ || (c >= 0x3d0 && c <= 0x3d6)
+ || c == 0x3da
+ || c == 0x3dc
+ || c == 0x3de
+ || c == 0x3e0
+ || (c >= 0x3e2 && c <= 0x3f3)
+ || (c >= 0x401 && c <= 0x40c)
+ || (c >= 0x40e && c <= 0x44f)
+ || (c >= 0x451 && c <= 0x45c)
+ || (c >= 0x45e && c <= 0x481)
+ || (c >= 0x490 && c <= 0x4c4)
+ || (c >= 0x4c7 && c <= 0x4c8)
+ || (c >= 0x4cb && c <= 0x4cc)
+ || (c >= 0x4d0 && c <= 0x4eb)
+ || (c >= 0x4ee && c <= 0x4f5)
+ || (c >= 0x4f8 && c <= 0x4f9)
+ || (c >= 0x531 && c <= 0x556)
+ || c == 0x559
+ || (c >= 0x561 && c <= 0x586)
+ || (c >= 0x5d0 && c <= 0x5ea)
+ || (c >= 0x5f0 && c <= 0x5f2)
+ || (c >= 0x621 && c <= 0x63a)
+ || (c >= 0x641 && c <= 0x64a)
+ || (c >= 0x671 && c <= 0x6b7)
+ || (c >= 0x6ba && c <= 0x6be)
+ || (c >= 0x6c0 && c <= 0x6ce)
+ || (c >= 0x6d0 && c <= 0x6d3)
+ || c == 0x6d5
+ || (c >= 0x6e5 && c <= 0x6e6)
+ || (c >= 0x905 && c <= 0x939)
+ || c == 0x93d
+ || (c >= 0x958 && c <= 0x961)
+ || (c >= 0x985 && c <= 0x98c)
+ || (c >= 0x98f && c <= 0x990)
+ || (c >= 0x993 && c <= 0x9a8)
+ || (c >= 0x9aa && c <= 0x9b0)
+ || c == 0x9b2
+ || (c >= 0x9b6 && c <= 0x9b9)
+ || (c >= 0x9dc && c <= 0x9dd)
+ || (c >= 0x9df && c <= 0x9e1)
+ || (c >= 0x9f0 && c <= 0x9f1)
+ || (c >= 0xa05 && c <= 0xa0a)
+ || (c >= 0xa0f && c <= 0xa10)
+ || (c >= 0xa13 && c <= 0xa28)
+ || (c >= 0xa2a && c <= 0xa30)
+ || (c >= 0xa32 && c <= 0xa33)
+ || (c >= 0xa35 && c <= 0xa36)
+ || (c >= 0xa38 && c <= 0xa39)
+ || (c >= 0xa59 && c <= 0xa5c)
+ || c == 0xa5e
+ || (c >= 0xa72 && c <= 0xa74)
+ || (c >= 0xa85 && c <= 0xa8b)
+ || c == 0xa8d
+ || (c >= 0xa8f && c <= 0xa91)
+ || (c >= 0xa93 && c <= 0xaa8)
+ || (c >= 0xaaa && c <= 0xab0)
+ || (c >= 0xab2 && c <= 0xab3)
+ || (c >= 0xab5 && c <= 0xab9)
+ || c == 0xabd
+ || c == 0xae0
+ || (c >= 0xb05 && c <= 0xb0c)
+ || (c >= 0xb0f && c <= 0xb10)
+ || (c >= 0xb13 && c <= 0xb28)
+ || (c >= 0xb2a && c <= 0xb30)
+ || (c >= 0xb32 && c <= 0xb33)
+ || (c >= 0xb36 && c <= 0xb39)
+ || c == 0xb3d
+ || (c >= 0xb5c && c <= 0xb5d)
+ || (c >= 0xb5f && c <= 0xb61)
+ || (c >= 0xb85 && c <= 0xb8a)
+ || (c >= 0xb8e && c <= 0xb90)
+ || (c >= 0xb92 && c <= 0xb95)
+ || (c >= 0xb99 && c <= 0xb9a)
+ || c == 0xb9c
+ || (c >= 0xb9e && c <= 0xb9f)
+ || (c >= 0xba3 && c <= 0xba4)
+ || (c >= 0xba8 && c <= 0xbaa)
+ || (c >= 0xbae && c <= 0xbb5)
+ || (c >= 0xbb7 && c <= 0xbb9)
+ || (c >= 0xc05 && c <= 0xc0c)
+ || (c >= 0xc0e && c <= 0xc10)
+ || (c >= 0xc12 && c <= 0xc28)
+ || (c >= 0xc2a && c <= 0xc33)
+ || (c >= 0xc35 && c <= 0xc39)
+ || (c >= 0xc60 && c <= 0xc61)
+ || (c >= 0xc85 && c <= 0xc8c)
+ || (c >= 0xc8e && c <= 0xc90)
+ || (c >= 0xc92 && c <= 0xca8)
+ || (c >= 0xcaa && c <= 0xcb3)
+ || (c >= 0xcb5 && c <= 0xcb9)
+ || c == 0xcde
+ || (c >= 0xce0 && c <= 0xce1)
+ || (c >= 0xd05 && c <= 0xd0c)
+ || (c >= 0xd0e && c <= 0xd10)
+ || (c >= 0xd12 && c <= 0xd28)
+ || (c >= 0xd2a && c <= 0xd39)
+ || (c >= 0xd60 && c <= 0xd61)
+ || (c >= 0xe01 && c <= 0xe2e)
+ || c == 0xe30
+ || (c >= 0xe32 && c <= 0xe33)
+ || (c >= 0xe40 && c <= 0xe45)
+ || (c >= 0xe81 && c <= 0xe82)
+ || c == 0xe84
+ || (c >= 0xe87 && c <= 0xe88)
+ || c == 0xe8a
+ || c == 0xe8d
+ || (c >= 0xe94 && c <= 0xe97)
+ || (c >= 0xe99 && c <= 0xe9f)
+ || (c >= 0xea1 && c <= 0xea3)
+ || c == 0xea5
+ || c == 0xea7
+ || (c >= 0xeaa && c <= 0xeab)
+ || (c >= 0xead && c <= 0xeae)
+ || c == 0xeb0
+ || (c >= 0xeb2 && c <= 0xeb3)
+ || c == 0xebd
+ || (c >= 0xec0 && c <= 0xec4)
+ || (c >= 0xf40 && c <= 0xf47)
+ || (c >= 0xf49 && c <= 0xf69)
+ || (c >= 0x10a0 && c <= 0x10c5)
+ || (c >= 0x10d0 && c <= 0x10f6)
+ || c == 0x1100
+ || (c >= 0x1102 && c <= 0x1103)
+ || (c >= 0x1105 && c <= 0x1107)
+ || c == 0x1109
+ || (c >= 0x110b && c <= 0x110c)
+ || (c >= 0x110e && c <= 0x1112)
+ || c == 0x113c
+ || c == 0x113e
+ || c == 0x1140
+ || c == 0x114c
+ || c == 0x114e
+ || c == 0x1150
+ || (c >= 0x1154 && c <= 0x1155)
+ || c == 0x1159
+ || (c >= 0x115f && c <= 0x1161)
+ || c == 0x1163
+ || c == 0x1165
+ || c == 0x1167
+ || c == 0x1169
+ || (c >= 0x116d && c <= 0x116e)
+ || (c >= 0x1172 && c <= 0x1173)
+ || c == 0x1175
+ || c == 0x119e
+ || c == 0x11a8
+ || c == 0x11ab
+ || (c >= 0x11ae && c <= 0x11af)
+ || (c >= 0x11b7 && c <= 0x11b8)
+ || c == 0x11ba
+ || (c >= 0x11bc && c <= 0x11c2)
+ || c == 0x11eb
+ || c == 0x11f0
+ || c == 0x11f9
+ || (c >= 0x1e00 && c <= 0x1e9b)
+ || (c >= 0x1ea0 && c <= 0x1ef9)
+ || (c >= 0x1f00 && c <= 0x1f15)
+ || (c >= 0x1f18 && c <= 0x1f1d)
+ || (c >= 0x1f20 && c <= 0x1f45)
+ || (c >= 0x1f48 && c <= 0x1f4d)
+ || (c >= 0x1f50 && c <= 0x1f57)
+ || c == 0x1f59
+ || c == 0x1f5b
+ || c == 0x1f5d
+ || (c >= 0x1f5f && c <= 0x1f7d)
+ || (c >= 0x1f80 && c <= 0x1fb4)
+ || (c >= 0x1fb6 && c <= 0x1fbc)
+ || c == 0x1fbe
+ || (c >= 0x1fc2 && c <= 0x1fc4)
+ || (c >= 0x1fc6 && c <= 0x1fcc)
+ || (c >= 0x1fd0 && c <= 0x1fd3)
+ || (c >= 0x1fd6 && c <= 0x1fdb)
+ || (c >= 0x1fe0 && c <= 0x1fec)
+ || (c >= 0x1ff2 && c <= 0x1ff4)
+ || (c >= 0x1ff6 && c <= 0x1ffc)
+ || c == 0x2126
+ || (c >= 0x212a && c <= 0x212b)
+ || c == 0x212e
+ || (c >= 0x2180 && c <= 0x2182)
+ || (c >= 0x3041 && c <= 0x3094)
+ || (c >= 0x30a1 && c <= 0x30fa)
+ || (c >= 0x3105 && c <= 0x312c)
+ || (c >= 0xac00 && c <= 0xd7a3)
+ || (c >= 0x4e00 && c <= 0x9fa5)
+ || c == 0x3007
+ || (c >= 0x3021 && c <= 0x3029)
+ || (c >= 0x4e00 && c <= 0x9fa5)
+ || c == 0x3007 || (c >= 0x3021 && c <= 0x3029));
+ }
+
+ /**
+ * Is the given char valid in xml name? A char is accepted if {@link #isXMLLetter(char)} accepts it, if it is
+ * one of '.', '_', ':' or '-', or if it matches one of the additional inclusive code ranges / single code
+ * values listed below (these include the ASCII digits 0x30-0x39).
+ * NOTE(review): the additional entries presumably correspond to the CombiningChar, Digit and Extender
+ * productions of the XML 1.0 specification - confirm against the spec before editing any range.
+ * @param c char
+ * @return <code>true</code> if the char is a valid xml name char
+ */
+ static boolean isXMLNamechar(char c)
+ {
+ return (isXMLLetter(c)
+ || c == '.'
+ || c == '_'
+ || c == ':'
+ || c == '-'
+ || (c >= 0x300 && c <= 0x345)
+ || (c >= 0x360 && c <= 0x361)
+ || (c >= 0x483 && c <= 0x486)
+ || (c >= 0x591 && c <= 0x5a1)
+ || (c >= 0x5a3 && c <= 0x5b9)
+ || (c >= 0x5bb && c <= 0x5bd)
+ || c == 0x5bf
+ || (c >= 0x5c1 && c <= 0x5c2)
+ || c == 0x5c4
+ || (c >= 0x64b && c <= 0x652)
+ || c == 0x670
+ || (c >= 0x6d6 && c <= 0x6dc)
+ || (c >= 0x6dd && c <= 0x6df)
+ || (c >= 0x6e0 && c <= 0x6e4)
+ || (c >= 0x6e7 && c <= 0x6e8)
+ || (c >= 0x6ea && c <= 0x6ed)
+ || (c >= 0x901 && c <= 0x903)
+ || c == 0x93c
+ || (c >= 0x93e && c <= 0x94c)
+ || c == 0x94d
+ || (c >= 0x951 && c <= 0x954)
+ || (c >= 0x962 && c <= 0x963)
+ || (c >= 0x981 && c <= 0x983)
+ || c == 0x9bc
+ || c == 0x9be
+ || c == 0x9bf
+ || (c >= 0x9c0 && c <= 0x9c4)
+ || (c >= 0x9c7 && c <= 0x9c8)
+ || (c >= 0x9cb && c <= 0x9cd)
+ || c == 0x9d7
+ || (c >= 0x9e2 && c <= 0x9e3)
+ || c == 0xa02
+ || c == 0xa3c
+ || c == 0xa3e
+ || c == 0xa3f
+ || (c >= 0xa40 && c <= 0xa42)
+ || (c >= 0xa47 && c <= 0xa48)
+ || (c >= 0xa4b && c <= 0xa4d)
+ || (c >= 0xa70 && c <= 0xa71)
+ || (c >= 0xa81 && c <= 0xa83)
+ || c == 0xabc
+ || (c >= 0xabe && c <= 0xac5)
+ || (c >= 0xac7 && c <= 0xac9)
+ || (c >= 0xacb && c <= 0xacd)
+ || (c >= 0xb01 && c <= 0xb03)
+ || c == 0xb3c
+ || (c >= 0xb3e && c <= 0xb43)
+ || (c >= 0xb47 && c <= 0xb48)
+ || (c >= 0xb4b && c <= 0xb4d)
+ || (c >= 0xb56 && c <= 0xb57)
+ || (c >= 0xb82 && c <= 0xb83)
+ || (c >= 0xbbe && c <= 0xbc2)
+ || (c >= 0xbc6 && c <= 0xbc8)
+ || (c >= 0xbca && c <= 0xbcd)
+ || c == 0xbd7
+ || (c >= 0xc01 && c <= 0xc03)
+ || (c >= 0xc3e && c <= 0xc44)
+ || (c >= 0xc46 && c <= 0xc48)
+ || (c >= 0xc4a && c <= 0xc4d)
+ || (c >= 0xc55 && c <= 0xc56)
+ || (c >= 0xc82 && c <= 0xc83)
+ || (c >= 0xcbe && c <= 0xcc4)
+ || (c >= 0xcc6 && c <= 0xcc8)
+ || (c >= 0xcca && c <= 0xccd)
+ || (c >= 0xcd5 && c <= 0xcd6)
+ || (c >= 0xd02 && c <= 0xd03)
+ || (c >= 0xd3e && c <= 0xd43)
+ || (c >= 0xd46 && c <= 0xd48)
+ || (c >= 0xd4a && c <= 0xd4d)
+ || c == 0xd57
+ || c == 0xe31
+ || (c >= 0xe34 && c <= 0xe3a)
+ || (c >= 0xe47 && c <= 0xe4e)
+ || c == 0xeb1
+ || (c >= 0xeb4 && c <= 0xeb9)
+ || (c >= 0xebb && c <= 0xebc)
+ || (c >= 0xec8 && c <= 0xecd)
+ || (c >= 0xf18 && c <= 0xf19)
+ || c == 0xf35
+ || c == 0xf37
+ || c == 0xf39
+ || c == 0xf3e
+ || c == 0xf3f
+ || (c >= 0xf71 && c <= 0xf84)
+ || (c >= 0xf86 && c <= 0xf8b)
+ || (c >= 0xf90 && c <= 0xf95)
+ || c == 0xf97
+ || (c >= 0xf99 && c <= 0xfad)
+ || (c >= 0xfb1 && c <= 0xfb7)
+ || c == 0xfb9
+ || (c >= 0x20d0 && c <= 0x20dc)
+ || c == 0x20e1
+ || (c >= 0x302a && c <= 0x302f)
+ || c == 0x3099
+ || c == 0x309a
+ || (c >= 0x30 && c <= 0x39)
+ || (c >= 0x660 && c <= 0x669)
+ || (c >= 0x6f0 && c <= 0x6f9)
+ || (c >= 0x966 && c <= 0x96f)
+ || (c >= 0x9e6 && c <= 0x9ef)
+ || (c >= 0xa66 && c <= 0xa6f)
+ || (c >= 0xae6 && c <= 0xaef)
+ || (c >= 0xb66 && c <= 0xb6f)
+ || (c >= 0xbe7 && c <= 0xbef)
+ || (c >= 0xc66 && c <= 0xc6f)
+ || (c >= 0xce6 && c <= 0xcef)
+ || (c >= 0xd66 && c <= 0xd6f)
+ || (c >= 0xe50 && c <= 0xe59)
+ || (c >= 0xed0 && c <= 0xed9)
+ || (c >= 0xf20 && c <= 0xf29)
+ || c == 0xb7
+ || c == 0x2d0
+ || c == 0x2d1
+ || c == 0x387
+ || c == 0x640
+ || c == 0xe46
+ || c == 0xec6
+ || c == 0x3005
+ || (c >= 0x3031 && c <= 0x3035)
+ || (c >= 0x309d && c <= 0x309e) || (c >= 0x30fc && c
<= 0x30fe));
+ }
+
+    /**
+     * Tells whether the given character is a quote mark, either single or double.
+     * @param c char
+     * @return <code>true</code> if c is " or '
+     */
+    static boolean isQuote(int c)
+    {
+        switch (c)
+        {
+            case '\'' :
+            case '\"' :
+                return true;
+            default :
+                return false;
+        }
+    }
+
+    /**
+     * Encodes the String using the "UTF8" charset name. Should always be able convert to/from UTF-8, so
+     * encoding exceptions are converted to an Error to avoid adding throws declarations in lots of methods.
+     * @param str String
+     * @return utf8 bytes
+     * @throws Error if the "UTF8" encoding is reported as unsupported; the original exception is kept as cause
+     * @see String#getBytes(String)
+     */
+    public static byte[] getBytes(String str)
+    {
+        try
+        {
+            return str.getBytes("UTF8");
+        }
+        catch (java.io.UnsupportedEncodingException e)
+        {
+            // chain the original exception so its stack trace is not lost
+            throw new Error(Messages.getMessage(Messages.STRING_CONVERSION_ERROR, e.getMessage()), e);
+        }
+    }
+
+    /**
+     * Decodes part of a byte array using the "UTF8" charset name. Should always be able convert to/from UTF-8,
+     * so encoding exceptions are converted to an Error to avoid adding throws declarations in lots of methods.
+     * @param bytes byte array
+     * @param offset starting offset in byte array
+     * @param length length in byte array starting from offset
+     * @return same as <code>new String(bytes, offset, length, "UTF8")</code>
+     * @throws Error if the "UTF8" encoding is reported as unsupported; the original exception is kept as cause
+     */
+    public static String getString(byte[] bytes, int offset, int length)
+    {
+        try
+        {
+            return new String(bytes, offset, length, "UTF8");
+        }
+        catch (java.io.UnsupportedEncodingException e)
+        {
+            // chain the original exception so its stack trace is not lost
+            throw new Error(Messages.getMessage(Messages.UTF_CONVERSION_ERROR, e.getMessage()), e);
+        }
+    }
+
+    /**
+     * Returns the final char of a String. This is useful when trailing quotemark is missing on an attribute.
+     * @param str String, may be <code>null</code>
+     * @return last char in String, or 0 when str is <code>null</code> or empty
+     */
+    public static int lastChar(String str)
+    {
+        if (str == null || str.length() == 0)
+        {
+            return 0;
+        }
+
+        return str.charAt(str.length() - 1);
+    }
+
+    /**
+     * Tells whether the lexical map classifies the given character as whitespace.
+     * @param c char
+     * @return <code>true</code> if char is whitespace.
+     */
+    public static boolean isWhite(char c)
+    {
+        return TidyUtils.toBoolean(map(c) & WHITE);
+    }
+
+    /**
+     * Tells whether the lexical map classifies the given char as a digit.
+     * @param c char
+     * @return <code>true</code> if the given char is a digit
+     */
+    public static boolean isDigit(char c)
+    {
+        final short charType = map(c);
+        return TidyUtils.toBoolean(charType & DIGIT);
+    }
+
+    /**
+     * Tells whether the lexical map classifies the given char as a letter.
+     * @param c char
+     * @return <code>true</code> if the given char is a letter
+     */
+    public static boolean isLetter(char c)
+    {
+        final short charType = map(c);
+        return TidyUtils.toBoolean(charType & LETTER);
+    }
+
+    /**
+     * Tells whether the given char may appear in a name (letter, digit or "-", ".", ":", "_").
+     * @param c char
+     * @return <code>true</code> if char is a name char.
+     */
+    public static boolean isNamechar(char c)
+    {
+        return TidyUtils.toBoolean(map(c) & NAMECHAR);
+    }
+
+    /**
+     * Tells whether the lexical map classifies the given character as lowercase.
+     * @param c char
+     * @return <code>true</code> if char is lower case.
+     */
+    public static boolean isLower(char c)
+    {
+        return TidyUtils.toBoolean(map(c) & LOWERCASE);
+    }
+
+    /**
+     * Tells whether the lexical map classifies the given character as uppercase.
+     * @param c char
+     * @return <code>true</code> if char is upper case.
+     */
+    public static boolean isUpper(char c)
+    {
+        return TidyUtils.toBoolean(map(c) & UPPERCASE);
+    }
+
+    /**
+     * Converts a character classified as uppercase by the lexical map to lowercase; any other character is
+     * returned unchanged.
+     * @param c char
+     * @return lowercase char.
+     */
+    public static char toLower(char c)
+    {
+        final boolean upper = TidyUtils.toBoolean(map(c) & UPPERCASE);
+        return upper ? (char) (c + 'a' - 'A') : c;
+    }
+
+    /**
+     * Converts a character classified as lowercase by the lexical map to uppercase; any other character is
+     * returned unchanged.
+     * @param c char
+     * @return uppercase char.
+     */
+    public static char toUpper(char c)
+    {
+        final boolean lower = TidyUtils.toBoolean(map(c) & LOWERCASE);
+        return lower ? (char) (c + 'A' - 'a') : c;
+    }
+
+ /**
+ * Fold case of a char.
+ * @param c char
+ * @param tocaps convert to caps
+ * @param xmlTags use xml tags? If true no change will be performed
+ * @return folded char
+ * @todo check the use of xmlTags parameter
+ */
+ public static char foldCase(char c, boolean tocaps, boolean xmlTags)
+ {
+
+ if (!xmlTags)
+ {
+
+ if (tocaps)
+ {
+ if (isLower(c))
+ {
+ c = toUpper(c);
+ }
+ }
+ else
+ {
+ // force to lower case
+ if (isUpper(c))
+ {
+ c = toLower(c);
+ }
+ }
+ }
+
+ return c;
+ }
+
+ /**
+ * Classify chars in String and put them in lexmap.
+ * @param str String
+ * @param code code associated to chars in the String
+ */
+ private static void mapStr(String str, short code)
+ {
+ int c;
+ for (int i = 0; i < str.length(); i++)
+ {
+ c = str.charAt(i);
+ lexmap[c] |= code;
+ }
+ }
+
+    /**
+     * Looks up the classification of a char in lexmap. Chars with a code of 128 or above are outside the table
+     * and get 0.
+     * @param c char
+     * @return char type
+     */
+    private static short map(char c)
+    {
+        if (c >= 128)
+        {
+            return 0;
+        }
+        return lexmap[c];
+    }
+
+    /**
+     * Is the given character encoding supported? The name is first translated with
+     * <code>EncodingNameMapper.toJava</code>; support is then probed by encoding an empty String with it.
+     * @param name character encoding name
+     * @return <code>true</code> if encoding is supported, false otherwise.
+     */
+    public static boolean isCharEncodingSupported(String name)
+    {
+        final String javaName = EncodingNameMapper.toJava(name);
+        if (javaName == null)
+        {
+            return false;
+        }
+
+        try
+        {
+            "".getBytes(javaName);
+            return true;
+        }
+        catch (java.io.UnsupportedEncodingException e)
+        {
+            return false;
+        }
+    }
+}
\ No newline at end of file
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ValidUTF8Sequence.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ValidUTF8Sequence.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/ValidUTF8Sequence.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,98 @@
+/*
+ * Java HTML Tidy - JTidy
+ * HTML parser and pretty printer
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ * Contributing Author(s):
+ *
+ * Dave Raggett <dsr(a)w3.org>
+ * Andy Quick <ac.quick(a)sympatico.ca> (translation to Java)
+ * Gary L Peskin <garyp(a)firstech.com> (Java development)
+ * Sami Lempinen <sami(a)lempinen.net> (release management)
+ * Fabrizio Giustina <fgiust at users.sourceforge.net>
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ * 2. Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * 3. This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ */
+package org.ajax4jsf.org.w3c.tidy;
+
+/**
+ * Plain holder for one entry describing a valid UTF-8 sequence: a low/high char pair, the number of bytes in
+ * the sequence and an array of valid bytes. All values are supplied through the constructor.
+ * @author Fabrizio Giustina (translation from c)
+ * @version $Revision: 1.1.2.1 $ ($Author: alexsmirnov $)
+ */
+public class ValidUTF8Sequence
+{
+
+    /**
+     * low char.
+     */
+    int lowChar;
+
+    /**
+     * high char.
+     */
+    int highChar;
+
+    /**
+     * number of bytes.
+     */
+    int numBytes;
+
+    /**
+     * array of valid bytes. Always set by the constructor, so no array is pre-allocated here.
+     */
+    char[] validBytes;
+
+    /**
+     * Instantiates a new ValidUTF8Sequence.
+     * @param lowChar low utf8 char
+     * @param highChar high utf8 char
+     * @param numBytes number of bytes in the sequence
+     * @param validBytes valid bytes array (stored by reference, not copied)
+     */
+    public ValidUTF8Sequence(int lowChar, int highChar, int numBytes, char[] validBytes)
+    {
+        this.lowChar = lowChar;
+        this.highChar = highChar;
+        this.numBytes = numBytes;
+        this.validBytes = validBytes;
+    }
+
+}
Added:
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/package-info.java
===================================================================
---
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/package-info.java
(rev 0)
+++
branches/jsf2.0/framework/impl/src/main/java/org/ajax4jsf/org/w3c/tidy/package-info.java 2009-07-07
17:08:12 UTC (rev 14813)
@@ -0,0 +1,4 @@
+/**
+ * Customized implementation of the Tidy library.
+ */
+package org.ajax4jsf.org.w3c.tidy;