Author: abelevich
Date: 2008-11-04 06:34:08 -0500 (Tue, 04 Nov 2008)
New Revision: 11011
Added:
trunk/sandbox/ui/editor/src/main/antlr/
trunk/sandbox/ui/editor/src/main/antlr/html-seamtext.g
Log:
Added: trunk/sandbox/ui/editor/src/main/antlr/html-seamtext.g
===================================================================
--- trunk/sandbox/ui/editor/src/main/antlr/html-seamtext.g (rev
0)
+++ trunk/sandbox/ui/editor/src/main/antlr/html-seamtext.g 2008-11-04 11:34:08 UTC (rev
11011)
@@ -0,0 +1,1070 @@
+header
+{
+ package org.richfaces.antlr;
+}
+
+class HtmlSeamTextParser extends Parser;
+
+options
+{
+ k=4;
+ defaultErrorHandler=false;
+}
+
+{
+ /**
+  * Recognition error that pairs a wrapped parser error with the token of the
+  * HTML element that was open when it happened. Message and cause are both
+  * delegated to the wrapped exception.
+  */
+ public class HtmlRecognitionException extends RecognitionException {
+     Token openingElement;
+     RecognitionException wrappedException;
+
+     public HtmlRecognitionException(Token openingElement, RecognitionException wrappedException) {
+         this.wrappedException = wrappedException;
+         this.openingElement = openingElement;
+     }
+
+     /** Returns the wrapped recognition error. */
+     public Throwable getCause() {
+         return wrappedException;
+     }
+
+     /** Returns the message of the wrapped recognition error. */
+     public String getMessage() {
+         return wrappedException.getMessage();
+     }
+
+     /** Returns the element token supplied at construction time. */
+     public Token getOpeningElement() {
+         return openingElement;
+     }
+ }
+
+
+ // Validation callbacks consulted while HTML is parsed. The grammar actions in this
+ // file call validateHtmlElement, validateHtmlAttribute and validateHtmlAttributeValue;
+ // the validate* methods signal rejection by throwing SemanticException.
+ public interface Sanitizer {
+
+ // Validates the URI of a link tag.
+ public void validateLinkTagURI(Token element, String uri) throws
SemanticException;
+ // Validates an element, identified by its tag-name token.
+ public void validateHtmlElement(Token element) throws SemanticException;
+ // Validates an attribute name in the context of its element.
+ public void validateHtmlAttribute(Token element, Token attribute) throws
SemanticException;
+ // Validates the captured text of an attribute value.
+ public void validateHtmlAttributeValue(Token element, Token attribute, String
attributeValue) throws SemanticException;
+ // Hook for a Seam Text token; not invoked by any action visible in this file.
+ public void escapeSeamTextToken(Token element);
+ // The getInvalid*Message methods supply the text for the corresponding validation errors.
+ public String getInvalidURIMessage(String uri);
+ public String getInvalidElementMessage(String elementName);
+ public String getInvalidAttributeMessage(String elementName, String
attributeName);
+ public String getInvalidAttributeValueMessage(String elementName, String
attributeName, String value);
+
+ }
+
+ public static class DefaultSanitizer implements HtmlSeamTextParser.Sanitizer {
+
+ // Lower-case attribute names whose values validateHtmlAttributeValue checks as URIs.
+ // Built with a parameterized HashSet instead of the raw type to avoid an unchecked conversion.
+ protected java.util.Set<String> attributesWhoseValueIsAURI = new java.util.HashSet<String>(java.util.Arrays.asList(
+     "action", "cite", "href", "longdesc", "src", "xlink:href", "xml:base"
+ ));
+
+
+ // URI schemes accepted by validateURI.
+ // Built with a parameterized HashSet instead of the raw type to avoid an unchecked conversion.
+ protected java.util.Set<String> uriSchemes = new java.util.HashSet<String>(java.util.Arrays.asList(
+     "afs", "aim", "callto", "ed2k", "feed", "ftp", "gopher", "http", "https",
+     "irc", "mailto", "news", "nntp", "rsync", "rtsp", "sftp", "ssh", "tag",
+     "tel", "telnet", "urn", "webcal", "wtai", "xmpp"
+ ));
+
+ protected java.util.Set<String> acceptableElements = new
java.util.HashSet(java.util.Arrays.asList(
+ "a", "abbr", "acronym", "address",
"area", "b", "bdo", "big",
"blockquote",
+ "br", "button", "caption", "center",
"cite", "code", "col", "colgroup",
"dd",
+ "del", "dfn", "dir", "div",
"dl", "dt", "em", "fieldset", "font",
"form",
+ "h1", "h2", "h3", "h4",
"h5", "h6", "hr", "i", "img",
"input", "ins", "kbd",
+ "label", "legend", "li", "map",
"menu", "ol", "optgroup", "option",
"p",
+ "pre", "q", "s", "samp",
"select", "small", "span", "strike",
"strong",
+ "sub", "sup", "table", "tbody",
"td", "textarea", "tfoot", "th",
"thead",
+ "tr", "tt", "u", "ul",
"var", "wbr"
+ ));
+
+ protected java.util.Set<String> mathmlElements = new
java.util.HashSet(java.util.Arrays.asList(
+ "maction", "math", "merror", "mfrac",
"mi", "mmultiscripts", "mn", "mo",
+ "mover", "mpadded", "mphantom",
"mprescripts", "mroot", "mrow", "mspace",
+ "msqrt", "mstyle", "msub", "msubsup",
"msup", "mtable", "mtd", "mtext",
+ "mtr", "munder", "munderover",
"none"
+ ));
+
+ protected java.util.Set<String> svgElements = new
java.util.HashSet(java.util.Arrays.asList(
+ "a", "animate", "animateColor",
"animateMotion", "animateTransform",
+ "circle", "defs", "desc", "ellipse",
"font-face", "font-face-name",
+ "font-face-src", "g", "glyph",
"hkern", "image", "line", "linearGradient",
+ "marker", "metadata", "missing-glyph",
"mpath", "path", "polygon",
+ "polyline", "radialGradient", "rect",
"set", "stop", "svg", "switch", "text",
+ "title", "tspan", "use"
+ ));
+
+ protected java.util.Set<String> acceptableAttributes = new
java.util.HashSet(java.util.Arrays.asList(
+ "abbr", "accept", "accept-charset",
"accesskey", "action", "align", "alt",
+ "axis", "border", "cellpadding",
"cellspacing", "char", "charoff", "charset",
+ "checked", "cite", "class", "clear",
"color", "cols", "colspan", "compact",
+ "coords", "datetime", "dir",
"disabled", "enctype", "for", "frame",
+ "headers", "height", "href",
"hreflang", "hspace", "id", "ismap",
"label",
+ "lang", "longdesc", "maxlength",
"media", "method", "multiple", "name",
+ "nohref", "noshade", "nowrap",
"prompt", "readonly", "rel", "rev",
"rows",
+ "rowspan", "rules", "scope",
"selected", "shape", "size", "span",
"src",
+ "start", "style", "summary",
"tabindex", "target", "title", "type",
"usemap",
+ "valign", "value", "vspace", "width",
"xml:lang"
+ ));
+
+ protected java.util.Set<String> mathmlAttributes = new
java.util.HashSet(java.util.Arrays.asList(
+ "actiontype", "align", "columnalign",
"columnalign", "columnalign",
+ "columnlines", "columnspacing", "columnspan",
"depth", "display",
+ "displaystyle", "equalcolumns", "equalrows",
"fence", "fontstyle",
+ "fontweight", "frame", "height",
"linethickness", "lspace", "mathbackground",
+ "mathcolor", "mathvariant", "mathvariant",
"maxsize", "minsize", "other",
+ "rowalign", "rowalign", "rowalign",
"rowlines", "rowspacing", "rowspan",
+ "rspace", "scriptlevel", "selection",
"separator", "stretchy", "width",
+ "width", "xlink:href", "xlink:show",
"xlink:type", "xmlns", "xmlns:xlink"
+ ));
+
+
+ protected java.util.Set<String> svgAttributes = new
java.util.HashSet(java.util.Arrays.asList(
+ "accent-height", "accumulate", "additive",
"alphabetic", "arabic-form",
+ "ascent", "attributeName", "attributeType",
"baseProfile", "bbox", "begin",
+ "by", "calcMode", "cap-height",
"class", "color", "color-rendering",
+ "content", "cx", "cy", "d",
"descent", "display", "dur", "dx", "dy",
"end",
+ "fill", "fill-rule", "font-family",
"font-size", "font-stretch",
+ "font-style", "font-variant", "font-weight",
"from", "fx", "fy", "g1", "g2",
+ "glyph-name", "gradientUnits", "hanging",
"height", "horiz-adv-x",
+ "horiz-origin-x", "id", "ideographic",
"k", "keyPoints", "keySplines",
+ "keyTimes", "lang", "marker-end",
"marker-mid", "marker-start",
+ "markerHeight", "markerUnits", "markerWidth",
"mathematical", "max", "min",
+ "name", "offset", "opacity",
"orient", "origin", "overline-position",
+ "overline-thickness", "panose-1", "path",
"pathLength", "points",
+ "preserveAspectRatio", "r", "refX",
"refY", "repeatCount", "repeatDur",
+ "requiredExtensions", "requiredFeatures",
"restart", "rotate", "rx", "ry",
+ "slope", "stemh", "stemv",
"stop-color", "stop-opacity",
+ "strikethrough-position", "strikethrough-thickness",
"stroke",
+ "stroke-dasharray", "stroke-dashoffset",
"stroke-linecap", "stroke-linejoin",
+ "stroke-miterlimit", "stroke-opacity",
"stroke-width", "systemLanguage",
+ "target", "text-anchor", "to",
"transform", "type", "u1", "u2",
+ "underline-position", "underline-thickness",
"unicode", "unicode-range",
+ "units-per-em", "values", "version",
"viewBox", "visibility", "width",
+ "widths", "x", "x-height", "x1",
"x2", "xlink:actuate", "xlink:arcrole",
+ "xlink:href", "xlink:role", "xlink:show",
"xlink:title", "xlink:type",
+ "xml:base", "xml:lang", "xml:space",
"xmlns", "xmlns:xlink", "y", "y1",
"y2",
+ "zoomAndPan"
+ ));
+
+ // Fallback check for a single CSS property value that is not listed in
+ // stylePropertiesValues. Accepts a '#' followed by 3-6 lower-case hex digits, an
+ // rgb(...) triple of 1-3 digit numbers with optional '%', or a short optionally
+ // negative decimal number followed by an optional unit, comma or ')'.
+ public final java.util.regex.Pattern REGEX_VALID_CSS_VALUE =
java.util.regex.Pattern.compile(
+
"^(#[0-9a-f]{3,6}|rgb\\(\\d{1,3}%?,\\d{1,3}%?,?\\d{1,3}%?\\)?|-?\\d{0,2}\\.?\\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\\))?)$"
+ );
+
+ // Character-level whitelist applied to the whole style attribute value: only the
+ // listed punctuation, whitespace, letters and digits, quoted runs of word
+ // characters/whitespace, and parenthesised digit lists are allowed.
+ public final java.util.regex.Pattern REGEX_VALID_CSS_STRING1 =
java.util.regex.Pattern.compile(
+
"^([-:,;#%.\\sa-zA-Z0-9!]|\\w-\\w|'[\\s\\w]+'|\"[\\s\\w]+\"|\\([\\d,\\s]+\\))*$"
+ );
+
+ // Structural check applied to the whole style attribute value: a sequence of
+ // "name : value" declarations, each ended by ';' or the end of the string, whose
+ // values contain neither ':' nor ';'.
+ public final java.util.regex.Pattern REGEX_VALID_CSS_STRING2 =
java.util.regex.Pattern.compile(
+ "^(\\s*[-\\w]+\\s*:\\s*[^:;]*(;|$))*$"
+ );
+
+
+ protected java.util.Set<String> styleProperties = new
java.util.HashSet(java.util.Arrays.asList(
+ "azimuth",
+ "background", "background-attachment",
"background-color", "background-image",
+ "background-position", "background-repeat",
+ "border", "border-bottom",
"border-bottom-color", "border-bottom-style",
+ "border-bottom-width", "border-collapse",
"border-color", "border-left",
+ "border-left-color", "border-left-style",
"border-left-width", "border-right",
+ "border-right-color", "border-right-style",
"border-right-width", "border-spacing",
+ "border-style", "border-top",
"border-top-color", "border-top-style",
+ "border-top-width", "border-width",
+ "clear", "color",
+ "cursor", "direction", "display",
"elevation", "float", "font",
+ "font-family", "font-size", "font-style",
"font-variant", "font-weight",
+ "height", "letter-spacing", "line-height",
+ "margin", "margin-bottom", "margin-left",
"margin-right", "margin-top",
+ "max-height", "max-width", "min-height",
"min-width",
+ "overflow",
+ "padding", "padding-bottom", "padding-left",
"padding-right", "padding-top",
+ "pause", "pause-after", "pause-before",
"pitch",
+ "pitch-range", "richness", "speak",
"speak-header", "speak-numeral",
+ "speak-punctuation", "speech-rate", "stress",
"text-align",
+ "text-decoration", "text-indent",
"unicode-bidi", "vertical-align",
+ "voice-family", "volume", "white-space",
"width"
+ ));
+
+
+ protected java.util.Set<String> stylePropertiesValues = new
java.util.HashSet(java.util.Arrays.asList(
+ "aqua", "auto", "baseline", "black",
"block", "blue", "bold", "both",
"bottom", "brown",
+ "center", "collapse", "dashed",
"dotted", "fuchsia", "gray", "green",
+ "inherit", "italic", "left",
"length", "lime", "maroon", "medium",
"middle", "navy", "none", "normal",
+ "nowrap", "olive", "percentage",
"pointer", "purple", "red", "right",
"silver", "solid", "sub", "super",
+ "teal", "text-bottom", "text-top",
"top", "transparent", "underline", "white",
"yellow"
+ ));
+
+ // Additional CSS property names accepted by validateHtmlAttributeValue besides styleProperties.
+ // Built with a parameterized HashSet instead of the raw type to avoid an unchecked conversion.
+ protected java.util.Set<String> svgStyleProperties = new java.util.HashSet<String>(java.util.Arrays.asList(
+     "fill", "fill-opacity", "fill-rule", "stroke", "stroke-linecap",
+     "stroke-linejoin", "stroke-opacity", "stroke-width"
+ ));
+
+
+ /**
+  * Validates the URI of a link tag.
+  *
+  * @param element token used for the error location
+  * @param uri     the URI to check with validateURI
+  * @throws SemanticException if validateURI rejects the URI
+  */
+ public void validateLinkTagURI(Token element, String uri) throws SemanticException {
+     if (!validateURI(uri)) {
+         // Go through the overridable message hook instead of a hard-coded literal,
+         // so subclasses that customise getInvalidURIMessage affect this error too.
+         throw createSemanticException(getInvalidURIMessage(uri), element);
+     }
+ }
+
+ /**
+  * Rejects elements whose lower-cased name is in none of the HTML, SVG or MathML
+  * element whitelists.
+  *
+  * @param element tag-name token of the element
+  * @throws SemanticException if the element name is not whitelisted
+  */
+ public void validateHtmlElement(Token element) throws SemanticException {
+     final String tag = element.getText().toLowerCase();
+     final boolean whitelisted = acceptableElements.contains(tag)
+             || svgElements.contains(tag)
+             || mathmlElements.contains(tag);
+     if (whitelisted) {
+         return;
+     }
+     throw createSemanticException(getInvalidElementMessage(tag), element);
+ }
+
+ /**
+  * Rejects attributes whose lower-cased name is in none of the HTML, SVG or MathML
+  * attribute whitelists. The error is located at the element token.
+  *
+  * @param element   tag-name token of the owning element
+  * @param attribute attribute-name token
+  * @throws SemanticException if the attribute name is not whitelisted
+  */
+ public void validateHtmlAttribute(Token element, Token attribute) throws SemanticException {
+     final String tag = element.getText().toLowerCase();
+     final String attr = attribute.getText().toLowerCase();
+     final boolean whitelisted = acceptableAttributes.contains(attr)
+             || svgAttributes.contains(attr)
+             || mathmlAttributes.contains(attr);
+     if (whitelisted) {
+         return;
+     }
+     throw createSemanticException(getInvalidAttributeMessage(tag, attr), element);
+ }
+
+ // Validates an attribute value. Null or empty values are accepted as is. Values of
+ // URI-typed attributes must pass validateURI. Values of the "style" attribute must
+ // match both CSS string patterns, and every ';'-separated declaration must contain
+ // ':', use a whitelisted property name, and have a value that is either whitelisted
+ // or matches REGEX_VALID_CSS_VALUE. All errors are located at the element token.
+ public void validateHtmlAttributeValue(Token element, Token attribute, String
attributeValue) throws SemanticException{
+
+ if (attributeValue == null || attributeValue.length() == 0) return;
+
+ String elementName = element.getText().toLowerCase();
+ String attributeName = attribute.getText().toLowerCase();
+
+ // Check element with attribute that has URI value (href, src, etc.)
+ if (attributesWhoseValueIsAURI.contains(attributeName) &&
!validateURI(attributeValue)) {
+ throw createSemanticException(getInvalidURIMessage(attributeValue),
element);
+ }
+
+ // Check attribute value of style (CSS filtering)
+ if (attributeName.equals("style")) {
+ if (!REGEX_VALID_CSS_STRING1.matcher(attributeValue).matches() ||
+ !REGEX_VALID_CSS_STRING2.matcher(attributeValue).matches()) {
+ throw createSemanticException(
+ getInvalidAttributeValueMessage(elementName, attributeName,
attributeValue),
+ element
+ );
+ }
+
+ String[] cssProperties = attributeValue.split(";");
+ for (String cssProperty : cssProperties) {
+ if (!cssProperty.contains(":")) {
+ throw createSemanticException(
+ getInvalidAttributeValueMessage(elementName, attributeName,
attributeValue),
+ element
+ );
+ }
+ String[] property = cssProperty.split(":");
+ String propertyName = property[0].trim();
+ // A declaration with nothing after the colon yields a null value, which skips the value check below.
+ String propertyValue = property.length == 2 ? property[1].trim() :
null;
+
+ // CSS property name
+ if (!styleProperties.contains(propertyName) &&
+ !svgStyleProperties.contains(propertyName)) {
+ throw createSemanticException(
+ getInvalidAttributeValueMessage(elementName, attributeName,
attributeValue),
+ element
+ );
+ }
+
+ // CSS property value
+ if (propertyValue != null &&
!stylePropertiesValues.contains(propertyValue)) {
+ // Not in list, now check the regex
+ if (!REGEX_VALID_CSS_VALUE.matcher(propertyValue).matches()) {
+ throw createSemanticException(
+ getInvalidAttributeValueMessage(elementName,
attributeName, attributeValue),
+ element
+ );
+ }
+ }
+ }
+ }
+
+
+ }
+
+ // Returns the error text for a rejected URI; the uri argument is not included in the message.
+ public String getInvalidURIMessage(String uri){
+ return "invalid URI";
+ }
+
+ // Returns the error text for a rejected element, quoting the element name.
+ public String getInvalidElementMessage(String elementName){
+ return "invalid element '" + elementName + "'";
+ }
+
+ /**
+  * Returns the error text for a rejected attribute, quoting the attribute and
+  * element names.
+  *
+  * @param elementName   name of the owning element
+  * @param attributeName name of the rejected attribute
+  * @return the error message
+  */
+ public String getInvalidAttributeMessage(String elementName, String attributeName) {
+     // The literal "' for element '" is kept on one line; a line break inside it would leave the string unterminated.
+     return "invalid attribute '" + attributeName + "' for element '" + elementName + "'";
+ }
+
+ // Returns the error text for a rejected attribute value, quoting the attribute and
+ // element names; the value argument is not included in the message.
+ public String getInvalidAttributeValueMessage(String elementName, String
attributeName, String value){
+ return "invalid value of attribute '" + attributeName +
"' for element '" + elementName + "'";
+ }
+
+ // Intentionally empty: the default sanitizer does nothing for this hook.
+ public void escapeSeamTextToken(Token element) {
+ }
+
+ protected boolean validateURI(String uri) {
+
+ // Relative URI starts with a slash
+ if (uri.startsWith("/")) return true;
+
+ java.net.URI parsedURI;
+ try {
+ parsedURI = new java.net.URI(uri);
+ } catch (java.net.URISyntaxException ex) {
+ return false;
+ }
+
+ if (!uriSchemes.contains(parsedURI.getScheme())) {
+ return false;
+ }
+ return true;
+ }
+
+ /**
+  * Builds a SemanticException carrying the given message and the file name, line
+  * and column reported by the token.
+  *
+  * @param message error text
+  * @param element token that provides the error location
+  * @return the new exception (not thrown here)
+  */
+ public SemanticException createSemanticException(String message, Token element) {
+     final String file = element.getFilename();
+     final int line = element.getLine();
+     final int column = element.getColumn();
+     return new SemanticException(message, file, line, column);
+ }
+
+ }
+
+
+ // Seam Text markup symbols emitted by the conversion helpers below and collected
+ // in seamTextSymbols.
+ private final String SEAMTEXT_MONOSPACE = "|";
+
+ private final String SEAMTEXT_TWIDDLE = "~";
+
+ private final String SEAMTEXT_HASH = "#";
+
+ private final String SEAMTEXT_HAT = "^";
+
+ private final String SEAMTEXT_PLUS = "+";
+
+ private final String SEAMTEXT_STAR = "*";
+
+ private final String SEAMTEXT_UNDERSCORE = "_";
+
+ private final String SEAMTEXT_EQ = "=";
+
+ private final String SEAMTEXT_BACKTICK = "`";
+
+ // Two newlines: the symbol emitted for a <p> element.
+ private final String SEAMTEXT_PARAGRAPH = "\n\n";
+
+ private final String SEAM_DOUBLEQUOTE = "\"";
+
+ private final String SEAM_OPEN = "[";
+
+ private final String SEAM_CLOSE = "]";
+
+ private final String SEAM_GT = ">";
+
+
+ // Symbols that escapeSeamText passes through unescaped inside <tt> and <pre>.
+ // Built with a parameterized HashSet instead of the raw type to avoid an unchecked conversion.
+ protected java.util.Set<String> seamTextSymbols = new java.util.HashSet<String>(java.util.Arrays.asList(
+     SEAMTEXT_MONOSPACE, SEAMTEXT_TWIDDLE, SEAMTEXT_HASH, SEAMTEXT_HAT, SEAMTEXT_PLUS, SEAMTEXT_STAR,
+     SEAMTEXT_UNDERSCORE, SEAMTEXT_EQ, SEAMTEXT_BACKTICK, SEAMTEXT_PARAGRAPH, SEAM_DOUBLEQUOTE, SEAM_OPEN,
+     SEAM_CLOSE, SEAM_GT
+ ));
+
+ // Lower-case names of the HTML elements that are translated to Seam Text markup
+ // instead of being copied through as HTML (see isSeamTextElement).
+ // Built with a parameterized HashSet instead of the raw type to avoid an unchecked conversion.
+ protected java.util.Set<String> htmlSeamTextElements = new java.util.HashSet<String>(java.util.Arrays.asList(
+     "del", "sup", "pre", "p", "q", "h1", "h2", "h3", "h4", "ul", "ol", "li", "i", "tt", "u", "a"
+ ));
+
+ // Validation strategy; replaceable through setSanitizer.
+ private Sanitizer sanitizer = new DefaultSanitizer();
+
+ // Tag-name tokens of the currently open HTML elements; pushed in openTag, popped in the closeTag rules.
+ private java.util.Stack<Token> htmlElementStack = new
java.util.Stack<Token>();
+
+ // Buffer that receives the converted output when no capture is active.
+ private StringBuilder mainBuilder = new StringBuilder();
+
+ // Collects the plain text found inside a link element (see the body rule).
+ private StringBuilder linkValueCollector = new StringBuilder();
+
+ // Value of the most recent href attribute seen on a link element (set in the attribute rule).
+ private String linkHolder;
+
+ // Buffer that append() currently writes to: mainBuilder, or a temporary one between beginCapture and endCapture.
+ private StringBuilder builder = mainBuilder;
+
+
+ // Replaces the sanitizer used by the grammar actions.
+ public void setSanitizer(Sanitizer sanitizer) {
+ this.sanitizer = sanitizer;
+ }
+
+ // Returns the content of the buffer currently being written to (builder).
+ public String toString() {
+ return builder.toString();
+ }
+
+ // Redirects subsequent append() calls into a fresh temporary buffer.
+ private void beginCapture() {
+ builder = new StringBuilder();
+ }
+
+ // Returns what was appended since beginCapture and switches output back to mainBuilder.
+ private String endCapture() {
+ String result = builder.toString();
+ builder = mainBuilder;
+ return result;
+ }
+
+ // Appends every given string, in order, to the buffer currently in use.
+ private void append(String... strings) {
+     for (int i = 0; i < strings.length; i++) {
+         builder.append(strings[i]);
+     }
+ }
+
+
+ // True when the token text, lower-cased, is "a".
+ public boolean isLink(Token token) {
+     return "a".equals(token.getText().toLowerCase());
+ }
+
+ private String createSeamTextLink(String link, String value) {
+
+ StringBuilder builder = new StringBuilder();
+ builder.append("[");
+
+ if (value != null) {
+ builder.append(value.trim());
+ }
+
+ builder.append("=>");
+ builder.append(link);
+ builder.append("]");
+
+ return builder.toString();
+
+ }
+
+ // True when the token text, lower-cased, is one of h1, h2, h3 or h4.
+ public boolean isHeader(Token token) {
+     final String tag = token.getText().toLowerCase();
+     return java.util.Arrays.asList("h1", "h2", "h3", "h4").contains(tag);
+ }
+
+ /**
+  * Returns the Seam Text prefix for a heading element: a newline followed by one
+  * SEAMTEXT_PLUS per heading level (h1 = 1 ... h4 = 4). Any other element yields
+  * an empty string.
+  *
+  * @param token tag-name token of the heading
+  * @return the heading prefix, or "" if the token is not h1-h4
+  */
+ public String createSeamTextHeader(Token token) throws SemanticException {
+
+     // Lower-case first so that <H2> is treated like <h2>, matching isHeader().
+     String name = token.getText().toLowerCase();
+
+     int level;
+     if ("h1".equals(name)) {
+         level = 1;
+     } else if ("h2".equals(name)) {
+         level = 2;
+     } else if ("h3".equals(name)) {
+         level = 3;
+     } else if ("h4".equals(name)) {
+         level = 4;
+     } else {
+         return "";
+     }
+
+     StringBuilder seamHeader = new StringBuilder("\n");
+     for (int i = 0; i < level; i++) {
+         seamHeader.append(SEAMTEXT_PLUS);
+     }
+     return seamHeader.toString();
+ }
+
+ /**
+  * Tells whether the token names a list element.
+  *
+  * @param token tag-name token
+  * @return true for "ul" or "ol" in any letter case
+  */
+ public boolean isList(Token token) {
+     // Lower-case like isLink/isHeader/isListItem so that <UL> and <OL> are recognised too.
+     String name = token.getText().toLowerCase();
+     return ("ul".equals(name) || "ol".equals(name));
+ }
+
+ // True when the token text, lower-cased, is "li".
+ public boolean isListItem(Token token) {
+     return "li".equals(token.getText().toLowerCase());
+ }
+
+ /**
+  * Returns the Seam Text list-item symbol for an li element, chosen from the
+  * element on top of the stack: SEAMTEXT_HASH under ul, SEAMTEXT_EQ under ol.
+  *
+  * @param token            tag-name token of the list item
+  * @param htmlElementStack currently open elements, innermost on top
+  * @return the list-item symbol
+  * @throws SemanticException if there is no enclosing element, or it is neither ul nor ol
+  */
+ public String createSeamTextList(Token token, java.util.Stack<Token> htmlElementStack) throws SemanticException {
+
+     // A list item with nothing open around it would make peek() throw
+     // EmptyStackException; report it as a normal parse error at the item instead.
+     if (htmlElementStack.isEmpty()) {
+         throw new SemanticException("<li> must follow <ol> or <ul>",
+                 token.getFilename(), token.getLine(), token.getColumn());
+     }
+
+     Token parent = htmlElementStack.peek();
+     String parentName = parent.getText().toLowerCase();
+
+     if (parentName.equals("ul")) {
+         return SEAMTEXT_HASH;
+     }
+     if (parentName.equals("ol")) {
+         return SEAMTEXT_EQ;
+     }
+
+     String message = "<li> must follow <ol> or <ul> not <" + parent.getText() + ">";
+     throw new SemanticException(message, parent.getFilename(), parent.getLine(), parent.getColumn());
+ }
+
+ /**
+  * Rejects a list or heading that is opened while a heading (h1-h4) or a list
+  * (ol, ul) is already open anywhere on the element stack.
+  *
+  * @param name             tag-name token of the element being opened
+  * @param htmlElementStack currently open elements
+  * @throws SemanticException located at name if such an ancestor exists
+  */
+ public void validateNestedMarkup(Token name, java.util.Stack<Token> htmlElementStack) throws SemanticException {
+     for (Token token : htmlElementStack) {
+         // Compare case-insensitively, as isHeader/isListItem do, so that <UL> or <H1> ancestors are caught as well.
+         String ancestor = token.getText().toLowerCase();
+         if (ancestor.equals("h1") || ancestor.equals("h2") || ancestor.equals("h3")
+                 || ancestor.equals("h4") || ancestor.equals("ol") || ancestor.equals("ul")) {
+             String message = "<" + token.getText() + "> contains nested <" + name.getText() + "> token";
+             throw new SemanticException(message, name.getFilename(), name.getLine(), name.getColumn());
+         }
+     }
+ }
+
+ /**
+  * When the element on top of the stack is a heading (h1-h4), scans the upcoming
+  * tokens until one of type ALPHANUMERICWORD or EOF is found and fails if EOF comes
+  * first. Does nothing for other elements.
+  *
+  * @param token            not used by this method
+  * @param htmlElementStack currently open elements, innermost on top
+  * @throws SemanticException located at the heading if no word token follows
+  */
+ public void validateHeaderMarkup(Token token, java.util.Stack<Token> htmlElementStack) throws TokenStreamException, SemanticException {
+     // Hard-coded token type numbers.
+     // NOTE(review): presumably these mirror the generated token type constants - confirm.
+     int EOF = 1;
+     int ALPHANUMERICWORD = 4;
+
+     Token element = htmlElementStack.peek();
+     // Lower-case so that <H1>..<H4> are validated too, consistent with isHeader().
+     String header = element.getText().toLowerCase();
+
+     if (!(header.equals("h1") || header.equals("h2") || header.equals("h3") || header.equals("h4"))) {
+         return;
+     }
+
+     boolean containText = false;
+     int i = 0;
+     int type;
+     do {
+         i++;
+         type = LT(i).getType();
+         if (type == ALPHANUMERICWORD) {
+             containText = true;
+             break;
+         }
+     } while (type != EOF);
+
+     if (!containText) {
+         String message = "You must have some text following a heading";
+         throw new SemanticException(message, element.getFilename(), element.getLine(), element.getColumn());
+     }
+ }
+
+ /**
+  * Maps an element name (compared in lower case) to its Seam Text symbol.
+  *
+  * @param token tag-name token
+  * @return the symbol for tt, del, i, sup, u, pre, p, q or blockquote; "" otherwise
+  */
+ public String createSimpleSeamText(Token token) throws SemanticException {
+     final String tag = token.getText().toLowerCase();
+
+     if ("tt".equals(tag)) return SEAMTEXT_MONOSPACE;
+     if ("del".equals(tag)) return SEAMTEXT_TWIDDLE;
+     if ("i".equals(tag)) return SEAMTEXT_STAR;
+     if ("sup".equals(tag)) return SEAMTEXT_HAT;
+     if ("u".equals(tag)) return SEAMTEXT_UNDERSCORE;
+     if ("pre".equals(tag)) return SEAMTEXT_BACKTICK;
+     if ("p".equals(tag)) return SEAMTEXT_PARAGRAPH;
+     if ("q".equals(tag) || "blockquote".equals(tag)) return SEAM_DOUBLEQUOTE;
+
+     return "";
+ }
+
+ // True when the element name, lower-cased, is listed in htmlSeamTextElements.
+ public boolean isSeamTextElement(Token element) {
+     return htmlSeamTextElements.contains(element.getText().toLowerCase());
+ }
+
+
+ public String escapeSeamText(Token token, java.util.Stack <Token>
parentHtmlTokens) throws TokenStreamException {
+
+ StringBuilder result = new StringBuilder();
+ String tokenName = token.getText();
+
+ if(parentHtmlTokens != null && !parentHtmlTokens.isEmpty()){
+ Token parentToken = parentHtmlTokens.peek();
+ String parentTokenName = parentToken.getText().toLowerCase();
+
+ if ("tt".equals(parentTokenName) ||
"pre".equals(parentTokenName)) {
+
+ if ("<".equals(tokenName)) {
+ result.append("<");
+ } else if("&".equals(tokenName)) {
+ result.append("&");
+ } else if (">".equals(tokenName)) {
+ result.append(">");
+ } else if(""".equals(tokenName)){
+ result.append("\"");
+ }else if(seamTextSymbols.contains(tokenName)) {
+ result.append(tokenName);
+ }
+ }
+ }
+ result = result.length() != 0 ? result :
result.append("\\").append(tokenName);
+ return result.toString();
+ }
+
+
+
+}
+
+
+// Entry rule: optional leading newlines, then optionally the text followed by end of input.
+startRule: (newline)* (text eof)?
+ ;
+
+// One or more content items (Seam Text special characters, plain text, HTML elements
+// or escaped HTML characters), each optionally followed by newlines.
+text: ((seamCharacters|plain|html|htmlSpecialChars) (newline)*)+
+ ;
+
+word: an:ALPHANUMERICWORD { append( an.getText() ); } | uc:UNICODEWORD { append(
uc.getText() ); }
+ ;
+
+// A double quote is copied to the output; the escaped entity tokens are passed through
+// escapeSeamText together with the current element stack.
+htmlSpecialChars:
+ DOUBLEQUOTE { append("\""); }
+ | lt:ESCAPED_LT {append(escapeSeamText(lt, htmlElementStack));}
+ | gt:ESCAPED_GT {append(escapeSeamText(gt, htmlElementStack));}
+ | amp:ESCAPED_AMP {append(escapeSeamText(amp, htmlElementStack));}
+ | qout:ESCAPED_QOUT {append(escapeSeamText(qout, htmlElementStack));}
+
+ ;
+eof: EOF;
+
+punctuation: p:PUNCTUATION { append( p.getText() ); }
+ | sq:SINGLEQUOTE { append( sq.getText() ); }
+ | s:SLASH { append( s.getText() ); }
+ ;
+
+specialChars:
+ st:STAR {append( st.getText() ); }
+ | b:BAR { append( b.getText() ); }
+ | h:HAT { append( h.getText() ); }
+ | p:PLUS { append( p.getText() ); }
+ | eq:EQ { append( eq.getText() ); }
+ | hh:HASH { append( hh.getText() ); }
+ | e:ESCAPE { append( e.getText() ); }
+ | t:TWIDDLE { append( t.getText() ); }
+ | u:UNDERSCORE { append( u.getText() ); }
+ ;
+
+
+// Characters that have a meaning in Seam Text. Each one is appended through
+// escapeSeamText, which decides from the current element stack whether to escape it.
+seamCharacters:
+ hat:HAT {append(escapeSeamText(hat, htmlElementStack));}
+ | hash:HASH {append(escapeSeamText(hash, htmlElementStack));}
+ | open:OPEN {append(escapeSeamText(open, htmlElementStack)) ;}
+ | close:CLOSE {append(escapeSeamText(close, htmlElementStack));}
+ | twiddle:TWIDDLE {append(escapeSeamText(twiddle, htmlElementStack));}
+ | bar:BAR {append(escapeSeamText(bar, htmlElementStack));}
+ | eq:EQ {append(escapeSeamText(eq, htmlElementStack));}
+ | plus:PLUS {append(escapeSeamText(plus, htmlElementStack));}
+ | backtick:BACKTICK {append(escapeSeamText(backtick, htmlElementStack));}
+ | st:STAR {append(escapeSeamText(st, htmlElementStack));}
+ | e:ESCAPE {append(escapeSeamText(e, htmlElementStack));}
+ | gt:GT {append(escapeSeamText(gt, htmlElementStack));}
+ ;
+
+// Whitespace is copied to the output, except when the innermost open element is a
+// list (ul/ol), where it is dropped.
+space: s:SPACE {
+     boolean directlyInList = false;
+     if (!htmlElementStack.isEmpty()) {
+         // Lower-case the tag name, as the other element checks do, so that <UL>/<OL> also suppress whitespace.
+         String tokenName = htmlElementStack.peek().getText().toLowerCase();
+         directlyInList = "ul".equals(tokenName) || "ol".equals(tokenName);
+     }
+     if (!directlyInList) {
+         append(s.getText());
+     }
+ }
+ ;
+
+newline: n:NEWLINE { append(n.getText());}
+
+
+ ;
+
+newlineOrEof: newline | EOF
+ ;
+
+// An HTML element: opening tag name, any mix of spaces and space-separated attributes,
+// then either '>' + body + closing tag, or the self-closing '/>' form.
+html: openTag ( space | space attribute )* ( ( beforeBody body closeTagWithBody ) |
closeTagWithNoBody)
+ ;
+
+plain: (word|punctuation|space)
+ ;
+
+// Content of an element. When the innermost open element is a link, the text produced
+// by each 'plain' item is captured into linkValueCollector instead of the output, and
+// an escaped HTML character raises a SemanticException. NEWLINE tokens are matched
+// here without an action, so they are not appended.
+body: (
+
+ {
+ Token token = htmlElementStack.peek();
+ boolean isLink = isLink(token);
+ linkValueCollector = new StringBuilder();
+ }
+ (
+
+ seamCharacters|
+
+
+ { if(isLink) {
+ beginCapture();
+ }
+
+ }
+ plain
+ {
+ if(isLink) {
+ String plain = endCapture();
+ linkValueCollector.append(plain);
+ }
+
+ }
+
+ |html
+ |htmlSpecialChars
+ {
+ if(isLink) {
+ String message = "unexpected token";
+ throw new SemanticException(message);
+ }
+ }
+ |newline: NEWLINE )*)
+ ;
+
+// Start of an element: '<' followed by the tag name. The name is validated by the
+// sanitizer. Seam Text elements emit no HTML: lists and headings are checked for
+// illegal nesting, list items and headings emit their Seam Text prefix. Any other
+// element is copied as "<name". The name token is then pushed on the element stack.
+// Recognition errors are rethrown wrapped with the innermost open element, if any.
+openTag:
+ LT name:ALPHANUMERICWORD
+ {
+
+
+ sanitizer.validateHtmlElement(name);
+
+ if (isSeamTextElement(name)) {
+ if (isList(name)) {
+ validateNestedMarkup(name, htmlElementStack);
+ } else if (isListItem(name)) {
+ append(createSeamTextList(name,htmlElementStack));
+ } else if (isHeader(name)) {
+ validateNestedMarkup(name, htmlElementStack);
+ append(createSeamTextHeader(name));
+ }
+
+ } else {
+ append("<");
+ append(name.getText());
+ }
+ htmlElementStack.push(name);
+
+ }
+ ;
+ exception
+ catch [RecognitionException ex] {
+ if (htmlElementStack.isEmpty()) throw ex;
+ Token tok = htmlElementStack.peek();
+ if (tok != null) {
+ throw new HtmlRecognitionException(tok, ex);
+ } else {
+ throw ex;
+ }
+ }
+
+
+// The '>' that ends an opening tag. For a Seam Text element the symbol from
+// createSimpleSeamText is emitted; otherwise '>' is copied.
+// Recognition errors are rethrown wrapped with the innermost open element, if any.
+beforeBody: GT {
+ Token name = htmlElementStack.peek();
+ if(isSeamTextElement(name)){
+ append(createSimpleSeamText(name));
+ } else {
+ append(">");
+ }
+ }
+ ;
+ exception
+ catch [RecognitionException ex] {
+ if (htmlElementStack.isEmpty()) throw ex;
+ Token tok = htmlElementStack.peek();
+ if (tok != null) {
+ throw new HtmlRecognitionException(tok, ex);
+ } else {
+ throw ex;
+ }
+ }
+
+// Closing tag "</name>". A Seam Text link emits "[text=>href]" built from linkHolder
+// and linkValueCollector; other Seam Text elements emit their closing symbol; list
+// items and headings additionally emit a newline and run validateHeaderMarkup. Any
+// other element is copied as "</name>". The innermost element is popped afterwards.
+// NOTE(review): the closing name is not compared with the element on top of the
+// stack, so a mismatched closing tag appears to be accepted - confirm intent.
+closeTagWithBody:
+ LT SLASH name:ALPHANUMERICWORD GT
+ {
+ if(isSeamTextElement(name)){
+ if(isLink(name)){
+ append(createSeamTextLink(linkHolder,linkValueCollector.toString().trim()));
+ } else {
+ append(createSimpleSeamText(name));
+ }
+
+ if(isListItem(name) || isHeader(name)) {
+ append("\n");
+ validateHeaderMarkup(name,htmlElementStack);
+ }
+
+ } else {
+ append("</");
+ append(name.getText());
+ append(">");
+ }
+
+
+ htmlElementStack.pop();
+ }
+ ;
+
+// Self-closing end "/>" of an element; pops the element from the stack.
+// The "/>" is copied only for elements whose opening "<name" was copied by openTag.
+// Seam Text elements emit no opening tag, so writing "/>" for them would leave a
+// dangling fragment in the output.
+closeTagWithNoBody:
+ SLASH GT
+ {
+     Token closed = htmlElementStack.pop();
+     if (!isSeamTextElement(closed)) {
+         append("/>");
+     }
+ }
+ ;
+
+// An attribute of the form name="value". The name and the captured value are validated
+// by the sanitizer. For ordinary elements the attribute is copied to the output; for
+// Seam Text elements nothing is copied, but the href of a link is remembered in
+// linkHolder. Recognition errors are rethrown wrapped with the innermost open element.
+attribute: att:ALPHANUMERICWORD (space)* EQ (space)*
+ DOUBLEQUOTE
+ {
+     Token token = htmlElementStack.peek();
+     sanitizer.validateHtmlAttribute(token, att);
+     boolean isSeamTextProcessed = isSeamTextElement(token);
+
+     if (!isSeamTextProcessed) {
+         append(att.getText());
+         append("=\"");
+     }
+     // The value is captured separately so it can be validated before it is emitted.
+     beginCapture();
+ }
+ attributeValue
+ {
+     String attValue = endCapture();
+     sanitizer.validateHtmlAttributeValue(token, att, attValue);
+
+     if (!isSeamTextProcessed) {
+         append(attValue);
+     } else if (isLink(token) && "href".equalsIgnoreCase(att.getText())) {
+         // Case-insensitive: the sanitizer lower-cases attribute names, so HREF is a
+         // valid spelling and must not lose the link target.
+         linkHolder = attValue;
+     }
+ }
+ DOUBLEQUOTE
+ {
+     if (!isSeamTextProcessed) {
+         append("\"");
+     }
+ }
+ ;
+ exception
+ catch [RecognitionException ex] {
+     if (htmlElementStack.isEmpty()) throw ex;
+     Token tok = htmlElementStack.peek();
+     if (tok != null) {
+         throw new HtmlRecognitionException(tok, ex);
+     } else {
+         throw ex;
+     }
+ }
+
+// Text between the quotes of an attribute: any sequence of ampersands, words,
+// punctuation, slashes, spaces and special characters, each copied to the current
+// buffer. Recognition errors are rethrown wrapped with the innermost open element, if any.
+attributeValue: ( AMPERSAND { append("&"); } |
+ an:ALPHANUMERICWORD { append( an.getText() ); } |
+ p:PUNCTUATION { append( p.getText() ); } |
+ s:SLASH { append( s.getText() ); } |
+ space|specialChars )*
+ ;
+ exception
+ catch [RecognitionException ex] {
+ if (htmlElementStack.isEmpty()) throw ex;
+ Token tok = htmlElementStack.peek();
+ if (tok != null) {
+ throw new HtmlRecognitionException(tok, ex);
+ } else {
+ throw ex;
+ }
+ }
+
+class HtmlSeamTextLexer extends Lexer;
+
+options
+{
+ k=2;
+
+ // Allow any char but \uFFFF (16 bit -1)
+ charVocabulary='\u0000'..'\uFFFE';
+}
+
+
+// Unicode sets allowed:
+// '\u00a0'..'\u00ff' Latin 1 supplement (no control characters) http://www.unicode.org/charts/PDF/U0080.pdf
+// '\u0100'..'\u017f' Latin Extended A http://www.unicode.org/charts/PDF/U0100.pdf
+// '\u0180'..'\u024f' Latin Extended B http://www.unicode.org/charts/PDF/U0180.pdf
+// '\u0250'..'\ufaff' Various other languages, punctuation etc. (excluding "presentation forms")
+// '\uff00'..'\uffef' Halfwidth and Fullwidth forms (including CJK punctuation)
+
+
+ALPHANUMERICWORD
+ options {
+ paraphrase = "letters or digits";
+ }
+ : ('a'..'z'|'A'..'Z'|'0'..'9')+
+ ;
+
+UNICODEWORD
+ options {
+ paraphrase = "letters or digits";
+ }
+ : (
+ '\u00a0'..'\u00ff' |
+ '\u0100'..'\u017f' |
+ '\u0180'..'\u024f' |
+ '\u0250'..'\ufaff' |
+ '\uff00'..'\uffef'
+ )+
+ ;
+
+PUNCTUATION
+ options {
+ paraphrase = "a punctuation character";
+ }
+ : '-' | ';' | ':' | '(' | ')' | '{' |
'}' | '?' | '!' | '@' | '%' | '.' |
',' | '$'
+ ;
+
+EQ
+ options {
+ paraphrase = "an equals '='";
+ }
+ : '='
+ ;
+
+PLUS
+ options {
+ paraphrase = "a plus '+'";
+ }
+ : '+'
+ ;
+
+UNDERSCORE
+ options {
+ paraphrase = "an underscore '_'";
+ }
+ : '_'
+ ;
+
+STAR
+ options {
+ paraphrase = "a star '*'";
+ }
+ : '*'
+ ;
+
+SLASH
+ options {
+ paraphrase = "a slash '/'";
+ }
+
+ : '/'
+ ;
+
+// The backslash character. The paraphrase is the name shown for this token in error
+// messages; the backslash inside it is doubled so that it survives as a character in
+// the generated Java string.
+ESCAPE
+ options {
+ paraphrase = "the escaping backslash '\\'";
+ }
+ : '\\'
+ ;
+
+BAR
+ options {
+ paraphrase = "a bar or pipe '|'";
+ }
+ : '|'
+ ;
+
+BACKTICK
+ options {
+ paraphrase = "a backtick '`'";
+ }
+ : '`'
+ ;
+
+
+TWIDDLE
+ options {
+ paraphrase = "a tilde '~'";
+ }
+ : '~'
+ ;
+
+DOUBLEQUOTE
+ options {
+ paraphrase = "a doublequote \"";
+ }
+ : '"'
+ ;
+
+SINGLEQUOTE
+ options {
+ paraphrase = "a single quote '";
+ }
+ : '\''
+ ;
+
+OPEN
+ options {
+ paraphrase = "an opening square bracket '['";
+ }
+ : '['
+ ;
+
+CLOSE
+ options {
+ paraphrase = "a closing square bracket ']'";
+ }
+ : ']'
+ ;
+
+HASH
+ options {
+ paraphrase = "a hash '#'";
+ }
+ : '#'
+ ;
+
+HAT
+ options {
+ paraphrase = "a caret '^'";
+ }
+ : '^'
+ ;
+
+GT
+ options {
+ paraphrase = "a closing angle bracket '>'";
+ }
+ : '>'
+ ;
+
+LT
+ options {
+ paraphrase = "an opening angle bracket '<'";
+ }
+ : '<'
+ ;
+
+AMPERSAND
+ options {
+ paraphrase = "an ampersand '&'";
+ }
+ : '&'
+ ;
+
+SPACE
+ options {
+ paraphrase = "a space or tab";
+ }
+ : (' '|'\t')+
+ ;
+
+NEWLINE
+ options {
+ paraphrase = "a newline";
+ }
+ : "\r\n" | '\r' | '\n'
+ ;
+
+EOF
+ options {
+ paraphrase = "the end of the text";
+ }
+ : '\uFFFF'
+ ;
+
+// The HTML entity for '<'. Spelled as the entity so it does not collide with LT.
+ESCAPED_LT : "&lt;"
+ ;
+
+// The HTML entity for '>'. Spelled as the entity so it does not collide with GT.
+ESCAPED_GT : "&gt;"
+ ;
+
+// The HTML entity for '&'. Spelled as the entity so it does not collide with AMPERSAND.
+ESCAPED_AMP : "&amp;"
+ ;
+
+// The HTML entity for '"'. Spelled as the entity; a bare quote here is not a valid
+// string literal and would collide with DOUBLEQUOTE.
+ESCAPED_QOUT : "&quot;"
+ ;
+
+
+
+
+
+
+
+
+