Author: abelevich
Date: 2008-11-04 06:32:25 -0500 (Tue, 04 Nov 2008)
New Revision: 11009
Removed:
trunk/sandbox/ui/editor/html-seamtext.g
Log:
moved to the src/main/antlr
Deleted: trunk/sandbox/ui/editor/html-seamtext.g
===================================================================
--- trunk/sandbox/ui/editor/html-seamtext.g 2008-11-04 11:27:53 UTC (rev 11008)
+++ trunk/sandbox/ui/editor/html-seamtext.g 2008-11-04 11:32:25 UTC (rev 11009)
@@ -1,1070 +0,0 @@
-header
-{
- package org.richfaces.antlr;
-}
-
-class HtmlSeamTextParser extends Parser;
-
-options
-{
- k=4;
- defaultErrorHandler=false;
-}
-
-{
- /**
-  * Recognition failure that pairs a caught RecognitionException with the
-  * token handed in as the opening HTML element.
-  */
- public class HtmlRecognitionException extends RecognitionException {
-     Token openingElement;
-     RecognitionException wrappedException;
-
-     public HtmlRecognitionException(Token openingElement, RecognitionException wrappedException) {
-         this.wrappedException = wrappedException;
-         this.openingElement = openingElement;
-     }
-
-     /** @return the token supplied to the constructor as the opening element */
-     public Token getOpeningElement() {
-         return this.openingElement;
-     }
-
-     /** @return the message of the wrapped exception */
-     public String getMessage() {
-         return this.wrappedException.getMessage();
-     }
-
-     /** @return the wrapped exception itself */
-     public Throwable getCause() {
-         return this.wrappedException;
-     }
- }
-
-
- /**
-  * Validation hooks invoked by the parser for link URIs, HTML elements,
-  * attributes and attribute values, plus factories for the error messages.
-  * Validation methods reject input by throwing SemanticException.
-  */
- public interface Sanitizer {
-
-     void validateLinkTagURI(Token element, String uri) throws SemanticException;
-
-     void validateHtmlElement(Token element) throws SemanticException;
-
-     void validateHtmlAttribute(Token element, Token attribute) throws SemanticException;
-
-     void validateHtmlAttributeValue(Token element, Token attribute, String attributeValue)
-             throws SemanticException;
-
-     void escapeSeamTextToken(Token element);
-
-     String getInvalidURIMessage(String uri);
-
-     String getInvalidElementMessage(String elementName);
-
-     String getInvalidAttributeMessage(String elementName, String attributeName);
-
-     String getInvalidAttributeValueMessage(String elementName, String attributeName, String value);
-
- }
-
- /**
-  * Allow-list based Sanitizer. Element names, attribute names, URI schemes and
-  * CSS properties/values are accepted only when they appear in the sets below
-  * (or, for CSS values, match REGEX_VALID_CSS_VALUE).
-  *
-  * Names are lower-cased with a fixed locale before lookup so that the result
-  * does not depend on the JVM default locale (e.g. "I" under a Turkish locale).
-  * NOTE(review): because lookups use the lower-cased name, mixed-case entries
-  * such as "animateColor" or "viewBox" can never match; kept as in the original.
-  */
- public static class DefaultSanitizer implements HtmlSeamTextParser.Sanitizer {
-
-     /** Builds a mutable, typed set from the given names (duplicates collapse). */
-     private static java.util.Set<String> setOf(String... names) {
-         return new java.util.HashSet<String>(java.util.Arrays.asList(names));
-     }
-
-     /** Locale-independent lower-casing used for every name lookup. */
-     private static String lowerCase(String text) {
-         return text.toLowerCase(java.util.Locale.ENGLISH);
-     }
-
-     /** Attributes whose value is checked with validateURI(). */
-     protected java.util.Set<String> attributesWhoseValueIsAURI = setOf(
-         "action", "cite", "href", "longdesc", "src", "xlink:href", "xml:base"
-     );
-
-     /** URI schemes accepted by validateURI(). */
-     protected java.util.Set<String> uriSchemes = setOf(
-         "afs", "aim", "callto", "ed2k", "feed", "ftp", "gopher", "http", "https",
-         "irc", "mailto", "news", "nntp", "rsync", "rtsp", "sftp", "ssh", "tag",
-         "tel", "telnet", "urn", "webcal", "wtai", "xmpp"
-     );
-
-     /** HTML element names accepted by validateHtmlElement(). */
-     protected java.util.Set<String> acceptableElements = setOf(
-         "a", "abbr", "acronym", "address", "area", "b", "bdo", "big", "blockquote",
-         "br", "button", "caption", "center", "cite", "code", "col", "colgroup", "dd",
-         "del", "dfn", "dir", "div", "dl", "dt", "em", "fieldset", "font", "form",
-         "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "input", "ins", "kbd",
-         "label", "legend", "li", "map", "menu", "ol", "optgroup", "option", "p",
-         "pre", "q", "s", "samp", "select", "small", "span", "strike", "strong",
-         "sub", "sup", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
-         "tr", "tt", "u", "ul", "var", "wbr"
-     );
-
-     /** MathML element names accepted by validateHtmlElement(). */
-     protected java.util.Set<String> mathmlElements = setOf(
-         "maction", "math", "merror", "mfrac", "mi", "mmultiscripts", "mn", "mo",
-         "mover", "mpadded", "mphantom", "mprescripts", "mroot", "mrow", "mspace",
-         "msqrt", "mstyle", "msub", "msubsup", "msup", "mtable", "mtd", "mtext",
-         "mtr", "munder", "munderover", "none"
-     );
-
-     /** SVG element names accepted by validateHtmlElement(). */
-     protected java.util.Set<String> svgElements = setOf(
-         "a", "animate", "animateColor", "animateMotion", "animateTransform",
-         "circle", "defs", "desc", "ellipse", "font-face", "font-face-name",
-         "font-face-src", "g", "glyph", "hkern", "image", "line", "linearGradient",
-         "marker", "metadata", "missing-glyph", "mpath", "path", "polygon",
-         "polyline", "radialGradient", "rect", "set", "stop", "svg", "switch", "text",
-         "title", "tspan", "use"
-     );
-
-     /** HTML attribute names accepted by validateHtmlAttribute(). */
-     protected java.util.Set<String> acceptableAttributes = setOf(
-         "abbr", "accept", "accept-charset", "accesskey", "action", "align", "alt",
-         "axis", "border", "cellpadding", "cellspacing", "char", "charoff", "charset",
-         "checked", "cite", "class", "clear", "color", "cols", "colspan", "compact",
-         "coords", "datetime", "dir", "disabled", "enctype", "for", "frame",
-         "headers", "height", "href", "hreflang", "hspace", "id", "ismap", "label",
-         "lang", "longdesc", "maxlength", "media", "method", "multiple", "name",
-         "nohref", "noshade", "nowrap", "prompt", "readonly", "rel", "rev", "rows",
-         "rowspan", "rules", "scope", "selected", "shape", "size", "span", "src",
-         "start", "style", "summary", "tabindex", "target", "title", "type", "usemap",
-         "valign", "value", "vspace", "width", "xml:lang"
-     );
-
-     /** MathML attribute names accepted by validateHtmlAttribute(). */
-     protected java.util.Set<String> mathmlAttributes = setOf(
-         "actiontype", "align", "columnalign", "columnlines", "columnspacing",
-         "columnspan", "depth", "display", "displaystyle", "equalcolumns",
-         "equalrows", "fence", "fontstyle", "fontweight", "frame", "height",
-         "linethickness", "lspace", "mathbackground", "mathcolor", "mathvariant",
-         "maxsize", "minsize", "other", "rowalign", "rowlines", "rowspacing",
-         "rowspan", "rspace", "scriptlevel", "selection", "separator", "stretchy",
-         "width", "xlink:href", "xlink:show", "xlink:type", "xmlns", "xmlns:xlink"
-     );
-
-     /** SVG attribute names accepted by validateHtmlAttribute(). */
-     protected java.util.Set<String> svgAttributes = setOf(
-         "accent-height", "accumulate", "additive", "alphabetic", "arabic-form",
-         "ascent", "attributeName", "attributeType", "baseProfile", "bbox", "begin",
-         "by", "calcMode", "cap-height", "class", "color", "color-rendering",
-         "content", "cx", "cy", "d", "descent", "display", "dur", "dx", "dy", "end",
-         "fill", "fill-rule", "font-family", "font-size", "font-stretch",
-         "font-style", "font-variant", "font-weight", "from", "fx", "fy", "g1", "g2",
-         "glyph-name", "gradientUnits", "hanging", "height", "horiz-adv-x",
-         "horiz-origin-x", "id", "ideographic", "k", "keyPoints", "keySplines",
-         "keyTimes", "lang", "marker-end", "marker-mid", "marker-start",
-         "markerHeight", "markerUnits", "markerWidth", "mathematical", "max", "min",
-         "name", "offset", "opacity", "orient", "origin", "overline-position",
-         "overline-thickness", "panose-1", "path", "pathLength", "points",
-         "preserveAspectRatio", "r", "refX", "refY", "repeatCount", "repeatDur",
-         "requiredExtensions", "requiredFeatures", "restart", "rotate", "rx", "ry",
-         "slope", "stemh", "stemv", "stop-color", "stop-opacity",
-         "strikethrough-position", "strikethrough-thickness", "stroke",
-         "stroke-dasharray", "stroke-dashoffset", "stroke-linecap", "stroke-linejoin",
-         "stroke-miterlimit", "stroke-opacity", "stroke-width", "systemLanguage",
-         "target", "text-anchor", "to", "transform", "type", "u1", "u2",
-         "underline-position", "underline-thickness", "unicode", "unicode-range",
-         "units-per-em", "values", "version", "viewBox", "visibility", "width",
-         "widths", "x", "x-height", "x1", "x2", "xlink:actuate", "xlink:arcrole",
-         "xlink:href", "xlink:role", "xlink:show", "xlink:title", "xlink:type",
-         "xml:base", "xml:lang", "xml:space", "xmlns", "xmlns:xlink", "y", "y1", "y2",
-         "zoomAndPan"
-     );
-
-     /** Accepts a single CSS value that is not listed in stylePropertiesValues. */
-     public final java.util.regex.Pattern REGEX_VALID_CSS_VALUE = java.util.regex.Pattern.compile(
-         "^(#[0-9a-f]{3,6}|rgb\\(\\d{1,3}%?,\\d{1,3}%?,?\\d{1,3}%?\\)?|-?\\d{0,2}\\.?\\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\\))?)$"
-     );
-
-     /** First whole-value check applied to a style attribute (allowed characters). */
-     public final java.util.regex.Pattern REGEX_VALID_CSS_STRING1 = java.util.regex.Pattern.compile(
-         "^([-:,;#%.\\sa-zA-Z0-9!]|\\w-\\w|'[\\s\\w]+'|\"[\\s\\w]+\"|\\([\\d,\\s]+\\))*$"
-     );
-
-     /** Second whole-value check applied to a style attribute ("name: value;" shape). */
-     public final java.util.regex.Pattern REGEX_VALID_CSS_STRING2 = java.util.regex.Pattern.compile(
-         "^(\\s*[-\\w]+\\s*:\\s*[^:;]*(;|$))*$"
-     );
-
-     /** CSS property names accepted inside a style attribute. */
-     protected java.util.Set<String> styleProperties = setOf(
-         "azimuth",
-         "background", "background-attachment", "background-color", "background-image",
-         "background-position", "background-repeat",
-         "border", "border-bottom", "border-bottom-color", "border-bottom-style",
-         "border-bottom-width", "border-collapse", "border-color", "border-left",
-         "border-left-color", "border-left-style", "border-left-width", "border-right",
-         "border-right-color", "border-right-style", "border-right-width", "border-spacing",
-         "border-style", "border-top", "border-top-color", "border-top-style",
-         "border-top-width", "border-width",
-         "clear", "color",
-         "cursor", "direction", "display", "elevation", "float", "font",
-         "font-family", "font-size", "font-style", "font-variant", "font-weight",
-         "height", "letter-spacing", "line-height",
-         "margin", "margin-bottom", "margin-left", "margin-right", "margin-top",
-         "max-height", "max-width", "min-height", "min-width",
-         "overflow",
-         "padding", "padding-bottom", "padding-left", "padding-right", "padding-top",
-         "pause", "pause-after", "pause-before", "pitch",
-         "pitch-range", "richness", "speak", "speak-header", "speak-numeral",
-         "speak-punctuation", "speech-rate", "stress", "text-align",
-         "text-decoration", "text-indent", "unicode-bidi", "vertical-align",
-         "voice-family", "volume", "white-space", "width"
-     );
-
-     /** CSS keyword values accepted without consulting REGEX_VALID_CSS_VALUE. */
-     protected java.util.Set<String> stylePropertiesValues = setOf(
-         "aqua", "auto", "baseline", "black", "block", "blue", "bold", "both", "bottom", "brown",
-         "center", "collapse", "dashed", "dotted", "fuchsia", "gray", "green",
-         "inherit", "italic", "left", "length", "lime", "maroon", "medium", "middle", "navy",
-         "none", "normal", "nowrap", "olive", "percentage", "pointer", "purple", "red", "right",
-         "silver", "solid", "sub", "super", "teal", "text-bottom", "text-top", "top",
-         "transparent", "underline", "white", "yellow"
-     );
-
-     /** SVG-specific CSS property names accepted inside a style attribute. */
-     protected java.util.Set<String> svgStyleProperties = setOf(
-         "fill", "fill-opacity", "fill-rule", "stroke", "stroke-linecap",
-         "stroke-linejoin", "stroke-opacity", "stroke-width"
-     );
-
-     /**
-      * Rejects a link URI that validateURI() does not accept.
-      * The message comes from getInvalidURIMessage(), as it does for URI-valued
-      * attributes, so overriding that method affects both paths.
-      */
-     public void validateLinkTagURI(Token element, String uri) throws SemanticException {
-         if (!validateURI(uri)) {
-             throw createSemanticException(getInvalidURIMessage(uri), element);
-         }
-     }
-
-     /** Rejects an element whose lower-cased name is in none of the element sets. */
-     public void validateHtmlElement(Token element) throws SemanticException {
-         String elementName = lowerCase(element.getText());
-         boolean known = acceptableElements.contains(elementName)
-             || svgElements.contains(elementName)
-             || mathmlElements.contains(elementName);
-         if (!known) {
-             throw createSemanticException(getInvalidElementMessage(elementName), element);
-         }
-     }
-
-     /** Rejects an attribute whose lower-cased name is in none of the attribute sets. */
-     public void validateHtmlAttribute(Token element, Token attribute) throws SemanticException {
-         String elementName = lowerCase(element.getText());
-         String attributeName = lowerCase(attribute.getText());
-         boolean known = acceptableAttributes.contains(attributeName)
-             || svgAttributes.contains(attributeName)
-             || mathmlAttributes.contains(attributeName);
-         if (!known) {
-             throw createSemanticException(getInvalidAttributeMessage(elementName, attributeName), element);
-         }
-     }
-
-     /**
-      * Validates an attribute value. Null or empty values are accepted as is.
-      * URI-valued attributes go through validateURI(); the style attribute goes
-      * through the CSS checks in validateStyleValue().
-      */
-     public void validateHtmlAttributeValue(Token element, Token attribute, String attributeValue)
-             throws SemanticException {
-
-         if (attributeValue == null || attributeValue.length() == 0) return;
-
-         String elementName = lowerCase(element.getText());
-         String attributeName = lowerCase(attribute.getText());
-
-         // Attributes such as href or src must carry an acceptable URI
-         if (attributesWhoseValueIsAURI.contains(attributeName) && !validateURI(attributeValue)) {
-             throw createSemanticException(getInvalidURIMessage(attributeValue), element);
-         }
-
-         // CSS filtering
-         if (attributeName.equals("style")) {
-             validateStyleValue(element, elementName, attributeName, attributeValue);
-         }
-     }
-
-     /** Checks a style attribute value: overall shape first, then each declaration. */
-     private void validateStyleValue(Token element, String elementName, String attributeName,
-                                     String attributeValue) throws SemanticException {
-         if (!REGEX_VALID_CSS_STRING1.matcher(attributeValue).matches() ||
-             !REGEX_VALID_CSS_STRING2.matcher(attributeValue).matches()) {
-             throw invalidAttributeValue(element, elementName, attributeName, attributeValue);
-         }
-
-         for (String cssProperty : attributeValue.split(";")) {
-             if (!cssProperty.contains(":")) {
-                 throw invalidAttributeValue(element, elementName, attributeName, attributeValue);
-             }
-             String[] property = cssProperty.split(":");
-             String propertyName = property[0].trim();
-             // A declaration without exactly one value part is not value-checked
-             String propertyValue = property.length == 2 ? property[1].trim() : null;
-
-             // CSS property name
-             if (!styleProperties.contains(propertyName) && !svgStyleProperties.contains(propertyName)) {
-                 throw invalidAttributeValue(element, elementName, attributeName, attributeValue);
-             }
-
-             // CSS property value: keyword list first, regex as fallback
-             if (propertyValue != null
-                     && !stylePropertiesValues.contains(propertyValue)
-                     && !REGEX_VALID_CSS_VALUE.matcher(propertyValue).matches()) {
-                 throw invalidAttributeValue(element, elementName, attributeName, attributeValue);
-             }
-         }
-     }
-
-     /** Creates the exception reported for an unacceptable attribute value. */
-     private SemanticException invalidAttributeValue(Token element, String elementName,
-                                                     String attributeName, String attributeValue) {
-         return createSemanticException(
-             getInvalidAttributeValueMessage(elementName, attributeName, attributeValue),
-             element
-         );
-     }
-
-     public String getInvalidURIMessage(String uri) {
-         return "invalid URI";
-     }
-
-     public String getInvalidElementMessage(String elementName) {
-         return "invalid element '" + elementName + "'";
-     }
-
-     public String getInvalidAttributeMessage(String elementName, String attributeName) {
-         return "invalid attribute '" + attributeName + "' for element '" + elementName + "'";
-     }
-
-     public String getInvalidAttributeValueMessage(String elementName, String attributeName, String value) {
-         return "invalid value of attribute '" + attributeName + "' for element '" + elementName + "'";
-     }
-
-     /** Intentionally empty in the default implementation. */
-     public void escapeSeamTextToken(Token element) {
-     }
-
-     /**
-      * Accepts URIs that start with a slash, and parseable URIs whose scheme is
-      * listed in uriSchemes. Schemes are compared case-insensitively; a null URI,
-      * an unparseable URI or a URI without scheme is rejected.
-      */
-     protected boolean validateURI(String uri) {
-         if (uri == null) return false;
-
-         // Relative URI starts with a slash
-         if (uri.startsWith("/")) return true;
-
-         String scheme;
-         try {
-             scheme = new java.net.URI(uri).getScheme();
-         } catch (java.net.URISyntaxException ex) {
-             return false;
-         }
-         return scheme != null && uriSchemes.contains(lowerCase(scheme));
-     }
-
-     /** Creates a SemanticException positioned at the given token. */
-     public SemanticException createSemanticException(String message, Token element) {
-         return new SemanticException(
-             message,
-             element.getFilename(), element.getLine(), element.getColumn()
-         );
-     }
-
- }
-
-
- // Seam Text markup symbols written to the output or escaped by escapeSeamText().
- private final String SEAMTEXT_MONOSPACE = "|";
- private final String SEAMTEXT_TWIDDLE = "~";
- private final String SEAMTEXT_HASH = "#";
- private final String SEAMTEXT_HAT = "^";
- private final String SEAMTEXT_PLUS = "+";
- private final String SEAMTEXT_STAR = "*";
- private final String SEAMTEXT_UNDERSCORE = "_";
- private final String SEAMTEXT_EQ = "=";
- private final String SEAMTEXT_BACKTICK = "`";
- private final String SEAMTEXT_PARAGRAPH = "\n\n";
-
- // Quote, link brackets and closing angle bracket.
- private final String SEAM_DOUBLEQUOTE = "\"";
- private final String SEAM_OPEN = "[";
- private final String SEAM_CLOSE = "]";
- private final String SEAM_GT = ">";
-
-
- // Symbols that escapeSeamText() passes through unescaped inside <tt>/<pre>.
- // Typed HashSet instead of the raw type, so the assignment is checked.
- protected java.util.Set<String> seamTextSymbols = new java.util.HashSet<String>(java.util.Arrays.asList(
-     SEAMTEXT_MONOSPACE, SEAMTEXT_TWIDDLE, SEAMTEXT_HASH, SEAMTEXT_HAT, SEAMTEXT_PLUS, SEAMTEXT_STAR,
-     SEAMTEXT_UNDERSCORE, SEAMTEXT_EQ, SEAMTEXT_BACKTICK, SEAMTEXT_PARAGRAPH, SEAM_DOUBLEQUOTE, SEAM_OPEN,
-     SEAM_CLOSE, SEAM_GT
- ));
-
- // Lower-case names of the HTML elements that isSeamTextElement() reports as
- // Seam Text elements. Typed HashSet instead of the raw type.
- protected java.util.Set<String> htmlSeamTextElements = new java.util.HashSet<String>(java.util.Arrays.asList(
-     "del", "sup", "pre", "p", "q", "h1", "h2", "h3", "h4", "ul", "ol", "li", "i", "tt", "u", "a"
- ));
-
- // Validation strategy; replaceable through setSanitizer().
- private Sanitizer sanitizer = new DefaultSanitizer();
-
- // Name tokens pushed by openTag and popped by the closeTag rules.
- private java.util.Stack<Token> htmlElementStack = new java.util.Stack<Token>();
-
- // Builder that endCapture() switches back to.
- private StringBuilder mainBuilder = new StringBuilder();
-
- // Plain text gathered by the body rule while the enclosing element is a link.
- private StringBuilder linkValueCollector = new StringBuilder();
-
- // Value of the href attribute stored by the attribute rule for an <a> element.
- private String linkHolder;
-
- // Current target of append(); must be initialised after mainBuilder.
- private StringBuilder builder = mainBuilder;
-
-
- /** Replaces the sanitizer used by the grammar actions. */
- public void setSanitizer(Sanitizer sanitizer) {
-     this.sanitizer = sanitizer;
- }
-
- /** @return the text accumulated in the current builder */
- public String toString() {
-     return this.builder.toString();
- }
-
- /** Redirects append() to a fresh builder until endCapture() is called. */
- private void beginCapture() {
-     this.builder = new StringBuilder();
- }
-
- /** Returns the text of the current builder and switches back to mainBuilder. */
- private String endCapture() {
-     String captured = this.builder.toString();
-     this.builder = this.mainBuilder;
-     return captured;
- }
-
- /** Appends each given string, in order, to the current builder. */
- private void append(String... strings) {
-     for (int i = 0; i < strings.length; i++) {
-         builder.append(strings[i]);
-     }
- }
-
-
- /**
-  * @return true when the token text is "a", ignoring case. The comparison is
-  *         locale-independent, unlike lower-casing with the default locale.
-  */
- public boolean isLink(Token token) {
-     return "a".equalsIgnoreCase(token.getText());
- }
-
- /**
-  * Builds "[value=>link]"; the value is trimmed and left out when it is null.
-  */
- private String createSeamTextLink(String link, String value) {
-     String label = (value == null) ? "" : value.trim();
-     return "[" + label + "=>" + link + "]";
- }
-
- /**
-  * @return true when the token text is h1, h2, h3 or h4, ignoring case. The
-  *         comparison does not depend on the JVM default locale.
-  */
- public boolean isHeader(Token token) {
-     String name = token.getText();
-     return "h1".equalsIgnoreCase(name) || "h2".equalsIgnoreCase(name)
-         || "h3".equalsIgnoreCase(name) || "h4".equalsIgnoreCase(name);
- }
-
- /**
-  * Returns a newline followed by one to four plus signs for h1..h4, or an
-  * empty string for any other token. The tag name is matched ignoring case,
-  * so an upper-case heading tag no longer yields an empty result.
-  */
- public String createSeamTextHeader(Token token) throws SemanticException {
-     String name = token.getText();
-     int level = 0;
-     if ("h1".equalsIgnoreCase(name)) {
-         level = 1;
-     } else if ("h2".equalsIgnoreCase(name)) {
-         level = 2;
-     } else if ("h3".equalsIgnoreCase(name)) {
-         level = 3;
-     } else if ("h4".equalsIgnoreCase(name)) {
-         level = 4;
-     }
-     if (level == 0) {
-         return "";
-     }
-
-     StringBuilder seamHeader = new StringBuilder("\n");
-     for (int i = 0; i < level; i++) {
-         seamHeader.append(SEAMTEXT_PLUS);
-     }
-     return seamHeader.toString();
- }
-
- /**
-  * @return true when the token text is "ul" or "ol", ignoring case (the other
-  *         is* helpers of this parser are case-insensitive as well).
-  */
- public boolean isList(Token token) {
-     String name = token.getText();
-     return "ul".equalsIgnoreCase(name) || "ol".equalsIgnoreCase(name);
- }
-
- /**
-  * @return true when the token text is "li", ignoring case. The comparison is
-  *         locale-independent ("LI" lower-cases differently under a Turkish locale).
-  */
- public boolean isListItem(Token token) {
-     return "li".equalsIgnoreCase(token.getText());
- }
-
- /**
-  * Returns the Seam Text list-item marker for an li element, chosen from the
-  * element on top of the stack: "#" inside an ordered list (ol) and "="
-  * inside an unordered list (ul), which is how Seam Text defines the markers.
-  *
-  * @param token            the li name token
-  * @param htmlElementStack open elements; the top one must be ol or ul
-  * @throws SemanticException when no element is open or the enclosing element
-  *                           is neither ol nor ul
-  */
- public String createSeamTextList(Token token, java.util.Stack<Token> htmlElementStack) throws SemanticException {
-     // A list item outside any element used to fail with EmptyStackException
-     if (htmlElementStack.isEmpty()) {
-         throw new SemanticException("<li> must follow <ol> or <ul>",
-             token.getFilename(), token.getLine(), token.getColumn());
-     }
-
-     Token parent = htmlElementStack.peek();
-     String parentName = parent.getText();
-     if ("ol".equalsIgnoreCase(parentName)) {
-         return SEAMTEXT_HASH;
-     }
-     if ("ul".equalsIgnoreCase(parentName)) {
-         return SEAMTEXT_EQ;
-     }
-
-     String message = "<li> must follow <ol> or <ul> not <" + parent.getText() + ">";
-     throw new SemanticException(message, parent.getFilename(), parent.getLine(), parent.getColumn());
- }
-
- /**
-  * Rejects a list or header that is opened while a header (h1..h4) or a list
-  * (ol, ul) is already open. Open element names are compared ignoring case.
-  *
-  * @param name             name token of the element being opened
-  * @param htmlElementStack currently open elements
-  * @throws SemanticException positioned at name when such an element is open
-  */
- public void validateNestedMarkup(Token name, java.util.Stack<Token> htmlElementStack) throws SemanticException {
-     for (Token token : htmlElementStack) {
-         String open = token.getText();
-         boolean headerOrList = "h1".equalsIgnoreCase(open) || "h2".equalsIgnoreCase(open)
-             || "h3".equalsIgnoreCase(open) || "h4".equalsIgnoreCase(open)
-             || "ol".equalsIgnoreCase(open) || "ul".equalsIgnoreCase(open);
-         if (headerOrList) {
-             String message = "<" + token.getText() + "> contains nested <" + name.getText() + "> token";
-             throw new SemanticException(message, name.getFilename(), name.getLine(), name.getColumn());
-         }
-     }
- }
-
- /**
-  * When the element on top of the stack is a header (h1..h4, ignoring case),
-  * scans the lookahead tokens from LT(1) until an ALPHANUMERICWORD or EOF is
-  * seen and fails if EOF comes first.
-  *
-  * @param token            not used by this method
-  * @param htmlElementStack open elements; must not be empty
-  * @throws SemanticException positioned at the header when no word follows
-  */
- public void validateHeaderMarkup(Token token, java.util.Stack<Token> htmlElementStack) throws TokenStreamException, SemanticException {
-     // NOTE(review): hard-coded token type numbers; presumably they mirror the
-     // generated token types - confirm against the generated TokenTypes interface.
-     int EOF = 1;
-     int ALPHANUMERICWORD = 4;
-
-     Token element = htmlElementStack.peek();
-     String header = element.getText();
-     boolean isHeaderElement = "h1".equalsIgnoreCase(header) || "h2".equalsIgnoreCase(header)
-         || "h3".equalsIgnoreCase(header) || "h4".equalsIgnoreCase(header);
-     if (!isHeaderElement) {
-         return;
-     }
-
-     boolean containText = false;
-     for (int i = 1; ; i++) {
-         int type = LT(i).getType();
-         if (type == ALPHANUMERICWORD) {
-             containText = true;
-             break;
-         }
-         if (type == EOF) {
-             break;
-         }
-     }
-
-     if (!containText) {
-         String message = "You must have some text following a heading";
-         throw new SemanticException(message, element.getFilename(), element.getLine(), element.getColumn());
-     }
- }
-
- /**
-  * Maps an inline/block HTML element name to its Seam Text symbol; returns an
-  * empty string for any other name. Names are lower-cased with a fixed locale
-  * so that "I" maps correctly regardless of the JVM default locale.
-  */
- public String createSimpleSeamText(Token token) throws SemanticException {
-     String name = token.getText().toLowerCase(java.util.Locale.ENGLISH);
-
-     if ("tt".equals(name)) {
-         return SEAMTEXT_MONOSPACE;
-     } else if ("del".equals(name)) {
-         return SEAMTEXT_TWIDDLE;
-     } else if ("i".equals(name)) {
-         return SEAMTEXT_STAR;
-     } else if ("sup".equals(name)) {
-         return SEAMTEXT_HAT;
-     } else if ("u".equals(name)) {
-         return SEAMTEXT_UNDERSCORE;
-     } else if ("pre".equals(name)) {
-         return SEAMTEXT_BACKTICK;
-     } else if ("p".equals(name)) {
-         return SEAMTEXT_PARAGRAPH;
-     } else if ("q".equals(name) || "blockquote".equals(name)) {
-         return SEAM_DOUBLEQUOTE;
-     }
-     return "";
- }
-
- /**
-  * @return true when the lower-cased element name is listed in
-  *         htmlSeamTextElements; lower-casing uses a fixed locale so the result
-  *         does not depend on the JVM default locale.
-  */
- public boolean isSeamTextElement(Token element) {
-     String name = element.getText().toLowerCase(java.util.Locale.ENGLISH);
-     return htmlSeamTextElements.contains(name);
- }
-
-
- /**
-  * Converts a token to its Seam Text output form.
-  *
-  * Inside tt or pre (element on top of parentHtmlTokens) the HTML entities
-  * &lt; &amp; &gt; &quot; are decoded to the character they stand for, and
-  * Seam Text symbols are passed through unchanged. In every other case the
-  * token text is returned prefixed with a backslash.
-  *
-  * The entity literals had been decoded in transit ("<", "&", ">" and an
-  * unterminated quote literal); they are spelled out again here.
-  *
-  * @param token            token to convert
-  * @param parentHtmlTokens open elements; may be null or empty
-  */
- public String escapeSeamText(Token token, java.util.Stack<Token> parentHtmlTokens) throws TokenStreamException {
-     String tokenName = token.getText();
-
-     if (parentHtmlTokens != null && !parentHtmlTokens.isEmpty()) {
-         String parentTokenName = parentHtmlTokens.peek().getText();
-
-         if ("tt".equalsIgnoreCase(parentTokenName) || "pre".equalsIgnoreCase(parentTokenName)) {
-             if ("&lt;".equals(tokenName)) {
-                 return "<";
-             } else if ("&amp;".equals(tokenName)) {
-                 return "&";
-             } else if ("&gt;".equals(tokenName)) {
-                 return ">";
-             } else if ("&quot;".equals(tokenName)) {
-                 return "\"";
-             } else if (seamTextSymbols.contains(tokenName)) {
-                 return tokenName;
-             }
-         }
-     }
-     // Default: escape the token with a backslash
-     return "\\" + tokenName;
- }
-
-
-
-}
-
-
-// Entry point: optional leading newlines, then optionally text followed by EOF.
-startRule
-    : (newline)* (text eof)?
-    ;
-
-// One or more content items, each optionally followed by newlines.
-text
-    : ( ( seamCharacters | plain | html | htmlSpecialChars ) (newline)* )+
-    ;
-
-// A word token; its text is copied to the output unchanged.
-word
-    : an:ALPHANUMERICWORD { append( an.getText() ); }
-    | uc:UNICODEWORD { append( uc.getText() ); }
-    ;
-
-// A double quote is copied as is; escaped-entity tokens go through escapeSeamText().
-htmlSpecialChars
-    : DOUBLEQUOTE { append("\""); }
-    | lt:ESCAPED_LT { append(escapeSeamText(lt, htmlElementStack)); }
-    | gt:ESCAPED_GT { append(escapeSeamText(gt, htmlElementStack)); }
-    | amp:ESCAPED_AMP { append(escapeSeamText(amp, htmlElementStack)); }
-    | qout:ESCAPED_QOUT { append(escapeSeamText(qout, htmlElementStack)); }
-    ;
-// End of input.
-eof
-    : EOF
-    ;
-
-// Punctuation, single quote or slash; copied to the output unchanged.
-punctuation
-    : p:PUNCTUATION { append( p.getText() ); }
-    | sq:SINGLEQUOTE { append( sq.getText() ); }
-    | s:SLASH { append( s.getText() ); }
-    ;
-
-// Symbol tokens copied to the output without escaping (used by attributeValue).
-specialChars
-    : st:STAR { append( st.getText() ); }
-    | b:BAR { append( b.getText() ); }
-    | h:HAT { append( h.getText() ); }
-    | p:PLUS { append( p.getText() ); }
-    | eq:EQ { append( eq.getText() ); }
-    | hh:HASH { append( hh.getText() ); }
-    | e:ESCAPE { append( e.getText() ); }
-    | t:TWIDDLE { append( t.getText() ); }
-    | u:UNDERSCORE { append( u.getText() ); }
-    ;
-
-
-// Symbol tokens that are passed through escapeSeamText() before being appended.
-seamCharacters
-    : hat:HAT { append(escapeSeamText(hat, htmlElementStack)); }
-    | hash:HASH { append(escapeSeamText(hash, htmlElementStack)); }
-    | open:OPEN { append(escapeSeamText(open, htmlElementStack)); }
-    | close:CLOSE { append(escapeSeamText(close, htmlElementStack)); }
-    | twiddle:TWIDDLE { append(escapeSeamText(twiddle, htmlElementStack)); }
-    | bar:BAR { append(escapeSeamText(bar, htmlElementStack)); }
-    | eq:EQ { append(escapeSeamText(eq, htmlElementStack)); }
-    | plus:PLUS { append(escapeSeamText(plus, htmlElementStack)); }
-    | backtick:BACKTICK { append(escapeSeamText(backtick, htmlElementStack)); }
-    | st:STAR { append(escapeSeamText(st, htmlElementStack)); }
-    | e:ESCAPE { append(escapeSeamText(e, htmlElementStack)); }
-    | gt:GT { append(escapeSeamText(gt, htmlElementStack)); }
-    ;
-
-// Whitespace is copied to the output unless the element on top of the stack
-// is a list (ul/ol). The tag name is compared ignoring case, so upper-case
-// list tags are treated the same way.
-space
-    : s:SPACE
-      {
-          boolean directlyInList = false;
-          if (!htmlElementStack.isEmpty()) {
-              String tokenName = htmlElementStack.peek().getText();
-              directlyInList = "ul".equalsIgnoreCase(tokenName) || "ol".equalsIgnoreCase(tokenName);
-          }
-          if (!directlyInList) {
-              append(s.getText());
-          }
-      }
-    ;
-
-// A line break; copied to the output unchanged.
-newline
-    : n:NEWLINE { append(n.getText()); }
-    ;
-
-// Either a line break or the end of input.
-newlineOrEof
-    : newline
-    | EOF
-    ;
-
-// An HTML element: opening tag, attributes separated by whitespace, then
-// either ">" body "</name>" or the self-closing "/>".
-html
-    : openTag
-      ( space | space attribute )*
-      ( ( beforeBody body closeTagWithBody ) | closeTagWithNoBody )
-    ;
-
-// A single word, punctuation mark or whitespace run.
-plain
-    : ( word | punctuation | space )
-    ;
-
-// Content between an opening and a closing tag.
-// isLink is computed once from the element on top of the stack. While it is
-// true, plain text is captured into linkValueCollector instead of the output,
-// and an htmlSpecialChars item raises a SemanticException.
-body
-    : (
-        {
-            Token token = htmlElementStack.peek();
-            boolean isLink = isLink(token);
-            linkValueCollector = new StringBuilder();
-        }
-        (
-            seamCharacters
-        |
-            {
-                if (isLink) {
-                    beginCapture();
-                }
-            }
-            plain
-            {
-                if (isLink) {
-                    String plain = endCapture();
-                    linkValueCollector.append(plain);
-                }
-            }
-        |   html
-        |   htmlSpecialChars
-            {
-                if (isLink) {
-                    String message = "unexpected token";
-                    throw new SemanticException(message);
-                }
-            }
-        |   newline: NEWLINE
-        )*
-      )
-    ;
-
-// "<name": validates the element, emits its Seam Text prefix (list item
-// marker or header marker) or the raw "<name", and pushes the name token.
-// Lists and headers are first checked with validateNestedMarkup().
-openTag
-    : LT name:ALPHANUMERICWORD
-      {
-          sanitizer.validateHtmlElement(name);
-
-          if (!isSeamTextElement(name)) {
-              append("<");
-              append(name.getText());
-          } else if (isList(name)) {
-              validateNestedMarkup(name, htmlElementStack);
-          } else if (isListItem(name)) {
-              append(createSeamTextList(name, htmlElementStack));
-          } else if (isHeader(name)) {
-              validateNestedMarkup(name, htmlElementStack);
-              append(createSeamTextHeader(name));
-          }
-          htmlElementStack.push(name);
-      }
-    ;
-    exception
-    catch [RecognitionException ex] {
-        // Attach the innermost open element to the error when there is one
-        Token tok = htmlElementStack.isEmpty() ? null : htmlElementStack.peek();
-        if (tok == null) {
-            throw ex;
-        }
-        throw new HtmlRecognitionException(tok, ex);
-    }
-
-
-
-// ">" ending an opening tag: emits the Seam Text symbol of the element on top
-// of the stack, or a literal ">" for elements that are kept as HTML.
-beforeBody
-    : GT
-      {
-          Token name = htmlElementStack.peek();
-          if (!isSeamTextElement(name)) {
-              append(">");
-          } else {
-              append(createSimpleSeamText(name));
-          }
-      }
-    ;
-    exception
-    catch [RecognitionException ex] {
-        // Attach the innermost open element to the error when there is one
-        Token tok = htmlElementStack.isEmpty() ? null : htmlElementStack.peek();
-        if (tok == null) {
-            throw ex;
-        }
-        throw new HtmlRecognitionException(tok, ex);
-    }
-
-
-// "</name>": emits the Seam Text closing form (link, symbol, and a newline
-// after list items and headers) or the raw closing tag, then pops the stack.
-// NOTE(review): the closing name is not compared with the element on top of
-// the stack - confirm whether mismatched tags are meant to be accepted.
-closeTagWithBody
-    : LT SLASH name:ALPHANUMERICWORD GT
-      {
-          if (!isSeamTextElement(name)) {
-              append("</");
-              append(name.getText());
-              append(">");
-          } else {
-              if (isLink(name)) {
-                  append(createSeamTextLink(linkHolder, linkValueCollector.toString().trim()));
-              } else {
-                  append(createSimpleSeamText(name));
-              }
-
-              if (isListItem(name) || isHeader(name)) {
-                  append("\n");
-                  validateHeaderMarkup(name, htmlElementStack);
-              }
-          }
-
-          htmlElementStack.pop();
-      }
-    ;
-
-// "/>" ending a self-closing element; pops the element pushed by openTag.
-// The literal "/>" is written only for elements kept as HTML: for Seam Text
-// elements openTag never wrote "<name", so a bare "/>" would be left dangling
-// in the output.
-closeTagWithNoBody
-    : SLASH GT
-      {
-          if (!isSeamTextElement(htmlElementStack.peek())) {
-              append("/>");
-          }
-          htmlElementStack.pop();
-      }
-    ;
-
-// name = "value" inside an opening tag.
-// The attribute name and the captured value are validated by the sanitizer.
-// For elements kept as HTML the attribute is copied to the output; for an <a>
-// element the href value is stored in linkHolder instead. The href name is
-// matched ignoring case because validateHtmlAttribute() also accepts the
-// attribute name in any case; an upper-case HREF used to lose the link target.
-attribute
-    : att:ALPHANUMERICWORD (space)* EQ (space)*
-      DOUBLEQUOTE
-      {
-          Token token = htmlElementStack.peek();
-          sanitizer.validateHtmlAttribute(token, att);
-          boolean isSeamTextProcessed = isSeamTextElement(token);
-
-          if (!isSeamTextProcessed) {
-              append(att.getText());
-              append("=\"");
-          }
-          // Collect the value separately so it can be validated before use
-          beginCapture();
-      }
-      attributeValue
-      {
-          String attValue = endCapture();
-          sanitizer.validateHtmlAttributeValue(token, att, attValue);
-
-          if (!isSeamTextProcessed) {
-              append(attValue);
-          } else if (isLink(token) && "href".equalsIgnoreCase(att.getText())) {
-              linkHolder = attValue;
-          }
-      }
-      DOUBLEQUOTE
-      {
-          if (!isSeamTextProcessed) {
-              append("\"");
-          }
-      }
-    ;
-    exception
-    catch [RecognitionException ex] {
-        if (htmlElementStack.isEmpty()) throw ex;
-        Token tok = htmlElementStack.peek();
-        if (tok != null) {
-            throw new HtmlRecognitionException(tok, ex);
-        } else {
-            throw ex;
-        }
-    }
-
-
-// Content of a quoted attribute value; every accepted token is appended to
-// the current builder.
-attributeValue
-    : (
-        AMPERSAND { append("&"); }
-      | an:ALPHANUMERICWORD { append( an.getText() ); }
-      | p:PUNCTUATION { append( p.getText() ); }
-      | s:SLASH { append( s.getText() ); }
-      | space
-      | specialChars
-      )*
-    ;
-    exception
-    catch [RecognitionException ex] {
-        // Attach the innermost open element to the error when there is one
-        Token tok = htmlElementStack.isEmpty() ? null : htmlElementStack.peek();
-        if (tok == null) {
-            throw ex;
-        }
-        throw new HtmlRecognitionException(tok, ex);
-    }
-
-
-// Lexer for the HTML-to-Seam-Text grammar.
-// The ESCAPED_* rules match the HTML entities &lt; &gt; &amp; &quot;. Their
-// literals are spelled out as entities here: in decoded form ESCAPED_GT and
-// ESCAPED_AMP would duplicate GT and AMPERSAND, and ESCAPED_QOUT would not be
-// a valid string literal.
-class HtmlSeamTextLexer extends Lexer;
-
-options
-{
-    k=2;
-
-    // Allow any char but \uFFFF (16 bit -1)
-    charVocabulary='\u0000'..'\uFFFE';
-}
-
-
-// Unicode sets allowed:
-// '\u00a0'..'\u00ff' Latin 1 supplement (no control characters) http://www.unicode.org/charts/PDF/U0080.pdf
-// '\u0100'..'\u017f' Latin Extended A http://www.unicode.org/charts/PDF/U0100.pdf
-// '\u0180'..'\u024f' Latin Extended B http://www.unicode.org/charts/PDF/U0180.pdf
-// '\u0250'..'\ufaff' Various other languages, punctuation etc. (excluding "presentation forms")
-// '\uff00'..'\uffef' Halfwidth and Fullwidth forms (including CJK punctuation)
-
-ALPHANUMERICWORD
-    options { paraphrase = "letters or digits"; }
-    : ('a'..'z'|'A'..'Z'|'0'..'9')+
-    ;
-
-UNICODEWORD
-    options { paraphrase = "letters or digits"; }
-    : (
-        '\u00a0'..'\u00ff' |
-        '\u0100'..'\u017f' |
-        '\u0180'..'\u024f' |
-        '\u0250'..'\ufaff' |
-        '\uff00'..'\uffef'
-      )+
-    ;
-
-PUNCTUATION
-    options { paraphrase = "a punctuation character"; }
-    : '-' | ';' | ':' | '(' | ')' | '{' | '}' | '?' | '!' | '@' | '%' | '.' | ',' | '$'
-    ;
-
-EQ
-    options { paraphrase = "an equals '='"; }
-    : '='
-    ;
-
-PLUS
-    options { paraphrase = "a plus '+'"; }
-    : '+'
-    ;
-
-UNDERSCORE
-    options { paraphrase = "an underscore '_'"; }
-    : '_'
-    ;
-
-STAR
-    options { paraphrase = "a star '*'"; }
-    : '*'
-    ;
-
-SLASH
-    options { paraphrase = "a slash '/'"; }
-    : '/'
-    ;
-
-ESCAPE
-    options { paraphrase = "the escaping backslash '\'"; }
-    : '\\'
-    ;
-
-BAR
-    options { paraphrase = "a bar or pipe '|'"; }
-    : '|'
-    ;
-
-BACKTICK
-    options { paraphrase = "a backtick '`'"; }
-    : '`'
-    ;
-
-TWIDDLE
-    options { paraphrase = "a tilde '~'"; }
-    : '~'
-    ;
-
-DOUBLEQUOTE
-    options { paraphrase = "a doublequote \""; }
-    : '"'
-    ;
-
-SINGLEQUOTE
-    options { paraphrase = "a single quote '"; }
-    : '\''
-    ;
-
-OPEN
-    options { paraphrase = "an opening square bracket '['"; }
-    : '['
-    ;
-
-CLOSE
-    options { paraphrase = "a closing square bracket ']'"; }
-    : ']'
-    ;
-
-HASH
-    options { paraphrase = "a hash '#'"; }
-    : '#'
-    ;
-
-HAT
-    options { paraphrase = "a caret '^'"; }
-    : '^'
-    ;
-
-GT
-    options { paraphrase = "a closing angle bracket '>'"; }
-    : '>'
-    ;
-
-LT
-    options { paraphrase = "an opening angle bracket '<'"; }
-    : '<'
-    ;
-
-AMPERSAND
-    options { paraphrase = "an ampersand '&'"; }
-    : '&'
-    ;
-
-SPACE
-    options { paraphrase = "a space or tab"; }
-    : (' '|'\t')+
-    ;
-
-NEWLINE
-    options { paraphrase = "a newline"; }
-    : "\r\n" | '\r' | '\n'
-    ;
-
-EOF
-    options { paraphrase = "the end of the text"; }
-    : '\uFFFF'
-    ;
-
-ESCAPED_LT : "&lt;"
-    ;
-
-ESCAPED_GT : "&gt;"
-    ;
-
-ESCAPED_AMP : "&amp;"
-    ;
-
-ESCAPED_QOUT : "&quot;"
-    ;
-
-
-
-
-
-
-
-
-