[seam-commits] Seam SVN: r8317 - trunk.
seam-commits at lists.jboss.org
seam-commits at lists.jboss.org
Mon Jun 2 07:47:36 EDT 2008
Author: christian.bauer at jboss.com
Date: 2008-06-02 07:47:36 -0400 (Mon, 02 Jun 2008)
New Revision: 8317
Modified:
trunk/seam-text.g
Log:
JBSEAM-3058, customizable HTML/CSS sanitization for SeamTextParser
Modified: trunk/seam-text.g
===================================================================
--- trunk/seam-text.g 2008-06-02 10:50:09 UTC (rev 8316)
+++ trunk/seam-text.g 2008-06-02 11:47:36 UTC (rev 8317)
@@ -6,24 +6,359 @@
class SeamTextParser extends Parser;
options
{
- k=4;
- defaultErrorHandler=false;
+ k=4;
+ defaultErrorHandler=false;
}
{
- private java.util.Set htmlElements = new java.util.HashSet( java.util.Arrays.asList( new String[] { "a", "p", "q", "blockquote", "code", "pre", "table", "tr", "td", "th", "ul", "ol", "li", "b", "i", "u", "tt", "del", "em", "hr", "br", "div", "span", "h1", "h2", "h3", "h4", "img"} ) );
- private java.util.Set htmlAttributes = new java.util.HashSet( java.util.Arrays.asList( new String[] { "src", "href", "lang", "class", "id", "style", "width", "height", "name", "value", "type", "cellpadding", "cellspacing", "border" } ) );
+ public class Macro {
+ public String name;
+ public java.util.SortedMap<String,String> params = new java.util.TreeMap<String,String>();
- public class Macro {
- public String name;
- public java.util.SortedMap<String,String> params = new java.util.TreeMap<String,String>();
+ public Macro(String name) {
+ this.name = name;
+ }
+ }
- public Macro(String name) {
- this.name = name;
- }
- }
+ /**
+ * Sanitization of user input, used to clean links and plain HTML.
+ */
+ public interface Sanitizer {
- private Macro currentMacro;
-
+ /**
+ * Called by the SeamTextParser when a link tag is parsed, i.e. [=>some URI].
+ *
+ * @param uri the user-entered link text
+ * @throws SemanticException thrown if the URI is not syntactically or semantically valid
+ */
+ public void validateLinkTagURI(String uri) throws SemanticException;
+
+ /**
+ * Called by the SeamTextParser when a plain HTML element is parsed.
+ *
+ * @param element the token of the parse tree, call <tt>getText()</tt> to access the HTML tag name
+ * @throws SemanticException thrown when the HTML tag is not valid
+ */
+ public void validateHtmlElement(Token element) throws SemanticException;
+
+ /**
+ * Called by the SeamTextParser when a plain HTML attribute is parsed.
+ *
+ * @param element the token of the parse tree that represents the HTML tag
+ * @param attribute the token of the parse tree that represents the HTML attribute
+ * @throws SemanticException thrown if the attribute is not valid for the given HTML tag
+ */
+ public void validateHtmlAttribute(Token element, Token attribute) throws SemanticException;
+
+ /**
+ * Called by the SeamTextParser when a plain HTML attribute value is parsed.
+ *
+ * @param element the token of the parse tree that represents the HTML tag
+ * @param attribute the token of the parse tree that represents the HTML attribute
+ * @param attributeValue the plain string value of the HTML attribute
+ * @throws SemanticException thrown if the attribute value is not valid for the given HTML attribute and element
+ */
+ public void validateHtmlAttributeValue(Token element, Token attribute, String attributeValue) throws SemanticException;
+
+ public String getInvalidURIMessage(String uri);
+ public String getInvalidElementMessage(String elementName);
+ public String getInvalidAttributeMessage(String elementName, String attributeName);
+ public String getInvalidAttributeValueMessage(String elementName, String attributeName, String value);
+ }
+
+ /**
+ * Implementation of the rules in http://wiki.whatwg.org/wiki/Sanitization_rules
+ *
+ * Changes and additions:
+ *
+ * 1. Expanded all -* wildcard values to their full CSS property name (e.g. border-*).
+ *
+ * 2. Added dash as allowed characater to REGEX_VALID_CSS_STRING1.
+ *
+ * 3. Improved REGEX_VALID_CSS_VALUE with range {n,m} checks for color values and negative units.
+ *
+ * 4. Added more options (mostly of vertical-align property, e.g. "middle", "text-top") as allowed CSS values.
+ *
+ * 5. Added "max-height", "max-width", "min-height", "min-width" to CSS properties.
+ *
+ * 6. Removed 'data' URI scheme.
+ *
+ * 7. Not implemented filtering of CSS url() - it's an invalid value always.
+ *
+ */
+ public static class DefaultSanitizer implements SeamTextParser.Sanitizer {
+
+ public final java.util.regex.Pattern REGEX_VALID_CSS_STRING1 = java.util.regex.Pattern.compile(
+ "^([-:,;#%.\\sa-zA-Z0-9!]|\\w-\\w|'[\\s\\w]+'|\"[\\s\\w]+\"|\\([\\d,\\s]+\\))*$"
+ );
+
+ public final java.util.regex.Pattern REGEX_VALID_CSS_STRING2 = java.util.regex.Pattern.compile(
+ "^(\\s*[-\\w]+\\s*:\\s*[^:;]*(;|$))*$"
+ );
+
+ public final java.util.regex.Pattern REGEX_VALID_CSS_VALUE = java.util.regex.Pattern.compile(
+ "^(#[0-9a-f]{3,6}|rgb\\(\\d{1,3}%?,\\d{1,3}%?,?\\d{1,3}%?\\)?|-?\\d{0,2}\\.?\\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\\))?)$"
+ );
+
+ public final java.util.regex.Pattern REGEX_INVALID_CSS_URL = java.util.regex.Pattern.compile(
+ "url\\s*\\(\\s*[^\\s)]+?\\s*\\)\\s*"
+ );
+
+ protected java.util.Set<String> acceptableElements = new java.util.HashSet(java.util.Arrays.asList(
+ "a", "abbr", "acronym", "address", "area", "b", "bdo", "big", "blockquote",
+ "br", "button", "caption", "center", "cite", "code", "col", "colgroup", "dd",
+ "del", "dfn", "dir", "div", "dl", "dt", "em", "fieldset", "font", "form",
+ "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "input", "ins", "kbd",
+ "label", "legend", "li", "map", "menu", "ol", "optgroup", "option", "p",
+ "pre", "q", "s", "samp", "select", "small", "span", "strike", "strong",
+ "sub", "sup", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
+ "tr", "tt", "u", "ul", "var", "wbr"
+ ));
+
+ protected java.util.Set<String> mathmlElements = new java.util.HashSet(java.util.Arrays.asList(
+ "maction", "math", "merror", "mfrac", "mi", "mmultiscripts", "mn", "mo",
+ "mover", "mpadded", "mphantom", "mprescripts", "mroot", "mrow", "mspace",
+ "msqrt", "mstyle", "msub", "msubsup", "msup", "mtable", "mtd", "mtext",
+ "mtr", "munder", "munderover", "none"
+ ));
+
+ protected java.util.Set<String> svgElements = new java.util.HashSet(java.util.Arrays.asList(
+ "a", "animate", "animateColor", "animateMotion", "animateTransform",
+ "circle", "defs", "desc", "ellipse", "font-face", "font-face-name",
+ "font-face-src", "g", "glyph", "hkern", "image", "line", "linearGradient",
+ "marker", "metadata", "missing-glyph", "mpath", "path", "polygon",
+ "polyline", "radialGradient", "rect", "set", "stop", "svg", "switch", "text",
+ "title", "tspan", "use"
+ ));
+
+ protected java.util.Set<String> acceptableAttributes = new java.util.HashSet(java.util.Arrays.asList(
+ "abbr", "accept", "accept-charset", "accesskey", "action", "align", "alt",
+ "axis", "border", "cellpadding", "cellspacing", "char", "charoff", "charset",
+ "checked", "cite", "class", "clear", "color", "cols", "colspan", "compact",
+ "coords", "datetime", "dir", "disabled", "enctype", "for", "frame",
+ "headers", "height", "href", "hreflang", "hspace", "id", "ismap", "label",
+ "lang", "longdesc", "maxlength", "media", "method", "multiple", "name",
+ "nohref", "noshade", "nowrap", "prompt", "readonly", "rel", "rev", "rows",
+ "rowspan", "rules", "scope", "selected", "shape", "size", "span", "src",
+ "start", "style", "summary", "tabindex", "target", "title", "type", "usemap",
+ "valign", "value", "vspace", "width", "xml:lang"
+ ));
+
+ protected java.util.Set<String> mathmlAttributes = new java.util.HashSet(java.util.Arrays.asList(
+ "actiontype", "align", "columnalign", "columnalign", "columnalign",
+ "columnlines", "columnspacing", "columnspan", "depth", "display",
+ "displaystyle", "equalcolumns", "equalrows", "fence", "fontstyle",
+ "fontweight", "frame", "height", "linethickness", "lspace", "mathbackground",
+ "mathcolor", "mathvariant", "mathvariant", "maxsize", "minsize", "other",
+ "rowalign", "rowalign", "rowalign", "rowlines", "rowspacing", "rowspan",
+ "rspace", "scriptlevel", "selection", "separator", "stretchy", "width",
+ "width", "xlink:href", "xlink:show", "xlink:type", "xmlns", "xmlns:xlink"
+ ));
+
+ protected java.util.Set<String> svgAttributes = new java.util.HashSet(java.util.Arrays.asList(
+ "accent-height", "accumulate", "additive", "alphabetic", "arabic-form",
+ "ascent", "attributeName", "attributeType", "baseProfile", "bbox", "begin",
+ "by", "calcMode", "cap-height", "class", "color", "color-rendering",
+ "content", "cx", "cy", "d", "descent", "display", "dur", "dx", "dy", "end",
+ "fill", "fill-rule", "font-family", "font-size", "font-stretch",
+ "font-style", "font-variant", "font-weight", "from", "fx", "fy", "g1", "g2",
+ "glyph-name", "gradientUnits", "hanging", "height", "horiz-adv-x",
+ "horiz-origin-x", "id", "ideographic", "k", "keyPoints", "keySplines",
+ "keyTimes", "lang", "marker-end", "marker-mid", "marker-start",
+ "markerHeight", "markerUnits", "markerWidth", "mathematical", "max", "min",
+ "name", "offset", "opacity", "orient", "origin", "overline-position",
+ "overline-thickness", "panose-1", "path", "pathLength", "points",
+ "preserveAspectRatio", "r", "refX", "refY", "repeatCount", "repeatDur",
+ "requiredExtensions", "requiredFeatures", "restart", "rotate", "rx", "ry",
+ "slope", "stemh", "stemv", "stop-color", "stop-opacity",
+ "strikethrough-position", "strikethrough-thickness", "stroke",
+ "stroke-dasharray", "stroke-dashoffset", "stroke-linecap", "stroke-linejoin",
+ "stroke-miterlimit", "stroke-opacity", "stroke-width", "systemLanguage",
+ "target", "text-anchor", "to", "transform", "type", "u1", "u2",
+ "underline-position", "underline-thickness", "unicode", "unicode-range",
+ "units-per-em", "values", "version", "viewBox", "visibility", "width",
+ "widths", "x", "x-height", "x1", "x2", "xlink:actuate", "xlink:arcrole",
+ "xlink:href", "xlink:role", "xlink:show", "xlink:title", "xlink:type",
+ "xml:base", "xml:lang", "xml:space", "xmlns", "xmlns:xlink", "y", "y1", "y2",
+ "zoomAndPan"
+ ));
+
+ protected java.util.Set<String> styleProperties = new java.util.HashSet(java.util.Arrays.asList(
+ "azimuth",
+ "background", "background-attachment", "background-color", "background-image",
+ "background-position", "background-repeat",
+ "border", "border-bottom", "border-bottom-color", "border-bottom-style",
+ "border-bottom-width", "border-collapse", "border-color", "border-left",
+ "border-left-color", "border-left-style", "border-left-width", "border-right",
+ "border-right-color", "border-right-style", "border-right-width", "border-spacing",
+ "border-style", "border-top", "border-top-color", "border-top-style",
+ "border-top-width", "border-width",
+ "clear", "color",
+ "cursor", "direction", "display", "elevation", "float", "font",
+ "font-family", "font-size", "font-style", "font-variant", "font-weight",
+ "height", "letter-spacing", "line-height",
+ "margin", "margin-bottom", "margin-left", "margin-right", "margin-top",
+ "max-height", "max-width", "min-height", "min-width",
+ "overflow",
+ "padding", "padding-bottom", "padding-left", "padding-right", "padding-top",
+ "pause", "pause-after", "pause-before", "pitch",
+ "pitch-range", "richness", "speak", "speak-header", "speak-numeral",
+ "speak-punctuation", "speech-rate", "stress", "text-align",
+ "text-decoration", "text-indent", "unicode-bidi", "vertical-align",
+ "voice-family", "volume", "white-space", "width"
+ ));
+
+ protected java.util.Set<String> stylePropertiesValues = new java.util.HashSet(java.util.Arrays.asList(
+ "aqua", "auto", "baseline", "black", "block", "blue", "bold", "both", "bottom", "brown",
+ "center", "collapse", "dashed", "dotted", "fuchsia", "gray", "green",
+ "inherit", "italic", "left", "length", "lime", "maroon", "medium", "middle", "navy", "none", "normal",
+ "nowrap", "olive", "percentage", "pointer", "purple", "red", "right", "silver", "solid", "sub", "super",
+ "teal", "text-bottom", "text-top", "top", "transparent", "underline", "white", "yellow"
+ ));
+
+ protected java.util.Set<String> svgStyleProperties = new java.util.HashSet(java.util.Arrays.asList(
+ "fill", "fill-opacity", "fill-rule", "stroke", "stroke-linecap",
+ "stroke-linejoin", "stroke-opacity", "stroke-width"
+ ));
+
+ protected java.util.Set<String> attributesWhoseValueIsAURI = new java.util.HashSet(java.util.Arrays.asList(
+ "action", "cite", "href", "longdesc", "src", "xlink:href", "xml:base"
+ ));
+
+ protected java.util.Set<String> uriSchemes = new java.util.HashSet(java.util.Arrays.asList(
+ "afs", "aim", "callto", "ed2k", "feed", "ftp", "gopher", "http", "https",
+ "irc", "mailto", "news", "nntp", "rsync", "rtsp", "sftp", "ssh", "tag",
+ "tel", "telnet", "urn", "webcal", "wtai", "xmpp"
+ ));
+
+ public void validateLinkTagURI(String uri) throws SemanticException {
+ if (!validateURI(uri)) {
+ throw new SemanticException("Invalid URI");
+ }
+ }
+
+ public void validateHtmlElement(Token element) throws SemanticException {
+ String elementName = element.getText().toLowerCase();
+
+ if (!acceptableElements.contains(elementName) &&
+ !svgElements.contains(elementName) &&
+ !mathmlElements.contains(elementName)) {
+ throw new SemanticException(getInvalidElementMessage(elementName));
+ }
+ }
+
+ public void validateHtmlAttribute(Token element, Token attribute) throws SemanticException {
+ String elementName = element.getText().toLowerCase();
+ String attributeName = attribute.getText().toLowerCase();
+ if (!acceptableAttributes.contains(attributeName) &&
+ !svgAttributes.contains(attributeName) &&
+ !mathmlAttributes.contains(attributeName)) {
+ throw new SemanticException(getInvalidAttributeMessage(elementName, attributeName));
+ }
+ }
+
+ public void validateHtmlAttributeValue(Token element,
+ Token attribute,
+ String attributeValue) throws SemanticException {
+
+ String elementName = element.getText().toLowerCase();
+ String attributeName = attribute.getText().toLowerCase();
+
+ // Check element with attribute that has URI value (href, src, etc.)
+ if (attributesWhoseValueIsAURI.contains(attributeName) && !validateURI(attributeValue)) {
+ throw new SemanticException(getInvalidURIMessage(attributeValue));
+ }
+
+ // Check attribute value of style (CSS filtering)
+ if (attributeName.equals("style")) {
+ if (!REGEX_VALID_CSS_STRING1.matcher(attributeValue).matches() ||
+ !REGEX_VALID_CSS_STRING2.matcher(attributeValue).matches()) {
+ throw new SemanticException(getInvalidAttributeValueMessage(elementName, attributeName, attributeValue));
+ }
+
+ String[] cssProperties = attributeValue.split(";");
+ for (String cssProperty : cssProperties) {
+ if (!cssProperty.contains(":")) {
+ throw new SemanticException(getInvalidAttributeValueMessage(elementName, attributeName, attributeValue));
+ }
+ String[] property = cssProperty.split(":");
+ String propertyName = property[0].trim();
+ String propertyValue = property.length == 2 ? property[1].trim() : null;
+
+ // CSS property name
+ if (!styleProperties.contains(propertyName) &&
+ !svgStyleProperties.contains(propertyName)) {
+ throw new SemanticException(getInvalidAttributeValueMessage(elementName, attributeName, attributeValue));
+ }
+
+ // CSS property value
+ if (!stylePropertiesValues.contains(propertyValue)) {
+ // Not in list, now check the regex
+ if (!REGEX_VALID_CSS_VALUE.matcher(propertyValue).matches()) {
+ throw new SemanticException(getInvalidAttributeValueMessage(elementName, attributeName, attributeValue));
+ }
+ }
+ }
+ }
+
+ // TODO: Implement SVG style checking?! Who cares...
+ }
+
+ /**
+ * Validate a URI string.
+ * <p>
+ * The default implementation accepts any URI string that starts with a slash,
+ * this is considered a relative URL. Any absolute URI is parsed by the JDK with
+ * the <tt>java.net.URI</tt> constructor. Finally, the scheme of the parsed
+ * absolute URI is checked with a list of valid schemes.
+ * </p>
+ *
+ * @param uri the URI string
+ * @return return true if the String represents a safe and valid URI
+ */
+ protected boolean validateURI(String uri) {
+
+ // Relative URI starts with a slash
+ if (uri.startsWith("/")) return true;
+
+ java.net.URI parsedURI;
+ try {
+ parsedURI = new java.net.URI(uri);
+ } catch (java.net.URISyntaxException ex) {
+ return false;
+ }
+
+ if (!uriSchemes.contains(parsedURI.getScheme())) {
+ return false;
+ }
+ return true;
+ }
+
+ public String getInvalidURIMessage(String uri) {
+ return "invalid URI";
+ }
+
+ public String getInvalidElementMessage(String elementName) {
+ return "invalid element '" + elementName + "'";
+ }
+
+ public String getInvalidAttributeMessage(String elementName, String attributeName) {
+ return "invalid attribute '" + attributeName + "' for element '" + elementName + "'";
+ }
+
+ public String getInvalidAttributeValueMessage(String elementName, String attributeName, String value) {
+ return "invalid value of attribute '" + attributeName + "' for element '" + elementName + "'";
+ };
+
+ }
+
+ private Sanitizer sanitizer = new DefaultSanitizer();
+ public void setSanitizer(Sanitizer sanitizer) {
+ this.sanitizer = sanitizer;
+ }
+
+ private Macro currentMacro;
+ private java.util.Stack<Token> htmlElementStack = new java.util.Stack<Token>();
+
private StringBuilder mainBuilder = new StringBuilder();
private StringBuilder builder = mainBuilder;
@@ -38,19 +373,7 @@
private static boolean hasMultiple(String string, char c) {
return string.indexOf(c)!=string.lastIndexOf(c);
}
-
- private void validateElement(Token t) throws NoViableAltException {
- if ( !htmlElements.contains( t.getText().toLowerCase() ) ) {
- throw new NoViableAltException(t, null);
- }
- }
- private void validateAttribute(Token t) throws NoViableAltException {
- if ( !htmlAttributes.contains( t.getText().toLowerCase() ) ) {
- throw new NoViableAltException(t, null);
- }
- }
-
private void beginCapture() {
builder = new StringBuilder();
}
@@ -200,7 +523,11 @@
EQ GT
{ beginCapture(); }
attributeValue
- { String link = endCapture(); append(linkTag(text, link)); }
+ {
+ String link = endCapture();
+ sanitizer.validateLinkTagURI(link);
+ append(linkTag(text, link));
+ }
CLOSE
;
@@ -342,22 +669,54 @@
body: (plain|formatted|preformatted|quoted|html|list|newline)*
;
-openTag: LT name:ALPHANUMERICWORD { validateElement(name); append("<"); append(name.getText()); }
+openTag:
+ LT name:ALPHANUMERICWORD
+ {
+ htmlElementStack.push(name);
+ sanitizer.validateHtmlElement(name);
+ append("<");
+ append(name.getText());
+ }
;
-
+
beforeBody: GT { append(">"); }
;
-closeTagWithBody: LT SLASH name:ALPHANUMERICWORD GT { append("</"); append(name.getText()); append(">"); }
+closeTagWithBody:
+ LT SLASH name:ALPHANUMERICWORD GT
+ {
+ append("</");
+ append(name.getText());
+ append(">");
+ htmlElementStack.pop();
+ }
;
-closeTagWithNoBody: SLASH GT { append("/>"); }
+closeTagWithNoBody:
+ SLASH GT
+ {
+ append("/>");
+ htmlElementStack.pop();
+ }
;
attribute: att:ALPHANUMERICWORD (space)* EQ (space)*
- DOUBLEQUOTE { validateAttribute(att); append(att.getText()); append("=\""); }
- attributeValue
- DOUBLEQUOTE { append("\""); }
+ DOUBLEQUOTE
+ {
+ sanitizer.validateHtmlAttribute(htmlElementStack.peek(), att);
+ append(att.getText());
+ append("=\"");
+ }
+ {
+ beginCapture();
+ }
+ attributeValue
+ {
+ String attValue = endCapture();
+ sanitizer.validateHtmlAttributeValue(htmlElementStack.peek(), att, attValue);
+ append(attValue);
+ }
+ DOUBLEQUOTE { append("\""); }
;
attributeValue: ( AMPERSAND { append("&"); } |
More information about the seam-commits
mailing list