[jboss-cvs] jboss-portal/cms/src/main/org/jboss/portal/test/cms ...
Roy Russo
russo at jboss.com
Tue Aug 22 16:08:23 EDT 2006
User: russo
Date: 06/08/22 16:08:23
Added: cms/src/main/org/jboss/portal/test/cms TestRegEx.java
Log:
JBPORTAL-937 - done.
Revision Changes Path
1.1 date: 2006/08/22 20:08:23; author: russo; state: Exp;jboss-portal/cms/src/main/org/jboss/portal/test/cms/TestRegEx.java
Index: TestRegEx.java
===================================================================
/*
* JBoss, Home of Professional Open Source
* Copyright 2005, JBoss Inc., and individual contributors as indicated
* by the @authors tag. See the copyright.txt in the distribution for a
* full listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.jboss.portal.test.cms;
import junit.framework.TestCase;
import org.jboss.portal.cms.util.FileUtil;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Tests for the CMS Regex when retrieving stored content.
*
* @author <a href="mailto:roy at jboss.org">Roy Russo</a>
*/
public class TestRegEx extends TestCase
{
   /** Path of the HTML page that is fed through the stripping and link-rewriting regexes. */
   String HTMLHeaderFile = "resources/test/jcr/headerpage.html";

   /** Path of the file holding the output expected after stripping and rewriting. */
   String HTMLHeaderFile_Good = "resources/test/jcr/headerpage_good.html";

   /** Matches a URI scheme prefix such as <code>http:</code> or <code>mailto:</code>. */
   private static final String URI_schemeRegex = "[a-z][-+.0-9a-z]*:";

   /**
    * For our purposes, ignore URIs that start with a scheme indicator,
    * a slash (indicating an absolute path), or a hash sign (# = ASCII hex 23).
    * The hash is spelled <code>\x23</code> because the pattern is compiled with
    * {@link Pattern#COMMENTS}, where a literal '#' would start a comment.
    */
   private static final String URI_ignoreRegex = URI_schemeRegex + "|/|\\x23";

   /**
    * Locates relative URLs in <code>href</code>/<code>src</code> attributes.
    * Group 1 is the attribute name and '=', group 2 the opening quote (if any),
    * group 3 the quoted URL and group 4 the unquoted URL.
    */
   private static final String
      regex = "((?:href|src)\\s*=\\s*) # Capture preliminaries in $1. \n"
      + "(?: # First look for URL in quotes. \n"
      + " ([\"\']) # Capture open quote in $2. \n"
      + " (?!" + URI_ignoreRegex + ") # If it isn't absolute... \n"
      + " /?(.+?) # ...capture URL in $3 \n"
      + " \\2 # Match the closing quote \n"
      + " | # Look for non-quoted URL. \n"
      + " (?![\"\']|" + URI_ignoreRegex + ") # If it isn't absolute... \n"
      + " /?([^\\s>]+) # ...capture URL in $4 \n"
      + ")";

   /**
    * Removes header content, and leaves content between body tags (group 2).
    */
   private static final String HTMLStripperRegex = "(.*<body[^>]*>(.+)</body>.*)";

   private static final Pattern RELATIVE_URI_PATTERN = Pattern.compile(regex, Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.COMMENTS);

   private static final Pattern STRIP_TAGS_PATTERN = Pattern.compile(HTMLStripperRegex, Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

   public void setUp() throws Exception
   {
      super.setUp();
   }

   /**
    * Tests HTML rewriting of header content and links in CMSPortlet.
    * <p>
    * Reads the input page, keeps only the markup between the body tags, rewrites
    * every relative <code>href</code>/<code>src</code> URL through {@link #buildURL(String)}
    * and compares the result with the contents of the expected-output file.
    *
    * @throws Exception if either fixture file cannot be read
    */
   public void testHTMLPageHeaderRewrite() throws Exception
   {
      String fileHTML = readFile(HTMLHeaderFile);

      // begin cleaner: keep what sits between the body tags; stays empty when nothing matches
      String cleanHTML = "";
      Matcher bodyMatcher = STRIP_TAGS_PATTERN.matcher(fileHTML);
      while (bodyMatcher.find())
      {
         cleanHTML = bodyMatcher.group(2);
      }

      // begin modifying links
      StringBuffer rewritten = new StringBuffer();
      Matcher linkMatcher = RELATIVE_URI_PATTERN.matcher(cleanHTML);
      while (linkMatcher.find())
      {
         // Group 3 is set for quoted URLs, group 4 for unquoted ones.
         String relURI = linkMatcher.group(3) != null ? linkMatcher.group(3) : linkMatcher.group(4);
         String absoluteURI = buildURL("/" + relURI);
         // $2 re-emits the original quote character on both sides (nothing for unquoted URLs).
         linkMatcher.appendReplacement(rewritten, "$1$2" + FileUtil.cleanDoubleSlashes(absoluteURI) + "$2");
      }
      linkMatcher.appendTail(rewritten);
      cleanHTML = rewritten.toString();

      /* Assert */
      String goodHTML = readFile(HTMLHeaderFile_Good);
      // JUnit's argument order is (message, expected, actual).
      assertEquals("RegEx failed to match!", goodHTML, cleanHTML);
   }

   protected void tearDown() throws Exception
   {
      super.tearDown();
   }

   /**
    * Reads the whole file at the given path into a string using the platform
    * default charset, closing the reader even when reading fails.
    *
    * @param path location of the file to read
    * @return the complete file contents
    * @throws IOException if the file cannot be opened or read
    */
   private static String readFile(String path) throws IOException
   {
      StringBuffer fileData = new StringBuffer(1000);
      BufferedReader reader = new BufferedReader(new FileReader(path));
      try
      {
         char[] buffer = new char[1024];
         int count;
         while ((count = reader.read(buffer)) != -1)
         {
            fileData.append(buffer, 0, count);
         }
      }
      finally
      {
         reader.close();
      }
      return fileData.toString();
   }

   /**
    * Faking it: prefixes the path with a fixed portal content base URL.
    *
    * @param path path to append to the base URL
    * @return the base URL followed by <code>path</code>
    */
   private String buildURL(String path)
   {
      return "http://localhost:8080/portal/content" + path;
   }
}
More information about the jboss-cvs-commits
mailing list