Author: remy.maucherat(a)jboss.com
Date: 2008-03-27 10:22:16 -0400 (Thu, 27 Mar 2008)
New Revision: 558
Modified:
trunk/java/org/apache/catalina/connector/OutputBuffer.java
trunk/java/org/apache/catalina/connector/Response.java
trunk/java/org/apache/tomcat/util/buf/C2BConverter.java
trunk/java/org/apache/tomcat/util/buf/UEncoder.java
trunk/webapps/docs/changelog.xml
Log:
- For consistency, refactor output using NIO. This makes input and output very similar,
but the performance improvements and memory savings should be minimal, since output was
already relatively sane, unlike input (except sendRedirects should be much faster; woohoo, party).
Modified: trunk/java/org/apache/catalina/connector/OutputBuffer.java
===================================================================
--- trunk/java/org/apache/catalina/connector/OutputBuffer.java 2008-03-27 03:04:12 UTC (rev 557)
+++ trunk/java/org/apache/catalina/connector/OutputBuffer.java 2008-03-27 14:22:16 UTC (rev 558)
@@ -30,6 +30,7 @@
import org.apache.catalina.Globals;
import org.apache.tomcat.util.buf.ByteChunk;
import org.apache.tomcat.util.buf.C2BConverter;
+import org.apache.tomcat.util.buf.CharChunk;
/**
@@ -41,7 +42,7 @@
* @author Remy Maucherat
*/
public class OutputBuffer extends Writer
- implements ByteChunk.ByteOutputChannel {
+ implements ByteChunk.ByteOutputChannel, CharChunk.CharOutputChannel {
// -------------------------------------------------------------- Constants
@@ -62,6 +63,12 @@
/**
+ * The chunk buffer.
+ */
+ private CharChunk cb;
+
+
+ /**
* State of the output buffer.
*/
private boolean initial = true;
@@ -98,6 +105,12 @@
/**
+ * Char chunk used to output chars.
+ */
+ private CharChunk outputCharChunk = new CharChunk();
+
+
+ /**
* Encoding to use.
*/
private String enc;
@@ -112,7 +125,8 @@
/**
* List of encoders.
*/
- protected HashMap encoders = new HashMap();
+ protected HashMap<String, C2BConverter> encoders =
+ new HashMap<String, C2BConverter>();
/**
@@ -156,6 +170,10 @@
bb = new ByteChunk(size);
bb.setLimit(size);
bb.setByteOutputChannel(this);
+ cb = new CharChunk(size);
+ cb.setLimit(size);
+ cb.setOptimizedWrite(false);
+ cb.setCharOutputChannel(this);
}
@@ -169,7 +187,7 @@
* @param coyoteResponse Associated Coyote response
*/
public void setResponse(Response coyoteResponse) {
- this.coyoteResponse = coyoteResponse;
+ this.coyoteResponse = coyoteResponse;
}
@@ -225,7 +243,9 @@
bytesWritten = 0;
charsWritten = 0;
- bb.recycle();
+ bb.recycle();
+ cb.recycle();
+ outputCharChunk.setChars(null, 0, 0);
closed = false;
suspended = false;
@@ -261,6 +281,12 @@
if (suspended)
return;
+ // If there are chars, flush all of them to the byte buffer now as bytes are used to
+ // calculate the content-length (if everything fits into the byte buffer, of course).
+ if (cb.getLength() > 0) {
+ cb.flushBuffer();
+ }
+
if ((!coyoteResponse.isCommitted())
&& (coyoteResponse.getContentLengthLong() == -1)) {
// If this didn't cause a commit of the response, the final content
@@ -305,6 +331,9 @@
coyoteResponse.sendHeaders();
initial = false;
}
+ if (cb.getLength() > 0) {
+ cb.flushBuffer();
+ }
if (bb.getLength() > 0) {
bb.flushBuffer();
}
@@ -350,7 +379,7 @@
* @throws IOException An underlying IOException occurred
*/
public void realWriteBytes(byte buf[], int off, int cnt)
- throws IOException {
+ throws IOException {
if (closed)
return;
@@ -417,25 +446,43 @@
// ------------------------------------------------- Chars Handling Methods
+ /**
+ * Convert the chars to bytes, then send the data to the client.
+ *
+ * @param buf Char buffer to be written to the response
+ * @param off Offset
+ * @param len Length
+ *
+ * @throws IOException An underlying IOException occurred
+ */
+ public void realWriteChars(char buf[], int off, int len)
+ throws IOException {
+
+ charsWritten += len;
+ outputCharChunk.setChars(buf, off, len);
+ while (outputCharChunk.getLength() > 0) {
+ conv.convert(outputCharChunk, bb);
+ if (outputCharChunk.getLength() > 0) {
+ bb.flushBuffer();
+ }
+ }
+
+ }
+
public void write(int c)
throws IOException {
if (suspended)
return;
- conv.convert((char) c);
- conv.flushBuffer();
- charsWritten++;
-
+ cb.append((char) c);
+
}
public void write(char c[])
throws IOException {
- if (suspended)
- return;
-
write(c, 0, c.length);
}
@@ -447,9 +494,7 @@
if (suspended)
return;
- conv.convert(c, off, len);
- conv.flushBuffer();
- charsWritten += len;
+ cb.append(c, off, len);
}
@@ -463,11 +508,9 @@
if (suspended)
return;
- charsWritten += len;
if (s == null)
s = "null";
- conv.convert(s, off, len);
- conv.flushBuffer();
+ cb.append(s, off, len);
}
@@ -480,8 +523,7 @@
if (s == null)
s = "null";
- conv.convert(s);
- conv.flushBuffer();
+ cb.append(s);
}
@@ -518,7 +560,7 @@
new PrivilegedExceptionAction(){
public Object run() throws IOException{
- return new C2BConverter(bb, enc);
+ return new C2BConverter(enc);
}
}
@@ -529,7 +571,7 @@
throw (IOException)e;
}
} else {
- conv = new C2BConverter(bb, enc);
+ conv = new C2BConverter(enc);
}
encoders.put(enc, conv);
@@ -596,6 +638,7 @@
public void reset() {
bb.recycle();
+ cb.recycle();
bytesWritten = 0;
charsWritten = 0;
gotEnc = false;
Modified: trunk/java/org/apache/catalina/connector/Response.java
===================================================================
--- trunk/java/org/apache/catalina/connector/Response.java 2008-03-27 03:04:12 UTC (rev 557)
+++ trunk/java/org/apache/catalina/connector/Response.java 2008-03-27 14:22:16 UTC (rev 558)
@@ -1502,16 +1502,16 @@
if (!leadingSlash) {
String relativePath = request.getDecodedRequestURI();
int pos = relativePath.lastIndexOf('/');
- relativePath = relativePath.substring(0, pos);
- String encodedURI = null;
- final String frelativePath = relativePath;
+ CharChunk encodedURI = null;
if (SecurityUtil.isPackageProtectionEnabled() ){
+ final String frelativePath = relativePath;
+ final int fend = pos;
try{
- encodedURI = (String)AccessController.doPrivileged(
+ encodedURI = (CharChunk)AccessController.doPrivileged(
new PrivilegedExceptionAction(){
public Object run() throws IOException{
- return urlEncoder.encodeURL(frelativePath);
+ return urlEncoder.encodeURL(frelativePath, 0,
fend);
}
});
} catch (PrivilegedActionException pae){
@@ -1521,9 +1521,10 @@
throw iae;
}
} else {
- encodedURI = urlEncoder.encodeURL(relativePath);
+ encodedURI = urlEncoder.encodeURL(relativePath, 0, pos);
}
- redirectURLCC.append(encodedURI, 0, encodedURI.length());
+ redirectURLCC.append(encodedURI);
+ encodedURI.recycle();
redirectURLCC.append('/');
}
redirectURLCC.append(location, 0, location.length());
Modified: trunk/java/org/apache/tomcat/util/buf/C2BConverter.java
===================================================================
--- trunk/java/org/apache/tomcat/util/buf/C2BConverter.java 2008-03-27 03:04:12 UTC (rev 557)
+++ trunk/java/org/apache/tomcat/util/buf/C2BConverter.java 2008-03-27 14:22:16 UTC (rev 558)
@@ -18,251 +18,76 @@
package org.apache.tomcat.util.buf;
import java.io.IOException;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
-/** Efficient conversion of character to bytes.
- *
- * This uses the standard JDK mechansim - a writer - but provides mechanisms
- * to recycle all the objects that are used. It is compatible with JDK1.1 and up,
- * ( nio is better, but it's not available even in 1.2 or 1.3 )
- *
+/**
+ * NIO based character encoder.
*/
public final class C2BConverter {
- private static org.jboss.logging.Logger log=
- org.jboss.logging.Logger.getLogger(C2BConverter.class );
-
- private IntermediateOutputStream ios;
- private WriteConvertor conv;
- private ByteChunk bb;
- private String enc;
-
- /** Create a converter, with bytes going to a byte buffer
- */
- public C2BConverter(ByteChunk output, String encoding) throws IOException {
- this.bb=output;
- ios=new IntermediateOutputStream( output );
- conv=new WriteConvertor( ios, encoding );
- this.enc=encoding;
- }
+ protected static org.jboss.logging.Logger log =
+ org.jboss.logging.Logger.getLogger(C2BConverter.class);
- /** Create a converter
- */
- public C2BConverter(String encoding) throws IOException {
- this( new ByteChunk(1024), encoding );
- }
+ protected CharsetEncoder encoder = null;
+ protected ByteBuffer bb = null;
+ protected CharBuffer cb = null;
- public ByteChunk getByteChunk() {
- return bb;
- }
-
- public String getEncoding() {
- return enc;
- }
-
- public void setByteChunk(ByteChunk bb) {
- this.bb=bb;
- ios.setByteChunk( bb );
- }
-
- /** Reset the internal state, empty the buffers.
- * The encoding remain in effect, the internal buffers remain allocated.
+ /**
+ * Create an encoder for the specified charset.
*/
- public final void recycle() {
- conv.recycle();
- bb.recycle();
+ public C2BConverter(String charset) {
+ encoder = Charset.forName(charset).newEncoder();
}
- /** Generate the bytes using the specified encoding
+ /**
+ * The encoding remain in effect, the encoder remains allocated.
*/
- public final void convert(char c[], int off, int len ) throws IOException {
- conv.write( c, off, len );
+ public void recycle() {
+ encoder.reset();
}
- /** Generate the bytes using the specified encoding
+ /**
+ * Convert the given charaters to bytes.
*/
- public final void convert(String s, int off, int len ) throws IOException {
- conv.write( s, off, len );
- }
-
- /** Generate the bytes using the specified encoding
- */
- public final void convert(String s ) throws IOException {
- conv.write( s );
- }
-
- /** Generate the bytes using the specified encoding
- */
- public final void convert(char c ) throws IOException {
- conv.write( c );
- }
-
- /** Convert a message bytes chars to bytes
- */
- public final void convert(MessageBytes mb ) throws IOException {
- int type=mb.getType();
- if( type==MessageBytes.T_BYTES )
- return;
- ByteChunk orig=bb;
- setByteChunk( mb.getByteChunk());
- bb.recycle();
- bb.allocate( 32, -1 );
-
- if( type==MessageBytes.T_STR ) {
- convert( mb.getString() );
- // System.out.println("XXX Converting " + mb.getString() );
- } else if( type==MessageBytes.T_CHARS ) {
- CharChunk charC=mb.getCharChunk();
- convert( charC.getBuffer(),
- charC.getOffset(), charC.getLength());
- //System.out.println("XXX Converting " + mb.getCharChunk() );
+ public void convert(CharChunk cc, ByteChunk bc)
+ throws IOException {
+ if ((bb == null) || (bb.array() != bc.getBuffer())) {
+ // Create a new byte buffer if anything changed
+ bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(),
+ bc.getBuffer().length - bc.getEnd());
} else {
- if (log.isDebugEnabled())
- log.debug("XXX unknowon type " + type );
+ // Initialize the byte buffer
+ bb.position(bc.getEnd());
+ bb.limit(bc.getBuffer().length);
}
- flushBuffer();
- //System.out.println("C2B: XXX " + bb.getBuffer() + bb.getLength());
- setByteChunk(orig);
+ if ((cb == null) || (cb.array() != cc.getBuffer())) {
+ // Create a new char buffer if anything changed
+ cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(),
+ cc.getLength());
+ } else {
+ // Initialize the char buffer
+ cb.position(cc.getStart());
+ cb.limit(cc.getEnd());
+ }
+ // Parse leftover if any are present
+ CoderResult result = null;
+ // Do the decoding and get the results into the byte chunk and the char chunk
+ result = encoder.encode(cb, bb, false);
+ if (result.isError() || result.isMalformed()) {
+ result.throwException();
+ } else if (result.isOverflow()) {
+ // Propagate current positions to the byte chunk and char chunk
+ bc.setEnd(bb.position());
+ cc.setOffset(cb.position());
+ } else if (result.isUnderflow()) {
+ // Propagate current positions to the byte chunk and char chunk
+ bc.setEnd(bb.position());
+ cc.setOffset(cb.position());
+ }
}
-
- /** Flush any internal buffers into the ByteOutput or the internal
- * byte[]
- */
- public final void flushBuffer() throws IOException {
- conv.flush();
- }
-
-}
-
-// -------------------- Private implementation --------------------
-
-
-
-/**
- * Special writer class, where close() is overritten. The default implementation
- * would set byteOutputter to null, and the writter can't be recycled.
- *
- * Note that the flush method will empty the internal buffers _and_ call
- * flush on the output stream - that's why we use an intermediary output stream
- * that overrides flush(). The idea is to have full control: flushing the
- * char->byte converter should be independent of flushing the OutputStream.
- *
- * When a WriteConverter is created, it'll allocate one or 2 byte buffers,
- * with a 8k size that can't be changed ( at least in JDK1.1 -> 1.4 ). It would
- * also allocate a ByteOutputter or equivalent - again some internal buffers.
- *
- * It is essential to keep this object around and reuse it. You can use either
- * pools or per thread data - but given that in most cases a converter will be
- * needed for every thread and most of the time only 1 ( or 2 ) encodings will
- * be used, it is far better to keep it per thread and eliminate the pool
- * overhead too.
- *
- */
- final class WriteConvertor extends OutputStreamWriter {
- // stream with flush() and close(). overriden.
- private IntermediateOutputStream ios;
- // Has a private, internal byte[8192]
-
- /** Create a converter.
- */
- public WriteConvertor( IntermediateOutputStream out, String enc )
- throws UnsupportedEncodingException
- {
- super( out, enc );
- ios=out;
- }
-
- /** Overriden - will do nothing but reset internal state.
- */
- public final void close() throws IOException {
- // NOTHING
- // Calling super.close() would reset out and cb.
- }
-
- /**
- * Flush the characters only
- */
- public final void flush() throws IOException {
- // Will flushBuffer and out()
- // flushBuffer put any remaining chars in the byte[]
- super.flush();
- }
-
- public final void write(char cbuf[], int off, int len) throws IOException {
- // will do the conversion and call write on the output stream
- super.write( cbuf, off, len );
- }
-
- /** Reset the buffer
- */
- public final void recycle() {
- ios.disable();
- try {
- // System.out.println("Reseting writer");
- flush();
- } catch( Exception ex ) {
- ex.printStackTrace();
- }
- ios.enable();
- }
-
}
-
-
-/** Special output stream where close() is overriden, so super.close()
- is never called.
-
- This allows recycling. It can also be disabled, so callbacks will
- not be called if recycling the converter and if data was not flushed.
-*/
-final class IntermediateOutputStream extends OutputStream {
- private ByteChunk tbuff;
- private boolean enabled=true;
-
- public IntermediateOutputStream(ByteChunk tbuff) {
- this.tbuff=tbuff;
- }
-
- public final void close() throws IOException {
- // shouldn't be called - we filter it out in writer
- throw new IOException("close() called - shouldn't happen ");
- }
-
- public final void flush() throws IOException {
- // nothing - write will go directly to the buffer,
- // we don't keep any state
- }
-
- public final void write(byte cbuf[], int off, int len) throws IOException {
- // will do the conversion and call write on the output stream
- if( enabled ) {
- tbuff.append( cbuf, off, len );
- }
- }
-
- public final void write( int i ) throws IOException {
- throw new IOException("write( int ) called - shouldn't happen ");
- }
-
- // -------------------- Internal methods --------------------
-
- void setByteChunk( ByteChunk bb ) {
- tbuff=bb;
- }
-
- /** Temporary disable - this is used to recycle the converter without
- * generating an output if the buffers were not flushed
- */
- final void disable() {
- enabled=false;
- }
-
- /** Reenable - used to recycle the converter
- */
- final void enable() {
- enabled=true;
- }
-}
Modified: trunk/java/org/apache/tomcat/util/buf/UEncoder.java
===================================================================
--- trunk/java/org/apache/tomcat/util/buf/UEncoder.java 2008-03-27 03:04:12 UTC (rev 557)
+++ trunk/java/org/apache/tomcat/util/buf/UEncoder.java 2008-03-27 14:22:16 UTC (rev 558)
@@ -31,113 +31,99 @@
* while encoding a URL you can add "/".
*
* @author Costin Manolache
+ * @author Remy Maucherat
*/
public final class UEncoder {
- private static org.jboss.logging.Logger log=
- org.jboss.logging.Logger.getLogger(UEncoder.class );
+ private static org.jboss.logging.Logger log =
+ org.jboss.logging.Logger.getLogger(UEncoder.class);
// Not static - the set may differ ( it's better than adding
// an extra check for "/", "+", etc
private BitSet safeChars=null;
private C2BConverter c2b=null;
private ByteChunk bb=null;
+ private CharChunk cb=null;
+ private CharChunk output=null;
private String encoding="UTF8";
private static final int debug=0;
public UEncoder() {
- initSafeChars();
+ initSafeChars();
}
public void setEncoding( String s ) {
- encoding=s;
+ encoding=s;
}
public void addSafeCharacter( char c ) {
- safeChars.set( c );
+ safeChars.set( c );
}
-
/** URL Encode string, using a specified encoding.
*
* @param buf The writer
* @param s string to be encoded
* @throws IOException If an I/O error occurs
*/
- public void urlEncode( Writer buf, String s )
- throws IOException
- {
- if( c2b==null ) {
- bb=new ByteChunk(16); // small enough.
- c2b=new C2BConverter( bb, encoding );
- }
+ public CharChunk encodeURL(String s, int start, int end)
+ throws IOException {
+ if (c2b == null) {
+ bb = new ByteChunk(8); // small enough.
+ cb = new CharChunk(2); // small enough.
+ output = new CharChunk(64); // small enough.
+ c2b = new C2BConverter(encoding);
+ } else {
+ bb.recycle();
+ cb.recycle();
+ }
- for (int i = 0; i < s.length(); i++) {
- int c = (int) s.charAt(i);
- if( safeChars.get( c ) ) {
- if( debug > 0 ) log("Safe: " + (char)c);
- buf.write((char)c);
- } else {
- if( debug > 0 ) log("Unsafe: " + (char)c);
- c2b.convert( (char)c );
-
- // "surrogate" - UTF is _not_ 16 bit, but 21 !!!!
- // ( while UCS is 31 ). Amazing...
- if (c >= 0xD800 && c <= 0xDBFF) {
- if ( (i+1) < s.length()) {
- int d = (int) s.charAt(i+1);
- if (d >= 0xDC00 && d <= 0xDFFF) {
- if( debug > 0 ) log("Unsafe: " + c);
- c2b.convert( (char)d);
- i++;
- }
- }
- }
+ for (int i = start; i < end; i++) {
+ char c = s.charAt(i);
+ if (safeChars.get(c)) {
+ if( debug > 0 ) log("Safe: " + (char)c);
+ output.append(c);
+ } else {
+ if( debug > 0 ) log("Unsafe: " + (char)c);
+ cb.append(c);
+ c2b.convert(cb, bb);
- c2b.flushBuffer();
-
- urlEncode( buf, bb.getBuffer(), bb.getOffset(),
- bb.getLength() );
- bb.recycle();
- }
- }
+ // "surrogate" - UTF is _not_ 16 bit, but 21 !!!!
+ // ( while UCS is 31 ). Amazing...
+ if (c >= 0xD800 && c <= 0xDBFF) {
+ if ((i+1) < end) {
+ char d = s.charAt(i+1);
+ if (d >= 0xDC00 && d <= 0xDFFF) {
+ if( debug > 0 ) log("Unsafe: " + d);
+ cb.append(d);
+ c2b.convert(cb, bb);
+ i++;
+ }
+ }
+ }
+
+ urlEncode(output, bb);
+ cb.recycle();
+ bb.recycle();
+ }
+ }
+
+ return output;
}
- /**
- */
- public void urlEncode( Writer buf, byte bytes[], int off, int len)
- throws IOException
- {
- for( int j=off; j< len; j++ ) {
- buf.write( '%' );
- char ch = Character.forDigit((bytes[j] >> 4) & 0xF, 16);
- if( debug > 0 ) log("Encode: " + ch);
- buf.write(ch);
- ch = Character.forDigit(bytes[j] & 0xF, 16);
- if( debug > 0 ) log("Encode: " + ch);
- buf.write(ch);
- }
+ protected void urlEncode(CharChunk out, ByteChunk bb)
+ throws IOException {
+ byte[] bytes = bb.getBuffer();
+ for (int j = bb.getStart(); j < bb.getEnd(); j++) {
+ out.append('%');
+ char ch = Character.forDigit((bytes[j] >> 4) & 0xF, 16);
+ out.append(ch);
+ ch = Character.forDigit(bytes[j] & 0xF, 16);
+ out.append(ch);
+ }
}
- /**
- * Utility funtion to re-encode the URL.
- * Still has problems with charset, since UEncoder mostly
- * ignores it.
- */
- public String encodeURL(String uri) {
- String outUri=null;
- try {
- // XXX optimize - recycle, etc
- CharArrayWriter out = new CharArrayWriter();
- urlEncode(out, uri);
- outUri=out.toString();
- } catch (IOException iex) {
- }
- return outUri;
- }
-
-
// -------------------- Internal implementation --------------------
//
Modified: trunk/webapps/docs/changelog.xml
===================================================================
--- trunk/webapps/docs/changelog.xml 2008-03-27 03:04:12 UTC (rev 557)
+++ trunk/webapps/docs/changelog.xml 2008-03-27 14:22:16 UTC (rev 558)
@@ -61,6 +61,9 @@
<bug>44494</bug>: Fix incorrect reads with multibyte charsets by moving the byte to char
converter to the NIO character decoders. (remm)
</update>
+ <update>
+ For consistency, refactor character output using the NIO character decoders. (remm)
+ </update>
</changelog>
</subsection>
</section>