JBossWeb SVN: r555 - in trunk: java/org/apache/tomcat/util/buf and 1 other directories. - jbossweb-commits

Wednesday, 26 March 2008

Author: remy.maucherat(a)jboss.com
Date: 2008-03-26 20:42:29 -0400 (Wed, 26 Mar 2008)
New Revision: 555

Modified:
   trunk/java/org/apache/catalina/connector/CoyoteAdapter.java
   trunk/java/org/apache/catalina/connector/InputBuffer.java
   trunk/java/org/apache/tomcat/util/buf/B2CConverter.java
   trunk/java/org/apache/tomcat/util/buf/ByteChunk.java
   trunk/java/org/apache/tomcat/util/buf/CharChunk.java
   trunk/java/org/apache/tomcat/util/buf/UTF8Decoder.java
   trunk/webapps/docs/changelog.xml
Log:
- Move the B2C converter to use a NIO CharsetDecoder backend. The main issue with the
previous code
  was the unknown state of the various buffers used in the JDK objects. This change will
also
  save a significant amount of memory (the NIO decoder does not have any internal
buffer).

Modified: trunk/java/org/apache/catalina/connector/CoyoteAdapter.java
===================================================================

--- trunk/java/org/apache/catalina/connector/CoyoteAdapter.java	2008-03-27 00:26:25 UTC
(rev 554)
+++ trunk/java/org/apache/catalina/connector/CoyoteAdapter.java	2008-03-27 00:42:29 UTC
(rev 555)
@@ -651,7 +651,7 @@
                 } else {
                     conv.recycle();
                 }
-            } catch (IOException e) {
+            } catch (Exception e) {
                 // Ignore
                 log.error("Invalid URI encoding; using HTTP default");
                 connector.setURIEncoding(null);

Modified: trunk/java/org/apache/catalina/connector/InputBuffer.java
===================================================================
--- trunk/java/org/apache/catalina/connector/InputBuffer.java	2008-03-27 00:26:25 UTC (rev
554)
+++ trunk/java/org/apache/catalina/connector/InputBuffer.java	2008-03-27 00:42:29 UTC (rev
555)
@@ -297,10 +297,6 @@
                 }
             }
         }
-        /*if (available == 0) {
-            coyoteRequest.action(ActionCode.ACTION_AVAILABLE, null);
-            available = (coyoteRequest.getAvailable() > 0) ? 1 : 0;
-        }*/
         return available;
     }
 
@@ -386,8 +382,7 @@
             setConverter();
 
         if (bb.getLength() <= 0) {
-            int nRead = realReadBytes(bb.getBytes(), 0, bb.getBytes().length);
-            if (nRead < 0) {
+            if (realReadBytes(bb.getBytes(), 0, bb.getBytes().length) < 0) {
                 return -1;
             }
         }
@@ -395,10 +390,19 @@
         if (markPos == -1) {
             cb.setOffset(0);
             cb.setEnd(0);
+        } else {
+            // Make sure there's enough space in the worst case
+            cb.makeSpace(bb.getLength());
+            if ((cb.getBuffer().length - cb.getEnd()) == 0) {
+                // We went over the limit
+                cb.setOffset(0);
+                cb.setEnd(0);
+                markPos = -1;
+            }
         }
 
         state = CHAR_STATE;
-        conv.convert(bb, cb, bb.getLength());
+        conv.convert(bb, cb);
 
         return cb.getLength();
 
@@ -484,7 +488,9 @@
         if (offset < size) {
             offset = size;
         }
-        cb.setLimit(cb.getStart() + offset);
+        if (cb.getStart() + offset > cb.getLimit()) {
+            cb.setLimit(cb.getStart() + offset);
+        }
         markPos = cb.getStart();
     }
 
@@ -493,8 +499,6 @@
         throws IOException {
         if (state == CHAR_STATE) {
             if (markPos < 0) {
-                cb.recycle();
-                markPos = -1;
                 throw new IOException();
             } else {
                 cb.setOffset(markPos);

Modified: trunk/java/org/apache/tomcat/util/buf/B2CConverter.java
===================================================================
--- trunk/java/org/apache/tomcat/util/buf/B2CConverter.java	2008-03-27 00:26:25 UTC (rev
554)
+++ trunk/java/org/apache/tomcat/util/buf/B2CConverter.java	2008-03-27 00:42:29 UTC (rev
555)
@@ -19,242 +19,113 @@
 package org.apache.tomcat.util.buf;
 
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
 
-/** Efficient conversion of bytes  to character .
- *  
- *  This uses the standard JDK mechansim - a reader - but provides mechanisms
- *  to recycle all the objects that are used. It is compatible with JDK1.1
- *  and up,
- *  ( nio is better, but it's not available even in 1.2 or 1.3 )
- *
- *  Not used in the current code, the performance gain is not very big
- *  in the current case ( since String is created anyway ), but it will
- *  be used in a later version or after the remaining optimizations.
+/**
+ * Helper class to handle byte to char conversion using NIO.
+ * 
+ * @author Remy Maucherat
  */
 public class B2CConverter {
-    
-    
-    private static org.jboss.logging.Logger log=
-        org.jboss.logging.Logger.getLogger( B2CConverter.class );
-    
-    private IntermediateInputStream iis;
-    private ReadConvertor conv;
-    private String encoding;
 
-    protected B2CConverter() {
-    }
-    
-    /** Create a converter, with bytes going to a byte buffer
-     */
-    public B2CConverter(String encoding)
-	throws IOException
-    {
-	this.encoding=encoding;
-	reset();
-    }
+    private static org.jboss.logging.Logger log =
+        org.jboss.logging.Logger.getLogger(B2CConverter.class);
 
-    
-    /** Reset the internal state, empty the buffers.
-     *  The encoding remain in effect, the internal buffers remain allocated.
+    protected String charset = null;
+    protected CharsetDecoder decoder = null;
+    protected ByteBuffer bb = null;
+    protected CharBuffer cb = null;
+
+    /**
+     * Leftover buffer used for incomplete characters.
      */
-    public  void recycle() {
-	conv.recycle();
-    }
+    protected ByteBuffer leftovers = null;
 
-    static final int BUFFER_SIZE=8192;
-    char result[]=new char[BUFFER_SIZE];
-
-    /** Convert a buffer of bytes into a chars
-     * @deprecated
+    /**
+     * Create a decoder for the specified charset.
      */
-    public  void convert( ByteChunk bb, CharChunk cb )
-	throws IOException
-    {
-	// Set the ByteChunk as input to the Intermediate reader
-	convert(bb, cb, bb.getLength());
+    public B2CConverter(String charset) {
+        decoder = Charset.forName(charset).newDecoder();
+        this.charset = charset;
+        byte[] left = new byte[4];
+        leftovers = ByteBuffer.wrap(left);
     }
 
-    public void convert(ByteChunk bb, CharChunk cb, int limit)
-    throws IOException {
-        iis.setByteChunk(bb);
-        try {
-            // read from the reader
-            int l = 0;
-            while( limit > 0 ) { // conv.ready() ) {
-                int size = limit < BUFFER_SIZE ? limit : BUFFER_SIZE;
-                l = bb.getLength();
-                int cnt=conv.read( result, 0, size );
-                if( cnt <= 0 ) {
-                    // End of stream ! - we may be in a bad state
-                    if( debug>0)
-                        log( "EOF" );
-                    //		    reset();
-                    return;
-                }
-                if( debug > 1 )
-                    log("Converted: " + new String( result, 0, cnt ));
-                // Make sure there's enough space to append the characters which
-                // have been converted
-                if (cb.getEnd() + cnt  > cb.getLimit()) {
-                    cb.setLimit(cb.getEnd() + cnt);
-                }
-                cb.append( result, 0, cnt );
-                limit = limit - (l - bb.getLength());
+    /**
+     * Convert the given bytes to chars.
+     * 
+     * @param bc
+     * @param cc
+     */
+    public void convert(ByteChunk bc, CharChunk cc) 
+        throws IOException {
+        if ((bb == null) || (bb.array() != bc.getBuffer())) {
+            // Create a new byte buffer if anything changed
+            bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
+        } else {
+            // Initialize the byte buffer
+            bb.position(bc.getStart());
+            bb.limit(bc.getEnd());
+        }
+        if ((cb == null) || (cb.array() != cc.getBuffer())) {
+            // Create a new char buffer if anything changed
+            cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(), 
+                    cc.getBuffer().length - cc.getEnd());
+        } else {
+            // Initialize the char buffer
+            cb.position(cc.getEnd());
+            cb.limit(cc.getBuffer().length);
+        }
+        // Parse leftover if any are present
+        CoderResult result = null;
+        if (leftovers.position() > 0) {
+            int pos = cb.position();
+            do {
+                leftovers.put(bc.substractB());
+                leftovers.flip();
+                result = decoder.decode(leftovers, cb, false);
+                leftovers.position(leftovers.limit());
+                leftovers.limit(leftovers.array().length);
+            } while (result.isUnderflow() && (cb.position() == pos));
+            if (result.isError() || result.isMalformed()) {
+                result.throwException();
             }
-        } catch( IOException ex) {
-            if( debug>0)
-                log( "Reseting the converter " + ex.toString() );
-            reset();
-            throw ex;
+            bb.position(bc.getStart());
+            leftovers.position(0);
         }
+        // Do the decoding and get the results into the byte chunk and the char chunk
+        result = decoder.decode(bb, cb, false);
+        if (result.isError() || result.isMalformed()) {
+            result.throwException();
+        } else if (result.isOverflow()) {
+            // Propagate current positions to the byte chunk and char chunk, if this
+            // continues the char buffer will get resized
+            bc.setOffset(bb.position());
+            cc.setEnd(cb.position());
+        } else if (result.isUnderflow()) {
+            // Propagate current positions to the byte chunk and char chunk
+            bc.setOffset(bb.position());
+            cc.setEnd(cb.position());
+            // Put leftovers in the leftovers byte buffer
+            if (bc.getLength() > 0) {
+                leftovers.position(bc.getLength());
+                leftovers.limit(leftovers.array().length);
+                bc.substract(leftovers.array(), 0, bc.getLength());
+            }
+        }
     }
 
-    public void reset()
-	throws IOException
-    {
-	// destroy the reader/iis
-	iis=new IntermediateInputStream();
-	conv=new ReadConvertor( iis, encoding );
-    }
-
-    private final int debug=0;
-    void log( String s ) {
-        if (log.isDebugEnabled())
-            log.debug("B2CConverter: " + s );
-    }
-
-    // -------------------- Not used - the speed improvemnt is quite small
-
-    /*
-    private Hashtable decoders;
-    public static final boolean useNewString=false;
-    public static final boolean useSpecialDecoders=true;
-    private UTF8Decoder utfD;
-    // private char[] conversionBuff;
-    CharChunk conversionBuf;
-
-
-    private  static String decodeString(ByteChunk mb, String enc)
-	throws IOException
-    {
-	byte buff=mb.getBuffer();
-	int start=mb.getStart();
-	int end=mb.getEnd();
-	if( useNewString ) {
-	    if( enc==null) enc="UTF8";
-	    return new String( buff, start, end-start, enc );
-	}
-	B2CConverter b2c=null;
-	if( useSpecialDecoders &&
-	    (enc==null || "UTF8".equalsIgnoreCase(enc))) {
-	    if( utfD==null ) utfD=new UTF8Decoder();
-	    b2c=utfD;
-	}
-	if(decoders == null ) decoders=new Hashtable();
-	if( enc==null ) enc="UTF8";
-	b2c=(B2CConverter)decoders.get( enc );
-	if( b2c==null ) {
-	    if( useSpecialDecoders ) {
-		if( "UTF8".equalsIgnoreCase( enc ) ) {
-		    b2c=new UTF8Decoder();
-		}
-	    }
-	    if( b2c==null )
-		b2c=new B2CConverter( enc );
-	    decoders.put( enc, b2c );
-	}
-	if( conversionBuf==null ) conversionBuf=new CharChunk(1024);
-
-	try {
-	    conversionBuf.recycle();
-	    b2c.convert( this, conversionBuf );
-	    //System.out.println("XXX 1 " + conversionBuf );
-	    return conversionBuf.toString();
-	} catch( IOException ex ) {
-	    ex.printStackTrace();
-	    return null;
-	}
-    }
-
-    */
-}
-
-// -------------------- Private implementation --------------------
-
-
-
-/**
- * 
- */
-final class  ReadConvertor extends InputStreamReader {
-    // stream with flush() and close(). overriden.
-    private IntermediateInputStream iis;
-    
-    // Has a private, internal byte[8192]
-    
-    /** Create a converter.
+    /**
+     * Reset the internal state, empty the buffers.
+     * The encoding remain in effect, the internal buffers remain allocated.
      */
-    public ReadConvertor( IntermediateInputStream in, String enc )
-	throws UnsupportedEncodingException
-    {
-	super( in, enc );
-	iis=in;
+    public void recycle() {
+        decoder.reset();
+        leftovers.position(0);
     }
-    
-    /** Overriden - will do nothing but reset internal state.
-     */
-    public  final void close() throws IOException {
-	// NOTHING
-	// Calling super.close() would reset out and cb.
-    }
-    
-    public  final int read(char cbuf[], int off, int len)
-	throws IOException
-    {
-	// will do the conversion and call write on the output stream
-	return super.read( cbuf, off, len );
-    }
-    
-    /** Reset the buffer
-     */
-    public  final void recycle() {
-    }
-}
 
-
-/** Special output stream where close() is overriden, so super.close()
-    is never called.
-    
-    This allows recycling. It can also be disabled, so callbacks will
-    not be called if recycling the converter and if data was not flushed.
-*/
-final class IntermediateInputStream extends InputStream {
-    ByteChunk bc = null;
-    
-    public IntermediateInputStream() {
-    }
-    
-    public  final void close() throws IOException {
-	// shouldn't be called - we filter it out in writer
-	throw new IOException("close() called - shouldn't happen ");
-    }
-    
-    public  final  int read(byte cbuf[], int off, int len) throws IOException {
-	return bc.substract(cbuf, off, len);
-    }
-    
-    public  final int read() throws IOException {
-	return bc.substract();
-    }
-
-    // -------------------- Internal methods --------------------
-
-
-    void setByteChunk( ByteChunk mb ) {
-        bc = mb;
-    }
-
 }

Modified: trunk/java/org/apache/tomcat/util/buf/ByteChunk.java
===================================================================
--- trunk/java/org/apache/tomcat/util/buf/ByteChunk.java	2008-03-27 00:26:25 UTC (rev
554)
+++ trunk/java/org/apache/tomcat/util/buf/ByteChunk.java	2008-03-27 00:42:29 UTC (rev
555)
@@ -378,6 +378,21 @@
 
     }
 
+    public byte substractB()
+        throws IOException {
+
+        if ((end - start) == 0) {
+            if (in == null)
+                return -1;
+            int n = in.realReadBytes( buff, 0, buff.length );
+            if (n < 0)
+                return -1;
+        }
+
+        return (buff[start++]);
+
+    }
+
     public int substract(ByteChunk src)
         throws IOException {
 

Modified: trunk/java/org/apache/tomcat/util/buf/CharChunk.java
===================================================================
--- trunk/java/org/apache/tomcat/util/buf/CharChunk.java	2008-03-27 00:26:25 UTC (rev
554)
+++ trunk/java/org/apache/tomcat/util/buf/CharChunk.java	2008-03-27 00:42:29 UTC (rev
555)
@@ -444,7 +444,7 @@
     /** Make space for len chars. If len is small, allocate
      *	a reserve space too. Never grow bigger than limit.
      */
-    private void makeSpace(int count)
+    public void makeSpace(int count)
     {
 	char[] tmp = null;
 

Modified: trunk/java/org/apache/tomcat/util/buf/UTF8Decoder.java
===================================================================
--- trunk/java/org/apache/tomcat/util/buf/UTF8Decoder.java	2008-03-27 00:26:25 UTC (rev
554)
+++ trunk/java/org/apache/tomcat/util/buf/UTF8Decoder.java	2008-03-27 00:42:29 UTC (rev
555)
@@ -40,7 +40,7 @@
     // may have state !!
     
     public UTF8Decoder() {
-
+        super("UTF-8");
     }
     
     public void recycle() {

Modified: trunk/webapps/docs/changelog.xml
===================================================================
--- trunk/webapps/docs/changelog.xml	2008-03-27 00:26:25 UTC (rev 554)
+++ trunk/webapps/docs/changelog.xml	2008-03-27 00:42:29 UTC (rev 555)
@@ -57,9 +57,10 @@
       <update>
         Add support for specifying defaults for properties (format is
${property:default}). (remm)
       </update>
-      <fix>
-        <bug>44494</bug>: Fix incorrect reads with multibyte charsets.
(remm)
-      </fix>
+      <update>
+        <bug>44494</bug>: Fix incorrect reads with multibyte charsets by
moving the byte to char
+        converter to the NIO character decoders. (remm)
+      </update>
     </changelog>
   </subsection>
 </section>


    

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

JBossWeb SVN: r555 - in trunk: java/org/apache/tomcat/util/buf and 1 other directories.