Index: gnu/java/nio/charset/Provider.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/Provider.java,v retrieving revision 1.3 diff -u -r1.3 Provider.java --- gnu/java/nio/charset/Provider.java 3 Feb 2005 22:37:29 -0000 1.3 +++ gnu/java/nio/charset/Provider.java 7 Apr 2005 22:15:37 -0000 @@ -88,6 +88,66 @@ // ISO-8859-1 aka ISO-LATIN-1 addCharset (new ISO_8859_1 ()); + // ISO-8859-2 aka ISO-LATIN-2 + addCharset (new ISO_8859_2 ()); + + // ISO-8859-3 aka ISO-LATIN-3 + addCharset (new ISO_8859_3 ()); + + // ISO-8859-4 aka ISO-LATIN-4 + addCharset (new ISO_8859_4 ()); + + // ISO-8859-5 (Cyrillic) + addCharset (new ISO_8859_5 ()); + + // ISO-8859-6 (Arabic) + addCharset (new ISO_8859_6 ()); + + // ISO-8859-7 (Greek) + addCharset (new ISO_8859_7 ()); + + // ISO-8859-8 (Hebrew) + addCharset (new ISO_8859_8 ()); + + // ISO-8859-9 aka ISO-LATIN-5 + addCharset (new ISO_8859_9 ()); + + // ISO-8859-13 aka ISO-LATIN-7 + addCharset (new ISO_8859_13 ()); + + // ISO-8859-15 aka ISO-LATIN-9 + addCharset (new ISO_8859_15 ()); + + // KOI8 (Cyrillic) + addCharset (new KOI_8 ()); + + // Windows-1250 aka cp-1250 (East European) + addCharset (new Windows1250 ()); + + // Windows-1251 (Cyrillic) + addCharset (new Windows1251 ()); + + // Windows-1252 aka cp-1252 (Latin-1) + addCharset (new Windows1252 ()); + + // Windows-1253 (Greek) + addCharset (new Windows1253 ()); + + // Windows-1254 (Turkish) + addCharset (new Windows1254 ()); + + // Windows-1255 (Hebrew) + addCharset (new Windows1255 ()); + + // Windows-1256 (Arabic) + addCharset (new Windows1256 ()); + + // Windows-1257 (Baltic) + addCharset (new Windows1257 ()); + + // Windows-1258 (Vietnamese) + addCharset (new Windows1258 ()); + // UTF-8 addCharset (new UTF_8 ()); @@ -99,6 +159,42 @@ // UTF-16 addCharset (new UTF_16 ()); + + // MS874 Windows Thai + addCharset (new MS874 ()); + + // Macintosh charsets + addCharset (new MacCentralEurope()); + 
addCharset (new MacDingbat()); + addCharset (new MacRoman()); + addCharset (new MacThai()); + addCharset (new MacCroatian()); + addCharset (new MacGreek()); + addCharset (new MacRomania()); + addCharset (new MacTurkish()); + addCharset (new MacCyrillic()); + addCharset (new MacIceland()); + addCharset (new MacSymbol()); + + addCharset (new Cp424()); // IBM Hebrew EBCDIC + addCharset (new Cp437()); // MSDOS USA,NZ,Australia,South Africa + addCharset (new Cp737()); // PC Greek + addCharset (new Cp775()); // PC Baltic + addCharset (new Cp850()); // MSDOS Latin1 + addCharset (new Cp852()); // MSDOS Latin2 + + // Some more codepages + addCharset (new Cp855()); // IBM Cyrillic + addCharset (new Cp857()); // IBM Turkish + addCharset (new Cp860()); // MSDOS Portuguese + addCharset (new Cp861()); // MSDOS Icelandic + addCharset (new Cp862()); // PC Hebrew + addCharset (new Cp863()); // MSDOS Can. French + addCharset (new Cp864()); // PC Arabic + addCharset (new Cp865()); // MSDOS Nordic + addCharset (new Cp866()); // MSDOS Russian + addCharset (new Cp869()); // IBM modern Greek + addCharset (new Cp874()); // IBM Thai } public Iterator charsets () Index: gnu/java/nio/charset/UTF_16.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/UTF_16.java,v retrieving revision 1.4 diff -u -r1.4 UTF_16.java --- gnu/java/nio/charset/UTF_16.java 3 Feb 2005 22:37:29 -0000 1.4 +++ gnu/java/nio/charset/UTF_16.java 7 Apr 2005 22:15:37 -0000 @@ -75,6 +75,6 @@ public CharsetEncoder newEncoder () { - return new UTF_16Encoder (this, UTF_16Encoder.BIG_ENDIAN, false); + return new UTF_16Encoder (this, UTF_16Encoder.BIG_ENDIAN, true); } } Index: gnu/java/nio/charset/UTF_16Decoder.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/UTF_16Decoder.java,v retrieving revision 1.3 diff -u -r1.3 UTF_16Decoder.java --- 
gnu/java/nio/charset/UTF_16Decoder.java 23 Apr 2004 21:13:20 -0000 1.3 +++ gnu/java/nio/charset/UTF_16Decoder.java 7 Apr 2005 22:15:37 -0000 @@ -55,8 +55,8 @@ static final int LITTLE_ENDIAN = 1; static final int UNKNOWN_ENDIAN = 2; - private static final char BYTE_ORDER_MARK = '\uFEFF'; - private static final char REVERSED_BYTE_ORDER_MARK = '\uFFFE'; + private static final char BYTE_ORDER_MARK = 0xFEFF; + private static final char REVERSED_BYTE_ORDER_MARK = 0xFFFE; private final int originalByteOrder; private int byteOrder; @@ -83,7 +83,7 @@ // handle byte order mark if (byteOrder == UNKNOWN_ENDIAN) { - char c = (char) ((b1 << 8) | b2); + char c = (char) (((b1 & 0xFF) << 8) | (b2 & 0xFF)); if (c == BYTE_ORDER_MARK) { byteOrder = BIG_ENDIAN; @@ -104,6 +104,7 @@ } } + // FIXME: Change so you only do a single comparison here. char c = byteOrder == BIG_ENDIAN ? (char) ((b1 << 8) | b2) : (char) ((b2 << 8) | b1); Index: gnu/java/nio/charset/UTF_16Encoder.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/UTF_16Encoder.java,v retrieving revision 1.3 diff -u -r1.3 UTF_16Encoder.java --- gnu/java/nio/charset/UTF_16Encoder.java 15 Oct 2004 08:59:55 -0000 1.3 +++ gnu/java/nio/charset/UTF_16Encoder.java 7 Apr 2005 22:15:37 -0000 @@ -38,6 +38,7 @@ package gnu.java.nio.charset; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; @@ -54,20 +55,21 @@ static final int BIG_ENDIAN = 0; static final int LITTLE_ENDIAN = 1; - private static final char BYTE_ORDER_MARK = '\uFEFF'; + private static final char BYTE_ORDER_MARK = 0xFEFF; - private final int byteOrder; + private final ByteOrder byteOrder; private final boolean useByteOrderMark; private boolean needsByteOrderMark; UTF_16Encoder (Charset cs, int byteOrder, boolean useByteOrderMark) { super (cs, 2.0f, - useByteOrderMark ? 
2.0f : 4.0f, + useByteOrderMark ? 4.0f : 2.0f, byteOrder == BIG_ENDIAN ? new byte[] { (byte) 0xFF, (byte) 0xFD } : new byte[] { (byte) 0xFD, (byte) 0xFF }); - this.byteOrder = byteOrder; + this.byteOrder = (byteOrder == BIG_ENDIAN) ? + ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN; this.useByteOrderMark = useByteOrderMark; this.needsByteOrderMark = useByteOrderMark; } @@ -76,11 +78,17 @@ { // TODO: Optimize this in the case in.hasArray() / out.hasArray() + ByteOrder originalBO = out.order(); + out.order(byteOrder); + if (needsByteOrderMark) { if (out.remaining () < 2) - return CoderResult.OVERFLOW; - put (out, BYTE_ORDER_MARK); + { + out.order(originalBO); + return CoderResult.OVERFLOW; + } + out.putChar (BYTE_ORDER_MARK); needsByteOrderMark = false; } @@ -90,7 +98,6 @@ while (in.hasRemaining ()) { char c = in.get (); - if (0xD800 <= c && c <= 0xDFFF) { // c is a surrogate @@ -104,19 +111,23 @@ // make sure d is a low surrogate if (d < 0xDC00 || d > 0xDFFF) return CoderResult.malformedForLength (1); - put (out, c); - put (out, d); + out.putChar (c); + out.putChar (d); inPos += 2; } else { if (out.remaining () < 2) - return CoderResult.OVERFLOW; - put (out, c); + { + // Restore the caller's byte order only when bailing out. + out.order(originalBO); + return CoderResult.OVERFLOW; + } + out.putChar (c); inPos++; } } - + out.order(originalBO); return CoderResult.UNDERFLOW; } finally @@ -125,24 +136,6 @@ } } - /** - * Writes c to out in the byte order - * specified by byteOrder. 
- **/ - private void put (ByteBuffer out, char c) - { - if (byteOrder == BIG_ENDIAN) - { - out.put ((byte) (c >> 8)); - out.put ((byte) (c & 0xFF)); - } - else - { - out.put ((byte) (c & 0xFF)); - out.put ((byte) (c >> 8)); - } - } - protected void implReset () { needsByteOrderMark = useByteOrderMark; Index: gnu/java/nio/charset/UTF_8.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/UTF_8.java,v retrieving revision 1.5 diff -u -r1.5 UTF_8.java --- gnu/java/nio/charset/UTF_8.java 16 Feb 2005 23:22:07 -0000 1.5 +++ gnu/java/nio/charset/UTF_8.java 7 Apr 2005 22:15:37 -0000 @@ -95,7 +95,7 @@ // Package-private to avoid a trampoline constructor. Decoder (Charset cs) { - super (cs, 1.0f, 1.0f); + super (cs, 0.9f, 1.0f); } protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out) @@ -108,8 +108,7 @@ { char c; byte b1 = in.get (); - int highNibble = (b1 >> 4) & 0xF; - + int highNibble = ((b1 & 0xFF) >> 4) & 0xF; switch (highNibble) { case 0: case 1: case 2: case 3: @@ -156,6 +155,31 @@ inPos += 3; break; + case 0xF: + byte b4; + if (in.remaining () < 3) + return CoderResult.UNDERFLOW; + if((b1&0x0F) > 4) + return CoderResult.malformedForLength (4); + if (out.remaining () < 2) + return CoderResult.OVERFLOW; + if (!isContinuation (b2 = in.get ())) + return CoderResult.malformedForLength (3); + if (!isContinuation (b3 = in.get ())) + return CoderResult.malformedForLength (2); + if (!isContinuation (b4 = in.get ())) + return CoderResult.malformedForLength (1); + int n = (((b1 & 0x7) << 18) // lead byte 1111 0uuu carries three payload bits + | ((b2 & 0x3F) << 12) + | ((b3 & 0x3F) << 6) + | (b4 & 0x3F)) - 0x10000; + char c1 = (char)(0xD800 | (n & 0xFFC00)>>10); + char c2 = (char)(0xDC00 | (n & 0x003FF)); + out.put (c1); + out.put (c2); + inPos += 4; + break; + default: return CoderResult.malformedForLength (1); } @@ -217,7 +241,6 @@ // u uuuu zzzz yyyy yyxx xxxx 1101 10ww wwzz zzyy 1111 0uuu 10uu zzzz 10yy yyyy 10xx xxxx // + 1101 11yy 
yyxx xxxx // Note: uuuuu = wwww + 1 - if (c <= 0x7F) { if (remaining < 1) @@ -256,11 +279,10 @@ // int value2 = (c - 0xD800) * 0x400 + (d - 0xDC00) + 0x10000; int value = (((c & 0x3FF) << 10) | (d & 0x3FF)) + 0x10000; // assert value == value2; - out.put ((byte) (0xF0 | (value >> 18))); + out.put ((byte) (0xF0 | ((value >> 18) & 0x07))); out.put ((byte) (0x80 | ((value >> 12) & 0x3F))); out.put ((byte) (0x80 | ((value >> 6) & 0x3F))); out.put ((byte) (0x80 | ((value ) & 0x3F))); - inPos += 2; } else