Index: gnu/java/nio/charset/Provider.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/Provider.java,v
retrieving revision 1.3
diff -u -r1.3 Provider.java
--- gnu/java/nio/charset/Provider.java 3 Feb 2005 22:37:29 -0000 1.3
+++ gnu/java/nio/charset/Provider.java 7 Apr 2005 22:15:37 -0000
@@ -88,6 +88,66 @@
// ISO-8859-1 aka ISO-LATIN-1
addCharset (new ISO_8859_1 ());
+ // ISO-8859-2 aka ISO-LATIN-2
+ addCharset (new ISO_8859_2 ());
+
+ // ISO-8859-3 aka ISO-LATIN-3
+ addCharset (new ISO_8859_3 ());
+
+ // ISO-8859-4 aka ISO-LATIN-4
+ addCharset (new ISO_8859_4 ());
+
+ // ISO-8859-5 (Cyrillic)
+ addCharset (new ISO_8859_5 ());
+
+ // ISO-8859-6 (Arabic)
+ addCharset (new ISO_8859_6 ());
+
+ // ISO-8859-7 (Greek)
+ addCharset (new ISO_8859_7 ());
+
+ // ISO-8859-8 (Hebrew)
+ addCharset (new ISO_8859_8 ());
+
+ // ISO-8859-9 aka ISO-LATIN-5
+ addCharset (new ISO_8859_9 ());
+
+ // ISO-8859-13 aka ISO-LATIN-7
+ addCharset (new ISO_8859_13 ());
+
+ // ISO-8859-15 aka ISO-LATIN-9
+ addCharset (new ISO_8859_15 ());
+
+ // KOI8 (Cyrillic)
+ addCharset (new KOI_8 ());
+
+ // Windows-1250 aka cp-1250 (East European)
+ addCharset (new Windows1250 ());
+
+ // Windows-1251 (Cyrillic)
+ addCharset (new Windows1251 ());
+
+ // Windows-1252 aka cp-1252 (Latin-1)
+ addCharset (new Windows1252 ());
+
+ // Windows-1253 (Greek)
+ addCharset (new Windows1253 ());
+
+ // Windows-1254 (Turkish)
+ addCharset (new Windows1254 ());
+
+ // Windows-1255 (Hebrew)
+ addCharset (new Windows1255 ());
+
+ // Windows-1256 (Arabic)
+ addCharset (new Windows1256 ());
+
+ // Windows-1257 (Baltic)
+ addCharset (new Windows1257 ());
+
+ // Windows-1258 (Vietnamese)
+ addCharset (new Windows1258 ());
+
// UTF-8
addCharset (new UTF_8 ());
@@ -99,6 +159,42 @@
// UTF-16
addCharset (new UTF_16 ());
+
+ // MS874 Windows Thai
+ addCharset (new MS874 ());
+
+ // Macintosh charsets
+ addCharset (new MacCentralEurope());
+ addCharset (new MacDingbat());
+ addCharset (new MacRoman());
+ addCharset (new MacThai());
+ addCharset (new MacCroatian());
+ addCharset (new MacGreek());
+ addCharset (new MacRomania());
+ addCharset (new MacTurkish());
+ addCharset (new MacCyrillic());
+ addCharset (new MacIceland());
+ addCharset (new MacSymbol());
+
+ addCharset (new Cp424()); // IBM Hebrew EBCDIC
+ addCharset (new Cp437()); // MSDOS USA,NZ,Australia,South Africa
+ addCharset (new Cp737()); // PC Greek
+ addCharset (new Cp775()); // PC Baltic
+ addCharset (new Cp850()); // MSDOS Latin1
+ addCharset (new Cp852()); // MSDOS Latin2
+
+ // Some more codepages
+ addCharset (new Cp855()); // IBM Cyrillic
+ addCharset (new Cp857()); // IBM Turkish
+ addCharset (new Cp860()); // MSDOS Portuguese
+ addCharset (new Cp861()); // MSDOS Icelandic
+ addCharset (new Cp862()); // PC Hebrew
+ addCharset (new Cp863()); // MSDOS Can. French
+ addCharset (new Cp864()); // PC Arabic
+ addCharset (new Cp865()); // MSDOS Nordic
+ addCharset (new Cp866()); // MSDOS Russian
+ addCharset (new Cp869()); // IBM modern Greek
+ addCharset (new Cp874()); // IBM Thai
}
public Iterator charsets ()
Index: gnu/java/nio/charset/UTF_16.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/UTF_16.java,v
retrieving revision 1.4
diff -u -r1.4 UTF_16.java
--- gnu/java/nio/charset/UTF_16.java 3 Feb 2005 22:37:29 -0000 1.4
+++ gnu/java/nio/charset/UTF_16.java 7 Apr 2005 22:15:37 -0000
@@ -75,6 +75,6 @@
public CharsetEncoder newEncoder ()
{
- return new UTF_16Encoder (this, UTF_16Encoder.BIG_ENDIAN, false);
+ return new UTF_16Encoder (this, UTF_16Encoder.BIG_ENDIAN, true);
}
}
Index: gnu/java/nio/charset/UTF_16Decoder.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/UTF_16Decoder.java,v
retrieving revision 1.3
diff -u -r1.3 UTF_16Decoder.java
--- gnu/java/nio/charset/UTF_16Decoder.java 23 Apr 2004 21:13:20 -0000 1.3
+++ gnu/java/nio/charset/UTF_16Decoder.java 7 Apr 2005 22:15:37 -0000
@@ -55,8 +55,8 @@
static final int LITTLE_ENDIAN = 1;
static final int UNKNOWN_ENDIAN = 2;
- private static final char BYTE_ORDER_MARK = '\uFEFF';
- private static final char REVERSED_BYTE_ORDER_MARK = '\uFFFE';
+ private static final char BYTE_ORDER_MARK = 0xFEFF;
+ private static final char REVERSED_BYTE_ORDER_MARK = 0xFFFE;
private final int originalByteOrder;
private int byteOrder;
@@ -83,7 +83,7 @@
// handle byte order mark
if (byteOrder == UNKNOWN_ENDIAN)
{
- char c = (char) ((b1 << 8) | b2);
+ char c = (char) (((b1 & 0xFF) << 8) | (b2 & 0xFF));
if (c == BYTE_ORDER_MARK)
{
byteOrder = BIG_ENDIAN;
@@ -104,6 +104,7 @@
}
}
+ // FIXME: Change so you only do a single comparison here.
char c = byteOrder == BIG_ENDIAN ? (char) ((b1 << 8) | b2)
: (char) ((b2 << 8) | b1);
Index: gnu/java/nio/charset/UTF_16Encoder.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/UTF_16Encoder.java,v
retrieving revision 1.3
diff -u -r1.3 UTF_16Encoder.java
--- gnu/java/nio/charset/UTF_16Encoder.java 15 Oct 2004 08:59:55 -0000 1.3
+++ gnu/java/nio/charset/UTF_16Encoder.java 7 Apr 2005 22:15:37 -0000
@@ -38,6 +38,7 @@
package gnu.java.nio.charset;
import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
@@ -54,20 +55,21 @@
static final int BIG_ENDIAN = 0;
static final int LITTLE_ENDIAN = 1;
- private static final char BYTE_ORDER_MARK = '\uFEFF';
+ private static final char BYTE_ORDER_MARK = 0xFEFF;
- private final int byteOrder;
+ private final ByteOrder byteOrder;
private final boolean useByteOrderMark;
private boolean needsByteOrderMark;
UTF_16Encoder (Charset cs, int byteOrder, boolean useByteOrderMark)
{
super (cs, 2.0f,
- useByteOrderMark ? 2.0f : 4.0f,
+ useByteOrderMark ? 4.0f : 2.0f,
byteOrder == BIG_ENDIAN
? new byte[] { (byte) 0xFF, (byte) 0xFD }
: new byte[] { (byte) 0xFD, (byte) 0xFF });
- this.byteOrder = byteOrder;
+ this.byteOrder = (byteOrder == BIG_ENDIAN) ?
+ ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN;
this.useByteOrderMark = useByteOrderMark;
this.needsByteOrderMark = useByteOrderMark;
}
@@ -76,11 +78,17 @@
{
// TODO: Optimize this in the case in.hasArray() / out.hasArray()
+ ByteOrder originalBO = out.order();
+ out.order(byteOrder);
+
if (needsByteOrderMark)
{
if (out.remaining () < 2)
- return CoderResult.OVERFLOW;
- put (out, BYTE_ORDER_MARK);
+ {
+ out.order(originalBO);
+ return CoderResult.OVERFLOW;
+ }
+ out.putChar (BYTE_ORDER_MARK);
needsByteOrderMark = false;
}
@@ -90,7 +98,6 @@
while (in.hasRemaining ())
{
char c = in.get ();
-
if (0xD800 <= c && c <= 0xDFFF)
{
// c is a surrogate
@@ -104,19 +111,24 @@
// make sure d is a low surrogate
if (d < 0xDC00 || d > 0xDFFF)
return CoderResult.malformedForLength (1);
- put (out, c);
- put (out, d);
+ out.putChar (c);
+ out.putChar (d);
inPos += 2;
}
else
{
if (out.remaining () < 2)
- return CoderResult.OVERFLOW;
- put (out, c);
+ {
+ // Restore the caller's byte order only on this early exit;
+ // restoring it before putChar would emit c in the wrong order.
+ out.order(originalBO);
+ return CoderResult.OVERFLOW;
+ }
+ out.putChar (c);
inPos++;
}
}
-
+ out.order(originalBO);
return CoderResult.UNDERFLOW;
}
finally
@@ -125,24 +133,6 @@
}
}
- /**
- * Writes <code>c</code> to <code>out</code> in the byte order
- * specified by <code>byteOrder</code>.
- **/
- private void put (ByteBuffer out, char c)
- {
- if (byteOrder == BIG_ENDIAN)
- {
- out.put ((byte) (c >> 8));
- out.put ((byte) (c & 0xFF));
- }
- else
- {
- out.put ((byte) (c & 0xFF));
- out.put ((byte) (c >> 8));
- }
- }
-
protected void implReset ()
{
needsByteOrderMark = useByteOrderMark;
Index: gnu/java/nio/charset/UTF_8.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/UTF_8.java,v
retrieving revision 1.5
diff -u -r1.5 UTF_8.java
--- gnu/java/nio/charset/UTF_8.java 16 Feb 2005 23:22:07 -0000 1.5
+++ gnu/java/nio/charset/UTF_8.java 7 Apr 2005 22:15:37 -0000
@@ -95,7 +95,7 @@
// Package-private to avoid a trampoline constructor.
Decoder (Charset cs)
{
- super (cs, 1.0f, 1.0f);
+ super (cs, 0.9f, 1.0f); // max must stay 1.0: each ASCII byte yields one char
}
protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
@@ -108,8 +108,7 @@
{
char c;
byte b1 = in.get ();
- int highNibble = (b1 >> 4) & 0xF;
-
+ int highNibble = ((b1 & 0xFF) >> 4) & 0xF;
switch (highNibble)
{
case 0: case 1: case 2: case 3:
@@ -156,6 +155,31 @@
inPos += 3;
break;
+ case 0xF: // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx -> surrogate pair
+ byte b4;
+ if (in.remaining () < 3)
+ return CoderResult.UNDERFLOW;
+ if((b1&0x0F) > 4) // lead bytes 0xF5..0xFF lie beyond U+10FFFF
+ return CoderResult.malformedForLength (4);
+ if (out.remaining () < 2)
+ return CoderResult.OVERFLOW;
+ if (!isContinuation (b2 = in.get ()))
+ return CoderResult.malformedForLength (3);
+ if (!isContinuation (b3 = in.get ()))
+ return CoderResult.malformedForLength (2);
+ if (!isContinuation (b4 = in.get ()))
+ return CoderResult.malformedForLength (1);
+ int n = (((b1 & 0x07) << 18) // lead byte carries 3 payload bits (0xF4 -> 4)
+ | ((b2 & 0x3F) << 12)
+ | ((b3 & 0x3F) << 6)
+ | (b4 & 0x3F)) - 0x10000;
+ char c1 = (char)(0xD800 | (n & 0xFFC00)>>10); // high surrogate: top 10 bits
+ char c2 = (char)(0xDC00 | (n & 0x003FF)); // low surrogate: bottom 10 bits
+ out.put (c1);
+ out.put (c2);
+ inPos += 4;
+ break;
+
default:
return CoderResult.malformedForLength (1);
}
@@ -217,7 +241,6 @@
// u uuuu zzzz yyyy yyxx xxxx 1101 10ww wwzz zzyy 1111 0uuu 10uu zzzz 10yy yyyy 10xx xxxx
// + 1101 11yy yyxx xxxx
// Note: uuuuu = wwww + 1
-
if (c <= 0x7F)
{
if (remaining < 1)
@@ -256,11 +279,10 @@
// int value2 = (c - 0xD800) * 0x400 + (d - 0xDC00) + 0x10000;
int value = (((c & 0x3FF) << 10) | (d & 0x3FF)) + 0x10000;
// assert value == value2;
- out.put ((byte) (0xF0 | (value >> 18)));
+ out.put ((byte) (0xF0 | ((value >> 18) & 0x07)));
out.put ((byte) (0x80 | ((value >> 12) & 0x3F)));
out.put ((byte) (0x80 | ((value >> 6) & 0x3F)));
out.put ((byte) (0x80 | ((value ) & 0x3F)));
-
inPos += 2;
}
else