[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Fix u32toutf8 so it encodes values > 0xFFFF correctly.
From: |
John Kearney |
Subject: |
Fix u32toutf8 so it encodes values > 0xFFFF correctly. |
Date: |
Sat, 18 Feb 2012 11:39:43 +0100 |
User-agent: |
Mozilla/5.0 (X11; Linux i686; rv:10.0) Gecko/20120129 Thunderbird/10.0 |
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
Configuration Information [Automatically generated, do not change]:
Machine: x86_64
OS: linux-gnu
Compiler: gcc
Compilation CFLAGS: -DPROGRAM='bash' -DCONF_HOSTTYPE='x86_64'
- -DCONF_OSTYPE='linux-gnu' -DCONF_MACHTYPE='x86_64-pc-linux-gnu'
- -DCONF_VENDOR='pc' -DLOCALEDIR='/usr/share/locale' -DPACKAGE='bash'
- -DSHELL -DHAVE_CONFIG_H -I. -I../bash -I../bash/include
- -I../bash/lib -g -O2 -Wall
uname output: Linux DETH00 3.0.0-15-generic #26-Ubuntu SMP Fri Jan 20
17:23:00 UTC 2012 x86_64 x86_64 x86_64 GNU/Linux
Machine Type: x86_64-pc-linux-gnu
Bash Version: 4.2
Patch Level: 10
Release Status: release
Description:
The current u32toutf8 only encodes values up to 0xFFFF correctly.
wchar_t has an ambiguous (implementation-defined) size, so in my opinion it
would be better to use unsigned long, uint32_t, or some other clearly sized type.
Repeat-By:
-------'
Fix:
diff --git a/lib/sh/unicode.c b/lib/sh/unicode.c
index d34fa08..3f7d378 100644
- --- a/lib/sh/unicode.c
+++ b/lib/sh/unicode.c
@@ -54,7 +54,7 @@ extern const char *locale_charset __P((void));
extern char *get_locale_var __P((char *));
#endif
- -static int u32init = 0;
+static int u32init = 0;
static int utf8locale = 0;
#if defined (HAVE_ICONV)
static iconv_t localconv;
@@ -115,26 +115,61 @@ u32tochar (wc, s)
}
int
- -u32toutf8 (wc, s)
- - wchar_t wc;
+u32toutf8 (c, s)
+ unsigned long c;
char *s;
{
int l;
- - l = (wc < 0x0080) ? 1 : ((wc < 0x0800) ? 2 : 3);
- -
- - if (wc < 0x0080)
- - s[0] = (unsigned char)wc;
- - else if (wc < 0x0800)
+ if (c <= 0x7F)
+ {
+ s[0] = (char)c;
+ l = 1;
+ }
+ else if (c <= 0x7FF)
+ {
+ s[0] = (c >> 6) | 0xc0; /* 110x xxxx */
+ s[1] = (c & 0x3f) | 0x80; /* 10xx xxxx */
+ l = 2;
+ }
+ else if (c <= 0xFFFF)
+ {
+ s[0] = (c >> 12) | 0xe0; /* 1110 xxxx */
+ s[1] = ((c >> 6) & 0x3f) | 0x80; /* 10xx xxxx */
+ s[2] = (c & 0x3f) | 0x80; /* 10xx xxxx */
+ l = 3;
+ }
+ else if (c <= 0x1FFFFF)
{
- - s[0] = (wc >> 6) | 0xc0;
- - s[1] = (wc & 0x3f) | 0x80;
+ s[0] = (c >> 18) | 0xf0; /* 1111 0xxx */
+ s[1] = ((c >> 12) & 0x3f) | 0x80; /* 10xx xxxx */
+ s[2] = ((c >> 6) & 0x3f) | 0x80; /* 10xx xxxx */
+ s[3] = ( c & 0x3f) | 0x80; /* 10xx xxxx */
+ l = 4;
+ }
+ else if (c <= 0x3FFFFFF)
+ {
+ s[0] = (c >> 24) | 0xf8; /* 1111 10xx */
+ s[1] = ((c >> 18) & 0x3f) | 0x80; /* 10xx xxxx */
+ s[2] = ((c >> 12) & 0x3f) | 0x80; /* 10xx xxxx */
+ s[3] = ((c >> 6) & 0x3f) | 0x80; /* 10xx xxxx */
+ s[4] = ( c & 0x3f) | 0x80; /* 10xx xxxx */
+ l = 5;
+ }
+ else if (c <= 0x7FFFFFFF)
+ {
+ s[0] = (c >> 30) | 0xfc; /* 1111 110x */
+ s[1] = ((c >> 24) & 0x3f) | 0x80; /* 10xx xxxx */
+ s[2] = ((c >> 18) & 0x3f) | 0x80; /* 10xx xxxx */
+ s[3] = ((c >> 12) & 0x3f) | 0x80; /* 10xx xxxx */
+ s[4] = ((c >> 6) & 0x3f) | 0x80; /* 10xx xxxx */
+ s[5] = ( c & 0x3f) | 0x80; /* 10xx xxxx */
+ l = 6;
}
else
{
- - s[0] = (wc >> 12) | 0xe0;
- - s[1] = ((wc >> 6) & 0x3f) | 0x80;
- - s[2] = (wc & 0x3f) | 0x80;
+ /* Error Invalid UTF-8 */
+ l = 0;
}
s[l] = '\0';
return l;
@@ -150,7 +185,7 @@ u32cconv (c, s)
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/
iQEcBAEBAgAGBQJPP3/tAAoJEKUDtR0WmS059CcH/iIyBOGhf0IgSmnIFyw0YLpA
3ZWSaXWoEZodrDr1fX67hj2424icXm9fTZw70G+rS1YjtCfm86O/Qou4VNROylAv
TbjPUWkHRWVci7IqcDGb1tNWRrulxUvNFA/Uc1xBtKckAO6HHHRTYFa+sCkd5Fnx
dm7e0iMTqMMmL/dUwB+di+hSkGD+ZXS1vY76wizdwG7CteUxAVunse+ffP7TRYbn
K86Whc7p7llG12hruCPGArc9iS7YiBaC/XNIKXmN7fn93dhQTcdzzk/UTGmaZgDk
cQk4R7/NBljP4LtQtKwX4JYAi5XJM5TeSLykL97UFxW/5OGM+SmSVJbKLlHU/mQ=
=EJUb
-----END PGP SIGNATURE-----
- Fix u32toutf8 so it encodes values > 0xFFFF correctly.,
John Kearney <=
- Re: Fix u32toutf8 so it encodes values > 0xFFFF correctly., Chet Ramey, 2012/02/20
- Re: Fix u32toutf8 so it encodes values > 0xFFFF correctly., Eric Blake, 2012/02/21
- Re: Fix u32toutf8 so it encodes values > 0xFFFF correctly., John Kearney, 2012/02/21
- Re: Fix u32toutf8 so it encodes values > 0xFFFF correctly., Chet Ramey, 2012/02/21
- Initial test code for \U, John Kearney, 2012/02/21
- Here is a diff of all the changed to the unicode, John Kearney, 2012/02/21
- Re: Initial test code for \U, Chet Ramey, 2012/02/22
- Re: Initial test code for \U, Eric Blake, 2012/02/22
- Re: Initial test code for \U, John Kearney, 2012/02/26
- Re: Fix u32toutf8 so it encodes values > 0xFFFF correctly., Linda Walsh, 2012/02/22