[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] libunistring: update to Unicode 8.0.0
From: Daiki Ueno
Subject: [PATCH] libunistring: update to Unicode 8.0.0
Date: Thu, 18 Jun 2015 17:42:47 +0900
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/25.0.50 (gnu/linux)
* lib/gen-uni-tables.c (SIZEOF): New macro.
(output_numeric): Increase the maximum number of fractions from
128 to 160. Increase the level3 value width from 7 bits to 8
bits. Use SIZEOF instead of a hard-coded integer.
(output_blocks): Decrease the cut-off threshold from 0x30000 to
0x28000.
(fill_blocks): Increase the maximum number of blocks from 256 to
384. Use SIZEOF instead of a hard-coded integer.
(get_lbp): Adjust to new characters added in Unicode 8.0.0.
* lib/unictype/numeric.c (uc_numeric_value): Adjust the level3
value width.
* lib/unilbrk/lbrktables.c (unilbrk_table): Implement LBP21b and
a new case added to LBP22.
* lib/uniwidth/width.c (nonspacing_table_data): Add U+08E3,
U+A69E, U+FE2E..U+FE2F, U+111CA..U+111CC, U+11300,
U+115DC..U+115DD, U+1171D..U+1171F, U+11722..U+11725,
U+11727..U+1172B, U+1DA00..U+1DA36, U+1DA3B..U+1DA6C, U+1DA75,
U+1DA84, U+1DA9B..U+1DA9F, and U+1DAA1..U+1DAAF.
* tests/uniwidth/test-uc_width2.sh: Same updates as in
lib/uniwidth/width.c.
* all generated files under lib/uni* and tests/uni*: Regenerate.
---
ChangeLog | 25 +++++++++++++++
lib/gen-uni-tables.c | 69 ++++++++++++++++++++++++++--------------
lib/unictype/numeric.c | 6 ++--
lib/unilbrk/lbrktables.c | 4 +--
lib/uniwidth/width.c | 29 +++++++++++------
tests/uniwidth/test-uc_width2.sh | 45 +++++++++++++++++++-------
6 files changed, 127 insertions(+), 51 deletions(-)
Because of the message size limit, I am sending only the manually adjusted
files. The generated files have already been committed as well.
diff --git a/ChangeLog b/ChangeLog
index 53fc032..8cd1336 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2015-06-18 Daiki Ueno <address@hidden>
+
+ libunistring: update to Unicode 8.0.0
+ * lib/gen-uni-tables.c (SIZEOF): New macro.
+ (output_numeric): Increase the maximum number of fractions from
+ 128 to 160. Increase the level3 value width from 7 bits to 8
+ bits. Use SIZEOF instead of a hard-coded integer.
+ (output_blocks): Decrease the cut-off threshold from 0x30000 to
+ 0x28000.
+ (fill_blocks): Increase the maximum number of blocks from 256 to
+ 384. Use SIZEOF instead of a hard-coded integer.
+ (get_lbp): Adjust to new characters added in Unicode 8.0.0.
+ * lib/unictype/numeric.c (uc_numeric_value): Adjust the level3
+ value width.
+ * lib/unilbrk/lbrktables.c (unilbrk_table): Implement LBP21b and
+ a new case added to LBP22.
+ * lib/uniwidth/width.c (nonspacing_table_data): Add U+08E3,
+ U+A69E, U+FE2E..U+FE2F, U+111CA..U+111CC, U+11300,
+ U+115DC..U+115DD, U+1171D..U+1171F, U+11722..U+11725,
+ U+11727..U+1172B, U+1DA00..U+1DA36, U+1DA3B..U+1DA6C, U+1DA75,
+ U+1DA84, U+1DA9B..U+1DA9F, and U+1DAA1..U+1DAAF.
+ * tests/uniwidth/test-uc_width2.sh: Same updates as in
+ lib/uniwidth/width.c.
+ * all generated files under lib/uni* and tests/uni*: Regenerate.
+
2015-06-16 Pádraig Brady <address@hidden>
gnu-web-doc-update: add --mirror to remove stale files
diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c
index 03fac9f..b69229f 100644
--- a/lib/gen-uni-tables.c
+++ b/lib/gen-uni-tables.c
@@ -43,6 +43,8 @@
#include <string.h>
#include <time.h>
+#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
+
/* ========================================================================= */
/* Reading UnicodeData.txt. */
@@ -2130,7 +2132,7 @@ static void
output_numeric (const char *filename, const char *version)
{
FILE *stream;
- uc_fraction_t fractions[128];
+ uc_fraction_t fractions[160];
unsigned int nfractions;
unsigned int ch, i, j;
struct numeric_table t;
@@ -2161,7 +2163,7 @@ output_numeric (const char *filename, const char *version)
break;
if (i == nfractions)
{
- assert (nfractions != 128);
+ assert (nfractions != SIZEOF (fractions));
for (i = 0; i < nfractions; i++)
if (value.denominator < fractions[i].denominator
|| (value.denominator == fractions[i].denominator
@@ -2226,7 +2228,7 @@ output_numeric (const char *filename, const char *version)
fprintf (stream, " int level1[%zu];\n", t.level1_size);
fprintf (stream, " short level2[%zu << %d];\n", t.level2_size, t.q);
fprintf (stream, " unsigned short level3[%zu * %d + 1];\n", t.level3_size,
- (1 << t.p) * 7 / 16);
+ (1 << t.p) * 8 / 16);
fprintf (stream, " }\n");
fprintf (stream, "u_numeric =\n");
fprintf (stream, "{\n");
@@ -2270,32 +2272,32 @@ output_numeric (const char *filename, const char *version)
if (t.level2_size << t.q > 8)
fprintf (stream, "\n ");
fprintf (stream, " },\n");
- /* Pack the level3 array. Each entry needs 7 bits only. Use 16-bit units,
+ /* Pack the level3 array. Each entry needs 8 bits only. Use 16-bit units,
not 32-bit units, in order to make the lookup function easier. */
level3_packed =
(uint16_t *)
- calloc ((t.level3_size << t.p) * 7 / 16 + 1, sizeof (uint16_t));
+ calloc ((t.level3_size << t.p) * 8 / 16 + 1, sizeof (uint16_t));
for (i = 0; i < t.level3_size << t.p; i++)
{
- unsigned int j = (i * 7) / 16;
- unsigned int k = (i * 7) % 16;
+ unsigned int j = (i * 8) / 16;
+ unsigned int k = (i * 8) % 16;
uint32_t value = ((unsigned char *) (t.result + level3_offset))[i];
value = level3_packed[j] | (level3_packed[j+1] << 16) | (value << k);
level3_packed[j] = value & 0xffff;
level3_packed[j+1] = value >> 16;
}
fprintf (stream, " {");
- if ((t.level3_size << t.p) * 7 / 16 + 1 > 8)
+ if ((t.level3_size << t.p) * 8 / 16 + 1 > 8)
fprintf (stream, "\n ");
- for (i = 0; i < (t.level3_size << t.p) * 7 / 16 + 1; i++)
+ for (i = 0; i < (t.level3_size << t.p) * 8 / 16 + 1; i++)
{
if (i > 0 && (i % 8) == 0)
fprintf (stream, "\n ");
fprintf (stream, " 0x%04x", level3_packed[i]);
- if (i+1 < (t.level3_size << t.p) * 7 / 16 + 1)
+ if (i+1 < (t.level3_size << t.p) * 8 / 16 + 1)
fprintf (stream, ",");
}
- if ((t.level3_size << t.p) * 7 / 16 + 1 > 8)
+ if ((t.level3_size << t.p) * 8 / 16 + 1 > 8)
fprintf (stream, "\n ");
fprintf (stream, " }\n");
free (level3_packed);
@@ -4772,7 +4774,7 @@ output_scripts_byname (const char *version)
typedef struct { unsigned int start; unsigned int end; const char *name; }
block_t;
-static block_t blocks[256];
+static block_t blocks[384];
static unsigned int numblocks;
static void
@@ -4811,7 +4813,7 @@ fill_blocks (const char *blocks_filename)
/* It must be sorted. */
assert (numblocks == 0 || blocks[numblocks-1].end <
blocks[numblocks].start);
numblocks++;
- assert (numblocks != 256);
+ assert (numblocks != SIZEOF (blocks));
}
if (ferror (stream) || fclose (stream))
@@ -4869,7 +4871,7 @@ output_blocks (const char *version)
{
const char *filename = "unictype/blocks.h";
const unsigned int shift = 8; /* bits to shift away for array access */
- const unsigned int threshold = 0x30000; /* cut-off table here to save space */
+ const unsigned int threshold = 0x28000; /* cut-off table here to save space */
FILE *stream;
unsigned int i;
unsigned int i1;
@@ -6339,8 +6341,8 @@ get_lbp (unsigned int ch)
{
int64_t attr = 0;
- /* U+20BC..U+20CF is reserved for prefixes. */
- if (ch >= 0x20BC && ch <= 0x20CF)
+ /* U+20BC..U+20CF are reserved for prefixes. */
+ if (unicode_attributes[ch].name == NULL && (ch >= 0x20BC && ch <= 0x20CF))
return (int64_t) 1 << LBP_PR;
if (unicode_attributes[ch].name != NULL)
@@ -6546,15 +6548,18 @@ get_lbp (unsigned int ch)
|| ch == 0x111C5 /* SHARADA DANDA */
|| ch == 0x111C6 /* SHARADA DOUBLE DANDA */
|| ch == 0x111C8 /* SHARADA SEPARATOR */
+ || (ch >= 0x111DD && ch <= 0x111DF) /* SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 */
|| ch == 0x11238 /* KHOJKI DANDA */
|| ch == 0x11239 /* KHOJKI DOUBLE DANDA */
|| ch == 0x1123B /* KHOJKI SECTION MARK */
|| ch == 0x1123C /* KHOJKI DOUBLE SECTION MARK */
+ || ch == 0x112A9 /* MULTANI SECTION MARK */
|| ch == 0x115C2 /* SIDDHAM DANDA */
|| ch == 0x115C3 /* SIDDHAM DOUBLE DANDA */
- || ch == 0x115C9 /* SIDDHAM END OF TEXT MARK */
+ || (ch >= 0x115C9 && ch <= 0x115D7) /* SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES */
|| ch == 0x11641 /* MODI DANDA */
|| ch == 0x11642 /* MODI DOUBLE DANDA */
+ || (ch >= 0x1173C && ch <= 0x1173E) /* AHOM SIGN SMALL SECTION..AHOM SIGN RULAI */
|| ch == 0x12471 /* CUNEIFORM PUNCTUATION SIGN VERTICAL COLON */
|| ch == 0x12472 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL COLON */
|| ch == 0x12473 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON */
@@ -6566,7 +6571,8 @@ get_lbp (unsigned int ch)
|| ch == 0x16B38 /* PAHAWH HMONG SIGN VOS TSHAB CEEB */
|| ch == 0x16B39 /* PAHAWH HMONG SIGN CIM CHEEM */
|| ch == 0x16B44 /* PAHAWH HMONG SIGN XAUS */
- || ch == 0x1BC9F /* DUPLOYAN PUNCTUATION CHINOOK FULL STOP */)
+ || ch == 0x1BC9F /* DUPLOYAN PUNCTUATION CHINOOK FULL STOP */
+ || (ch >= 0x1DA87 && ch <= 0x1DA8A) /* SIGNWRITING COMMA..SIGNWRITING COLON */)
attr |= (int64_t) 1 << LBP_BA;
/* break opportunity before */
@@ -6588,8 +6594,10 @@ get_lbp (unsigned int ch)
|| ch == 0x0FD3 /* TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA
*/
|| ch == 0xA874 /* PHAGS-PA SINGLE HEAD MARK */
|| ch == 0xA875 /* PHAGS-PA DOUBLE HEAD MARK */
+ || ch == 0xA8FC /* DEVANAGARI SIGN SIDDHAM */
|| ch == 0x1806 /* MONGOLIAN TODO SOFT HYPHEN */
|| ch == 0x11175 /* MAHAJANI SECTION MARK */
+ || ch == 0x111DB /* SHARADA SIGN SIDDHAM */
|| ch == 0x115C1 /* SIDDHAM SIGN SIDDHAM */)
attr |= (int64_t) 1 << LBP_BB;
@@ -6628,7 +6636,8 @@ get_lbp (unsigned int ch)
|| ch == 0x13287 /* EGYPTIAN HIEROGLYPH O036B */
|| ch == 0x13289 /* EGYPTIAN HIEROGLYPH O036D */
|| ch == 0x1337A /* EGYPTIAN HIEROGLYPH V011B */
- || ch == 0x1337B /* EGYPTIAN HIEROGLYPH V011C */)
+ || ch == 0x1337B /* EGYPTIAN HIEROGLYPH V011C */
+ || ch == 0x145CF /* ANATOLIAN HIEROGLYPH A410A END LOGOGRAM MARK */)
attr |= (int64_t) 1 << LBP_CL;
/* exclamation/interrogation */
@@ -6674,6 +6683,7 @@ get_lbp (unsigned int ch)
if (ch == 0x2024 /* ONE DOT LEADER */
|| ch == 0x2025 /* TWO DOT LEADER */
|| ch == 0x2026 /* HORIZONTAL ELLIPSIS */
+ || ch == 0x22EF /* MIDLINE HORIZONTAL ELLIPSIS */
|| ch == 0xFE19 /* PRESENTATION FORM FOR VERTICAL HORIZONTAL
ELLIPSIS */
|| ch == 0x10AF6 /* MANICHAEAN PUNCTUATION LINE FILLER */)
attr |= (int64_t) 1 << LBP_IN;
@@ -6726,7 +6736,8 @@ get_lbp (unsigned int ch)
|| ch == 0x1325A /* EGYPTIAN HIEROGLYPH O006C */
|| ch == 0x13286 /* EGYPTIAN HIEROGLYPH O036A */
|| ch == 0x13288 /* EGYPTIAN HIEROGLYPH O036C */
- || ch == 0x13379 /* EGYPTIAN HIEROGLYPH V011A */)
+ || ch == 0x13379 /* EGYPTIAN HIEROGLYPH V011A */
+ || ch == 0x145CE /* ANATOLIAN HIEROGLYPH A410 BEGIN LOGOGRAM MARK */)
attr |= (int64_t) 1 << LBP_OP;
/* ambiguous quotation */
@@ -6806,6 +6817,7 @@ get_lbp (unsigned int ch)
|| ch == 0x09F9 /* BENGALI CURRENCY DENOMINATOR SIXTEEN */
|| ch == 0x0D79 /* MALAYALAM DATE MARK */
|| ch == 0x20B6 /* LIVRE TOURNOIS SIGN */
+ || ch == 0x20BE /* LARI SIGN */
|| ch == 0xA838 /* NORTH INDIC RUPEE MARK */)
attr |= (int64_t) 1 << LBP_PO;
@@ -6868,7 +6880,9 @@ get_lbp (unsigned int ch)
|| (ch >= 0xA9E0 && ch <= 0xA9EF) /* Myanmar */
|| (ch >= 0xA9FA && ch <= 0xA9FE) /* Myanmar */
|| (ch >= 0xAA77 && ch <= 0xAA79) /* MYANMAR SYMBOL AITON */
- || (ch >= 0xAADE && ch <= 0xAADF) /* TAI VIET SYMBOL */)
+ || (ch >= 0xAADE && ch <= 0xAADF) /* TAI VIET SYMBOL */
+ || (ch >= 0x1173A && ch <= 0x1173B) /* Ahom */
+ || ch == 0x1173F /* Ahom */)
&& ((ch >= 0x0E00 && ch <= 0x0EFF) /* Thai, Lao */
|| (ch >= 0x1000 && ch <= 0x109F) /* Myanmar */
|| (ch >= 0x1780 && ch <= 0x17FF) /* Khmer */
@@ -6876,7 +6890,11 @@ get_lbp (unsigned int ch)
|| (ch >= 0x1A20 && ch <= 0x1AAF) /* Tai Tham */
|| (ch >= 0xA9E0 && ch <= 0xA9EF) /* Myanmar */
|| (ch >= 0xA9FA && ch <= 0xA9FE) /* Myanmar */
- || (ch >= 0xAA60 && ch <= 0xAADF) /* Myanmar Extended-A, Tai Viet */))
+ || (ch >= 0xAA60 && ch <= 0xAADF) /* Myanmar Extended-A, Tai Viet */
+ || (ch >= 0x11700 && ch <= 0x11719) /* Ahom */
+ || (ch >= 0x1171D && ch <= 0x1172B) /* Ahom */
+ || (ch >= 0x1173A && ch <= 0x1173B) /* Ahom */
+ || ch == 0x1173F /* Ahom */))
attr |= (int64_t) 1 << LBP_SA;
/* attached characters and combining marks */
@@ -7039,17 +7057,20 @@ get_lbp (unsigned int ch)
&& ch != 0x1F4A0 && ch != 0x1F4A2 && ch != 0x1F4A4
&& ch != 0x1F4AF && ch != 0x1F4B1 && ch != 0x1F4B2
&& !(ch >= 0x1F39C && ch <= 0x1F39D)
+ && !(ch >= 0x1F3FB && ch <= 0x1F3FF)
&& !(ch >= 0x1F500 && ch <= 0x1F506)
&& !(ch >= 0x1F517 && ch <= 0x1F524)
&& !(ch >= 0x1F532 && ch <= 0x1F549)
&& !(ch >= 0x1F5D4 && ch <= 0x1F5DB)
&& !(ch >= 0x1F5F4 && ch <= 0x1F5F9))
|| (ch >= 0x1F600 && ch <= 0x1F64F) /* Emoticons */
- || (ch >= 0x1F680 && ch <= 0x1F6CF) /* Transport and Map Symbols */
+ || (ch >= 0x1F680 && ch <= 0x1F6D0) /* Transport and Map Symbols */
|| (ch >= 0x1F6E0 && ch <= 0x1F6EC) /* Transport and Map Symbols */
|| (ch >= 0x1F6F0 && ch <= 0x1F6F3) /* Transport and Map Symbols */
+ || (ch >= 0x1F900 && ch <= 0x1F9FF) /* Supplemental Symbols and Pictographs */
|| (ch >= 0x2A700 && ch <= 0x2B734) /* CJK Ideograph Extension C */
- || (ch >= 0x2B740 && ch <= 0x2B81D) /* CJK Ideograph Extension D */)
+ || (ch >= 0x2B740 && ch <= 0x2B81D) /* CJK Ideograph Extension D */
+ || (ch >= 0x2B820 && ch <= 0x2CEAF) /* CJK Ideograph Extension E */)
if (!(attr & (((int64_t) 1 << LBP_NS) | ((int64_t) 1 << LBP_CM))))
{
/* ambiguous (ideograph) ? */
diff --git a/lib/unictype/numeric.c b/lib/unictype/numeric.c
index 63d0212..cc716de 100644
--- a/lib/unictype/numeric.c
+++ b/lib/unictype/numeric.c
@@ -36,13 +36,13 @@ uc_numeric_value (ucs4_t uc)
int lookup2 = u_numeric.level2[lookup1 + index2];
if (lookup2 >= 0)
{
- unsigned int index3 = ((uc & numeric_header_4) + lookup2) * 7;
- /* level3 contains 7-bit values, packed into 16-bit words. */
+ unsigned int index3 = ((uc & numeric_header_4) + lookup2) * 8;
+ /* level3 contains 8-bit values, packed into 16-bit words. */
unsigned int lookup3 =
((u_numeric.level3[index3>>4]
| (u_numeric.level3[(index3>>4)+1] << 16))
>> (index3 % 16))
- & 0x7f;
+ & 0xff;
return u_numeric_values[lookup3];
}
diff --git a/lib/unilbrk/lbrktables.c b/lib/unilbrk/lbrktables.c
index acafb52..5702a93 100644
--- a/lib/unilbrk/lbrktables.c
+++ b/lib/unilbrk/lbrktables.c
@@ -35,7 +35,7 @@ const unsigned char unilbrk_table[27][27] =
/* HY */ { P, D, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D,
D, D, D, D, D, },
/* CL */ { P, I, D, I, D, I, P, P, P, D, P, D, I, P, D, I, I, P, D, D, D, D,
D, D, D, D, D, },
/* CP */ { P, I, D, I, D, I, P, P, P, D, P, D, I, P, I, I, I, P, I, D, D, D,
D, D, D, I, D, },
-/* EX */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, D, D, },
+/* EX */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, D, D, },
/* IN */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, D, D, },
/* NS */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, D, D, },
/* OP */ { P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P,
P, P, P, P, P, },
@@ -44,7 +44,7 @@ const unsigned char unilbrk_table[27][27] =
/* NU */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, I, I, P, I, D, D, D,
D, D, D, I, D, },
/* PO */ { P, I, D, I, D, I, P, P, P, D, I, I, I, P, I, D, D, P, I, D, D, D,
D, D, D, I, D, },
/* PR */ { P, I, D, I, D, I, P, P, P, D, I, I, I, P, I, D, D, P, I, I, I, I,
I, I, I, I, D, },
-/* SY */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D,
D, D, D, D, D, },
+/* SY */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D,
D, D, D, I, D, },
/* AL */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, D, D, P, I, D, D, D,
D, D, D, I, D, },
/* H2 */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D,
D, I, I, D, D, },
/* H3 */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D,
D, D, I, D, D, },
diff --git a/lib/uniwidth/width.c b/lib/uniwidth/width.c
index b499b11..a651499 100644
--- a/lib/uniwidth/width.c
+++ b/lib/uniwidth/width.c
@@ -32,7 +32,7 @@
* - Zero width characters; generated from
* "grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt"
*/
-static const unsigned char nonspacing_table_data[35*64] = {
+static const unsigned char nonspacing_table_data[36*64] = {
/* 0x0000-0x01ff */
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0000-0x003f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0x0040-0x007f */
@@ -73,7 +73,7 @@ static const unsigned char nonspacing_table_data[35*64] = {
0x00, 0x00, 0xc0, 0xfb, 0xef, 0x3e, 0x00, 0x00, /* 0x0800-0x083f */
0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, /* 0x0840-0x087f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0880-0x08bf */
- 0x00, 0x00, 0x00, 0x00, 0xf0, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */
+ 0x00, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */
0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, /* 0x0900-0x093f */
0xfe, 0x21, 0xfe, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x0940-0x097f */
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0980-0x09bf */
@@ -189,7 +189,7 @@ static const unsigned char nonspacing_table_data[35*64] = {
/* 0xa600-0xa7ff */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa600-0xa63f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xf7, 0x3f, /* 0xa640-0xa67f */
- 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, /* 0xa680-0xa6bf */
+ 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, /* 0xa680-0xa6bf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, /* 0xa6c0-0xa6ff */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa700-0xa73f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa740-0xa77f */
@@ -223,7 +223,7 @@ static const unsigned char nonspacing_table_data[35*64] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xfb80-0xfbbf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xfbc0-0xfbff */
/* 0xfe00-0xffff */
- 0xff, 0xff, 0x00, 0x00, 0xff, 0x3f, 0x00, 0x00, /* 0xfe00-0xfe3f */
+ 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, /* 0xfe00-0xfe3f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xfe40-0xfe7f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xfe80-0xfebf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0xfec0-0xfeff */
@@ -266,13 +266,13 @@ static const unsigned char nonspacing_table_data[35*64] = {
0x07, 0x00, 0x00, 0x00, 0x80, 0xef, 0x1f, 0x00, /* 0x11100-0x1113f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, /* 0x11140-0x1117f */
0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x7f, /* 0x11180-0x111bf */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x111c0-0x111ff */
+ 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x111c0-0x111ff */
/* 0x11200-0x113ff */
0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xd3, 0x00, /* 0x11200-0x1123f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11240-0x1127f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11280-0x112bf */
0x00, 0x00, 0x00, 0x80, 0xf8, 0x07, 0x00, 0x00, /* 0x112c0-0x112ff */
- 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x11300-0x1133f */
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x11300-0x1133f */
0x01, 0x00, 0x00, 0x00, 0xc0, 0x1f, 0x1f, 0x00, /* 0x11340-0x1137f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11380-0x113bf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x113c0-0x113ff */
@@ -284,13 +284,13 @@ static const unsigned char nonspacing_table_data[35*64] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11500-0x1153f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11540-0x1157f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0xb0, /* 0x11580-0x115bf */
- 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x115c0-0x115ff */
+ 0x01, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, /* 0x115c0-0x115ff */
/* 0x11600-0x117ff */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0xa7, /* 0x11600-0x1163f */
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11640-0x1167f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0xbf, 0x00, /* 0x11680-0x116bf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x116c0-0x116ff */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11700-0x1173f */
+ 0x00, 0x00, 0x00, 0xe0, 0xbc, 0x0f, 0x00, 0x00, /* 0x11700-0x1173f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11740-0x1177f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11780-0x117bf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x117c0-0x117ff */
@@ -339,6 +339,15 @@ static const unsigned char nonspacing_table_data[35*64] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d340-0x1d37f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d380-0x1d3bf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d3c0-0x1d3ff */
+ /* 0x1da00-0x1dbff */
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0xf8, /* 0x1da00-0x1da3f */
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x20, 0x00, /* 0x1da40-0x1da7f */
+ 0x10, 0x00, 0x00, 0xf8, 0xfe, 0xff, 0x00, 0x00, /* 0x1da80-0x1dabf */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1dac0-0x1daff */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1db00-0x1db3f */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1db40-0x1db7f */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1db80-0x1dbbf */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1dbc0-0x1dbff */
/* 0x1e800-0x1e9ff */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e800-0x1e83f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e840-0x1e87f */
@@ -379,8 +388,8 @@ static const signed char nonspacing_table_ind[248] = {
-1, -1, -1, -1, -1, -1, -1, -1, /* 0x1a000-0x1afff */
-1, -1, -1, -1, -1, -1, 31, -1, /* 0x1b000-0x1bfff */
-1, -1, -1, -1, -1, -1, -1, -1, /* 0x1c000-0x1cfff */
- 32, 33, -1, -1, -1, -1, -1, -1, /* 0x1d000-0x1dfff */
- -1, -1, -1, -1, 34, -1, -1, -1 /* 0x1e000-0x1efff */
+ 32, 33, -1, -1, -1, 34, -1, -1, /* 0x1d000-0x1dfff */
+ -1, -1, -1, -1, 35, -1, -1, -1 /* 0x1e000-0x1efff */
};
/* Determine number of column positions required for UC. */
diff --git a/tests/uniwidth/test-uc_width2.sh b/tests/uniwidth/test-uc_width2.sh
index 8ab3331..1463d34 100755
--- a/tests/uniwidth/test-uc_width2.sh
+++ b/tests/uniwidth/test-uc_width2.sh
@@ -65,8 +65,8 @@ cat > uc_width.ok <<\EOF
0829..082D 0
082E..0858 A
0859..085B 0
-085C..08E3 A
-08E4..0902 0
+085C..08E2 A
+08E3..0902 0
0903..0939 A
093A 0
093B A
@@ -363,8 +363,8 @@ A4D0..A66E A
A66F..A672 0
A673 A
A674..A67D 0
-A67E..A69E A
-A69F 0
+A67E..A69D A
+A69E..A69F 0
A6A0..A6EF A
A6F0..A6F1 0
A6F2..A801 A
@@ -434,8 +434,7 @@ FB1E 0
FB1F..FDFF A
FE00..FE0F 0
FE10..FE1F 2
-FE20..FE2D 0
-FE2E..FE2F A
+FE20..FE2F 0
FE30..FE6F 2
FE70..FEFE A
FEFF 0
@@ -486,7 +485,9 @@ FFFC..101FC 1
11180..11181 0
11182..111B5 1
111B6..111BE 0
-111BF..1122E 1
+111BF..111C9 1
+111CA..111CC 0
+111CD..1122E 1
1122F..11231 0
11232..11233 1
11234 0
@@ -496,8 +497,8 @@ FFFC..101FC 1
112DF 0
112E0..112E2 1
112E3..112EA 0
-112EB..11300 1
-11301 0
+112EB..112FF 1
+11300..11301 0
11302..1133B 1
1133C 0
1133D..1133F 1
@@ -520,7 +521,9 @@ FFFC..101FC 1
115BC..115BD 0
115BE 1
115BF..115C0 0
-115C1..11632 1
+115C1..115DB 1
+115DC..115DD 0
+115DE..11632 1
11633..1163A 0
1163B..1163C 1
1163D 0
@@ -534,7 +537,13 @@ FFFC..101FC 1
116B0..116B5 0
116B6 1
116B7 0
-116B8..16AEF 1
+116B8..1171C 1
+1171D..1171F 0
+11720..11721 1
+11722..11725 0
+11726 1
+11727..1172B 0
+1172C..16AEF 1
16AF0..16AF4 0
16AF5..16B2F 1
16B30..16B36 0
@@ -554,7 +563,19 @@ FFFC..101FC 1
1D1AA..1D1AD 0
1D1AE..1D241 1
1D242..1D244 0
-1D245..1E8CF 1
+1D245..1D9FF 1
+1DA00..1DA36 0
+1DA37..1DA3A 1
+1DA3B..1DA6C 0
+1DA6D..1DA74 1
+1DA75 0
+1DA76..1DA83 1
+1DA84 0
+1DA85..1DA9A 1
+1DA9B..1DA9F 0
+1DAA0 1
+1DAA1..1DAAF 0
+1DAB0..1E8CF 1
1E8D0..1E8D6 0
1E8D7..1FFFF 1
20000..3FFFF 2
--
2.4.2
- [PATCH] libunistring: update to Unicode 8.0.0,
Daiki Ueno <=