[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 07/10] Update to Unicode 6.1.0
From: Daiki Ueno
Subject: [PATCH v2 07/10] Update to Unicode 6.1.0
Date: Thu, 23 Oct 2014 17:01:38 +0900
* lib/unictype.in.h (UC_JOINING_GROUP_ROHINGYA_YEH): New enumeration
value.
* lib/unictype/joininggroup_byname.gperf: Add Rohingya Yeh
joining group name.
* lib/unictype/joininggroup_name.h: Likewise.
* lib/unilbrk/lbrktables.h (LBP_HL): New enumeration value.
(unilbrk_table): Adjust table size.
* lib/unilbrk/lbrktables.c (unilbrk_table): Add a row and column
for LBP_HL.
* lib/gen-uni-tables.c (UC_JOINING_GROUP_ROHINGYA_YEH): New
enumeration value.
(fill_arabicshaping, joining_group_as_c_identifier): Support
UC_JOINING_GROUP_ROHINGYA_YEH.
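(is_property_default_ignorable_code_point): Reject U+0604.
(output_joining_type): Say "joining type" instead of "joining group"
in the comment written to the generated file.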
(LBP_HL): New enumeration value.
(get_lbp, debug_output_lbp, fill_org_lbp, debug_output_org_lbp)
(output_lbp): Support LBP_HL.
(fill_org_lbp): Resolve CJ as NS, for backward compatibility.
---
lib/gen-uni-tables.c | 42 +++++++++++++++++---------
lib/unictype.in.h | 3 +-
lib/unictype/joininggroup_byname.gperf | 2 ++
lib/unictype/joininggroup_name.h | 1 +
lib/unilbrk/lbrktables.c | 55 +++++++++++++++++-----------------
lib/unilbrk/lbrktables.h | 22 +++++++-------
6 files changed, 73 insertions(+), 52 deletions(-)
diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c
index 1af832e..ec1aba5 100644
--- a/lib/gen-uni-tables.c
+++ b/lib/gen-uni-tables.c
@@ -32,7 +32,7 @@
/usr/local/share/Unidata/CompositionExclusions.txt \
/usr/local/share/Unidata/SpecialCasing.txt \
/usr/local/share/Unidata/CaseFolding.txt \
- 6.0.0
+ 6.1.0
*/
#include <stdbool.h>
@@ -2868,7 +2868,7 @@ is_property_default_ignorable_code_point (unsigned int ch)
bool result1 =
(is_category_Cf (ch)
&& !(ch >= 0xFFF9 && ch <= 0xFFFB) /* Annotations */
- && !((ch >= 0x0600 && ch <= 0x0603) || ch == 0x06DD || ch == 0x070F)
+ && !((ch >= 0x0600 && ch <= 0x0604) || ch == 0x06DD || ch == 0x070F)
/* For some reason, the following are not listed as having property
Default_Ignorable_Code_Point. */
&& !(ch == 0x110BD))
@@ -3746,7 +3746,8 @@ enum
UC_JOINING_GROUP_YUDH, /* Yudh */
UC_JOINING_GROUP_YUDH_HE, /* Yudh_He */
UC_JOINING_GROUP_ZAIN, /* Zain */
- UC_JOINING_GROUP_ZHAIN /* Zhain */
+ UC_JOINING_GROUP_ZHAIN, /* Zhain */
+ UC_JOINING_GROUP_ROHINGYA_YEH /* Rohingya_Yeh */
};
static uint8_t unicode_joining_group[0x110000];
@@ -3886,6 +3887,7 @@ fill_arabicshaping (const char *arabicshaping_filename)
TRY(UC_JOINING_GROUP_YUDH_HE, "YUDH HE")
TRY(UC_JOINING_GROUP_ZAIN, "ZAIN")
TRY(UC_JOINING_GROUP_ZHAIN, "ZHAIN")
+ TRY(UC_JOINING_GROUP_ROHINGYA_YEH, "ROHINGYA YEH")
#undef TRY
else
{
@@ -3987,7 +3989,7 @@ output_joining_type (const char *filename, const char
*version)
}
fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
- fprintf (stream, "/* Arabic joining group of Unicode characters. */\n");
+ fprintf (stream, "/* Arabic joining type of Unicode characters. */\n");
fprintf (stream, "/* Generated automatically by gen-uni-tables.c for Unicode
%s. */\n",
version);
@@ -4167,6 +4169,7 @@ joining_group_as_c_identifier (int joining_group)
TRY(UC_JOINING_GROUP_YUDH_HE)
TRY(UC_JOINING_GROUP_ZAIN)
TRY(UC_JOINING_GROUP_ZHAIN)
+ TRY(UC_JOINING_GROUP_ROHINGYA_YEH)
#undef TRY
abort ();
}
@@ -6210,22 +6213,22 @@ output_width_property_test (const char *filename)
enum
{
- /* Values >= 25 are resolved at run time. */
- LBP_BK = 25, /* mandatory break */
+ /* Values >= 26 are resolved at run time. */
+ LBP_BK = 26, /* mandatory break */
/*LBP_CR, carriage return - not used here because it's a DOSism */
/*LBP_LF, line feed - not used here because it's a DOSism */
- LBP_CM = 26, /* attached characters and combining marks */
+ LBP_CM = 27, /* attached characters and combining marks */
/*LBP_NL, next line - not used here because it's equivalent to LBP_BK
*/
/*LBP_SG, surrogates - not used here because they are not characters */
LBP_WJ = 0, /* word joiner */
- LBP_ZW = 27, /* zero width space */
+ LBP_ZW = 28, /* zero width space */
LBP_GL = 1, /* non-breaking (glue) */
- LBP_SP = 28, /* space */
+ LBP_SP = 29, /* space */
LBP_B2 = 2, /* break opportunity before and after */
LBP_BA = 3, /* break opportunity after */
LBP_BB = 4, /* break opportunity before */
LBP_HY = 5, /* hyphen */
- LBP_CB = 29, /* contingent break opportunity */
+ LBP_CB = 30, /* contingent break opportunity */
LBP_CL = 6, /* closing punctuation */
LBP_CP = 7, /* closing parenthesis */
LBP_EX = 8, /* exclamation/interrogation */
@@ -6238,16 +6241,18 @@ enum
LBP_PO = 15, /* postfix (numeric) */
LBP_PR = 16, /* prefix (numeric) */
LBP_SY = 17, /* symbols allowing breaks */
- LBP_AI = 30, /* ambiguous (alphabetic or ideograph) */
+ LBP_AI = 31, /* ambiguous (alphabetic or ideograph) */
LBP_AL = 18, /* ordinary alphabetic and symbol characters */
+/*LBP_CJ, conditional Japanese starter, resolved to NS */
LBP_H2 = 19, /* Hangul LV syllable */
LBP_H3 = 20, /* Hangul LVT syllable */
+ LBP_HL = 25, /* Hebrew letter */
LBP_ID = 21, /* ideographic */
LBP_JL = 22, /* Hangul L Jamo */
LBP_JV = 23, /* Hangul V Jamo */
LBP_JT = 24, /* Hangul T Jamo */
- LBP_SA = 31, /* complex context (South East Asian) */
- LBP_XX = 32 /* unknown */
+ LBP_SA = 32, /* complex context (South East Asian) */
+ LBP_XX = 33 /* unknown */
};
/* Returns the line breaking classification for ch, as a bit mask. */
@@ -6692,6 +6697,10 @@ get_lbp (unsigned int ch)
if (ch >= 0xAC00 && ch <= 0xD7A3 && ((ch - 0xAC00) % 28) != 0)
attr |= (int64_t) 1 << LBP_H3;
+ if ((ch >= 0x05D0 && ch <= 0x05F2) || ch == 0xFB1D
+ || (ch >= 0xFB1F && ch <= 0xFB28) || (ch >= 0xFB2A && ch <= 0xFB4F))
+ attr |= (int64_t) 1 << LBP_HL;
+
if ((ch >= 0x1100 && ch <= 0x115F) || (ch >= 0xA960 && ch <= 0xA97C))
attr |= (int64_t) 1 << LBP_JL;
@@ -6853,7 +6862,7 @@ get_lbp (unsigned int ch)
|| ch == 0x2064 /* INVISIBLE PLUS */
/* Extra characters for compatibility with Unicode LineBreak.txt. */
|| ch == 0x110BD /* KAITHI NUMBER SIGN */)
- if (!(attr & (((int64_t) 1 << LBP_GL) | ((int64_t) 1 << LBP_B2) |
((int64_t) 1 << LBP_BA) | ((int64_t) 1 << LBP_BB) | ((int64_t) 1 << LBP_HY) |
((int64_t) 1 << LBP_CB) | ((int64_t) 1 << LBP_CL) | ((int64_t) 1 << LBP_CP) |
((int64_t) 1 << LBP_EX) | ((int64_t) 1 << LBP_IN) | ((int64_t) 1 << LBP_NS) |
((int64_t) 1 << LBP_OP) | ((int64_t) 1 << LBP_QU) | ((int64_t) 1 << LBP_IS) |
((int64_t) 1 << LBP_NU) | ((int64_t) 1 << LBP_PO) | ((int64_t) 1 << LBP_PR) |
((int64_t) 1 << LBP_SY) | ((int64_t) 1 << LBP_H2) | ((int64_t) 1 << LBP_H3) |
((int64_t) 1 << LBP_JL) | ((int64_t) 1 << LBP_JV) | ((int64_t) 1 << LBP_JT) |
((int64_t) 1 << LBP_SA) | ((int64_t) 1 << LBP_ID))))
+ if (!(attr & (((int64_t) 1 << LBP_GL) | ((int64_t) 1 << LBP_B2) |
((int64_t) 1 << LBP_BA) | ((int64_t) 1 << LBP_BB) | ((int64_t) 1 << LBP_HY) |
((int64_t) 1 << LBP_CB) | ((int64_t) 1 << LBP_CL) | ((int64_t) 1 << LBP_CP) |
((int64_t) 1 << LBP_EX) | ((int64_t) 1 << LBP_IN) | ((int64_t) 1 << LBP_NS) |
((int64_t) 1 << LBP_OP) | ((int64_t) 1 << LBP_QU) | ((int64_t) 1 << LBP_IS) |
((int64_t) 1 << LBP_NU) | ((int64_t) 1 << LBP_PO) | ((int64_t) 1 << LBP_PR) |
((int64_t) 1 << LBP_SY) | ((int64_t) 1 << LBP_H2) | ((int64_t) 1 << LBP_H3) |
((int64_t) 1 << LBP_HL) | ((int64_t) 1 << LBP_JL) | ((int64_t) 1 << LBP_JV) |
((int64_t) 1 << LBP_JT) | ((int64_t) 1 << LBP_SA) | ((int64_t) 1 << LBP_ID))))
{
/* ambiguous (alphabetic) ? */
if ((unicode_width[ch] != NULL
@@ -6973,6 +6982,7 @@ debug_output_lbp (FILE *stream)
PRINT_BIT(attr,LBP_AL);
PRINT_BIT(attr,LBP_H2);
PRINT_BIT(attr,LBP_H3);
+ PRINT_BIT(attr,LBP_HL);
PRINT_BIT(attr,LBP_ID);
PRINT_BIT(attr,LBP_JL);
PRINT_BIT(attr,LBP_JV);
@@ -7087,6 +7097,7 @@ fill_org_lbp (const char *linebreak_filename)
TRY(LBP_AL)
TRY(LBP_H2)
TRY(LBP_H3)
+ TRY(LBP_HL)
TRY(LBP_ID)
TRY(LBP_JL)
TRY(LBP_JV)
@@ -7098,6 +7109,7 @@ fill_org_lbp (const char *linebreak_filename)
else if (strcmp (field1, "CR") == 0) value = LBP_BK;
else if (strcmp (field1, "NL") == 0) value = LBP_BK;
else if (strcmp (field1, "SG") == 0) value = LBP_XX;
+ else if (strcmp (field1, "CJ") == 0) value = LBP_NS;
else
{
fprintf (stderr, "unknown property value \"%s\" in '%s':%d\n",
@@ -7167,6 +7179,7 @@ debug_output_org_lbp (FILE *stream)
PRINT_BIT(attr,LBP_AL);
PRINT_BIT(attr,LBP_H2);
PRINT_BIT(attr,LBP_H3);
+ PRINT_BIT(attr,LBP_HL);
PRINT_BIT(attr,LBP_ID);
PRINT_BIT(attr,LBP_JL);
PRINT_BIT(attr,LBP_JV);
@@ -7340,6 +7353,7 @@ output_lbp (FILE *stream1, FILE *stream2)
CASE(LBP_AL);
CASE(LBP_H2);
CASE(LBP_H3);
+ CASE(LBP_HL);
CASE(LBP_ID);
CASE(LBP_JL);
CASE(LBP_JV);
diff --git a/lib/unictype.in.h b/lib/unictype.in.h
index 5125e96..30c71aa 100644
--- a/lib/unictype.in.h
+++ b/lib/unictype.in.h
@@ -518,7 +518,8 @@ enum
UC_JOINING_GROUP_YUDH, /* Yudh */
UC_JOINING_GROUP_YUDH_HE, /* Yudh_He */
UC_JOINING_GROUP_ZAIN, /* Zain */
- UC_JOINING_GROUP_ZHAIN /* Zhain */
+ UC_JOINING_GROUP_ZHAIN, /* Zhain */
+ UC_JOINING_GROUP_ROHINGYA_YEH /* Rohingya_Yeh */
};
/* Return the name of a joining group. */
diff --git a/lib/unictype/joininggroup_byname.gperf
b/lib/unictype/joininggroup_byname.gperf
index bc2fbc8..90be16e 100644
--- a/lib/unictype/joininggroup_byname.gperf
+++ b/lib/unictype/joininggroup_byname.gperf
@@ -83,3 +83,5 @@ Yudh He, UC_JOINING_GROUP_YUDH_HE
YudhHe, UC_JOINING_GROUP_YUDH_HE
Zain, UC_JOINING_GROUP_ZAIN
Zhain, UC_JOINING_GROUP_ZHAIN
+Rohingya Yeh, UC_JOINING_GROUP_ROHINGYA_YEH
+RohingyaYeh, UC_JOINING_GROUP_ROHINGYA_YEH
diff --git a/lib/unictype/joininggroup_name.h b/lib/unictype/joininggroup_name.h
index 78d4a10..681f1a5 100644
--- a/lib/unictype/joininggroup_name.h
+++ b/lib/unictype/joininggroup_name.h
@@ -72,3 +72,4 @@ ELEM (YUDH, "Yudh")
ELEM (YUDH_HE, "Yudh He")
ELEM (ZAIN, "Zain")
ELEM (ZHAIN, "Zhain")
+ELEM (ROHINGYA_YEH, "Rohingya Yeh")
diff --git a/lib/unilbrk/lbrktables.c b/lib/unilbrk/lbrktables.c
index f0b3d59..d60321d 100644
--- a/lib/unilbrk/lbrktables.c
+++ b/lib/unilbrk/lbrktables.c
@@ -23,35 +23,36 @@
/* Define unilbrkprop, table of line breaking properties. */
#include "unilbrk/lbrkprop2.h"
-const unsigned char unilbrk_table[25][25] =
+const unsigned char unilbrk_table[26][26] =
{
/* after */
- /* WJ GL B2 BA BB HY CL CP EX IN NS OP QU IS NU PO PR SY AL H2 H3 ID
JL JV JT */
-/* WJ */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I,
I, I, I, },
-/* GL */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I,
I, I, I, },
-/* B2 */ { P, I, P, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, },
-/* BA */ { P, D, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, },
-/* BB */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I,
I, I, I, },
-/* HY */ { P, D, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D,
D, D, D, },
-/* CL */ { P, I, D, I, D, I, P, P, P, D, P, D, I, P, D, I, I, P, D, D, D, D,
D, D, D, },
-/* CP */ { P, I, D, I, D, I, P, P, P, D, P, D, I, P, I, I, I, P, I, D, D, D,
D, D, D, },
-/* EX */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, },
-/* IN */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, },
-/* NS */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, },
-/* OP */ { P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P,
P, P, P, },
-/* QU */ { P, I, I, I, I, I, P, P, P, I, I, P, I, P, I, I, I, P, I, I, I, I,
I, I, I, },
-/* IS */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D,
D, D, D, },
-/* NU */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, I, I, P, I, D, D, D,
D, D, D, },
-/* PO */ { P, I, D, I, D, I, P, P, P, D, I, I, I, P, I, D, D, P, I, D, D, D,
D, D, D, },
-/* PR */ { P, I, D, I, D, I, P, P, P, D, I, I, I, P, I, D, D, P, I, I, I, I,
I, I, I, },
-/* SY */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D,
D, D, D, },
-/* AL */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, D, D, P, I, D, D, D,
D, D, D, },
-/* H2 */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D,
D, I, I, },
-/* H3 */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D,
D, D, I, },
-/* ID */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D,
D, D, D, },
-/* JL */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, I, I, D,
I, I, D, },
-/* JV */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D,
D, I, I, },
-/* JT */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D,
D, D, I, },
+ /* WJ GL B2 BA BB HY CL CP EX IN NS OP QU IS NU PO PR SY AL H2 H3 ID
JL JV JT HL */
+/* WJ */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I,
I, I, I, I, },
+/* GL */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I,
I, I, I, I, },
+/* B2 */ { P, I, P, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, D, },
+/* BA */ { P, D, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, D, },
+/* BB */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I,
I, I, I, I, },
+/* HY */ { P, D, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D,
D, D, D, D, },
+/* CL */ { P, I, D, I, D, I, P, P, P, D, P, D, I, P, D, I, I, P, D, D, D, D,
D, D, D, D, },
+/* CP */ { P, I, D, I, D, I, P, P, P, D, P, D, I, P, I, I, I, P, I, D, D, D,
D, D, D, I, },
+/* EX */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, D, },
+/* IN */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, D, },
+/* NS */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D,
D, D, D, D, },
+/* OP */ { P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P,
P, P, P, P, },
+/* QU */ { P, I, I, I, I, I, P, P, P, I, I, P, I, P, I, I, I, P, I, I, I, I,
I, I, I, I, },
+/* IS */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D,
D, D, D, D, },
+/* NU */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, I, I, P, I, D, D, D,
D, D, D, I, },
+/* PO */ { P, I, D, I, D, I, P, P, P, D, I, I, I, P, I, D, D, P, I, D, D, D,
D, D, D, I, },
+/* PR */ { P, I, D, I, D, I, P, P, P, D, I, I, I, P, I, D, D, P, I, I, I, I,
I, I, I, I, },
+/* SY */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D,
D, D, D, D, },
+/* AL */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, D, D, P, I, D, D, D,
D, D, D, I, },
+/* H2 */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D,
D, I, I, D, },
+/* H3 */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D,
D, D, I, D, },
+/* ID */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D,
D, D, D, D, },
+/* JL */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, I, I, D,
I, I, D, D, },
+/* JV */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D,
D, I, I, D, },
+/* JT */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D,
D, D, I, D, },
+/* HL */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, D, D, P, I, D, D, D,
D, D, D, I, },
/* "" */
/* before */
};
diff --git a/lib/unilbrk/lbrktables.h b/lib/unilbrk/lbrktables.h
index e651d71..95bb502 100644
--- a/lib/unilbrk/lbrktables.h
+++ b/lib/unilbrk/lbrktables.h
@@ -21,22 +21,22 @@
enum
{
- /* Values >= 25 are resolved at run time. */
- LBP_BK = 25, /* mandatory break */
+ /* Values >= 26 are resolved at run time. */
+ LBP_BK = 26, /* mandatory break */
/*LBP_CR, carriage return - not used here because it's a DOSism */
/*LBP_LF, line feed - not used here because it's a DOSism */
- LBP_CM = 26, /* attached characters and combining marks */
+ LBP_CM = 27, /* attached characters and combining marks */
/*LBP_NL, next line - not used here because it's equivalent to LBP_BK
*/
/*LBP_SG, surrogates - not used here because they are not characters */
LBP_WJ = 0, /* word joiner */
- LBP_ZW = 27, /* zero width space */
+ LBP_ZW = 28, /* zero width space */
LBP_GL = 1, /* non-breaking (glue) */
- LBP_SP = 28, /* space */
+ LBP_SP = 29, /* space */
LBP_B2 = 2, /* break opportunity before and after */
LBP_BA = 3, /* break opportunity after */
LBP_BB = 4, /* break opportunity before */
LBP_HY = 5, /* hyphen */
- LBP_CB = 29, /* contingent break opportunity */
+ LBP_CB = 30, /* contingent break opportunity */
LBP_CL = 6, /* closing punctuation */
LBP_CP = 7, /* closing parenthesis */
LBP_EX = 8, /* exclamation/interrogation */
@@ -49,16 +49,18 @@ enum
LBP_PO = 15, /* postfix (numeric) */
LBP_PR = 16, /* prefix (numeric) */
LBP_SY = 17, /* symbols allowing breaks */
- LBP_AI = 30, /* ambiguous (alphabetic or ideograph) */
+ LBP_AI = 31, /* ambiguous (alphabetic or ideograph) */
LBP_AL = 18, /* ordinary alphabetic and symbol characters */
+/*LBP_CJ, conditional Japanese starters, resolved to NS */
LBP_H2 = 19, /* Hangul LV syllable */
LBP_H3 = 20, /* Hangul LVT syllable */
+ LBP_HL = 25, /* Hebrew letter */
LBP_ID = 21, /* ideographic */
LBP_JL = 22, /* Hangul L Jamo */
LBP_JV = 23, /* Hangul V Jamo */
LBP_JT = 24, /* Hangul T Jamo */
- LBP_SA = 31, /* complex context (South East Asian) */
- LBP_XX = 32 /* unknown */
+ LBP_SA = 32, /* complex context (South East Asian) */
+ LBP_XX = 33 /* unknown */
};
#include "lbrkprop1.h"
@@ -89,7 +91,7 @@ unilbrkprop_lookup (ucs4_t uc)
#define I 2 /* indirect break opportunity, '%' in table 7.3 of UTR #14 */
#define P 3 /* prohibited break, '^' in table 7.3 of UTR #14 */
-extern const unsigned char unilbrk_table[25][25];
+extern const unsigned char unilbrk_table[26][26];
/* We don't support line breaking of complex-context dependent characters
(Thai, Lao, Myanmar, Khmer) yet, because it requires dictionary lookup. */
--
1.9.3
- [PATCH v2 00/10] Update libunistring-related modules to Unicode 7.0.0, Daiki Ueno, 2014/10/23
- [PATCH v2 01/10] gen-uni-tables: Minor style fixes, Daiki Ueno, 2014/10/23
- [PATCH v2 04/10] uniwbrk: Ignore Extended/Format at the beginning of the line, Daiki Ueno, 2014/10/23
- [PATCH v2 02/10] gen-uni-tables: Check out-of-range values added to 3-level tables, Daiki Ueno, 2014/10/23
- [PATCH v2 05/10] uniwbrk/u32-wordbreaks-tests: Test using WordBreakTest.txt from UCD, Daiki Ueno, 2014/10/23
- [PATCH v2 06/10] uniname: Make codepoint transformation more flexible, Daiki Ueno, 2014/10/23
- [PATCH v2 03/10] unictype/joininggroup-of: Switch to 3-level table, Daiki Ueno, 2014/10/23
- [PATCH v2 07/10] Update to Unicode 6.1.0, Daiki Ueno <=
- [PATCH v2 08/10] Update to Unicode 6.2.0, Daiki Ueno, 2014/10/23
- [PATCH v2 09/10] Update to Unicode 6.3.0, Daiki Ueno, 2014/10/23
- Re: [PATCH v2 00/10] Update libunistring-related modules to Unicode 7.0.0, Pádraig Brady, 2014/10/23