bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 03/10] unictype/joininggroup-of: Switch to 3-level table


From: Daiki Ueno
Subject: [PATCH v2 03/10] unictype/joininggroup-of: Switch to 3-level table
Date: Thu, 23 Oct 2014 17:01:34 +0900

* lib/gen-uni-tables.c (output_joining_group): Switch to a 3-level
table to accommodate joining groups assigned to characters with higher
code-point values.  Since only 88 groups are defined in Unicode 7.0.0,
use a 7-bit packed format for the level3 entries.
* lib/unictype/joininggroup_of.c (uc_joining_group): Adjust to use
3-level table.
* lib/unictype/joininggroup_of.h: Regenerate.
---
 lib/gen-uni-tables.c           | 155 ++++++++++++++++++++++++++++++-----------
 lib/unictype/joininggroup_of.c |  29 ++++++--
 2 files changed, 139 insertions(+), 45 deletions(-)

diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c
index 7a273fc..1af832e 100644
--- a/lib/gen-uni-tables.c
+++ b/lib/gen-uni-tables.c
@@ -3987,7 +3987,7 @@ output_joining_type (const char *filename, const char *version)
     }
 
   fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Arabic joining type of Unicode characters.  */\n");
+  fprintf (stream, "/* Arabic joining group of Unicode characters.  */\n");
   fprintf (stream, "/* Generated automatically by gen-uni-tables.c for Unicode %s.  */\n",
            version);
 
@@ -4213,11 +4213,22 @@ output_joining_group_test (const char *filename, const char *version)
     }
 }
 
+/* Construction of sparse 3-level tables.  */
+#define TABLE joining_group_table
+#define ELEMENT uint8_t
+#define DEFAULT UC_JOINING_GROUP_NONE
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
 static void
 output_joining_group (const char *filename, const char *version)
 {
   FILE *stream;
-  unsigned int ch_min, ch_max, ch, i;
+  unsigned int ch, i;
+  struct joining_group_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+  uint16_t *level3_packed;
 
   stream = fopen (filename, "w");
   if (stream == NULL)
@@ -4231,51 +4242,115 @@ output_joining_group (const char *filename, const char *version)
   fprintf (stream, "/* Generated automatically by gen-uni-tables.c for Unicode %s.  */\n",
            version);
 
-  ch_min = 0x10FFFF;
+  t.p = 7;
+  t.q = 9;
+  joining_group_table_init (&t);
+
   for (ch = 0; ch < 0x110000; ch++)
-    if (unicode_joining_group[ch] != UC_JOINING_GROUP_NONE)
-      {
-        ch_min = ch;
-        break;
-      }
+    {
+      uint8_t value = unicode_joining_group[ch];
 
-  ch_max = 0;
-  for (ch = 0x10FFFF; ch > 0; ch--)
-    if (unicode_joining_group[ch] != UC_JOINING_GROUP_NONE)
-      {
-        ch_max = ch;
-        break;
-      }
+      if (value > 0x7f)
+        abort ();
 
-  if (!(ch_min <= ch_max))
-    abort ();
+      joining_group_table_add (&t, ch, value);
+    }
 
-  /* If the interval [ch_min, ch_max] is too large, we should better use a
-     3-level table.  */
-  if (!(ch_max - ch_min < 0x200))
-    abort ();
+  joining_group_table_finalize (&t);
 
-  fprintf (stream, "#define joining_group_header_0 0x%x\n", ch_min);
-  fprintf (stream, "static const unsigned char u_joining_group[0x%x - 0x%x] =\n",
-           ch_max + 1, ch_min);
-  fprintf (stream, "{");
-  for (i = 0; i <= ch_max - ch_min; i++)
-    {
-      const char *s;
+  /* Offsets in t.result, in memory of this process.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
 
-      ch = ch_min + i;
-      if ((i % 2) == 0)
-        fprintf (stream, "\n ");
-      s = joining_group_as_c_identifier (unicode_joining_group[ch]);
-      fprintf (stream, " %s", s);
-      if (i+1 <= ch_max - ch_min)
-        {
-          fprintf (stream, ",");
-          if (((i+1) % 2) != 0)
-            fprintf (stream, "%*s", 38 - (int) strlen (s), "");
-        }
+  for (i = 0; i < 5; i++)
+    fprintf (stream, "#define joining_group_header_%d %d\n", i,
+             ((uint32_t *) t.result)[i]);
+  fprintf (stream, "static const\n");
+  fprintf (stream, "struct\n");
+  fprintf (stream, "  {\n");
+  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    unsigned short level3[%zu * %d + 1];\n", t.level3_size,
+           (1 << t.p) * 7 / 16);
+  fprintf (stream, "  }\n");
+  fprintf (stream, "u_joining_group =\n");
+  fprintf (stream, "{\n");
+  fprintf (stream, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+        fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+        fprintf (stream, " %5d", -1);
+      else
+        fprintf (stream, " %5zu",
+                 (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+        fprintf (stream, ",");
     }
-  fprintf (stream, "\n");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+        fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+        fprintf (stream, " %5d", -1);
+      else
+        fprintf (stream, " %5zu",
+                 (offset - level3_offset) / sizeof (uint8_t));
+      if (i+1 < t.level2_size << t.q)
+        fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  /* Pack the level3 array.  Each entry needs 7 bits only.  Use 16-bit units,
+     not 32-bit units, in order to make the lookup function easier.  */
+  level3_packed =
+    (uint16_t *)
+    calloc ((t.level3_size << t.p) * 7 / 16 + 1, sizeof (uint16_t));
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      unsigned int j = (i * 7) / 16;
+      unsigned int k = (i * 7) % 16;
+      uint32_t value = ((unsigned char *) (t.result + level3_offset))[i];
+      value = level3_packed[j] | (level3_packed[j+1] << 16) | (value << k);
+      level3_packed[j] = value & 0xffff;
+      level3_packed[j+1] = value >> 16;
+    }
+  fprintf (stream, "  {");
+  if ((t.level3_size << t.p) * 7 / 16 + 1 > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < (t.level3_size << t.p) * 7 / 16 + 1; i++)
+    {
+      if (i > 0 && (i % 8) == 0)
+        fprintf (stream, "\n   ");
+      fprintf (stream, " 0x%04x", level3_packed[i]);
+      if (i+1 < (t.level3_size << t.p) * 7 / 16 + 1)
+        fprintf (stream, ",");
+    }
+  if ((t.level3_size << t.p) * 7 / 16 + 1 > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  free (level3_packed);
   fprintf (stream, "};\n");
 
   if (ferror (stream) || fclose (stream))
diff --git a/lib/unictype/joininggroup_of.c b/lib/unictype/joininggroup_of.c
index 987af1e..c7b6846 100644
--- a/lib/unictype/joininggroup_of.c
+++ b/lib/unictype/joininggroup_of.c
@@ -20,14 +20,33 @@
 /* Specification.  */
 #include "unictype.h"
 
-#include "unictype/joininggroup_of.h"
+/* Define u_joining_group table.  */
+#include "joininggroup_of.h"
 
 int
 uc_joining_group (ucs4_t uc)
 {
-  if (uc >= joining_group_header_0
-      && uc < joining_group_header_0
-              + sizeof (u_joining_group) / sizeof (u_joining_group[0]))
-    return u_joining_group[uc - joining_group_header_0];
+  unsigned int index1 = uc >> joining_group_header_0;
+  if (index1 < joining_group_header_1)
+    {
+      int lookup1 = u_joining_group.level1[index1];
+      if (lookup1 >= 0)
+        {
+          unsigned int index2 = (uc >> joining_group_header_2) & joining_group_header_3;
+          int lookup2 = u_joining_group.level2[lookup1 + index2];
+          if (lookup2 >= 0)
+            {
+              unsigned int index3 = ((uc & joining_group_header_4) + lookup2) * 7;
+              /* level3 contains 7-bit values, packed into 16-bit words.  */
+              unsigned int lookup3 =
+                ((u_joining_group.level3[index3>>4]
+                  | (u_joining_group.level3[(index3>>4)+1] << 16))
+                 >> (index3 % 16))
+                & 0x7f;
+
+              return lookup3;
+            }
+        }
+    }
   return UC_JOINING_GROUP_NONE;
 }
-- 
1.9.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]