bug-gettext
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [bug-gettext] Plural rule definitions


From: Daiki Ueno
Subject: Re: [bug-gettext] Plural rule definitions
Date: Thu, 21 May 2015 17:09:31 +0900
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/25.0.50 (gnu/linux)

Michele Locati <address@hidden> writes:

> - we need to integrate something like the cldr-to-gettext-plural-rules
> tool of mine: that's not a big problem - just a rewrite from php to c.

I've tried that to familiarize myself with CLDR.  See the attached
patch, which adds 'cldr-plural' utility to gettext-tools.

To make the comparison easier, I'm also attaching the generated output
for the languages currently defined in plural-table.c.

> - we have to take for sure that the CLDR repository structure does not
> change: that's a problem (for instance, the CLDR team moved the json
> data from http://unicode.org/Public/cldr/ to GitHub)

Unlike the JSON files, I suppose the XML files will remain available
from the canonical location.  If it is not the case, we can make the
location customizable or provide a shell-script wrapper like
/usr/share/gettext/projects/*/team-address.

Regards,
-- 
Daiki Ueno
>From 314bd1b1b8f487c3dd9a4761ee451323bda3680d Mon Sep 17 00:00:00 2001
From: Daiki Ueno <address@hidden>
Date: Thu, 21 May 2015 13:03:50 +0900
Subject: [PATCH] gettext-tools: Add a new utility cldr-plural

* Makefile.am (noinst_PROGRAMS): Add cldr-plural.
(cldr_plural_SOURCES): New variable.
(cldr_plural_LDADD): New variable.
* cldr-plural-exp.h: New file.
* cldr-plural-exp.c: New file.
* cldr-plural.y: New file.
---
 gettext-tools/src/Makefile.am       |   4 +-
 gettext-tools/src/cldr-plural-exp.c | 571 ++++++++++++++++++++++++++++++++++++
 gettext-tools/src/cldr-plural-exp.h | 133 +++++++++
 gettext-tools/src/cldr-plural.y     | 465 +++++++++++++++++++++++++++++
 4 files changed, 1172 insertions(+), 1 deletion(-)
 create mode 100644 gettext-tools/src/cldr-plural-exp.c
 create mode 100644 gettext-tools/src/cldr-plural-exp.h
 create mode 100644 gettext-tools/src/cldr-plural.y

diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am
index 9a23be0..81c47b1 100644
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -29,7 +29,7 @@ msgcmp msgfmt msgmerge msgunfmt xgettext \
 msgattrib msgcat msgcomm msgconv msgen msgexec msgfilter msggrep msginit msguniq \
 recode-sr-latin
 
-noinst_PROGRAMS = hostname urlget
+noinst_PROGRAMS = hostname urlget cldr-plural
 
 lib_LTLIBRARIES = libgettextsrc.la
 
@@ -236,6 +236,8 @@ endif
 recode_sr_latin_SOURCES = recode-sr-latin.c filter-sr-latin.c
 hostname_SOURCES = hostname.c
 urlget_SOURCES = urlget.c
+cldr_plural_SOURCES = cldr-plural-exp.c cldr-plural.y
+cldr_plural_LDADD = $(LDADD) -lm
 
 # How to build libgettextsrc.la.
 # Need ../gnulib-lib/libgettextlib.la.
diff --git a/gettext-tools/src/cldr-plural-exp.c b/gettext-tools/src/cldr-plural-exp.c
new file mode 100644
index 0000000..61902f1
--- /dev/null
+++ b/gettext-tools/src/cldr-plural-exp.c
@@ -0,0 +1,571 @@
+/* Unicode CLDR plural rule parser and converter
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+   This file was written by Daiki Ueno <address@hidden>, 2015.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "unistr.h"
+#include "xalloc.h"
+#include <math.h>
+
+#include "cldr-plural-exp.h"
+#include "cldr-plural.h"
+
+/* The grammar of Unicode CLDR plural rules is defined at:
+   http://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax
+
+   This implementation only supports the "preferred" form, which
+   doesn't support obsolete keywords "in", "is", "not", and "within".
+
+   Unlike gettext, CLDR allows an unsigned decimal value as an
+   operand, in addition to unsigned integers.  For simplicity, we
+   treat decimal relations as if they were constant truth values.
+
+   The implementation is largely based on the idea of Michele Locati's
+   cldr-to-gettext-plural-rules:
+   https://github.com/mlocati/cldr-to-gettext-plural-rules  */
+
+/* Release RANGE together with its two bounds.  When both bounds point
+   at the same operand object, that object is released only once.  */
+void
+cldr_plural_range_free (struct cldr_plural_range_ty *range)
+{
+  struct cldr_plural_operand_ty *lower = range->start;
+  struct cldr_plural_operand_ty *upper = range->end;
+
+  /* free (NULL) is a no-op, so dropping the alias avoids a double free.  */
+  if (lower == upper)
+    lower = NULL;
+  free (lower);
+  free (upper);
+  free (range);
+}
+
+/* Release every range stored in RANGES, last to first, then the item
+   array and finally the list object itself.  */
+void
+cldr_plural_range_list_free (struct cldr_plural_range_list_ty *ranges)
+{
+  size_t remaining;
+
+  for (remaining = ranges->nitems; remaining > 0; remaining--)
+    cldr_plural_range_free (ranges->items[remaining - 1]);
+  free (ranges->items);
+  free (ranges);
+}
+
+/* Release CONDITION and whatever its type says it owns: both operands
+   of an AND/OR node, or the relation of a leaf node.  Nodes of any
+   other type own nothing besides themselves.  */
+void
+cldr_plural_condition_free (struct cldr_plural_condition_ty *condition)
+{
+  switch (condition->type)
+    {
+    case CLDR_PLURAL_CONDITION_AND:
+    case CLDR_PLURAL_CONDITION_OR:
+      cldr_plural_condition_free (condition->value.conditions[0]);
+      cldr_plural_condition_free (condition->value.conditions[1]);
+      break;
+
+    case CLDR_PLURAL_CONDITION_RELATION:
+      cldr_plural_relation_free (condition->value.relation);
+      break;
+
+    default:
+      break;
+    }
+
+  free (condition);
+}
+
+/* Release RELATION along with the range list and the expression it
+   points to.  */
+void
+cldr_plural_relation_free (struct cldr_plural_relation_ty *relation)
+{
+  cldr_plural_range_list_free (relation->ranges);
+  free (relation->expression);
+  free (relation);
+}
+
+/* Release RULE along with its condition tree and its name string.  */
+static void
+cldr_plural_rule_free (struct cldr_plural_rule_ty *rule)
+{
+  cldr_plural_condition_free (rule->condition);
+  free (rule->name);
+  free (rule);
+}
+
+/* Release every rule stored in RULES, last to first, then the item
+   array and finally the list object itself.  */
+static void
+cldr_plural_rule_list_free (struct cldr_plural_rule_list_ty *rules)
+{
+  size_t remaining;
+
+  for (remaining = rules->nitems; remaining > 0; remaining--)
+    cldr_plural_rule_free (rules->items[remaining - 1]);
+  free (rules->items);
+  free (rules);
+}
+
+/* Parse INPUT, a NUL-terminated CLDR plural rule string, into a newly
+   allocated rule list.  Returns NULL when yyparse reports a failure;
+   otherwise the caller owns the returned list and releases it with
+   cldr_plural_rule_list_free.  */
+static struct cldr_plural_rule_list_ty *
+cldr_plural_parse (const char *input)
+{
+  struct cldr_plural_parse_args arg;
+
+  memset (&arg, 0, sizeof arg);
+  arg.cp = input;
+  arg.cp_end = input + strlen (input);
+  arg.result = XMALLOC (struct cldr_plural_rule_list_ty);
+  memset (arg.result, 0, sizeof *arg.result);
+
+  if (yyparse (&arg) != 0)
+    {
+      /* Do not leak the list, nor any rule already stored in it, when
+         the input is rejected.
+         NOTE(review): assumes the parser's error path does not release
+         arg.result itself -- confirm against cldr-plural.y.  */
+      cldr_plural_rule_list_free (arg.result);
+      return NULL;
+    }
+
+  return arg.result;
+}
+
+/* Nonzero when operand O holds the value zero, whether it is stored as
+   an integer or as a decimal.  O is evaluated more than once.  */
+#define OPERAND_ZERO_P(o)                               \
+  ((o)->type == CLDR_PLURAL_OPERAND_INTEGER             \
+   ? (o)->value.ival == 0                               \
+   : ((o)->type == CLDR_PLURAL_OPERAND_DECIMAL          \
+      && (o)->value.dval.d == 0))
+
+/* Try to reduce RELATION to a constant.
+
+   For operands 'n' and 'i', decimal range bounds are rounded inwards
+   to integers (start up, end down), the operand becomes 'i', and the
+   relation is kept: CLDR_PLURAL_CONDITION_RELATION is returned.
+
+   For operands 'f', 't', 'v' and 'w', the relation collapses to
+   CLDR_PLURAL_CONDITION_TRUE unless some range rules that out, in
+   which case CLDR_PLURAL_CONDITION_FALSE is returned: an equality
+   needs both bounds of every range to be zero, an inequality needs
+   both bounds of every range to be nonzero.
+
+   Any other operand leaves RELATION untouched and returns
+   CLDR_PLURAL_CONDITION_RELATION.  */
+static enum cldr_plural_condition
+eval_relation (struct cldr_plural_relation_ty *relation)
+{
+  struct cldr_plural_range_list_ty *list = relation->ranges;
+  int operand = relation->expression->operand;
+  size_t k;
+
+  if (operand == 'n' || operand == 'i')
+    {
+      /* Coerce decimal values in ranges into integers.  */
+      for (k = 0; k < list->nitems; k++)
+        {
+          struct cldr_plural_operand_ty *lower = list->items[k]->start;
+          struct cldr_plural_operand_ty *upper = list->items[k]->end;
+
+          if (lower->type == CLDR_PLURAL_OPERAND_DECIMAL)
+            {
+              lower->type = CLDR_PLURAL_OPERAND_INTEGER;
+              lower->value.ival = ceil (lower->value.dval.d);
+            }
+          if (upper->type == CLDR_PLURAL_OPERAND_DECIMAL)
+            {
+              upper->type = CLDR_PLURAL_OPERAND_INTEGER;
+              upper->value.ival = floor (upper->value.dval.d);
+            }
+        }
+      relation->expression->operand = 'i';
+      return CLDR_PLURAL_CONDITION_RELATION;
+    }
+
+  if (operand == 'f' || operand == 't' || operand == 'v' || operand == 'w')
+    {
+      /* Since plural expression in gettext only supports unsigned
+         integer, turn relations whose operand is either 'f', 't',
+         'v', or 'w' into a constant truth value.  */
+      /* FIXME: check mod?  */
+      for (k = 0; k < list->nitems; k++)
+        {
+          int lower_zero = OPERAND_ZERO_P (list->items[k]->start);
+          int upper_zero = OPERAND_ZERO_P (list->items[k]->end);
+
+          if (relation->type == CLDR_PLURAL_RELATION_EQUAL)
+            {
+              if (!(lower_zero && upper_zero))
+                return CLDR_PLURAL_CONDITION_FALSE;
+            }
+          else if (relation->type == CLDR_PLURAL_RELATION_NOT_EQUAL)
+            {
+              if (lower_zero || upper_zero)
+                return CLDR_PLURAL_CONDITION_FALSE;
+            }
+        }
+      return CLDR_PLURAL_CONDITION_TRUE;
+    }
+
+  return CLDR_PLURAL_CONDITION_RELATION;
+}
+
+static void eval_condition (struct cldr_plural_condition_ty *condition);
+
+/* Overwrite PARENT with the contents of its operand KEEP, releasing
+   the other operand DROP and the now-empty KEEP node.  */
+static void
+hoist_condition (struct cldr_plural_condition_ty *parent,
+                 struct cldr_plural_condition_ty *keep,
+                 struct cldr_plural_condition_ty *drop)
+{
+  cldr_plural_condition_free (drop);
+  parent->type = keep->type;
+  parent->value = keep->value;
+  free (keep);
+}
+
+/* Simplify the AND/OR node CONDITION after evaluating both operands.
+   ABSORBING is the constant that decides the result on its own (FALSE
+   for AND, TRUE for OR); IDENTITY is the constant that has no effect
+   (TRUE for AND, FALSE for OR).  When neither operand is constant the
+   node is left as it is.  */
+static void
+fold_binary_condition (struct cldr_plural_condition_ty *condition,
+                       enum cldr_plural_condition absorbing,
+                       enum cldr_plural_condition identity)
+{
+  struct cldr_plural_condition_ty *left = condition->value.conditions[0];
+  struct cldr_plural_condition_ty *right = condition->value.conditions[1];
+
+  eval_condition (left);
+  eval_condition (right);
+
+  if (left->type == absorbing || right->type == absorbing)
+    {
+      cldr_plural_condition_free (left);
+      cldr_plural_condition_free (right);
+      condition->type = absorbing;
+    }
+  else if (left->type == identity && right->type == identity)
+    {
+      cldr_plural_condition_free (left);
+      cldr_plural_condition_free (right);
+      condition->type = identity;
+    }
+  else if (left->type == identity)
+    hoist_condition (condition, right, left);
+  else if (right->type == identity)
+    hoist_condition (condition, left, right);
+}
+
+/* Simplify CONDITION in place.  Leaf relations that eval_relation
+   reduces to a constant become TRUE/FALSE nodes and their relation is
+   released; AND/OR nodes are folded once their operands have been
+   simplified.  Nodes that are already TRUE or FALSE are left alone, so
+   evaluating a tree a second time does not touch a released relation.  */
+static void
+eval_condition (struct cldr_plural_condition_ty *condition)
+{
+  switch (condition->type)
+    {
+    case CLDR_PLURAL_CONDITION_AND:
+      fold_binary_condition (condition,
+                             CLDR_PLURAL_CONDITION_FALSE,
+                             CLDR_PLURAL_CONDITION_TRUE);
+      break;
+
+    case CLDR_PLURAL_CONDITION_OR:
+      fold_binary_condition (condition,
+                             CLDR_PLURAL_CONDITION_TRUE,
+                             CLDR_PLURAL_CONDITION_FALSE);
+      break;
+
+    case CLDR_PLURAL_CONDITION_RELATION:
+      {
+        enum cldr_plural_condition value =
+          eval_relation (condition->value.relation);
+        if (value == CLDR_PLURAL_CONDITION_TRUE
+            || value == CLDR_PLURAL_CONDITION_FALSE)
+          {
+            cldr_plural_relation_free (condition->value.relation);
+            condition->type = value;
+          }
+      }
+      break;
+
+    default:
+      /* Already a constant: value.relation is no longer valid here.  */
+      break;
+    }
+}
+
+/* Print the left-hand side of a relation on standard output: "n" when
+   EXPRESSION has no modulus, otherwise "n%M", or "n % M" when SPACE is
+   true.  The operand is always printed as "n".  */
+static void
+print_expression (struct cldr_plural_expression_ty *expression, bool space)
+{
+  int modulus = expression->mod;
+
+  if (modulus == 0)
+    {
+      putchar ('n');
+      return;
+    }
+
+  if (space)
+    printf ("n %% %d", modulus);
+  else
+    printf ("n%%%d", modulus);
+}
+
+/* Print RELATION on standard output as a C-like boolean expression.
+
+   PARENT is the type of the enclosing condition node and only decides
+   where parentheses are needed.  SPACE requests blanks around the
+   operators; it is honoured only for single-value comparisons of a
+   relation that has exactly one range.
+
+   The value.ival member of each bound is read without checking the
+   operand type.  */
+static void
+print_relation (struct cldr_plural_relation_ty *relation,
+                enum cldr_plural_condition parent, bool space)
+{
+  if (relation->type == CLDR_PLURAL_RELATION_EQUAL)
+    {
+      /* Equality: the ranges are alternatives, joined with "||".  */
+      size_t i;
+      /* Several alternatives inside an "&&" need grouping.  */
+      if (parent == CLDR_PLURAL_CONDITION_AND
+          && relation->ranges->nitems > 1)
+        putchar ('(');
+      for (i = 0; i < relation->ranges->nitems; i++)
+        {
+          struct cldr_plural_range_ty *range = relation->ranges->items[i];
+          if (i > 0)
+            printf (" || ");
+          if (range->start->value.ival == range->end->value.ival)
+            {
+              /* Single value: "n==N".  */
+              print_expression (relation->expression, space);
+              printf (space && relation->ranges->nitems == 1
+                      ? " == %d" : "==%d",
+                      range->start->value.ival);
+            }
+          else if (range->start->value.ival == 0)
+            {
+              /* Range starting at 0: only the upper bound is tested.  */
+              print_expression (relation->expression, false);
+              printf ("<=%d", range->end->value.ival);
+            }
+          else
+            {
+              /* General range: "n>=A && n<=B", grouped when it sits
+                 next to an "||".  */
+              if (parent == CLDR_PLURAL_CONDITION_OR
+                  || relation->ranges->nitems > 1)
+                putchar ('(');
+              print_expression (relation->expression, false);
+              printf (">=%d", range->start->value.ival);
+              printf (" && ");
+              print_expression (relation->expression, false);
+              printf ("<=%d", range->end->value.ival);
+              if (parent == CLDR_PLURAL_CONDITION_OR
+                  || relation->ranges->nitems > 1)
+                putchar (')');
+            }
+        }
+      if (parent == CLDR_PLURAL_CONDITION_AND
+          && relation->ranges->nitems > 1)
+        putchar (')');
+    }
+  else
+    {
+      /* Any other relation type is printed as an inequality: every
+         range must be avoided, so the parts are joined with "&&".  */
+      size_t i;
+      /* Several parts inside an "||" need grouping.  */
+      if (parent == CLDR_PLURAL_CONDITION_OR
+          && relation->ranges->nitems > 1)
+        putchar ('(');
+      for (i = 0; i < relation->ranges->nitems; i++)
+        {
+          struct cldr_plural_range_ty *range = relation->ranges->items[i];
+         if (i > 0)
+            printf (" && ");
+          if (range->start->value.ival == range->end->value.ival)
+            {
+              /* Single value: "n!=N".  */
+              print_expression (relation->expression, space);
+              printf (space && relation->ranges->nitems == 1
+                      ? " != %d" : "!=%d", range->start->value.ival);
+            }
+          else if (range->start->value.ival == 0)
+            {
+              /* Range starting at 0: only the upper bound is tested.  */
+              print_expression (relation->expression, false);
+              printf (">%d", range->end->value.ival);
+            }
+          else
+            {
+              /* General range: "n<A || n>B", grouped when it sits
+                 next to an "&&".  */
+              if (parent == CLDR_PLURAL_CONDITION_AND
+                  || relation->ranges->nitems > 1)
+                putchar ('(');
+              print_expression (relation->expression, false);
+              printf ("<%d", range->start->value.ival);
+              printf (" || ");
+              print_expression (relation->expression, false);
+              printf (">%d", range->end->value.ival);
+              if (parent == CLDR_PLURAL_CONDITION_AND
+                  || relation->ranges->nitems > 1)
+                putchar (')');
+            }
+        }
+      if (parent == CLDR_PLURAL_CONDITION_OR
+          && relation->ranges->nitems > 1)
+        putchar (')');
+    }
+}
+
+/* Print CONDITION on standard output.  PARENT is the type of the
+   enclosing node; an AND nested in an OR, or an OR nested in an AND,
+   is wrapped in parentheses.  SPACE is forwarded to print_relation for
+   a leaf and is never passed on to the operands of an AND/OR node.
+   Returns true when something was printed, false for any node that is
+   neither AND, OR nor a relation.  */
+static bool
+print_condition (struct cldr_plural_condition_ty *condition,
+                 enum cldr_plural_condition parent, bool space)
+{
+  enum cldr_plural_condition self = condition->type;
+  const char *joiner;
+  bool wrap;
+
+  switch (self)
+    {
+    case CLDR_PLURAL_CONDITION_RELATION:
+      print_relation (condition->value.relation, parent, space);
+      return true;
+
+    case CLDR_PLURAL_CONDITION_AND:
+      joiner = " && ";
+      wrap = (parent == CLDR_PLURAL_CONDITION_OR);
+      break;
+
+    case CLDR_PLURAL_CONDITION_OR:
+      joiner = " || ";
+      wrap = (parent == CLDR_PLURAL_CONDITION_AND);
+      break;
+
+    default:
+      return false;
+    }
+
+  if (wrap)
+    putchar ('(');
+  print_condition (condition->value.conditions[0], self, false);
+  fputs (joiner, stdout);
+  print_condition (condition->value.conditions[1], self, false);
+  if (wrap)
+    putchar (')');
+  return true;
+}
+
+/* Nonzero when the condition of rule R is not a constant, i.e. neither
+   TRUE nor FALSE.  R is evaluated more than once.  */
+#define RULE_PRINTABLE_P(r)                                     \
+  (!((r)->condition->type == CLDR_PLURAL_CONDITION_TRUE         \
+     || (r)->condition->type == CLDR_PLURAL_CONDITION_FALSE))
+
+/* Convert n == N into n != N.
+
+   Heuristic for a lone rule: when CONDITION is an equality of the bare
+   operand with a single value N, print "nplurals=2; plural=(n != N);"
+   on standard output and return true.  Otherwise print nothing and
+   return false so that the caller can try something else.  */
+static bool
+print_condition_negation (struct cldr_plural_condition_ty *condition)
+{
+  struct cldr_plural_relation_ty *relation;
+  struct cldr_plural_range_ty *range;
+
+  if (condition->type != CLDR_PLURAL_CONDITION_RELATION)
+    return false;
+
+  relation = condition->value.relation;
+  /* "n != N" is not the complement of "n % M == N"; relations with a
+     modulus are left to the generic printer.  */
+  if (relation->type != CLDR_PLURAL_RELATION_EQUAL
+      || relation->expression->mod != 0
+      || relation->ranges->nitems != 1)
+    return false;
+
+  /* Only a range whose two bounds are the same operand object
+     qualifies.  */
+  range = relation->ranges->items[0];
+  if (range->start != range->end)
+    return false;
+
+  printf ("nplurals=2; plural=(n != %d);\n", range->start->value.ival);
+  return true;
+}
+
+/* Convert n == 0,...,N into n > N.
+
+   Heuristic for a lone rule: when CONDITION is an equality of the bare
+   operand whose integer ranges cover 0 up to some N without a gap,
+   print "nplurals=2; plural=(n > N);" on standard output and return
+   true.  Otherwise print nothing and return false.  */
+static bool
+print_condition_greater (struct cldr_plural_condition_ty *condition)
+{
+  struct cldr_plural_relation_ty *relation;
+  int last = -1;
+  size_t i;
+
+  if (condition->type != CLDR_PLURAL_CONDITION_RELATION)
+    return false;
+
+  relation = condition->value.relation;
+  /* "n > N" is not the complement of "n % M == 0..N"; relations with a
+     modulus are left to the generic printer.  An empty range list has
+     no upper bound to print.  */
+  if (relation->type != CLDR_PLURAL_RELATION_EQUAL
+      || relation->expression->mod != 0
+      || relation->ranges->nitems == 0)
+    return false;
+
+  for (i = 0; i < relation->ranges->nitems; i++)
+    {
+      struct cldr_plural_range_ty *range = relation->ranges->items[i];
+
+      /* Each range must start right after the previous one ends, the
+         first one at 0.  */
+      if (range->start->type != CLDR_PLURAL_OPERAND_INTEGER
+          || range->end->type != CLDR_PLURAL_OPERAND_INTEGER
+          || range->start->value.ival != last + 1)
+        return false;
+      last = range->end->value.ival;
+    }
+
+  printf ("nplurals=2; plural=(n > %d);\n", last);
+  return true;
+}
+
+/* A heuristic printer for a lone rule: prints a complete plural header
+   and returns true, or prints nothing and returns false.  */
+typedef bool (*print_condition_function_ty) (struct cldr_plural_condition_ty *);
+
+/* Heuristics tried in order by process_rule_list; the table is never
+   modified.  */
+static const print_condition_function_ty print_condition_functions[] =
+  {
+    print_condition_negation,
+    print_condition_greater
+  };
+
+/* Number of elements of the array A.  A must be a real array, not a
+   pointer.  */
+#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
+
+/* Simplify every rule in RULES and print the matching gettext header
+   ("nplurals=...; plural=...;") on standard output.  The conditions
+   stored in RULES are modified in place by the simplification.  */
+static void
+process_rule_list (struct cldr_plural_rule_list_ty *rules)
+{
+  size_t printable = 0;
+  size_t index = 0;
+  size_t k;
+
+  /* Prune trivial conditions and count the rules that survive.  */
+  for (k = 0; k < rules->nitems; k++)
+    {
+      eval_condition (rules->items[k]->condition);
+      if (RULE_PRINTABLE_P (rules->items[k]))
+        printable++;
+    }
+
+  /* Nothing left to print: a single plural form.  */
+  if (printable == 0)
+    {
+      printf ("nplurals=1; plural=0;\n");
+      return;
+    }
+
+  /* Exactly one printable rule: give the heuristics a chance to emit a
+     compact expression.  */
+  if (printable == 1)
+    {
+      struct cldr_plural_condition_ty *only;
+      size_t h;
+
+      k = 0;
+      while (k < rules->nitems && !RULE_PRINTABLE_P (rules->items[k]))
+        k++;
+      only = rules->items[k]->condition;
+
+      for (h = 0; h < SIZEOF (print_condition_functions); h++)
+        if (print_condition_functions[h] (only))
+          return;
+    }
+
+  /* Otherwise chain the printable rules with the ternary operator.  */
+  printf ("nplurals=%zu; plural=(", printable + 1);
+  for (k = 0; k < rules->nitems; k++)
+    {
+      bool later_printable = false;
+      size_t m;
+
+      if (!print_condition (rules->items[k]->condition,
+                            CLDR_PLURAL_CONDITION_FALSE, printable == 1))
+        continue;
+      if (rules->nitems <= 1)
+        continue;
+
+      /* Another "? N :" link is only needed when a later rule will
+         print something too.  */
+      for (m = k + 1; m < rules->nitems; m++)
+        if (RULE_PRINTABLE_P (rules->items[m]))
+          later_printable = true;
+
+      if (k < rules->nitems - 1 && later_printable)
+        printf (" ? %zu : ", index++);
+    }
+  if (rules->nitems > 1)
+    printf (" ? %zu : %zu", index, index + 1);
+  printf (");\n");
+}
+
+/* Read standard input line by line, parse each line as a CLDR plural
+   rule string and print the converted gettext header for every line
+   that parses.  Lines that fail to parse are skipped.  Always returns
+   0.  */
+int
+main (int argc, char **argv)
+{
+  char *line = NULL;
+  size_t line_size = 0;
+
+  /* The command line is not consulted.  */
+  (void) argc;
+  (void) argv;
+
+  for (;;)
+    {
+      /* getline returns ssize_t; storing it in an int would truncate
+         the length of very long lines.  */
+      ssize_t line_len;
+      struct cldr_plural_rule_list_ty *result;
+
+      line_len = getline (&line, &line_size, stdin);
+      if (line_len < 0)
+        break;
+      /* Strip the trailing newline, if any, before parsing.  */
+      if (line_len > 0 && line[line_len - 1] == '\n')
+        line[--line_len] = '\0';
+
+      result = cldr_plural_parse (line);
+      if (result)
+        {
+          process_rule_list (result);
+          cldr_plural_rule_list_free (result);
+        }
+    }
+
+  free (line);
+  return 0;
+}
diff --git a/gettext-tools/src/cldr-plural-exp.h b/gettext-tools/src/cldr-plural-exp.h
new file mode 100644
index 0000000..1c0c70c
--- /dev/null
+++ b/gettext-tools/src/cldr-plural-exp.h
@@ -0,0 +1,133 @@
+/* Unicode CLDR plural rule parser and converter
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+   This file was written by Daiki Ueno <address@hidden>, 2015.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef _CLDR_PLURAL_EXP_H
+#define _CLDR_PLURAL_EXP_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Selects which member of the value union in
+   struct cldr_plural_operand_ty is in use.  */
+enum cldr_plural_operand
+  {
+    CLDR_PLURAL_OPERAND_INTEGER,
+    CLDR_PLURAL_OPERAND_DECIMAL
+  };
+
+/* A numeric bound used in a range.  */
+struct cldr_plural_operand_ty
+{
+  /* Tells whether value.ival or value.dval is meaningful.  */
+  enum cldr_plural_operand type;
+  union
+  {
+    /* Value of a CLDR_PLURAL_OPERAND_INTEGER operand.  */
+    int ival;
+    /* Value of a CLDR_PLURAL_OPERAND_DECIMAL operand.  */
+    struct
+    {
+      double d;
+      /* NOTE(review): presumably the number of fraction digits that
+         were written -- confirm against the lexer in cldr-plural.y.  */
+      int nfractions;
+    } dval;
+  } value;
+};
+
+/* Comparison kind stored in struct cldr_plural_relation_ty.  */
+enum cldr_plural_relation
+  {
+    CLDR_PLURAL_RELATION_EQUAL,
+    CLDR_PLURAL_RELATION_NOT_EQUAL
+  };
+
+/* A range given by its two bounds.  START and END may point at the
+   same operand object; cldr_plural_range_free releases such a shared
+   operand only once.  */
+struct cldr_plural_range_ty
+{
+  struct cldr_plural_operand_ty *start;
+  struct cldr_plural_operand_ty *end;
+};
+
+/* An array of ranges.  This header includes nothing itself, so size_t
+   has to be declared by the including file.  */
+struct cldr_plural_range_list_ty
+{
+  struct cldr_plural_range_ty **items;
+  /* Number of entries of ITEMS in use.  */
+  size_t nitems;
+  /* NOTE(review): presumably the allocated capacity of ITEMS --
+     confirm against cldr-plural.y.  */
+  size_t nitems_max;
+};
+
+/* The left-hand side of a relation: an operand letter with an optional
+   modulus.  */
+struct cldr_plural_expression_ty
+{
+  /* 'n', 'i', 'f', 't', 'v', 'w' */
+  int operand;
+
+  /* 0 if not given */
+  int mod;
+};
+
+/* A comparison of an expression against a list of ranges.  Both
+   EXPRESSION and RANGES are released by cldr_plural_relation_free.  */
+struct cldr_plural_relation_ty
+{
+  struct cldr_plural_expression_ty *expression;
+  enum cldr_plural_relation type;
+  struct cldr_plural_range_list_ty *ranges;
+};
+
+/* Node kinds of a condition tree.  AND and OR have two operands,
+   RELATION is a leaf holding a relation, TRUE and FALSE are the
+   constants that simplification produces.  */
+enum cldr_plural_condition
+  {
+    CLDR_PLURAL_CONDITION_AND,
+    CLDR_PLURAL_CONDITION_OR,
+    CLDR_PLURAL_CONDITION_RELATION,
+    CLDR_PLURAL_CONDITION_TRUE,
+    CLDR_PLURAL_CONDITION_FALSE
+  };
+
+/* A node of a condition tree.  */
+struct cldr_plural_condition_ty
+{
+  enum cldr_plural_condition type;
+  union
+  {
+    /* Used when TYPE is CLDR_PLURAL_CONDITION_RELATION.  */
+    struct cldr_plural_relation_ty *relation;
+    /* Used when TYPE is CLDR_PLURAL_CONDITION_AND or
+       CLDR_PLURAL_CONDITION_OR.  */
+    struct cldr_plural_condition_ty *conditions[2];
+  } value;
+};
+
+/* A named rule.  */
+struct cldr_plural_rule_ty
+{
+  /* NOTE(review): presumably the plural category keyword ("one",
+     "few", ...) -- confirm against cldr-plural.y.  Released with free
+     when the rule is released.  */
+  char *name;
+  struct cldr_plural_condition_ty *condition;
+};
+
+/* An array of rules.  */
+struct cldr_plural_rule_list_ty
+{
+  struct cldr_plural_rule_ty **items;
+  /* Number of entries of ITEMS in use.  */
+  size_t nitems;
+  /* NOTE(review): presumably the allocated capacity of ITEMS --
+     confirm against cldr-plural.y.  */
+  size_t nitems_max;
+};
+
+/* State handed to yyparse.  */
+struct cldr_plural_parse_args
+{
+  /* Set to the first byte of the input before parsing starts.  */
+  const char *cp;
+  /* Set to the terminating NUL of the input.  */
+  const char *cp_end;
+  /* Rule list allocated by the caller of yyparse.  */
+  struct cldr_plural_rule_list_ty *result;
+};
+
+extern void
+cldr_plural_range_free (struct cldr_plural_range_ty *range);
+extern void
+cldr_plural_range_list_free (struct cldr_plural_range_list_ty *ranges);
+extern void
+cldr_plural_condition_free (struct cldr_plural_condition_ty *condition);
+extern void
+cldr_plural_relation_free (struct cldr_plural_relation_ty *relation);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* _CLDR_PLURAL_EXP_H */
diff --git a/gettext-tools/src/cldr-plural.y b/gettext-tools/src/cldr-plural.y
new file mode 100644
index 0000000..9db4a67
--- /dev/null
+++ b/gettext-tools/src/cldr-plural.y
@@ -0,0 +1,465 @@
+/* Unicode CLDR plural rule parser and converter
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+   This file was written by Daiki Ueno <address@hidden>, 2015.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+%{
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "unistr.h"
+#include "xalloc.h"
+
+#include "cldr-plural-exp.h"
+#include "cldr-plural.h"
+
+/* Prototypes for local functions.  */
+static int yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg);
+static void yyerror (struct cldr_plural_parse_args *arg, const char *str);
+
+/* Allocation of expressions.  */
+
+static struct cldr_plural_rule_ty *
+new_rule (char *name, struct cldr_plural_condition_ty *condition)
+{
+  /* Rule node; NAME and CONDITION are stored as given, not copied.  */
+  struct cldr_plural_rule_ty *rule = XMALLOC (struct cldr_plural_rule_ty);
+  rule->condition = condition;
+  rule->name = name;
+  return rule;
+}
+
+static struct cldr_plural_condition_ty *
+new_leaf_condition (struct cldr_plural_relation_ty *relation)
+{
+  struct cldr_plural_condition_ty *leaf =  /* node wrapping one RELATION */
+    XMALLOC (struct cldr_plural_condition_ty);
+  leaf->value.relation = relation;
+  leaf->type = CLDR_PLURAL_CONDITION_RELATION;
+  return leaf;
+}
+
+static struct cldr_plural_condition_ty *
+new_branch_condition (enum cldr_plural_condition type,
+                      struct cldr_plural_condition_ty *condition0,
+                      struct cldr_plural_condition_ty *condition1)
+{
+  struct cldr_plural_condition_ty *branch =  /* TYPE node, two children */
+    XMALLOC (struct cldr_plural_condition_ty);
+  branch->value.conditions[0] = condition0;
+  branch->value.conditions[1] = condition1;
+  branch->type = type;
+  return branch;
+}
+
+static struct cldr_plural_relation_ty *
+new_relation (struct cldr_plural_expression_ty *expression,
+              enum cldr_plural_relation type,
+              struct cldr_plural_range_list_ty *ranges)
+{
+  struct cldr_plural_relation_ty *rel =  /* EXPRESSION <TYPE> RANGES */
+    XMALLOC (struct cldr_plural_relation_ty);
+  rel->ranges = ranges;
+  rel->type = type;
+  rel->expression = expression;
+  return rel;
+}
+
+static struct cldr_plural_expression_ty *
+new_expression (int operand, int mod)
+{
+  struct cldr_plural_expression_ty *expr =  /* MOD == 0: no modulus given */
+    XMALLOC (struct cldr_plural_expression_ty);
+  expr->mod = mod;
+  expr->operand = operand;
+  return expr;
+}
+
+static struct cldr_plural_range_list_ty *
+add_range (struct cldr_plural_range_list_ty *ranges,
+           struct cldr_plural_range_ty *range)
+{
+  /* Append RANGE to RANGES; a full array is regrown to 2 * max + 1 slots.  */
+  if (ranges->nitems_max == ranges->nitems)
+    {
+      size_t room = 2 * ranges->nitems_max + 1;
+      ranges->items = xrealloc (ranges->items, room * sizeof ranges->items[0]);
+      ranges->nitems_max = room;
+    }
+  ranges->items[ranges->nitems++] = range;
+  return ranges;
+}
+
+static struct cldr_plural_range_ty *
+new_range (struct cldr_plural_operand_ty *start,
+           struct cldr_plural_operand_ty *end)
+{
+  /* Range START..END; the operands are stored as given, not copied.  */
+  struct cldr_plural_range_ty *range = XMALLOC (struct cldr_plural_range_ty);
+  range->end = end;
+  range->start = start;
+  return range;
+}
+%}
+
+%parse-param {struct cldr_plural_parse_args *arg}
+%lex-param {struct cldr_plural_parse_args *arg}
+%define api.pure full
+
+%union {
+  char *sval;                               /* KEYWORD */
+  struct cldr_plural_condition_ty *cval;    /* condition, and_condition */
+  struct cldr_plural_relation_ty *lval;     /* relation */
+  struct cldr_plural_expression_ty *eval;   /* expression */
+  struct cldr_plural_range_ty *gval;        /* range, range_or_integer */
+  struct cldr_plural_operand_ty *oval;      /* INTEGER, DECIMAL */
+  struct cldr_plural_range_list_ty *rval;   /* range_list */
+  int ival;                                 /* OPERAND */
+}
+
+%destructor { free ($$); } <sval>
+%destructor { cldr_plural_condition_free ($$); } <cval>
+%destructor { cldr_plural_relation_free ($$); } <lval>
+%destructor { free ($$); } <eval>
+%destructor { cldr_plural_range_free ($$); } <gval>
+%destructor { free ($$); } <oval>
+%destructor { cldr_plural_range_list_free ($$); } <rval>
+%destructor { } <ival>
+
+%token AND OR RANGE ELLIPSIS OTHER AT_INTEGER AT_DECIMAL
+%token<sval> KEYWORD
+%token<oval> INTEGER DECIMAL
+%token<ival> OPERAND
+%type<cval> condition and_condition
+%type<lval> relation
+%type<eval> expression
+%type<gval> range range_or_integer
+%type<rval> range_list
+
+%%
+
+rules: rule
+        | rules ';' rule  /* rules are separated by ';' */
+        ;
+
+rule:   KEYWORD ':' condition samples
+        { /* Append the new rule to arg->result, growing the array if full.  */
+          struct cldr_plural_rule_ty *rule = new_rule ($1, $3);
+          struct cldr_plural_rule_list_ty *result = arg->result;
+          if (result->nitems == result->nitems_max)
+            {
+              result->nitems_max = result->nitems_max * 2 + 1;
+              result->items = xrealloc (result->items,
+                                        sizeof (struct cldr_plural_rule_ty *)
+                                        * result->nitems_max);
+            }
+          result->items[result->nitems++] = rule;
+        }
+        | OTHER ':' samples  /* "other" has no condition; nothing is stored */
+        ;
+
+condition: and_condition  /* "and" groups more tightly than "or" */
+        {
+          $$ = $1;
+        }
+        | condition OR and_condition  /* left-recursive chain of "or" */
+        {
+          $$ = new_branch_condition (CLDR_PLURAL_CONDITION_OR, $1, $3);
+        }
+        ;
+
+and_condition: relation  /* a lone relation becomes a leaf node */
+        {
+          $$ = new_leaf_condition ($1);
+        }
+        | and_condition AND relation  /* left-recursive chain of "and" */
+        {
+          $$ = new_branch_condition (CLDR_PLURAL_CONDITION_AND,
+                                     $1,
+                                     new_leaf_condition ($3));
+        }
+        ;
+
+relation: expression '=' range_list
+        {
+          $$ = new_relation ($1, CLDR_PLURAL_RELATION_EQUAL, $3);
+        }
+        | expression '!' range_list  /* yylex returns '!' for "!=" */
+        {
+          $$ = new_relation ($1, CLDR_PLURAL_RELATION_NOT_EQUAL, $3);
+        }
+        ;
+
+expression: OPERAND              /* bare operand: no modulus (mod = 0) */
+        {
+          $$ = new_expression ($1, 0);
+        }
+        | OPERAND '%' INTEGER    /* operand reduced modulo INTEGER */
+        { $$ = new_expression ($1, $3->value.ival);
+          free ($3);  /* only the value is kept; release yylex's node */
+        }
+        ;
+
+range_list: range_or_integer
+        { /* First element: start from an empty, zero-filled list.  */
+          struct cldr_plural_range_list_ty *ranges =
+            XMALLOC (struct cldr_plural_range_list_ty);
+          memset (ranges, 0, sizeof (struct cldr_plural_range_list_ty));
+          $$ = add_range (ranges, $1);
+        }
+        | range_list ',' range_or_integer  /* further elements are appended */
+        {
+          $$ = add_range ($1, $3);
+        }
+        ;
+
+range_or_integer: range
+        {
+          $$ = $1;
+        }
+        | INTEGER  /* a single value N is stored as the range N..N */
+        {
+          $$ = new_range ($1, $1);  /* start and end share one operand */
+        }
+        ;
+
+range: INTEGER RANGE INTEGER  /* RANGE is the ".." token */
+        {
+          $$ = new_range ($1, $3);
+        }
+        ;
+
+/* FIXME: collect samples; for now they are only parsed, never stored.  */
+samples: at_integer at_decimal
+        ;
+
+at_integer: %empty  /* the "@integer" section is optional */
+        | AT_INTEGER sample_list
+        ;
+
+at_decimal: %empty  /* the "@decimal" section is optional */
+        | AT_DECIMAL sample_list
+        ;
+
+sample_list: sample_list1 sample_ellipsis
+        ;
+sample_list1: sample_range  /* comma-separated, at least one entry */
+        | sample_list1 ',' sample_range
+        ;
+sample_ellipsis: %empty
+        | ',' ELLIPSIS  /* optional trailing ", ..." or ", U+2026" */
+        ;
+
+sample_range: DECIMAL           { free ($1); }  /* unused: drop yylex's node */
+        | DECIMAL '~' DECIMAL   { free ($1); free ($3); }
+        | INTEGER               { free ($1); }
+        | INTEGER '~' INTEGER   { free ($1); free ($3); }
+        ;
+
+%%
+
+static int
+yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg)
+{
+  /* Scan one token at ARG->cp, advance ARG->cp, and return its code.  */
+  const char *exp = arg->cp;
+  ucs4_t uc;
+  int length, result;
+  static char *buffer;   /* scratch for words; kept between calls */
+  static size_t bufmax;  /* allocated size of BUFFER */
+  size_t bufpos;
+
+  while (1)  /* Skip spaces and tabs; a NUL byte ends the input.  */
+    {
+      if (exp[0] == '\0')
+        {
+          arg->cp = exp;
+          return YYEOF;
+        }
+
+      if (exp[0] != ' ' && exp[0] != '\t')
+        break;
+
+      ++exp;
+    }
+
+  length = u8_mbtouc (&uc, (const uint8_t *) exp, arg->cp_end - exp);
+  if (uc == 0x2026)  /* U+2026 HORIZONTAL ELLIPSIS */
+    {
+      arg->cp = exp + length;
+      return ELLIPSIS;
+    }
+  else if (strncmp ("...", exp, 3) == 0)  /* must be tested before ".." */
+    {
+      arg->cp = exp + 3;
+      return ELLIPSIS;
+    }
+  else if (strncmp ("..", exp, 2) == 0)
+    {
+      arg->cp = exp + 2;
+      return RANGE;
+    }
+  else if (strncmp ("other", exp, 5) == 0)
+    {
+      arg->cp = exp + 5;
+      return OTHER;
+    }
+  else if (strncmp ("@integer", exp, 8) == 0)
+    {
+      arg->cp = exp + 8;
+      return AT_INTEGER;
+    }
+  else if (strncmp ("@decimal", exp, 8) == 0)
+    {
+      arg->cp = exp + 8;
+      return AT_DECIMAL;
+    }
+
+  result = (unsigned char) *exp++;  /* bytes >= 0x80 must not go negative */
+  switch (result)
+    {
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9':
+      {
+        unsigned long int ival = result - '0';
+
+        while (exp[0] >= '0' && exp[0] <= '9')
+          {
+            ival *= 10;
+            ival += exp[0] - '0';
+            ++exp;
+          }
+
+        lval->oval = XMALLOC (struct cldr_plural_operand_ty);
+        if (exp[0] == '.' && exp[1] >= '0' && exp[1] <= '9')
+          {
+            double dval = ival, denominator = 10;  /* an int would overflow */
+            int nfractions = 0;                    /* fraction digits seen */
+            ++exp;
+            while (exp[0] >= '0' && exp[0] <= '9')
+              {
+                dval += (exp[0] - '0') / denominator;
+                denominator *= 10;
+                ++nfractions;
+                ++exp;
+              }
+            lval->oval->type = CLDR_PLURAL_OPERAND_DECIMAL;
+            lval->oval->value.dval.d = dval;
+            lval->oval->value.dval.nfractions = nfractions;
+            result = DECIMAL;
+          }
+        else
+          {
+            lval->oval->type = CLDR_PLURAL_OPERAND_INTEGER;
+            lval->oval->value.ival = ival;
+            result = INTEGER;
+          }
+      }
+      break;
+    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+    case 'v': case 'w': case 'x': case 'y': case 'z':
+      bufpos = 0;
+      for (;;)  /* collect a run of lowercase ASCII letters into BUFFER */
+        {
+          if (bufpos >= bufmax)
+            {
+              bufmax = 2 * bufmax + 10;
+              buffer = xrealloc (buffer, bufmax);
+            }
+          buffer[bufpos++] = result;
+          result = *exp;
+          switch (result)
+            {
+            case 'a': case 'b': case 'c': case 'd': case 'e':
+            case 'f': case 'g': case 'h': case 'i': case 'j':
+            case 'k': case 'l': case 'm': case 'n': case 'o':
+            case 'p': case 'q': case 'r': case 's': case 't':
+            case 'u': case 'v': case 'w': case 'x': case 'y':
+            case 'z':
+              ++exp;
+              continue;
+            default:
+              break;
+            }
+          break;
+        }
+
+      if (bufpos >= bufmax)  /* make room for the terminating NUL */
+        {
+          bufmax = 2 * bufmax + 10;
+          buffer = xrealloc (buffer, bufmax);
+        }
+      buffer[bufpos] = '\0';
+
+      /* Operands: a lone n, i, f, t, v or w.  */
+      if (bufpos == 1)
+        {
+          switch (buffer[0])
+            {
+            case 'n': case 'i': case 'f': case 't': case 'v': case 'w':
+              arg->cp = exp;
+              lval->ival = buffer[0];
+              return OPERAND;
+            default:
+              break;
+            }
+        }
+
+      /* Keywords: "and", "or"; any other word is returned as KEYWORD.  */
+      if (strcmp (buffer, "and") == 0)
+        {
+          arg->cp = exp;
+          return AND;
+        }
+      else if (strcmp (buffer, "or") == 0)
+        {
+          arg->cp = exp;
+          return OR;
+        }
+
+      lval->sval = xstrdup (buffer);
+      result = KEYWORD;
+      break;
+    case '!':  /* only valid as the first half of "!=" */
+      if (exp[0] == '=')
+        {
+          ++exp;
+          result = '!';
+        }
+      else
+        result = YYERRCODE;
+      break;
+    default:  /* any other byte is returned as its own token code */
+      break;
+    }
+
+  arg->cp = exp;
+
+  return result;
+}
+
+static void
+yyerror (struct cldr_plural_parse_args *arg, char const *s)
+{
+  fprintf (stderr, "%s\n", s);  /* message S on its own line; ARG unused */
+}
-- 
2.1.0

ja      nplurals=1; plural=0;
vi      nplurals=1; plural=0;
ko      nplurals=1; plural=0;
en      nplurals=2; plural=(n != 1);
de      nplurals=2; plural=(n != 1);
nl      nplurals=2; plural=(n != 1);
sv      nplurals=2; plural=(n != 1);
da      nplurals=2; plural=(n != 1);
no      nplurals=2; plural=(n != 1);
nb      nplurals=2; plural=(n != 1);
nn      nplurals=2; plural=(n != 1);
fo      nplurals=2; plural=(n != 1);
es      nplurals=2; plural=(n != 1);
pt      nplurals=2; plural=(n != 1);
it      nplurals=2; plural=(n != 1);
bg      nplurals=2; plural=(n != 1);
el      nplurals=2; plural=(n != 1);
fi      nplurals=2; plural=(n != 1);
et      nplurals=2; plural=(n != 1);
he      nplurals=4; plural=(n==1 ? 0 : n==2 ? 1 : n>10 && n%10==0 ? 2 : 3);
eo      nplurals=2; plural=(n != 1);
hu      nplurals=2; plural=(n != 1);
tr      nplurals=2; plural=(n != 1);
pt_BR   
fr      nplurals=2; plural=(n > 1);
lv      nplurals=3; plural=(n%10==0 || (n%100>=11 && n%100<=19) ? 0 : n%10==1 
&& n%100!=11 ? 1 : 2);
ga      nplurals=5; plural=(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && 
n<=10 ? 3 : 4);
ro      nplurals=3; plural=(n==1 ? 0 : n==0 || (n!=1 && n%100>=1 && n%100<=19) 
? 1 : 2);
lt      nplurals=3; plural=(n%10==1 && (n%100<11 || n%100>19) ? 0 : n%10>=2 && 
n%10<=9 && (n%100<11 || n%100>19) ? 1 : 2);
ru      nplurals=4; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && 
(n%100<12 || n%100>14) ? 1 : n%10==0 || (n%10>=5 && n%10<=9) || (n%100>=11 && 
n%100<=14) ? 2 : 3);
uk      nplurals=4; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && 
(n%100<12 || n%100>14) ? 1 : n%10==0 || (n%10>=5 && n%10<=9) || (n%100>=11 && 
n%100<=14) ? 2 : 3);
be      nplurals=4; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && 
(n%100<12 || n%100>14) ? 1 : n%10==0 || (n%10>=5 && n%10<=9) || (n%100>=11 && 
n%100<=14) ? 2 : 3);
sr      nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && 
(n%100<12 || n%100>14) ? 1 : 2);
hr      nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && 
(n%100<12 || n%100>14) ? 1 : 2);
cs      nplurals=3; plural=(n==1 ? 0 : n>=2 && n<=4 ? 1 : 2);
sk      nplurals=3; plural=(n==1 ? 0 : n>=2 && n<=4 ? 1 : 2);
pl      nplurals=4; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<12 || 
n%100>14) ? 1 : (n!=1 && n%10<=1) || (n%10>=5 && n%10<=9) || (n%100>=12 && 
n%100<=14) ? 2 : 3);
sl      nplurals=4; plural=(n%100==1 ? 0 : n%100==2 ? 1 : n%100>=3 && n%100<=4 
? 2 : 3);

reply via email to

[Prev in Thread] Current Thread [Next in Thread]