[bug-gettext] [PATCH 1/3] Support for Python braced format strings.

bug-gettext
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[bug-gettext] [PATCH 1/3] Support for Python braced format strings.

From:	Daiki Ueno
Subject:	[bug-gettext] [PATCH 1/3] Support for Python braced format strings.
Date:	Fri, 14 Dec 2012 17:47:42 +0900
---
 gettext-tools/libgettextpo/ChangeLog   |   4 +
 gettext-tools/libgettextpo/Makefile.am |   1 +
 gettext-tools/src/ChangeLog            |  26 ++++
 gettext-tools/src/FILES                |   1 +
 gettext-tools/src/Makefile.am          |   1 +
 gettext-tools/src/format.c             |   1 +
 gettext-tools/src/format.h             |   1 +
 gettext-tools/src/message.c            |   2 +
 gettext-tools/src/message.h            |   3 +-
 gettext-tools/src/x-python.c           | 231 +++++++++++++++++++++++++++++++--
 gettext-tools/src/x-python.h           |   2 +-
 gettext-tools/src/xgettext.c           |   5 +
 12 files changed, 265 insertions(+), 13 deletions(-)

diff --git a/gettext-tools/libgettextpo/ChangeLog b/gettext-tools/libgettextpo/ChangeLog
index d4cdbb0..60d8e64 100644
--- a/gettext-tools/libgettextpo/ChangeLog
+++ b/gettext-tools/libgettextpo/ChangeLog
@@ -1,3 +1,7 @@
+2012-12-14  Daiki Ueno  <address@hidden>
+
+       * Makefile.am (libgettextpo_la_AUXSOURCES): Add format-python-brace.c.
+
 2010-11-07  Bruno Haible  <address@hidden>
 
        Rename gettext-po.h.in to gettext-po.in.h.
diff --git a/gettext-tools/libgettextpo/Makefile.am b/gettext-tools/libgettextpo/Makefile.am
index c8fb05d..2da0d14 100644
--- a/gettext-tools/libgettextpo/Makefile.am
+++ b/gettext-tools/libgettextpo/Makefile.am
@@ -65,6 +65,7 @@ libgettextpo_la_AUXSOURCES = \
   ../src/format-c.c \
   ../src/format-sh.c \
   ../src/format-python.c \
+  ../src/format-python-brace.c \
   ../src/format-lisp.c \
   ../src/format-elisp.c \
   ../src/format-librep.c \
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog
index d0e66c7..c1534e3 100644
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,29 @@
+2012-12-14  Daiki Ueno  <address@hidden>
+
+       * message.h (format_type): New enum value 'format_python_brace'.
+       (NFORMATS): Increment.
+       * message.c (format_language): Add format_python_brace.
+       (format_language_pretty): Likewise.
+       * format.h (formatstring_python_brace): New declaration.
+       * format-python-brace.c: New file, based on format-perl-brace.c.
+       * format.c (formatstring_parsers): Add formatstring_python_brace.
+       * x-python.c (init_flag_table_python): Also register flags for
+       python-brace-format.
+       (struct token_buffer_ty): New type.
+       (token_buffer_alloc): New function.
+       (token_buffer_push_first): New function.
+       (token_buffer_push_last): New function.
+       (token_buffer_pull): New function.
+       (token_buffer_free): New function.
+       (skip_balanced): New function.
+       (extract_balanced): Handle python brace format.  Add extra
+       argument BUFFER.
+       * x-python.h (SCANNERS_PYTHON): Refer to formatstring_python_brace.
+       * xgettext.c (xgettext_record_flag): Store format_python_brace
+       flags in flag_table_python.
+       * Makefile.am (FORMAT_SOURCE): Add format-python-brace.c.
+       * FILES: Update.
+
 2012-06-03  Jim Meyering  <address@hidden>
 
        * msginit.c: Spelling fixes.
diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES
index 9a41f48..08f3116 100644
--- a/gettext-tools/src/FILES
+++ b/gettext-tools/src/FILES
@@ -214,6 +214,7 @@ format-c.c             Format string handling for C.
 format-c-parse.h         Format string handling for C, parsing routine.
 format-sh.c            Format string handling for Shell.
 format-python.c        Format string handling for Python.
+format-python-brace.c  Format string handling for Python, braced syntax.
 format-lisp.c          Format string handling for Common Lisp.
 format-elisp.c         Format string handling for Emacs Lisp.
 format-librep.c        Format string handling for librep.
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am
index 87cc358..51df181 100644
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -117,6 +117,7 @@ FORMAT_SOURCE += \
   format-c.c format-c-parse.h \
   format-sh.c \
   format-python.c \
+  format-python-brace.c \
   format-lisp.c \
   format-elisp.c \
   format-librep.c \
diff --git a/gettext-tools/src/format.c b/gettext-tools/src/format.c
index e6c5de9..a67e75c 100644
--- a/gettext-tools/src/format.c
+++ b/gettext-tools/src/format.c
@@ -38,6 +38,7 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] =
   /* format_objc */             &formatstring_objc,
   /* format_sh */               &formatstring_sh,
   /* format_python */           &formatstring_python,
+  /* format_python_brace */     &formatstring_python_brace,
   /* format_lisp */             &formatstring_lisp,
   /* format_elisp */            &formatstring_elisp,
   /* format_librep */           &formatstring_librep,
diff --git a/gettext-tools/src/format.h b/gettext-tools/src/format.h
index 60f0adc..8179a54 100644
--- a/gettext-tools/src/format.h
+++ b/gettext-tools/src/format.h
@@ -99,6 +99,7 @@ extern DLL_VARIABLE struct formatstring_parser formatstring_c;
 extern DLL_VARIABLE struct formatstring_parser formatstring_objc;
 extern DLL_VARIABLE struct formatstring_parser formatstring_sh;
 extern DLL_VARIABLE struct formatstring_parser formatstring_python;
+extern DLL_VARIABLE struct formatstring_parser formatstring_python_brace;
 extern DLL_VARIABLE struct formatstring_parser formatstring_lisp;
 extern DLL_VARIABLE struct formatstring_parser formatstring_elisp;
 extern DLL_VARIABLE struct formatstring_parser formatstring_librep;
diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c
index 5162b06..801a801 100644
--- a/gettext-tools/src/message.c
+++ b/gettext-tools/src/message.c
@@ -38,6 +38,7 @@ const char *const format_language[NFORMATS] =
   /* format_objc */             "objc",
   /* format_sh */               "sh",
   /* format_python */           "python",
+  /* format_python_brace */     "python-brace",
   /* format_lisp */             "lisp",
   /* format_elisp */            "elisp",
   /* format_librep */           "librep",
@@ -66,6 +67,7 @@ const char *const format_language_pretty[NFORMATS] =
   /* format_objc */             "Objective C",
   /* format_sh */               "Shell",
   /* format_python */           "Python",
+  /* format_python_brace */     "Python brace",
   /* format_lisp */             "Lisp",
   /* format_elisp */            "Emacs Lisp",
   /* format_librep */           "librep",
diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h
index af9244a..f48e52f 100644
--- a/gettext-tools/src/message.h
+++ b/gettext-tools/src/message.h
@@ -47,6 +47,7 @@ enum format_type
   format_objc,
   format_sh,
   format_python,
+  format_python_brace,
   format_lisp,
   format_elisp,
   format_librep,
@@ -68,7 +69,7 @@ enum format_type
   format_kde,
   format_boost
 };
-#define NFORMATS 24     /* Number of format_type enum values.  */
+#define NFORMATS 25     /* Number of format_type enum values.  */
 extern DLL_VARIABLE const char *const format_language[NFORMATS];
 extern DLL_VARIABLE const char *const format_language_pretty[NFORMATS];
 
diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c
index aa6a7d6..879ffdd 100644
--- a/gettext-tools/src/x-python.c
+++ b/gettext-tools/src/x-python.c
@@ -1,5 +1,5 @@
 /* xgettext Python backend.
-   Copyright (C) 2002-2003, 2005-2011 Free Software Foundation, Inc.
+   Copyright (C) 2002-2003, 2005-2012 Free Software Foundation, Inc.
 
    This file was written by Bruno Haible <address@hidden>, 2002.
 
@@ -133,6 +133,18 @@ init_flag_table_python ()
   xgettext_record_flag ("dngettext:3:pass-python-format");
   xgettext_record_flag ("_:1:pass-python-format");
   /* xgettext_record_flag ("%:1:python-format"); // % is an infix operator! */
+
+  xgettext_record_flag ("gettext:1:pass-python-brace-format");
+  xgettext_record_flag ("ugettext:1:pass-python-brace-format");
+  xgettext_record_flag ("dgettext:2:pass-python-brace-format");
+  xgettext_record_flag ("ngettext:1:pass-python-brace-format");
+  xgettext_record_flag ("ngettext:2:pass-python-brace-format");
+  xgettext_record_flag ("ungettext:1:pass-python-brace-format");
+  xgettext_record_flag ("ungettext:2:pass-python-brace-format");
+  xgettext_record_flag ("dngettext:2:pass-python-brace-format");
+  xgettext_record_flag ("dngettext:3:pass-python-brace-format");
+  xgettext_record_flag ("_:1:pass-python-brace-format");
+  xgettext_record_flag (".format:1:python-brace-format");
 }
 
 
@@ -990,6 +1002,7 @@ enum token_type_ty
   token_type_lparen,            /* ( */
   token_type_rparen,            /* ) */
   token_type_comma,             /* , */
+  token_type_period,            /* . */
   token_type_lbracket,          /* [ */
   token_type_rbracket,          /* ] */
   token_type_string,            /* "abc", 'abc', """abc""", '''abc''' */
@@ -1403,7 +1416,7 @@ phase5_get (token_ty *tp)
             if (!(c1 >= '0' && c1 <= '9'))
               {
 
-                tp->type = token_type_other;
+                tp->type = token_type_period;
                 return;
               }
           }
@@ -1646,6 +1659,132 @@ x_python_lex (token_ty *tp)
 }
 
 
+/* A token buffer used as a lookahead buffer.  */
+
+typedef struct token_buffer_ty token_buffer_ty;
+struct token_buffer_ty
+{
+  token_ty *items;
+  size_t first;                 /* first index */
+  size_t last;                  /* last index, exclusive */
+  size_t nitems;
+  token_buffer_ty *outer_buffer;
+};
+
+static token_buffer_ty *
+token_buffer_alloc (token_buffer_ty *outer_buffer)
+{
+  token_buffer_ty *result = XZALLOC (token_buffer_ty);
+  result->outer_buffer = outer_buffer;
+  return result;
+}
+
+/* Pushes the token TOKEN onto the beginning of the buffer BUFFER.  */
+static void
+token_buffer_push_first (token_buffer_ty *buffer, token_ty *token)
+{
+  if (buffer->last >= buffer->nitems)
+    {
+      size_t nbytes;
+
+      buffer->nitems = 2 * buffer->nitems + 4;
+      nbytes = buffer->nitems * sizeof (token_ty);
+      buffer->items = xrealloc (buffer->items, nbytes);
+    }
+  memmove (&buffer->items[buffer->first + 1],
+           &buffer->items[buffer->first],
+           (buffer->last - buffer->first) * sizeof (token_ty));
+  buffer->last++;
+  memcpy (&buffer->items[buffer->first], token, sizeof (token_ty));
+}
+
+/* Pushes the token TOKEN onto the end of the buffer BUFFER.  */
+static inline void
+token_buffer_push_last (token_buffer_ty *buffer, token_ty *token)
+{
+  if (buffer->last >= buffer->nitems)
+    {
+      size_t nbytes;
+
+      buffer->nitems = 2 * buffer->nitems + 4;
+      nbytes = buffer->nitems * sizeof (token_ty);
+      buffer->items = xrealloc (buffer->items, nbytes);
+    }
+  memcpy (&buffer->items[buffer->last++], token, sizeof (token_ty));
+}
+
+/* Pulls the token at the front of the buffer BUFFER, falling back to its outer
+   buffers, and stores it in *TOKEN.  Returns false if all of them are empty.  */
+static inline bool
+token_buffer_pull (token_buffer_ty *buffer, token_ty *token)
+{
+  if (buffer->last - buffer->first > 0)
+    {
+      memcpy (token, &buffer->items[buffer->first++], sizeof (token_ty));
+      return true;
+    }
+  if (buffer->outer_buffer)
+    return token_buffer_pull (buffer->outer_buffer, token);
+  return false;
+}
+
+/* Frees all resources allocated by buffer BUFFER.  */
+static inline void
+token_buffer_free (token_buffer_ty *buffer)
+{
+  free (buffer->items);
+  free (buffer);
+}
+
+static bool
+skip_balanced (token_type_ty delim,
+               token_buffer_ty *buffer)
+{
+  for (;;)
+    {
+      token_ty token;
+
+      x_python_lex (&token);
+      token_buffer_push_last (buffer, &token);
+
+      switch (token.type)
+        {
+        case token_type_symbol:
+        case token_type_comma:
+        case token_type_string:
+        case token_type_other:
+        case token_type_period:
+          break;
+
+        case token_type_lparen:
+          if (skip_balanced (token_type_rparen, buffer))
+            return true;
+          break;
+
+        case token_type_rparen:
+          if (delim == token_type_rparen || delim == token_type_eof)
+            return false;
+          break;
+
+        case token_type_lbracket:
+          if (skip_balanced (token_type_rbracket, buffer))
+            return true;
+          break;
+
+        case token_type_rbracket:
+          if (delim == token_type_rbracket || delim == token_type_eof)
+            return false;
+          break;
+
+        case token_type_eof:
+          return true;
+
+        default:
+          abort ();
+        }
+    }
+}
+
 /* ========================= Extracting strings.  ========================== */
 
 
@@ -1660,9 +1799,9 @@ static flag_context_list_table_ty *flag_context_list_table;
    the grammar to the compiler.
 
      Normal handling: Look for
-       keyword ( ... msgid ... )
+       keyword ( ... msgid ... ) dot_keyword
      Plural handling: Look for
-       keyword ( ... msgid ... msgid_plural ... )
+       keyword ( ... msgid ... msgid_plural ... ) dot_keyword
 
    We use recursion because the arguments before msgid or between msgid
    and msgid_plural can contain subexpressions of the same form.  */
@@ -1678,7 +1817,8 @@ extract_balanced (message_list_ty *mlp,
                   token_type_ty delim,
                   flag_context_ty outer_context,
                   flag_context_list_iterator_ty context_iter,
-                  struct arglist_parser *argparser)
+                  struct arglist_parser *argparser,
+                  token_buffer_ty *buffer)
 {
   /* Current argument number.  */
   int arg = 1;
@@ -1699,9 +1839,13 @@ extract_balanced (message_list_ty *mlp,
 
   for (;;)
     {
-      token_ty token;
+      token_ty token, keyword_token;
+      bool is_from_buffer = false;
+
+      is_from_buffer = token_buffer_pull (buffer, &token);
+      if (!is_from_buffer)
+        x_python_lex (&token);
 
-      x_python_lex (&token);
       switch (token.type)
         {
         case token_type_symbol:
@@ -1713,6 +1857,7 @@ extract_balanced (message_list_ty *mlp,
                 == 0)
               {
                 next_shapes = (const struct callshapes *) keyword_value;
+                keyword_token = token;
                 state = 1;
               }
             else
@@ -1723,18 +1868,75 @@ extract_balanced (message_list_ty *mlp,
               flag_context_list_table_lookup (
                 flag_context_list_table,
                 token.string, strlen (token.string)));
-          free (token.string);
+          if (state == 0 || is_from_buffer)
+            free (token.string);
           continue;
 
         case token_type_lparen:
+          if (state == 1 && !is_from_buffer)
+            {
+              token_ty next_token;
+              token_ty lparen_token, rparen_token;
+              char *dot_keyword;
+
+              token_buffer_push_last (buffer, &keyword_token);
+              token_buffer_push_last (buffer, &token);
+
+              if (skip_balanced (token_type_rparen, buffer))
+                {
+                  xgettext_current_source_encoding = po_charset_utf8;
+                  arglist_parser_done (argparser, arg);
+                  xgettext_current_source_encoding = xgettext_current_file_source_encoding;
+                  token_buffer_free (buffer);
+                  return true;
+                }
+
+              x_python_lex (&next_token);
+              if (next_token.type != token_type_period)
+                {
+                  token_buffer_push_last (buffer, &next_token);
+                  next_context_iter = null_context_list_iterator;
+                  state = 0;
+                  continue;
+                }
+
+              x_python_lex (&next_token);
+              if (next_token.type != token_type_symbol)
+                {
+                  token_buffer_push_last (buffer, &next_token);
+                  next_context_iter = null_context_list_iterator;
+                  state = 0;
+                  continue;
+                }
+
+              lparen_token.type = token_type_lparen;
+              lparen_token.line_number = next_token.line_number;
+              token_buffer_push_first (buffer, &lparen_token);
+
+              rparen_token.type = token_type_rparen;
+              rparen_token.line_number = next_token.line_number;
+              token_buffer_push_last (buffer, &rparen_token);
+
+              dot_keyword = xasprintf (".%s", next_token.string);
+              free (next_token.string);
+              next_token.string = dot_keyword;
+              token_buffer_push_first (buffer, &next_token);
+
+              next_context_iter = null_context_list_iterator;
+              state = 0;
+              continue;
+            }
+
           if (extract_balanced (mlp, token_type_rparen,
                                 inner_context, next_context_iter,
                                 arglist_parser_alloc (mlp,
-                                                      state ? next_shapes : NULL)))
+                                                      state ? next_shapes : NULL),
+                                token_buffer_alloc (buffer)))
             {
               xgettext_current_source_encoding = po_charset_utf8;
               arglist_parser_done (argparser, arg);
               xgettext_current_source_encoding = xgettext_current_file_source_encoding;
+              token_buffer_free (buffer);
               return true;
             }
           next_context_iter = null_context_list_iterator;
@@ -1747,6 +1949,7 @@ extract_balanced (message_list_ty *mlp,
               xgettext_current_source_encoding = po_charset_utf8;
               arglist_parser_done (argparser, arg);
               xgettext_current_source_encoding = xgettext_current_file_source_encoding;
+              token_buffer_free (buffer);
               return false;
             }
           next_context_iter = null_context_list_iterator;
@@ -1766,11 +1969,13 @@ extract_balanced (message_list_ty *mlp,
         case token_type_lbracket:
           if (extract_balanced (mlp, token_type_rbracket,
                                 null_context, null_context_list_iterator,
-                                arglist_parser_alloc (mlp, NULL)))
+                                arglist_parser_alloc (mlp, NULL),
+                                token_buffer_alloc (buffer)))
             {
               xgettext_current_source_encoding = po_charset_utf8;
               arglist_parser_done (argparser, arg);
               xgettext_current_source_encoding = xgettext_current_file_source_encoding;
+              token_buffer_free (buffer);
               return true;
             }
           next_context_iter = null_context_list_iterator;
@@ -1783,6 +1988,7 @@ extract_balanced (message_list_ty *mlp,
               xgettext_current_source_encoding = po_charset_utf8;
               arglist_parser_done (argparser, arg);
               xgettext_current_source_encoding = xgettext_current_file_source_encoding;
+              token_buffer_free (buffer);
               return false;
             }
           next_context_iter = null_context_list_iterator;
@@ -1815,8 +2021,10 @@ extract_balanced (message_list_ty *mlp,
           xgettext_current_source_encoding = po_charset_utf8;
           arglist_parser_done (argparser, arg);
           xgettext_current_source_encoding = xgettext_current_file_source_encoding;
+          token_buffer_free (buffer);
           return true;
 
+        case token_type_period:
         case token_type_other:
           next_context_iter = null_context_list_iterator;
           state = 0;
@@ -1869,7 +2077,8 @@ extract_python (FILE *f,
      due to an unbalanced closing parenthesis, just restart it.  */
   while (!extract_balanced (mlp, token_type_eof,
                             null_context, null_context_list_iterator,
-                            arglist_parser_alloc (mlp, NULL)))
+                            arglist_parser_alloc (mlp, NULL),
+                            token_buffer_alloc (NULL)))
     ;
 
   fp = NULL;
diff --git a/gettext-tools/src/x-python.h b/gettext-tools/src/x-python.h
index 14f8bc5..b70b048 100644
--- a/gettext-tools/src/x-python.h
+++ b/gettext-tools/src/x-python.h
@@ -32,7 +32,7 @@ extern "C" {
 
 #define SCANNERS_PYTHON \
   { "Python",           extract_python,                                   \
-                        &flag_table_python, &formatstring_python, NULL }, \
+                        &flag_table_python, &formatstring_python, &formatstring_python_brace }, \
 
 /* Scan a Python file and add its translatable strings to mdlp.  */
 extern void extract_python (FILE *fp, const char *real_filename,
diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c
index eb3271f..45b27dc 100644
--- a/gettext-tools/src/xgettext.c
+++ b/gettext-tools/src/xgettext.c
@@ -1674,6 +1674,11 @@ xgettext_record_flag (const char *optionstring)
                                                     name_start, name_end,
                                                     argnum, value, pass);
                     break;
+                  case format_python_brace:
+                    flag_context_list_table_insert (&flag_table_python, 1,
+                                                    name_start, name_end,
+                                                    argnum, value, pass);
+                    break;
                   case format_lisp:
                     flag_context_list_table_insert (&flag_table_lisp, 0,
                                                     name_start, name_end,
-- 
1.7.11.7
[Prev in Thread]
Current Thread
[Next in Thread]
[bug-gettext] [PATCH 0/3] Support for Python format strings in braced syntax, Daiki Ueno, 2012/12/14
- [bug-gettext] [PATCH 3/3] Tests for python-brace-format., Daiki Ueno, 2012/12/14
- [bug-gettext] [PATCH 1/3] Support for Python braced format strings., Daiki Ueno <=
- [bug-gettext] [PATCH 2/3] Document python-brace-format., Daiki Ueno, 2012/12/14
- Re: [bug-gettext] [PATCH 0/3] Support for Python format strings in braced syntax, Stefano Lattarini, 2012/12/14
  - Re: [bug-gettext] [PATCH 0/3] Support for Python format strings in braced syntax, Daiki Ueno, 2012/12/14
    - Re: [bug-gettext] [PATCH 0/3] Support for Python format strings in braced syntax, Stefano Lattarini, 2012/12/14
Prev by Date: [bug-gettext] [PATCH 3/3] Tests for python-brace-format.
Next by Date: [bug-gettext] [PATCH 2/3] Document python-brace-format.
Previous by thread: [bug-gettext] [PATCH 3/3] Tests for python-brace-format.
Next by thread: [bug-gettext] [PATCH 2/3] Document python-brace-format.
Index(es):
- Date
- Thread