[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[bug-gettext] [PATCH 1/3] Support for Python braced format strings.
From: |
Daiki Ueno |
Subject: |
[bug-gettext] [PATCH 1/3] Support for Python braced format strings. |
Date: |
Fri, 14 Dec 2012 17:47:42 +0900 |
---
gettext-tools/libgettextpo/ChangeLog | 4 +
gettext-tools/libgettextpo/Makefile.am | 1 +
gettext-tools/src/ChangeLog | 26 ++++
gettext-tools/src/FILES | 1 +
gettext-tools/src/Makefile.am | 1 +
gettext-tools/src/format.c | 1 +
gettext-tools/src/format.h | 1 +
gettext-tools/src/message.c | 2 +
gettext-tools/src/message.h | 3 +-
gettext-tools/src/x-python.c | 231 +++++++++++++++++++++++++++++++--
gettext-tools/src/x-python.h | 2 +-
gettext-tools/src/xgettext.c | 5 +
12 files changed, 265 insertions(+), 13 deletions(-)
diff --git a/gettext-tools/libgettextpo/ChangeLog
b/gettext-tools/libgettextpo/ChangeLog
index d4cdbb0..60d8e64 100644
--- a/gettext-tools/libgettextpo/ChangeLog
+++ b/gettext-tools/libgettextpo/ChangeLog
@@ -1,3 +1,7 @@
+2012-12-14 Daiki Ueno <address@hidden>
+
+ * Makefile.am (libgettextpo_la_AUXSOURCES): Add format-python-brace.c.
+
2010-11-07 Bruno Haible <address@hidden>
Rename gettext-po.h.in to gettext-po.in.h.
diff --git a/gettext-tools/libgettextpo/Makefile.am
b/gettext-tools/libgettextpo/Makefile.am
index c8fb05d..2da0d14 100644
--- a/gettext-tools/libgettextpo/Makefile.am
+++ b/gettext-tools/libgettextpo/Makefile.am
@@ -65,6 +65,7 @@ libgettextpo_la_AUXSOURCES = \
../src/format-c.c \
../src/format-sh.c \
../src/format-python.c \
+ ../src/format-python-brace.c \
../src/format-lisp.c \
../src/format-elisp.c \
../src/format-librep.c \
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog
index d0e66c7..c1534e3 100644
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,29 @@
+2012-12-14 Daiki Ueno <address@hidden>
+
+ * message.h (format_type): New enum value 'format_python_brace'.
+ (NFORMATS): Increment.
+ * message.c (format_language): Add format_python_brace.
+ (format_language_pretty): Likewise.
+ * format.h (formatstring_python_brace): New declaration.
+ * format-python-brace.c: New file, based on format-perl-brace.c.
+ * format.c (formatstring_parsers): Add formatstring_python_brace.
+ * x-python.c (init_flag_table_python): Also register flags for
+ python-brace-format.
+ (struct token_buffer_ty): New type.
+ (token_buffer_alloc): New function.
+ (token_buffer_push_first): New function.
+ (token_buffer_push_last): New function.
+ (token_buffer_pull): New function.
+ (token_buffer_free): New function.
+ (skip_balanced): New function.
+ (extract_balanced): Handle python brace format. Add extra
+ argument BUFFER.
+ * x-python.h (SCANNERS_PYTHON): Refer to formatstring_python_brace.
+ * xgettext.c (xgettext_record_flag): Store format_python_brace
+ flags in flag_table_python.
+ * Makefile.am (FORMAT_SOURCE): Add format-python-brace.c.
+ * FILES: Update.
+
2012-06-03 Jim Meyering <address@hidden>
* msginit.c: Spelling fixes.
diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES
index 9a41f48..08f3116 100644
--- a/gettext-tools/src/FILES
+++ b/gettext-tools/src/FILES
@@ -214,6 +214,7 @@ format-c.c Format string handling for C.
format-c-parse.h Format string handling for C, parsing routine.
format-sh.c Format string handling for Shell.
format-python.c Format string handling for Python.
+format-python-brace.c Format string handling for Python, braced syntax.
format-lisp.c Format string handling for Common Lisp.
format-elisp.c Format string handling for Emacs Lisp.
format-librep.c Format string handling for librep.
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am
index 87cc358..51df181 100644
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -117,6 +117,7 @@ FORMAT_SOURCE += \
format-c.c format-c-parse.h \
format-sh.c \
format-python.c \
+ format-python-brace.c \
format-lisp.c \
format-elisp.c \
format-librep.c \
diff --git a/gettext-tools/src/format.c b/gettext-tools/src/format.c
index e6c5de9..a67e75c 100644
--- a/gettext-tools/src/format.c
+++ b/gettext-tools/src/format.c
@@ -38,6 +38,7 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] =
/* format_objc */ &formatstring_objc,
/* format_sh */ &formatstring_sh,
/* format_python */ &formatstring_python,
+ /* format_python_brace */ &formatstring_python_brace,
/* format_lisp */ &formatstring_lisp,
/* format_elisp */ &formatstring_elisp,
/* format_librep */ &formatstring_librep,
diff --git a/gettext-tools/src/format.h b/gettext-tools/src/format.h
index 60f0adc..8179a54 100644
--- a/gettext-tools/src/format.h
+++ b/gettext-tools/src/format.h
@@ -99,6 +99,7 @@ extern DLL_VARIABLE struct formatstring_parser formatstring_c;
extern DLL_VARIABLE struct formatstring_parser formatstring_objc;
extern DLL_VARIABLE struct formatstring_parser formatstring_sh;
extern DLL_VARIABLE struct formatstring_parser formatstring_python;
+extern DLL_VARIABLE struct formatstring_parser formatstring_python_brace;
extern DLL_VARIABLE struct formatstring_parser formatstring_lisp;
extern DLL_VARIABLE struct formatstring_parser formatstring_elisp;
extern DLL_VARIABLE struct formatstring_parser formatstring_librep;
diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c
index 5162b06..801a801 100644
--- a/gettext-tools/src/message.c
+++ b/gettext-tools/src/message.c
@@ -38,6 +38,7 @@ const char *const format_language[NFORMATS] =
/* format_objc */ "objc",
/* format_sh */ "sh",
/* format_python */ "python",
+ /* format_python_brace */ "python-brace",
/* format_lisp */ "lisp",
/* format_elisp */ "elisp",
/* format_librep */ "librep",
@@ -66,6 +67,7 @@ const char *const format_language_pretty[NFORMATS] =
/* format_objc */ "Objective C",
/* format_sh */ "Shell",
/* format_python */ "Python",
+ /* format_python_brace */ "Python brace",
/* format_lisp */ "Lisp",
/* format_elisp */ "Emacs Lisp",
/* format_librep */ "librep",
diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h
index af9244a..f48e52f 100644
--- a/gettext-tools/src/message.h
+++ b/gettext-tools/src/message.h
@@ -47,6 +47,7 @@ enum format_type
format_objc,
format_sh,
format_python,
+ format_python_brace,
format_lisp,
format_elisp,
format_librep,
@@ -68,7 +69,7 @@ enum format_type
format_kde,
format_boost
};
-#define NFORMATS 24 /* Number of format_type enum values. */
+#define NFORMATS 25 /* Number of format_type enum values. */
extern DLL_VARIABLE const char *const format_language[NFORMATS];
extern DLL_VARIABLE const char *const format_language_pretty[NFORMATS];
diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c
index aa6a7d6..879ffdd 100644
--- a/gettext-tools/src/x-python.c
+++ b/gettext-tools/src/x-python.c
@@ -1,5 +1,5 @@
/* xgettext Python backend.
- Copyright (C) 2002-2003, 2005-2011 Free Software Foundation, Inc.
+ Copyright (C) 2002-2003, 2005-2012 Free Software Foundation, Inc.
This file was written by Bruno Haible <address@hidden>, 2002.
@@ -133,6 +133,18 @@ init_flag_table_python ()
xgettext_record_flag ("dngettext:3:pass-python-format");
xgettext_record_flag ("_:1:pass-python-format");
/* xgettext_record_flag ("%:1:python-format"); // % is an infix operator! */
+
+ xgettext_record_flag ("gettext:1:pass-python-brace-format");
+ xgettext_record_flag ("ugettext:1:pass-python-brace-format");
+ xgettext_record_flag ("dgettext:2:pass-python-brace-format");
+ xgettext_record_flag ("ngettext:1:pass-python-brace-format");
+ xgettext_record_flag ("ngettext:2:pass-python-brace-format");
+ xgettext_record_flag ("ungettext:1:pass-python-brace-format");
+ xgettext_record_flag ("ungettext:2:pass-python-brace-format");
+ xgettext_record_flag ("dngettext:2:pass-python-brace-format");
+ xgettext_record_flag ("dngettext:3:pass-python-brace-format");
+ xgettext_record_flag ("_:1:pass-python-brace-format");
+ xgettext_record_flag (".format:1:python-brace-format");
}
@@ -990,6 +1002,7 @@ enum token_type_ty
token_type_lparen, /* ( */
token_type_rparen, /* ) */
token_type_comma, /* , */
+ token_type_period, /* . */
token_type_lbracket, /* [ */
token_type_rbracket, /* ] */
token_type_string, /* "abc", 'abc', """abc""", '''abc''' */
@@ -1403,7 +1416,7 @@ phase5_get (token_ty *tp)
if (!(c1 >= '0' && c1 <= '9'))
{
- tp->type = token_type_other;
+ tp->type = token_type_period;
return;
}
}
@@ -1646,6 +1659,132 @@ x_python_lex (token_ty *tp)
}
+/* A token buffer used as a lookahead buffer. */
+
+typedef struct token_buffer_ty token_buffer_ty;
+struct token_buffer_ty
+{
+ token_ty *items;
+ size_t first; /* first index */
+ size_t last; /* last index, exclusive */
+ size_t nitems;
+ token_buffer_ty *outer_buffer;
+};
+
+static token_buffer_ty *
+token_buffer_alloc (token_buffer_ty *outer_buffer)
+{
+ token_buffer_ty *result = XZALLOC (token_buffer_ty);
+ result->outer_buffer = outer_buffer;
+ return result;
+}
+
+/* Pushes the token TOKEN onto the beginning of the buffer BUFFER. */
+static void
+token_buffer_push_first (token_buffer_ty *buffer, token_ty *token)
+{
+ if (buffer->last >= buffer->nitems)
+ {
+ size_t nbytes;
+
+ buffer->nitems = 2 * buffer->nitems + 4;
+ nbytes = buffer->nitems * sizeof (token_ty);
+ buffer->items = xrealloc (buffer->items, nbytes);
+ }
+ memmove (&buffer->items[buffer->first + 1],
+ &buffer->items[buffer->first],
+ (buffer->last - buffer->first) * sizeof (token_ty));
+ buffer->last++;
+ memcpy (&buffer->items[buffer->first], token, sizeof (token_ty));
+}
+
+/* Pushes the token TOKEN onto the end of the buffer BUFFER. */
+static inline void
+token_buffer_push_last (token_buffer_ty *buffer, token_ty *token)
+{
+ if (buffer->last >= buffer->nitems)
+ {
+ size_t nbytes;
+
+ buffer->nitems = 2 * buffer->nitems + 4;
+ nbytes = buffer->nitems * sizeof (token_ty);
+ buffer->items = xrealloc (buffer->items, nbytes);
+ }
+ memcpy (&buffer->items[buffer->last++], token, sizeof (token_ty));
+}
+
+/* Removes the first token of the buffer BUFFER, or failing that of its outer
+ buffers, and stores it in *TOKEN. Returns false if no token is available. */
+static inline bool
+token_buffer_pull (token_buffer_ty *buffer, token_ty *token)
+{
+ if (buffer->last - buffer->first > 0)
+ {
+ memcpy (token, &buffer->items[buffer->first++], sizeof (token_ty));
+ return true;
+ }
+ if (buffer->outer_buffer)
+ return token_buffer_pull (buffer->outer_buffer, token);
+ return false;
+}
+
+/* Frees all resources allocated by buffer BUFFER. */
+static inline void
+token_buffer_free (token_buffer_ty *buffer)
+{
+ free (buffer->items);
+ free (buffer);
+}
+
+static bool
+skip_balanced (token_type_ty delim,
+ token_buffer_ty *buffer)
+{
+ for (;;)
+ {
+ token_ty token;
+
+ x_python_lex (&token);
+ token_buffer_push_last (buffer, &token);
+
+ switch (token.type)
+ {
+ case token_type_symbol:
+ case token_type_comma:
+ case token_type_string:
+ case token_type_other:
+ case token_type_period:
+ break;
+
+ case token_type_lparen:
+ if (skip_balanced (token_type_rparen, buffer))
+ return true;
+ break;
+
+ case token_type_rparen:
+ if (delim == token_type_rparen || delim == token_type_eof)
+ return false;
+ break;
+
+ case token_type_lbracket:
+ if (skip_balanced (token_type_rbracket, buffer))
+ return true;
+ break;
+
+ case token_type_rbracket:
+ if (delim == token_type_rbracket || delim == token_type_eof)
+ return false;
+ break;
+
+ case token_type_eof:
+ return true;
+
+ default:
+ abort ();
+ }
+ }
+}
+
/* ========================= Extracting strings. ========================== */
@@ -1660,9 +1799,9 @@ static flag_context_list_table_ty
*flag_context_list_table;
the grammar to the compiler.
Normal handling: Look for
- keyword ( ... msgid ... )
+ keyword ( ... msgid ... ) dot_keyword
Plural handling: Look for
- keyword ( ... msgid ... msgid_plural ... )
+ keyword ( ... msgid ... msgid_plural ... ) dot_keyword
We use recursion because the arguments before msgid or between msgid
and msgid_plural can contain subexpressions of the same form. */
@@ -1678,7 +1817,8 @@ extract_balanced (message_list_ty *mlp,
token_type_ty delim,
flag_context_ty outer_context,
flag_context_list_iterator_ty context_iter,
- struct arglist_parser *argparser)
+ struct arglist_parser *argparser,
+ token_buffer_ty *buffer)
{
/* Current argument number. */
int arg = 1;
@@ -1699,9 +1839,13 @@ extract_balanced (message_list_ty *mlp,
for (;;)
{
- token_ty token;
+ token_ty token, keyword_token;
+ bool is_from_buffer = false;
+
+ is_from_buffer = token_buffer_pull (buffer, &token);
+ if (!is_from_buffer)
+ x_python_lex (&token);
- x_python_lex (&token);
switch (token.type)
{
case token_type_symbol:
@@ -1713,6 +1857,7 @@ extract_balanced (message_list_ty *mlp,
== 0)
{
next_shapes = (const struct callshapes *) keyword_value;
+ keyword_token = token;
state = 1;
}
else
@@ -1723,18 +1868,75 @@ extract_balanced (message_list_ty *mlp,
flag_context_list_table_lookup (
flag_context_list_table,
token.string, strlen (token.string)));
- free (token.string);
+ if (state == 0 || is_from_buffer)
+ free (token.string);
continue;
case token_type_lparen:
+ if (state == 1 && !is_from_buffer)
+ {
+ token_ty next_token;
+ token_ty lparen_token, rparen_token;
+ char *dot_keyword;
+
+ token_buffer_push_last (buffer, &keyword_token);
+ token_buffer_push_last (buffer, &token);
+
+ if (skip_balanced (token_type_rparen, buffer))
+ {
+ xgettext_current_source_encoding = po_charset_utf8;
+ arglist_parser_done (argparser, arg);
+ xgettext_current_source_encoding =
xgettext_current_file_source_encoding;
+ token_buffer_free (buffer);
+ return true;
+ }
+
+ x_python_lex (&next_token);
+ if (next_token.type != token_type_period)
+ {
+ token_buffer_push_last (buffer, &next_token);
+ next_context_iter = null_context_list_iterator;
+ state = 0;
+ continue;
+ }
+
+ x_python_lex (&next_token);
+ if (next_token.type != token_type_symbol)
+ {
+ token_buffer_push_last (buffer, &next_token);
+ next_context_iter = null_context_list_iterator;
+ state = 0;
+ continue;
+ }
+
+ lparen_token.type = token_type_lparen;
+ lparen_token.line_number = next_token.line_number;
+ token_buffer_push_first (buffer, &lparen_token);
+
+ rparen_token.type = token_type_rparen;
+ rparen_token.line_number = next_token.line_number;
+ token_buffer_push_last (buffer, &rparen_token);
+
+ dot_keyword = xasprintf (".%s", next_token.string);
+ free (next_token.string);
+ next_token.string = dot_keyword;
+ token_buffer_push_first (buffer, &next_token);
+
+ next_context_iter = null_context_list_iterator;
+ state = 0;
+ continue;
+ }
+
if (extract_balanced (mlp, token_type_rparen,
inner_context, next_context_iter,
arglist_parser_alloc (mlp,
- state ? next_shapes :
NULL)))
+ state ? next_shapes :
NULL),
+ token_buffer_alloc (buffer)))
{
xgettext_current_source_encoding = po_charset_utf8;
arglist_parser_done (argparser, arg);
xgettext_current_source_encoding =
xgettext_current_file_source_encoding;
+ token_buffer_free (buffer);
return true;
}
next_context_iter = null_context_list_iterator;
@@ -1747,6 +1949,7 @@ extract_balanced (message_list_ty *mlp,
xgettext_current_source_encoding = po_charset_utf8;
arglist_parser_done (argparser, arg);
xgettext_current_source_encoding =
xgettext_current_file_source_encoding;
+ token_buffer_free (buffer);
return false;
}
next_context_iter = null_context_list_iterator;
@@ -1766,11 +1969,13 @@ extract_balanced (message_list_ty *mlp,
case token_type_lbracket:
if (extract_balanced (mlp, token_type_rbracket,
null_context, null_context_list_iterator,
- arglist_parser_alloc (mlp, NULL)))
+ arglist_parser_alloc (mlp, NULL),
+ token_buffer_alloc (buffer)))
{
xgettext_current_source_encoding = po_charset_utf8;
arglist_parser_done (argparser, arg);
xgettext_current_source_encoding =
xgettext_current_file_source_encoding;
+ token_buffer_free (buffer);
return true;
}
next_context_iter = null_context_list_iterator;
@@ -1783,6 +1988,7 @@ extract_balanced (message_list_ty *mlp,
xgettext_current_source_encoding = po_charset_utf8;
arglist_parser_done (argparser, arg);
xgettext_current_source_encoding =
xgettext_current_file_source_encoding;
+ token_buffer_free (buffer);
return false;
}
next_context_iter = null_context_list_iterator;
@@ -1815,8 +2021,10 @@ extract_balanced (message_list_ty *mlp,
xgettext_current_source_encoding = po_charset_utf8;
arglist_parser_done (argparser, arg);
xgettext_current_source_encoding =
xgettext_current_file_source_encoding;
+ token_buffer_free (buffer);
return true;
+ case token_type_period:
case token_type_other:
next_context_iter = null_context_list_iterator;
state = 0;
@@ -1869,7 +2077,8 @@ extract_python (FILE *f,
due to an unbalanced closing parenthesis, just restart it. */
while (!extract_balanced (mlp, token_type_eof,
null_context, null_context_list_iterator,
- arglist_parser_alloc (mlp, NULL)))
+ arglist_parser_alloc (mlp, NULL),
+ token_buffer_alloc (NULL)))
;
fp = NULL;
diff --git a/gettext-tools/src/x-python.h b/gettext-tools/src/x-python.h
index 14f8bc5..b70b048 100644
--- a/gettext-tools/src/x-python.h
+++ b/gettext-tools/src/x-python.h
@@ -32,7 +32,7 @@ extern "C" {
#define SCANNERS_PYTHON \
{ "Python", extract_python, \
- &flag_table_python, &formatstring_python, NULL }, \
+ &flag_table_python, &formatstring_python,
&formatstring_python_brace }, \
/* Scan a Python file and add its translatable strings to mdlp. */
extern void extract_python (FILE *fp, const char *real_filename,
diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c
index eb3271f..45b27dc 100644
--- a/gettext-tools/src/xgettext.c
+++ b/gettext-tools/src/xgettext.c
@@ -1674,6 +1674,11 @@ xgettext_record_flag (const char *optionstring)
name_start, name_end,
argnum, value, pass);
break;
+ case format_python_brace:
+ flag_context_list_table_insert (&flag_table_python, 1,
+ name_start, name_end,
+ argnum, value, pass);
+ break;
case format_lisp:
flag_context_list_table_insert (&flag_table_lisp, 0,
name_start, name_end,
--
1.7.11.7