bug-gettext
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[bug-gettext] [PATCH] msgfilter: Add 'quot' filter


From: Daiki Ueno
Subject: [bug-gettext] [PATCH] msgfilter: Add 'quot' filter
Date: Wed, 09 Apr 2014 19:53:28 +0900
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/24.3.50 (gnu/linux)

Hi,

It is known that po/Rules-quot does not work properly with BSD Sed:

  [bug-gettext] msgfilter: Rules-quot implicity depends on GNU Sed.
  https://lists.gnu.org/archive/html/bug-gettext/2013-04/msg00028.html

The file basically does conversion from ASCII quotations ("...", `...',
'...') to Unicode quotations (“...”, ‘...’), using msgfilter sed.  So, I
wonder if this conversion might be worth an addition to the built-in
filters.  What do people think?  I'm attaching an initial patch for this.
If it makes sense, I'll prepare a filter for boldquot as well (and docs
and tests).

Regards,
--
Daiki Ueno
>From 6daf24b4c3c56915057796c7de2e518bc7d58dfb Mon Sep 17 00:00:00 2001
From: Daiki Ueno <address@hidden>
Date: Wed, 9 Apr 2014 19:25:58 +0900
Subject: [PATCH] msgfilter: Add 'quot' filter

---
 gettext-tools/src/Makefile.am    |   1 +
 gettext-tools/src/filter-quote.c | 153 +++++++++++++++++++++++++++++++++++++++
 gettext-tools/src/filters.h      |   8 ++
 gettext-tools/src/msgfilter.c    |   7 ++
 4 files changed, 169 insertions(+)
 create mode 100755 gettext-tools/src/filter-quote.c

diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am
index fe44293..3d50c71 100644
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -214,6 +214,7 @@ else
 msgfilter_SOURCES = ../woe32dll/c++msgfilter.cc
 endif
 msgfilter_SOURCES += filter-sr-latin.c
+msgfilter_SOURCES += filter-quote.c
 if !WOE32DLL
 msggrep_SOURCES = msggrep.c
 else
diff --git a/gettext-tools/src/filter-quote.c b/gettext-tools/src/filter-quote.c
new file mode 100755
index 0000000..bdfb3c3
--- /dev/null
+++ b/gettext-tools/src/filter-quote.c
@@ -0,0 +1,153 @@
+/* Convert ASCII quotation marks to Unicode quotation marks.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   Written by Daiki Ueno <address@hidden>, 2014.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+/* Specification.  */
+#include "filters.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include "xalloc.h"
+
+/* Replace the ASCII quote pairs "...", `...' and '...' in INPUT by Unicode
+   quotation marks; the contract is documented in filters.h.  */
+void
+ascii_quote_to_unicode (const char *input, size_t input_len,
+                        char **output_p, size_t *output_len_p)
+{
+  size_t i;
+  const char *start, *p;  /* START: first input byte not yet written out.  */
+  char *output, *r;       /* R: current write position in OUTPUT.  */
+  bool state = false;     /* True while START points at an opening quote.  */
+
+  start = input;
+  /* Large enough: a byte of input yields at most 3 bytes of output.  */
+  r = output = XNMALLOC (3 * input_len + 1, char);
+
+  for (i = 0; i < input_len; i++)
+    {
+      p = &input[i];
+      switch (*p)
+        {
+        case '"':
+          if (state)
+            {
+              if (*start == '"')        /* Only " closes a pending ".  */
+                {
+                  if (p > start + 1)
+                    {
+                      /* U+201C: LEFT DOUBLE QUOTATION MARK */
+                      memcpy (r, "\xe2\x80\x9c", 3);
+                      r += 3;
+                      memcpy (r, start + 1, p - start - 1);
+                      r += p - start - 1;
+                      /* U+201D: RIGHT DOUBLE QUOTATION MARK */
+                      memcpy (r, "\xe2\x80\x9d", 3);
+                      r += 3;
+                    }
+                  else
+                    {
+                      /* Consider "" as "".  */
+                      memcpy (r, "\"\"", 2);
+                      r += 2;
+                    }
+                  start = p + 1;
+                  state = false;
+                }
+            }
+          else
+            {
+              memcpy (r, start, p - start);     /* Flush the text before it.  */
+              r += p - start;
+              start = p;
+              state = true;
+            }
+          break;
+        /* ` opens `...'; on a second `, emit the text so far and start over.  */
+        case '`':
+          if (state)
+            {
+              if (*start == '`')
+                {
+                  memcpy (r, start, p - start);
+                  r += p - start;   /* Keep it: the first ` was not a quote.  */
+                  start = p;
+                }
+            }
+          else
+            {
+              memcpy (r, start, p - start);     /* Flush the text before it.  */
+              r += p - start;
+              start = p;
+              state = true;
+            }
+          break;
+        /* ' closes `...', or a '...' set off by spaces (or one end of INPUT).  */
+        case '\'':
+          if (state)
+            {
+              if (*start == '`'
+                  || (*start == '\''
+                      && (((start > input && *(start - 1) == ' ')
+                           && (i == input_len - 1 || *(p + 1) == ' '))
+                          || (start == input && i < input_len - 1
+                              && *(p + 1) == ' '))))
+                {
+                  /* U+2018: LEFT SINGLE QUOTATION MARK */
+                  memcpy (r, "\xe2\x80\x98", 3);
+                  r += 3;
+                  memcpy (r, start + 1, p - start - 1);
+                  r += p - start - 1;
+                  /* U+2019: RIGHT SINGLE QUOTATION MARK */
+                  memcpy (r, "\xe2\x80\x99", 3);
+                  r += 3;
+                  start = p + 1;
+                }
+              else
+                {
+                  memcpy (r, start, p - start); /* Not a pair: copy verbatim.  */
+                  r += p - start;
+                  start = p;
+                }
+              state = false;
+            }
+          else if (start == input || *(start - 1) == ' ')
+            {
+              memcpy (r, start, p - start);     /* Flush the text before it.  */
+              r += p - start;
+              start = p;
+              state = true;
+            }
+          break;
+        }
+    }
+  /* Flush the rest of INPUT, including any quote that was never closed.  */
+  p = &input[i];
+  if (p > start)
+    {
+      memcpy (r, start, p - start);
+      r += p - start;
+    }
+  *r = '\0';
+
+  *output_p = output;
+  *output_len_p = r - output;
+}
diff --git a/gettext-tools/src/filters.h b/gettext-tools/src/filters.h
index 93128b0..1d47fbe 100644
--- a/gettext-tools/src/filters.h
+++ b/gettext-tools/src/filters.h
@@ -29,6 +29,14 @@ extern "C" {
 extern void serbian_to_latin (const char *input, size_t input_len,
                               char **output_p, size_t *output_len_p);
 
+/* Convert a string INPUT of INPUT_LEN bytes, replacing the ASCII quotation
+   pairs "...", `...' and '...' by Unicode quotation marks.
+   Store the freshly allocated, NUL-terminated result in *OUTPUT_P and its
+   length (in bytes, not counting the NUL) in *OUTPUT_LEN_P.
+   Input and output are in UTF-8 encoding.  */
+extern void ascii_quote_to_unicode (const char *input, size_t input_len,
+                                    char **output_p, size_t *output_len_p);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/gettext-tools/src/msgfilter.c b/gettext-tools/src/msgfilter.c
index b92eef0..0cf76b8 100644
--- a/gettext-tools/src/msgfilter.c
+++ b/gettext-tools/src/msgfilter.c
@@ -349,6 +349,13 @@ There is NO WARRANTY, to the extent permitted by law.\n\
       /* Convert the input to UTF-8 first.  */
       result = iconv_msgdomain_list (result, po_charset_utf8, true, input_file);
     }
+  else if (strcmp (sub_name, "quot") == 0 && sub_argc == 1)
+    {
+      filter = ascii_quote_to_unicode;
+
+      /* Convert the input to UTF-8 first.  */
+      result = iconv_msgdomain_list (result, po_charset_utf8, true, input_file);
+    }
   else
     {
       filter = generic_filter;
-- 
1.9.0


reply via email to

[Prev in Thread] Current Thread [Next in Thread]