bug-gnu-libiconv
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[bug-gnu-libiconv] [PATCH] Support nl_langinfo (CODESET) correctly on OS/2


From: KO Myung-Hun
Subject: [bug-gnu-libiconv] [PATCH] Support nl_langinfo (CODESET) correctly on OS/2
Date: Mon, 29 Jul 2019 13:22:07 +0900

On OS/2, nl_langinfo (CODESET) returns the codeset name in IBM-XXX style, so
it needs to be converted to CPXXX style.

It may also return a name in ISOXXXX-X style. Add those names to the mapping table.
---
 libcharset/lib/localcharset.c | 73 +++++++++++++++++++++++++----------
 1 file changed, 53 insertions(+), 20 deletions(-)

diff --git a/libcharset/lib/localcharset.c b/libcharset/lib/localcharset.c
index da3ac45..40923fc 100644
--- a/libcharset/lib/localcharset.c
+++ b/libcharset/lib/localcharset.c
@@ -378,26 +378,41 @@ static const struct table_entry alias_table[] =
        by Alex Taylor:
        <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
        See also "IBM Globalization - Code page identifiers":
-       <https://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>.  */
-    { "CP1089", "ISO-8859-6" },
-    { "CP1208", "UTF-8" },
-    { "CP1381", "GB2312" },
-    { "CP1386", "GBK" },
-    { "CP3372", "EUC-JP" },
-    { "CP813",  "ISO-8859-7" },
-    { "CP819",  "ISO-8859-1" },
-    { "CP878",  "KOI8-R" },
-    { "CP912",  "ISO-8859-2" },
-    { "CP913",  "ISO-8859-3" },
-    { "CP914",  "ISO-8859-4" },
-    { "CP915",  "ISO-8859-5" },
-    { "CP916",  "ISO-8859-8" },
-    { "CP920",  "ISO-8859-9" },
-    { "CP921",  "ISO-8859-13" },
-    { "CP923",  "ISO-8859-15" },
-    { "CP954",  "EUC-JP" },
-    { "CP964",  "EUC-TW" },
-    { "CP970",  "EUC-KR" }
+       <https://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>.
+       See also "__convcp() of kLIBC":
+       <http://trac.netlabs.org/libc/browser/branches/libc-0.6/src/emx/src/lib/locale/__convcp.c>,
+       or:
+       <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>.  */
+    { "CP1089",         "ISO-8859-6" },
+    { "CP1200",         "UCS-2" },
+    { "CP1208",         "UTF-8" },
+    { "CP1381",         "GB2312" },
+    { "CP1383",         "EUC-CN" },
+    { "CP1386",         "GBK" },
+    { "CP3372",         "EUC-JP" },
+    { "CP813",          "ISO-8859-7" },
+    { "CP819",          "ISO-8859-1" },
+    { "CP878",          "KOI8-R" },
+    { "CP912",          "ISO-8859-2" },
+    { "CP913",          "ISO-8859-3" },
+    { "CP914",          "ISO-8859-4" },
+    { "CP915",          "ISO-8859-5" },
+    { "CP916",          "ISO-8859-8" },
+    { "CP920",          "ISO-8859-9" },
+    { "CP921",          "ISO-8859-13" },
+    { "CP923",          "ISO-8859-15" },
+    { "CP954",          "EUC-JP" },
+    { "CP964",          "EUC-TW" },
+    { "CP970",          "EUC-KR" },
+    { "ISO8859-1",      "ISO-8859-1" },
+    { "ISO8859-2",      "ISO-8859-2" },
+    { "ISO8859-3",      "ISO-8859-3" },
+    { "ISO8859-4",      "ISO-8859-4" },
+    { "ISO8859-5",      "ISO-8859-5" },
+    { "ISO8859-6",      "ISO-8859-6" },
+    { "ISO8859-7",      "ISO-8859-7" },
+    { "ISO8859-8",      "ISO-8859-8" },
+    { "ISO8859-9",      "ISO-8859-9" }
 #   define alias_table_defined
 #  endif
 #  if defined VMS                                           /* OpenVMS */
@@ -751,6 +766,24 @@ locale_charset (void)
     }
 #  endif
 
+#  ifdef OS2
+  /* On OS/2, nl_langinfo (CODESET) returns IBM-XXX style normally. Convert it
+     to CPXXX style for mapping later except UCS-2LE and UCS-2BE.  */
+  if (strcmp (codeset, "IBM-1200@endian=little") == 0)
+    return "UCS-2LE";
+  else if (strcmp (codeset, "IBM-1200@endian=big") == 0)
+    return "UCS-2BE";
+
+  if (strncmp (codeset, "IBM-", 4) == 0 && isdigit (codeset[4]))
+    {
+      static char buf[2 + 10 + 1];
+
+      snprintf (buf, sizeof (buf), "CP%s", codeset + 4);
+
+      codeset = buf;
+    }
+#  endif
+
   if (codeset == NULL)
     /* The canonical name cannot be determined.  */
     codeset = "";
-- 
2.22.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]