[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[bug-gnu-libiconv] [PATCH] Support nl_langinfo (CODESET) correctly on OS/2
From: |
KO Myung-Hun |
Subject: |
[bug-gnu-libiconv] [PATCH] Support nl_langinfo (CODESET) correctly on OS/2 |
Date: |
Mon, 29 Jul 2019 13:22:07 +0900 |
On OS/2, nl_langinfo (CODESET) returns the codeset name in IBM-XXX style, so
it needs to be converted to CPXXX style.
It may also return a name in ISOXXXX-X style, so add those names to the mapping table.
---
libcharset/lib/localcharset.c | 73 +++++++++++++++++++++++++----------
1 file changed, 53 insertions(+), 20 deletions(-)
diff --git a/libcharset/lib/localcharset.c b/libcharset/lib/localcharset.c
index da3ac45..40923fc 100644
--- a/libcharset/lib/localcharset.c
+++ b/libcharset/lib/localcharset.c
@@ -378,26 +378,41 @@ static const struct table_entry alias_table[] =
by Alex Taylor:
<http://altsan.org/os2/toolkits/uls/index.html#codepages>.
See also "IBM Globalization - Code page identifiers":
- <https://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>. */
- { "CP1089", "ISO-8859-6" },
- { "CP1208", "UTF-8" },
- { "CP1381", "GB2312" },
- { "CP1386", "GBK" },
- { "CP3372", "EUC-JP" },
- { "CP813", "ISO-8859-7" },
- { "CP819", "ISO-8859-1" },
- { "CP878", "KOI8-R" },
- { "CP912", "ISO-8859-2" },
- { "CP913", "ISO-8859-3" },
- { "CP914", "ISO-8859-4" },
- { "CP915", "ISO-8859-5" },
- { "CP916", "ISO-8859-8" },
- { "CP920", "ISO-8859-9" },
- { "CP921", "ISO-8859-13" },
- { "CP923", "ISO-8859-15" },
- { "CP954", "EUC-JP" },
- { "CP964", "EUC-TW" },
- { "CP970", "EUC-KR" }
+ <https://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>.
+ See also "__convcp() of kLIBC":
+ <http://trac.netlabs.org/libc/browser/branches/libc-0.6/src/emx/src/lib/locale/__convcp.c>,
+ or:
+ <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>. */
+ { "CP1089", "ISO-8859-6" },
+ { "CP1200", "UCS-2" },
+ { "CP1208", "UTF-8" },
+ { "CP1381", "GB2312" },
+ { "CP1383", "EUC-CN" },
+ { "CP1386", "GBK" },
+ { "CP3372", "EUC-JP" },
+ { "CP813", "ISO-8859-7" },
+ { "CP819", "ISO-8859-1" },
+ { "CP878", "KOI8-R" },
+ { "CP912", "ISO-8859-2" },
+ { "CP913", "ISO-8859-3" },
+ { "CP914", "ISO-8859-4" },
+ { "CP915", "ISO-8859-5" },
+ { "CP916", "ISO-8859-8" },
+ { "CP920", "ISO-8859-9" },
+ { "CP921", "ISO-8859-13" },
+ { "CP923", "ISO-8859-15" },
+ { "CP954", "EUC-JP" },
+ { "CP964", "EUC-TW" },
+ { "CP970", "EUC-KR" },
+ { "ISO8859-1", "ISO-8859-1" },
+ { "ISO8859-2", "ISO-8859-2" },
+ { "ISO8859-3", "ISO-8859-3" },
+ { "ISO8859-4", "ISO-8859-4" },
+ { "ISO8859-5", "ISO-8859-5" },
+ { "ISO8859-6", "ISO-8859-6" },
+ { "ISO8859-7", "ISO-8859-7" },
+ { "ISO8859-8", "ISO-8859-8" },
+ { "ISO8859-9", "ISO-8859-9" }
# define alias_table_defined
# endif
# if defined VMS /* OpenVMS */
@@ -751,6 +766,24 @@ locale_charset (void)
}
# endif
+# ifdef OS2
+ /* On OS/2, nl_langinfo (CODESET) normally returns a name in IBM-XXX style.
+ Convert it to CPXXX style for the later mapping, except for UCS-2LE and UCS-2BE. */
+ if (strcmp (codeset, "IBM-1200@endian=little") == 0)
+ return "UCS-2LE";
+ else if (strcmp (codeset, "IBM-1200@endian=big") == 0)
+ return "UCS-2BE";
+
+ if (strncmp (codeset, "IBM-", 4) == 0 && isdigit (codeset[4]))
+ {
+ static char buf[2 + 10 + 1];
+
+ snprintf (buf, sizeof (buf), "CP%s", codeset + 4);
+
+ codeset = buf;
+ }
+# endif
+
if (codeset == NULL)
/* The canonical name cannot be determined. */
codeset = "";
--
2.22.0
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [bug-gnu-libiconv] [PATCH] Support nl_langinfo (CODESET) correctly on OS/2,
KO Myung-Hun <=