discuss-gnustep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: _DefaultStringEncoding


From: Bruno Haible
Subject: Re: _DefaultStringEncoding
Date: Mon, 8 Dec 2003 19:57:05 +0100
User-agent: KMail/1.5

Richard Frith-Macdonald wrote on 2003-10-18:

> > 3) If GNUSTEP_STRING_ENCODING is not set, why is the default value
> >    (set in Unicode.m:580) ISO-8859-1? On POSIX systems, all programs
> >    are expected to interpret file names and file contents according to
> >    the encoding given by the current locale (nl_langinfo (CODESET)).
> >    IMO this codeset should be taken and transformed into the GNUstep
> >    specific equivalent name. I'm using a de_DE.UTF-8 locale and all
> >    my local files are UTF-8 encoded.
>
> ... I'd be happy to accept a patch to make this change
> as long as nobody knows good reason not to.

Here is the patch to that effect. Tested by me in a UTF-8 locale.
You need to regenerate config.h.in and configure after applying the patch.


2003-12-07  Bruno Haible  <bruno@clisp.org>

        * Source/Additions/Unicode.m (GetDefEncoding): Use the result of
        nl_langinfo(CODESET) before falling back on ISO-8859-1.
        * config/codeset.m4: New file, taken from GNU gettext.
        * configure.ac: Include it, and invoke AM_LANGINFO_CODESET.

*** gnustep-base-1.8.0/Source/Additions/Unicode.m.bak   2003-09-16 04:56:04.000000000 +0200
--- gnustep-base-1.8.0/Source/Additions/Unicode.m       2003-12-07 23:42:24.000000000 +0100
***************
*** 45,50 ****
--- 45,54 ----
  #include <stdlib.h>
  #include <string.h>
  
+ #if HAVE_LANGINFO_CODESET
+ #include <langinfo.h>
+ #endif
+ 
  typedef struct {unichar from; unsigned char to;} _ucc_;
  
  #include "GNUstepBase/unicode/cyrillic.h"
***************
*** 577,583 ****
        if (defEnc == GSUndefinedEncoding)
        {
          /* Encoding not set */
!         defEnc = NSISOLatin1StringEncoding;
        }
        else if (GSEncodingSupported(defEnc) == NO)
        {
--- 581,683 ----
        if (defEnc == GSUndefinedEncoding)
        {
          /* Encoding not set */
! #if HAVE_LANGINFO_CODESET
!         /* Take it from the system locale information.  */
!         encoding = nl_langinfo(CODESET);
!         if (strcmp(encoding, "ANSI_X3.4-1968") == 0 /* glibc */
!             || strcmp(encoding, "ISO_646.IRV:1983") == 0 /* glibc */
!             || strcmp(encoding, "646") == 0 /* Solaris NetBSD */)
!           defEnc = NSASCIIStringEncoding;
!         else if (strcmp(encoding, "EUC-JP") == 0 /* glibc */
!                  || strcmp(encoding, "eucJP") == 0 /* HP-UX IRIX OSF/1 Solaris NetBSD */
!                  || strcmp(encoding, "IBM-eucJP") == 0 /* AIX */)
!           defEnc = NSJapaneseEUCStringEncoding;
!         else if (strcmp(encoding, "UTF-8") == 0 /* glibc AIX OSF/1 Solaris */
!                  || strcmp(encoding, "utf8") == 0 /* HP-UX */)
!           defEnc = NSUTF8StringEncoding;
!         else if (strcmp(encoding, "ISO-8859-1") == 0 /* glibc */
!                  || strcmp(encoding, "ISO8859-1") == 0 /* AIX IRIX OSF/1 Solaris NetBSD */
!                  || strcmp(encoding, "iso88591") == 0 /* HP-UX */)
!           defEnc = NSISOLatin1StringEncoding;
!         else if (strcmp(encoding, "IBM-932") == 0 /* AIX */
!                  || strcmp(encoding, "SJIS") == 0 /* HP-UX OSF/1 NetBSD */
!                  || strcmp(encoding, "PCK") == 0 /* Solaris */)
!           defEnc = NSShiftJISStringEncoding;
!         else if (strcmp(encoding, "ISO-8859-2") == 0 /* glibc */
!                  || strcmp(encoding, "ISO8859-2") == 0 /* AIX IRIX OSF/1 Solaris NetBSD */
!                  || strcmp(encoding, "iso88592") == 0 /* HP-UX */)
!           defEnc = NSISOLatin2StringEncoding;
!         else if (strcmp(encoding, "CP1251") == 0 /* glibc */
!                  || strcmp(encoding, "ansi-1251") == 0 /* Solaris */)
!           defEnc = NSWindowsCP1251StringEncoding;
!         else if (strcmp(encoding, "CP1252") == 0 /*  */
!                  || strcmp(encoding, "IBM-1252") == 0 /* AIX */)
!           defEnc = NSWindowsCP1252StringEncoding;
!         else if (strcmp(encoding, "ISO-8859-5") == 0 /* glibc */
!                  || strcmp(encoding, "ISO8859-5") == 0 /* AIX IRIX OSF/1 Solaris NetBSD */
!                  || strcmp(encoding, "iso88595") == 0 /* HP-UX */)
!           defEnc = NSISOCyrillicStringEncoding;
!         else if (strcmp(encoding, "KOI8-R") == 0 /* glibc */
!                  || strcmp(encoding, "koi8-r") == 0 /* Solaris */)
!           defEnc = NSKOI8RStringEncoding;
!         else if (strcmp(encoding, "ISO-8859-3") == 0 /* glibc */
!                  || strcmp(encoding, "ISO8859-3") == 0 /* Solaris */)
!           defEnc = NSISOLatin3StringEncoding;
!         else if (strcmp(encoding, "ISO-8859-4") == 0 /*  */
!                  || strcmp(encoding, "ISO8859-4") == 0 /* OSF/1 Solaris NetBSD */)
!           defEnc = NSISOLatin4StringEncoding;
!         else if (strcmp(encoding, "ISO-8859-6") == 0 /* glibc */
!                  || strcmp(encoding, "ISO8859-6") == 0 /* AIX Solaris */
!                  || strcmp(encoding, "iso88596") == 0 /* HP-UX */)
!           defEnc = NSISOArabicStringEncoding;
!         else if (strcmp(encoding, "ISO-8859-7") == 0 /* glibc */
!                  || strcmp(encoding, "ISO8859-7") == 0 /* AIX IRIX OSF/1 Solaris */
!                  || strcmp(encoding, "iso88597") == 0 /* HP-UX */)
!           defEnc = NSISOGreekStringEncoding;
!         else if (strcmp(encoding, "ISO-8859-8") == 0 /* glibc */
!                  || strcmp(encoding, "ISO8859-8") == 0 /* AIX OSF/1 Solaris */
!                  || strcmp(encoding, "iso88598") == 0 /* HP-UX */)
!           defEnc = NSISOHebrewStringEncoding;
!         else if (strcmp(encoding, "ISO-8859-9") == 0 /* glibc */
!                  || strcmp(encoding, "ISO8859-9") == 0 /* AIX IRIX OSF/1 Solaris */
!                  || strcmp(encoding, "iso88599") == 0 /* HP-UX */)
!           defEnc = NSISOLatin5StringEncoding;
!         else if (strcmp(encoding, "ISO-8859-10") == 0 /*  */
!                  || strcmp(encoding, "ISO8859-10") == 0 /*  */)
!           defEnc = NSISOLatin6StringEncoding;
!         else if (strcmp(encoding, "TIS-620") == 0 /* glibc AIX */
!                  || strcmp(encoding, "tis620") == 0 /* HP-UX */
!                  || strcmp(encoding, "TIS620.2533") == 0 /* Solaris */
!                  || strcmp(encoding, "TACTIS") == 0 /* OSF/1 */)
!           defEnc = NSISOThaiStringEncoding;
!         else if (strcmp(encoding, "ISO-8859-13") == 0 /* glibc */
!                  || strcmp(encoding, "ISO8859-13") == 0 /*  */
!                  || strcmp(encoding, "IBM-921") == 0 /* AIX */)
!           defEnc = NSISOLatin7StringEncoding;
!         else if (strcmp(encoding, "ISO-8859-14") == 0 /* glibc */
!                  || strcmp(encoding, "ISO8859-14") == 0 /*  */)
!           defEnc = NSISOLatin8StringEncoding;
!         else if (strcmp(encoding, "ISO-8859-15") == 0 /* glibc */
!                  || strcmp(encoding, "ISO8859-15") == 0 /* AIX OSF/1 Solaris NetBSD */
!                  || strcmp(encoding, "iso885915") == 0 /* HP-UX */)
!           defEnc = NSISOLatin9StringEncoding;
!         else if (strcmp(encoding, "GB2312") == 0 /* glibc */
!                  || strcmp(encoding, "gb2312") == 0 /* Solaris */
!                  || strcmp(encoding, "eucCN") == 0 /* IRIX NetBSD */
!                  || strcmp(encoding, "IBM-eucCN") == 0 /* AIX */
!                  || strcmp(encoding, "hp15CN") == 0 /* HP-UX */)
!           defEnc = NSGB2312StringEncoding;
!         else if (strcmp(encoding, "BIG5") == 0 /* glibc Solaris NetBSD */
!                  || strcmp(encoding, "big5") == 0 /* AIX HP-UX OSF/1 */)
!           defEnc = NSBIG5StringEncoding;
!         else if (strcmp(encoding, "EUC-KR") == 0 /* glibc */
!                  || strcmp(encoding, "eucKR") == 0 /* HP-UX IRIX OSF/1 NetBSD */
!                  || strcmp(encoding, "IBM-eucKR") == 0 /* AIX */
!                  || strcmp(encoding, "5601") == 0 /* Solaris */)
!           defEnc = NSKoreanEUCEncoding;
!         else
! #endif
!           defEnc = NSISOLatin1StringEncoding;
        }
        else if (GSEncodingSupported(defEnc) == NO)
        {
*** gnustep-base-1.8.0/config/codeset.m4.bak    2003-12-07 21:59:42.000000000 +0100
--- gnustep-base-1.8.0/config/codeset.m4        2002-01-13 12:51:27.000000000 +0100
***************
*** 0 ****
--- 1,23 ----
+ # codeset.m4 serial AM1 (gettext-0.10.40)
+ dnl Copyright (C) 2000-2002 Free Software Foundation, Inc.
+ dnl This file is free software, distributed under the terms of the GNU
+ dnl General Public License.  As a special exception to the GNU General
+ dnl Public License, this file may be distributed as part of a program
+ dnl that contains a configuration script generated by Autoconf, under
+ dnl the same distribution terms as the rest of that program.
+ 
+ dnl From Bruno Haible.
+ 
+ AC_DEFUN([AM_LANGINFO_CODESET],
+ [
+   AC_CACHE_CHECK([for nl_langinfo and CODESET], am_cv_langinfo_codeset,
+     [AC_TRY_LINK([#include <langinfo.h>],
+       [char* cs = nl_langinfo(CODESET);],
+       am_cv_langinfo_codeset=yes,
+       am_cv_langinfo_codeset=no)
+     ])
+   if test $am_cv_langinfo_codeset = yes; then
+     AC_DEFINE(HAVE_LANGINFO_CODESET, 1,
+       [Define if you have <langinfo.h> and nl_langinfo(CODESET).])
+   fi
+ ])
*** gnustep-base-1.8.0/configure.ac.bak 2003-09-23 01:06:51.000000000 +0200
--- gnustep-base-1.8.0/configure.ac     2003-12-07 22:05:50.000000000 +0100
***************
*** 26,31 ****
--- 26,32 ----
  builtin(include, config/procfs-exe-link.m4)dnl
  builtin(include, config/procfs.m4)dnl
  builtin(include, config/pathxml.m4)dnl
+ builtin(include, config/codeset.m4)dnl
  
  AC_INIT
  AC_CONFIG_SRCDIR([Source/NSArray.m])
***************
*** 1007,1012 ****
--- 1008,1018 ----
  AC_SUBST(USE_GMP)
  
  #--------------------------------------------------------------------
+ # Check whether nl_langinfo(CODESET) is supported, needed by Unicode.m.
+ #--------------------------------------------------------------------
+ AM_LANGINFO_CODESET
+ 
+ #--------------------------------------------------------------------
  # Check for iconv support (for Unicode conversion).
  #--------------------------------------------------------------------
  # Check in the glibc library





reply via email to

[Prev in Thread] Current Thread [Next in Thread]