emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] /srv/bzr/emacs/emacs-24 r111194: Fix bug #13515 with processing DBCS file names on MS-Windows.


From: Eli Zaretskii
Subject: [Emacs-diffs] /srv/bzr/emacs/emacs-24 r111194: Fix bug #13515 with processing DBCS file names on MS-Windows.
Date: Wed, 23 Jan 2013 18:11:04 +0200
User-agent: Bazaar (2.5.0)

------------------------------------------------------------
revno: 111194
fixes bug: http://debbugs.gnu.org/13515
committer: Eli Zaretskii <address@hidden>
branch nick: emacs-24
timestamp: Wed 2013-01-23 18:11:04 +0200
message:
  Fix bug #13515 with processing DBCS file names on MS-Windows.
  
   src/w32.c (max_filename_mbslen): New function.
   (normalize_filename, readdir): Use it to detect locales where ANSI
   encoding of file names uses a double-byte character set (DBCS).
   If a DBCS encoding is used, advance by characters using
   CharNextExA, instead of incrementing a 'char *' pointer.  Use
   _mbslwr instead of _strlwr.
modified:
  src/ChangeLog
  src/w32.c
=== modified file 'src/ChangeLog'
--- a/src/ChangeLog     2013-01-22 13:30:17 +0000
+++ b/src/ChangeLog     2013-01-23 16:11:04 +0000
@@ -1,3 +1,12 @@
+2013-01-23  Eli Zaretskii  <address@hidden>
+
+       * w32.c (max_filename_mbslen): New function.
+       (normalize_filename, readdir): Use it to detect locales where ANSI
+       encoding of file names uses a double-byte character set (DBCS).
+       If a DBCS encoding is used, advance by characters using
+       CharNextExA, instead of incrementing a 'char *' pointer.  Use
+       _mbslwr instead of _strlwr.  (Bug#13515)
+
 2013-01-22  Eli Zaretskii  <address@hidden>
 
        * w32heap.c (allocate_heap) [!_WIN64]: Decrease the initial

=== modified file 'src/w32.c'
--- a/src/w32.c 2013-01-01 09:11:05 +0000
+++ b/src/w32.c 2013-01-23 16:11:04 +0000
@@ -37,7 +37,7 @@
 /* must include CRT headers *before* config.h */
 
 #include <config.h>
-#include <mbstring.h>  /* for _mbspbrk */
+#include <mbstring.h>  /* for _mbspbrk and _mbslwr */
 
 #undef access
 #undef chdir
@@ -1304,6 +1304,67 @@
   srand (seed);
 }
 
+/* Current codepage for encoding file names.  */
+static int file_name_codepage;
+
+/* Return the maximum length in bytes of a character in the codepage
+   used to encode file names, and record that codepage in
+   file_name_codepage.  Needed to walk names one character at a time.  */
+static int
+max_filename_mbslen (void)
+{
+  /* A simple cache to avoid calling GetCPInfo every time we need to
+     normalize a file name.  The file-name encoding is not supposed to
+     be changed too frequently, if ever.  */
+  static Lisp_Object last_file_name_encoding;
+  static int last_max_mbslen;
+  Lisp_Object current_encoding;
+
+  current_encoding = Vfile_name_coding_system;
+  if (NILP (current_encoding))
+    current_encoding = Vdefault_file_name_coding_system;
+
+  if (!EQ (last_file_name_encoding, current_encoding))
+    {
+      CPINFO cp_info;
+
+      last_file_name_encoding = current_encoding;
+      /* Default to the current ANSI codepage.  */
+      file_name_codepage = w32_ansi_code_page;
+      if (!NILP (current_encoding))
+       {
+         char *cpname = SDATA (SYMBOL_NAME (current_encoding));
+         char *cp = NULL, *end;
+         int cpnum;
+
+         if (strncmp (cpname, "cp", 2) == 0)
+           cp = cpname + 2;
+         else if (strncmp (cpname, "windows-", 8) == 0)
+           cp = cpname + 8;
+
+         if (cp)
+           {
+             end = cp;
+             cpnum = strtol (cp, &end, 10);
+             if (cpnum && *end == '\0' && end - cp >= 2)
+               file_name_codepage = cpnum;
+           }
+       }
+
+      if (!file_name_codepage)
+       file_name_codepage = CP_ACP; /* CP_ACP = 0, but let's not assume that */
+
+      if (!GetCPInfo (file_name_codepage, &cp_info))
+       {
+         file_name_codepage = CP_ACP;
+         if (!GetCPInfo (file_name_codepage, &cp_info))
+           emacs_abort ();
+       }
+      last_max_mbslen = cp_info.MaxCharSize;
+    }
+
+  return last_max_mbslen;
+}
 
 /* Normalize filename by converting all path separators to
    the specified separator.  Also conditionally convert upper
@@ -1313,14 +1374,20 @@
 normalize_filename (register char *fp, char path_sep)
 {
   char sep;
-  char *elem;
+  char *elem, *p2;
+  int dbcs_p = max_filename_mbslen () > 1;
 
   /* Always lower-case drive letters a-z, even if the filesystem
      preserves case in filenames.
      This is so filenames can be compared by string comparison
      functions that are case-sensitive.  Even case-preserving filesystems
      do not distinguish case in drive letters.  */
-  if (fp[1] == ':' && *fp >= 'A' && *fp <= 'Z')
+  if (dbcs_p)
+    p2 = CharNextExA (file_name_codepage, fp, 0);
+  else
+    p2 = fp + 1;
+
+  if (*p2 == ':' && *fp >= 'A' && *fp <= 'Z')
     {
       *fp += 'a' - 'A';
       fp += 2;
@@ -1332,7 +1399,10 @@
        {
          if (*fp == '/' || *fp == '\\')
            *fp = path_sep;
-         fp++;
+         if (!dbcs_p)
+           fp++;
+         else
+           fp = CharNextExA (file_name_codepage, fp, 0);
        }
       return;
     }
@@ -1355,13 +1425,20 @@
        if (elem && elem != fp)
          {
            *fp = 0;            /* temporary end of string */
-           _strlwr (elem);     /* while we convert to lower case */
+           _mbslwr (elem);     /* while we convert to lower case */
          }
        *fp = sep;              /* convert (or restore) path separator */
        elem = fp + 1;          /* next element starts after separator */
        sep = path_sep;
       }
-  } while (*fp++);
+    if (*fp)
+      {
+       if (!dbcs_p)
+         fp++;
+       else
+         fp = CharNextExA (file_name_codepage, fp, 0);
+      }
+  } while (*fp);
 }
 
 /* Destructively turn backslashes into slashes.  */
@@ -2588,15 +2665,22 @@
     strcpy (dir_static.d_name, dir_find_data.cFileName);
   dir_static.d_namlen = strlen (dir_static.d_name);
   if (dir_is_fat)
-    _strlwr (dir_static.d_name);
+    _mbslwr (dir_static.d_name);
   else if (downcase)
     {
       register char *p;
-      for (p = dir_static.d_name; *p; p++)
-       if (*p >= 'a' && *p <= 'z')
-         break;
+      int dbcs_p = max_filename_mbslen () > 1;
+      for (p = dir_static.d_name; *p; )
+       {
+         if (*p >= 'a' && *p <= 'z')
+           break;
+         if (dbcs_p)
+           p = CharNextExA (file_name_codepage, p, 0);
+         else
+           p++;
+       }
       if (!*p)
-       _strlwr (dir_static.d_name);
+       _mbslwr (dir_static.d_name);
     }
 
   return &dir_static;


reply via email to

[Prev in Thread] Current Thread [Next in Thread]