grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Changes to grep/src/grep.c


From: Charles Levert
Subject: Changes to grep/src/grep.c
Date: Thu, 03 Nov 2005 20:24:02 -0500

Index: grep/src/grep.c
diff -u grep/src/grep.c:1.113 grep/src/grep.c:1.114
--- grep/src/grep.c:1.113       Wed Aug 24 07:28:29 2005
+++ grep/src/grep.c     Fri Nov  4 01:24:01 2005
@@ -1664,6 +1664,69 @@
          program_name, p, q);
 }
 
+/* mb_icase_keys() is called by main(), when match_icase is true, to replace
+   *keys (of length *len) by a new xmalloc()ed lowercase copy; the old
+   buffer is free()d and *len updated.  It does nothing if MB_CUR_MAX is 1.  */
+#ifdef MBS_SUPPORT
+static void
+mb_icase_keys (char **keys, size_t *len)
+{
+  wchar_t wc;
+  mbstate_t sti, stj;          /* i for input/old, j for output/new.  */
+  size_t i, j, li, lj;         /* l for total string length (minus '\0').  */
+  char *ki, *kj;               /* k for keys.  */
+  int mcm;                     /* Cached value of MB_CUR_MAX.  */
+
+  if ((mcm = MB_CUR_MAX) == 1)
+    return;
+
+  li = *len;
+  ki = *keys;
+  /* We use a new buffer because some multi-octet characters change
+     length through a lower-case conversion.  For example:
+       len(U+0049)=1 --> len(U+0131)=2   under tr_TR.UTF-8
+       len(U+0130)=2 --> len(U+0069)=1   under en_US.UTF-8
+       len(U+2126)=3 --> len(U+03C9)=2   under en_US.UTF-8
+       len(U+212A)=3 --> len(U+006B)=1   under en_US.UTF-8
+       len(U+212B)=3 --> len(U+00E5)=2   under en_US.UTF-8  */
+  lj = li + mcm;
+  kj = xmalloc(lj + 1);
+
+  memset(&sti, 0, sizeof(mbstate_t));
+  memset(&stj, 0, sizeof(mbstate_t));
+  for (i = j = 0; i < li ;)
+    {
+      size_t mbclen;
+      mbclen = mbrtowc(&wc, ki + i, li - i, &sti);
+      if (lj < j + mcm)        /* Keep room for mcm more output octets.  */
+       {
+         lj += mcm;
+         kj = xrealloc(kj, lj + 1);
+       }
+      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+       {
+         /* An invalid sequence, a truncated multi-octet character, or a 0
+            return from mbrtowc().  We copy a single octet unchanged.  */
+         kj[j++] = ki[i++];
+       }
+      else
+       {
+         /* Doing towupper() before towlower() helps a few hairy cases and is
+            not too costly since this is the PATTERN and is done only once.  */
+         wc = towupper((wint_t)wc);
+         wc = towlower((wint_t)wc);
+         j += wcrtomb(kj + j, wc, &stj);  /* NOTE(review): result not checked for (size_t) -1.  */
+         i += mbclen;
+       }
+    }
+  kj[j] = '\0';
+
+  free(ki);
+  *keys = kj;
+  *len = j;
+}
+#endif /* MBS_SUPPORT */
+
 int
 main (int argc, char **argv)
 {
@@ -2100,34 +2163,8 @@
     abort ();
 
 #ifdef MBS_SUPPORT
-  if (MB_CUR_MAX != 1 && match_icase)
-    {
-      wchar_t wc;
-      mbstate_t cur_state, prev_state;
-      int i, len = strlen(keys);
-
-      memset(&cur_state, 0, sizeof(mbstate_t));
-      for (i = 0; i <= len ;)
-       {
-         size_t mbclen;
-         mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state);
-         if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
-           {
-             /* An invalid sequence, or a truncated multibyte character.
-                We treat it as a single byte character.  */
-             mbclen = 1;
-           }
-         else
-           {
-             if (iswupper((wint_t)wc))
-               {
-                 wc = towlower((wint_t)wc);
-                 wcrtomb(keys + i, wc, &cur_state);
-               }
-           }
-         i += mbclen;
-       }
-    }
+  if (match_icase)
+    mb_icase_keys (&keys, &keycc);
 #endif /* MBS_SUPPORT */
 
   (*compile)(keys, keycc);




reply via email to

[Prev in Thread] Current Thread [Next in Thread]