[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Changes to grep/src/grep.c
From: Charles Levert
Subject: Changes to grep/src/grep.c
Date: Thu, 03 Nov 2005 20:24:02 -0500
Index: grep/src/grep.c
diff -u grep/src/grep.c:1.113 grep/src/grep.c:1.114
--- grep/src/grep.c:1.113 Wed Aug 24 07:28:29 2005
+++ grep/src/grep.c Fri Nov 4 01:24:01 2005
@@ -1664,6 +1664,69 @@
program_name, p, q);
}
+/* mb_icase_keys() is called by main(), when match_icase is true, to fold
+   its "keys" string of "*len" octets to lowercase.  Pointers are used to
+   implement in-out call-by-reference parameters: the old "*keys" buffer is
+   free()d and replaced by a new '\0'-terminated one, and "*len" is set to
+   the new length (minus '\0').  Nothing is changed when MB_CUR_MAX is 1.  */
+#ifdef MBS_SUPPORT
+static void
+mb_icase_keys (char **keys, size_t *len)
+{
+  wchar_t wc;
+  mbstate_t sti, stj;   /* i for input/old, j for output/new.  */
+  size_t i, j, li, lj;  /* l for total string length (minus '\0').  */
+  char *ki, *kj;        /* k for keys.  */
+  int mcm;
+
+  if ((mcm = MB_CUR_MAX) == 1)
+    return;
+
+  li = *len;
+  ki = *keys;
+  /* We use a new buffer because some multi-octet characters change
+     length through a lower-case conversion.  For example:
+       len(U+0049)=1 --> len(U+0131)=2 under tr_TR.UTF-8
+       len(U+0130)=2 --> len(U+0069)=1 under en_US.UTF-8
+       len(U+2126)=3 --> len(U+03C9)=2 under en_US.UTF-8
+       len(U+212A)=3 --> len(U+006B)=1 under en_US.UTF-8
+       len(U+212B)=3 --> len(U+00E5)=2 under en_US.UTF-8  */
+  lj = li + mcm;
+  kj = xmalloc (lj + 1);
+
+  memset (&sti, 0, sizeof (mbstate_t));
+  memset (&stj, 0, sizeof (mbstate_t));
+  for (i = j = 0; i < li ;)
+    {
+      size_t mbclen = mbrtowc (&wc, ki + i, li - i, &sti);
+      size_t room = mcm;        /* Octets that must be free at kj + j.  */
+      size_t n = (size_t) -1;   /* Octets written by wcrtomb(), if any.  */
+      int valid = (mbclen != (size_t) -1 && mbclen != (size_t) -2
+                   && mbclen != 0);
+
+      if (!valid)
+        {
+          /* An invalid sequence, or a truncated multi-octet character.
+             We treat it as a single-octet character.  The input state is
+             unspecified after such a failure, so restart from the initial
+             state before looking at the next octet.  */
+          memset (&sti, 0, sizeof (mbstate_t));
+          mbclen = 1;
+        }
+
+      /* Make room for whichever is larger: one re-encoded character or a
+         verbatim copy of the input character.  */
+      if (room < mbclen)
+        room = mbclen;
+      if (lj < j + room)
+        {
+          lj += room;
+          kj = xrealloc (kj, lj + 1);
+        }
+
+      if (valid)
+        {
+          /* Doing towupper() before towlower() helps a few hairy cases and
+             is not too costly since this is the PATTERN and is done only
+             once.  */
+          wc = towupper ((wint_t) wc);
+          wc = towlower ((wint_t) wc);
+          n = wcrtomb (kj + j, wc, &stj);
+        }
+
+      if (n == (size_t) -1)
+        {
+          /* Either the input was not a valid character, or its case-folded
+             form cannot be encoded; adding (size_t) -1 to j would corrupt
+             the output index, so pass the input octets through unchanged
+             instead.  */
+          if (valid)
+            memset (&stj, 0, sizeof (mbstate_t));
+          memcpy (kj + j, ki + i, mbclen);
+          n = mbclen;
+        }
+
+      j += n;
+      i += mbclen;
+    }
+  kj[j] = '\0';
+
+  free (ki);
+  *keys = kj;
+  *len = j;
+}
+#endif /* MBS_SUPPORT */
+
int
main (int argc, char **argv)
{
@@ -2100,34 +2163,8 @@
abort ();
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX != 1 && match_icase)
- {
- wchar_t wc;
- mbstate_t cur_state, prev_state;
- int i, len = strlen(keys);
-
- memset(&cur_state, 0, sizeof(mbstate_t));
- for (i = 0; i <= len ;)
- {
- size_t mbclen;
- mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state);
- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
- {
- /* An invalid sequence, or a truncated multibyte character.
- We treat it as a single byte character. */
- mbclen = 1;
- }
- else
- {
- if (iswupper((wint_t)wc))
- {
- wc = towlower((wint_t)wc);
- wcrtomb(keys + i, wc, &cur_state);
- }
- }
- i += mbclen;
- }
- }
+ if (match_icase)
+ mb_icase_keys (&keys, &keycc);
#endif /* MBS_SUPPORT */
(*compile)(keys, keycc);
- Changes to grep/src/grep.c,
Charles Levert <=
- Changes to grep/src/grep.c, Charles Levert, 2005/11/08
- Changes to grep/src/grep.c, Charles Levert, 2005/11/09
- Changes to grep/src/grep.c, Charles Levert, 2005/11/10
- Changes to grep/src/grep.c, Charles Levert, 2005/11/11
- Changes to grep/src/grep.c, Charles Levert, 2005/11/17