bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 2/9] dfa: fix handling of ranges in multibyte character sets


From: Paolo Bonzini
Subject: [PATCH 2/9] dfa: fix handling of ranges in multibyte character sets
Date: Sun, 14 Mar 2010 16:35:07 +0100

* src/dfa.c (parse_bracket_exp_mb): When folding case, store each
range twice: once with both endpoints lowercased (towlower) and once
with both endpoints uppercased (towupper).
* tests/Makefile.am (TESTS): Add case-fold-char-range.
* tests/case-fold-char-range: New.
---
 src/dfa.c                  |   16 ++++++++++++++--
 tests/Makefile.am          |    1 +
 tests/case-fold-char-range |   21 +++++++++++++++++++++
 3 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 tests/case-fold-char-range

diff --git a/src/dfa.c b/src/dfa.c
index 6c7494e..3cc405a 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -576,10 +576,22 @@ parse_bracket_exp_mb (void)
            }
          REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
                               range_sts_al, work_mbc->nranges + 1);
-         work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc;
          REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
                               range_ends_al, work_mbc->nranges + 1);
-         work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2;
+         work_mbc->range_sts[work_mbc->nranges] = 
+            case_fold ? towlower(wc) : (wchar_t)wc;
+         work_mbc->range_ends[work_mbc->nranges++] = 
+            case_fold ? towlower(wc2) : (wchar_t)wc2;
+
+         if (case_fold)
+            {
+              REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
+                                   range_sts_al, work_mbc->nranges + 1);
+              work_mbc->range_sts[work_mbc->nranges] = towupper(wc);
+              REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
+                                   range_ends_al, work_mbc->nranges + 1);
+              work_mbc->range_ends[work_mbc->nranges++] = towupper(wc2);
+            }
        }
       else if (wc != WEOF)
        /* build normal characters.  */
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 9724b0d..ab5fd4e 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -18,6 +18,7 @@ TESTS =                                               \
   backref.sh                                   \
   bre.sh                                       \
   case-fold-char-class                         \
+  case-fold-char-range                         \
   case-fold-char-type                          \
   dfaexec-multibyte                            \
   empty.sh                                     \
diff --git a/tests/case-fold-char-range b/tests/case-fold-char-range
new file mode 100644
index 0000000..e683da9
--- /dev/null
+++ b/tests/case-fold-char-range
@@ -0,0 +1,21 @@
+#!/bin/sh
+# This would fail for grep-2.5.3
+: ${srcdir=.}
+. "$srcdir/init.sh"; path_prepend_ ../src
+
+printf 'Y\n'      > exp1 || framework_failure
+fail=0
+
+for LOC in en_US.UTF-8 zh_CN $LOCALE_FR_UTF8; do
+  printf '1\nY\n.\n' | LC_ALL=$LOC grep -i '[a-z]' > out1 || fail=1
+  compare out1 exp1 || fail=1
+done
+
+printf 'y\n'      > exp2 || framework_failure
+
+for LOC in en_US.UTF-8 zh_CN $LOCALE_FR_UTF8; do
+  printf '1\ny\n.\n' | LC_ALL=$LOC grep -i '[A-Z]' > out2 || fail=1
+  compare out2 exp2 || fail=1
+done
+
+Exit $fail
-- 
1.6.6.1






reply via email to

[Prev in Thread] Current Thread [Next in Thread]