From 5447010fdbdf3f1a874689dd41a7c916bb262b2a Mon Sep 17 00:00:00 2001
From: Paul Eggert
Date: Fri, 13 May 2022 23:46:21 -0700
Subject: [PATCH 2/2] grep: fix bug with . and some Hangul Syllables

* NEWS: Mention the fix, which comes from the recent Gnulib update.
* tests/hangul-syllable: New file.
* tests/Makefile.am (TESTS): Add it.
---
 NEWS                  |  7 ++++
 tests/Makefile.am     |  1 +
 tests/hangul-syllable | 94 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+)
 create mode 100755 tests/hangul-syllable

diff --git a/NEWS b/NEWS
index 86c82ed..fb0e4cf 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,13 @@ GNU grep NEWS -*- outline -*-
 
 ** Bug fixes
 
+  In locales using UTF-8 encoding, the regular expression '.' no
+  longer sometimes fails to match Unicode characters U+D400 through
+  U+D7FF (some Hangul Syllables, and Hangul Jamo Extended-B) and
+  Unicode characters U+108000 through U+10FFFF (half of Supplemental
+  Private Use Area plane B).
+  [bug introduced in grep 3.4]
+
   The -s option no longer suppresses "binary file matches" messages.
   [Bug#51860 introduced in grep 3.5]
 
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 708980d..d72637f 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -110,6 +110,7 @@ TESTS = \
   grep-dev-null					\
   grep-dev-null-out				\
   grep-dir					\
+  hangul-syllable				\
   hash-collision-perf				\
   help-version					\
   high-bit-range				\
diff --git a/tests/hangul-syllable b/tests/hangul-syllable
new file mode 100755
index 0000000..9f94d2e
--- /dev/null
+++ b/tests/hangul-syllable
@@ -0,0 +1,94 @@
+#!/bin/sh
+# grep 3.4 through 3.7 mishandled matching '.' against the valid UTF-8
+# sequences (ED)(90-9F)(80-BF) corresponding to U+D400 through U+D7FF,
+# which are some Hangul Syllables and Hangul Jamo Extended-B.  They
+# also mishandled (F4)(88-8F)(80-BF)(80-BF) which correspond to
+# U+108000 through U+10FFFF (half of Supplemental Private Use Area plane B).
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+require_en_utf8_locale_
+
+LC_ALL=en_US.UTF-8
+export LC_ALL
+
+# check_char BYTES [GREP_OPTION]
+# Write BYTES (used as a printf format, so octal escapes are expanded)
+# plus a newline to 'in', then set fail=1 unless "grep GREP_OPTION '^.$'"
+# succeeds on 'in' and its output 'out' is identical to 'in'.
+check_char ()
+{
+  printf "$1\\n" >in || framework_failure_
+
+  # Leave $2 unquoted: when no option is passed it must expand to
+  # nothing rather than to an empty argument.
+  grep $2 '^.$' in >out || fail=1
+  cmp in out || fail=1
+}
+
+fail=0
+
+# "." should match U+D45C HANGUL SYLLABLE PYO.
+check_char '\355\221\234'
+
+# Check boundary-condition characters
+# while we are at it.
+
+check_char '\0' -a
+check_char '\177'
+
+for i in 302 337; do
+  for j in 200 277; do
+    check_char "\\$i\\$j"
+  done
+done
+for i in 340; do
+  for j in 240 277; do
+    for k in 200 277; do
+      check_char "\\$i\\$j\\$k"
+    done
+  done
+done
+for i in 341 354 356 357; do
+  for j in 200 277; do
+    for k in 200 277; do
+      check_char "\\$i\\$j\\$k"
+    done
+  done
+done
+for i in 355; do
+  for j in 200 237; do
+    for k in 200 277; do
+      check_char "\\$i\\$j\\$k"
+    done
+  done
+done
+for i in 360; do
+  for j in 220 277; do
+    for k in 200 277; do
+      for l in 200 277; do
+        check_char "\\$i\\$j\\$k\\$l"
+      done
+    done
+  done
+done
+for i in 361 363; do
+  for j in 200 277; do
+    for k in 200 277; do
+      for l in 200 277; do
+        check_char "\\$i\\$j\\$k\\$l"
+      done
+    done
+  done
+done
+for i in 364; do
+  for j in 200 217; do
+    for k in 200 277; do
+      for l in 200 277; do
+        check_char "\\$i\\$j\\$k\\$l"
+      done
+    done
+  done
+done
+
+Exit $fail
-- 
2.34.1