bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: tr '[:upper:]' '[:lower:]' -- misaligned construct


From: Jim Meyering
Subject: Re: tr '[:upper:]' '[:lower:]' -- misaligned construct
Date: Sun, 06 Jan 2008 00:34:30 +0100

Thanks again.
My tentative patch introduced a bug (see the tests).
Here's the fix I've just pushed:

        Avoid tr case-conversion failure in some locales.
        * src/tr.c (skip_construct): New function.
        (main): When processing a pair of case-converting classes, don't
        iterate through the elements of each [:upper:] or [:lower:] class.
        Reported by Gerald Pfeifer in
        <http://thread.gmane.org/gmane.comp.gnu.coreutils.bugs/12218>.
        * tests/tr/Test.pm [tolower-F]: New test for the above fix.
        [upcase-xtra, dncase-xtra]: New tests, for a related code path.
        * NEWS: Mention the tr bug fix.

---
 ChangeLog        |   12 ++++++++++++
 NEWS             |    6 ++++++
 THANKS           |    1 +
 src/tr.c         |   24 +++++++++++++++++++++++-
 tests/tr/Test.pm |   12 ++++++++++++
 5 files changed, 54 insertions(+), 1 deletions(-)

diff --git a/NEWS b/NEWS
index 5285d51..5b1b366 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,12 @@ GNU coreutils NEWS                                    -*- outline -*-

 * Noteworthy changes in release 6.? (????-??-??) [?]

+** Bug fixes
+
+  tr's case conversion would fail in a locale with differing numbers
+  of lower case and upper case characters.  E.g., this would fail:
+  env LC_CTYPE=en_US.iso88591 tr '[:upper:]' '[:lower:]'
+  [bug introduced in coreutils-6.9.90]


 * Noteworthy changes in release 6.9.91 (2007-12-15) [beta]
diff --git a/THANKS b/THANKS
index e4658a0..5121495 100644
--- a/THANKS
+++ b/THANKS
@@ -179,6 +179,7 @@ Geoff Collyer                       geoff at collyer.net
 Geoff Kuenning                      address@hidden
 Geoff Odhner                        address@hidden
 Geoff Whale                         address@hidden
+Gerald Pfeifer                      address@hidden
 Gerhard Poul                        address@hidden
 Germano Leichsenring                address@hidden
 Göran Uddeborg                      address@hidden
diff --git a/src/tr.c b/src/tr.c
index dff602e..a7565f8 100644
--- a/src/tr.c
+++ b/src/tr.c
@@ -1,5 +1,5 @@
 /* tr -- a filter to translate characters
-   Copyright (C) 91, 1995-2007 Free Software Foundation, Inc.
+   Copyright (C) 91, 1995-2008 Free Software Foundation, Inc.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -1019,6 +1019,15 @@ build_spec_list (const struct E_string *es, struct Spec_list *result)
   return true;
 }

+/* Advance past the current construct.
+   S->tail must be non-NULL.  */
+static void
+skip_construct (struct Spec_list *s)
+{
+  s->tail = s->tail->next;
+  s->state = NEW_ELEMENT;
+}
+
 /* Given a Spec_list S (with its saved state implicit in the values
    of its members `tail' and `state'), return the next single character
    in the expansion of S's constructs.  If the last character of S was
@@ -1809,6 +1818,7 @@ main (int argc, char **argv)
        {
          int c1, c2;
          int i;
+         bool case_convert = false;
          enum Upper_Lower_class class_s1;
          enum Upper_Lower_class class_s2;

@@ -1818,6 +1828,16 @@ main (int argc, char **argv)
          s2->state = BEGIN_STATE;
          for (;;)
            {
+             /* When the previous pair identified case-converting classes,
+                advance S1 and S2 so that each points to the following
+                construct.  */
+             if (case_convert)
+               {
+                 skip_construct (s1);
+                 skip_construct (s2);
+                 case_convert = false;
+               }
+
              c1 = get_next (s1, &class_s1);
              c2 = get_next (s2, &class_s2);

@@ -1831,12 +1851,14 @@ main (int argc, char **argv)

              if (class_s1 == UL_LOWER && class_s2 == UL_UPPER)
                {
+                 case_convert = true;
                  for (i = 0; i < N_CHARS; i++)
                    if (islower (i))
                      xlate[i] = toupper (i);
                }
              else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER)
                {
+                 case_convert = true;
                  for (i = 0; i < N_CHARS; i++)
                    if (isupper (i))
                      xlate[i] = tolower (i);
diff --git a/tests/tr/Test.pm b/tests/tr/Test.pm
index 7b25a15..cf70213 100644
--- a/tests/tr/Test.pm
+++ b/tests/tr/Test.pm
@@ -139,8 +139,20 @@ my @tv = (
 # Up to coreutils-6.9, tr rejected an unmatched [:lower:] or [:upper:] in SET1.
 ['s1-lower', q|'[:lower:]' '[.*]'|, '#$%123abcABC', '#$%123...ABC', 0],
 ['s1-upper', q|'[:upper:]' '[.*]'|, '#$%123abcABC', '#$%123abc...', 0],
+
+# Up to coreutils-6.9.91, this would fail with the diagnostic:
+# tr: misaligned [:upper:] and/or [:lower:] construct
+# with LC_CTYPE=en_US.iso88591.
+['tolower-F',q|'[:upper:]' '[:lower:]'|, 'A', 'a', 0],
+
+# When doing a case-converting translation with something after the
+# [:upper:] and [:lower:] elements, ensure that tr honors the following byte.
+['upcase-xtra',q|'[:lower:].' '[:upper:]x'|,   'abc.', 'ABCx', 0],
+['dncase-xtra',q|'[:upper:].' '[:lower:]x'|,   'ABC.', 'abcx', 0],
 );

+$Test::env{'tolower-F'} = ['LC_CTYPE=en_US.iso88591'];
+
 sub test_vector
 {
   my $t;
--
1.5.4.rc2.39.g6989c5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]