bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

merged regex with glibc


From: Paolo Bonzini
Subject: merged regex with glibc
Date: Fri, 09 Jan 2009 09:59:31 +0100
User-agent: Thunderbird 2.0.0.19 (Macintosh/20081209)

I applied the attached three patches to sync with glibc and fix two
relatively serious bugs.

Paolo
From d9491838d50536edcf30e219a3ab96791aeb1d5d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Fri, 9 Jan 2009 09:00:58 +0100
Subject: [PATCH] merge regex from glibc: replace mbrtowc with __mbrtowc.

2009-01-09  Paolo Bonzini  <address@hidden>

        * lib/regcomp.c (re_compile_fastmap_iter): Use __mbrtowc.
        * lib/regex_internal.c (build_wcs_buffer, build_wcs_upper_buffer,
        re_string_skip_chars, re_string_reconstruct): Likewise.
        * lib/regex_internal.h [!_LIBC] (__mbrtowc): New #define.
---
 ChangeLog            |    8 ++++++++
 lib/regcomp.c        |    7 ++++---
 lib/regex_internal.c |   22 +++++++++++-----------
 lib/regex_internal.h |    4 +++-
 4 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 74954d5..2059b03 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2009-01-09  Paolo Bonzini  <address@hidden>
+
+       regex: merge glibc changes
+       * lib/regcomp.c (re_compile_fastmap_iter): Use __mbrtowc.
+       * lib/regex_internal.c (build_wcs_buffer, build_wcs_upper_buffer,
+       re_string_skip_chars, re_string_reconstruct): Likewise.
+       * lib/regex_internal.h [!_LIBC] (__mbrtowc): New #define.
+
 2009-01-07  Jim Meyering  <address@hidden>
 
        poll: filter through cppi
diff --git a/lib/regcomp.c b/lib/regcomp.c
index a3a745d..fc3cf98 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -1,5 +1,6 @@
 /* Extended regular expression matching and search library.
-   Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Free Software Foundation, 
Inc.
+   Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Isamu Hasegawa <address@hidden>.
 
@@ -333,8 +334,8 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t 
*init_state,
                     && dfa->nodes[node].mb_partial)
                *p++ = dfa->nodes[node].opr.c;
              memset (&state, '\0', sizeof (state));
-             if (mbrtowc (&wc, (const char *) buf, p - buf,
-                          &state) == p - buf
+             if (__mbrtowc (&wc, (const char *) buf, p - buf,
+                            &state) == p - buf
                  && (__wcrtomb ((char *) buf, towlower (wc), &state)
                      != (size_t) -1))
                re_set_fastmap (fastmap, false, buf[0]);
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
index 977b15a..904b88e 100644
--- a/lib/regex_internal.c
+++ b/lib/regex_internal.c
@@ -1,6 +1,6 @@
 /* Extended regular expression matching and search library.
-   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software
-   Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Isamu Hasegawa <address@hidden>.
 
@@ -236,7 +236,7 @@ build_wcs_buffer (re_string_t *pstr)
        }
       else
        p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
-      mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+      mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
       if (BE (mbclen == (size_t) -2, 0))
        {
          /* The buffer doesn't have enough space, finish to build.  */
@@ -306,9 +306,9 @@ build_wcs_upper_buffer (re_string_t *pstr)
 
          remain_len = end_idx - byte_idx;
          prev_st = pstr->cur_state;
-         mbclen = mbrtowc (&wc,
-                           ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
-                            + byte_idx), remain_len, &pstr->cur_state);
+         mbclen = __mbrtowc (&wc,
+                             ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+                              + byte_idx), remain_len, &pstr->cur_state);
          if (BE (mbclen < (size_t) -2, 1))
            {
              wchar_t wcu = wc;
@@ -376,7 +376,7 @@ build_wcs_upper_buffer (re_string_t *pstr)
          }
        else
          p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
-       mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+       mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
        if (BE (mbclen < (size_t) -2, 1))
          {
            wchar_t wcu = wc;
@@ -499,8 +499,8 @@ re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, 
wint_t *last_wc)
       Idx remain_len;
       remain_len = pstr->len - rawbuf_idx;
       prev_st = pstr->cur_state;
-      mbclen = mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
-                       remain_len, &pstr->cur_state);
+      mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
+                         remain_len, &pstr->cur_state);
       if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 
0))
        {
          /* We treat these cases as a single byte character.  */
@@ -745,8 +745,8 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int 
eflags)
                          /* XXX Don't use mbrtowc, we know which conversion
                             to use (UTF-8 -> UCS4).  */
                          memset (&cur_state, 0, sizeof (cur_state));
-                         mbclen = mbrtowc (&wc2, (const char *) p, mlen,
-                                           &cur_state);
+                         mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
+                                             &cur_state);
                          if (raw + offset - p <= mbclen
                              && mbclen < (size_t) -2)
                            {
diff --git a/lib/regex_internal.h b/lib/regex_internal.h
index 47b9e13..d3d58e8 100644
--- a/lib/regex_internal.h
+++ b/lib/regex_internal.h
@@ -1,5 +1,6 @@
 /* Extended regular expression matching and search library.
-   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software 
Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Isamu Hasegawa <address@hidden>.
 
@@ -115,6 +116,7 @@
 # define __iswctype iswctype
 # define __btowc btowc
 # define __wcrtomb wcrtomb
+# define __mbrtowc mbrtowc
 # define __regfree regfree
 # define attribute_hidden
 #endif /* not _LIBC */
-- 
1.5.5

From f9fb3bb5a348aa2381edbe4cbc7eecc3894a1f42 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Fri, 9 Jan 2009 09:04:55 +0100
Subject: [PATCH] merge regex from glibc: fix glibc bug 697

2009-01-09  Paolo Bonzini  <address@hidden>

        * lib/regexec.c (prune_impossible_nodes): Handle sifted_states[0]
        being NULL also if there are no backreferences.
---
 ChangeLog     |    6 ++++++
 lib/regexec.c |    9 +++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 2059b03..d76e1e2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,10 @@
 2009-01-09  Paolo Bonzini  <address@hidden>
+    
+       regex: fix glibc bug 697
+       * lib/regexec.c (prune_impossible_nodes): Handle sifted_states[0]
+       being NULL also if there are no backreferences.
+
+2009-01-09  Paolo Bonzini  <address@hidden>
 
        regex: merge glibc changes
        * lib/regcomp.c (re_compile_fastmap_iter): Use __mbrtowc.
diff --git a/lib/regexec.c b/lib/regexec.c
index 2afa5b3..21a8166 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -1,6 +1,6 @@
 /* Extended regular expression matching and search library.
-   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software 
Foundation,
-   Inc.
+   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Isamu Hasegawa <address@hidden>.
 
@@ -1045,6 +1045,11 @@ prune_impossible_nodes (re_match_context_t *mctx)
       re_node_set_free (&sctx.limits);
       if (BE (ret != REG_NOERROR, 0))
        goto free_return;
+      if (sifted_states[0] == NULL)
+       {
+         ret = REG_NOMATCH;
+         goto free_return;
+       }
     }
   re_free (mctx->state_log);
   mctx->state_log = sifted_states;
-- 
1.5.5

From fa02d58b87e0a4eb2cbf45adb8bf9fe576ae5339 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Fri, 9 Jan 2009 09:10:36 +0100
Subject: [PATCH] merge regex from glibc: fix glibc bug 9697

2009-01-09  Paolo Bonzini  <address@hidden>

        * lib/regcomp.c (re_compile_fastmap_iter): Rewrite COMPLEX_BRACKET
        handling.
---
 ChangeLog     |    6 ++++
 lib/regcomp.c |   74 ++++++++++++++++++++++++++++++++++++--------------------
 2 files changed, 53 insertions(+), 27 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index d76e1e2..f7483f4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,10 @@
 2009-01-09  Paolo Bonzini  <address@hidden>
+
+       regex: fix glibc bug 9697
+       * lib/regcomp.c (re_compile_fastmap_iter): Rewrite COMPLEX_BRACKET
+       handling.
+
+2009-01-09  Paolo Bonzini  <address@hidden>
     
        regex: fix glibc bug 697
        * lib/regexec.c (prune_impossible_nodes): Handle sifted_states[0]
diff --git a/lib/regcomp.c b/lib/regcomp.c
index fc3cf98..6472ff6 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -357,45 +357,65 @@ re_compile_fastmap_iter (regex_t *bufp, const 
re_dfastate_t *init_state,
 #ifdef RE_ENABLE_I18N
       else if (type == COMPLEX_BRACKET)
        {
-         Idx i;
          re_charset_t *cset = dfa->nodes[node].opr.mbcset;
-         if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
-             || cset->nranges || cset->nchar_classes)
-           {
+         Idx i;
+
 # ifdef _LIBC
-             if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
+         /* See if we have to try all bytes which start multiple collation
+            elements.
+            e.g. In da_DK, we want to catch 'a' since "aa" is a valid
+                 collation element, and don't catch 'b' since 'b' is
+                 the only collation element which starts from 'b' (and
+                 it is caught by SIMPLE_BRACKET).  */
+             if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
+                 && (cset->ncoll_syms || cset->nranges))
                {
-                 /* In this case we want to catch the bytes which are
-                    the first byte of any collation elements.
-                    e.g. In da_DK, we want to catch 'a' since "aa"
-                         is a valid collation element, and don't catch
-                         'b' since 'b' is the only collation element
-                         which starts from 'b'.  */
                  const int32_t *table = (const int32_t *)
                    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
                  for (i = 0; i < SBC_MAX; ++i)
                    if (table[i] < 0)
                      re_set_fastmap (fastmap, icase, i);
                }
-# else
-             if (dfa->mb_cur_max > 1)
-               for (i = 0; i < SBC_MAX; ++i)
-                 if (__btowc (i) == WEOF)
-                   re_set_fastmap (fastmap, icase, i);
-# endif /* not _LIBC */
+# endif /* _LIBC */
+
+         /* See if we have to start the match at all multibyte characters,
+            i.e. where we would not find an invalid sequence.  This only
+            applies to multibyte character sets; for single byte character
+            sets, the SIMPLE_BRACKET again suffices.  */
+         if (dfa->mb_cur_max > 1
+             && (cset->nchar_classes || cset->non_match
+# ifdef _LIBC
+                 || cset->nequiv_classes
+# endif /* _LIBC */
+                ))
+           {
+             unsigned char c = 0;
+             do
+               {
+                 mbstate_t mbs;
+                 memset (&mbs, 0, sizeof (mbs));
+                 if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
+                   re_set_fastmap (fastmap, false, (int) c);
+               }
+             while (++c != 0);
            }
-         for (i = 0; i < cset->nmbchars; ++i)
+
+         else
            {
-             char buf[256];
-             mbstate_t state;
-             memset (&state, '\0', sizeof (state));
-             if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
-               re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
-             if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+             /* ... Else catch all bytes which can start the mbchars.  */
+             for (i = 0; i < cset->nmbchars; ++i)
                {
-                 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
-                     != (size_t) -1)
-                   re_set_fastmap (fastmap, false, *(unsigned char *) buf);
+                 char buf[256];
+                 mbstate_t state;
+                 memset (&state, '\0', sizeof (state));
+                 if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+                   re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+                 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+                   {
+                     if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+                         != (size_t) -1)
+                       re_set_fastmap (fastmap, false, *(unsigned char *) buf);
+                   }
                }
            }
        }
-- 
1.5.5


reply via email to

[Prev in Thread] Current Thread [Next in Thread]