[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
merged regex with glibc
From: |
Paolo Bonzini |
Subject: |
merged regex with glibc |
Date: |
Fri, 09 Jan 2009 09:59:31 +0100 |
User-agent: |
Thunderbird 2.0.0.19 (Macintosh/20081209) |
I applied the attached three patches to sync with glibc and fix two
relatively serious bugs.
Paolo
>From d9491838d50536edcf30e219a3ab96791aeb1d5d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Fri, 9 Jan 2009 09:00:58 +0100
Subject: [PATCH] merge regex from glibc: replace mbrtowc with __mbrtowc.
2009-01-09 Paolo Bonzini <address@hidden>
* lib/regcomp.c (re_compile_fastmap_iter): Use __mbrtowc.
* lib/regex_internal.c (build_wcs_buffer, build_wcs_upper_buffer,
re_string_skip_chars, re_string_reconstruct): Likewise.
* lib/regex_internal.h [!_LIBC] (__mbrtowc): New #define.
---
ChangeLog | 8 ++++++++
lib/regcomp.c | 7 ++++---
lib/regex_internal.c | 22 +++++++++++-----------
lib/regex_internal.h | 4 +++-
4 files changed, 26 insertions(+), 15 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 74954d5..2059b03 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2009-01-09 Paolo Bonzini <address@hidden>
+
+ regex: merge glibc changes
+ * lib/regcomp.c (re_compile_fastmap_iter): Use __mbrtowc.
+ * lib/regex_internal.c (build_wcs_buffer, build_wcs_upper_buffer,
+ re_string_skip_chars, re_string_reconstruct): Likewise.
+ * lib/regex_internal.h [!_LIBC] (__mbrtowc): New #define.
+
2009-01-07 Jim Meyering <address@hidden>
poll: filter through cppi
diff --git a/lib/regcomp.c b/lib/regcomp.c
index a3a745d..fc3cf98 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -1,5 +1,6 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Free Software Foundation,
Inc.
+ Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <address@hidden>.
@@ -333,8 +334,8 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t
*init_state,
&& dfa->nodes[node].mb_partial)
*p++ = dfa->nodes[node].opr.c;
memset (&state, '\0', sizeof (state));
- if (mbrtowc (&wc, (const char *) buf, p - buf,
- &state) == p - buf
+ if (__mbrtowc (&wc, (const char *) buf, p - buf,
+ &state) == p - buf
&& (__wcrtomb ((char *) buf, towlower (wc), &state)
!= (size_t) -1))
re_set_fastmap (fastmap, false, buf[0]);
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
index 977b15a..904b88e 100644
--- a/lib/regex_internal.c
+++ b/lib/regex_internal.c
@@ -1,6 +1,6 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software
- Foundation, Inc.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <address@hidden>.
@@ -236,7 +236,7 @@ build_wcs_buffer (re_string_t *pstr)
}
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
- mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+ mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
if (BE (mbclen == (size_t) -2, 0))
{
/* The buffer doesn't have enough space, finish to build. */
@@ -306,9 +306,9 @@ build_wcs_upper_buffer (re_string_t *pstr)
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
- mbclen = mbrtowc (&wc,
- ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
- + byte_idx), remain_len, &pstr->cur_state);
+ mbclen = __mbrtowc (&wc,
+ ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ + byte_idx), remain_len, &pstr->cur_state);
if (BE (mbclen < (size_t) -2, 1))
{
wchar_t wcu = wc;
@@ -376,7 +376,7 @@ build_wcs_upper_buffer (re_string_t *pstr)
}
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
- mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+ mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
if (BE (mbclen < (size_t) -2, 1))
{
wchar_t wcu = wc;
@@ -499,8 +499,8 @@ re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx,
wint_t *last_wc)
Idx remain_len;
remain_len = pstr->len - rawbuf_idx;
prev_st = pstr->cur_state;
- mbclen = mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
- remain_len, &pstr->cur_state);
+ mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
+ remain_len, &pstr->cur_state);
if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0,
0))
{
/* We treat these cases as a single byte character. */
@@ -745,8 +745,8 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int
eflags)
/* XXX Don't use mbrtowc, we know which conversion
to use (UTF-8 -> UCS4). */
memset (&cur_state, 0, sizeof (cur_state));
- mbclen = mbrtowc (&wc2, (const char *) p, mlen,
- &cur_state);
+ mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
+ &cur_state);
if (raw + offset - p <= mbclen
&& mbclen < (size_t) -2)
{
diff --git a/lib/regex_internal.h b/lib/regex_internal.h
index 47b9e13..d3d58e8 100644
--- a/lib/regex_internal.h
+++ b/lib/regex_internal.h
@@ -1,5 +1,6 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software
Foundation, Inc.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <address@hidden>.
@@ -115,6 +116,7 @@
# define __iswctype iswctype
# define __btowc btowc
# define __wcrtomb wcrtomb
+# define __mbrtowc mbrtowc
# define __regfree regfree
# define attribute_hidden
#endif /* not _LIBC */
--
1.5.5
>From f9fb3bb5a348aa2381edbe4cbc7eecc3894a1f42 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Fri, 9 Jan 2009 09:04:55 +0100
Subject: [PATCH] merge regex from glibc: fix glibc bug 697
2009-01-09 Paolo Bonzini <address@hidden>
* lib/regexec.c (prune_impossible_nodes): Handle sifted_states[0]
being NULL also if there are no backreferences.
---
ChangeLog | 6 ++++++
lib/regexec.c | 9 +++++++--
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 2059b03..d76e1e2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,10 @@
2009-01-09 Paolo Bonzini <address@hidden>
+
+ regex: fix glibc bug 697
+ * lib/regexec.c (prune_impossible_nodes): Handle sifted_states[0]
+ being NULL also if there are no backreferences.
+
+2009-01-09 Paolo Bonzini <address@hidden>
regex: merge glibc changes
* lib/regcomp.c (re_compile_fastmap_iter): Use __mbrtowc.
diff --git a/lib/regexec.c b/lib/regexec.c
index 2afa5b3..21a8166 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -1,6 +1,6 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software
Foundation,
- Inc.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <address@hidden>.
@@ -1045,6 +1045,11 @@ prune_impossible_nodes (re_match_context_t *mctx)
re_node_set_free (&sctx.limits);
if (BE (ret != REG_NOERROR, 0))
goto free_return;
+ if (sifted_states[0] == NULL)
+ {
+ ret = REG_NOMATCH;
+ goto free_return;
+ }
}
re_free (mctx->state_log);
mctx->state_log = sifted_states;
--
1.5.5
>From fa02d58b87e0a4eb2cbf45adb8bf9fe576ae5339 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Fri, 9 Jan 2009 09:10:36 +0100
Subject: [PATCH] merge regex from glibc: fix glibc bug 9697
2009-01-09 Paolo Bonzini <address@hidden>
* lib/regcomp.c (re_compile_fastmap_iter): Rewrite COMPLEX_BRACKET
handling.
---
ChangeLog | 6 ++++
lib/regcomp.c | 74 ++++++++++++++++++++++++++++++++++++--------------------
2 files changed, 53 insertions(+), 27 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index d76e1e2..f7483f4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,10 @@
2009-01-09 Paolo Bonzini <address@hidden>
+
+ regex: fix glibc bug 9697
+ * lib/regcomp.c (re_compile_fastmap_iter): Rewrite COMPLEX_BRACKET
+ handling.
+
+2009-01-09 Paolo Bonzini <address@hidden>
regex: fix glibc bug 697
* lib/regexec.c (prune_impossible_nodes): Handle sifted_states[0]
diff --git a/lib/regcomp.c b/lib/regcomp.c
index fc3cf98..6472ff6 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -357,45 +357,65 @@ re_compile_fastmap_iter (regex_t *bufp, const
re_dfastate_t *init_state,
#ifdef RE_ENABLE_I18N
else if (type == COMPLEX_BRACKET)
{
- Idx i;
re_charset_t *cset = dfa->nodes[node].opr.mbcset;
- if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
- || cset->nranges || cset->nchar_classes)
- {
+ Idx i;
+
# ifdef _LIBC
- if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
+ /* See if we have to try all bytes which start multiple collation
+ elements.
+ e.g. In da_DK, we want to catch 'a' since "aa" is a valid
+ collation element, and don't catch 'b' since 'b' is
+ the only collation element which starts from 'b' (and
+ it is caught by SIMPLE_BRACKET). */
+ if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
+ && (cset->ncoll_syms || cset->nranges))
{
- /* In this case we want to catch the bytes which are
- the first byte of any collation elements.
- e.g. In da_DK, we want to catch 'a' since "aa"
- is a valid collation element, and don't catch
- 'b' since 'b' is the only collation element
- which starts from 'b'. */
const int32_t *table = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
for (i = 0; i < SBC_MAX; ++i)
if (table[i] < 0)
re_set_fastmap (fastmap, icase, i);
}
-# else
- if (dfa->mb_cur_max > 1)
- for (i = 0; i < SBC_MAX; ++i)
- if (__btowc (i) == WEOF)
- re_set_fastmap (fastmap, icase, i);
-# endif /* not _LIBC */
+# endif /* _LIBC */
+
+ /* See if we have to start the match at all multibyte characters,
+ i.e. where we would not find an invalid sequence. This only
+ applies to multibyte character sets; for single byte character
+ sets, the SIMPLE_BRACKET again suffices. */
+ if (dfa->mb_cur_max > 1
+ && (cset->nchar_classes || cset->non_match
+# ifdef _LIBC
+ || cset->nequiv_classes
+# endif /* _LIBC */
+ ))
+ {
+ unsigned char c = 0;
+ do
+ {
+ mbstate_t mbs;
+ memset (&mbs, 0, sizeof (mbs));
+ if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
+ re_set_fastmap (fastmap, false, (int) c);
+ }
+ while (++c != 0);
}
- for (i = 0; i < cset->nmbchars; ++i)
+
+ else
{
- char buf[256];
- mbstate_t state;
- memset (&state, '\0', sizeof (state));
- if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
- re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
- if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+ /* ... Else catch all bytes which can start the mbchars. */
+ for (i = 0; i < cset->nmbchars; ++i)
{
- if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
- != (size_t) -1)
- re_set_fastmap (fastmap, false, *(unsigned char *) buf);
+ char buf[256];
+ mbstate_t state;
+ memset (&state, '\0', sizeof (state));
+ if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+ re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+ if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+ {
+ if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+ != (size_t) -1)
+ re_set_fastmap (fastmap, false, *(unsigned char *) buf);
+ }
}
}
}
--
1.5.5
- merged regex with glibc,
Paolo Bonzini <=