--- /usr/local/src/Gnu/grep-2.5.3/src/dfa.h 2007-06-28 21:57:19.000000000 +0300 +++ dfa.h 2007-09-03 06:30:12.000000000 +0300 @@ -364,9 +364,4 @@ on a state that potentially could do so. */ int *success; /* Table of acceptance conditions used in dfaexec and computed in build_state. */ struct dfamust *musts; /* List of strings, at least one of which is known to appear in any r.e. matching the dfa. */ +#ifdef GAWK + int broken; /* True if using a feature where there + are bugs and gawk should use regex. */ +#endif }; /* Some macros for user access to dfa internals. */ --- /usr/local/src/Gnu/grep-2.5.3/src/dfa.c 2007-06-28 21:57:19.000000000 +0300 +++ dfa.c 2007-09-03 06:30:12.000000000 +0300 @@ -95,24 +96,13 @@ host does not conform to Posix. */ #define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9) -/* Don't use gettext if ENABLE_NLS is not defined */ -/* If we (don't) have I18N. */ -/* glibc defines _ */ -#ifdef ENABLE_NLS -# ifndef _ -# ifdef HAVE_LIBINTL_H -# include <libintl.h> -# ifndef _ -# define _(Str) gettext (Str) -# endif -# endif -# endif -#endif -#ifndef _ -# define _(Str) (Str) -#endif +/* gettext.h ensures that we don't use gettext if ENABLE_NLS is not defined */ +#include "gettext.h" +#define _(str) gettext (str) +#ifndef NO_MBSUPPORT #include "mbsupport.h" /* defines MBS_SUPPORT if appropriate */ +#endif #ifdef MBS_SUPPORT /* We can handle multibyte strings. */ # include <wchar.h> @@ -595,6 +585,9 @@ { wctype_t wt; /* Query the character class as wctype_t. 
*/ + if (case_fold && (strcmp(str, "upper") == 0 || strcmp(str, "lower") == 0)) + strcpy(str, "alpha"); + wt = wctype (str); if (ch_classes_al == 0) @@ -681,6 +674,28 @@ REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t, range_ends_al, work_mbc->nranges + 1); work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2; + if (case_fold && (iswlower((wint_t)wc) || iswupper((wint_t)wc)) + && (iswlower((wint_t)wc2) || iswupper((wint_t)wc2))) + { + wint_t altcase; + altcase = wc; + if (iswlower((wint_t)wc)) + altcase = towupper((wint_t)wc); + else + altcase = towlower((wint_t)wc); + REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t, + range_sts_al, work_mbc->nranges + 1); + work_mbc->range_sts[work_mbc->nranges] = (wchar_t)altcase; + + altcase = wc2; + if (iswlower((wint_t)wc2)) + altcase = towupper((wint_t)wc2); + else + altcase = towlower((wint_t)wc2); + REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t, + range_ends_al, work_mbc->nranges + 1); + work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)altcase; + } } else if (wc != WEOF) /* build normal characters. */ @@ -688,6 +703,13 @@ REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, work_mbc->nchars + 1); work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc; + if (case_fold && (iswlower(wc) || iswupper(wc))) + { + REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, + work_mbc->nchars + 1); + work_mbc->chars[work_mbc->nchars++] = + (wchar_t) (iswlower(wc) ? 
towupper(wc) : towlower(wc)); + } } } while ((wc = wc1) != L']'); @@ -962,6 +984,9 @@ if (c != '}') dfaerror(_("malformed repeat count")); laststart = 0; +#ifdef GAWK + dfa->broken = (minrep == maxrep && minrep == 0); +#endif return lasttok = REPMN; case '|': @@ -1017,6 +1042,21 @@ laststart = 0; return lasttok = CSET + charclass_index(ccl); +#ifndef GAWK + case 's': + case 'S': + if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) + goto normal_char; + zeroset(ccl); + for (c2 = 0; c2 < NOTCHAR; ++c2) + if (ISSPACE(c2)) + setbit(c2, ccl); + if (c == 'S') + notset(ccl); + laststart = 0; + return lasttok = CSET + charclass_index(ccl); +#endif + case 'w': case 'W': if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) @@ -1338,7 +1378,14 @@ int i; for (i = 0; i < ntokens; ++i) - addtok(dfa->tokens[tindex + i]); + { + addtok(dfa->tokens[tindex + i]); +#ifdef MBS_SUPPORT + /* Update index into multibyte csets. */ + if (MB_CUR_MAX > 1 && dfa->tokens[tindex + i] == MBCSET) + dfa->multibyte_prop[dfa->tindex - 1] = dfa->multibyte_prop[tindex + i]; +#endif + } } static void @@ -1567,8 +1614,8 @@ d->states[i].constraint = 0; d->states[i].first_end = 0; #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1) - d->states[i].mbps.nelem = 0; + d->states[i].mbps.nelem = 0; + d->states[i].mbps.elems = NULL; #endif for (j = 0; j < s->nelem; ++j) if (d->tokens[s->elems[j].index] < 0) @@ -2335,6 +2382,7 @@ d->trans = d->realtrans + 1; REALLOC(d->fails, int *, d->tralloc); REALLOC(d->success, int, d->tralloc); + REALLOC(d->newlines, int, d->tralloc); while (oldalloc < d->tralloc) { d->trans[oldalloc] = NULL; @@ -2992,13 +3063,19 @@ d->tralloc = 0; d->musts = 0; + d->realtrans = 0; + d->fails = 0; + d->success = 0; +#ifdef GAWK + d->broken = 0; +#endif } /* Parse and analyze a single string of the given length. 
*/ void dfacomp (char const *s, size_t len, struct dfa *d, int searchflag) { - if (case_fold) /* dummy folding in service of dfamust() */ + if (case_fold && len) /* dummy folding in service of dfamust() */ { char *lcopy; int i; @@ -3074,8 +3152,13 @@ } #endif /* MBS_SUPPORT */ - for (i = 0; i < d->sindex; ++i) + for (i = 0; i < d->sindex; ++i) { free((ptr_t) d->states[i].elems.elems); +#ifdef MBS_SUPPORT + if (d->states[i].mbps.nelem > 0) + free((ptr_t) d->states[i].mbps.elems); +#endif /* MBS_SUPPORT */ + } free((ptr_t) d->states); for (i = 0; i < d->tindex; ++i) if (d->follows[i].elems)