From 4b2e0641496b6421aee8e81db3b8f62f798957cd Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Mon, 10 Aug 2015 21:46:50 +0900 Subject: [PATCH 2/3] dfa: do not distinguish letter and non-letter in non-POSIX locales For non-POSIX locales, dfa does not support word delimiters, so remove the distinction between letter and non-letter. * dfa.c (struct dfa) [initstate_letter, initstate_others, mb_match_lens]: Remove members and all uses. (struct dfa) [initstate_notbol]: New member. (dfaanalyze, dfaexec_main): Replace old members with new member. (wchar_context): Remove function. Update callers. --- src/dfa.c | 47 ++++++++++++++++++----------------------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/src/dfa.c b/src/dfa.c index aadc03e..f8c42fc 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -411,9 +411,11 @@ struct dfa newline is stored separately and handled as a special case. Newline is also used as a sentinel at the end of the buffer. */ - state_num initstate_letter; /* Initial state for letter context. */ - state_num initstate_others; /* Initial state for other contexts. */ - position_set mb_follows; /* Follow set added by ANYCHAR and/or MBCSET + state_num initstate_notbol; /* Initial state for CTX_LETTER and CTX_NONE + context in multibyte locales, in which we + do not distinguish between their contexts, + as not supported word. */ + position_set mb_follows; /* Follow set added by ANYCHAR and/or MBCSET on demand. */ }; @@ -691,16 +693,6 @@ char_context (unsigned char c) return CTX_NONE; } -static int -wchar_context (wint_t wc) -{ - if (wc == (wchar_t) eolbyte || wc == 0) - return CTX_NEWLINE; - if (wc == L'_' || iswalnum (wc)) - return CTX_LETTER; - return CTX_NONE; -} - /* Entry point to set syntax options. 
*/ void dfasyntax (reg_syntax_t bits, int fold, unsigned char eol) @@ -2494,13 +2486,10 @@ dfaanalyze (struct dfa *d, int searchflag) separate_contexts = state_separate_contexts (&merged); if (separate_contexts & CTX_NEWLINE) state_index (d, &merged, CTX_NEWLINE); - d->initstate_others = d->min_trcount + d->initstate_notbol = d->min_trcount = state_index (d, &merged, separate_contexts ^ CTX_ANY); if (separate_contexts & CTX_LETTER) - d->initstate_letter = d->min_trcount - = state_index (d, &merged, CTX_LETTER); - else - d->initstate_letter = d->initstate_others; + d->min_trcount = state_index (d, &merged, CTX_LETTER); d->min_trcount++; free (posalloc); @@ -2983,11 +2972,12 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp, int context; size_t i, j; int k; + int separate_contexts; /* Note: caller must free the return value of this function. */ mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d); - context = wchar_context (wc); + context = (wc == (wchar_t) eolbyte || wc == 0) ? CTX_NEWLINE : CTX_NONE; /* This state has some operators which can match a multibyte character. */ d->mb_follows.nelem = 0; @@ -3015,7 +3005,11 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp, &d->mb_follows); } - s1 = state_index (d, &d->mb_follows, wchar_context (wc)); + separate_contexts = state_separate_contexts (&d->mb_follows); + if (context == CTX_NEWLINE && separate_contexts & CTX_NEWLINE) + s1 = state_index (d, &d->mb_follows, CTX_NEWLINE); + else + s1 = state_index (d, &d->mb_follows, separate_contexts ^ CTX_ANY); realloc_trans_if_necessary (d, s1); return s1; @@ -3130,16 +3124,11 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, int allow_nl, transit to another initial state after skip. */ if (p < mbp) { - int context = wchar_context (wc); - if (context == CTX_LETTER) - s = d->initstate_letter; - else - /* It's CTX_NONE. CTX_NEWLINE cannot happen, - as we assume that a newline is always a - single byte character. 
*/ - s = d->initstate_others; + /* It's CTX_LETTER or CTX_NONE. CTX_NEWLINE + cannot happen, as we assume that a newline + is always a single byte character. */ + s1 = s = d->initstate_notbol; p = mbp; - s1 = s; } } } -- 2.4.6