[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 4/4] dfa: optimize wide characters in a bracket expression
From: Jim Meyering
Subject: Re: [PATCH 4/4] dfa: optimize wide characters in a bracket expression
Date: Tue, 07 Jun 2011 13:26:29 +0200
Paolo Bonzini wrote:
> * src/dfa.c (addtok): Compile characters to an alternation. Handle the
> case when nothing else remains in the MBCSET.
...
Very nice.
This deserves a NEWS entry.
ACK, even without that ;-)
> diff --git a/src/dfa.c b/src/dfa.c
> index 8fc6ed0..aecaad9 100644
> --- a/src/dfa.c
> +++ b/src/dfa.c
> @@ -1449,6 +1449,8 @@ addtok_mb (token t, int mbprop)
> dfa->depth = depth;
> }
>
> +static void addtok_wc (wint_t wc);
> +
> /* Add the given token to the parse tree, maintaining the depth count and
> updating the maximum depth if necessary. */
> static void
> @@ -1457,8 +1459,24 @@ addtok (token t)
> #if MBS_SUPPORT
> if (MB_CUR_MAX > 1 && t == MBCSET)
> {
> + bool need_or = false;
> struct mb_char_classes *work_mbc = &dfa->mbcsets[dfa->nmbcsets - 1];
>
> + /* Extract wide characters into alternations if possible (for
> + better performance). This does not require UTF-8. */
> + if (!work_mbc->invert)
> + {
> + int i;
> + for (i = 0; i < work_mbc->nchars; i++)
> + {
> + addtok_wc (work_mbc->chars[i]);
> + if (need_or)
> + addtok (OR);
> + need_or = true;
> + }
> + work_mbc->nchars = 0;
> + }
> +
> /* UTF-8 allows treating a simple, non-inverted MBCSET like a CSET. */
> if (work_mbc->invert
> || (!using_utf8() && work_mbc->cset != -1)
> @@ -1467,9 +1485,23 @@ addtok (token t)
> || work_mbc->nranges != 0
> || work_mbc->nequivs != 0
> || work_mbc->ncoll_elems != 0)
> - addtok_mb (MBCSET, ((dfa->nmbcsets - 1) << 2) + 3);
> + {
> + addtok_mb (MBCSET, ((dfa->nmbcsets - 1) << 2) + 3);
> + if (need_or)
> + addtok (OR);
> + }
> else
> - addtok (CSET + work_mbc->cset);
> + {
> + /* Characters have been handled above, so it is possible
> + that the mbcset is empty now. Do nothing in that case. */
> + if (work_mbc->cset != -1)
> + {
> + assert (using_utf8 ());
> + addtok (CSET + work_mbc->cset);
> + if (need_or)
> + addtok (OR);
> + }
> + }
> }
> else
> #endif