bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 4/4] dfa: optimize wide characters in a bracket expression


From: Jim Meyering
Subject: Re: [PATCH 4/4] dfa: optimize wide characters in a bracket expression
Date: Tue, 07 Jun 2011 13:26:29 +0200

Paolo Bonzini wrote:
> * src/dfa.c (addtok): Compile characters to an alternation.  Handle the
> case when nothing else remains in the MBCSET.
...
Very nice.
This deserves a NEWS entry.
ACK, even without the NEWS entry ;-)

> diff --git a/src/dfa.c b/src/dfa.c
> index 8fc6ed0..aecaad9 100644
> --- a/src/dfa.c
> +++ b/src/dfa.c
> @@ -1449,6 +1449,8 @@ addtok_mb (token t, int mbprop)
>      dfa->depth = depth;
>  }
>
> +static void addtok_wc (wint_t wc);
> +
>  /* Add the given token to the parse tree, maintaining the depth count and
>     updating the maximum depth if necessary. */
>  static void
> @@ -1457,8 +1459,24 @@ addtok (token t)
>  #if MBS_SUPPORT
>    if (MB_CUR_MAX > 1 && t == MBCSET)
>      {
> +      bool need_or = false;
>        struct mb_char_classes *work_mbc = &dfa->mbcsets[dfa->nmbcsets - 1];
>
> +      /* Extract wide characters into alternations if possible (for
> +         better performance).  This does not require UTF-8.  */
> +      if (!work_mbc->invert)
> +        {
> +          int i;
> +          for (i = 0; i < work_mbc->nchars; i++)
> +            {
> +              addtok_wc (work_mbc->chars[i]);
> +              if (need_or)
> +                addtok (OR);
> +              need_or = true;
> +            }
> +          work_mbc->nchars = 0;
> +        }
> +
>        /* UTF-8 allows treating a simple, non-inverted MBCSET like a CSET.  */
>        if (work_mbc->invert
>            || (!using_utf8() && work_mbc->cset != -1)
> @@ -1467,9 +1485,23 @@ addtok (token t)
>            || work_mbc->nranges != 0
>            || work_mbc->nequivs != 0
>            || work_mbc->ncoll_elems != 0)
> -        addtok_mb (MBCSET, ((dfa->nmbcsets - 1) << 2) + 3);
> +        {
> +          addtok_mb (MBCSET, ((dfa->nmbcsets - 1) << 2) + 3);
> +          if (need_or)
> +            addtok (OR);
> +        }
>        else
> -        addtok (CSET + work_mbc->cset);
> +        {
> +          /* Characters have been handled above, so it is possible
> +             that the mbcset is empty now.  Do nothing in that case.  */
> +          if (work_mbc->cset != -1)
> +            {
> +              assert (using_utf8 ());
> +              addtok (CSET + work_mbc->cset);
> +              if (need_or)
> +                addtok (OR);
> +            }
> +        }
>      }
>    else
>  #endif



reply via email to

[Prev in Thread] Current Thread [Next in Thread]