grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v3.3-27-g449f1c5


From: Jim Meyering
Subject: grep branch, master, updated. v3.3-27-g449f1c5
Date: Sun, 17 Nov 2019 10:16:00 -0500 (EST)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  449f1c5805adba00ddd6edad30d96dbaeb8a91a3 (commit)
       via  cea97a849038754933dadce9db4ab9761b681c92 (commit)
      from  0172bf6825710b510b05c56136aee2d5f8d400e4 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=449f1c5805adba00ddd6edad30d96dbaeb8a91a3


commit 449f1c5805adba00ddd6edad30d96dbaeb8a91a3
Author: Norihiro Tanaka <address@hidden>
Date:   Sun Nov 17 07:29:15 2019 +0900

    grep: improve grep -Fw performance in non-UTF8 multibyte locales
    
    * src/searchutils.c (mb_goback): New parameter.  All callers changed.
    * src/search.h (mb_goback): Update prototype.
    * src/kwsearch.c (Fexecute): Use mb_goback's MBCLEN to detect a
    word-boundary even more efficiently.

diff --git a/src/dfasearch.c b/src/dfasearch.c
index 3ebd25e..6c95d8c 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -279,7 +279,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
                     goto success;
                   if (mb_start < beg)
                     mb_start = beg;
-                  if (mb_goback (&mb_start, match, buflim) == 0)
+                  if (mb_goback (&mb_start, NULL, match, buflim) == 0)
                     goto success;
                   /* The matched line starts in the middle of a multibyte
                      character.  Perform the DFA search starting from the
diff --git a/src/kwsearch.c b/src/kwsearch.c
index f590d19..f121816 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -161,6 +161,7 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
   bool longest;
   struct kwsearch *kwsearch = vcp;
   kwset_t kwset = kwsearch->kwset;
+  size_t mbclen;
 
   if (match_lines)
     mb_check = longest = false;
@@ -194,7 +195,9 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
           return EGexecute (kwsearch->re, buf, size, match_size, start_ptr);
         }
 
-      if (mb_check && mb_goback (&mb_start, beg + offset, buf + size) != 0)
+      mbclen = 0;
+      if (mb_check
+          && mb_goback (&mb_start, &mbclen, beg + offset, buf + size) != 0)
         {
           /* We have matched a single byte that is not at the beginning of a
              multibyte character.  mb_goback has advanced MB_START past that
@@ -225,22 +228,19 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
 
       /* We need a preceding mb_start pointer.  Use the beginning of line
          if there is a preceding newline.  */
-      if (mb_check)
+      if (mbclen == 0)
         {
-           char const *nl = memrchr (buf, eol, beg - buf);
-           mb_start = nl ? nl + 1 : buf;
-        }
-      else
-        {
-           char const *nl = memrchr (mb_start, eol, beg - mb_start);
-           if (nl)
-             mb_start = nl + 1;
+          char const *nl = memrchr (mb_start, eol, beg - mb_start);
+          if (nl)
+            mb_start = nl + 1;
         }
 
       /* Succeed if neither the preceding nor the following character is a
          word constituent.  If the preceding is not, yet the following
          character IS a word constituent, keep trying with shorter matches.  */
-      if (! wordchar_prev (mb_start, beg, buf + size))
+      if (mbclen > 0
+          ? ! wordchar_next (beg - mbclen, buf + size)
+          : ! wordchar_prev (mb_start, beg, buf + size))
         for (;;)
           {
             if (! wordchar_next (beg + len, buf + size))
diff --git a/src/search.h b/src/search.h
index a782a0c..d6010b9 100644
--- a/src/search.h
+++ b/src/search.h
@@ -52,7 +52,8 @@ extern size_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE;
 extern size_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE;
 extern size_t wordchar_prev (char const *, char const *, char const *)
   _GL_ATTRIBUTE_PURE;
-extern ptrdiff_t mb_goback (char const **, char const *, char const *);
+extern ptrdiff_t mb_goback (char const **, size_t *, char const *,
+                            char const *);
 
 /* dfasearch.c */
 extern void *GEAcompile (char *, size_t, reg_syntax_t);
diff --git a/src/searchutils.c b/src/searchutils.c
index 9bb35fd..d6a36f1 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -75,18 +75,21 @@ kwsinit (bool mb_trans)
    back from CUR to the previous boundary, where a "boundary" is the
    start of a multibyte character or is an error-encoding byte.  The
    buffer ends at END (i.e., one past the address of the buffer's last
-   byte).  If CUR is already at a boundary, return 0.  If *MB_START is
-   greater than CUR, return the negative value CUR - *MB_START.
+   byte).  If CUR is already at a boundary, return 0.  If CUR is no
+   larger than *MB_START, return CUR - *MB_START without modifying
+   *MB_START or *MBCLEN.
 
    When returning zero, set *MB_START to CUR.  When returning a
-   positive value, set *MB_START to the next boundary after CUR, or to
-   END if there is no such boundary.  When returning a negative value,
-   leave *MB_START alone.  */
+   positive value, set *MB_START to the next boundary after CUR,
+   or to END if there is no such boundary, and set *MBCLEN to the
+   length of the preceding character.  */
 ptrdiff_t
-mb_goback (char const **mb_start, char const *cur, char const *end)
+mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
+           char const *end)
 {
   const char *p = *mb_start;
   const char *p0 = p;
+  size_t clen;
 
   if (cur <= p)
     return cur - p;
@@ -94,13 +97,14 @@ mb_goback (char const **mb_start, char const *cur, char const *end)
   if (localeinfo.using_utf8)
     {
       p = cur;
+      clen = 1;
 
       if (cur < end && (*cur & 0xc0) == 0x80)
         for (int i = 1; i <= 3; i++)
           if ((cur[-i] & 0xc0) != 0x80)
             {
               mbstate_t mbs = { 0 };
-              size_t clen = mb_clen (cur - i, end - (cur - i), &mbs);
+              clen = mb_clen (cur - i, end - (cur - i), &mbs);
               if (i < clen && clen < (size_t) -2)
                 {
                   p0 = cur - i;
@@ -114,7 +118,7 @@ mb_goback (char const **mb_start, char const *cur, char const *end)
       mbstate_t mbs = { 0 };
       do
         {
-          size_t clen = mb_clen (p, end - p, &mbs);
+          clen = mb_clen (p, end - p, &mbs);
 
           if ((size_t) -2 <= clen)
             {
@@ -130,6 +134,8 @@ mb_goback (char const **mb_start, char const *cur, char const *end)
     }
 
   *mb_start = p;
+  if (mbclen)
+    *mbclen = clen;
   return p == cur ? 0 : cur - p0;
 }
 
@@ -192,6 +198,6 @@ wordchar_prev (char const *buf, char const *cur, char const *end)
       || (localeinfo.using_utf8 && localeinfo.sbclen[b] != -2))
     return sbwordchar[b];
   char const *p = buf;
-  cur -= mb_goback (&p, cur, end);
+  cur -= mb_goback (&p, NULL, cur, end);
   return wordchar_next (cur, end);
 }

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=cea97a849038754933dadce9db4ab9761b681c92


commit 449f1c5805adba00ddd6edad30d96dbaeb8a91a3
Author: Norihiro Tanaka <address@hidden>
Date:   Sun Nov 17 07:29:15 2019 +0900

    grep: improve grep -Fw performance in non-UTF8 multibyte locales
    
    * src/searchutils.c (mb_goback): New parameter.  All callers changed.
    * src/search.h (mb_goback): Update prototype.
    * src/kwsearch.c (Fexecute): Use mb_goback's MBCLEN to detect a
    word-boundary even more efficiently.

diff --git a/src/dfasearch.c b/src/dfasearch.c
index 3ebd25e..6c95d8c 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -279,7 +279,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
                     goto success;
                   if (mb_start < beg)
                     mb_start = beg;
-                  if (mb_goback (&mb_start, match, buflim) == 0)
+                  if (mb_goback (&mb_start, NULL, match, buflim) == 0)
                     goto success;
                   /* The matched line starts in the middle of a multibyte
                      character.  Perform the DFA search starting from the
diff --git a/src/kwsearch.c b/src/kwsearch.c
index f590d19..f121816 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -161,6 +161,7 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
   bool longest;
   struct kwsearch *kwsearch = vcp;
   kwset_t kwset = kwsearch->kwset;
+  size_t mbclen;
 
   if (match_lines)
     mb_check = longest = false;
@@ -194,7 +195,9 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
           return EGexecute (kwsearch->re, buf, size, match_size, start_ptr);
         }
 
-      if (mb_check && mb_goback (&mb_start, beg + offset, buf + size) != 0)
+      mbclen = 0;
+      if (mb_check
+          && mb_goback (&mb_start, &mbclen, beg + offset, buf + size) != 0)
         {
           /* We have matched a single byte that is not at the beginning of a
              multibyte character.  mb_goback has advanced MB_START past that
@@ -225,22 +228,19 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
 
       /* We need a preceding mb_start pointer.  Use the beginning of line
          if there is a preceding newline.  */
-      if (mb_check)
+      if (mbclen == 0)
         {
-           char const *nl = memrchr (buf, eol, beg - buf);
-           mb_start = nl ? nl + 1 : buf;
-        }
-      else
-        {
-           char const *nl = memrchr (mb_start, eol, beg - mb_start);
-           if (nl)
-             mb_start = nl + 1;
+          char const *nl = memrchr (mb_start, eol, beg - mb_start);
+          if (nl)
+            mb_start = nl + 1;
         }
 
       /* Succeed if neither the preceding nor the following character is a
          word constituent.  If the preceding is not, yet the following
          character IS a word constituent, keep trying with shorter matches.  */
-      if (! wordchar_prev (mb_start, beg, buf + size))
+      if (mbclen > 0
+          ? ! wordchar_next (beg - mbclen, buf + size)
+          : ! wordchar_prev (mb_start, beg, buf + size))
         for (;;)
           {
             if (! wordchar_next (beg + len, buf + size))
diff --git a/src/search.h b/src/search.h
index a782a0c..d6010b9 100644
--- a/src/search.h
+++ b/src/search.h
@@ -52,7 +52,8 @@ extern size_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE;
 extern size_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE;
 extern size_t wordchar_prev (char const *, char const *, char const *)
   _GL_ATTRIBUTE_PURE;
-extern ptrdiff_t mb_goback (char const **, char const *, char const *);
+extern ptrdiff_t mb_goback (char const **, size_t *, char const *,
+                            char const *);
 
 /* dfasearch.c */
 extern void *GEAcompile (char *, size_t, reg_syntax_t);
diff --git a/src/searchutils.c b/src/searchutils.c
index 9bb35fd..d6a36f1 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -75,18 +75,21 @@ kwsinit (bool mb_trans)
    back from CUR to the previous boundary, where a "boundary" is the
    start of a multibyte character or is an error-encoding byte.  The
    buffer ends at END (i.e., one past the address of the buffer's last
-   byte).  If CUR is already at a boundary, return 0.  If *MB_START is
-   greater than CUR, return the negative value CUR - *MB_START.
+   byte).  If CUR is already at a boundary, return 0.  If CUR is no
+   larger than *MB_START, return CUR - *MB_START without modifying
+   *MB_START or *MBCLEN.
 
    When returning zero, set *MB_START to CUR.  When returning a
-   positive value, set *MB_START to the next boundary after CUR, or to
-   END if there is no such boundary.  When returning a negative value,
-   leave *MB_START alone.  */
+   positive value, set *MB_START to the next boundary after CUR,
+   or to END if there is no such boundary, and set *MBCLEN to the
+   length of the preceding character.  */
 ptrdiff_t
-mb_goback (char const **mb_start, char const *cur, char const *end)
+mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
+           char const *end)
 {
   const char *p = *mb_start;
   const char *p0 = p;
+  size_t clen;
 
   if (cur <= p)
     return cur - p;
@@ -94,13 +97,14 @@ mb_goback (char const **mb_start, char const *cur, char const *end)
   if (localeinfo.using_utf8)
     {
       p = cur;
+      clen = 1;
 
       if (cur < end && (*cur & 0xc0) == 0x80)
         for (int i = 1; i <= 3; i++)
           if ((cur[-i] & 0xc0) != 0x80)
             {
               mbstate_t mbs = { 0 };
-              size_t clen = mb_clen (cur - i, end - (cur - i), &mbs);
+              clen = mb_clen (cur - i, end - (cur - i), &mbs);
               if (i < clen && clen < (size_t) -2)
                 {
                   p0 = cur - i;
@@ -114,7 +118,7 @@ mb_goback (char const **mb_start, char const *cur, char const *end)
       mbstate_t mbs = { 0 };
       do
         {
-          size_t clen = mb_clen (p, end - p, &mbs);
+          clen = mb_clen (p, end - p, &mbs);
 
           if ((size_t) -2 <= clen)
             {
@@ -130,6 +134,8 @@ mb_goback (char const **mb_start, char const *cur, char const *end)
     }
 
   *mb_start = p;
+  if (mbclen)
+    *mbclen = clen;
   return p == cur ? 0 : cur - p0;
 }
 
@@ -192,6 +198,6 @@ wordchar_prev (char const *buf, char const *cur, char const *end)
       || (localeinfo.using_utf8 && localeinfo.sbclen[b] != -2))
     return sbwordchar[b];
   char const *p = buf;
-  cur -= mb_goback (&p, cur, end);
+  cur -= mb_goback (&p, NULL, cur, end);
   return wordchar_next (cur, end);
 }

-----------------------------------------------------------------------

Summary of changes:
 src/dfasearch.c   |  2 +-
 src/kwsearch.c    | 19 ++++++++++++++-----
 src/search.h      |  3 ++-
 src/searchutils.c | 24 +++++++++++++++---------
 4 files changed, 32 insertions(+), 16 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]