bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2] regex: fix backreference matching


From: Egor Ignatov
Subject: [PATCH v2] regex: fix backreference matching
Date: Fri, 9 Jul 2021 15:36:43 +0300

* lib/regexec.c
(proceed_next_node): Disable dest_node check if we have backrefs

(set_regs): Finish set_regs when we are at the last node and all
regs have been set.

(set_regs):
Also shrink the match if we are ready to finish but didn't accept the
entire string matched by check_matching, because check_matching may
return a wrong match for a regexp with back-references.  For example,
running check_matching on regex '(a*)*(.)\1' and string 'ab' results in
the match 'ab', where it should be just 'a' in the second capturing group.

All built-in tests, as well as the tests from sed and grep, have passed.

Signed-off-by: Egor Ignatov <egori@altlinux.org>
---
 lib/regexec.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/lib/regexec.c b/lib/regexec.c
index 5e4eb497a..8f0f14575 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -1233,7 +1233,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx 
nregs, regmatch_t *regs,
       for (Idx i = 0; i < edests->nelem; i++)
        {
          Idx candidate = edests->elems[i];
-         if (!re_node_set_contains (cur_nodes, candidate))
+         if (!dfa->nbackref && !re_node_set_contains (cur_nodes, candidate))
            continue;
           if (dest_node == -1)
            dest_node = candidate;
@@ -1296,9 +1296,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx 
nregs, regmatch_t *regs,
              if (__glibc_unlikely (! ok))
                return -2;
              dest_node = dfa->edests[node].elems[0];
-             if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
-                                       dest_node))
-               return dest_node;
+             return dest_node;
            }
        }
 
@@ -1308,8 +1306,9 @@ proceed_next_node (const re_match_context_t *mctx, Idx 
nregs, regmatch_t *regs,
          Idx dest_node = dfa->nexts[node];
          *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
          if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
-                    || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
-                                              dest_node)))
+                    || (!dfa->nbackref &&
+                        !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+                                               dest_node))))
            return -1;
          re_node_set_empty (eps_via_nodes);
          return dest_node;
@@ -1417,8 +1416,7 @@ set_regs (const regex_t *preg, const re_match_context_t 
*mctx, size_t nmatch,
     {
       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
 
-      if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
-         || (fs && re_node_set_contains (&eps_via_nodes, cur_node)))
+      if (cur_node == mctx->last_node)
        {
          Idx reg_idx;
          cur_node = -1;
@@ -1434,6 +1432,7 @@ set_regs (const regex_t *preg, const re_match_context_t 
*mctx, size_t nmatch,
            }
          if (cur_node < 0)
            {
+             pmatch[0].rm_eo = idx;
              re_node_set_free (&eps_via_nodes);
              regmatch_list_free (&prev_match);
              return free_fail_stack_return (fs);
-- 
2.29.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]