From 7606aed8e97666ef3100d9432f9c8575e2094ba6 Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Mon, 5 Jul 2021 15:27:32 +0200 Subject: [PATCH] Update irregex to upstream b3116764 (fc1adacb) to fix issue with "or" When compiling an NFA from a SRE object containing an (or) which contains an empty sequence, the resulting state machine would be invalid, causing a crash when trying to convert it to a DFA. This was due to a mismatch in internal bookkeeping between state numbers and the actual state transitions. --- irregex-core.scm | 2 +- tests/test-irregex.scm | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/irregex-core.scm b/irregex-core.scm index a8e7c97f..f86b7992 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -2563,7 +2563,7 @@ flags next)))) (and a - (let ((c (add-state! (new-state-number a) + (let ((c (add-state! (new-state-number (max a b)) '()))) (nfa-add-epsilon! buf c a #f) (nfa-add-epsilon! buf c b #f) diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm index f1aefc21..5cf5b685 100644 --- a/tests/test-irregex.scm +++ b/tests/test-irregex.scm @@ -567,6 +567,15 @@ ;; irregex-flags, irregex-lengths ) +(test-group "SRE representation edge cases" + ;; NFA compilation skipped alternative after empty sequence (#26, found by John Clements) + (test-equal "empty sequence in \"or\"" + "" + (irregex-match-substring (irregex-search `(or (seq) "foo") ""))) + (test-equal "alternative to empty sequence in \"or\"" + "foo" + (irregex-match-substring (irregex-search `(or (seq) "foo") "foo")))) + (test-end) -- 2.20.1