From 14536ad80fee5c43471da21d4e5336b4317819ad Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Tue, 14 Jul 2020 20:34:38 +0200 Subject: [PATCH] Update irregex to the 0.9.8 release (upstream commit ac27338) This fixes behaviour in irregex-replace/all with positive lookbehinds, which would only replace the first match. This also refactors the tests a bit to make them portable beyond CHICKEN, which is not really something we care about here but we copy them to make it easier to keep up with further upstream changes. Reported on chicken-users by Kay Rhodes (upstream issue #21) --- NEWS | 3 ++ irregex-core.scm | 14 ++++++- manual/Acknowledgements | 4 +- tests/test-irregex.scm | 81 +++++++++++++++++++++++++++++++---------- 4 files changed, 79 insertions(+), 23 deletions(-) diff --git a/NEWS b/NEWS index b0e79ed3..e5372080 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,9 @@ - Fixed a bug where optimisations for `irregex-match?` would cause runtime errors due to the inlined specialisations not being fully-expanded (see #1690). + - Irregex has been updated to upstream 0.9.8, which fixes behaviour + of irregex-replace/all with positive lookbehind so all matches are + replaced instead of only the first (reported by Kay Rhodes). - current-milliseconds has been deprecated in favor of the name current-process-milliseconds, to avoid confusion due to naming of current-milliseconds versus current-seconds, which do something diff --git a/irregex-core.scm b/irregex-core.scm index ece802a2..42f2a806 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -1,6 +1,6 @@ ;;;; irregex.scm -- IrRegular Expressions ;; -;; Copyright (c) 2005-2019 Alex Shinn. All rights reserved. +;; Copyright (c) 2005-2020 Alex Shinn. All rights reserved. ;; BSD-style license: http://synthcode.com/license.txt ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -30,6 +30,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; History +;; 0.9.8: 2020/07/13 - fix irregex-replace/all with look-behind patterns ;; 0.9.7: 2019/12/31 - more intuitive handling of empty matches in -fold, ;; -replace and -split ;; 0.9.6: 2016/12/05 - fixed exponential memory use of + in compilation @@ -394,7 +395,16 @@ (lambda (x) (and (not (eq? x src)) ((chunker-get-next cnk) x))) (chunker-get-str cnk) (chunker-get-start cnk) - (lambda (x) (if (eq? x src) i ((chunker-get-end cnk) x))) + (lambda (x) + ;; TODO: this is a hack workaround for the fact that we don't + ;; have either a notion of chunk equivalence or chunk truncation, + ;; until which time (neg-)look-behind in a fold won't work on + ;; non-basic chunks. + (if (or (eq? x src) + (and (not ((chunker-get-next cnk) x)) + (not ((chunker-get-next cnk) src)))) + i + ((chunker-get-end cnk) x))) (chunker-get-substring cnk) (chunker-get-subchunk cnk))) diff --git a/manual/Acknowledgements b/manual/Acknowledgements index 97cf81eb..29c52909 100644 --- a/manual/Acknowledgements +++ b/manual/Acknowledgements @@ -39,8 +39,8 @@ Ian Oversby, "o.t.", Gene Pavlovsky, Levi Pearson, Jeronimo Pellegrini, Nicolas Pelletier, Derrell Piper, Carlos Pita, "Pluijzer", Robin Lee Powell, Alan Post, "Pupeno", Davide Puricelli, "presto", Doug Quale, Imran Rafique, Eric Raible, Ivan Raikov, Santosh Rajan, -Peder Refnes, Joel Reymont, "rivo", Chris Roberts, Eric Rochester, -Paul Romanchenko, +Peder Refnes, Joel Reymont, Kay Rhodes, "rivo", Chris Roberts, +Eric Rochester, Paul Romanchenko, Andreas Rottman, David Rush, Lars Rustemeier, Daniel Sadilek, Otavio Salvador, Burton Samograd, "Sandro", "satori", Aleksej Saushev, Oskar Schirmer, Vasilij Schneidermann, Reed Sheridan, Ronald Schröder, diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm index 59268364..18582809 100644 --- a/tests/test-irregex.scm +++ b/tests/test-irregex.scm @@ -1,11 +1,48 @@ ;;;: test-irregex.scm -(import (only chicken.string string-split string-intersperse) +(import (only chicken.string string-split) + (rename (only chicken.string string-intersperse) (string-intersperse string-join)) ;; Avoid srfi-13 chicken.format chicken.io chicken.irregex chicken.port) (include "test.scm") +(define (cat . args) + (let ((out (open-output-string))) + (for-each (lambda (x) (display x out)) args) + (get-output-string out))) + +(define (warning . args) + (for-each (lambda (x) (display x (current-error-port))) args) + (newline (current-error-port))) + +(define (call-with-input-file file proc) + (let* ((in (open-input-file file)) + (res (proc in))) + (close-input-port in) + res)) + +(define (call-with-input-string str proc) + (let* ((in (open-input-string str)) + (res (proc in))) + (close-input-port in) + res)) + +(define (call-with-output-string proc) + (let ((out (open-output-string))) + (proc out) + (let ((res (get-output-string out))) + (close-output-port out) + res))) + +(define (port-for-each proc read . o) + (let ((in (if (pair? o) (car o) (current-input-port)))) + (let lp () + (let ((x (read in))) + (unless (eof-object? x) + (proc x) + (lp)))))) + (define (subst-matches matches subst) (define (submatch n) (if (irregex-match-data? matches) @@ -47,7 +84,7 @@ (if (list? splt) (apply (lambda (pattern input result subst output) - (let ((name (sprintf "~A ~A ~A ~A" pattern input result subst))) + (let ((name (cat pattern " " input " " result " " subst))) (cond ((equal? "c" result) (test-error name (matcher pattern input))) @@ -66,15 +103,16 @@ (for-each (lambda (opts) - (test-group (sprintf "irregex - ~S" opts) - (with-input-from-file "re-tests.txt" - (lambda () + (test-group (cat "irregex - " opts) + (call-with-input-file "re-tests.txt" + (lambda (in) (port-for-each (lambda (line) (test-re (lambda (pat str) (irregex-search (apply irregex pat opts) str)) line)) - read-line))))) + read-line + in))))) '((backtrack) (fast) )) @@ -97,7 +135,7 @@ (let lp ((src (cdr src1)) (res (list (substring (caar src1) i (caddar src1))))) (if (eq? src src2) - (string-intersperse + (string-join (reverse (cons (substring (caar src2) (cadar src2) j) res)) "") (lp (cdr src) @@ -153,10 +191,9 @@ (for-each (lambda (opts) - (test-group - (sprintf "irregex/chunked - ~S" opts) - (with-input-from-file "re-tests.txt" - (lambda () + (test-group (cat "irregex/chunked - " opts) + (call-with-input-file "re-tests.txt" + (lambda (in) (port-for-each (lambda (line) (let ((splt (string-split line "\t" #t))) @@ -164,7 +201,7 @@ (apply (lambda (pattern input result subst output) (let ((name - (sprintf "~A ~A ~A ~A" pattern input result subst))) + (cat pattern " " input " " result " " subst))) (cond ((equal? "c" result)) ((equal? "n" result) @@ -189,7 +226,8 @@ (make-shared-ropes input))))))) splt) (warning "invalid regex test line" line)))) - read-line))))) + read-line + in))))) '((backtrack) (fast) )) @@ -198,21 +236,23 @@ ;; pregexp '(test-group "pregexp" - (with-input-from-file "re-tests.txt" - (lambda () + (call-with-input-file "re-tests.txt" + (lambda (in) (port-for-each (lambda (line) (test-re pregexp-match line)) - read-line)))) + read-line + in)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; default regex (PCRE) '(test-group "regex" - (with-input-from-file "re-tests.txt" - (lambda () + (call-with-input-file "re-tests.txt" + (lambda (in) (port-for-each (lambda (line) (test-re string-search line)) - read-line)))) + read-line + in)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -395,6 +435,9 @@ (irregex-replace/all 'bol "Line 1\nLine 2" "*")) (test-equal "**p*l*a*t*t*e*r" (irregex-replace/all '(* "poo ") "poo poo platter" "*")) + (test-equal "x- y- z-" + (irregex-replace/all '(: (look-behind (or "x" "y" "z")) "a") + "xa ya za" "-")) (test-equal '("foo" " " "foo" " " "b" "a" "r" " " "foo") (irregex-extract '(or (: bow "foo" eow) any) "foo foo bar foo")) (test-equal '("f" "o" "o" "b" "a" "r" "b" "a" "z") -- 2.20.1