[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[SCM] gawk branch, gawk-5.1-stable, updated. gawk-4.1.0-4277-gbcc0594
From: Arnold Robbins
Subject: [SCM] gawk branch, gawk-5.1-stable, updated. gawk-4.1.0-4277-gbcc0594
Date: Fri, 13 Aug 2021 17:03:55 -0400 (EDT)
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".
The branch, gawk-5.1-stable has been updated
via bcc0594e9b64c89b56e8ea6891c0a9f8b97c57d1 (commit)
from 492c24d65f760edea1f9228260930728eb747cf7 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=bcc0594e9b64c89b56e8ea6891c0a9f8b97c57d1
commit bcc0594e9b64c89b56e8ea6891c0a9f8b97c57d1
Author: Arnold D. Robbins <arnold@skeeve.com>
Date: Fri Aug 13 17:03:19 2021 -0400
Rationalize strong regex as param to sub/gsub. Add tests.
diff --git a/ChangeLog b/ChangeLog
index c598dac..4e82bff 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2021-08-13 Arnold D. Robbins <arnold@skeeve.com>
+
+ * builtin.c (do_sub): Rationalize handling of strongly typed
+ regex as argument to sub/gsub, as well as rationalize the return
+ value from gensub to always be string. Thanks to John Naman
+ <jnaman2@gmail.com> for the bug report.
+
2021-08-05 Andrew J. Schorr <aschorr@telemetry-investments.com>
* mpfr.c (do_mpfr_func): New argument, warn_negative. If true,
diff --git a/builtin.c b/builtin.c
index 454034f..e1ba5eb 100644
--- a/builtin.c
+++ b/builtin.c
@@ -2934,8 +2934,6 @@ do_sub(int nargs, unsigned int flags)
RESTART(rp, target->stptr) > target->stlen)
goto done;
- target->flags |= STRING;
-
text = target->stptr;
textlen = target->stlen;
@@ -3183,6 +3181,10 @@ done:
DEREF(target);
assert(buf != NULL);
return make_str_node(buf, textlen, ALREADY_MALLOCED);
+ } else if ((target->flags & STRING) == 0) {
+ /* return a copy of original string */
+ DEREF(target);
+ return make_str_node(target->stptr, target->stlen, 0);
}
/* return the original string */
@@ -3193,8 +3195,34 @@ done:
if ((flags & LITERAL) != 0)
DEREF(target);
else if (matches > 0) {
- unref(*lhs);
- *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
+ /*
+ * 8/2021: There's a bit of a song and dance here. If someone does
+ *
+ * x = @/abc/
+ * sub(/b/, "x", x)
+ *
+ * What should the type of x be after the call? Does it get converted
+ * to string? Or does it remain a regexp? We've decided to let it
+ * remain a regexp. In that case, we have to update the compiled
+ * regular expression that it holds.
+ */
+ bool is_regex = false;
+ NODE *target = *lhs;
+
+ if ((target->flags & REGEX) != 0) {
+ is_regex = true;
+
+ // free old regex registers
+ refree(target->typed_re->re_reg[0]);
+ if (target->typed_re->re_reg[1] != NULL)
+ refree(target->typed_re->re_reg[1]);
+ freenode(target->typed_re);
+ }
+ unref(*lhs); // nuke original value
+ if (is_regex)
+ *lhs = make_typed_regex(buf, textlen);
+ else
+ *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
}
return make_number((AWKNUM) matches);
diff --git a/pc/Makefile.tst b/pc/Makefile.tst
index 4b99204..b9572ba 100644
--- a/pc/Makefile.tst
+++ b/pc/Makefile.tst
@@ -216,7 +216,7 @@ GAWK_EXT_TESTS = \
procinfs profile0 profile1 profile2 profile3 profile4 profile5 profile6 \
profile7 profile8 profile9 profile10 profile11 profile12 profile13 \
profile14 profile15 pty1 pty2 \
- rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
+ rebuf regexsub regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
rsstart1 rsstart2 rsstart3 rstest6 \
sandbox1 shadow shadowbuiltin sortfor sortfor2 sortu \
sourcesplit split_after_fpat \
@@ -3152,6 +3152,11 @@ profile15:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk --pretty-print=_$@ >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+regexsub:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
regnul1:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/ChangeLog b/test/ChangeLog
index c92f018..c3dcd55 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,8 @@
+2021-08-13 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (EXTRA_DIST): regexsub, new test.
+ * regexsub.awk, regexsub.ok: New files.
+
2021-05-15 Eli Zaretskii <eliz@gnu.org>
* iolint.ok: Reorder results to follow the order of iolint.awk.
diff --git a/test/Makefile.am b/test/Makefile.am
index 3f9e930..7ee2381 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -1057,6 +1057,8 @@ EXTRA_DIST = \
regexpbrack2.ok \
regexprange.awk \
regexprange.ok \
+ regexsub.awk \
+ regexsub.ok \
reginttrad.awk \
reginttrad.ok \
regnul1.awk \
@@ -1456,7 +1458,7 @@ GAWK_EXT_TESTS = \
procinfs profile0 profile1 profile2 profile3 profile4 profile5 profile6 \
profile7 profile8 profile9 profile10 profile11 profile12 profile13 \
profile14 profile15 pty1 pty2 \
- rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
+ rebuf regexsub regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
rsstart1 rsstart2 rsstart3 rstest6 \
sandbox1 shadow shadowbuiltin sortfor sortfor2 sortu \
sourcesplit split_after_fpat \
diff --git a/test/Makefile.in b/test/Makefile.in
index e73a950..79ca9a3 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -1320,6 +1320,8 @@ EXTRA_DIST = \
regexpbrack2.ok \
regexprange.awk \
regexprange.ok \
+ regexsub.awk \
+ regexsub.ok \
reginttrad.awk \
reginttrad.ok \
regnul1.awk \
@@ -1719,7 +1721,7 @@ GAWK_EXT_TESTS = \
procinfs profile0 profile1 profile2 profile3 profile4 profile5 profile6 \
profile7 profile8 profile9 profile10 profile11 profile12 profile13 \
profile14 profile15 pty1 pty2 \
- rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
+ rebuf regexsub regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
rsstart1 rsstart2 rsstart3 rstest6 \
sandbox1 shadow shadowbuiltin sortfor sortfor2 sortu \
sourcesplit split_after_fpat \
@@ -4814,6 +4816,11 @@ profile15:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk --pretty-print=_$@ >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+regexsub:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
regnul1:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Maketests b/test/Maketests
index a36ac8c..12cc164 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -1880,6 +1880,11 @@ profile15:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk --pretty-print=_$@ >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+regexsub:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
regnul1:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/regexsub.awk b/test/regexsub.awk
new file mode 100644
index 0000000..92dede7
--- /dev/null
+++ b/test/regexsub.awk
@@ -0,0 +1,48 @@
+BEGIN {
+ print "Initialize strong regex"
+ rgx2 = rgx1 = @/[abc]/
+ print "Test gsub on strong regex"
+ printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 1, rgx1, 1, typeof(rgx1))
+ printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 2, rgx2, 2, typeof(rgx2))
+ print "Test gsub() a strong regex"
+ gsub(/b/, "e", rgx2)
+ printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 1, rgx1, 1, typeof(rgx1))
+ printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 2, rgx2, 2, typeof(rgx2))
+
+ print "Test value not found in regex"
+ gsub(/x/, "y", rgx1) # should not change
+ printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 1, rgx1, 1, typeof(rgx1))
+
+ print "Test gsub on numbers"
+ v2 = v1 = 12345
+ printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 1, v1, 1, typeof(v1))
+ printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 2, v2, 2, typeof(v2))
+ gsub(/3/, "x", v2)
+ printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 1, v1, 1, typeof(v1))
+ printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 2, v2, 2, typeof(v2))
+ print "Test value not found in number"
+ gsub(/9/, "x", v1)
+ printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 1, v1, 1, typeof(v1))
+
+ print "Test gensub on regex"
+ a = b = @/abc/
+ c = gensub(/b/, "x", "g", a)
+ printf("a = @/%s/\ttypeof(a) = '%s'\n", a, typeof(a))
+ printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c))
+ print "Test value not found in regex"
+ c = gensub(/q/, "x", "g", b)
+ printf("b = @/%s/\ttypeof(b) = '%s'\n", b, typeof(b))
+ printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c))
+
+ print "Test gensub on numbers"
+ a = b = 12345
+ c = gensub(/3/, "x", "g", a)
+ printf("a = \"%s\"\ttypeof(a) = '%s'\n", a, typeof(a))
+ printf("b = \"%s\"\ttypeof(b) = '%s'\n", b, typeof(b))
+ printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c))
+ print "Test value not found in number"
+ c = gensub(/9/, "x", "g", b)
+ printf("b = \"%s\"\ttypeof(b) = '%s'\n", b, typeof(b))
+ printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c))
+ print typeof(c), c
+}
diff --git a/test/regexsub.ok b/test/regexsub.ok
new file mode 100644
index 0000000..44511eb
--- /dev/null
+++ b/test/regexsub.ok
@@ -0,0 +1,30 @@
+Initialize strong regex
+Test gsub on strong regex
+rgx1 = '[abc]' typeof(rgx1) = 'regexp'
+rgx2 = '[abc]' typeof(rgx2) = 'regexp'
+Test gsub() a strong regex
+rgx1 = '[abc]' typeof(rgx1) = 'regexp'
+rgx2 = '[aec]' typeof(rgx2) = 'regexp'
+Test value not found in regex
+rgx1 = '[abc]' typeof(rgx1) = 'regexp'
+Test gsub on numbers
+v1 = '12345' typeof(v1) = 'number'
+v2 = '12345' typeof(v2) = 'number'
+v1 = '12345' typeof(v1) = 'number'
+v2 = '12x45' typeof(v2) = 'string'
+Test value not found in number
+v1 = '12345' typeof(v1) = 'number'
+Test gensub on regex
+a = @/abc/ typeof(a) = 'regexp'
+c = "axc" typeof(c) = 'string'
+Test value not found in regex
+b = @/abc/ typeof(b) = 'regexp'
+c = "abc" typeof(c) = 'string'
+Test gensub on numbers
+a = "12345" typeof(a) = 'number'
+b = "12345" typeof(b) = 'number'
+c = "12x45" typeof(c) = 'string'
+Test value not found in number
+b = "12345" typeof(b) = 'number'
+c = "12345" typeof(c) = 'string'
+string 12345
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 7 +++++++
builtin.c | 36 ++++++++++++++++++++++++++++++++----
pc/Makefile.tst | 7 ++++++-
test/ChangeLog | 5 +++++
test/Makefile.am | 4 +++-
test/Makefile.in | 9 ++++++++-
test/Maketests | 5 +++++
test/regexsub.awk | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
test/regexsub.ok | 30 ++++++++++++++++++++++++++++++
9 files changed, 144 insertions(+), 7 deletions(-)
create mode 100644 test/regexsub.awk
create mode 100644 test/regexsub.ok
hooks/post-receive
--
gawk
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [SCM] gawk branch, gawk-5.1-stable, updated. gawk-4.1.0-4277-gbcc0594,
Arnold Robbins <=