[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Conflict between FPAT and empty RS
From: |
arnold |
Subject: |
Re: Conflict between FPAT and empty RS |
Date: |
Fri, 10 Apr 2020 05:21:03 -0600 |
User-agent: |
Heirloom mailx 12.5 7/5/10 |
luciole75w <address@hidden> wrote:
> Hello,
>
> I'm having an odd behavior with GNU awk when using FPAT and RS together.
> Here is the code.
>
> $ awk '{ print ; $2 = "-" ; print }' RS='' FPAT='\\w+' <<<'a b c d'
> a b c d
> a -
>
> $ ... | xxd -g1 -c8
> 00000000: 61 20 62 20 63 20 64 0a a b c d.
> 00000008: 61 20 2d 20 20 20 20 0a a - .
>
> I'd expect the 2nd print to output "a - c d" but as you can see,
> modifying the field 2 replaces fields 3 and 4 with spaces in $0. When RS
> is set to something other than the empty string (or just not set), the
> output is correct.
This was an interesting one to track down. Thanks for the clear
reproducer. The fix is below. I will push it to git shortly.
Arnold
------------------------------------------------
diff --git a/field.c b/field.c
index 80495862..45ab9f5f 100644
--- a/field.c
+++ b/field.c
@@ -74,6 +74,9 @@ static bool resave_fs;
static NODE *save_FS; /* save current value of FS when line is read,
* to be used in deferred parsing
*/
+static NODE *save_FPAT; /* save current value of FPAT when line
is read,
+ * to be used in deferred parsing
+ */
static awk_fieldwidth_info_t *FIELDWIDTHS = NULL;
NODE **fields_arr; /* array of pointers to the field nodes */
@@ -842,6 +845,8 @@ get_field(long requested, Func_ptr *assign)
bool in_middle = false;
static bool warned = false;
extern int currule;
+ NODE *saved_fs;
+ Regexp *fs_regexp;
if (do_lint && currule == END && ! warned) {
warned = true;
@@ -857,10 +862,17 @@ get_field(long requested, Func_ptr *assign)
/* first, parse remainder of input record */
if (NF == -1) {
in_middle = (parse_high_water != 0);
+ if (current_field_sep() == Using_FPAT) {
+ saved_fs = save_FPAT;
+ fs_regexp = FPAT_regexp;
+ } else {
+ saved_fs = save_FS;
+ fs_regexp = FS_regexp;
+ }
NF = (*parse_field)(UNLIMITED - 1,
&parse_extent,
fields_arr[0]->stlen -
(parse_extent - fields_arr[0]->stptr),
- save_FS, FS_regexp, set_field,
+ saved_fs, fs_regexp, set_field,
(NODE *) NULL,
(NODE *) NULL,
in_middle);
@@ -1424,7 +1436,6 @@ void
set_FPAT()
{
static bool warned = false;
- static NODE *save_fpat = NULL;
bool remake_re = true;
NODE *fpat;
@@ -1447,9 +1458,9 @@ set_FPAT()
* This comparison can't use cmp_nodes(), which pays attention
* to IGNORECASE, and that's not what we want.
*/
- if (save_fpat
- && FPAT_node->var_value->stlen == save_fpat->stlen
- && memcmp(FPAT_node->var_value->stptr, save_fpat->stptr,
save_fpat->stlen) == 0) {
+ if (save_FPAT
+ && FPAT_node->var_value->stlen == save_FPAT->stlen
+ && memcmp(FPAT_node->var_value->stptr, save_FPAT->stptr,
save_FPAT->stlen) == 0) {
if (FPAT_regexp != NULL)
FPAT_regexp = (IGNORECASE ? FPAT_re_no_case :
FPAT_re_yes_case);
@@ -1462,8 +1473,8 @@ set_FPAT()
}
}
- unref(save_fpat);
- save_fpat = dupnode(FPAT_node->var_value);
+ unref(save_FPAT);
+ save_FPAT = dupnode(FPAT_node->var_value);
refree(FPAT_re_yes_case);
refree(FPAT_re_no_case);
FPAT_re_yes_case = FPAT_re_no_case = FPAT_regexp = NULL;