gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] gawk branch, feature/csv-revamp, created. gawk-4.1.0-5183-g2e18b77f


From: Arnold Robbins
Subject: [SCM] gawk branch, feature/csv-revamp, created. gawk-4.1.0-5183-g2e18b77f
Date: Thu, 16 Mar 2023 12:26:10 -0400 (EDT)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, feature/csv-revamp has been created
        at  2e18b77f5b6926e6616ce22d2d3e6d511de69c9b (commit)

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=2e18b77f5b6926e6616ce22d2d3e6d511de69c9b

commit 2e18b77f5b6926e6616ce22d2d3e6d511de69c9b
Author: Arnold D. Robbins <arnold@skeeve.com>
Date:   Thu Mar 16 18:25:43 2023 +0200

    Start revamp of CSV handling.

diff --git a/ChangeLog b/ChangeLog
index 11c326b9..55d16d3c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2023-03-16         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * awk.h (enum do_flag_values): Add DO_CSV.
+       (do_csv): New macro.
+       (init_csv_fields, init_csv_records): Add declarations.
+       * field.c (init_csv_fields): New function.
+       (set_parser): Don't set the parser if doing CSV. Add warnings.
+       * io.c (csvscan): New function (placeholder for now).
+       (init_csv_records): New function.
+       (set_RS): Don't set the parser if doing CSV. Add warnings.
+       * main.c (optab): Add new options -k/--csv.
+       (main): Fatal out if --posix and --csv. Call init_csv_records()
+       and init_csv_fields().
+       (usage): Add a line for the new options.
+       (load_procinfo): Install PROCINFO["CSV"] if doing CSV.
+       (parse_args): Update for new options.
+
 2023-03-09         Arnold D. Robbins     <arnold@skeeve.com>
 
        * gawkapi.h: Update copyright year. Small edit in leading comment.
diff --git a/awk.h b/awk.h
index 661eb637..c7b589a3 100644
--- a/awk.h
+++ b/awk.h
@@ -1173,6 +1173,7 @@ extern enum do_flag_values {
        DO_PROFILE         = 0x02000,   /* profile the program */
        DO_DEBUG           = 0x04000,   /* debug the program */
        DO_MPFR            = 0x08000,   /* arbitrary-precision floating-point math */
+       DO_CSV             = 0x10000,   /* process comma-separated-value files */
 } do_flags;
 
 #define do_traditional      (do_flags & DO_TRADITIONAL)
@@ -1187,6 +1188,7 @@ extern enum do_flag_values {
 #define do_sandbox          (do_flags & DO_SANDBOX)
 #define do_debug            (do_flags & DO_DEBUG)
 #define do_mpfr             (do_flags & DO_MPFR)
+#define do_csv              (do_flags & DO_CSV)
 
 extern bool do_optimize;
 extern int use_lc_numeric;
@@ -1569,6 +1571,7 @@ extern NODE *get_actual_argument(NODE *, int, bool);
 #endif
 /* field.c */
 extern void init_fields(void);
+extern void init_csv_fields(void);
 extern void set_record(const char *buf, int cnt, const awk_fieldwidth_info_t *);
 extern void reset_record(void);
 extern void rebuild_record(void);
@@ -1629,6 +1632,7 @@ extern int isdirpunct(int c);
 /* io.c */
 extern void init_sockets(void);
 extern void init_io(void);
+extern void init_csv_records(void);
 extern void register_input_parser(awk_input_parser_t *input_parser);
 extern void register_output_wrapper(awk_output_wrapper_t *wrapper);
 extern void register_two_way_processor(awk_two_way_processor_t *processor);
diff --git a/field.c b/field.c
index 7f20b69c..44c153dc 100644
--- a/field.c
+++ b/field.c
@@ -114,6 +114,15 @@ init_fields()
        field0_valid = true;
 }
 
+/* init_csv_fields --- set up to handle --csv */
+
+void
+init_csv_fields(void)
+{
+       if (do_csv)
+               parse_field = comma_parse_field;
+}
+
 /* grow_fields --- acquire new fields as needed */
 
 static void
@@ -771,6 +780,7 @@ sc_parse_field(long up_to,  /* parse only up to this field number */
  * via (*parse_field)().  This variation is for when FS is a comma,
  * we do very basic CSV parsing, the same as BWK awk.
  */
+
 static long
 comma_parse_field(long up_to,  /* parse only up to this field number */
        char **buf,     /* on input: string to parse; on output: point to start next */
@@ -1285,11 +1295,29 @@ do_patsplit(int nargs)
 static void
 set_parser(parse_field_func_t func)
 {
+       /*
+        * Setting FS does nothing if CSV mode, warn in that case,
+        * but don't warn on first call which happens at initialization.
+        */
+       static bool first_time = true;
+       static bool warned = false;
+
+       if (! first_time && do_csv) {
+               if (! warned) {
+                       warned = true;
+                       warning(_("assignment to FS/FIELDWIDTHS/FPAT has no effect when using --csv"));
+               }
+               return;
+       }
+
        normal_parse_field = func;
        if (! api_parser_override && parse_field != func) {
                parse_field = func;
                update_PROCINFO_str("FS", current_field_sep_str());
        }
+
+       if (first_time)
+               first_time = false;
 }
 
 /* set_FIELDWIDTHS --- handle an assignment to FIELDWIDTHS */
@@ -1503,8 +1531,6 @@ choose_fs_function:
                        else if (fs->stptr[0] == '\\')
                                /* same special case */
                                strcpy(buf, "[\\\\]");
-                       else if (fs->stptr[0] == ',' && ! do_posix)
-                               set_parser(comma_parse_field);
                        else
                                set_parser(sc_parse_field);
                }
diff --git a/io.c b/io.c
index 85f56447..41167b58 100644
--- a/io.c
+++ b/io.c
@@ -265,6 +265,7 @@ static bool avoid_flush(const char *name);
 static RECVALUE rs1scan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state);
 static RECVALUE rsnullscan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state);
 static RECVALUE rsrescan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state);
+static RECVALUE csvscan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state);
 
 static RECVALUE (*matchrec)(IOBUF *iop, struct recmatch *recm, SCANSTATE *state) = rs1scan;
 
@@ -341,6 +342,15 @@ init_io()
                read_can_timeout = true;
 }
 
+/* init_csv_records --- set up for CSV handling */
+
+void
+init_csv_records(void)
+{
+       if (do_csv)
+               matchrec = csvscan;
+}
+
 
 #if defined(__MINGW32__) || defined(__CYGWIN__)
 /* binmode --- convert BINMODE to string for fopen */
@@ -3820,6 +3830,14 @@ find_longest_terminator:
        return REC_OK;
 }
 
+/* csvscan --- handle --csv mode */
+
+static RECVALUE
+csvscan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state)
+{
+       return rs1scan(iop, recm, state);       // XXX so it'll compile and run
+}
+
 /* retryable --- return true if PROCINFO[<filename>, "RETRY"] exists */
 
 static inline int
@@ -4069,6 +4087,13 @@ get_a_record(char **out,        /* pointer to pointer to data */
 void
 set_RS()
 {
+       /*
+        * Setting RS does nothing if CSV mode, warn in that case,
+        * but don't warn on first call which happens at initialization.
+        */
+       static bool first_time = true;
+       static bool warned = false;
+
        static NODE *save_rs = NULL;
 
        /*
@@ -4099,9 +4124,15 @@ set_RS()
        refree(RS_re[1]);
        RS_re[0] = RS_re[1] = RS_regexp = NULL;
 
+       if (! first_time && ! warned && do_csv) {
+               warned = true;
+               warning(_("assignment to RS has no effect when using --csv"));
+       }
+
        if (RS->stlen == 0) {
                RS_is_null = true;
-               matchrec = rsnullscan;
+               if (first_time || ! do_csv)
+                       matchrec = rsnullscan;
        } else if ((RS->stlen > 1 || (RS->flags & REGEX) != 0) && ! do_traditional) {
                static bool warned = false;
 
@@ -4109,17 +4140,23 @@ set_RS()
                RS_re[1] = make_regexp(RS->stptr, RS->stlen, true, true, true);
                RS_regexp = RS_re[IGNORECASE];
 
-               matchrec = rsrescan;
+               if (first_time || ! do_csv)
+                       matchrec = rsrescan;
 
                if (do_lint_extensions && ! warned) {
                        lintwarn(_("multicharacter value of `RS' is a gawk extension"));
                        warned = true;
                }
-       } else
-               matchrec = rs1scan;
+       } else {
+               if (first_time || ! do_csv)
+                       matchrec = rs1scan;
+       }
 set_FS:
        if (current_field_sep() == Using_FS)
                set_FS();
+
+       if (first_time)
+               first_time = false;
 }
 
 
diff --git a/main.c b/main.c
index 9fa67f83..c161e8ee 100644
--- a/main.c
+++ b/main.c
@@ -171,6 +171,7 @@ static const struct option optab[] = {
        { "bignum",             no_argument,            NULL,   'M' },
        { "characters-as-bytes", no_argument,           & do_binary,     'b' },
        { "copyright",          no_argument,            NULL,   'C' },
+       { "csv",                no_argument,            NULL,   'k' },
        { "debug",              optional_argument,      NULL,   'D' },
        { "dump-variables",     optional_argument,      NULL,   'd' },
        { "exec",               required_argument,      NULL,   'E' },
@@ -375,6 +376,9 @@ main(int argc, char **argv)
                }
        }
 
+       if (do_csv && do_posix)
+               fatal(_("`--posix' and `--csv' conflict"));
+
        if (do_lint) {
                if (os_is_setuid())
                        lintwarn(_("running %s setuid root may be a security problem"), myname);
@@ -415,6 +419,10 @@ main(int argc, char **argv)
        /* Set up the special variables */
        init_vars();
 
+       /* set up CSV */
+       init_csv_records();
+       init_csv_fields();
+
        /* Set up the field variables */
        init_fields();
 
@@ -624,6 +632,7 @@ usage(int exitval, FILE *fp)
        fputs(_("\t-h\t\t\t--help\n"), fp);
        fputs(_("\t-i includefile\t\t--include=includefile\n"), fp);
        fputs(_("\t-I\t\t\t--trace\n"), fp);
+       fputs(_("\t-k\t\t\t--csv\n"), fp);
        fputs(_("\t-l library\t\t--load=library\n"), fp);
        /*
         * TRANSLATORS: the "fatal", "invalid" and "no-ext" here are literal
@@ -1105,6 +1114,9 @@ load_procinfo()
        update_PROCINFO_str("pma", get_pma_version());
 #endif /* USE_PERSISTENT_MALLOC */
 
+       if (do_csv)
+               update_PROCINFO_num("CSV", 1);
+
        load_procinfo_argv();
        return PROCINFO_node;
 }
@@ -1569,7 +1581,7 @@ parse_args(int argc, char **argv)
        /*
         * The + on the front tells GNU getopt not to rearrange argv.
         */
-       const char *optlist = "+F:f:v:W;bcCd::D::e:E:ghi:Il:L::nNo::Op::MPrSstVYZ:";
+       const char *optlist = "+F:f:v:W;bcCd::D::e:E:ghi:kIl:L::nNo::Op::MPrSstVYZ:";
        int old_optind;
        int c;
        char *scan;
@@ -1668,6 +1680,10 @@ parse_args(int argc, char **argv)
                        do_itrace = true;
                        break;
 
+               case 'k':       // k is for "comma". it's a stretch, I know
+                       do_flags |= DO_CSV;
+                       break;
+
                case 'l':
                        (void) add_srcfile(SRC_EXTLIB, optarg, srcfiles, NULL, NULL);
                        break;
diff --git a/pc/ChangeLog b/pc/ChangeLog
index e809bfac..541be9c5 100644
--- a/pc/ChangeLog
+++ b/pc/ChangeLog
@@ -1,3 +1,7 @@
+2023-03-16         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * Makefile.tst: Regenerated.
+
 2023-03-12  Eli Zaretskii  <eliz@gnu.org>
 
        * Makefile.ext (readdir_test.$(SOEXT)): Fix typo.
diff --git a/pc/Makefile.tst b/pc/Makefile.tst
index 316d778c..72f8a9cb 100644
--- a/pc/Makefile.tst
+++ b/pc/Makefile.tst
@@ -288,9 +288,12 @@ NEED_SANDBOX = sandbox1
 # List of tests that need --traditional
 NEED_TRADITIONAL = litoct tradanch rscompat
 
-# Lists of tests that need the PMA allocator and a backing file
+# List of tests that need the PMA allocator and a backing file
 NEED_PMA = pma
 
+# List of tests that need --csv
+NEED_CSV = csv1
+
 # Lists of tests that run a shell script
 RUN_SHELL = exit fflush localenl modifiers next randtest rtlen rtlen01
 
@@ -2721,7 +2724,7 @@ crlf:
 
 csv1:
        @echo $@
-       @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+       @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  --csv < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
        @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
 
 dbugeval2:
diff --git a/test/ChangeLog b/test/ChangeLog
index e206fa9b..f247c540 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,10 @@
+2023-03-16         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * Gentests: Handle NEED_CSV.
+       * Makefile.am (NEED_CSV): New list of tests that need --csv.
+       * badargs.ok: Update after code changes.
+       * csv1.awk: Adjust after code changes.
+
 2023-03-09         Arnold D. Robbins     <arnold@skeeve.com>
 
        * badargs.ok: Update after code changes.
diff --git a/test/Gentests b/test/Gentests
index 42a81cff..b3a8f787 100755
--- a/test/Gentests
+++ b/test/Gentests
@@ -108,6 +108,13 @@ BEGIN {
        next
 }
 
+/^NEED_CSV *=/,/[^\\]$/ {
+       gsub(/(^NEED_CSV *=|\\$)/,"")
+       for (i = 1; i <= NF; i++)
+               csv[$i]
+       next
+}
+
 /^GENTESTS_UNUSED *=/,/[^\\]$/ {
        gsub(/(^GENTESTS_UNUSED *=|\\$)/,"")
        for (i = 1; i <= NF; i++)
@@ -229,6 +236,10 @@ function generate(x,       s, i, locale_string)
                s = s " --re-interval"
                delete re_interval[x]
        }
+       if (x in csv) {
+               s = s " --csv"
+               delete csv[x]
+       }
        if (x".in" in files) {
                s = s " < \"$(srcdir)\"/$@.in"
                delete files[x".in"]
diff --git a/test/Makefile.am b/test/Makefile.am
index 6d000178..7bb4c983 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -1604,9 +1604,12 @@ NEED_SANDBOX = sandbox1
 # List of tests that need --traditional
 NEED_TRADITIONAL = litoct tradanch rscompat
 
-# Lists of tests that need the PMA allocator and a backing file
+# List of tests that need the PMA allocator and a backing file
 NEED_PMA = pma
 
+# List of tests that need --csv
+NEED_CSV = csv1
+
 # Lists of tests that run a shell script
 RUN_SHELL = exit fflush localenl modifiers next randtest rtlen rtlen01
 
diff --git a/test/Makefile.in b/test/Makefile.in
index 1cd775d1..28c5ebc5 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -1868,9 +1868,12 @@ NEED_SANDBOX = sandbox1
 # List of tests that need --traditional
 NEED_TRADITIONAL = litoct tradanch rscompat
 
-# Lists of tests that need the PMA allocator and a backing file
+# List of tests that need the PMA allocator and a backing file
 NEED_PMA = pma
 
+# List of tests that need --csv
+NEED_CSV = csv1
+
 # Lists of tests that run a shell script
 RUN_SHELL = exit fflush localenl modifiers next randtest rtlen rtlen01
 
@@ -4484,7 +4487,7 @@ crlf:
 
 csv1:
        @echo $@
-       @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+       @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  --csv < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
        @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
 
 dbugeval2:
diff --git a/test/Maketests b/test/Maketests
index 628ff3fa..d284aab6 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -1414,7 +1414,7 @@ crlf:
 
 csv1:
        @echo $@
-       @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+       @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  --csv < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
        @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
 
 dbugeval2:
diff --git a/test/badargs.ok b/test/badargs.ok
index d2c67cac..1d79bc78 100644
--- a/test/badargs.ok
+++ b/test/badargs.ok
@@ -17,6 +17,7 @@ Short options:                GNU long options: (extensions)
        -h                      --help
        -i includefile          --include=includefile
        -I                      --trace
+       -k                      --csv
        -l library              --load=library
        -L[fatal|invalid|no-ext]        --lint[=fatal|invalid|no-ext]
        -M                      --bignum
diff --git a/test/csv1.awk b/test/csv1.awk
index 12bbf1e5..4896ef7c 100644
--- a/test/csv1.awk
+++ b/test/csv1.awk
@@ -1,6 +1,6 @@
-BEGIN {
-       FS = ","
-}
+# BEGIN {
+#      FS = ","
+# }
 
 {
        printf(" \t%s\t", $0)

-----------------------------------------------------------------------


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]