gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-962-g3cf67f5


From: Andrew J. Schorr
Subject: [gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-962-g3cf67f5
Date: Tue, 9 Aug 2016 15:34:15 +0000 (UTC)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, gawk-4.1-stable has been updated
       via  3cf67f58ce8e42f9ce8d7be45936eedf79751b46 (commit)
      from  560cf6d52f978e360e9b2a10f7ddd9a0197e0b31 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=3cf67f58ce8e42f9ce8d7be45936eedf79751b46

commit 3cf67f58ce8e42f9ce8d7be45936eedf79751b46
Author: Andrew J. Schorr <address@hidden>
Date:   Tue Aug 9 11:33:27 2016 -0400

    If a strnum integer has a non-standard string representation, do not accept it as an integer array subscript.

diff --git a/ChangeLog b/ChangeLog
index 9ac5be6..c3da019 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2016-08-09         Andrew J. Schorr     <address@hidden>
+
+       * awk.h: Add a comment explaining the NUMINT flag in more detail.
+       * int_array.c (standard_integer_string): New function to test whether
+       a string matches what would be produced by sprintf("%ld", <value>).
+       (is_integer): Fix bug -- if NUMBER was set, then the function was
+       accepting strnum values with nonstandard string representations. We
+       now call standard_integer_string to check that the string looks OK.
+       Also added ifdef'ed code to simplify the function by relying upon
+       force_number to parse the string, but this is disabled due to possible
+       negative performance impact.
+
 2016-08-01         Arnold D. Robbins     <address@hidden>
 
        * README, NEWS: Mark DJGPP port as unsupported.
diff --git a/awk.h b/awk.h
index 7288e20..ff62289 100644
--- a/awk.h
+++ b/awk.h
@@ -420,6 +420,14 @@ typedef struct exp_node {
         * and add WSTRCUR to the flags so that we don't have to do the
         * conversion more than once.
         *
+        * The NUMINT flag may be used with a value of any type -- NUMBER,
+        * STRING, or STRNUM. It indicates that the string representation
+        * equals the result of sprintf("%ld", <numeric value>). So, for
+        * example, NUMINT should NOT be set if it's a strnum or string value
+        * where the string is " 1" or "01" or "+1" or "1.0" or "0.1E1". This
+        * is a hint to indicate that an integer array optimization may be
+        * used when this value appears as a subscript.
+        *
         * We hope that the rest of the flags are self-explanatory. :-)
         */
 #              define  STRING  0x0002       /* assigned as string */
diff --git a/int_array.c b/int_array.c
index a8de3d5..1fa32bd 100644
--- a/int_array.c
+++ b/int_array.c
@@ -78,27 +78,86 @@ int_array_init(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
        return & success_node;
 }
 
+/*
+ * standard_integer_string -- check whether the string matches what
+ * sprintf("%ld", <value>) would produce. This is accomplished by accepting
+ * only strings that look like /^0$/ or /^-?[1-9][0-9]*$/. This should be
+ * faster than comparing vs. the results of actually calling sprintf.
+ */
+
+static bool
+standard_integer_string(const char *s, size_t len)
+{
+       const char *end;
+
+       if (len == 0)
+               return false;
+       if (*s == '0' && len == 1)
+               return true;
+       end = s + len;
+       /* ignore leading minus sign */
+       if (*s == '-' && ++s == end)
+               return false;
+       /* check first char is [1-9] */
+       if (*s < '1' || *s > '9')
+               return false;
+       while (++s < end) {
+               if (*s < '0' || *s > '9')
+                       return false;
+       }
+       return true;
+}
+
 /* is_integer --- check if subscript is an integer */
 
 NODE **
 is_integer(NODE *symbol, NODE *subs)
 {
+#ifndef CHECK_INTEGER_USING_FORCE_NUMBER
        long l;
+#endif
        AWKNUM d;
 
+       if ((subs->flags & NUMINT) != 0)
+               /* quick exit */
+               return & success_node;
+
        if (subs == Nnull_string || do_mpfr)
                return NULL;
 
-       if ((subs->flags & NUMINT) != 0)
-               return & success_node;
+#ifdef CHECK_INTEGER_USING_FORCE_NUMBER
+       /*
+        * This approach is much simpler, because we remove all of the strtol
+        * logic below. But this may be slower in some usage cases.
+        */
+       if ((subs->flags & NUMCUR) == 0) {
+               str2number(subs);
 
-       if ((subs->flags & NUMBER) != 0) {
+               /* check again in case force_number set NUMINT */
+               if ((subs->flags & NUMINT) != 0)
+                       return & success_node;
+       }
+#else /* CHECK_INTEGER_USING_FORCE_NUMBER */
+       if ((subs->flags & NUMCUR) != 0) {
+#endif /* CHECK_INTEGER_USING_FORCE_NUMBER */
                d = subs->numbr;
                if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d) {
-                       subs->flags |= NUMINT;
-                       return & success_node;
+                       /*
+                        * the numeric value is an integer, but we must
+                        * protect against strings that cannot be generated
+                        * from sprintf("%ld", <subscript>). This can happen
+                        * with strnum or string values. We could skip this
+                        * check for pure NUMBER values, but unfortunately the
+                        * code does not currently distinguish between NUMBER
+                        * and strnum values.
+                        */
+                       if ((subs->flags & STRCUR) == 0 || standard_integer_string(subs->stptr, subs->stlen)) {
+                               subs->flags |= NUMINT;
+                               return & success_node;
+                       }
                }
                return NULL;
+#ifndef CHECK_INTEGER_USING_FORCE_NUMBER
        }
 
        /* a[3]=1; print "3" in a    -- true
@@ -151,6 +210,7 @@ is_integer(NODE *symbol, NODE *subs)
                }
        }
        return NULL;
+#endif /* CHECK_INTEGER_USING_FORCE_NUMBER */
 }
 
 

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog   |   12 ++++++++++
 awk.h       |    8 +++++++
 int_array.c |   70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 85 insertions(+), 5 deletions(-)


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]