Re: gawk number to string bug

bug-gnu-utils

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: gawk number to string bug

From:	Paul Eggert
Subject:	Re: gawk number to string bug
Date:	Wed, 21 Dec 2005 12:28:38 -0800
User-agent:	Gnus/5.1007 (Gnus v5.10.7) Emacs/21.4 (gnu/linux)

"Andrew J. Schorr" <address@hidden> writes:

> I do not see how your patch addresses this issue.  The problem seems
> to be in builtin.c:format_tree().  Did your patch change that
> function?

No, as I mentioned in
<http://lists.gnu.org/archive/html/bug-gnu-utils/2005-12/msg00093.html>
there are two areas of bugs here, and my previous patch fixed only one
of them.  The other area causes only a mild problem on Solaris SPARC
64-bit, but apparently a more-severe bug on your platform.  As you
mentioned, your patch works on your platform but relies on undefined
behavior and so isn't portable.

I thought about it a bit more and came up with the following patch,
which should be portable to all platforms.  It's a bit nicer than my
previous patch, which was a bit _too_ conservative (as in a few rare
cases it fell back on floating point when an integer type would work).

Can you please give this patch a try on your 64-bit platform?  It
works on mine.  Please try both 'make check' and David Ellsworth's
test case.  Thanks.

2005-12-19  Paul Eggert  <address@hidden>

        Fix rounding bugs on hosts where an integer type is wider than the
        floating-point type being converted to.  Problem reported by David
        Ellsworth in
        <http://lists.gnu.org/archive/html/bug-gnu-utils/2005-12/msg00067.html>.
        
        * awk.h (FLT_RADIX, FLT_MANT_DIG, DBL_MANT_DIG, AWKSMALL_MANT_DIG)
        (AWKNUM_MANT_DIG, AWKNUM_FRACTION_BITS): Moved here from builtin.c.
        * builtin.c: Move those same macros to awk.h.
        * awk.h (DOUBLE_FRACTION_BITS, LONG_MAX_AS_DOUBLE): New macros.
        * builtin.c (UINTMAX_MAX_AS_AWKNUM): New macro.
        (format_tree): Fix rounding bug on hosts where uintmax_t is wider
        than the AWKNUM fraction (e.g., 64-bit uintmax_t, 53-bit fraction).
        (do_gensub): Fix rounding bug on hosts where long is wider than double.
        * node.c (format_val): Likewise.

--- awk.h-bak1  2005-07-26 11:07:43.000000000 -0700
+++ awk.h       2005-12-21 12:24:51.000000000 -0800
@@ -293,6 +293,50 @@ extern double gawk_strtod();
 
 #define AWKNUM double
 
+/* Assume IEEE-754 arithmetic on pre-C89 hosts.  */
+#ifndef FLT_RADIX
+#define FLT_RADIX 2
+#endif
+#ifndef FLT_MANT_DIG
+#define FLT_MANT_DIG 24
+#endif
+#ifndef DBL_MANT_DIG
+#define DBL_MANT_DIG 53
+#endif
+
+/*
+ * The number of base-FLT_RADIX digits in an AWKNUM fraction, assuming
+ * that AWKNUM is not long double.
+ */
+#define AWKSMALL_MANT_DIG \
+  (sizeof (AWKNUM) == sizeof (double) ? DBL_MANT_DIG : FLT_MANT_DIG)
+
+/*
+ * The number of base-FLT_RADIX digits in an AWKNUM fraction, even if
+ * AWKNUM is long double.  Don't mention 'long double' unless
+ * LDBL_MANT_DIG is defined, for the sake of ancient compilers that
+ * lack 'long double'.
+ */
+#ifdef LDBL_MANT_DIG
+#define AWKNUM_MANT_DIG \
+  (sizeof (AWKNUM) == sizeof (long double) ? LDBL_MANT_DIG : AWKSMALL_MANT_DIG)
+#else
+#define AWKNUM_MANT_DIG AWKSMALL_MANT_DIG
+#endif
+
+/*
+ * The number of bits in an AWKNUM fraction, assuming FLT_RADIX is
+ * either 2 or 16.  IEEE and VAX formats use radix 2, and IBM
+ * mainframe format uses radix 16; we know of no other radices in
+ * practical use.
+ */
+#if FLT_RADIX != 2 && FLT_RADIX != 16
+Please port the following code to your weird host;
+#endif
+#define AWKNUM_FRACTION_BITS (AWKNUM_MANT_DIG * (FLT_RADIX == 2 ? 1 : 4))
+#define DOUBLE_FRACTION_BITS (DBL_MANT_DIG * (FLT_RADIX == 2 ? 1 : 4))
+
+
 #ifndef TRUE
 /* a bit hackneyed, but what the heck */
 #define TRUE   1
@@ -693,6 +737,13 @@ struct flagtab {
 #endif
 #define UNLIMITED    LONG_MAX 
 
+/* the maximum long value that is exactly representable as a double */
+#define LONG_MAX_AS_DOUBLE \
+  (LONG_MAX \
+   & ~ (sizeof (long) * CHAR_BIT - 1 < DOUBLE_FRACTION_BITS \
+        ? 0L \
+        : (1L << (sizeof (long) * CHAR_BIT - 1 - DOUBLE_FRACTION_BITS)) - 1))
+
 /* -------------------------- External variables -------------------------- */
 /* gawk builtin variables */
 extern long NF;
--- builtin.c-bak1      2005-07-26 11:07:43.000000000 -0700
+++ builtin.c   2005-12-21 10:59:06.000000000 -0800
@@ -58,6 +58,13 @@
 # define UINTMAX_MAX TYPE_MAXIMUM (uintmax_t)
 #endif
 
+/* the maximum uintmax_t value that is exactly representable as an AWKNUM */
+#define UINTMAX_MAX_AS_AWKNUM \
+  (UINTMAX_MAX \
+   << (sizeof (uintmax_t) * CHAR_BIT < AWKNUM_FRACTION_BITS \
+       ? 0 \
+       : sizeof (uintmax_t) * CHAR_BIT - AWKNUM_FRACTION_BITS))
+
 #ifndef SIZE_MAX       /* C99 constant, can't rely on it everywhere */
 #define SIZE_MAX ((size_t) -1)
 #endif
@@ -73,17 +80,6 @@ extern int output_is_tty;
 
 static NODE *sub_common P((NODE *tree, long how_many, int backdigs));
 
-/* Assume IEEE-754 arithmetic on pre-C89 hosts.  */
-#ifndef FLT_RADIX
-#define FLT_RADIX 2
-#endif
-#ifndef FLT_MANT_DIG
-#define FLT_MANT_DIG 24
-#endif
-#ifndef DBL_MANT_DIG
-#define DBL_MANT_DIG 53
-#endif
-
 #ifdef _CRAY
 /* Work around a problem in conversion of doubles to exact integers. */
 #define Floor(n) floor((n) * (1.0 + DBL_EPSILON))
@@ -961,7 +957,7 @@ check_pos:
                                uval = - (uintmax_t) (intmax_t) tmpval;
                        } else {
                                /* Use !, so that NaNs are out of range.  */
-                               if (! (tmpval <= UINTMAX_MAX))
+                               if (! (tmpval <= UINTMAX_MAX_AS_AWKNUM))
                                        goto out_of_range;
                                sgn = FALSE;
                                uval = (uintmax_t) tmpval;
@@ -1052,7 +1048,7 @@ check_pos:
                                uval = (uintmax_t) (intmax_t) tmpval;
                        } else {
                                /* Use !, so that NaNs are out of range.  */
-                               if (! (tmpval <= UINTMAX_MAX))
+                               if (! (tmpval <= UINTMAX_MAX_AS_AWKNUM))
                                        goto out_of_range;
                                uval = (uintmax_t) tmpval;
                        }
@@ -2547,7 +2543,7 @@ do_gensub(NODE *tree)
 set_how_many:
                if (d < 1)
                        how_many = 1;
-               else if (d < LONG_MAX)
+               else if (d < LONG_MAX_AS_DOUBLE)
                        how_many = d;
                else
                        how_many = LONG_MAX;
@@ -2645,37 +2641,6 @@ sgfmt(char *buf, /* return buffer; assum
 }
 #endif /* GFMT_WORKAROUND */
 
-/*
- * The number of base-FLT_RADIX digits in an AWKNUM fraction, assuming
- * that AWKNUM is not long double.
- */
-#define AWKSMALL_MANT_DIG \
-  (sizeof (AWKNUM) == sizeof (double) ? DBL_MANT_DIG : FLT_MANT_DIG)
-
-/*
- * The number of base-FLT_DIGIT digits in an AWKNUM fraction, even if
- * AWKNUM is long double.  Don't mention 'long double' unless
- * LDBL_MANT_DIG is defined, for the sake of ancient compilers that
- * lack 'long double'.
- */
-#ifdef LDBL_MANT_DIG
-#define AWKNUM_MANT_DIG \
-  (sizeof (AWKNUM) == sizeof (long double) ? LDBL_MANT_DIG : AWKSMALL_MANT_DIG)
-#else
-#define AWKNUM_MANT_DIG AWKSMALL_MANT_DIG
-#endif
-
-/*
- * The number of bits in an AWKNUM fraction, assuming FLT_RADIX is
- * either 2 or 16.  IEEE and VAX formats use radix 2, and IBM
- * mainframe format uses radix 16; we know of no other radices in
- * practical use.
- */
-#if FLT_RADIX != 2 && FLT_RADIX != 16
-Please port the following code to your weird host;
-#endif
-#define AWKNUM_FRACTION_BITS (AWKNUM_MANT_DIG * (FLT_RADIX == 2 ? 1 : 4))
- 
 /* tmp_integer - Convert an integer to a temporary number node.  */
 
 static NODE *
--- node.c-bak1 2005-11-30 13:33:44.000000000 -0800
+++ node.c      2005-12-21 12:04:47.000000000 -0800
@@ -165,7 +165,7 @@ format_val(const char *format, int index
 
        /* not an integral value, or out of range */
        if ((val = double_to_int(s->numbr)) != s->numbr
-           || val < LONG_MIN || val > LONG_MAX) {
+           || val < LONG_MIN || val > LONG_MAX_AS_DOUBLE) {
                /*
                 * Once upon a time, if GFMT_WORKAROUND wasn't defined,
                 * we just blindly did this:

[Prev in Thread]

Current Thread

[Next in Thread]

Re: gawk number to string bug, (continued)
- Re: gawk number to string bug, David Ellsworth, 2005/12/19
  - Re: gawk number to string bug, Paul Eggert, 2005/12/20
    - Re: gawk number to string bug, Andrew J. Schorr, 2005/12/20
    - Re: gawk number to string bug, Andrew J. Schorr, 2005/12/20
    - Re: gawk number to string bug, Andrew J. Schorr, 2005/12/20
    - Re: gawk number to string bug, Paul Eggert <=
    - Re: gawk number to string bug, Andrew J. Schorr, 2005/12/21
    - Re: gawk number to string bug, Andrew J. Schorr, 2005/12/21
    - Re: gawk number to string bug, Andrew J. Schorr, 2005/12/21
    - Re: gawk number to string bug, Paul Eggert, 2005/12/21
    - Re: gawk number to string bug, Andrew J. Schorr, 2005/12/22
    - Re: gawk number to string bug, Andrew J. Schorr, 2005/12/23
    - Re: gawk number to string bug, Andrew J. Schorr, 2005/12/23
    - Re: gawk number to string bug, Andrew J. Schorr, 2005/12/23
    - Re: gawk number to string bug, Andrew J. Schorr, 2005/12/23
    - Re: gawk number to string bug, Eli Zaretskii, 2005/12/24

Prev by Date: Re: gawk number to string bug
Next by Date: Re: gawk number to string bug
Previous by thread: Re: gawk number to string bug
Next by thread: Re: gawk number to string bug
Index(es):
- Date
- Thread