diff --git a/NEWS b/NEWS index 3fd1332..450c3c6 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,10 @@ - Runtime system - fixed handling of "inf" and nan" floating-point predicates for Solaris (thanks to Claude Marinier) + - deprecated "[+-]nan", "[+-]inf" and other notations "accidentally" + accepted by Chicken due to the underlying C library's strtod() function, + standardizing on "[+-]nan.0" and "[+-]inf.0" from R6RS (and soon R7RS), + when displaying numbers only these forms are generated now. - support for re-loading of compiled files has now been completely removed diff --git a/runtime.c b/runtime.c index 399713e..5f8a0ca 100644 --- a/runtime.c +++ b/runtime.c @@ -7196,9 +7196,9 @@ void C_ccall C_quotient(C_word c, C_word closure, C_word k, C_word n1, C_word n2 C_regparm C_word C_fcall C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0) { - int radix, radixpf = 0, sharpf = 0, ratp = 0, exactf, exactpf = 0, periodf = 0; + int radix, radixpf = 0, sharpf = 0, ratf = 0, exactf, exactpf = 0, periodf = 0, expf = 0; C_word n1, n; - C_char *sptr, *eptr; + C_char *sptr, *eptr, *rptr; double fn1, fn; if(radix0 & C_FIXNUM_BIT) radix = C_unfix(radix0); @@ -7219,47 +7219,73 @@ C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0) buffer[ n ] = '\0'; while(*sptr == '#') { - switch(*(++sptr)) { + switch(C_tolower((int)*(++sptr))) { case 'b': if(radixpf) goto fail; else { radix = 2; radixpf = 1; } break; case 'o': if(radixpf) goto fail; else { radix = 8; radixpf = 1; } break; case 'd': if(radixpf) goto fail; else { radix = 10; radixpf = 1; } break; case 'x': if(radixpf) goto fail; else { radix = 16; radixpf = 1; } break; case 'e': if(exactpf) goto fail; else { exactf = 1; exactpf = 1; } break; case 'i': if(exactpf) goto fail; else { exactf = 0; exactpf = 1; } break; - default: --sptr; + default: goto fail; /* Unknown prefix type */ } ++sptr; } - - /* check for embedded '#'s and double '.'s: */ - for(eptr = sptr; *eptr != '\0'; ++eptr) { - switch(*eptr) { + + /* Scan for embedded special characters and do basic sanity checking: */ + for(eptr = sptr, rptr = sptr; *eptr != '\0'; ++eptr) { + switch(C_tolower((int)*eptr)) { case '.': - if(periodf) goto fail; + if(periodf || ratf || expf) goto fail; periodf = 1; break; case '#': - if(eptr[ 1 ] == '\0' || C_strchr("#.0123456789", eptr[ 1 ]) != NULL) { - sharpf = 1; - *eptr = '0'; - } - else goto fail; + if (expf || (eptr == rptr) || + (!sharpf && (eptr == rptr+1) && (C_strchr("+-.", *rptr) != NULL))) + goto fail; + + sharpf = 1; + *eptr = '0'; + + break; + case '/': + if(periodf || ratf || expf || eptr == sptr) goto fail; + sharpf = 0; /* Allow sharp signs in the denominator */ + ratf = 1; + rptr = eptr+1; + break; + case 'e': + case 'd': + case 'f': + case 'l': + case 's': + /* Don't set exp flag if we see the "f" in "inf.0" (preceded by 'n') */ + /* Other failure modes are handled elsewhere. */ + if(radix == 10 && eptr > sptr && C_tolower((int)*(eptr-1)) != 'n') { + if (ratf) goto fail; + + expf = 1; + sharpf = 0; + *eptr = 'e'; /* strtod() normally only understands 'e', not dfls */ + } + break; + default: + if(sharpf) goto fail; break; } } - + if (eptr == rptr) goto fail; /* Disallow "empty" numbers like "#x" and "1/" */ + /* check for rational representation: */ - if((eptr = C_strchr(sptr, '/')) != NULL) { - if (eptr == sptr) { - n = C_SCHEME_FALSE; - goto fini; + if(rptr != sptr) { + if (*(rptr) == '-' || *(rptr) == '+') { + n = C_SCHEME_FALSE; + goto fini; } - *eptr = '\0'; - ratp = 1; + *(rptr-1) = '\0'; switch(convert_string_to_number(sptr, radix, &n1, &fn1)) { case 0: @@ -7273,7 +7299,7 @@ C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0) /* case 2: nop */ } - sptr = eptr + 1; + sptr = rptr; } /* convert number and return result: */ @@ -7283,8 +7309,8 @@ C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0) break; case 1: /* fixnum */ - if(sharpf || ratp || (exactpf && !exactf)) { - n = C_flonum(a, ratp ? fn1 / (double)n : (double)n); + if(sharpf || ratf || (exactpf && !exactf)) { + n = C_flonum(a, ratf ? fn1 / (double)n : (double)n); if(exactpf && exactf) n = C_i_inexact_to_exact(n); } @@ -7293,7 +7319,7 @@ C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0) break; case 2: /* flonum */ - n = C_flonum(a, ratp ? fn1 / fn : fn); + n = C_flonum(a, ratf ? fn1 / fn : fn); if(exactpf && exactf) n = C_i_inexact_to_exact(n); @@ -7354,28 +7380,35 @@ C_regparm C_word C_fcall convert_string_to_number(C_char *str, int radix, C_word C_word n; C_char *eptr, *eptr2; double fn; -#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__OpenBSD__) int len = C_strlen(str); - if(len >= 4) { - if(!C_strncmp(str, "+nan.0", len)) { - *flo = 0.0/0.0; - return 2; - } - else if(!C_strncmp(str, "-nan.0", len)) { - *flo = -0.0/0.0; - return 2; - } - else if(!C_strncmp(str, "+inf.0", len)) { - *flo = 1.0/0.0; - return 2; + if(radix == 10) { + if (len >= 4 && len <= 6) { /* DEPRECATED, TODO: Change to (len == 4) */ + if((*str == '+' || *str == '-') && + C_strchr("inIN", *(str+1)) != NULL && + C_strchr("naNA", *(str+2)) != NULL && + C_strchr("fnFN", *(str+3)) != NULL && + /* DEPRECATED, TODO: Rip out len checks */ + (len == 4 || *(str+4) == '.') && (len == 5 || (*(str+5) == '0'))) { + if (*(str+1) == 'i' || *(str+1) == 'I') /* Inf */ + *flo = 1.0/0.0; + else /* NaN */ + *flo = 0.0/0.0; + if (*str == '-') + *flo *= -1.0; + return 2; + } } - else if(!C_strncmp(str, "-inf.0", len)) { - *flo = -1.0/0.0; - return 2; + /* DEPRECATED (enable in next release) */ +#if 0 + /* This is disabled during the deprecation period of "+nan" syntax */ + /* Prevent C parser from accepting things like "-inf" on its own... */ + for(n = 0; n < len; ++n) { + if (C_strchr("+-0123456789e.", *(str+n)) == NULL) + return 0; } - } #endif + } if(C_strpbrk(str, "xX\0") != NULL) return 0; @@ -7505,7 +7538,6 @@ void C_ccall C_number_to_string(C_word c, C_word closure, C_word k, C_word num, } } -#if defined(__CYGWIN__) || defined(__MINGW32__) if(C_isnan(f)) { C_strcpy(p = buffer, "+nan.0"); goto fini; @@ -7514,7 +7546,6 @@ void C_ccall C_number_to_string(C_word c, C_word closure, C_word k, C_word num, C_sprintf(p = buffer, "%cinf.0", f > 0 ? '+' : '-'); goto fini; } -#endif #ifdef HAVE_GCVT p = C_gcvt(f, flonum_print_precision, buffer); /* p unused, but we want to avoid stupid warnings */ diff --git a/tests/numbers-string-conversion-tests.scm b/tests/numbers-string-conversion-tests.scm new file mode 100644 index 0000000..c6e1d3c --- /dev/null +++ b/tests/numbers-string-conversion-tests.scm @@ -0,0 +1,280 @@ +;;; +;;; Numerical syntax "torture test" +;;; +;;; This tries to test a lot of edge cases in Scheme's numerical syntax. +;;; +;;; Output is written so that if you run it through "grep ERROR" it will +;;; output nothing (and exit status will be nonzero) if there are no errors. +;;; If you run it through "tail -n 1" you will just get the total error summary. +;;; +;;; This code assumes that string->number accepts numbers with embedded radix +;;; specifiers (R5RS mentions that it's allowed to return #f in those cases). +;;; It also doesn't try to support Schemes which support *only* integers or +;;; *only* flonums (which is also allowed by R5RS). +;;; + +(define the-nan (fp/ 0.0 0.0)) +(define pos-inf (fp/ 1.0 0.0)) +(define neg-inf (fp/ -1.0 0.0)) + +(define (nan? x) (and (number? x) (not (= x x)))) + +(define total-errors 0) + +;; Here comes a horrible nasty hack. It seems to work though ;) +(define-syntax test-numbers + (syntax-rules (compnums fractions) + ((_ (str value ...) rest ...) + (begin + (let ((res (string->number str))) + (if (not (or (and (not (string? value)) (equal? res value)) ... + (and res (nan? res) (or (and value (nan? value)) ...)))) + (begin (display "PARSE ERROR ") + (write '(str value ...)) + (display " => ") (write res) (newline) + (set! total-errors (+ total-errors 1))) + (let ((re-str (and res (number->string res)))) + (if (not (or (and res (string=? re-str str)) + (and (not res) (not value)) ... + (and res (string? value) (string=? re-str value)) ...)) + (begin (display "SERIALIZATION ERROR ") + (write `(str value ...)) + (display " => ") (write re-str) (newline) + (set! total-errors (+ total-errors 1))) + (begin (display "OK ") + (write '(str value ...)) (newline)))))) + (test-numbers rest ...))) + ((_ "no-totals") #f) + ((_ x rest ...) + (begin (newline) (display "-> ") (display x) (newline) + (display "-----------------------------------------------------") + (newline) + (test-numbers rest ...))) + ((_) + (if (= 0 total-errors) + (begin (newline) + (display "-----> Everything OK, no errors!") + (newline)) + (begin (newline) + (display "-----> TOTAL ERRORS: ") + (display total-errors) + (newline) + (error total-errors)))))) + +(test-numbers + "Simple integers" + ("1" 1) + ("+1" 1 "1") + ("-1" (- 1)) + ("#i1" 1.0 "1.0" "1.") + ("#I1" 1.0 "1.0" "1.") + ("#i-1" (- 1.0) "-1.0" "-1.") + ("-#i1" #f) + ("+-1" #f) + ("" #f) + ("-" #f) + ("+" #f) + ("+-" #f) + + "Basic decimal notation" + ("1.0" (exact->inexact 1) "1.") + ("1." 1.0 "1.0" "1.") + ("1.#" 1.0 1.5 "1.0" "1." "1.5") + (".1" 0.1 "0.1" "100.0e-3") + ("-.1" (- 0.1) "-0.1" "-100.0e-3") + ;; Some Schemes don't allow negative zero. This is okay with the standard + ("-.0" -0.0 "-0." "-0.0" "0.0" "0." ".0") + ("-0." -0.0 "-.0" "-0.0" "0.0" "0." ".0") + ("." #f) + (".1." #f) + ("..1" #f) + ("1.." #f) + ("#i1.0" 1.0 "1.0" "1.") + ("#e1.0" 1 "1") + ("#e-.0" 0 "0") + ("#e-0." 0 "0") + ("-#e.0" #f) + + "Decimal notation with padding" + ("1#" 10.0 15.0 "10.0" "15.0" "10." "15.") + ("#e1#" 10 15 "10" "15") + ("#E1#" 10 15 "10" "15") + ("#1" #f) + ("#" #f) + ("1#2" #f) + ("1.#2" #f) + (".#" #f) + ("#.#" #f) + ("#.1" #f) + ("1#.2" #f) + ("1#." 10.0 15.0 "10.0" "15.0" "10." "15.") + + "Decimal notation with suffix" + ("1e2" 100.0 "100.0" "100.") + ("1E2" 100.0 "100.0" "100.") + ("1s2" 100.0 "100.0" "100.") + ("1S2" 100.0 "100.0" "100.") + ("1f2" 100.0 "100.0" "100.") + ("1F2" 100.0 "100.0" "100.") + ("1d2" 100.0 "100.0" "100.") + ("1D2" 100.0 "100.0" "100.") + ("1l2" 100.0 "100.0" "100.") + ("1L2" 100.0 "100.0" "100.") + ("1e2e3" #f) + ("1e2s3" #f) + ("1e2.0" #f) + + "Decimal notation with suffix and padding" + ("1#e2" 1000.0 1500.0 "1000.0" "1500.0" "1000." "1500." "1.0e3" "15.0e2") + ("1e2#" #f) + + "NaN, Inf" + ("+nan.0" the-nan "+NaN.0") + ("+NAN.0" the-nan "+nan.0" "+NaN.0") + ("+nan.1" #f) + ("+nan.01" #f) + ("+inf.0" pos-inf "+Inf.0") + ("+InF.0" pos-inf "+inf.0" "+Inf.0") + ("-inf.0" neg-inf "-Inf.0") + ("-iNF.0" neg-inf "-inf.0" "-Inf.0") + ("+inf.01" #f) + ("+inf.1" #f) + ("-inf.01" #f) + ("-inf.1" #f) + ("+inf.0/1" #f) + ("1/+inf.0" #f) +#| + ;; DEPRECATED (Disabled during deprecation period of "[+-]nan", "[+-]inf") + ("+nan" #f) + ("+inf" #f) + ("-inf" #f) + ("nan.0" #f) + ("inf.0" #f) +|# + + "Fractions" + ("1/2" (/ 1 2) "0.5" ".5" "500.0e-3") + ("10/2" 5.0 "5.0") + ("-1/2" (- (/ 1 2)) "-0.5" "-.5" "-500.0e-3") + ("1/-2" #f) + ("1.0/2" #f) + ("1/2.0" #f) + ("1/2e2" #f) + ("1/2e2" #f) + ("1#/2" 5.0 7.5 "5.0" "5." "7.5") + ("1/2#" 0.05 "0.05" ".05" "50.0e-3") + ("1#/#" #f) + ("1/" #f) + ("1/+" #f) + ("+/1" #f) + ("/1" #f) + ("/" #f) + + "Some invalid complex numbers syntax (not supported at all)" + ("2i" #f) + ("+-i" #f) + ("i" #f) + ("1+2i1" #f) + ("1+2" #f) + ("1#+#i" #f) + + "Base prefixes" + ("#x11" 17 "17") + ("#X11" 17 "17") + ("#d11" 11 "11") + ("#D11" 11 "11") + ("#o11" 9 "9") + ("#O11" 9 "9") + ("#b11" 3 "3") + ("#B11" 3 "3") + ("#da1" #f) + ("#o8" #f) + ("#b2" #f) + ("#o7" 7 "7") + ("#xa" 10 "10") + ("#xA" 10 "10") + ("#xf" 15 "15") + ("#xg" #f) + ("#x-10" -16 "-16") + ("#d-10" -10 "-10") + ("#o-10" -8 "-8") + ("#b-10" -2 "-2") + ("-#x10" #f) + ("-#d10" #f) + ("-#o10" #f) + ("-#b10" #f) + ("#x-" #f) + ("#x" #f) + ("#d" #f) + ("#d-" #f) + ("#d+" #f) + ("#o" #f) + ("#o-" #f) + ("#b" #f) + ("#b-" #f) + ("#e" #f) + ("#e-" #f) + ("#i" #f) + ("#i-" #f) + + "Combination of prefixes" + ("#x#x11" #f) + ("#x#b11" #f) + ("#b#o11" #f) + ("#e#x10" 16 "16") + ("#i#x10" 16.0 "16.0" "16.") + ("#e#e10" #f) + ("#e#e#x10" #f) + ("#E#e#X10" #f) + ("#i#e#x10" #f) + ("#e#x#e10" #f) + ("#x#x#e10" #f) + ("#x#e#x10" #f) + + "Base prefixes with padding" + ("#x1#0" #f) + ("#d1#0" #f) + ("#o1#0" #f) + ("#b1#0" #f) + ("#x1#" 16.0 24.0 "16.0" "24.0" "16." "24.") + ("#d1#" 10.0 15.0 "10.0" "15.0" "10." "15.") + ("#o1#" 8.0 12.0 "8.0" "12.0" "8." "12.") + ("#b1#" 2.0 3.0 "2.0" "3.0" "2." "3.") + + "(Attempted) decimal notation with base prefixes" + ("#x1.0" #f) + ("#d1.0" 1.0 "1.0" "1.") + ("#o1.0" #f) + ("#b1.0" #f) + ("#x1.#" #f) + ("#d1.#" 1.0 1.5 "1.0" "1.5" "1.") + ("#o1.#" #f) + ("#b1.#" #f) + ("#x1." #f) + ("#d1." 1.0 "1.0" "1.") + ("#o1." #f) + ("#b1." #f) + ("#x.1" #f) + ("#d.1" 0.1 "0.1" ".1" "100.0e-3") + ("#o.1" #f) + ("#b.1" #f) + ("#x1e2" 482 "482") + ("#d1e2" 100.0 "100.0" "100.") + ("#o1e2" #f) + ("#b1e2" #f) + + "Fractions with prefixes" + ("#x10/2" 8.0 "8.0" "8.") + ("#x11/2" 8.5 "8.5") + ("#d11/2" 5.5 "5.5") + ("#o11/2" 4.5 "4.5") + ("#b11/10" 1.5 "1.5") + ("#b11/2" #f) + ("#x10/#o10" #f) + ("10/#o10" #f) + ("#x1#/2" 8.0 12.0 "8.0" "8." "12.0" "12.") + ("#d1#/2" 5.0 7.5 "5.0" "5." "7.5") + ("#o1#/2" 4.0 6.0 "4.0" "4." "6.0" "6.") + ("#b1#/2" #f) + ("#b1#/10" 1.0 1.5 "1.0" "1." "1.5") + ) \ No newline at end of file diff --git a/tests/runtests.sh b/tests/runtests.sh index 4a24457..c33ef05 100644 --- a/tests/runtests.sh +++ b/tests/runtests.sh @@ -284,6 +284,10 @@ echo "======================================== fixnum tests ..." $compile fixnum-tests.scm ./a.out +echo "======================================== string->number tests ..." +$compile numbers-string-conversion-tests.scm +./a.out + echo "======================================== srfi-4 tests ..." $interpret -s srfi-4-tests.scm