From 6dba0d35c0a6a88183efa06f59164459a8bed6f2 Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Sun, 6 Jan 2019 16:36:54 +0100 Subject: [PATCH 1/2] When interning "qualified" symbols, convert them to regular symbols This is a preparatory step in fixing #1077. Once we have a bootstrapping compiler that reads qualified symbols and interns them as "##foo#bar" instead of as "\003foobar", it will also no longer emit code with strings that are encoded in the old style. The hack must be in the interning step so that the new runtime will still be compatible with an old compiler: that compiler will still generate code like "\003foobar". Without this hack, we'd get errors like "\003syscar is unbound" if we'd only change the reading of these symbols. Luckily, uninterned symbols don't matter for this. This is a total hack which is also very wasteful and dumb because it will malloc a new string whenever it encounters a qualified symbol. But that's only in the intermediate bootstrapping compiler: after that, when compiling a new CHICKEN, there should be no more dequalification happening. Signed-off-by: felix --- NEWS | 4 ++++ library.scm | 39 +++++++++++----------------------- repl.scm | 1 - runtime.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++---------- 4 files changed, 74 insertions(+), 39 deletions(-) diff --git a/NEWS b/NEWS index 7d5f4d19..957760ad 100644 --- a/NEWS +++ b/NEWS @@ -27,6 +27,10 @@ specification and also give sensible results on Windows. - Fix get-environment from (chicken process-context) to raise an error when passed #f instead of segfaulting. + - Qualified symbols (##foo#bar style) are no longer encoded by a + byte prefix inside the symbol name. This ensures read-write + invariance of symbols which start with a low-byte character + (fixes #1077, except for keywords, which start with NUL bytes). 5.0.0 diff --git a/library.scm b/library.scm index 0891f6a4..d7dc35ad 100644 --- a/library.scm +++ b/library.scm @@ -2674,6 +2674,7 @@ EOF (##sys#intern-symbol str) ) (define ##sys#symbol->string) +;; DEPRECATED: Remove this once we have a new bootstrapping compiler (define ##sys#symbol->qualified-string) (define ##sys#qualified-symbol-prefix) @@ -2693,6 +2694,7 @@ EOF [i (split str len)] ) (if i (##sys#substring str i len) str) ) ) ) + ;; DEPRECATED: Remove this once we have a new bootstrapping compiler (set! ##sys#symbol->qualified-string (lambda (s) (let* ([str (##sys#slot s 1)] @@ -2702,6 +2704,7 @@ EOF (string-append "##" (##sys#substring str 1 i) "#" (##sys#substring str i len)) str) ) ) ) + ;; DEPRECATED: Remove this once we have a new bootstrapping compiler (set! ##sys#qualified-symbol-prefix (lambda (s) (let* ([str (##sys#slot s 1)] @@ -2709,18 +2712,12 @@ EOF [i (split str len)] ) (and i (##sys#substring str 0 i)) ) ) ) ) +;; DEPRECATED: Remove this once we have a new bootstrapping compiler (define (##sys#qualified-symbol? s) (let ((str (##sys#slot s 1))) (and (fx> (##sys#size str) 0) (fx<= (##sys#byte str 0) namespace-max-id-len)))) -(define ##sys#string->qualified-symbol - (lambda (prefix str) - (##sys#string->symbol - (if prefix - (##sys#string-append prefix str) - str) ) ) ) - (set! scheme#symbol->string (lambda (s) (##sys#check-symbol s 'symbol->string) @@ -3710,7 +3707,6 @@ EOF (define ##sys#default-read-info-hook #f) (define ##sys#read-error-with-line-number #f) -(define ##sys#enable-qualifiers #t) (define (##sys#read-prompt-hook) #f) ; just here so that srfi-18 works without eval (define (##sys#infix-list-hook lst) lst) @@ -4152,24 +4148,8 @@ EOF (loop i) ) ) ) ) ) ) (define (r-ext-symbol) - (let* ([p (##sys#make-string 1)] - [tok (r-token)] - [toklen (##sys#size tok)] ) - (unless ##sys#enable-qualifiers - (##sys#read-error port "qualified symbol syntax is not allowed" tok) ) - (let loop ([i 0]) - (cond [(fx>= i toklen) - (##sys#read-error port "invalid qualified symbol syntax" tok) ] - [(fx= (##sys#byte tok i) (char->integer #\#)) - (when (fx> i namespace-max-id-len) - (set! tok (##sys#substring tok 0 namespace-max-id-len)) ) - (##sys#setbyte p 0 i) - (##sys#intern-symbol - (string-append - p - (##sys#substring tok 0 i) - (##sys#substring tok (fx+ i 1) toklen)) ) ] - [else (loop (fx+ i 1))] ) ) ) ) + (let ((tok (r-token))) + (build-symbol (string-append "##" tok)))) (define (build-symbol tok) (##sys#intern-symbol tok) ) @@ -4556,7 +4536,12 @@ EOF (eq? c #\-) ) (not (##sys#string->number str)) ) ((eq? c #\:) (not (eq? ksp #:prefix))) - ((eq? c #\#) ;; #!rest, #!key etc + ((and (eq? c #\#) + ;; Not a qualified symbol? + (not (and (fx> len 2) + (eq? (##core#inline "C_subchar" str 1) #\#) + (not (eq? (##core#inline "C_subchar" str 2) #\#))))) + ;; #!rest, #!key etc (eq? (##core#inline "C_subchar" str 1) #\!)) ((specialchar? c) #f) (else #t) ) ) diff --git a/repl.scm b/repl.scm index 7d7ef771..4ec97efa 100644 --- a/repl.scm +++ b/repl.scm @@ -146,7 +146,6 @@ (##sys#reset-handler (lambda () (set! ##sys#read-error-with-line-number #f) - (set! ##sys#enable-qualifiers #t) (resetports) (c #f))))) (##sys#read-prompt-hook) diff --git a/runtime.c b/runtime.c index 2a5415fa..1ac6e4f8 100644 --- a/runtime.c +++ b/runtime.c @@ -597,6 +597,45 @@ C_dbg(C_char *prefix, C_char *fstr, ...) va_end(va); } +/* + * Dequalify symbol string if necessary. This is a temporary hack to + * ensure that all interned symbols are in the literal ##foo#bar + * style. This enforces compatibility between a new runtime and code + * compiled by an older compiler which still generates \003foobar + * literal symbols. This transition is needed to fix #1077. Because + * of its temporary nature (ideally we just build a new bootstrapping + * compiler with this in which the hack should have no effect), we can + * afford to be stupidly wasteful and just malloc a new string every + * time we get here. + * + * DEPRECATED: Remove this once we have a new bootstrapping compiler + */ +static C_char *dequalified_symbol_string(C_char *str, int *len) +{ + C_char *deq_str; + int prefix = (int)str[0]; + + if (prefix >= 31) return str; /* namespace-max-id-len */ + if (prefix == 0) return str; /* keyword (TODO: change this too) */ + + deq_str = malloc(*len+3); + if (deq_str == NULL) { + horror(C_text("cannot dequalify string - out of memory")); + } + + deq_str[0] = '#'; + deq_str[1] = '#'; + memcpy(deq_str + 2, str + 1, prefix); + deq_str[prefix + 2] = '#'; + memcpy(deq_str + prefix + 3, str + 1 + prefix, *len - prefix - 1); + deq_str[*len+2] = '\0'; /* Not always part of original str, but if it is, we must add it */ + *len += 2; + if(debug_mode) { + C_dbg(C_text("debug"), C_text("Dequalified [%o]%.*s into %s\n"), str[0], len-3, str+1, deq_str); + } + return deq_str; +} + /* Startup code: */ @@ -1108,12 +1147,12 @@ void initialize_symbol_table(void) for(i = 0; i < symbol_table->size; symbol_table->table[ i++ ] = C_SCHEME_END_OF_LIST); /* Obtain reference to hooks for later: */ - core_provided_symbol = C_intern2(C_heaptop, C_text("\004coreprovided")); - interrupt_hook_symbol = C_intern2(C_heaptop, C_text("\003sysinterrupt-hook")); - error_hook_symbol = C_intern2(C_heaptop, C_text("\003syserror-hook")); - callback_continuation_stack_symbol = C_intern3(C_heaptop, C_text("\003syscallback-continuation-stack"), C_SCHEME_END_OF_LIST); - pending_finalizers_symbol = C_intern2(C_heaptop, C_text("\003syspending-finalizers")); - current_thread_symbol = C_intern3(C_heaptop, C_text("\003syscurrent-thread"), C_SCHEME_FALSE); + core_provided_symbol = C_intern2(C_heaptop, C_text("##core#provided")); + interrupt_hook_symbol = C_intern2(C_heaptop, C_text("##sys#interrupt-hook")); + error_hook_symbol = C_intern2(C_heaptop, C_text("##sys#error-hook")); + callback_continuation_stack_symbol = C_intern3(C_heaptop, C_text("##sys#callback-continuation-stack"), C_SCHEME_END_OF_LIST); + pending_finalizers_symbol = C_intern2(C_heaptop, C_text("##sys#pending-finalizers")); + current_thread_symbol = C_intern3(C_heaptop, C_text("##sys#current-thread"), C_SCHEME_FALSE); } @@ -2278,6 +2317,8 @@ C_regparm C_word C_fcall C_intern_in(C_word **ptr, int len, C_char *str, C_SYMBO int key; C_word s; + str = dequalified_symbol_string(str, &len); + if(stable == NULL) stable = symbol_table; key = hash_string(len, str, stable->size, stable->rand, 0); @@ -2299,6 +2340,8 @@ C_regparm C_word C_fcall C_h_intern_in(C_word *slot, int len, C_char *str, C_SYM int key; C_word s; + str = dequalified_symbol_string(str, &len); + if(stable == NULL) stable = symbol_table; key = hash_string(len, str, stable->size, stable->rand, 0); @@ -2322,6 +2365,8 @@ C_regparm C_word C_fcall C_h_intern_in(C_word *slot, int len, C_char *str, C_SYM C_regparm C_word C_fcall intern0(C_char *str) { int len = C_strlen(str); + str = dequalified_symbol_string(str, &len); + int key = hash_string(len, str, symbol_table->size, symbol_table->rand, 0); C_word s; @@ -2335,10 +2380,11 @@ C_regparm C_word C_fcall C_lookup_symbol(C_word sym) int key; C_word str = C_block_item(sym, 1); int len = C_header_size(str); + C_char *the_str = dequalified_symbol_string(C_c_string(str), &len); - key = hash_string(len, C_c_string(str), symbol_table->size, symbol_table->rand, 0); + key = hash_string(len, the_str, symbol_table->size, symbol_table->rand, 0); - return lookup(key, len, C_c_string(str), symbol_table); + return lookup(key, len, the_str, symbol_table); } @@ -5866,7 +5912,7 @@ void C_ccall C_signum(C_word c, C_word *av) } else if (C_truep(C_bignump(x))) { C_kontinue(k, C_bignum_negativep(x) ? C_fix(-1) : C_fix(1)); } else { - try_extended_number("\003sysextended-signum", 2, k, x); + try_extended_number("##sys#extended-signum", 2, k, x); } } @@ -9948,6 +9994,7 @@ void C_ccall C_string_to_symbol(C_word c, C_word *av) len = C_header_size(string); name = (C_char *)C_data_pointer(string); + name = dequalified_symbol_string(name, &len); key = hash_string(len, name, symbol_table->size, symbol_table->rand, 0); if(!C_truep(s = lookup(key, len, name, symbol_table))) @@ -10412,7 +10459,7 @@ void C_ccall C_number_to_string(C_word c, C_word *av) C_integer_to_string(c, av); /* reuse av */ } else { C_word k = av[ 1 ]; - try_extended_number("\003sysextended-number->string", 3, k, num, radix); + try_extended_number("##sys#extended-number->string", 3, k, num, radix); } } @@ -10533,7 +10580,7 @@ void C_ccall C_integer_to_string(C_word c, C_word *av) if (len > C_RECURSIVE_TO_STRING_THRESHOLD && /* The power of two fast path is much faster than recursion */ ((C_uword)1 << radix_shift) != radix) { - try_extended_number("\003sysinteger->string/recursive", + try_extended_number("##sys#integer->string/recursive", 4, k, num, C_fix(radix), C_fix(len)); } else { C_word kab[C_SIZEOF_CLOSURE(4)], *ka = kab, kav[6]; -- 2.16.2