From d7ac6eeadf89ad79d604db4c96bb8dfc8afea41b Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Tue, 1 Nov 2016 15:18:26 +0100 Subject: [PATCH 1/2] Improve read/write invariance of keywords (#1332). Keywords are now treated more like symbols are: when they are written, we check for readability, which means they'll be pipe-delimited if they contain "special" characters. The reader now also uses the same "extended token" reader for keywords using the "portable" representation as the style-specific reader, so that it's possible to enter keywords containing "special" characters when using this style. We now also support empty keywords, which can be entered through the "portable" syntax using quotation, i.e., as #:||. Currently, ||: and :|| are not recognised as keywords, but as a symbol of one character, the colon. Ie, it's treated the same as : by itself. --- NEWS | 3 ++ library.scm | 94 ++++++++++++++++++++++++++----------------------- tests/library-tests.scm | 41 ++++++++++++++++++--- 3 files changed, 90 insertions(+), 48 deletions(-) diff --git a/NEWS b/NEWS index 631f1bf..7f0975c 100644 --- a/NEWS +++ b/NEWS @@ -65,6 +65,9 @@ - Runtime system: - "time" macro now shows peak memory usage (#1318, thanks to Kooda). +- Core libraries: + - Keywords are more consistently read/written, like symbols (#1332). + 4.11.1 - Security fixes diff --git a/library.scm b/library.scm index b150540..3c94488 100644 --- a/library.scm +++ b/library.scm @@ -3386,8 +3386,8 @@ EOF (##sys#read-char-0 port) ) ((eq? c #\.) (##sys#read-char-0 port) - (let ([c2 (##sys#peek-char-0 port)]) - (cond [(or (char-whitespace? c2) + (let ((c2 (##sys#peek-char-0 port))) + (cond ((or (char-whitespace? c2) (eq? c2 #\() (eq? c2 #\)) (eq? c2 #\") @@ -3401,22 +3401,26 @@ EOF (##sys#read-error port (starting-line "missing list terminator") - end) ) ] - [else + end) ) ) + (else (r-xtoken (lambda (tok kw) (let* ((tok (##sys#string-append "." tok)) (val - (if kw - (build-keyword tok) - (or (and (char-numeric? c2) - (##sys#string->number tok)) - (build-symbol tok)))) - (node (cons val '())) ) + (cond ((and (string=? tok ".:") + (eq? ksp #:suffix)) + ;; Edge case: r-xtoken sees + ;; a bare ":" and sets kw to #f + (build-keyword ".")) + (kw (build-keyword tok)) + ((and (char-numeric? c2) + (##sys#string->number tok))) + (else (build-symbol tok))) ) + (node (cons val '()))) (if first (##sys#setslot last 1 node) (set! first node) ) - (loop node) ))) ] ) ) ) + (loop node) ))) ) ) ) ) (else (let ([node (cons (readrec) '())]) (if first @@ -3496,10 +3500,6 @@ EOF (##sys#read-char-0 port) (loop (##sys#peek-char-0 port) (cons c lst)) ) ) ) ) - (define (r-next-token) - (r-spaces) - (r-token) ) - (define (r-symbol) (r-xtoken (lambda (str kw) @@ -3513,9 +3513,13 @@ EOF (cond ((or (eof-object? c) (char-whitespace? c) (memq c terminating-characters)) - (if (and skw (eq? ksp #:suffix)) + ;; The not null? checks here ensure we read a + ;; plain ":" as a symbol, not as a keyword. + (if (and skw (eq? ksp #:suffix) + (not (null? (cdr lst)))) (k (##sys#reverse-list->string (cdr lst)) #t) - (k (##sys#reverse-list->string lst) pkw))) + (k (##sys#reverse-list->string lst) + (and pkw (not (null? lst)))) ) ) ((memq c reserved-characters) (reserved-character c)) (else @@ -3623,9 +3627,7 @@ EOF (define (build-keyword tok) (##sys#intern-symbol - (if (eq? 0 (##sys#size tok)) - ":" - (##sys#string-append kwprefix tok)) )) + (##sys#string-append kwprefix tok)) ) ;; now have the state to make a decision. (set! reserved-characters @@ -3733,10 +3735,14 @@ EOF (else (list 'location (readrec)) )))) ((#\:) (##sys#read-char-0 port) - (let ((tok (r-token))) - (if (eq? 0 (##sys#size tok)) - (##sys#read-error port "empty keyword") - (build-keyword tok)))) + (let ((c (##sys#peek-char-0 port))) + (fluid-let ((ksp #f)) + (r-xtoken + (lambda (str kw) + (if (and (eq? 0 (##sys#size str)) + (not (char=? c #\|))) + (##sys#read-error port "empty keyword") + (build-keyword str))) ) ) ) ) ((#\%) (build-symbol (##sys#string-append "#" (r-token))) ) ((#\+) @@ -3954,6 +3960,12 @@ EOF (or (fx<= c 32) (memq chr special-characters) ) ) ) + (define (outsym port sym) + (let ((str (##sys#symbol->string sym))) + (if (or (not readable) (sym-is-readable? str)) + (outstr port str) + (outreadablesym port str) )) ) + (define (outreadablesym port str) (let ((len (##sys#size str))) (outchr port #\|) @@ -4027,27 +4039,21 @@ EOF ((not (##core#inline "C_blockp" x)) (outstr port "#")) ((##core#inline "C_forwardedp" x) (outstr port "#")) ((##core#inline "C_symbolp" x) - (cond [(fx= 0 (##sys#byte (##sys#slot x 1) 0)) - (let ([str (##sys#symbol->string x)]) - (case ksp - [(#:prefix) - (outchr port #\:) - (outstr port str) ] - [(#:suffix) - (outstr port str) - (outchr port #\:) ] - [else - (outstr port "#:") - (outstr port str) ] ) ) ] - [(memq x '(#!optional #!key #!rest)) - (outstr port (##sys#slot x 1))] - [(##sys#qualified-symbol? x) - (outstr port (##sys#symbol->qualified-string x))] + (cond ((fx= 0 (##sys#byte (##sys#slot x 1) 0)) ; keyword + (case ksp + ((#:prefix) + (outchr port #\:) + (outsym port x)) + ((#:suffix) + (outsym port x) + (outchr port #\:)) + (else + (outstr port "#:") + (outsym port x)))) + ((##sys#qualified-symbol? x) + (outstr port (##sys#symbol->qualified-string x))) (else - (let ((str (##sys#symbol->string x))) - (if (or (not readable) (sym-is-readable? str)) - (outstr port str) - (outreadablesym port str) ) ) ) ) ) + (outsym port x) )) ) ((##sys#number? x) (outstr port (##sys#number->string x))) ((##core#inline "C_anypointerp" x) (outstr port (##sys#pointer->string x))) ((##core#inline "C_stringp" x) diff --git a/tests/library-tests.scm b/tests/library-tests.scm index aaa9097..ac09485 100644 --- a/tests/library-tests.scm +++ b/tests/library-tests.scm @@ -335,11 +335,25 @@ (parameterize ((keyword-style #:suffix)) (assert (string=? "abc:" (symbol->string (with-input-from-string "|abc:|" read)))) - (assert (string=? "abc" (symbol->string (with-input-from-string "|abc|:" read))))) ; keyword + (assert (string=? "abc" (symbol->string (with-input-from-string "|abc|:" read)))) ; keyword + (let ((kw (with-input-from-string "|foo bar|:" read))) + (assert (eq? kw (with-input-from-string "#:|foo bar|" read))) + (assert (string=? "foo bar" (symbol->string kw))) + (assert (string=? "foo bar:" + (with-output-to-string (lambda () (display kw))))) + (assert (string=? "|foo bar|:" + (with-output-to-string (lambda () (write kw))))))) (parameterize ((keyword-style #:prefix)) (assert (string=? "abc" (symbol->string (with-input-from-string ":|abc|" read)))) - (assert (string=? ":abc" (symbol->string (with-input-from-string "|:abc|" read))))) + (assert (string=? ":abc" (symbol->string (with-input-from-string "|:abc|" read)))) + (let ((kw (with-input-from-string ":|foo bar|" read))) + (assert (eq? kw (with-input-from-string "#:|foo bar|" read))) + (assert (string=? "foo bar" (symbol->string kw))) + (assert (string=? ":foo bar" + (with-output-to-string (lambda () (display kw))))) + (assert (string=? ":|foo bar|" + (with-output-to-string (lambda () (write kw))))))) (assert (eq? '|#:| (string->symbol "#:"))) (assert-fail (with-input-from-string "#:" read)) ; empty keyword @@ -366,10 +380,29 @@ (assert (not (keyword? (with-input-from-string ":abc:" read)))) (assert (not (keyword? (with-input-from-string "abc:" read))))) -(assert (string=? ":" (symbol->string (with-input-from-string ":" read)))) -(assert (string=? ":" (symbol->string (with-input-from-string ":||" read)))) +(let ((colon-sym (with-input-from-string ":" read))) + (assert (symbol? colon-sym)) + (assert (not (keyword? colon-sym))) + (assert (string=? ":" (symbol->string colon-sym)))) + +;; The next two cases are a bit dubious. These could also be read as +;; keywords due to the literal quotation. +(let ((colon-sym (with-input-from-string ":||" read))) + (assert (symbol? colon-sym)) + (assert (not (keyword? colon-sym))) + (assert (string=? ":" (symbol->string colon-sym)))) + +(let ((colon-sym (with-input-from-string "||:" read))) + (assert (symbol? colon-sym)) + (assert (not (keyword? colon-sym))) + (assert (string=? ":" (symbol->string colon-sym)))) + (assert-fail (with-input-from-string "#:" read)) +(let ((empty-kw (with-input-from-string "#:||" read))) + (assert (keyword? empty-kw)) + (assert (string=? "" (keyword->string empty-kw)))) + (assert (keyword? (with-input-from-string "42:" read))) (assert (keyword? (with-input-from-string ".:" read))) -- 2.1.4