>From d6a53663e299c67e3819adde66ee8f32aebd8be8 Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Sun, 26 May 2013 21:33:41 +0200 Subject: [PATCH 2/2] Add support for R7RS's "indented string" escape syntax. This allows the user to "escape" all whitespace surrounding *one* newline with a backslash, causing all of this to be completely collapsed to nothing. This is useful when writing long string literals which should be broken up into multiple lines. This also adds some tests for other R7RS escape syntax, most of which we already supported anyway. There's only the escaped hex scalar value left, which is ambiguous with regard to the old CHICKEN hex scalar syntax. --- library.scm | 16 ++++++++++++++++ tests/r7rs-tests.scm | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/library.scm b/library.scm index cd33b09..68165d9 100644 --- a/library.scm +++ b/library.scm @@ -2521,6 +2521,22 @@ EOF (loop (##sys#read-char-0 port) (r-cons-codepoint n lst)) ))) ((#\\ #\' #\" #\|) (loop (##sys#read-char-0 port) (cons c lst))) + ((#\newline #\space #\tab) + ;; Read "escaped" * * + (let eat-ws ((c c) (nl? #f)) + (case c + ((#\space #\tab) + (eat-ws (##sys#read-char-0 port) nl?)) + ((#\newline) + (if nl? + (loop c lst) + (eat-ws (##sys#read-char-0 port) #t))) + (else + (unless nl? + (##sys#read-warning + port + "escaped whitespace, but no newline - collapsing anyway")) + (loop c lst))))) (else (cond ((and (char-numeric? c) (char>=? c #\0) diff --git a/tests/r7rs-tests.scm b/tests/r7rs-tests.scm index 368f9f4..c0f6ebd 100644 --- a/tests/r7rs-tests.scm +++ b/tests/r7rs-tests.scm @@ -89,11 +89,44 @@ +(SECTION 6 7) + + +;; We try to avoid using the very constructs that we are testing here, +;; hence the slightly cumbersome string construction of -> "\"\\\"" +(define (read-escaped-string x) + (with-input-from-string (string-append (string #\" #\\) x (string #\")) + read)) +(define (escaped-char x) + (string-ref (read-escaped-string x) 0)) + +(test #\alarm escaped-char "a") +(test #\backspace escaped-char "b") +(test #\tab escaped-char "t") +(test #\newline escaped-char "n") +(test #\return escaped-char "r") +(test #\" escaped-char "\"") +(test #\\ escaped-char "\\") +(test #\| escaped-char "|") +;; *ONE* line ending following a backslash escape, along with any +;; preceding or trailing intraline whitespace is collapsed and ignored. +(test #\E escaped-char (string-append (string #\newline) " END")) +(test #\E escaped-char (string-append " " (string #\newline) "END")) +(test #\E escaped-char (string-append " " (string #\newline) "END")) +(test #\E escaped-char (string-append " " (string #\newline) " END")) +;; But not more than one! +(test #\newline escaped-char (string-append " " (string #\newline) " " (string #\newline) " END")) +;; Tabs count as intraline whitespace too +(test #\E escaped-char (string-append (string #\tab) (string #\newline) (string #\tab) " END")) +;; Edge case +(test "" read-escaped-string (string-append " " (string #\newline) " ")) + ;; NOT YET (is ambiguous with existing \xNN syntax in Chicken) #;(test #\tab escaped-char "x9;") #;(test #\tab escaped-char "x09;") + (SECTION 6 8) ;; Symbols are implicitly quoted inside self-evaluating vectors. -- 1.8.2.3