>From d6a53663e299c67e3819adde66ee8f32aebd8be8 Mon Sep 17 00:00:00 2001
From: Peter Bex
Date: Sun, 26 May 2013 21:33:41 +0200
Subject: [PATCH 2/2] Add support for R7RS's "indented string" escape syntax.
This allows the user to "escape" all whitespace surrounding *one*
newline with a backslash, causing all of this to be completely
collapsed to nothing. This is useful when writing long string
literals which should be broken up into multiple lines.
This also adds some tests for other R7RS escape syntax, most of which
we already supported anyway. There's only the escaped hex scalar
value left, which is ambiguous with regard to the old CHICKEN hex
scalar syntax.
---
library.scm | 16 ++++++++++++++++
tests/r7rs-tests.scm | 33 +++++++++++++++++++++++++++++++++
2 files changed, 49 insertions(+)
diff --git a/library.scm b/library.scm
index cd33b09..68165d9 100644
--- a/library.scm
+++ b/library.scm
@@ -2521,6 +2521,22 @@ EOF
(loop (##sys#read-char-0 port) (r-cons-codepoint n lst)) )))
((#\\ #\' #\" #\|)
(loop (##sys#read-char-0 port) (cons c lst)))
+ ((#\newline #\space #\tab)
+ ;; Read "escaped" intraline-ws* newline intraline-ws*
+ (let eat-ws ((c c) (nl? #f))
+ (case c
+ ((#\space #\tab)
+ (eat-ws (##sys#read-char-0 port) nl?))
+ ((#\newline)
+ (if nl?
+ (loop c lst)
+ (eat-ws (##sys#read-char-0 port) #t)))
+ (else
+ (unless nl?
+ (##sys#read-warning
+ port
+ "escaped whitespace, but no newline - collapsing anyway"))
+ (loop c lst)))))
(else
(cond ((and (char-numeric? c)
(char>=? c #\0)
diff --git a/tests/r7rs-tests.scm b/tests/r7rs-tests.scm
index 368f9f4..c0f6ebd 100644
--- a/tests/r7rs-tests.scm
+++ b/tests/r7rs-tests.scm
@@ -89,11 +89,44 @@
+(SECTION 6 7)
+
+
+;; We try to avoid using the very constructs that we are testing here,
+;; hence the slightly cumbersome string construction of X -> "\"\\X\""
+(define (read-escaped-string x)
+ (with-input-from-string (string-append (string #\" #\\) x (string #\"))
+ read))
+(define (escaped-char x) ; first character of what (read-escaped-string x) returns
+ (string-ref (read-escaped-string x) 0))
+
+(test #\alarm escaped-char "a")
+(test #\backspace escaped-char "b")
+(test #\tab escaped-char "t")
+(test #\newline escaped-char "n")
+(test #\return escaped-char "r")
+(test #\" escaped-char "\"")
+(test #\\ escaped-char "\\")
+(test #\| escaped-char "|")
+;; *ONE* line ending following a backslash escape, along with any
+;; preceding or trailing intraline whitespace, is collapsed and ignored.
+(test #\E escaped-char (string-append (string #\newline) " END"))
+(test #\E escaped-char (string-append " " (string #\newline) "END"))
+(test #\E escaped-char (string-append " " (string #\newline) "END"))
+(test #\E escaped-char (string-append " " (string #\newline) " END"))
+;; But not more than one: a second newline is kept as the first character.
+(test #\newline escaped-char (string-append " " (string #\newline) " " (string #\newline) " END"))
+;; Tabs count as intraline whitespace too
+(test #\E escaped-char (string-append (string #\tab) (string #\newline) (string #\tab) " END"))
+;; Edge case: only escaped whitespace around one newline reads as the empty string
+(test "" read-escaped-string (string-append " " (string #\newline) " "))
+
;; NOT YET (is ambiguous with existing \xNN syntax in Chicken)
#;(test #\tab escaped-char "x9;")
#;(test #\tab escaped-char "x09;")
+
(SECTION 6 8)
;; Symbols are implicitly quoted inside self-evaluating vectors.
--
1.8.2.3