[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Guile-commits] 01/06: Let read-line handle alternate line endings
From: Mike Gran
Subject: [Guile-commits] 01/06: Let read-line handle alternate line endings
Date: Wed, 18 Apr 2018 12:00:28 -0400 (EDT)
mike121 pushed a commit to branch wip-mingw-guile-2.2
in repository guile.
commit 77b33170f4113c1d37f62c66a4807996187d2e24
Author: Michael Gran <address@hidden>
Date: Tue Apr 17 08:22:18 2018 -0700
Let read-line handle alternate line endings
Adds CRLF, NEL (U+0085), LS (U+2028) and PS (U+2029) as line endings
recognized by %read-line, which returns the line ending it found as the
delimiter. In the case of CRLF, %read-line returns the string "\r\n"
as the line ending.
* libguile/rdelim.c (scm_read_line): handle more line delimiters
* test-suite/tests/rdelim.test ("two lines, split, CRLF"): new test
("two long lines, split, CRLF", "two lines, split, NEL"): new tests
("two lines, split, LS", "two lines, split, PS"): new tests
---
libguile/rdelim.c | 41 +++++++++++++++++++++++++++++-------
test-suite/tests/rdelim.test | 49 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 83 insertions(+), 7 deletions(-)
diff --git a/libguile/rdelim.c b/libguile/rdelim.c
index 80962bc..62795b9 100644
--- a/libguile/rdelim.c
+++ b/libguile/rdelim.c
@@ -126,6 +126,7 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
SCM line, strings, result;
scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
size_t index;
+ int cr = 0;
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
@@ -151,12 +152,25 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
buf[index] = scm_getc (port);
switch (buf[index])
{
- case EOF:
case '\n':
delim = buf[index];
- break;
+ break;
+
+ case EOF:
+ case 0x85:
+ case 0x2028:
+ case 0x2029:
+ cr = 0;
+ delim = buf[index];
+ break;
+
+ case '\r':
+ cr = 1;
+ index ++;
+ break;
default:
+ cr = 0;
index++;
}
}
@@ -164,20 +178,33 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
while (delim == 0);
if (SCM_LIKELY (scm_is_false (strings)))
- /* The fast path. */
- line = scm_from_utf32_stringn (buf, index);
+ {
+ /* The fast path. */
+ if (cr)
+ line = scm_from_utf32_stringn (buf, index - 1);
+ else
+ line = scm_from_utf32_stringn (buf, index);
+ }
else
{
/* Aggregate the intermediary results. */
- strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
+ if (cr)
+ strings = scm_cons (scm_from_utf32_stringn (buf, index - 1), strings);
+ else
+ strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
line = scm_string_concatenate (scm_reverse (strings));
}
if (delim == EOF && scm_i_string_length (line) == 0)
result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
else
- result = scm_cons (line,
- delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
+ {
+ if (cr)
+ result = scm_cons (line, scm_from_latin1_string("\r\n"));
+ else
+ result = scm_cons (line,
+ delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
+ }
return result;
#undef LINE_BUFFER_SIZE
diff --git a/test-suite/tests/rdelim.test b/test-suite/tests/rdelim.test
index 3aaa0b2..7f9117b 100644
--- a/test-suite/tests/rdelim.test
+++ b/test-suite/tests/rdelim.test
@@ -62,6 +62,55 @@
(read-line p 'split)))
(eof-object? (read-line p)))))
+ (pass-if "two lines, split, CRLF"
+ (let* ((s "foo\r\nbar\r\n")
+ (p (open-input-string s)))
+ (and (equal? '(("foo" . "\r\n")
+ ("bar" . "\r\n"))
+ (list (read-line p 'split)
+ (read-line p 'split)))
+ (eof-object? (read-line p)))))
+
+ (pass-if "two long lines, split, CRLF"
+ ;; Must be longer than 256 codepoints
+ (let* ((text0 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
+ (text1 (string-append text0 text0 text0 text0 text0))
+ (text2 (string-append text1 "\r\n" text1 "\r\n")))
+ (let* ((s text2)
+ (p (open-input-string s)))
+ (and (equal? `((,text1 . "\r\n")
+ (,text1 . "\r\n"))
+ (list (read-line p 'split)
+ (read-line p 'split)))
+ (eof-object? (read-line p))))))
+
+ (pass-if "two lines, split, NEL"
+ (let* ((s "foo\x85bar\x85")
+ (p (open-input-string s)))
+ (and (equal? '(("foo" . #\x85)
+ ("bar" . #\x85))
+ (list (read-line p 'split)
+ (read-line p 'split)))
+ (eof-object? (read-line p)))))
+
+ (pass-if "two lines, split, LS"
+ (let* ((s "foo\u2028bar\u2028")
+ (p (open-input-string s)))
+ (and (equal? '(("foo" . #\x2028)
+ ("bar" . #\x2028))
+ (list (read-line p 'split)
+ (read-line p 'split)))
+ (eof-object? (read-line p)))))
+
+ (pass-if "two lines, split, PS"
+ (let* ((s "foo\u2029bar\u2029")
+ (p (open-input-string s)))
+ (and (equal? '(("foo" . #\x2029)
+ ("bar" . #\x2029))
+ (list (read-line p 'split)
+ (read-line p 'split)))
+ (eof-object? (read-line p)))))
+
(pass-if "two Greek lines, trim"
(let* ((s "λαμβδα\nμυ\n")
(p (open-input-string s)))
- [Guile-commits] branch wip-mingw-guile-2.2 updated (34131e3 -> 98f4024), Mike Gran, 2018/04/18
- [Guile-commits] 02/06: simplify reading http headers using updated %read-line, Mike Gran, 2018/04/18
- [Guile-commits] 01/06: Let read-line handle alternate line endings,
Mike Gran <=
- [Guile-commits] 03/06: Wrong preprocessor test for include guard for sys/select.h, Mike Gran, 2018/04/18
- [Guile-commits] 05/06: test shouldn't presume UTF-8 can be installed, Mike Gran, 2018/04/18
- [Guile-commits] 06/06: test-unwind leaks a file descriptor, Mike Gran, 2018/04/18
- [Guile-commits] 04/06: test-foreign-object-c needs libgnu, Mike Gran, 2018/04/18