guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 01/06: Let read-line handle alternate line endings


From: Mike Gran
Subject: [Guile-commits] 01/06: Let read-line handle alternate line endings
Date: Wed, 18 Apr 2018 12:00:28 -0400 (EDT)

mike121 pushed a commit to branch wip-mingw-guile-2.2
in repository guile.

commit 77b33170f4113c1d37f62c66a4807996187d2e24
Author: Michael Gran <address@hidden>
Date:   Tue Apr 17 08:22:18 2018 -0700

    Let read-line handle alternate line endings
    
    Adds CRLF, NEL (U+0085), LS (U+2028) and PS (U+2029) as line
    endings.  %read-line returns the delimiter it found: a character
    for NEL, LS and PS, and the string "\r\n" for CRLF.
    
    * libguile/rdelim.c (scm_read_line): handle more line delimiters
    * test-suite/tests/rdelim.test ("two lines, split, CRLF"): new test
      ("two long lines, split, CRLF", "two lines, split, NEL"): new tests
      ("two lines, split, LS", "two lines, split, PS"): new tests
---
 libguile/rdelim.c            | 41 +++++++++++++++++++++++++++++-------
 test-suite/tests/rdelim.test | 49 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 7 deletions(-)

diff --git a/libguile/rdelim.c b/libguile/rdelim.c
index 80962bc..62795b9 100644
--- a/libguile/rdelim.c
+++ b/libguile/rdelim.c
@@ -126,6 +126,7 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
   SCM line, strings, result;
   scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
   size_t index;
+  int cr = 0;
 
   if (SCM_UNBNDP (port))
     port = scm_current_input_port ();
@@ -151,12 +152,25 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
          buf[index] = scm_getc (port);
          switch (buf[index])
            {
-           case EOF:
            case '\n':
              delim = buf[index];
-             break;
+              break;
+
+            case EOF:
+            case 0x85:
+            case 0x2028:
+            case 0x2029:
+              cr = 0;
+              delim = buf[index];
+              break;
+
+            case '\r':
+              cr = 1;
+              index ++;
+              break;
 
            default:
+              cr = 0;
              index++;
            }
        }
@@ -164,20 +178,33 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
   while (delim == 0);
 
   if (SCM_LIKELY (scm_is_false (strings)))
-    /* The fast path.  */
-    line = scm_from_utf32_stringn (buf, index);
+    {
+      /* The fast path.  */
+      if (cr)
+        line = scm_from_utf32_stringn (buf, index - 1);
+      else
+        line = scm_from_utf32_stringn (buf, index);
+    }
   else
     {
       /* Aggregate the intermediary results.  */
-      strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
+      if (cr)
+        strings = scm_cons (scm_from_utf32_stringn (buf, index - 1), strings);
+      else
+        strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
       line = scm_string_concatenate (scm_reverse (strings));
     }
 
   if (delim == EOF && scm_i_string_length (line) == 0)
     result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
   else
-    result = scm_cons (line,
-                      delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
+    {
+      if (cr)
+        result = scm_cons (line, scm_from_latin1_string("\r\n"));
+      else
+        result = scm_cons (line,
+                           delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
+    }
 
   return result;
 #undef LINE_BUFFER_SIZE
diff --git a/test-suite/tests/rdelim.test b/test-suite/tests/rdelim.test
index 3aaa0b2..7f9117b 100644
--- a/test-suite/tests/rdelim.test
+++ b/test-suite/tests/rdelim.test
@@ -62,6 +62,55 @@
                          (read-line p 'split)))
            (eof-object? (read-line p)))))
 
+  (pass-if "two lines, split, CRLF"
+    (let* ((s "foo\r\nbar\r\n")
+           (p (open-input-string s)))
+      (and (equal? '(("foo" . "\r\n")
+                     ("bar" . "\r\n"))
+                   (list (read-line p 'split)
+                         (read-line p 'split)))
+           (eof-object? (read-line p)))))
+
+  (pass-if "two long lines, split, CRLF"
+    ;; Must be longer than 256 codepoints
+    (let* ((text0 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
+           (text1 (string-append text0 text0 text0 text0 text0))
+           (text2 (string-append text1 "\r\n" text1 "\r\n")))
+      (let* ((s text2)
+             (p (open-input-string s)))
+        (and (equal? `((,text1 . "\r\n")
+                       (,text1 . "\r\n"))
+                     (list (read-line p 'split)
+                           (read-line p 'split)))
+             (eof-object? (read-line p))))))
+
+  (pass-if "two lines, split, NEL"
+    (let* ((s "foo\x85bar\x85")
+           (p (open-input-string s)))
+      (and (equal? '(("foo" . #\x85)
+                     ("bar" . #\x85))
+                   (list (read-line p 'split)
+                         (read-line p 'split)))
+           (eof-object? (read-line p)))))
+
+  (pass-if "two lines, split, LS"
+    (let* ((s "foo\u2028bar\u2028")
+           (p (open-input-string s)))
+      (and (equal? '(("foo" . #\x2028)
+                     ("bar" . #\x2028))
+                   (list (read-line p 'split)
+                         (read-line p 'split)))
+           (eof-object? (read-line p)))))
+
+  (pass-if "two lines, split, PS"
+    (let* ((s "foo\u2029bar\u2029")
+           (p (open-input-string s)))
+      (and (equal? '(("foo" . #\x2029)
+                     ("bar" . #\x2029))
+                   (list (read-line p 'split)
+                         (read-line p 'split)))
+           (eof-object? (read-line p)))))
+
   (pass-if "two Greek lines, trim"
     (let* ((s "λαμβδα\nμυ\n")
            (p (open-input-string s)))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]