[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Guile-commits] 06/08: Initial peek-char implementation in Scheme
From: |
Andy Wingo |
Subject: |
[Guile-commits] 06/08: Initial peek-char implementation in Scheme |
Date: |
Wed, 04 May 2016 10:43:53 +0000 |
wingo pushed a commit to branch wip-port-refactor
in repository guile.
commit 2ba638092fc890cd33416c6adcbc107e5f5cd0d5
Author: Andy Wingo <address@hidden>
Date: Wed May 4 11:48:05 2016 +0200
Initial peek-char implementation in Scheme
* module/ice-9/ports.scm (EILSEQ, decoding-error, peek-char-and-len/utf8):
(peek-char-and-len/iso-8859-1, peek-char-and-len/iconv):
(peek-char-and-len, %peek-char): New definitions. Missing iconv1 for
peek-char, but enough to benchmark.
---
module/ice-9/ports.scm | 123 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 123 insertions(+)
diff --git a/module/ice-9/ports.scm b/module/ice-9/ports.scm
index 1bf13be..9774e46 100644
--- a/module/ice-9/ports.scm
+++ b/module/ice-9/ports.scm
@@ -255,6 +255,129 @@ interpret its input and output."
(bytevector-u8-ref (port-buffer-bytevector buf)
(port-buffer-cur buf))))))))
+;; GNU/Linux definition; fixme?
+(define-syntax EILSEQ (identifier-syntax 84))
+
+(define-syntax-rule (decoding-error subr port)
+ (throw 'decoding-error subr "input decoding error" EILSEQ port))
+
+(define-inlinable (peek-char-and-len/utf8 port)
+ (define (bad-utf8 len)
+ (if (eq? (port-conversion-strategy port) 'substitute)
+ (values #\? len)
+ (decoding-error "peek-char" port)))
+ (let ((first-byte (peek-byte port)))
+ (cond
+ ((eq? first-byte the-eof-object)
+ (values first-byte 0))
+ ((< first-byte #x80)
+ (values (integer->char first-byte) 1))
+ ((<= #xc2 first-byte #xdf)
+ (call-with-values (lambda () (fill-input port 2))
+ (lambda (buf buffering)
+ (let ((bv (port-buffer-bytevector buf))
+ (cur (port-buffer-cur buf)))
+ (define (ref n)
+ (bytevector-u8-ref bv (+ cur 1)))
+ (when (or (< buffering 2)
+ (not (= (logand (ref 1) #xc0) #x80)))
+ (bad-utf8 1))
+ (values (integer->char
+ (logior (ash (logand first-byte #x1f) 6)
+ (logand (ref 1) #x3f)))
+ 2)))))
+ ((= (logand first-byte #xf0) #xe0)
+ (call-with-values (lambda () (fill-input port 3))
+ (lambda (buf buffering)
+ (let ((bv (port-buffer-bytevector buf))
+ (cur (port-buffer-cur buf)))
+ (define (ref n)
+ (bytevector-u8-ref bv (+ cur 1)))
+ (when (or (< buffering 2)
+ (not (= (logand (ref 1) #xc0) #x80))
+ (and (eq? first-byte #xe0) (< (ref 1) #xa0))
+ (and (eq? first-byte #xed) (< (ref 1) #x9f)))
+ (bad-utf8 1))
+ (when (or (< buffering 3)
+ (not (= (logand (ref 2) #xc0) #x80)))
+ (bad-utf8 2))
+ (values (integer->char
+ (logior (ash (logand first-byte #x0f) 12)
+ (ash (logand (ref 1) #x3f) 6)
+ (logand (ref 2) #x3f)))
+ 3)))))
+ ((<= #xf0 first-byte #xf4)
+ (call-with-values (lambda () (fill-input port 4))
+ (lambda (buf buffering)
+ (let ((bv (port-buffer-bytevector buf))
+ (cur (port-buffer-cur buf)))
+ (define (ref n)
+ (bytevector-u8-ref bv (+ cur 1)))
+ (when (or (< buffering 2)
+ (not (= (logand (ref 1) #xc0) #x80))
+ (and (eq? first-byte #xf0) (< (ref 1) #x90))
+ (and (eq? first-byte #xf4) (< (ref 1) #x8f)))
+ (bad-utf8 1))
+ (when (or (< buffering 3)
+ (not (= (logand (ref 2) #xc0) #x80)))
+ (bad-utf8 2))
+ (when (or (< buffering 4)
+ (not (= (logand (ref 3) #xc0) #x80)))
+ (bad-utf8 3))
+ (values (integer->char
+ (logior (ash (logand first-byte #x07) 18)
+ (ash (logand (ref 1) #x3f) 12)
+ (ash (logand (ref 2) #x3f) 6)
+ (logand (ref 3) #x3f)))
+ 4)))))
+ (else
+ (bad-utf8 1)))))
+
+(define-inlinable (peek-char-and-len/iso-8859-1 port)
+ (let ((byte-or-eof (peek-byte port)))
+ (if (eof-object? byte-or-eof)
+ (values byte-or-eof 0)
+ (values (integer->char byte-or-eof) 1))))
+
+(define (peek-char-and-len/iconv port)
+ (define (bad-input len)
+ (if (eq? (port-conversion-strategy port) 'substitute)
+ (values #\? len)
+ (decoding-error "peek-char" port)))
+ (let lp ((prev-input-size 0))
+ (let* ((input-size (1+ prev-input-size))
+ (buf (fill-input port input-size))
+ (cur (port-buffer-cur buf)))
+ (cond
+ ((<= (- (port-buffer-end buf) cur) prev-input-size)
+ (if (zero? prev-input-size)
+ (values the-eof-object 0)
+ (bad-input prev-input-size)))
+ ;; fixme: takes port arg???
+ ((iconv1 port (port-buffer-bytevector buf) cur input-size
+ (port-conversion-strategy port))
+ => (lambda (char)
+ (values char input-size)))
+ (else
+ (lp input-size))))))
+
+(define-inlinable (peek-char-and-len port)
+ (let ((enc (%port-encoding port)))
+ (call-with-values
+ (lambda ()
+ (case enc
+ ((UTF-8) (peek-char-and-len/utf8 port))
+ ((ISO-8859-1) (peek-char-and-len/iso-8859-1 port))
+ (else (peek-char-and-len/iconv port))))
+ (lambda (char len)
+ (if (port-maybe-consume-initial-byte-order-mark port char len)
+ (peek-char-and-len port)
+ (values char len))))))
+
+(define (%peek-char port)
+ (call-with-values (lambda () (peek-char-and-len port))
+ (lambda (char len)
+ char)))
- [Guile-commits] branch wip-port-refactor updated (1309ab8 -> d7a111b), Andy Wingo, 2016/05/04
- [Guile-commits] 06/08: Initial peek-char implementation in Scheme,
Andy Wingo <=
- [Guile-commits] 01/08: Changes to Scheme fill-input corresponding to C, Andy Wingo, 2016/05/04
- [Guile-commits] 02/08: Minor tweak to Scheme peek-byte., Andy Wingo, 2016/05/04
- [Guile-commits] 07/08: Add integer->char and char->integer opcodes, Andy Wingo, 2016/05/04
- [Guile-commits] 03/08: Port encoding internally represented as symbol, Andy Wingo, 2016/05/04
- [Guile-commits] 05/08: Port refactors to help Scheme peek-char, Andy Wingo, 2016/05/04
- [Guile-commits] 08/08: Spead tweaks to Scheme peek-char, Andy Wingo, 2016/05/04
- [Guile-commits] 04/08: Port conversion strategies internally are symbols, Andy Wingo, 2016/05/04