guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, elisp, updated. release_1-9-1-82-g9e90010


From: Daniel Kraft
Subject: [Guile-commits] GNU Guile branch, elisp, updated. release_1-9-1-82-g9e90010
Date: Thu, 27 Aug 2009 15:16:23 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=9e90010f075412e360890bd155de24c5d583de8a

The branch, elisp has been updated
       via  9e90010f075412e360890bd155de24c5d583de8a (commit)
       via  15eeabfd53326fd292e64f9c0669bc98039ee17f (commit)
       via  98c2d75a15b1d61ac5cefb6338a9459928a23883 (commit)
      from  e840cc654032c60e43aec0f868d67905a3bf5523 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 9e90010f075412e360890bd155de24c5d583de8a
Author: Daniel Kraft <address@hidden>
Date:   Thu Aug 27 17:15:57 2009 +0200

    Replaced generated elisp parser with hand-written one to fix source properties.
    
    * module/language/elisp/parser.scm: Hand-written parser.
    * test-suite/tests/elisp-reader.test: Test for source properties.

commit 15eeabfd53326fd292e64f9c0669bc98039ee17f
Author: Daniel Kraft <address@hidden>
Date:   Thu Aug 27 16:19:03 2009 +0200

    Don't accept backquote/unquote/unquote-splicing any longer in elisp.
    
    The real names \`, \, and \,@ should be used instead and are returned
    now by the real reader.
    
    * module/language/elisp/compile-tree-il.scm: Only accept correct names.

commit 98c2d75a15b1d61ac5cefb6338a9459928a23883
Author: Daniel Kraft <address@hidden>
Date:   Wed Aug 26 22:03:01 2009 +0200

    Error in lexer when 'empty' symbol would have been read.

-----------------------------------------------------------------------

Summary of changes:
 module/language/elisp/compile-tree-il.scm |   12 +--
 module/language/elisp/lexer.scm           |   14 +++-
 module/language/elisp/parser.scm          |  156 +++++++++++++++++++---------
 test-suite/tests/elisp-reader.test        |   14 ++-
 4 files changed, 133 insertions(+), 63 deletions(-)

diff --git a/module/language/elisp/compile-tree-il.scm b/module/language/elisp/compile-tree-il.scm
index e88ac17..b54f7f6 100644
--- a/module/language/elisp/compile-tree-il.scm
+++ b/module/language/elisp/compile-tree-il.scm
@@ -71,20 +71,14 @@
 ; named differently; to make easy adaptions, we define these predicates checking
 ; for a symbol being the car of an unquote/unquote-splicing/backquote form.
 
-; FIXME: Remove the quasiquote/unquote/unquote-splicing symbols when real elisp
-; reader is there.
-
 (define (backquote? sym)
-  (and (symbol? sym) (or (eq? sym 'quasiquote)
-                         (eq? sym '\`))))
+  (and (symbol? sym) (eq? sym '\`)))
 
 (define (unquote? sym)
-  (and (symbol? sym) (or (eq? sym 'unquote)
-                         (eq? sym '\,))))
+  (and (symbol? sym) (eq? sym '\,)))
 
 (define (unquote-splicing? sym)
-  (and (symbol? sym) (or (eq? sym 'unquote-splicing)
-                         (eq? sym '\,@))))
+  (and (symbol? sym) (eq? sym '\,@)))
 
 
 ; Build a call to a primitive procedure nicely.
diff --git a/module/language/elisp/lexer.scm b/module/language/elisp/lexer.scm
index 0a981ca..099c9b6 100644
--- a/module/language/elisp/lexer.scm
+++ b/module/language/elisp/lexer.scm
@@ -316,7 +316,19 @@
                 (get-symbol-or-number port))
               (lambda (type str)
                 (case type
-                  ((symbol) (return 'symbol (string->symbol str)))
+                  ((symbol)
+                   ; str could be empty if the first character is already
+                   ; something not allowed in a symbol (and not escaped)!
+                   ; Take care about that, it is an error because that character
+                   ; should have been handled elsewhere or is invalid in the
+                   ; input.
+                   (if (zero? (string-length str))
+                     (begin
+                       ; Take it out so the REPL might not get into an
+                       ; infinite loop with further reading attempts.
+                       (read-char port)
+                       (error "invalid character in input" c))
+                     (return 'symbol (string->symbol str))))
                   ((integer)
                    ; In elisp, something like "1." is an integer, while
                    ; string->number returns an inexact real.  Thus we
diff --git a/module/language/elisp/parser.scm b/module/language/elisp/parser.scm
index 431eba3..423ee6e 100644
--- a/module/language/elisp/parser.scm
+++ b/module/language/elisp/parser.scm
@@ -1,6 +1,6 @@
 ;;; Guile Emac Lisp
 
-;; Copyright (C) 2001 Free Software Foundation, Inc.
+;; Copyright (C) 2009 Free Software Foundation, Inc.
 
 ;; This program is free software; you can redistribute it and/or modify
 ;; it under the terms of the GNU General Public License as published by
@@ -21,56 +21,112 @@
 
 (define-module (language elisp parser)
   #:use-module (language elisp lexer)
-  #:use-module (language ecmascript parse-lalr)
   #:export (read-elisp))
 
-; The parser (reader) for elisp expressions.  It is implemented using the
-; (text parse-lalr) parser generator and uses my hand-written lexer as
-; the tokenizer.
-
-
-; Build the parser itself using parse-lalr.
-
-(define elisp-parser
-  (lalr-parser (integer float symbol character string
-                paren-open paren-close square-open square-close
-                dot quote backquote unquote unquote-splicing)
-
-    ; Expressions are our main interest.
-    ; It seems the symbol we're interested for return from the parser must
-    ; come very first, so here it is.
-    (expression (integer) -> $1
-                (float) -> $1
-                (symbol) -> $1
-                (character) -> $1
-                (string) -> $1
-                (list) -> $1
-                (quotation) -> $1
-                (vector) -> $1)
-
-    ; Pairs, lists and dotted lists.
-    (partial-list (expression) -> (list $1)
-                  (expression dot expression) -> (cons $1 $3)
-                  (expression partial-list) -> (cons $1 $2))
-    (list (paren-open paren-close) -> '()
-          (paren-open dot expression paren-close) -> $3
-          (paren-open partial-list paren-close) -> $2)
-
-    ; Quotation and unquotation expressions.
-    (quotation (quote expression) -> `(quote ,$2)
-               (backquote expression) -> `(\` ,$2)
-               (unquote expression) -> `(\, ,$2)
-               (unquote-splicing expression) -> `(\,@ ,$2))
-
-    ; Vectors.
-    (vector-elements (expression) -> (list $1)
-                     (expression vector-elements) -> (cons $1 $2))
-    (vector (square-open square-close) -> (make-vector 0)
-            (square-open vector-elements square-close) -> (list->vector $2))))
-
-
-; Use the parser to define the elisp reader function.
-; We only want to read a single expression at a time, so use get-lexer/1.
+; The parser (reader) for elisp expressions.
+; It is hand-written (just as the lexer is) instead of using some parser
+; generator because this allows easier transfer of source properties from the
+; lexer, makes the circular syntax parsing easier (than it would be with
+; (text parse-lalr)) and is easy enough anyways.
+
+
+; Report a parse error.  The first argument is some current lexer token
+; where source information is available should it be useful.
+
+(define (parse-error token msg . args)
+  (apply error msg args))
+
+
+; We need peek-functionality for the next lexer token, this is done with some
+; single token look-ahead storage.  This is handled by a closure which allows
+; getting or peeking the next token.
+; When one expression is fully parsed, we don't want a look-ahead stored here
+; because it would miss from future parsing.  This is verified by the finish
+; action.
+
+(define (make-lexer-buffer lex)
+  (let ((look-ahead #f))
+    (lambda (action)
+      (if (eq? action 'finish)
+        (if look-ahead
+          (error "lexer-buffer is not empty when finished")
+          #f)
+        (begin
+          (if (not look-ahead)
+            (set! look-ahead (lex)))
+          (case action
+            ((peek) look-ahead)
+            ((get)
+             (let ((result look-ahead))
+               (set! look-ahead #f)
+               result))
+            (else (error "invalid lexer-buffer action" action))))))))
+
+
+; Get the contents of a list, where the opening parentheses has already been
+; found.  The same code is used for vectors and lists, where lists allow the
+; dotted tail syntax and vectors not; additionally, the closing parenthesis
+; must of course match.
+
+(define (get-list lex allow-dot close-square)
+  (let* ((next (lex 'peek))
+         (type (car next)))
+    (cond
+      ((eq? type (if close-square 'square-close 'paren-close))
+       (begin
+         (if (not (eq? (car (lex 'get)) type))
+           (error "got different token than peeked"))
+         '()))
+      ((and allow-dot (eq? type 'dot))
+       (begin
+         (if (not (eq? (car (lex 'get)) type))
+           (error "got different token than peeked"))
+         (let ((tail (get-list lex #f close-square)))
+           (if (not (= (length tail) 1))
+             (parse-error next "expected exactly one element after dot"))
+           (car tail))))
+      (else
+        ; Do both parses in exactly this sequence!
+        (let* ((head (get-expression lex))
+               (tail (get-list lex allow-dot close-square)))
+          (cons head tail))))))
+
+
+
+; Parse a single expression from a lexer-buffer.  This is the main routine in
+; our recursive-descent parser.
+
+(define quotation-symbols '((quote . quote)
+                            (backquote . \`)
+                            (unquote . \,)
+                            (unquote-splicing . \,@)))
+
+(define (get-expression lex)
+  (let* ((token (lex 'get))
+         (type (car token))
+         (return (lambda (result)
+                   (if (pair? result)
+                     (set-source-properties! result (source-properties token)))
+                   result)))
+    (case type
+      ((integer float symbol character string)
+       (return (cdr token)))
+      ((quote backquote unquote unquote-splicing)
+       (return (list (assq-ref quotation-symbols type) (get-expression lex))))
+      ((paren-open)
+       (return (get-list lex #t #f)))
+      ((square-open)
+       (return (list->vector (get-list lex #f #t))))
+      (else
+        (parse-error token "expected expression, got" token)))))
+
+
+; Define the reader function based on this; build a lexer, a lexer-buffer,
+; and then parse a single expression to return.
 
 (define (read-elisp port)
-  (elisp-parser (get-lexer/1 port) error))
+  (let* ((lexer (get-lexer port))
+         (lexbuf (make-lexer-buffer lexer))
+         (result (get-expression lexbuf)))
+    (lexbuf 'finish)
+    result))
diff --git a/test-suite/tests/elisp-reader.test b/test-suite/tests/elisp-reader.test
index ab91792..c228283 100644
--- a/test-suite/tests/elisp-reader.test
+++ b/test-suite/tests/elisp-reader.test
@@ -26,9 +26,6 @@
 ; ==============================================================================
 ; Test the lexer.
 
-; This is of course somewhat redundant with the full parser checks, but probably
-; can't hurt and is useful in developing the lexer itself.
-
 (define (get-string-lexer str)
   (call-with-input-string str get-lexer))
 
@@ -139,6 +136,17 @@ test\"ab\"\\ abcd
   (pass-if "only next expression"
     (equal? (parse-str "1 2 3") 1))
 
+  (pass-if "source properties"
+    (let* ((list1 (parse-str "\n\n   (\n(7)  (42))"))
+           (list2 (car list1))
+           (list3 (cadr list1)))
+      (and (= (source-property list1 'line) 3)
+           (= (source-property list1 'column) 4)
+           (= (source-property list2 'line) 4)
+           (= (source-property list2 'column) 1)
+           (= (source-property list3 'line) 4)
+           (= (source-property list3 'column) 6))))
+
   (pass-if "constants"
     (and (equal? (parse-str "-12") -12)
          (equal? (parse-str ".123") 0.123)


hooks/post-receive
-- 
GNU Guile




reply via email to

[Prev in Thread] Current Thread [Next in Thread]