emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/parser-generator 5f65cfc 015/434: More refactoring, us


From: ELPA Syncer
Subject: [elpa] externals/parser-generator 5f65cfc 015/434: More refactoring, using lists instead of string as grammar data type
Date: Mon, 29 Nov 2021 15:58:59 -0500 (EST)

branch: externals/parser-generator
commit 5f65cfcb4d8ab927551227b2df18c844d7741f76
Author: Christian Johansson <christian@cvj.se>
Commit: Christian Johansson <christian@cvj.se>

    More refactoring, using lists instead of string as grammar data type
---
 parser.el | 232 +++++++++++++++++++++++++++++++++++---------------------------
 1 file changed, 132 insertions(+), 100 deletions(-)

diff --git a/parser.el b/parser.el
index 102138f..ca57f8f 100644
--- a/parser.el
+++ b/parser.el
@@ -30,6 +30,10 @@
   nil
   "Current look-ahead number used.")
 
+(defvar parser--f-sets
+  nil
+  "Generated F-sets for grammar.")
+
 
 ;; Macros
 
@@ -104,14 +108,15 @@
     (error "Invalid look-ahead number k!"))
   (setq parser--grammar G)
   (setq parser--look-ahead-number k)
+  (setq parser--f-sets nil)
   (parser--load-symbols))
 
-(defun parser--valid-empty-p (symbol)
-  "Return whether SYMBOL is empty identifier or not."
-  (eq symbol "e"))
+(defun parser--valid-e-p (symbol)
+  "Return whether SYMBOL is the e identifier or not."
+  (eq symbol 'e))
 
 (defun parser--valid-grammar-p (G)
-  "Return if grammar G is valid or not.  Grammar should contain list with 4 
elements: non-terminals (N), terminals (T), productions (P), start (S) where N, 
T and P are lists and S is a symbol."
+  "Return if grammar G is valid or not.  Grammar should contain list with 4 
elements: non-terminals (N), terminals (T), productions (P), start (S) where N, 
T and P are lists containing symbols and/or strings and S is a symbol or 
string."
   (let ((valid-p t))
     (unless (listp G)
       (setq valid-p nil))
@@ -125,7 +130,9 @@
             (not (listp (nth 0 G)))
             (not (listp (nth 1 G)))
             (not (listp (nth 2 G)))
-            (not (stringp (nth 3 G)))))
+            (not (or
+                  (stringp (nth 3 G))
+                  (symbolp (nth 3 G))))))
       (setq valid-p nil))
     valid-p))
 
@@ -146,18 +153,17 @@
 (defun parser--valid-sentential-form-p (symbols)
   "Return whether SYMBOLS is a valid sentential form in grammar or not."
   (let ((is-valid t))
-    (let ((symbols-string (symbol-name symbols)))
-      (let ((symbols-length (length symbols-string))
-            (symbol-index 0))
-        (while (and
-                is-valid
-                (< symbol-index symbols-length))
-          (let ((symbol-string (substring symbols-string symbol-index (1+ 
symbol-index))))
-            (unless (or
-                     (parser--valid-empty-p symbol-string)
-                     (parser--valid-non-terminal-p symbol-string)
-                     (parser--valid-terminal-p symbol-string))
-              (setq is-valid nil))))))
+    (let ((symbols-length (length symbols))
+          (symbol-index 0))
+      (while (and
+              is-valid
+              (< symbol-index symbols-length))
+        (let ((symbol (nth symbol-index symbols)))
+          (unless (or
+                   (parser--valid-e-p symbol)
+                   (parser--valid-non-terminal-p symbol)
+                   (parser--valid-terminal-p symbol))
+            (setq is-valid nil)))))
     is-valid))
 
 (defun parser--valid-terminal-p (symbol)
@@ -191,7 +197,7 @@
         (k (nth 0 state))
         (i (nth 1 state))
         (f-sets (nth 2 state))
-        (disallow-empty-first (nth 3 state)))
+        (disallow-e-first (nth 3 state)))
     (parser--debug
      (message "input-tape-length: %s" input-tape-length)
      (message "k: %s" k)
@@ -203,22 +209,22 @@
         (let ((leading-terminals (nth 0 stack-symbol))
               (all-leading-terminals-p (nth 1 stack-symbol))
               (input-tape-index (nth 2 stack-symbol))
-              (empty-first-p nil))
+              (e-first-p nil))
           (parser--debug
            (message "leading-terminals: %s" leading-terminals)
            (message "all-leading-terminals-p: %s" all-leading-terminals-p)
            (message "input-tape-index: %s" input-tape-index))
 
           ;; Flag whether leading-terminal is empty or not
-          (when (string= leading-terminals "e")
-            (setq empty-first-p t))
+          (when (parser--valid-e-p leading-terminals)
+            (setq e-first-p t))
 
-          (parser--debug (message "empty-first-p: %s" empty-first-p))
+          (parser--debug (message "e-first-p: %s" e-first-p))
 
           ;; If leading terminal is empty and we have input-tape left, 
disregard it
           (when (and
-                 (not disallow-empty-first)
-                 empty-first-p
+                 (not disallow-e-first)
+                 e-first-p
                  (< input-tape-index input-tape-length))
             (parser--debug (message "Disregarding empty first terminal"))
             (setq leading-terminals ""))
@@ -229,16 +235,18 @@
                     (< input-tape-index input-tape-length)
                     (< leading-terminals-count k)
                     all-leading-terminals-p)
-              (let ((rhs-element (substring input-tape input-tape-index (1+ 
input-tape-index)))
+              (let ((rhs-element (nth input-tape-index input-tape))
                     (rhs-type))
                 (parser--debug (message "rhs-element: %s" rhs-element))
 
                 ;; Determine symbol type
-                (if (string= rhs-element (upcase rhs-element))
-                    (setq rhs-type 'NON-TERMINAL)
-                  (if (string= rhs-element "e")
-                      (setq rhs-type 'EMPTY)
-                    (setq rhs-type 'TERMINAL)))
+                (cond
+                 ((parser--valid-non-terminal-p rhs-element)
+                  (setq rhs-type 'NON-TERMINAL))
+                 ((parser--valid-e-p rhs-element)
+                  (setq rhs-type 'EMPTY))
+                 ((parser--valid-terminal-p rhs-element)
+                  (setq rhs-type 'TERMINAL)))
                 (parser--debug (message "rhs-type: %s" rhs-type))
 
                 (cond
@@ -262,23 +270,23 @@
                                         ;; When we have a leading terminal and 
sub-terminal set is empty, don't append it
                                         (when (and
                                                (> leading-terminals-count 0)
-                                               (string= sub-terminal-set "e"))
-                                          (setq sub-terminal-set ""))
+                                               (parser--valid-e-p 
sub-terminal-set))
+                                          (setq sub-terminal-set nil))
 
-                                        (let ((sub-rhs-leading-terminals 
(concat leading-terminals sub-terminal-set)))
+                                        (let ((sub-rhs-leading-terminals 
(append leading-terminals sub-terminal-set)))
                                           (when (> (length 
sub-rhs-leading-terminals) k)
-                                            (setq sub-rhs-leading-terminals 
(substring sub-rhs-leading-terminals 0 k)))
+                                            (setq sub-rhs-leading-terminals 
(butlast sub-rhs-leading-terminals (- (length sub-rhs-leading-terminals) k))))
                                           (push `(,sub-rhs-leading-terminals 
,all-leading-terminals-p ,(1+ input-tape-index)) stack)))
                                       (setq sub-terminal-index (1+ 
sub-terminal-index)))))
 
                                 (parser--debug (message "Sub-terminal-set: %s" 
sub-terminal-set))
                                 (when (or
-                                       (not (string= sub-terminal-set "e"))
+                                       (not (parser--valid-e-p 
sub-terminal-set))
                                        (= input-tape-index (1- 
input-tape-length)))
-                                  (setq leading-terminals (concat 
leading-terminals sub-terminal-set))
+                                  (setq leading-terminals (append 
leading-terminals sub-terminal-set))
                                   (setq leading-terminals-count (+ 
leading-terminals-count (length sub-terminal-set)))
                                   (when (> leading-terminals-count k)
-                                    (setq leading-terminals (substring 
leading-terminals 0 k))
+                                    (setq leading-terminals (butlast 
leading-terminals (- leading-terminals-count k)))
                                     (setq leading-terminals-count k)))))
                           (parser--debug
                            (message "Found no subsets for %s %s" rhs-element 
(1- i)))))
@@ -286,19 +294,19 @@
 
                  ((equal rhs-type 'EMPTY)
                   (if all-leading-terminals-p
-                      (if disallow-empty-first
+                      (if disallow-e-first
                           (when (= leading-terminals-count 0)
                             (setq all-leading-terminals-p nil))
                         (when (and
                                (= leading-terminals-count 0)
                                (= input-tape-index (1- input-tape-length)))
-                          (setq leading-terminals (concat leading-terminals 
rhs-element))
+                          (setq leading-terminals (append leading-terminals 
rhs-element))
                           (setq leading-terminals-count (1+ 
leading-terminals-count))))
                     (setq all-leading-terminals-p nil)))
 
                  ((equal rhs-type 'TERMINAL)
                   (when all-leading-terminals-p
-                    (setq leading-terminals (concat leading-terminals 
rhs-element))
+                    (setq leading-terminals (append leading-terminals 
rhs-element))
                     (setq leading-terminals-count (1+ 
leading-terminals-count))))))
               (setq input-tape-index (1+ input-tape-index)))
             (when (> leading-terminals-count 0)
@@ -306,70 +314,94 @@
     f-set))
 
 ;; Algorithm 5.5, p. 357
-;; TODO Make this work on strings instead of symbols
 (defun parser--first (β &optional disallow-e-first)
   "For sentential-form Β, in grammar, calculate first k terminals, optionally 
DISALLOW-E-FIRST."
-  (unless (parser--sentential-form-p β)
+  (unless (parser--valid-sentential-form-p β)
     (error "Invalid sentential form β!"))
-  (let ((productions (parser--get-grammar-productions))
-        (k parser--look-ahead-number))
-    (let ((f-sets (make-hash-table :test 'equal))
-          (i 0)
-          (i-max (length productions)))
-      (while (< i i-max)
-        (parser--debug (message "i = %s" i))
-        (let ((f-set (make-hash-table :test 'equal)))
-
-          ;; Iterate all productions, set F_i
-          (dolist (p productions)
-            (let ((production-lhs (symbol-name (car p)))
-                  (production-rhs (cdr p)))
-              (parser--debug
-               (message "Production-LHS: %s" production-lhs)
-               (message "Production-RHS: %s" production-rhs))
-
-              ;; Iterate all blocks in RHS
-              (let ((f-p-set))
-                (dolist (rhs-p production-rhs)
-                  (let ((rhs-string (symbol-name rhs-p)))
-                    (let ((rhs-leading-terminals
-                           (parser--f-set rhs-string `(,k ,i ,f-sets 
,disallow-e-first) '(("" t 0)))))
-                      (parser--debug
-                       (message "Leading %d terminals at index %s (%s) -> %s = 
%s" k i production-lhs rhs-string rhs-leading-terminals))
-                      (when rhs-leading-terminals
-                        (when (and
-                               (listp rhs-leading-terminals)
-                               (> (length rhs-leading-terminals) 0))
-                          (dolist (rhs-leading-terminals-string 
rhs-leading-terminals)
-                            (when (and
-                                   (stringp rhs-leading-terminals-string)
-                                   (> (length rhs-leading-terminals-string) 0))
-                              (push rhs-leading-terminals-string 
f-p-set))))))))
-
-                ;; Make set distinct
-                (setq f-p-set (parser--distinct f-p-set))
+  (let* ((productions (parser--get-grammar-productions))
+         (k parser--look-ahead-number)
+         (i-max (length productions)))
+
+    ;; Generate F-sets only once per grammar
+    (unless parser--f-sets
+      (let ((f-sets (make-hash-table :test 'equal))
+            (i 0))
+        (while (< i i-max)
+          (parser--debug (message "i = %s" i))
+          (let ((f-set (make-hash-table :test 'equal)))
+
+            ;; Iterate all productions, set F_i
+            (dolist (p productions)
+              (let ((production-lhs (car p))
+                    (production-rhs (cdr p)))
                 (parser--debug
-                 (message "F_%s_%s(%s) = %s" i k production-lhs f-p-set))
-                (puthash production-lhs (nreverse f-p-set) f-set))))
-          (puthash i f-set f-sets)
-          (setq i (+ i 1))))
-
-      ;; TODO Iterate each symbol in β using a PDA algorithm
-      (let ((symbol-length (length β))
-            (symbol-index 0)
-            (first-string "")
-            (first-length 0))
-        (while (and
-                (< symbol-index symbol-length)
-                (< first-length k))
-          (let ((symbol-string (substring β symbol-index (1+ symbol-index))))
-            (cond
-             ((parser--valid-terminal-p symbol-string)
-              (setq first-string (concat first-string symbol-string))
-              (setq first-length (1+ first-length)))
-             ((parser--valid-non-terminal-p symbol-string)
-              ;; TODO Handle this scenario here were a non-terminal can result 
in different FIRST sets
-      (sort (gethash (symbol-name production) (gethash (1- i-max) f-sets)) 
'string<))))
+                 (message "Production-LHS: %s" production-lhs)
+                 (message "Production-RHS: %s" production-rhs))
+
+                ;; Iterate all blocks in RHS
+                (let ((f-p-set))
+                  (dolist (rhs-p production-rhs)
+                    (let ((rhs-string rhs-p))
+                      (let ((rhs-leading-terminals
+                             (parser--f-set rhs-string `(,k ,i ,f-sets 
,disallow-e-first) '(("" t 0)))))
+                        (parser--debug
+                         (message "Leading %d terminals at index %s (%s) -> %s 
= %s" k i production-lhs rhs-string rhs-leading-terminals))
+                        (when rhs-leading-terminals
+                          (when (and
+                                 (listp rhs-leading-terminals)
+                                 (> (length rhs-leading-terminals) 0))
+                            (dolist (rhs-leading-terminals-string 
rhs-leading-terminals)
+                              (when (and
+                                     (stringp rhs-leading-terminals-string)
+                                     (> (length rhs-leading-terminals-string) 
0))
+                                (push rhs-leading-terminals-string 
f-p-set))))))))
+
+                  ;; Make set distinct
+                  (setq f-p-set (parser--distinct f-p-set))
+                  (parser--debug
+                   (message "F_%s_%s(%s) = %s" i k production-lhs f-p-set))
+                  (puthash production-lhs (nreverse f-p-set) f-set))))
+            (puthash i f-set f-sets)
+            (setq i (+ i 1))))
+        (setq parser--f-sets f-sets)))
+
+    ;; Iterate each symbol in β using a PDA algorithm
+    (let ((state 'parsing)
+          (input-tape β)
+          (input-tape-length (length β))
+          (stack '((0 0 nil)))
+          (first-list nil))
+      (while stack
+        (let ((stack-topmost (pop stack)))
+          (let ((input-tape-index (car stack-topmost))
+                (first-length (car (cdr stack-topmost)))
+                (first (car (cdr (cdr stack-topmost)))))
+            (while (and
+                    (< input-tape-index input-tape-length)
+                    (< first-length k))
+              (let ((symbol (nth input-tape-index input-tape)))
+                (cond
+                 ((parser--valid-terminal-p symbol)
+                  (push symbol first)
+                  (setq first-length (1+ first-length)))
+                 ((parser--valid-non-terminal-p symbol)
+                  (let ((symbol-f-set (sort (gethash symbol (gethash (1- 
i-max) parser--f-sets)) 'string<)))
+                    (when (> (length symbol-f-set) 0)
+                      ;; Handle this scenario here were a non-terminal can 
result in different FIRST sets
+                      (let ((symbol-f-set-index 1)
+                            (symbol-f-set-length (length symbol-f-set)))
+                        (while (< symbol-f-set-index symbol-f-set-length)
+                          (let ((symbol-f-set-element (nth symbol-f-set-index 
symbol-f-set)))
+                            (let ((alternative-first-length (+ first-length 
(length symbol-f-set-element)))
+                                  (alternative-first (append first 
symbol-f-set-element))
+                                  (alternative-tape-index (1+ 
input-tape-index)))
+                              (push `(,alternative-tape-index 
,alternative-first-length ,alternative-first) stack))))))
+                    (setq first-length (+ first-length (length (car 
symbol-f-set))))
+                    (setq first (append first (car symbol-f-set)))))))
+              (setq input-tape-index (1+ input-tape-index)))
+            (when (> first-length 0)
+              (push first first-list)))))
+      first-list)))
 
 (defun parser--v-set (y)
   "Calculate valid LRk-sets for the viable-prefix Y in grammar G with 
look-ahead K."



reply via email to

[Prev in Thread] Current Thread [Next in Thread]