[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/parser-generator 5f65cfc 015/434: More refactoring, us
From: |
ELPA Syncer |
Subject: |
[elpa] externals/parser-generator 5f65cfc 015/434: More refactoring, using lists instead of string as grammar data type |
Date: |
Mon, 29 Nov 2021 15:58:59 -0500 (EST) |
branch: externals/parser-generator
commit 5f65cfcb4d8ab927551227b2df18c844d7741f76
Author: Christian Johansson <christian@cvj.se>
Commit: Christian Johansson <christian@cvj.se>
More refactoring, using lists instead of string as grammar data type
---
parser.el | 232 +++++++++++++++++++++++++++++++++++---------------------------
1 file changed, 132 insertions(+), 100 deletions(-)
diff --git a/parser.el b/parser.el
index 102138f..ca57f8f 100644
--- a/parser.el
+++ b/parser.el
@@ -30,6 +30,10 @@
nil
"Current look-ahead number used.")
+(defvar parser--f-sets
+ nil
+ "Generated F-sets for grammar.")
+
;; Macros
@@ -104,14 +108,15 @@
(error "Invalid look-ahead number k!"))
(setq parser--grammar G)
(setq parser--look-ahead-number k)
+ (setq parser--f-sets nil)
(parser--load-symbols))
-(defun parser--valid-empty-p (symbol)
- "Return whether SYMBOL is empty identifier or not."
- (eq symbol "e"))
+(defun parser--valid-e-p (symbol)
+ "Return whether SYMBOL is the e identifier or not."
+ (eq symbol 'e))
(defun parser--valid-grammar-p (G)
- "Return if grammar G is valid or not. Grammar should contain list with 4
elements: non-terminals (N), terminals (T), productions (P), start (S) where N,
T and P are lists and S is a symbol."
+ "Return if grammar G is valid or not. Grammar should contain list with 4
elements: non-terminals (N), terminals (T), productions (P), start (S) where N,
T and P are lists containing symbols and/or strings and S is a symbol or
string."
(let ((valid-p t))
(unless (listp G)
(setq valid-p nil))
@@ -125,7 +130,9 @@
(not (listp (nth 0 G)))
(not (listp (nth 1 G)))
(not (listp (nth 2 G)))
- (not (stringp (nth 3 G)))))
+ (not (or
+ (stringp (nth 3 G))
+ (symbolp (nth 3 G))))))
(setq valid-p nil))
valid-p))
@@ -146,18 +153,17 @@
(defun parser--valid-sentential-form-p (symbols)
"Return whether SYMBOLS is a valid sentential form in grammar or not."
(let ((is-valid t))
- (let ((symbols-string (symbol-name symbols)))
- (let ((symbols-length (length symbols-string))
- (symbol-index 0))
- (while (and
- is-valid
- (< symbol-index symbols-length))
- (let ((symbol-string (substring symbols-string symbol-index (1+
symbol-index))))
- (unless (or
- (parser--valid-empty-p symbol-string)
- (parser--valid-non-terminal-p symbol-string)
- (parser--valid-terminal-p symbol-string))
- (setq is-valid nil))))))
+ (let ((symbols-length (length symbols))
+ (symbol-index 0))
+ (while (and
+ is-valid
+ (< symbol-index symbols-length))
+ (let ((symbol (nth symbol-index symbols)))
+ (unless (or
+ (parser--valid-e-p symbol)
+ (parser--valid-non-terminal-p symbol)
+ (parser--valid-terminal-p symbol))
+ (setq is-valid nil)))))
is-valid))
(defun parser--valid-terminal-p (symbol)
@@ -191,7 +197,7 @@
(k (nth 0 state))
(i (nth 1 state))
(f-sets (nth 2 state))
- (disallow-empty-first (nth 3 state)))
+ (disallow-e-first (nth 3 state)))
(parser--debug
(message "input-tape-length: %s" input-tape-length)
(message "k: %s" k)
@@ -203,22 +209,22 @@
(let ((leading-terminals (nth 0 stack-symbol))
(all-leading-terminals-p (nth 1 stack-symbol))
(input-tape-index (nth 2 stack-symbol))
- (empty-first-p nil))
+ (e-first-p nil))
(parser--debug
(message "leading-terminals: %s" leading-terminals)
(message "all-leading-terminals-p: %s" all-leading-terminals-p)
(message "input-tape-index: %s" input-tape-index))
;; Flag whether leading-terminal is empty or not
- (when (string= leading-terminals "e")
- (setq empty-first-p t))
+ (when (parser--valid-e-p leading-terminals)
+ (setq e-first-p t))
- (parser--debug (message "empty-first-p: %s" empty-first-p))
+ (parser--debug (message "e-first-p: %s" e-first-p))
;; If leading terminal is empty and we have input-tape left,
disregard it
(when (and
- (not disallow-empty-first)
- empty-first-p
+ (not disallow-e-first)
+ e-first-p
(< input-tape-index input-tape-length))
(parser--debug (message "Disregarding empty first terminal"))
(setq leading-terminals ""))
@@ -229,16 +235,18 @@
(< input-tape-index input-tape-length)
(< leading-terminals-count k)
all-leading-terminals-p)
- (let ((rhs-element (substring input-tape input-tape-index (1+
input-tape-index)))
+ (let ((rhs-element (nth input-tape-index input-tape))
(rhs-type))
(parser--debug (message "rhs-element: %s" rhs-element))
;; Determine symbol type
- (if (string= rhs-element (upcase rhs-element))
- (setq rhs-type 'NON-TERMINAL)
- (if (string= rhs-element "e")
- (setq rhs-type 'EMPTY)
- (setq rhs-type 'TERMINAL)))
+ (cond
+ ((parser--valid-non-terminal-p rhs-element)
+ (setq rhs-type 'NON-TERMINAL))
+ ((parser--valid-e-p rhs-element)
+ (setq rhs-type 'EMPTY))
+ ((parser--valid-terminal-p rhs-element)
+ (setq rhs-type 'TERMINAL)))
(parser--debug (message "rhs-type: %s" rhs-type))
(cond
@@ -262,23 +270,23 @@
;; When we have a leading terminal and
sub-terminal set is empty, don't append it
(when (and
(> leading-terminals-count 0)
- (string= sub-terminal-set "e"))
- (setq sub-terminal-set ""))
+ (parser--valid-e-p
sub-terminal-set))
+ (setq sub-terminal-set nil))
- (let ((sub-rhs-leading-terminals
(concat leading-terminals sub-terminal-set)))
+ (let ((sub-rhs-leading-terminals
(append leading-terminals sub-terminal-set)))
(when (> (length
sub-rhs-leading-terminals) k)
- (setq sub-rhs-leading-terminals
(substring sub-rhs-leading-terminals 0 k)))
+ (setq sub-rhs-leading-terminals
(butlast sub-rhs-leading-terminals (- (length sub-rhs-leading-terminals) k))))
(push `(,sub-rhs-leading-terminals
,all-leading-terminals-p ,(1+ input-tape-index)) stack)))
(setq sub-terminal-index (1+
sub-terminal-index)))))
(parser--debug (message "Sub-terminal-set: %s"
sub-terminal-set))
(when (or
- (not (string= sub-terminal-set "e"))
+ (not (parser--valid-e-p
sub-terminal-set))
(= input-tape-index (1-
input-tape-length)))
- (setq leading-terminals (concat
leading-terminals sub-terminal-set))
+ (setq leading-terminals (append
leading-terminals sub-terminal-set))
(setq leading-terminals-count (+
leading-terminals-count (length sub-terminal-set)))
(when (> leading-terminals-count k)
- (setq leading-terminals (substring
leading-terminals 0 k))
+ (setq leading-terminals (butlast
leading-terminals (- leading-terminals-count k)))
(setq leading-terminals-count k)))))
(parser--debug
(message "Found no subsets for %s %s" rhs-element
(1- i)))))
@@ -286,19 +294,19 @@
((equal rhs-type 'EMPTY)
(if all-leading-terminals-p
- (if disallow-empty-first
+ (if disallow-e-first
(when (= leading-terminals-count 0)
(setq all-leading-terminals-p nil))
(when (and
(= leading-terminals-count 0)
(= input-tape-index (1- input-tape-length)))
- (setq leading-terminals (concat leading-terminals
rhs-element))
+ (setq leading-terminals (append leading-terminals
rhs-element))
(setq leading-terminals-count (1+
leading-terminals-count))))
(setq all-leading-terminals-p nil)))
((equal rhs-type 'TERMINAL)
(when all-leading-terminals-p
- (setq leading-terminals (concat leading-terminals
rhs-element))
+ (setq leading-terminals (append leading-terminals
rhs-element))
(setq leading-terminals-count (1+
leading-terminals-count))))))
(setq input-tape-index (1+ input-tape-index)))
(when (> leading-terminals-count 0)
@@ -306,70 +314,94 @@
f-set))
;; Algorithm 5.5, p. 357
-;; TODO Make this work on strings instead of symbols
(defun parser--first (β &optional disallow-e-first)
"For sentential-form Β, in grammar, calculate first k terminals, optionally
DISALLOW-E-FIRST."
- (unless (parser--sentential-form-p β)
+ (unless (parser--valid-sentential-form-p β)
(error "Invalid sentential form β!"))
- (let ((productions (parser--get-grammar-productions))
- (k parser--look-ahead-number))
- (let ((f-sets (make-hash-table :test 'equal))
- (i 0)
- (i-max (length productions)))
- (while (< i i-max)
- (parser--debug (message "i = %s" i))
- (let ((f-set (make-hash-table :test 'equal)))
-
- ;; Iterate all productions, set F_i
- (dolist (p productions)
- (let ((production-lhs (symbol-name (car p)))
- (production-rhs (cdr p)))
- (parser--debug
- (message "Production-LHS: %s" production-lhs)
- (message "Production-RHS: %s" production-rhs))
-
- ;; Iterate all blocks in RHS
- (let ((f-p-set))
- (dolist (rhs-p production-rhs)
- (let ((rhs-string (symbol-name rhs-p)))
- (let ((rhs-leading-terminals
- (parser--f-set rhs-string `(,k ,i ,f-sets
,disallow-e-first) '(("" t 0)))))
- (parser--debug
- (message "Leading %d terminals at index %s (%s) -> %s =
%s" k i production-lhs rhs-string rhs-leading-terminals))
- (when rhs-leading-terminals
- (when (and
- (listp rhs-leading-terminals)
- (> (length rhs-leading-terminals) 0))
- (dolist (rhs-leading-terminals-string
rhs-leading-terminals)
- (when (and
- (stringp rhs-leading-terminals-string)
- (> (length rhs-leading-terminals-string) 0))
- (push rhs-leading-terminals-string
f-p-set))))))))
-
- ;; Make set distinct
- (setq f-p-set (parser--distinct f-p-set))
+ (let* ((productions (parser--get-grammar-productions))
+ (k parser--look-ahead-number)
+ (i-max (length productions)))
+
+ ;; Generate F-sets only once per grammar
+ (unless parser--f-sets
+ (let ((f-sets (make-hash-table :test 'equal))
+ (i 0))
+ (while (< i i-max)
+ (parser--debug (message "i = %s" i))
+ (let ((f-set (make-hash-table :test 'equal)))
+
+ ;; Iterate all productions, set F_i
+ (dolist (p productions)
+ (let ((production-lhs (car p))
+ (production-rhs (cdr p)))
(parser--debug
- (message "F_%s_%s(%s) = %s" i k production-lhs f-p-set))
- (puthash production-lhs (nreverse f-p-set) f-set))))
- (puthash i f-set f-sets)
- (setq i (+ i 1))))
-
- ;; TODO Iterate each symbol in β using a PDA algorithm
- (let ((symbol-length (length β))
- (symbol-index 0)
- (first-string "")
- (first-length 0))
- (while (and
- (< symbol-index symbol-length)
- (< first-length k))
- (let ((symbol-string (substring β symbol-index (1+ symbol-index))))
- (cond
- ((parser--valid-terminal-p symbol-string)
- (setq first-string (concat first-string symbol-string))
- (setq first-length (1+ first-length)))
- ((parser--valid-non-terminal-p symbol-string)
- ;; TODO Handle this scenario here were a non-terminal can result
in different FIRST sets
- (sort (gethash (symbol-name production) (gethash (1- i-max) f-sets))
'string<))))
+ (message "Production-LHS: %s" production-lhs)
+ (message "Production-RHS: %s" production-rhs))
+
+ ;; Iterate all blocks in RHS
+ (let ((f-p-set))
+ (dolist (rhs-p production-rhs)
+ (let ((rhs-string rhs-p))
+ (let ((rhs-leading-terminals
+ (parser--f-set rhs-string `(,k ,i ,f-sets
,disallow-e-first) '(("" t 0)))))
+ (parser--debug
+ (message "Leading %d terminals at index %s (%s) -> %s
= %s" k i production-lhs rhs-string rhs-leading-terminals))
+ (when rhs-leading-terminals
+ (when (and
+ (listp rhs-leading-terminals)
+ (> (length rhs-leading-terminals) 0))
+ (dolist (rhs-leading-terminals-string
rhs-leading-terminals)
+ (when (and
+ (stringp rhs-leading-terminals-string)
+ (> (length rhs-leading-terminals-string)
0))
+ (push rhs-leading-terminals-string
f-p-set))))))))
+
+ ;; Make set distinct
+ (setq f-p-set (parser--distinct f-p-set))
+ (parser--debug
+ (message "F_%s_%s(%s) = %s" i k production-lhs f-p-set))
+ (puthash production-lhs (nreverse f-p-set) f-set))))
+ (puthash i f-set f-sets)
+ (setq i (+ i 1))))
+ (setq parser--f-sets f-sets)))
+
+ ;; Iterate each symbol in β using a PDA algorithm
+ (let ((state 'parsing)
+ (input-tape β)
+ (input-tape-length (length β))
+ (stack '((0 0 nil)))
+ (first-list nil))
+ (while stack
+ (let ((stack-topmost (pop stack)))
+ (let ((input-tape-index (car stack-topmost))
+ (first-length (car (cdr stack-topmost)))
+ (first (car (cdr (cdr stack-topmost)))))
+ (while (and
+ (< input-tape-index input-tape-length)
+ (< first-length k))
+ (let ((symbol (nth input-tape-index input-tape)))
+ (cond
+ ((parser--valid-terminal-p symbol)
+ (push symbol first)
+ (setq first-length (1+ first-length)))
+ ((parser--valid-non-terminal-p symbol)
+ (let ((symbol-f-set (sort (gethash symbol (gethash (1-
i-max) parser--f-sets)) 'string<)))
+ (when (> (length symbol-f-set) 0)
+ ;; Handle this scenario here were a non-terminal can
result in different FIRST sets
+ (let ((symbol-f-set-index 1)
+ (symbol-f-set-length (length symbol-f-set)))
+ (while (< symbol-f-set-index symbol-f-set-length)
+ (let ((symbol-f-set-element (nth symbol-f-set-index
symbol-f-set)))
+ (let ((alternative-first-length (+ first-length
(length symbol-f-set-element)))
+ (alternative-first (append first
symbol-f-set-element))
+ (alternative-tape-index (1+
input-tape-index)))
+ (push `(,alternative-tape-index
,alternative-first-length ,alternative-first) stack))))))
+ (setq first-length (+ first-length (length (car
symbol-f-set))))
+ (setq first (append first (car symbol-f-set)))))))
+ (setq input-tape-index (1+ input-tape-index)))
+ (when (> first-length 0)
+ (push first first-list)))))
+ first-list)))
(defun parser--v-set (y)
"Calculate valid LRk-sets for the viable-prefix Y in grammar G with
look-ahead K."
- [elpa] branch externals/parser-generator created (now 4a3a51d), ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator b93ab8c 001/434: My initial commit, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator b8faa17 002/434: FIRSTk and EFFk working, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator ee0a623 003/434: Added TRAVIS and LICENSE, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator f5bfa40 004/434: Fixed typo in README, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator 58798c8 010/434: Starting on calculation of valid LK-sets for a valid grammar prefix, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator f9c8348 008/434: Updated Travis and Makefil rule name, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator 5f65cfc 015/434: More refactoring, using lists instead of string as grammar data type,
ELPA Syncer <=
- [elpa] externals/parser-generator f2791c1 022/434: Passed unit test 3 intermediate grammar, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator 5d9b98c 011/434: Added functions to validate G and k and tests, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator 356720c 030/434: Passing all unit tests using new data structure, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator e4fd795 007/434: Added compilation to test, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator 42d92f1 014/434: More refactoring, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator f648b52 020/434: Passing first unit test for FIRST after new data-structure refactor, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator a4bbb2f 026/434: Using PDA algorithm for FIRST when β is above 1 symbol, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator e02d5d7 049/434: More work on calculating valid LR-items, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator 0465b58 045/434: Improved commenting, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator 85dde51 009/434: Added License and Travis build logos, ELPA Syncer, 2021/11/29