[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/parser-generator a175c1317a 08/29: Started on refactor
From: |
Christian Johansson |
Subject: |
[elpa] externals/parser-generator a175c1317a 08/29: Started on refactor of e-free-first function to properly handle a edge case |
Date: |
Sat, 12 Feb 2022 02:24:43 -0500 (EST) |
branch: externals/parser-generator
commit a175c1317a0495a3832ce92fb3a7a63ff34fd678
Author: Christian Johansson <christian@cvj.se>
Commit: Christian Johansson <christian@cvj.se>
Started on refactor of e-free-first function to properly handle a edge case
---
parser-generator.el | 581 ++++++++++++++++++------------------------
test/parser-generator-test.el | 10 +
2 files changed, 253 insertions(+), 338 deletions(-)
diff --git a/parser-generator.el b/parser-generator.el
index 6a3befc48a..5c5b923fd1 100644
--- a/parser-generator.el
+++ b/parser-generator.el
@@ -78,11 +78,6 @@
nil
"Generated F-sets for grammar.")
-(defvar
- parser-generator--f-free-sets
- nil
- "Generated e-free F-sets for grammar.")
-
(defvar
parser-generator--look-ahead-number
nil
@@ -161,9 +156,6 @@
(setq
parser-generator--f-sets
nil)
- (setq
- parser-generator--f-free-sets
- nil)
(setq
parser-generator--table-firsts
(make-hash-table :test 'equal)))
@@ -1051,9 +1043,7 @@
(defun parser-generator--generate-f-sets ()
"Generate F-sets for grammar."
;; Generate F-sets only once per grammar
- (unless (and
- parser-generator--f-sets
- parser-generator--f-free-sets)
+ (unless parser-generator--f-sets
(parser-generator--debug
(message "(parser-generator--generate-f-sets)"))
(let ((productions
@@ -1062,209 +1052,190 @@
(max
1
parser-generator--look-ahead-number)))
- (let ((disallow-set '(nil t)))
- (parser-generator--debug
- (message "disallow-set: %s" disallow-set))
- (dolist (disallow-e-first disallow-set)
+ (let ((f-sets (make-hash-table :test 'equal))
+ (i 0)
+ (expanded-all nil)
+ (expanded-all-second nil))
+
+ (while (or
+ (not expanded-all)
+ (not expanded-all-second))
+ ;; Make one iteration after everything has been expanded
+ (when expanded-all
+ (setq
+ expanded-all-second
+ t))
+ (when (> i 100)
+ (error "Endless loop!"))
(parser-generator--debug
- (message "disallow-e-first: %s" disallow-e-first))
- (let ((f-sets (make-hash-table :test 'equal))
- (i 0)
- (expanded-all nil)
- (expanded-all-second nil))
-
- (while (or
- (not expanded-all)
- (not expanded-all-second))
- ;; Make one iteration after everything has been expanded
- (when expanded-all
- (setq
- expanded-all-second
- t))
- (when (> i 100)
- (error "Endless loop!"))
- (parser-generator--debug
- (message "i = %s" i))
- (setq
- expanded-all
- t)
- (let ((f-set (make-hash-table :test 'equal)))
-
- ;; Iterate all productions, set F_i
- (dolist (p productions)
- (let ((production-lhs (car p))
- (production-rhs (cdr p)))
- (parser-generator--debug
- (message
- "Production: %s -> %s"
- production-lhs
- production-rhs))
-
- ;; Iterate all blocks in RHS
- (let ((f-p-set)
- (rhs-expanded-full t))
- (dolist (rhs-p production-rhs)
- (let ((rhs-string rhs-p))
- (let ((rhs-leading-terminals)
- (f-set-return
- (parser-generator--f-set
- rhs-string
- `(
- ,k
- ,i
- ,f-sets
- ,disallow-e-first
- ,production-lhs)
- '((nil t 0)))))
-
- (parser-generator--debug
- (message
- "f-set-return: %s = %s"
- rhs-string
- f-set-return))
-
- (unless (nth 0 f-set-return)
- (let ((unexpanded-non-terminal
- (nth 1 f-set-return)))
- (cond
- ((equal
- unexpanded-non-terminal
- production-lhs)
- (parser-generator--debug
- (message
- "Production '%S' unexpanded due to
self-reference, ignore flag."
- production-lhs)))
- ((gethash
- unexpanded-non-terminal
- f-set)
- (parser-generator--debug
- (message
- "Production '%S' is un-expanded due to
reference to previously processed production '%S', ignore flag."
- production-lhs
- unexpanded-non-terminal
- )))
- (t
- (parser-generator--debug
- (message
- "Expanded-all negative set because
f-set-return '%s' is not fully expanded because '%s' is unexpanded"
- f-set-return
- (nth 1 f-set-return)))
- (setq
- rhs-expanded-full
- nil)
- (setq
- expanded-all
- nil)))))
-
- (setq
- rhs-leading-terminals
- (nth 2 f-set-return))
-
- (parser-generator--debug
- (message
- "Leading %d terminals at index %s: %s -> %s = %s"
- k
- i
- production-lhs
+ (message "i = %s" i))
+ (setq
+ expanded-all
+ t)
+ (let ((f-set (make-hash-table :test 'equal)))
+
+ ;; Iterate all productions, set F_i
+ (dolist (p productions)
+ (let ((production-lhs (car p))
+ (production-rhs (cdr p)))
+ (parser-generator--debug
+ (message
+ "Production: %s -> %s"
+ production-lhs
+ production-rhs))
+
+ ;; Iterate all blocks in RHS
+ (let ((f-p-set)
+ (rhs-expanded-full t))
+ (dolist (rhs-p production-rhs)
+ (let ((rhs-string rhs-p))
+ (let ((rhs-leading-terminals)
+ (f-set-return
+ (parser-generator--f-set
rhs-string
- rhs-leading-terminals))
- (parser-generator--debug
- (message
- "expanded-all: %s"
- expanded-all))
-
- (when rhs-leading-terminals
- (when (and
- (listp rhs-leading-terminals)
- (> (length rhs-leading-terminals) 0))
- (dolist
- (rhs-leading-terminals-element
- rhs-leading-terminals)
- (push
- rhs-leading-terminals-element
- f-p-set)))))))
-
- ;; If we have multiple equal LHS
- ;; merge them
- (when (
- gethash
- production-lhs
- f-set)
- (let ((existing-f-set
- (gethash
+ `(
+ ,k
+ ,i
+ ,f-sets
+ ,production-lhs)
+ '((nil t 0)))))
+
+ (parser-generator--debug
+ (message
+ "f-set-return: %s = %s"
+ rhs-string
+ f-set-return))
+
+ (unless (nth 0 f-set-return)
+ (let ((unexpanded-non-terminal
+ (nth 1 f-set-return)))
+ (cond
+ ((equal
+ unexpanded-non-terminal
+ production-lhs)
+ (parser-generator--debug
+ (message
+ "Production '%S' unexpanded due to
self-reference, ignore flag."
+ production-lhs)))
+ ((gethash
+ unexpanded-non-terminal
+ f-set)
+ (parser-generator--debug
+ (message
+ "Production '%S' is un-expanded due to
reference to previously processed production '%S', ignore flag."
production-lhs
- f-set)))
-
- ;; If another RHS has not been fully expanded
- ;; mark LHS as not fully expanded
- (unless (nth 0 existing-f-set)
- (parser-generator--debug
- (message
- "Expanded-all negative set for LHS '%s' because
a alternative RHS '%s' is not fully expanded"
- production-lhs
- existing-f-set))
- (setq
- expanded-all
- nil)
- (setq
- rhs-expanded-full
- nil))
-
- (setq f-p-set
- (append
- f-p-set
- (nth 1 existing-f-set)))))
-
- ;; Make set distinct
- (setq
- f-p-set
- (parser-generator--distinct
- f-p-set))
- (puthash
- production-lhs
- (list
- rhs-expanded-full
- (reverse f-p-set))
- f-set)
- (parser-generator--debug
- (message
- "F_%s%s = %s"
- i
- production-lhs
- (gethash
+ unexpanded-non-terminal
+ )))
+ (t
+ (parser-generator--debug
+ (message
+ "Expanded-all negative set because
f-set-return '%s' is not fully expanded because '%s' is unexpanded"
+ f-set-return
+ (nth 1 f-set-return)))
+ (setq
+ rhs-expanded-full
+ nil)
+ (setq
+ expanded-all
+ nil)))))
+
+ (setq
+ rhs-leading-terminals
+ (nth 2 f-set-return))
+
+ (parser-generator--debug
+ (message
+ "Leading %d terminals at index %s: %s -> %s = %s"
+ k
+ i
+ production-lhs
+ rhs-string
+ rhs-leading-terminals))
+ (parser-generator--debug
+ (message
+ "expanded-all: %s"
+ expanded-all))
+
+ (when rhs-leading-terminals
+ (when (and
+ (listp rhs-leading-terminals)
+ (> (length rhs-leading-terminals) 0))
+ (dolist
+ (rhs-leading-terminals-element
+ rhs-leading-terminals)
+ (push
+ rhs-leading-terminals-element
+ f-p-set)))))))
+
+ ;; If we have multiple equal LHS
+ ;; merge them
+ (when (
+ gethash
production-lhs
- f-set))))))
+ f-set)
+ (let ((existing-f-set
+ (gethash
+ production-lhs
+ f-set)))
+
+ ;; If another RHS has not been fully expanded
+ ;; mark LHS as not fully expanded
+ (unless (nth 0 existing-f-set)
+ (parser-generator--debug
+ (message
+ "Expanded-all negative set for LHS '%s' because a
alternative RHS '%s' is not fully expanded"
+ production-lhs
+ existing-f-set))
+ (setq
+ expanded-all
+ nil)
+ (setq
+ rhs-expanded-full
+ nil))
- (puthash
- i
- f-set
- f-sets)
- (setq
- i
- (+ i 1))))
+ (setq f-p-set
+ (append
+ f-p-set
+ (nth 1 existing-f-set)))))
- (if disallow-e-first
- (progn
+ ;; Make set distinct
(setq
- parser-generator--f-free-sets
- (gethash
- (1- i)
- f-sets))
+ f-p-set
+ (parser-generator--distinct
+ f-p-set))
+ (puthash
+ production-lhs
+ (list
+ rhs-expanded-full
+ (reverse f-p-set))
+ f-set)
(parser-generator--debug
(message
- "E-FREE-FIRST max-index: %s, contents: %s"
- (1- i)
- parser-generator--f-free-sets)))
- (setq
- parser-generator--f-sets
- (gethash
- (1- i)
- f-sets))
- (parser-generator--debug
- (message
- "FIRST max-index: %s, contents: %s"
- (1- i)
- parser-generator--f-sets)))))))
+ "F_%s%s = %s"
+ i
+ production-lhs
+ (gethash
+ production-lhs
+ f-set))))))
+
+ (puthash
+ i
+ f-set
+ f-sets)
+ (setq
+ i
+ (+ i 1))))
+
+ (setq
+ parser-generator--f-sets
+ (gethash
+ (1- i)
+ f-sets))
+ (parser-generator--debug
+ (message
+ "FIRST max-index: %s, contents: %s"
+ (1- i)
+ parser-generator--f-sets))))
(parser-generator--debug
(message "Generated F-sets"))))
@@ -1328,12 +1299,10 @@
(k (nth 0 state))
(i (nth 1 state))
(f-sets (nth 2 state))
- (disallow-e-first (nth 3 state))
- (lhs (nth 4 state))
+ (lhs (nth 3 state))
(expanded-all t)
(unexpanded-non-terminal nil))
(parser-generator--debug
- (message "disallow-3-first: %s" disallow-e-first)
(message "input-tape-length: %s" input-tape-length)
(message "k: %s" k)
(message "i: %s" i))
@@ -1462,28 +1431,15 @@
(message "alternative-set is
the e identifier"))
;; Branch off here in two
separate tracks, one with the e-identifier appended and one without
- (if disallow-e-first
- (progn
- (when (and
-
all-leading-terminals-p
- (>
leading-terminals-count 0))
- (let ((branch `(
-
,leading-terminals
-
,all-leading-terminals-p
- ,(1+
input-tape-index))))
-
(parser-generator--debug (message "branched off 2: %s" branch))
- ;; Branch off here
with a separate track where this e-identifier is ignored
- (push branch
stack))))
-
- (when all-leading-terminals-p
- (let ((branch
- `(
- ,leading-terminals
-
,all-leading-terminals-p
- ,(1+
input-tape-index))))
- (parser-generator--debug
(message "branched off 1: %s" branch))
- ;; Branch off here with
a separate track where this e-identifier is ignored
- (push branch stack))))
+ (when all-leading-terminals-p
+ (let ((branch
+ `(
+ ,leading-terminals
+
,all-leading-terminals-p
+ ,(1+
input-tape-index))))
+ (parser-generator--debug
(message "branched off 1: %s" branch))
+ ;; Branch off here with a
separate track where this e-identifier is ignored
+ (push branch stack)))
(when all-leading-terminals-p
(let
((alternative-leading-terminals
@@ -1536,41 +1492,26 @@
(message "sub-terminal-set is the e
identifier"))
;; Branch off here in two separate
tracks, one with the e-identifier appended and one without
- (if disallow-e-first
- (progn
- (when (and
- all-leading-terminals-p
- (> leading-terminals-count
0))
- (let ((branch
- `(
- ,leading-terminals
- ,all-leading-terminals-p
- ,(1+
input-tape-index))))
- ;; Branch off here with a
separate track where this e-identifier is ignored
- (parser-generator--debug
(message "branched off 4: %s" branch))
- (push branch stack)))
-
- (setq all-leading-terminals-p nil))
-
- ;; Add e-identifier to leading
terminals when
- ;; we have not found any leading
terminals
- ;; and we are at the last symbol in
input-tape
-
- (when all-leading-terminals-p
- (let ((branch
- `(
- ,leading-terminals
- ,all-leading-terminals-p
- ,(1+ input-tape-index))))
- ;; Branch off here with a separate
track where this e-identifier is ignored
- (parser-generator--debug (message
"branched off 5: %s" branch))
- (push branch stack)))
-
- (parser-generator--debug (message
"leading-terminals-1: %s" leading-terminals))
- (setq leading-terminals
(parser-generator--merge-max-terminals leading-terminals sub-terminal-set k))
- (parser-generator--debug (message
"leading-terminals-2: %s" leading-terminals))
- (setq leading-terminals-count (length
leading-terminals))
- (setq all-leading-terminals-p nil)))
+
+ ;; Add e-identifier to leading terminals
when
+ ;; we have not found any leading
terminals
+ ;; and we are at the last symbol in
input-tape
+
+ (when all-leading-terminals-p
+ (let ((branch
+ `(
+ ,leading-terminals
+ ,all-leading-terminals-p
+ ,(1+ input-tape-index))))
+ ;; Branch off here with a separate
track where this e-identifier is ignored
+ (parser-generator--debug (message
"branched off 5: %s" branch))
+ (push branch stack)))
+
+ (parser-generator--debug (message
"leading-terminals-1: %s" leading-terminals))
+ (setq leading-terminals
(parser-generator--merge-max-terminals leading-terminals sub-terminal-set k))
+ (parser-generator--debug (message
"leading-terminals-2: %s" leading-terminals))
+ (setq leading-terminals-count (length
leading-terminals))
+ (setq all-leading-terminals-p nil))
(parser-generator--debug (message
"leading-terminals-3: %s" leading-terminals))
(setq leading-terminals
(parser-generator--merge-max-terminals leading-terminals sub-terminal-set k))
@@ -1612,44 +1553,27 @@
nil)))
((equal rhs-type 'E-IDENTIFIER)
- (if disallow-e-first
- (progn
- (when (and
- all-leading-terminals-p
- (> leading-terminals-count 0))
- ;; Branch off here with a separate track where this
e-identifier is ignored
- (let ((branch
- `(
- ,leading-terminals
- ,all-leading-terminals-p
- ,(1+ input-tape-index))))
- (parser-generator--debug (message "branched off 6:
%s" branch))
- (push branch stack)))
-
- (setq all-leading-terminals-p nil))
- ;; Add e-identifier to leading terminals when
- ;; we have not found any leading terminals
- ;; and we are at the last symbol in input-tape
-
- (when all-leading-terminals-p
- ;; Branch off here with a separate track where this
e-identifier is ignored
- (let ((branch
- `(
- ,leading-terminals
- ,all-leading-terminals-p
- ,(1+ input-tape-index))))
- (parser-generator--debug (message "branched off 7: %s"
branch))
- (push branch stack)))
-
- (setq leading-terminals (append leading-terminals
rhs-element))
- (setq leading-terminals-count (1+ leading-terminals-count))
- (setq all-leading-terminals-p nil)))
+ ;; Add e-identifier to leading terminals when
+ ;; we have not found any leading terminals
+ ;; and we are at the last symbol in input-tape
+
+ (when all-leading-terminals-p
+ ;; Branch off here with a separate track where this
e-identifier is ignored
+ (let ((branch
+ `(
+ ,leading-terminals
+ ,all-leading-terminals-p
+ ,(1+ input-tape-index))))
+ (parser-generator--debug (message "branched off 7: %s"
branch))
+ (push branch stack)))
+
+ (setq leading-terminals (append leading-terminals
rhs-element))
+ (setq leading-terminals-count (1+ leading-terminals-count))
+ (setq all-leading-terminals-p nil))
((equal rhs-type 'TERMINAL)
(setq leading-terminals (append leading-terminals (list
rhs-element)))
- (setq leading-terminals-count (1+ leading-terminals-count)))
-
- ))
+ (setq leading-terminals-count (1+
leading-terminals-count)))))
(setq input-tape-index (1+ input-tape-index)))
(when (> leading-terminals-count 0)
@@ -1679,20 +1603,18 @@
"%S-%s"
β
disallow-e-first)))
- (unless
- (gethash
- hash-key
- parser-generator--table-firsts)
+ (unless (gethash
+ hash-key
+ parser-generator--table-firsts)
(unless (listp β)
(setq β (list β)))
(unless (or
ignore-validation
(parser-generator--valid-sentential-form-p β))
(error "Invalid sentential form β! %s" β))
- (let ((k
- (max
- 1
- parser-generator--look-ahead-number)))
+ (let ((k (max
+ 1
+ parser-generator--look-ahead-number)))
;; Generate F-sets only once per grammar
(parser-generator--generate-f-sets)
@@ -1769,36 +1691,19 @@
;; Load the pre-generated F-set
;; if it's the first symbol and we are using
;; E-FREE-FIRST then use separate hash-table
- (if (and
- disallow-e-first
- (= first-length 0))
- (progn
- (parser-generator--debug
- (message
- "gethash: %s"
- (gethash
- symbol
- parser-generator--f-free-sets)))
- (setq
- symbol-f-set
- (nth
- 1
- (gethash
- symbol
- parser-generator--f-free-sets))))
- (parser-generator--debug
- (message
- "gethash: %s"
- (gethash
- symbol
- parser-generator--f-sets)))
- (setq
- symbol-f-set
- (nth
- 1
- (gethash
- symbol
- parser-generator--f-sets))))
+ (parser-generator--debug
+ (message
+ "gethash: %s"
+ (gethash
+ symbol
+ parser-generator--f-sets)))
+ (setq
+ symbol-f-set
+ (nth
+ 1
+ (gethash
+ symbol
+ parser-generator--f-sets)))
(parser-generator--debug
(message
"symbol-f-set: %s"
diff --git a/test/parser-generator-test.el b/test/parser-generator-test.el
index d8b0a204c7..437a4aa76e 100644
--- a/test/parser-generator-test.el
+++ b/test/parser-generator-test.el
@@ -545,6 +545,16 @@
(parser-generator--e-free-first '(a S b))))
(message "Passed empty-free-first 2 with trailing e-identifier 1")
+ ;; TODO Make this pass
+ (parser-generator-set-grammar
+ '((Sp S R T) (a b c) ((Sp S) (S (R S) (R)) (R (a b T)) (T (a T) (c) (e)))
Sp))
+ (parser-generator-set-look-ahead-number 2)
+ (parser-generator-process-grammar)
+ (should
+ (equal
+ '((a a) (a c) (a e) (c e))
+ (parser-generator--e-free-first 'T)))
+
(message "Passed tests for (parser-generator--empty-free-first)"))
(defun
parser-generator-test--get-grammar-context-sensitive-attributes-by-production-number
()
- [elpa] externals/parser-generator 0fa8261ed2 11/29: Passing some tests for FIRST, (continued)
- [elpa] externals/parser-generator 0fa8261ed2 11/29: Passing some tests for FIRST, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator 3bf81567ac 05/29: Added TODO notes and a failing test for e-free-first, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator 4e4907da84 10/29: More wrestling with FIRST and E-FREE-FIRST, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator 6ffa2a0290 15/29: More work on FIRST function, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator efe98cb71a 14/29: More tweaks of FIRST and E-FREE-FIRST, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator a7a321ca93 28/29: Added link to TODO document, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator e1f3fb4042 18/29: More work on FIRST, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator 0e1fbf9cef 07/29: More debugging of edge case, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator 653b8edece 17/29: Added failing test for generate-f-sets, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator 4c34af706f 29/29: Improved documentation, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator a175c1317a 08/29: Started on refactor of e-free-first function to properly handle a edge case,
Christian Johansson <=
- [elpa] externals/parser-generator 94fa7c3732 06/29: Cleaning up of e-free-first test, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator d2227ad65e 13/29: More wrestling with FIRST and E-FREE-FIRST calculation, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator d85a3ae246 16/29: Passing more tests, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator e2e464bb17 26/29: Updated version, date and TODO, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator dced2e199f 22/29: Sorting LR-action tables before outputting them in messages, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator 69fc89898e 19/29: Passing all tests for FIRST and E-FREE-FIRST with new algorithm, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator 6726c5231e 24/29: Fixed conflict, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator 98dc561880 23/29: FIRST() and E-FREE-FIRST() passing LR-test for k=0, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator 432e3732f2 20/29: Fixed some byte-compilation warnings, Christian Johansson, 2022/02/12
- [elpa] externals/parser-generator ba2bda38da 25/29: Added use of default conflict resolution flag in action-table generation, Christian Johansson, 2022/02/12