emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/parser-generator b8faa17 002/434: FIRSTk and EFFk worki


From: ELPA Syncer
Subject: [elpa] externals/parser-generator b8faa17 002/434: FIRSTk and EFFk working
Date: Mon, 29 Nov 2021 15:58:56 -0500 (EST)

branch: externals/parser-generator
commit b8faa171a03077510df95c1eaa81d907a84f5fa5
Author: Christian Johansson <christian@cvj.se>
Commit: Christian Johansson <christian@cvj.se>

    FIRSTk and EFFk working
---
 Makefile            |  20 +++++
 README.md           |  25 ++++++-
 emacs-parser.el     |   0
 parser.el           | 207 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 test/parser-test.el | 197 +++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 448 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index e69de29..2c072f5 100644
--- a/Makefile
+++ b/Makefile
@@ -0,0 +1,20 @@
+EMACS = emacs
+ifdef emacs
+       EMACS = $(emacs)
+endif
+EMACS_CMD := $(EMACS) -Q -batch -L . -L test/
+
+EL  := parser.el test/parser-test.el
+ELC := $(EL:.el=.elc)
+
+.PHONY: clean
+clean:
+       rm -f $(ELC)
+
+.PHONY: compile
+compile:
+       $(EMACS_CMD) -f batch-byte-compile $(EL)
+
+.PHONY: test
+test:
+       $(EMACS_CMD) -l test/parser-test.el -f "parser-test"
diff --git a/README.md b/README.md
index bb83076..d46d0a8 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,27 @@
 # Emacs Parser
 
-The idea of this plugin is to provide functions for various kinds of parsing.
+The idea of this plugin is to provide functions for various kinds of 
context-free grammar parsing. This project is about implementing algorithms 
described in the book `The Theory of Parsing, Translation and Compiling (Volume 
1)`.
 
+## Lexical Analysis
+
+We use a regular-language based lexical analyzer that can be implemented by a 
finite-state-machine (FSM).
+
+WIP
+
+## Syntax Analysis / Parsing
+
+We use a deterministic push down transducer (DPDT) based algorithms.
+
+### Top-down
+#### With backtracking
+#### Without backtracking
+### Bottom-up
+#### With backtracking
+#### Without backtracking
+##### LL(k)
+##### LR(k)
+##### LALR(k)
+
+## Test
+
+Run in terminal `make clean && make test`
diff --git a/emacs-parser.el b/emacs-parser.el
deleted file mode 100644
index e69de29..0000000
diff --git a/parser.el b/parser.el
new file mode 100644
index 0000000..a6af40c
--- /dev/null
+++ b/parser.el
@@ -0,0 +1,207 @@
+;;; parser.el --- LR(k) Parser -*- lexical-binding: t -*-
+
+
+;;; Commentary:
+
+
+;;; Code:
+
+(defvar parser--debug nil)
+
+
+;; page 402
+
+(defmacro parser--debug (&rest message)
+  "Output MESSAGE but only if debug is enabled."
+  `(when parser--debug
+     ,@message))
+
+(defun parser--distinct (elements)
+  "Return distinct of ELEMENTS."
+  (let ((processed (make-hash-table :test 'equal))
+        (new-elements))
+    (dolist (element elements)
+      (unless (gethash element processed)
+        (puthash element t processed)
+        (push element new-elements)))
+    (nreverse new-elements)))
+
+;; page 377, Algorithm 5.5
+(defun parser--first (k production productions &optional disallow-empty-first)
+  "Calculate first K tokens of PRODUCTION in PRODUCTIONS, optionally 
DISALLOW-EMPTY-FIRST."
+  (let ((f-sets (make-hash-table :test 'equal))
+        (i 0)
+        (i-max (length productions)))
+    (while (< i i-max)
+      (parser--debug (message "i = %s" i))
+      (let ((f-set (make-hash-table :test 'equal)))
+
+        ;; Iterate all productions, set F_i
+        (dolist (p productions)
+          (let ((production-lhs (symbol-name (car p)))
+                (production-rhs (cdr p)))
+            (parser--debug
+             (message "Production-LHS: %s" production-lhs)
+             (message "Production-RHS: %s" production-rhs))
+
+            ;; Iterate all blocks in RHS
+            (let ((f-p-set))
+              (dolist (rhs-p production-rhs)
+                (let ((rhs-string (symbol-name rhs-p)))
+                  (let ((rhs-leading-terminals
+                         (parser--f-set rhs-string `(,k ,i ,f-sets 
,disallow-empty-first) '(("" t 0)))))
+                    (parser--debug
+                     (message "Leading %d terminals at index %s (%s) -> %s = 
%s" k i production-lhs rhs-string rhs-leading-terminals))
+                    (when rhs-leading-terminals
+                      (when (and
+                             (listp rhs-leading-terminals)
+                             (> (length rhs-leading-terminals) 0))
+                        (dolist (rhs-leading-terminals-string 
rhs-leading-terminals)
+                          (when (and
+                                 (stringp rhs-leading-terminals-string)
+                                 (> (length rhs-leading-terminals-string) 0))
+                            (push rhs-leading-terminals-string f-p-set))))))))
+
+              ;; Make set distinct
+              (setq f-p-set (parser--distinct f-p-set))
+              (parser--debug
+               (message "F_%s_%s(%s) = %s" i k production-lhs f-p-set))
+              (puthash production-lhs (nreverse f-p-set) f-set))))
+        (puthash i f-set f-sets)
+        (setq i (+ i 1))))
+    (sort (gethash (symbol-name production) (gethash (1- i-max) f-sets)) 
'string<)))
+
+(defun parser--f-set (input-tape state stack)
+  "A deterministic push-down transducer (DPDT) for building F-sets from 
INPUT-TAPE, STATE and STACK."
+  (parser--debug
+   (message "(parser--f-set)")
+   (message "input-tape: %s" input-tape)
+   (message "state: %s" state)
+   (message "stack: %s" stack))
+
+  (let ((f-set)
+        (input-tape-length (length input-tape))
+        (k (nth 0 state))
+        (i (nth 1 state))
+        (f-sets (nth 2 state))
+        (disallow-empty-first (nth 3 state)))
+    (parser--debug
+     (message "input-tape-length: %s" input-tape-length)
+     (message "k: %s" k)
+     (message "i: %s" i))
+    (while stack
+      (let ((stack-symbol (pop stack)))
+        (parser--debug
+         (message "Stack-symbol: %s" stack-symbol))
+        (let ((leading-terminals (nth 0 stack-symbol))
+              (all-leading-terminals-p (nth 1 stack-symbol))
+              (input-tape-index (nth 2 stack-symbol))
+              (empty-first-p nil))
+          (parser--debug
+           (message "leading-terminals: %s" leading-terminals)
+           (message "all-leading-terminals-p: %s" all-leading-terminals-p)
+           (message "input-tape-index: %s" input-tape-index))
+
+          ;; Flag whether leading-terminal is empty or not
+          (when (string= leading-terminals "e")
+            (setq empty-first-p t))
+
+          (parser--debug (message "empty-first-p: %s" empty-first-p))
+
+          ;; If leading terminal is empty and we have input-tape left, 
disregard it
+          (when (and
+                 (not disallow-empty-first)
+                 empty-first-p
+                 (< input-tape-index input-tape-length))
+            (parser--debug (message "Disregarding empty first terminal"))
+            (setq leading-terminals ""))
+
+          (let ((leading-terminals-count (length leading-terminals)))
+            (parser--debug (message "leading-terminals-count: %s" 
leading-terminals-count))
+            (while (and
+                    (< input-tape-index input-tape-length)
+                    (< leading-terminals-count k)
+                    all-leading-terminals-p)
+              (let ((rhs-element (substring input-tape input-tape-index (1+ 
input-tape-index)))
+                    (rhs-type))
+                (parser--debug (message "rhs-element: %s" rhs-element))
+
+                ;; Determine symbol type
+                (if (string= rhs-element (upcase rhs-element))
+                    (setq rhs-type 'NON-TERMINAL)
+                  (if (string= rhs-element "e")
+                      (setq rhs-type 'EMPTY)
+                    (setq rhs-type 'TERMINAL)))
+                (parser--debug (message "rhs-type: %s" rhs-type))
+
+                (cond
+
+                 ((equal rhs-type 'NON-TERMINAL)
+                  (if (> i 0)
+                      (let ((sub-terminal-sets (gethash rhs-element (gethash 
(1- i) f-sets))))
+                        (if sub-terminal-sets
+                            (progn
+                              (parser--debug
+                               (message "Sub-terminal-sets F_%s_%s(%s) = %s 
(%d)" (1- i) k rhs-element sub-terminal-sets (length sub-terminal-sets)))
+                              (let ((sub-terminal-set (car sub-terminal-sets)))
+
+                                (unless (= (length sub-terminal-sets) 1)
+                                  ;; Should branch off here, each unique 
permutation should be included in set
+                                  ;; Follow first alternative in this scope 
but follow the rest in separate scopes
+                                  (let ((sub-terminal-index 0))
+                                    (dolist (sub-terminal-set 
sub-terminal-sets)
+                                      (unless (= sub-terminal-index 0)
+
+                                        ;; When we have a leading terminal and 
sub-terminal set is empty, don't append it
+                                        (when (and
+                                               (> leading-terminals-count 0)
+                                               (string= sub-terminal-set "e"))
+                                          (setq sub-terminal-set ""))
+
+                                        (let ((sub-rhs-leading-terminals 
(concat leading-terminals sub-terminal-set)))
+                                          (when (> (length 
sub-rhs-leading-terminals) k)
+                                            (setq sub-rhs-leading-terminals 
(substring sub-rhs-leading-terminals 0 k)))
+                                          (push `(,sub-rhs-leading-terminals 
,all-leading-terminals-p ,(1+ input-tape-index)) stack)))
+                                      (setq sub-terminal-index (1+ 
sub-terminal-index)))))
+
+                                (parser--debug (message "Sub-terminal-set: %s" 
sub-terminal-set))
+                                (when (or
+                                       (not (string= sub-terminal-set "e"))
+                                       (= input-tape-index (1- 
input-tape-length)))
+                                  (setq leading-terminals (concat 
leading-terminals sub-terminal-set))
+                                  (setq leading-terminals-count (+ 
leading-terminals-count (length sub-terminal-set)))
+                                  (when (> leading-terminals-count k)
+                                    (setq leading-terminals (substring 
leading-terminals 0 k))
+                                    (setq leading-terminals-count k)))))
+                          (parser--debug
+                           (message "Found no subsets for %s %s" rhs-element 
(1- i)))))
+                    (setq all-leading-terminals-p nil)))
+
+                 ((equal rhs-type 'EMPTY)
+                  (if all-leading-terminals-p
+                      (if disallow-empty-first
+                          (when (= leading-terminals-count 0)
+                            (setq all-leading-terminals-p nil))
+                        (when (and
+                               (= leading-terminals-count 0)
+                               (= input-tape-index (1- input-tape-length)))
+                          (setq leading-terminals (concat leading-terminals 
rhs-element))
+                          (setq leading-terminals-count (1+ 
leading-terminals-count))))
+                    (setq all-leading-terminals-p nil)))
+
+                 ((equal rhs-type 'TERMINAL)
+                  (when all-leading-terminals-p
+                    (setq leading-terminals (concat leading-terminals 
rhs-element))
+                    (setq leading-terminals-count (1+ 
leading-terminals-count))))))
+              (setq input-tape-index (1+ input-tape-index)))
+            (when (> leading-terminals-count 0)
+              (push leading-terminals f-set))))))
+    f-set))
+
+(defun parser--empty-free-first (k production productions)
+  "Calculate empty-free-first K tokens of PRODUCTION in PRODUCTIONS."
+  (parser--first k production productions t))
+
+(provide 'parser)
+
+;;; parser.el ends here
diff --git a/test/parser-test.el b/test/parser-test.el
new file mode 100644
index 0000000..b96a219
--- /dev/null
+++ b/test/parser-test.el
@@ -0,0 +1,197 @@
+;;; parser-test.el --- Tests for parser -*- lexical-binding: t -*-
+
+
+;;; Commentary:
+
+
+;;; Code:
+
+(require 'parser)
+(require 'ert)
+
+(defun parser-test--first ()
+  "Test `parser--first'."
+
+  (should
+   (equal
+    '("a")
+    (parser--first
+     1
+     'S
+     '(
+       (S a)))))
+  (message "Passed first 1 with rudimentary grammar")
+
+  (should
+   (equal
+    '("ab")
+    (parser--first
+     2
+     'S
+     '(
+       (S abc)))))
+  (message "Passed first 2 with rudimentary grammar")
+
+  (should
+   (equal
+    '("abc")
+    (parser--first
+     3
+     'S
+     '(
+       (S abc)))))
+  (message "Passed first 3 with rudimentary grammar")
+
+  (should
+   (equal
+    '("b")
+    (parser--first
+     1
+     'S
+     '(
+       (S A)
+       (A b)))))
+  (message "Passed first 1 with intermediate grammar")
+
+  (should
+   (equal
+    '("ba")
+    (parser--first
+     2
+     'S
+     '(
+       (S A)
+       (A ba)))))
+  (message "Passed first 2 with intermediate grammar")
+
+  (should
+   (equal
+    '("bac")
+    (parser--first
+     3
+     'S
+     '(
+       (S A)
+       (A bace)))))
+  (message "Passed first 3 with intermediate grammar")
+
+  (should
+   (equal
+    '("c" "d")
+    (parser--first
+     1
+     'S
+     '(
+       (S A)
+       (A B)
+       (B c d)))))
+  (message "Passed first 1 with semi-complex grammar")
+
+  (should
+   (equal
+    '("cf" "da")
+    (parser--first
+     2
+     'S
+     '(
+       (S Aa)
+       (A B)
+       (B cf d)))))
+  (message "Passed first 2 with semi-complex grammar")
+
+  (should
+   (equal
+    '("cam" "dam")
+    (parser--first
+     3
+     'S
+     '(
+       (S A)
+       (A Bam)
+       (B c d)))))
+  (message "Passed first 3 with semi-complex grammar")
+
+  (should
+   (equal
+    '("a" "b" "c" "e")
+    (parser--first
+     1
+     'S
+     '(
+       (S AB)
+       (A Ba e)
+       (B Cb C)
+       (C c e)))))
+  (message "Passed first 1 with complex grammar")
+
+  ;; Example 5.28 p 402
+  (should
+   (equal
+    '("a" "ab" "ac" "b" "ba" "c" "ca" "cb" "e")
+    (parser--first
+     2
+     'S
+     '(
+       (S AB)
+       (A Ba e)
+       (B Cb C)
+       (C c e)))))
+  (message "Passed first 2 with complex grammar")
+
+  (should
+   (equal
+    '("a" "ab" "ac" "acb" "b" "ba" "bab" "bac" "c" "ca" "cab" "cac" "cb" "cba" 
"e")
+    (parser--first
+     3
+     'S
+     '(
+       (S AB)
+       (A Ba e)
+       (B Cb C)
+       (C c e)))))
+  (message "Passed first 3 with complex grammar")
+
+  (message "Passed tests for (parser--first)"))
+
+;; Example 5.28 page 402
+(defun parser-test--empty-free-first ()
+  "Test `parser--empty-free-first'."
+
+    ;; Example 5.28 p 402
+  (should
+   (equal
+    '("ca" "cb")
+    (parser--empty-free-first
+     2
+     'S
+     '(
+       (S AB)
+       (A Ba e)
+       (B Cb C)
+       (C c e)))))
+  (message "Passed empty-free-first 2 with complex grammar")
+
+  (message "Passed tests for (parser-test--empty-free-first)"))
+
+(defun parser-test--distinct ()
+  "Test `parser--distinct'."
+  (should
+   (equal
+    '(a b c)
+    (parser--distinct '(a a b c))))
+
+  (should
+   (equal
+    '("aa" "b" "cc" "c" "a")
+    (parser--distinct '("aa" "b" "cc" "c" "b" "a" "aa"))))
+  (message "Passed tests for (parser--distinct)"))
+
+(defun parser-test ()
+  "Run test."
+  (parser-test--distinct)
+  (parser-test--first)
+  (parser-test--empty-free-first))
+
+(provide 'parser-test)
+
+;;; parser-test.el ends here



reply via email to

[Prev in Thread] Current Thread [Next in Thread]