[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/parser-generator b8faa17 002/434: FIRSTk and EFFk worki
From: |
ELPA Syncer |
Subject: |
[elpa] externals/parser-generator b8faa17 002/434: FIRSTk and EFFk working |
Date: |
Mon, 29 Nov 2021 15:58:56 -0500 (EST) |
branch: externals/parser-generator
commit b8faa171a03077510df95c1eaa81d907a84f5fa5
Author: Christian Johansson <christian@cvj.se>
Commit: Christian Johansson <christian@cvj.se>
FIRSTk and EFFk working
---
Makefile | 20 +++++
README.md | 25 ++++++-
emacs-parser.el | 0
parser.el | 207 ++++++++++++++++++++++++++++++++++++++++++++++++++++
test/parser-test.el | 197 +++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 448 insertions(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index e69de29..2c072f5 100644
--- a/Makefile
+++ b/Makefile
@@ -0,0 +1,20 @@
+EMACS = emacs
+ifdef emacs
+ EMACS = $(emacs)
+endif
+EMACS_CMD := $(EMACS) -Q -batch -L . -L test/
+
+EL := parser.el test/parser-test.el
+ELC := $(EL:.el=.elc)
+
+.PHONY: clean
+clean:
+ rm -f $(ELC)
+
+.PHONY: compile
+compile:
+ $(EMACS_CMD) -f batch-byte-compile $(EL)
+
+.PHONY: test
+test:
+ $(EMACS_CMD) -l test/parser-test.el -f "parser-test"
diff --git a/README.md b/README.md
index bb83076..d46d0a8 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,27 @@
# Emacs Parser
-The idea of this plugin is to provide functions for various kinds of parsing.
+The idea of this plugin is to provide functions for various kinds of
context-free grammar parsing. This project is about implementing algorithms
described in the book `The Theory of Parsing, Translation and Compiling (Volume
1)`.
+## Lexical Analysis
+
+We use a regular-language based lexical analyzer that can be implemented by a
finite-state-machine (FSM).
+
+WIP
+
+## Syntax Analysis / Parsing
+
+We use a deterministic push down transducer (DPDT) based algorithms.
+
+### Top-down
+#### With backtracking
+#### Without backtracking
+### Bottom-up
+#### With backtracking
+#### Without backtracking
+##### LL(k)
+##### LR(k)
+##### LALR(k)
+
+## Test
+
+Run in terminal `make clean && make test`
diff --git a/emacs-parser.el b/emacs-parser.el
deleted file mode 100644
index e69de29..0000000
diff --git a/parser.el b/parser.el
new file mode 100644
index 0000000..a6af40c
--- /dev/null
+++ b/parser.el
@@ -0,0 +1,207 @@
+;;; parser.el --- LR(k) Parser -*- lexical-binding: t -*-
+
+
+;;; Commentary:
+
+
+;;; Code:
+
+(defvar parser--debug nil)
+
+
+;; page 402
+
+(defmacro parser--debug (&rest message)
+ "Output MESSAGE but only if debug is enabled."
+ `(when parser--debug
+ ,@message))
+
+(defun parser--distinct (elements)
+ "Return distinct of ELEMENTS."
+ (let ((processed (make-hash-table :test 'equal))
+ (new-elements))
+ (dolist (element elements)
+ (unless (gethash element processed)
+ (puthash element t processed)
+ (push element new-elements)))
+ (nreverse new-elements)))
+
+;; page 377, Algorithm 5.5
+(defun parser--first (k production productions &optional disallow-empty-first)
+ "Calculate first K tokens of PRODUCTION in PRODUCTIONS, optionally
DISALLOW-EMPTY-FIRST."
+ (let ((f-sets (make-hash-table :test 'equal))
+ (i 0)
+ (i-max (length productions)))
+ (while (< i i-max)
+ (parser--debug (message "i = %s" i))
+ (let ((f-set (make-hash-table :test 'equal)))
+
+ ;; Iterate all productions, set F_i
+ (dolist (p productions)
+ (let ((production-lhs (symbol-name (car p)))
+ (production-rhs (cdr p)))
+ (parser--debug
+ (message "Production-LHS: %s" production-lhs)
+ (message "Production-RHS: %s" production-rhs))
+
+ ;; Iterate all blocks in RHS
+ (let ((f-p-set))
+ (dolist (rhs-p production-rhs)
+ (let ((rhs-string (symbol-name rhs-p)))
+ (let ((rhs-leading-terminals
+ (parser--f-set rhs-string `(,k ,i ,f-sets
,disallow-empty-first) '(("" t 0)))))
+ (parser--debug
+ (message "Leading %d terminals at index %s (%s) -> %s =
%s" k i production-lhs rhs-string rhs-leading-terminals))
+ (when rhs-leading-terminals
+ (when (and
+ (listp rhs-leading-terminals)
+ (> (length rhs-leading-terminals) 0))
+ (dolist (rhs-leading-terminals-string
rhs-leading-terminals)
+ (when (and
+ (stringp rhs-leading-terminals-string)
+ (> (length rhs-leading-terminals-string) 0))
+ (push rhs-leading-terminals-string f-p-set))))))))
+
+ ;; Make set distinct
+ (setq f-p-set (parser--distinct f-p-set))
+ (parser--debug
+ (message "F_%s_%s(%s) = %s" i k production-lhs f-p-set))
+ (puthash production-lhs (nreverse f-p-set) f-set))))
+ (puthash i f-set f-sets)
+ (setq i (+ i 1))))
+ (sort (gethash (symbol-name production) (gethash (1- i-max) f-sets))
'string<)))
+
+(defun parser--f-set (input-tape state stack)
+ "A deterministic push-down transducer (DPDT) for building F-sets from
INPUT-TAPE, STATE and STACK."
+ (parser--debug
+ (message "(parser--f-set)")
+ (message "input-tape: %s" input-tape)
+ (message "state: %s" state)
+ (message "stack: %s" stack))
+
+ (let ((f-set)
+ (input-tape-length (length input-tape))
+ (k (nth 0 state))
+ (i (nth 1 state))
+ (f-sets (nth 2 state))
+ (disallow-empty-first (nth 3 state)))
+ (parser--debug
+ (message "input-tape-length: %s" input-tape-length)
+ (message "k: %s" k)
+ (message "i: %s" i))
+ (while stack
+ (let ((stack-symbol (pop stack)))
+ (parser--debug
+ (message "Stack-symbol: %s" stack-symbol))
+ (let ((leading-terminals (nth 0 stack-symbol))
+ (all-leading-terminals-p (nth 1 stack-symbol))
+ (input-tape-index (nth 2 stack-symbol))
+ (empty-first-p nil))
+ (parser--debug
+ (message "leading-terminals: %s" leading-terminals)
+ (message "all-leading-terminals-p: %s" all-leading-terminals-p)
+ (message "input-tape-index: %s" input-tape-index))
+
+ ;; Flag whether leading-terminal is empty or not
+ (when (string= leading-terminals "e")
+ (setq empty-first-p t))
+
+ (parser--debug (message "empty-first-p: %s" empty-first-p))
+
+ ;; If leading terminal is empty and we have input-tape left,
disregard it
+ (when (and
+ (not disallow-empty-first)
+ empty-first-p
+ (< input-tape-index input-tape-length))
+ (parser--debug (message "Disregarding empty first terminal"))
+ (setq leading-terminals ""))
+
+ (let ((leading-terminals-count (length leading-terminals)))
+ (parser--debug (message "leading-terminals-count: %s"
leading-terminals-count))
+ (while (and
+ (< input-tape-index input-tape-length)
+ (< leading-terminals-count k)
+ all-leading-terminals-p)
+ (let ((rhs-element (substring input-tape input-tape-index (1+
input-tape-index)))
+ (rhs-type))
+ (parser--debug (message "rhs-element: %s" rhs-element))
+
+ ;; Determine symbol type
+ (if (string= rhs-element (upcase rhs-element))
+ (setq rhs-type 'NON-TERMINAL)
+ (if (string= rhs-element "e")
+ (setq rhs-type 'EMPTY)
+ (setq rhs-type 'TERMINAL)))
+ (parser--debug (message "rhs-type: %s" rhs-type))
+
+ (cond
+
+ ((equal rhs-type 'NON-TERMINAL)
+ (if (> i 0)
+ (let ((sub-terminal-sets (gethash rhs-element (gethash
(1- i) f-sets))))
+ (if sub-terminal-sets
+ (progn
+ (parser--debug
+ (message "Sub-terminal-sets F_%s_%s(%s) = %s
(%d)" (1- i) k rhs-element sub-terminal-sets (length sub-terminal-sets)))
+ (let ((sub-terminal-set (car sub-terminal-sets)))
+
+ (unless (= (length sub-terminal-sets) 1)
+ ;; Should branch off here, each unique
permutation should be included in set
+ ;; Follow first alternative in this scope
but follow the rest in separate scopes
+ (let ((sub-terminal-index 0))
+ (dolist (sub-terminal-set
sub-terminal-sets)
+ (unless (= sub-terminal-index 0)
+
+ ;; When we have a leading terminal and
sub-terminal set is empty, don't append it
+ (when (and
+ (> leading-terminals-count 0)
+ (string= sub-terminal-set "e"))
+ (setq sub-terminal-set ""))
+
+ (let ((sub-rhs-leading-terminals
(concat leading-terminals sub-terminal-set)))
+ (when (> (length
sub-rhs-leading-terminals) k)
+ (setq sub-rhs-leading-terminals
(substring sub-rhs-leading-terminals 0 k)))
+ (push `(,sub-rhs-leading-terminals
,all-leading-terminals-p ,(1+ input-tape-index)) stack)))
+ (setq sub-terminal-index (1+
sub-terminal-index)))))
+
+ (parser--debug (message "Sub-terminal-set: %s"
sub-terminal-set))
+ (when (or
+ (not (string= sub-terminal-set "e"))
+ (= input-tape-index (1-
input-tape-length)))
+ (setq leading-terminals (concat
leading-terminals sub-terminal-set))
+ (setq leading-terminals-count (+
leading-terminals-count (length sub-terminal-set)))
+ (when (> leading-terminals-count k)
+ (setq leading-terminals (substring
leading-terminals 0 k))
+ (setq leading-terminals-count k)))))
+ (parser--debug
+ (message "Found no subsets for %s %s" rhs-element
(1- i)))))
+ (setq all-leading-terminals-p nil)))
+
+ ((equal rhs-type 'EMPTY)
+ (if all-leading-terminals-p
+ (if disallow-empty-first
+ (when (= leading-terminals-count 0)
+ (setq all-leading-terminals-p nil))
+ (when (and
+ (= leading-terminals-count 0)
+ (= input-tape-index (1- input-tape-length)))
+ (setq leading-terminals (concat leading-terminals
rhs-element))
+ (setq leading-terminals-count (1+
leading-terminals-count))))
+ (setq all-leading-terminals-p nil)))
+
+ ((equal rhs-type 'TERMINAL)
+ (when all-leading-terminals-p
+ (setq leading-terminals (concat leading-terminals
rhs-element))
+ (setq leading-terminals-count (1+
leading-terminals-count))))))
+ (setq input-tape-index (1+ input-tape-index)))
+ (when (> leading-terminals-count 0)
+ (push leading-terminals f-set))))))
+ f-set))
+
+(defun parser--empty-free-first (k production productions)
+ "Calculate empty-free-first K tokens of PRODUCTION in PRODUCTIONS."
+ (parser--first k production productions t))
+
+(provide 'parser)
+
+;;; parser.el ends here
diff --git a/test/parser-test.el b/test/parser-test.el
new file mode 100644
index 0000000..b96a219
--- /dev/null
+++ b/test/parser-test.el
@@ -0,0 +1,197 @@
+;;; parser-test.el --- Tests for parser -*- lexical-binding: t -*-
+
+
+;;; Commentary:
+
+
+;;; Code:
+
+(require 'parser)
+(require 'ert)
+
+(defun parser-test--first ()
+ "Test `parser--first'."
+
+ (should
+ (equal
+ '("a")
+ (parser--first
+ 1
+ 'S
+ '(
+ (S a)))))
+ (message "Passed first 1 with rudimentary grammar")
+
+ (should
+ (equal
+ '("ab")
+ (parser--first
+ 2
+ 'S
+ '(
+ (S abc)))))
+ (message "Passed first 2 with rudimentary grammar")
+
+ (should
+ (equal
+ '("abc")
+ (parser--first
+ 3
+ 'S
+ '(
+ (S abc)))))
+ (message "Passed first 3 with rudimentary grammar")
+
+ (should
+ (equal
+ '("b")
+ (parser--first
+ 1
+ 'S
+ '(
+ (S A)
+ (A b)))))
+ (message "Passed first 1 with intermediate grammar")
+
+ (should
+ (equal
+ '("ba")
+ (parser--first
+ 2
+ 'S
+ '(
+ (S A)
+ (A ba)))))
+ (message "Passed first 2 with intermediate grammar")
+
+ (should
+ (equal
+ '("bac")
+ (parser--first
+ 3
+ 'S
+ '(
+ (S A)
+ (A bace)))))
+ (message "Passed first 3 with intermediate grammar")
+
+ (should
+ (equal
+ '("c" "d")
+ (parser--first
+ 1
+ 'S
+ '(
+ (S A)
+ (A B)
+ (B c d)))))
+ (message "Passed first 1 with semi-complex grammar")
+
+ (should
+ (equal
+ '("cf" "da")
+ (parser--first
+ 2
+ 'S
+ '(
+ (S Aa)
+ (A B)
+ (B cf d)))))
+ (message "Passed first 2 with semi-complex grammar")
+
+ (should
+ (equal
+ '("cam" "dam")
+ (parser--first
+ 3
+ 'S
+ '(
+ (S A)
+ (A Bam)
+ (B c d)))))
+ (message "Passed first 3 with semi-complex grammar")
+
+ (should
+ (equal
+ '("a" "b" "c" "e")
+ (parser--first
+ 1
+ 'S
+ '(
+ (S AB)
+ (A Ba e)
+ (B Cb C)
+ (C c e)))))
+ (message "Passed first 1 with complex grammar")
+
+ ;; Example 5.28 p 402
+ (should
+ (equal
+ '("a" "ab" "ac" "b" "ba" "c" "ca" "cb" "e")
+ (parser--first
+ 2
+ 'S
+ '(
+ (S AB)
+ (A Ba e)
+ (B Cb C)
+ (C c e)))))
+ (message "Passed first 2 with complex grammar")
+
+ (should
+ (equal
+ '("a" "ab" "ac" "acb" "b" "ba" "bab" "bac" "c" "ca" "cab" "cac" "cb" "cba"
"e")
+ (parser--first
+ 3
+ 'S
+ '(
+ (S AB)
+ (A Ba e)
+ (B Cb C)
+ (C c e)))))
+ (message "Passed first 3 with complex grammar")
+
+ (message "Passed tests for (parser--first)"))
+
+;; Example 5.28 page 402
+(defun parser-test--empty-free-first ()
+ "Test `parser--empty-free-first'."
+
+ ;; Example 5.28 p 402
+ (should
+ (equal
+ '("ca" "cb")
+ (parser--empty-free-first
+ 2
+ 'S
+ '(
+ (S AB)
+ (A Ba e)
+ (B Cb C)
+ (C c e)))))
+ (message "Passed empty-free-first 2 with complex grammar")
+
+ (message "Passed tests for (parser-test--empty-free-first)"))
+
+(defun parser-test--distinct ()
+ "Test `parser--distinct'."
+ (should
+ (equal
+ '(a b c)
+ (parser--distinct '(a a b c))))
+
+ (should
+ (equal
+ '("aa" "b" "cc" "c" "a")
+ (parser--distinct '("aa" "b" "cc" "c" "b" "a" "aa"))))
+ (message "Passed tests for (parser--distinct)"))
+
+(defun parser-test ()
+ "Run test."
+ (parser-test--distinct)
+ (parser-test--first)
+ (parser-test--empty-free-first))
+
+(provide 'parser-test)
+
+;;; parser-test.el ends here
- [elpa] branch externals/parser-generator created (now 4a3a51d), ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator b93ab8c 001/434: My initial commit, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator b8faa17 002/434: FIRSTk and EFFk working,
ELPA Syncer <=
- [elpa] externals/parser-generator ee0a623 003/434: Added TRAVIS and LICENSE, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator f5bfa40 004/434: Fixed typo in README, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator 58798c8 010/434: Starting on calculation of valid LK-sets for a valid grammar prefix, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator f9c8348 008/434: Updated Travis and Makefil rule name, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator 5f65cfc 015/434: More refactoring, using lists instead of string as grammar data type, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator f2791c1 022/434: Passed unit test 3 intermediate grammar, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator 5d9b98c 011/434: Added functions to validate G and k and tests, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator 356720c 030/434: Passing all unit tests using new data structure, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator e4fd795 007/434: Added compilation to test, ELPA Syncer, 2021/11/29
- [elpa] externals/parser-generator 42d92f1 014/434: More refactoring, ELPA Syncer, 2021/11/29