[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/phps-mode 804a42e 09/62: Most of lex analyzer implement
From: |
Christian Johansson |
Subject: |
[elpa] externals/phps-mode 804a42e 09/62: Most of lex analyzer implemented |
Date: |
Tue, 20 Apr 2021 10:56:13 -0400 (EDT) |
branch: externals/phps-mode
commit 804a42ecec86c0d4744e6109326aae8a1d052fb8
Author: Christian Johansson <christian@mediastrategi.se>
Commit: Christian Johansson <christian@mediastrategi.se>
Most of lex analyzer implemented
---
phps-mode-lexer.el | 193 +++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 158 insertions(+), 35 deletions(-)
diff --git a/phps-mode-lexer.el b/phps-mode-lexer.el
index 5cb76a9..673bff2 100644
--- a/phps-mode-lexer.el
+++ b/phps-mode-lexer.el
@@ -45,24 +45,30 @@
;; INITIALIZE SETTINGS
-(phps-mode-parser-grammar-macro-CG 'parser-mode t)
-(phps-mode-parser-grammar-macro-CG 'short-tags t)
+(phps-mode-parser-grammar-macro-CG
+ 'parser-mode t)
+(phps-mode-parser-grammar-macro-CG
+ 'short-tags t)
;; SETTINGS
;; @see https://secure.php.net/manual/en/language.types.integer.php
-(defconst phps-mode-lexer--long-limit 2147483648
+(defconst phps-mode-lexer--long-limit
+ 2147483648
"Limit for 32-bit integer.")
-(defconst phps-mode-lexer--bnum "0b[01]+"
+(defconst phps-mode-lexer--bnum
+ "0b[01]+"
"Boolean number.")
-(defconst phps-mode-lexer--hnum "0x[0-9a-fA-F]+"
+(defconst phps-mode-lexer--hnum
+ "0x[0-9a-fA-F]+"
"Hexadecimal number.")
-(defconst phps-mode-lexer--lnum "[0-9]+"
+(defconst phps-mode-lexer--lnum
+ "[0-9]+"
"Long number.")
(defconst phps-mode-lexer--dnum
@@ -82,21 +88,26 @@
;; NOTE original is [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
;; NOTE Rebuilt for comparability with emacs-lisp
-(defconst phps-mode-lexer--whitespace "[ \n\r\t]+"
+(defconst phps-mode-lexer--whitespace
+ "[ \n\r\t]+"
"White-space.")
-(defconst phps-mode-lexer--tabs-and-spaces "[ \t]*"
+(defconst phps-mode-lexer--tabs-and-spaces
+ "[ \t]*"
"Tabs and white-spaces.")
-(defconst phps-mode-lexer--tokens "[][;:,.()|^&+/*=%!~$<>?@-]"
+(defconst phps-mode-lexer--tokens
+ "[][;:,.()|^&+/*=%!~$<>?@-]"
"Tokens.")
;; NOTE Original is [;:,.\[\]()|^&+-/*=%!~$<>?@]
;; NOTE The hyphen moved last since it has special meaning and to avoid it
being interpreted as a range.
-(defconst phps-mode-lexer--any-char "[^z-a]"
+(defconst phps-mode-lexer--any-char
+ "[^z-a]"
"Any character. The Zend equivalent is [^] but is not possible in Emacs
Lisp.")
-(defconst phps-mode-lexer--newline "[\n\r]"
+(defconst phps-mode-lexer--newline
+ "[\n\r]"
"Newline characters. The Zend equivalent is (\"\r\"|\"\n\"|\"\r\n\").")
@@ -134,6 +145,11 @@
(defvar-local phps-mode-lexer--match-data nil
"Match data.")
+(defvar-local phps-mode-lexer--nest-location-stack nil
+ "Nesting stack.")
+
+(defvar-local phps-mode-lexer--restart-flag nil
+ "Flag whether to restart or not.")
;; HELPER FUNCTIONS
@@ -176,6 +192,56 @@
(phps-mode-lexer--return-token 'T_INLINE_HTML start (- string-start
2))
(phps-mode-lexer--return-token 'T_INLINE_HTML start (point-max))))))
+(defun phps-mode-lexer--enter-nesting (&optional opening)
+ "Enter nesting of OPENING."
+ (unless opening
+ (setq
+ opening
+ (buffer-substring-no-properties
+ (match-beginning 0)
+ (match-end 0))))
+ (push
+ opening
+ phps-mode-lexer--nest-location-stack))
+
+(defun phps-mode-lexer--handle-newline ()
+ "Handle newline."
+ ;; TODO Implement this?
+ )
+
+(defun phps-mode-lexer--exit-nesting (&optional closing)
+ "Exit nesting of CLOSING."
+ (unless closing
+ (setq
+ closing
+ (buffer-substring-no-properties
+ (match-beginning 0)
+ (match-end 0))))
+ (unless phps-mode-lexer--nest-location-stack
+ (signal
+ 'phps-lexer-error
+ (list
+ (format "Unmatched '%s' at '%d'" closing (point))
+ (point))))
+ (let ((opening
+ (car
+ phps-mode-lexer--nest-location-stack)))
+ (when
+ (or
+ (and (string= opening "{")
+ (not (string= closing "}")))
+ (and (string= opening "[")
+ (not (string= closing "]")))
+ (and (string= opening "(")
+ (not (string= closing ")"))))
+ (signal
+ 'phps-lexer-error
+ (list
+ (format "Bad nesting '%s' vs '%s' at %d'" opening closing (point))
+ (point))))
+ (pop phps-mode-lexer--nest-location-stack)
+ t))
+
(defun phps-mode-lexer--emit-token (token start end)
"Emit TOKEN with START and END."
(semantic-lex-push-token (semantic-lex-token token start end))
@@ -225,17 +291,30 @@
(lambda()
,@body)))
-(defun phps-mode-lexer--return-token (token &optional start end)
+(defun phps-mode-lexer--return-token (&optional token start end)
"Return TOKEN with START and END."
(unless start
(setq start (match-beginning 0)))
(unless end
(setq end (match-end 0)))
+ (unless token
+ (setq
+ token
+ (buffer-substring-no-properties
+ start
+ end)))
(phps-mode-lexer--emit-token
token
start
end))
+(defun phps-mode-lexer--return-token-with-indent (&optional token start end)
+ "Return TOKEN with START and END."
+ (phps-mode-lexer--return-token
+ token
+ start
+ end))
+
(defun phps-mode-lexer--return-token-with-str (token offset &optional start
end)
"Return TOKEN at OFFSET with START and END."
(unless start
@@ -244,16 +323,39 @@
(setq end (match-end 0)))
(phps-mode-lexer--return-token token (+ start offset) (+ end offset)))
-(defun phps-mode-lexer--return-token-with-val (token &optional start end)
- "Return TOKEN with START and END."
- (phps-mode-lexer--return-token token start end))
+(defun phps-mode-lexer--return-whitespace ()
+ "Return whitespace."
+ ;; TODO Implement this
+ )
-(defun phps-mode-lexer--return-or-skip-token (token &optional start end)
- "Return TOKEN with START and END but only in parse-mode."
+(defun phps-mode-lexer--return-exit-nesting-token (&optional token start end)
+ "Return TOKEN if it does not exit a nesting with optional START and END."
(unless start
(setq start (match-beginning 0)))
(unless end
(setq end (match-end 0)))
+ (unless token
+ (setq
+ token
+ (buffer-substring-no-properties
+ start
+ end)))
+ (if (and
+ (phps-mode-lexer--exit-nesting token)
+ (phps-mode-parser-grammar-macro-CG 'parser-mode))
+ (phps-mode-lexer--return-token 'T_ERROR)
+ (phps-mode-lexer--return-token token start end)))
+
+(defun phps-mode-lexer--restart ()
+ "Restart."
+ (setq phps-mode-lexer--restart-flag t))
+
+(defun phps-mode-lexer--return-token-with-val (&optional token start end)
+ "Return TOKEN with START and END."
+ (phps-mode-lexer--return-token token start end))
+
+(defun phps-mode-lexer--return-or-skip-token (&optional token start end)
+ "Return TOKEN with START and END but only in parse-mode."
(when (phps-mode-parser-grammar-macro-CG 'parser-mode)
(phps-mode-lexer--return-token token start end)))
@@ -298,6 +400,7 @@
(defun phps-mode-lexer--re2c ()
"Elisp port of original Zend re2c lexer."
+ (setq phps-mode-lexer--restart-flag nil)
(let ((old-start (point)))
(phps-mode-debug-message
(message
@@ -343,7 +446,7 @@
(phps-mode-lexer--match-macro
(and ST_IN_SCRIPTING (looking-at "#\\["))
- (phps-mode-lexer--ENTER_NESTING "[")
+ (phps-mode-lexer--enter-nesting "[")
(phps-mode-lexer--return-token 'T_ATTRIBUTE))
(phps-mode-lexer--match-macro
@@ -413,9 +516,6 @@
(and ST_IN_SCRIPTING (looking-at "endfor"))
(phps-mode-lexer--return-token-with-ident 'T_ENDFOR))
- ;; TODO Fix return_token_with_indent function
- ;; TODO New function begin_nesting
-
(phps-mode-lexer--match-macro
(and ST_IN_SCRIPTING (looking-at "foreach"))
(phps-mode-lexer--return-token-with-ident 'T_FOREACH))
@@ -506,7 +606,6 @@
(phps-mode-lexer--yy-push-state 'ST_LOOKING_FOR_PROPERTY)
(phps-mode-lexer--return-token-with-ident 'T_NULLSAFE_OBJECT_OPERATOR))
- ;; TODO New function below
(phps-mode-lexer--match-macro
(and (or ST_IN_SCRIPTING ST_LOOKING_FOR_PROPERTY)
(looking-at phps-mode-lexer--whitespace))
@@ -520,13 +619,11 @@
(and ST_LOOKING_FOR_PROPERTY (looking-at "?->"))
(phps-mode-lexer--return-token 'T_NULLSAFE_OBJECT_OPERATOR))
- ;; TODO New function below
(phps-mode-lexer--match-macro
(and ST_LOOKING_FOR_PROPERTY (looking-at phps-mode-lexer--label))
(phps-mode-lexer--yy-pop-state)
(phps-mode-lexer--return-token-with-tr 'T_STRING 0))
- ;; TODO New function below
(phps-mode-lexer--match-macro
(and ST_LOOKING_FOR_PROPERTY (looking-at phps-mode-lexer--any-char))
(phps-mode-lexer--yyless 0)
@@ -867,7 +964,18 @@
(and ST_IN_SCRIPTING (looking-at ">>"))
(phps-mode-lexer--return-token 'T_SR))
- ;; TODO Fix new here
+ (phps-mode-lexer--match-macro
+ (and ST_IN_SCRIPTING
+ (looking-at
+ (concat "\\(" "]" "\\|" ")" "\\)")))
+ (phps-mode-lexer--return-exit-nesting-token))
+
+ (phps-mode-lexer--match-macro
+ (and ST_IN_SCRIPTING
+ (looking-at
+ (concat "\\(" "[" "\\|" "(" "\\)")))
+ (phps-mode-lexer--enter-nesting)
+ (phps-mode-lexer--return-token))
(phps-mode-lexer--match-macro
(and ST_IN_SCRIPTING (looking-at phps-mode-lexer--tokens))
@@ -892,7 +1000,6 @@
(phps-mode-lexer--enter-nesting "{")
(phps-mode-lexer--return-token 'T_DOLLAR_OPEN_CURLY_BRACES))
- ;; TODO New function below
(phps-mode-lexer--match-macro
(and ST_IN_SCRIPTING (looking-at "}"))
(phps-mode-lexer--reset-doc-comment)
@@ -901,21 +1008,26 @@
(phps-mode-lexer--return-exit-nesting-token "}"))
(phps-mode-lexer--match-macro
- (and ST_LOOKING_FOR_VARNAME (looking-at (concat phps-mode-lexer--label
"[\\[}]")))
+ (and
+ ST_LOOKING_FOR_VARNAME
+ (looking-at (concat phps-mode-lexer--label "[\\[}]")))
(phps-mode-lexer--yyless)
(phps-mode-lexer--yy-pop-state)
(phps-mode-lexer--yy-push-state 'ST_IN_SCRIPTING)
(phps-mode-lexer--return-token 'T_STRING_VARNAME))
(phps-mode-lexer--match-macro
- (and ST_LOOKING_FOR_VARNAME (looking-at phps-mode-lexer--any-char))
+ (and
+ ST_LOOKING_FOR_VARNAME
+ (looking-at phps-mode-lexer--any-char))
(phps-mode-lexer--yyless 0)
(phps-mode-lexer--yy-pop-state)
(phps-mode-lexer--yy-push-state 'ST_IN_SCRIPTING)
(phps-mode-lexer--restart))
(phps-mode-lexer--match-macro
- (and ST_IN_SCRIPTING (looking-at phps-mode-lexer--bnum))
+ (and ST_IN_SCRIPTING
+ (looking-at phps-mode-lexer--bnum))
(let* ((start (match-beginning 0))
(end (match-end 0))
(data (buffer-substring-no-properties (+ start 2) end))
@@ -926,7 +1038,8 @@
(phps-mode-lexer--return-token 'T_LNUMBER))))
(phps-mode-lexer--match-macro
- (and ST_IN_SCRIPTING (looking-at phps-mode-lexer--lnum))
+ (and ST_IN_SCRIPTING
+ (looking-at phps-mode-lexer--lnum))
(let* ((start (match-beginning 0))
(end (match-end 0))
(data (string-to-number (buffer-substring-no-properties start
end))))
@@ -1019,11 +1132,14 @@
(phps-mode-lexer--return-token-with-indent 'T_ECHO))
(phps-mode-lexer--return-token 'T_OPEN_TAG_WITH_ECHO))
- ;; TODO New function below
(phps-mode-lexer--match-macro
(and
ST_INITIAL
- (looking-at (concat "<\\?php\\([ \t]\\|" phps-mode-lexer--newline
"\\)")))
+ (looking-at
+ (concat
+ "<\\?php\\([ \t]\\|"
+ phps-mode-lexer--newline
+ "\\)")))
(phps-mode-lexer--handle-newline)
(phps-mode-lexer--begin 'ST_IN_SCRIPTING)
(phps-mode-lexer--return-or-skip-token 'T_OPEN_TAG))
@@ -1118,7 +1234,12 @@
(- (match-end 0) 1)))
(phps-mode-lexer--match-macro
- (and (or ST_IN_SCRIPTING ST_DOUBLE_QUOTES ST_HEREDOC ST_BACKQUOTE
ST_VAR_OFFSET)
+ (and (or
+ ST_IN_SCRIPTING
+ ST_DOUBLE_QUOTES
+ ST_HEREDOC
+ ST_BACKQUOTE
+ ST_VAR_OFFSET)
(looking-at
(concat
"\\$"
@@ -1141,7 +1262,6 @@
;; Only '[' or '-' can be valid, but returning other tokens will
allow a more explicit parse error
(phps-mode-lexer--return-token data)))
- ;; TODO New function below
(phps-mode-lexer--match-macro
(and ST_VAR_OFFSET (looking-at (concat "[ \n\r\t'#]")))
;; Invalid rule to return a more explicit parse error with proper line
number
@@ -1586,7 +1706,10 @@
(match-beginning 0))))
(when phps-mode-lexer--match-length
- (phps-mode-lexer--re2c-execute)))))
+ (phps-mode-lexer--re2c-execute)
+
+ (when phps-mode-lexer--restart-flag
+ (phps-mode-lexer--re2c))))))
(provide 'phps-mode-lexer)
- [elpa] externals/phps-mode c4f1335 34/62: Updated version and modified date, (continued)
- [elpa] externals/phps-mode c4f1335 34/62: Updated version and modified date, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode 6ffce34 42/62: More work on implementing grammar, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode fa61295 41/62: More work on grammar, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode 9a9c296 44/62: More grammar implementation, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode 9e291dc 47/62: Excluded admin-files from compilation, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode 9011c85 32/62: Updated copyright years, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode 85ed75d 46/62: More work on automated parser generation, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode aeec1d2 55/62: Removed comment, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode 62e7617 61/62: Merged GitHub, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode cd3bbfd 02/62: More work on PHP 8.0 lex analyzer, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode 804a42e 09/62: Most of lex analyzer implemented,
Christian Johansson <=
- [elpa] externals/phps-mode 1072b2d 14/62: Improved debugging of lex-analyzer, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode 9c63f7d 16/62: Passed first old tests for lexer, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode e624011 19/62: White-space fixes for lexer unit tests, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode d893f7f 22/62: Passing some more tests, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode c1ab121 25/62: Work on making integration-tests pass, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode ad4e825 20/62: Added some new lex analyzer tests for PHP 8.0 tokens, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode a418e41 26/62: Integration-test now comparing nest-location-stack as well, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode f1b513c 27/62: Passed another integration test, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode a900472 29/62: Passed bookkeeping tests, Christian Johansson, 2021/04/20
- [elpa] externals/phps-mode 685375c 31/62: Passing all unit tests after updated lex-analyzer, Christian Johansson, 2021/04/20