emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[nongnu] externals/caml d045fae 132/197: correct fontification for strin


From: Stefan Monnier
Subject: [nongnu] externals/caml d045fae 132/197: correct fontification for strings and comments
Date: Sat, 21 Nov 2020 01:19:53 -0500 (EST)

branch: externals/caml
commit d045faec1c26e7ab29b4ae1ec0e4cf98fb4af729
Author: Damien Doligez <damien.doligez-inria.fr>
Commit: Damien Doligez <damien.doligez-inria.fr>

    correct fontification for strings and comments
    
    git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@11289 
f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
---
 caml-font.el | 341 ++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 304 insertions(+), 37 deletions(-)

diff --git a/caml-font.el b/caml-font.el
index 09d1ae7..425c076 100644
--- a/caml-font.el
+++ b/caml-font.el
@@ -1,30 +1,5 @@
-;(***********************************************************************)
-;(*                                                                     *)
-;(*                                OCaml                                *)
-;(*                                                                     *)
-;(*                Jacques Garrigue and Ian T Zimmerman                 *)
-;(*                                                                     *)
-;(*  Copyright 1997 Institut National de Recherche en Informatique et   *)
-;(*  en Automatique.  All rights reserved.  This file is distributed    *)
-;(*  under the terms of the GNU General Public License.                 *)
-;(*                                                                     *)
-;(***********************************************************************)
-
 ;; caml-font: font-lock support for OCaml files
-;;
-;; rewrite and clean-up.
-;; Changes:
-;; - fontify strings and comments using syntactic font lock
-;; - define a `font-lock-syntactic-face-function' to fontify ocamldoc comments
-;; - fontify infix operators like mod, land, lsl, etc.
-;; - fontify line number directives
-;; - fontify "failwith" and "invalid_arg" like "raise"
-;; - fontify '\x..' character constants
-;; - use the regexp-opt function to build regexps (more readable)
-;; - use backquote and comma in sexp (more readable)
-;; - drop the `caml-quote-char' variable (I don't use caml-light :))
-;; - stop doing weird things with faces
-
+;; now with perfect parsing of comments and strings
 
 (require 'font-lock)
 
@@ -48,9 +23,6 @@
 
 (defconst caml-font-lock-keywords
   `(
-;character literals
-    ("'\\(.\\|\\\\\\([ntbr\"'\\\\]\\|[0-9]\\{3\\}\\|x[0-9A-Fa-f]\\{2\\}\\)\\)'"
-     . font-lock-string-face)
 ;modules and constructors
    ("`?\\<[A-Z][A-Za-z0-9_']*\\>" . font-lock-function-name-face)
 ;definition
@@ -99,14 +71,298 @@
          ((looking-at "(\\*\\*[^*]")     'caml-font-doccomment-face)
          (t                              'font-lock-comment-face)))))))
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; In order to correctly fontify an OCaml buffer, it is necessary to
+; lex the buffer to tell what is a comment and what is a string.
+; We do this incrementally in a hook
+; (font-lock-extend-after-change-region-function), which is called
+; whenever the buffer changes.  It sets the syntax-table property
+; on each beginning and end of chars, strings, and comments.
+
+; This mode handles correctly all the strange cases in the following
+; OCaml code.
+;
+; let l' _ = ();;
+; let _' _ = ();;
+; let l' = ();;
+; let b2_' = ();;
+; let a'a' = ();;
+; let f2 _ _ = ();;
+; let f3 _ _ _ = ();;
+; let f' _ _ _ _ _ = ();;
+; let hello = ();;
+;
+; (* ==== easy stuff ==== *)
+;
+; (* a comment *)
+; (* "a string" in a comment *)
+; (* "another string *)" in a comment *)
+; (* not a string '"' in a comment *)
+; "a string";;
+; '"';;              (* not a string *)
+;
+; (* ==== hard stuff ==== *)
+;
+; l'"' not not a string ";;
+; _'"' also not not a string";;
+; f2 0l'"';;            (* not not not a string *)
+; f2 0_'"';;            (* also not not not a string *)
+; f3 0.0l'"' not not not not a string ";;
+; f3 0.0_'"';;          (* not not not not not a string *)
+; f2 0b01_'"';;         (* not not not a string *)
+; f3 0b2_'"'  not not not not a string ";;
+; f3 0b02_'"';;         (* not not not not not a string *)
+; '\'';;   (* a char *)
+; '
+; ';;      (* a char *)
+; '^M
+; ';;      (* also a char [replace ^M with one CR character] *)
+; a'a';;   (* not a char *)
+; type '
+; a' t = X;;   (* also not a char *)
+;
+; (* ==== far-out stuff ==== *)
+;
+;    f'"'" "*) print_endline "hello";;(* \"" ;;
+; (* f'"'" "*) print_endline "hello";;(* \"" ;; *)
+
+
+(defconst caml-font-ident-re
+  
"[A-Za-z_\300-\326\330-\366\370-\377][A-Za-z_\300-\326\330-\366\370-\377'0-9]*"
+)
+
+(defconst caml-font-int-re
+  
"\\(0[xX][0-9A-Fa-f][0-9A-Fa-f_]*\\|0[oO][0-7][0-7_]*\\|0[bB][01][01_]*\\)[lLn]?"
+)
+
+; decimal integers are folded into the RE for floats to get longest-match
+; without using posix-looking-at
+(defconst caml-font-decimal-re
+  "[0-9][0-9_]*\\([lLn]\\|\\.[0-9_]*\\)?\\([eE][+-]?[0-9][0-9_]*\\)?"
+)
+
+; match any ident or numeral token
+(defconst caml-font-ident-or-num-re
+  (concat caml-font-ident-re "\\|" caml-font-int-re "\\|" caml-font-decimal-re)
+)
+
+; match any char token
+(defconst caml-font-char-re
+  "'\\(\015\012\\|[^\\']\\|\\(\\\\\\([\\'\"ntbr 
]\\|[0-9][0-9][0-9]\\|x[0-9A-Fa-f][0-9A-Fa-f]\\)\\)\\)'"
+)
+
+; match a quote followed by a newline
+(defconst caml-font-quote-newline-re
+  "'\\(\015\012\\|[\012\015]\\)"
+)
+
+; match any token or sequence of tokens that cannot contain a
+; quote, double quote, a start of comment, or a newline
+; note: this is only to go faster than one character at a time
+(defconst caml-font-other-re
+  "[^A-Za-z_0-9\012\015\300-\326\330-\366\370-\377'\"(]+"
+)
+
+; match any sequence of non-special characters in a comment
+; note: this is only to go faster than one character at a time
+(defconst caml-font-other-comment-re
+  "[^(*\"'\012\015]+"
+)
+
+; match any sequence of non-special characters in a string
+; note: this is only to go faster than one character at a time
+(defconst caml-font-other-string-re
+  "[^\\\"\012\015]"
+)
+
+; match a newline
+(defconst caml-font-newline-re
+  "\\(\015\012\\|[\012\015]\\)"
+)
+
+; Put the 'caml-font-state property with the given state on the
+; character before pos.  Return nil if it was already there, t if not.
+(defun caml-font-put-state (pos state)
+  (if (equal state (get-text-property (1- pos) 'caml-font-state))
+      nil
+    (put-text-property (1- pos) pos 'caml-font-state state)
+    t)
+)
+
+; Same as looking-at, but erase properties 'caml-font-state and
+; 'syntax-table from the matched range
+(defun caml-font-looking-at (re)
+  (let ((result (looking-at re)))
+    (when result
+      (remove-text-properties (match-beginning 0) (match-end 0)
+                              '(syntax-table nil caml-font-state nil)))
+    result)
+)
+
+; Annotate the buffer starting at point in state (st . depth)
+; Set the 'syntax-table property on beginnings and ends of:
+; - strings
+; - chars
+; - comments
+; Also set the 'caml-font-state property on each LF character that is
+; not preceded by a single quote. The property gives the state of the
+; lexer (nil or t) after reading that character.
+
+; Leave the point at a point where the pre-existing 'caml-font-state
+; property is consistent with the new parse, or at the end of the buffer.
+
+; depth is the depth of nested comments at this point
+;   it must be a non-negative integer
+; st can be:
+;   nil  -- we are in the base state
+;   t    -- we are within a string
+
+(defun caml-font-annotate (st depth)
+  (let ((continue t))
+    (while (and continue (not (eobp)))
+      (cond
+       ((and (equal st nil) (= depth 0)) ; base state, outside comment
+        (cond
+         ((caml-font-looking-at caml-font-ident-or-num-re)
+          (goto-char (match-end 0)))
+         ((caml-font-looking-at caml-font-char-re)
+          (put-text-property (point) (1+ (point))
+                             'syntax-table (string-to-syntax "|"))
+          (put-text-property (1- (match-end 0)) (match-end 0)
+                             'syntax-table (string-to-syntax "|"))
+          (goto-char (match-end 0)))
+         ((caml-font-looking-at caml-font-quote-newline-re)
+          (goto-char (match-end 0)))
+         ((caml-font-looking-at "\"")
+          (put-text-property (point) (1+ (point))
+                             'syntax-table (string-to-syntax "|"))
+          (goto-char (match-end 0))
+          (setq st t))
+         ((caml-font-looking-at "(\\*")
+          (put-text-property (point) (1+ (point))
+                             'syntax-table (string-to-syntax "!"))
+          (goto-char (match-end 0))
+          (setq depth 1))
+         ((looking-at caml-font-newline-re)
+          (goto-char (match-end 0))
+          (setq continue (caml-font-put-state (match-end 0) '(nil . 0))))
+         ((caml-font-looking-at caml-font-other-re)
+          (goto-char (match-end 0)))
+         (t
+          (remove-text-properties (point) (1+ (point))
+                                  '(syntax-table nil caml-font-state nil))
+          (goto-char (1+ (point))))))
+       ((equal st nil)                 ; base state inside comment
+        (cond
+         ((caml-font-looking-at "(\\*")
+          (goto-char (match-end 0))
+          (setq depth (1+ depth)))
+         ((caml-font-looking-at "\\*)")
+          (goto-char (match-end 0))
+          (setq depth (1- depth))
+          (when (= depth 0)
+            (put-text-property (1- (point)) (point)
+                               'syntax-table (string-to-syntax "!"))))
+         ((caml-font-looking-at "\"")
+          (goto-char (match-end 0))
+          (setq st t))
+         ((caml-font-looking-at caml-font-char-re)
+          (goto-char (match-end 0)))
+         ((caml-font-looking-at caml-font-quote-newline-re)
+          (goto-char (match-end 0)))
+         ((caml-font-looking-at "''")
+          (goto-char (match-end 0)))
+         ((looking-at caml-font-newline-re)
+          (goto-char (match-end 0))
+          (setq continue (caml-font-put-state (match-end 0) (cons nil depth))))
+         ((caml-font-looking-at caml-font-other-comment-re)
+          (goto-char (match-end 0)))
+         (t
+          (remove-text-properties (point) (1+ (point))
+                                  '(syntax-table nil caml-font-state nil))
+          (goto-char (1+ (point))))))
+       (t                     ; string state inside or outside a comment
+        (cond
+         ((caml-font-looking-at "\"")
+          (when (= depth 0)
+            (put-text-property (point) (1+ (point))
+                               'syntax-table (string-to-syntax "|")))
+          (goto-char (1+ (point)))
+          (setq st nil))
+         ((caml-font-looking-at "\\\\[\"\\]")
+          (goto-char (match-end 0)))
+         ((looking-at caml-font-newline-re)
+          (goto-char (match-end 0))
+          (setq continue (caml-font-put-state (match-end 0) (cons t depth))))
+         ((caml-font-looking-at caml-font-other-string-re)
+          (goto-char (match-end 0)))
+         (t
+          (remove-text-properties (point) (1+ (point))
+                                  '(syntax-table nil caml-font-state nil))
+          (goto-char (1+ (point)))))))))
+)
+
+; This is the hook function for font-lock-extend-after-change-function
+; It finds the nearest saved state at the left of the changed text,
+; calls caml-font-annotate to set the 'caml-font-state and 'syntax-table
+; properties, then returns the range that was parsed by caml-font-annotate.
+(defun caml-font-extend-after-change (beg end &optional old-len)
+  (save-excursion
+    (save-match-data
+      (let ((caml-font-modified (buffer-modified-p))
+            start-at
+            end-at
+            state)
+        (remove-text-properties beg end '(syntax-table nil caml-font-state 
nil))
+        (setq start-at
+              (or (and (> beg (point-min))
+                       (get-text-property (1- beg) 'caml-font-state)
+                       beg)
+                  (previous-single-property-change beg 'caml-font-state)
+                  (point-min)))
+        (setq state (or (and (> start-at (point-min))
+                             (get-text-property (1- start-at) 
'caml-font-state))
+                        (cons nil 0)))
+        (goto-char start-at)
+        (caml-font-annotate (car state) (cdr state))
+        (setq end-at (point))
+        (restore-buffer-modified-p caml-font-modified)
+        (cons start-at end-at))))
+)
+
+; We don't use the normal caml-mode syntax table because it contains an
+; approximation of strings and comments that interferes with our
+; annotations.
+(defconst caml-font-syntax-table
+  (let ((tbl (make-syntax-table)))
+    (modify-syntax-entry ?' "w" tbl)
+    (modify-syntax-entry ?_ "w" tbl)
+    (modify-syntax-entry ?\" "." tbl)
+    (modify-syntax-entry '(?\300 . ?\326) "w" tbl)
+    (modify-syntax-entry '(?\330 . ?\366) "w" tbl)
+    (modify-syntax-entry '(?\370 . ?\377) "w" tbl)
+    tbl))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; font-lock commands are similar for caml-mode and inferior-caml-mode
 (defun caml-font-set-font-lock ()
+  (setq parse-sexp-lookup-properties t)
   (setq font-lock-defaults
-        '(caml-font-lock-keywords
-          nil nil nil nil
-          (font-lock-syntactic-face-function . caml-font-syntactic-face)))
-  (font-lock-mode 1))
+        (list
+         'caml-font-lock-keywords  ; keywords
+         nil  ; keywords-only
+         nil  ; case-fold
+         nil  ; syntax-alist
+         nil  ; syntax-begin
+         (cons 'font-lock-syntax-table caml-font-syntax-table)
+         '(font-lock-extend-after-change-region-function
+           . caml-font-extend-after-change)
+         '(font-lock-syntactic-face-function . caml-font-syntactic-face)
+         ))
+  (caml-font-extend-after-change (point-min) (point-max) 0)
+  (font-lock-mode 1)
+)
 (add-hook 'caml-mode-hook 'caml-font-set-font-lock)
 
 
@@ -116,11 +372,22 @@
     ,@caml-font-lock-keywords))
 
 (defun inferior-caml-set-font-lock ()
+  (setq parse-sexp-lookup-properties t)
   (setq font-lock-defaults
-        '(inferior-caml-font-lock-keywords
-          nil nil nil nil
-          (font-lock-syntactic-face-function . caml-font-syntactic-face)))
-  (font-lock-mode 1))
+        (list
+         'inferior-caml-font-lock-keywords  ; keywords
+         nil  ; keywords-only
+         nil  ; case-fold
+         nil  ; syntax-alist
+         nil  ; syntax-begin
+         (cons 'font-lock-syntax-table caml-font-syntax-table)
+         '(font-lock-extend-after-change-region-function
+           . caml-font-extend-after-change)
+         '(font-lock-syntactic-face-function . caml-font-syntactic-face)
+         ))
+  (caml-font-extend-after-change (point-min) (point-max) 0)
+  (font-lock-mode 1)
+)
 (add-hook 'inferior-caml-mode-hooks 'inferior-caml-set-font-lock)
 
 (provide 'caml-font)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]