emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/org 801be9d 2/4: org-element-cache-map: Reduce regexp s


From: ELPA Syncer
Subject: [elpa] externals/org 801be9d 2/4: org-element-cache-map: Reduce regexp search overheads
Date: Wed, 27 Oct 2021 02:57:34 -0400 (EDT)

branch: externals/org
commit 801be9dcd0ae7d74deb140c2fdfd8e45c295d8d6
Author: Ihor Radchenko <yantar92@gmail.com>
Commit: Ihor Radchenko <yantar92@gmail.com>

    org-element-cache-map: Reduce regexp search overheads
    
    * lisp/org-element.el (org-element--cache-gapless): New variable
    tracking when cache does not contain gaps.
    (org-element-cache-reset): Initialise `org-element--cache-gapless'.
    (org-element-cache-map): Fill the cache gaps before running FUNC
    query.  When multiple calls to `org-element-cache-map' are done on
    unchanged buffer, pre-processing only requires a single regexp search
    pass across i.e. headlines.  Subsequent `org-element-cache-map' calls
    can then be reduced to a simple cache tree walk.
---
 lisp/org-element.el | 282 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 170 insertions(+), 112 deletions(-)

diff --git a/lisp/org-element.el b/lisp/org-element.el
index 75e8409..ea888d8 100644
--- a/lisp/org-element.el
+++ b/lisp/org-element.el
@@ -6893,6 +6893,7 @@ buffers."
                                 (current-buffer)
                                 :inherit 'org-element--cache))
         (setq-local org-element--cache-change-tic (buffer-chars-modified-tick))
+        (setq-local org-element--cache-gapless nil)
        (setq-local org-element--cache
                    (avl-tree-create #'org-element--cache-compare))
         (setq-local org-element--headline-cache
@@ -6917,6 +6918,11 @@ buffers."
     (org-element--cache-set-timer (current-buffer))))
 
 (defvar warning-minimum-log-level) ; Defined in warning.el
+(defvar-local org-element--cache-gapless nil
+  "An alist containing (granularity . `org-element--cache-change-tic') 
elements.
+Each element indicates the latest `org-element--cache-change-tic' when
+change did not contain gaps.")
+(defvar org-element-cache-map--recurse nil)
 ;;;###autoload
 (cl-defun org-element-cache-map (func &key (granularity 'headline+inlinetask) 
restrict-elements
                            next-re fail-re from-pos (to-pos 
(point-max-marker)) after-element limit-count
@@ -7033,41 +7039,48 @@ of FUNC.  Changes to elements made in FUNC will also 
alter the cache."
                       ;; can be found.  When RE is nil, just find element at
                       ;; point.
                       (move-start-to-next-match
-                       (re) `(save-match-data
-                               (if (or (not ,re) (re-search-forward (or 
(car-safe ,re) ,re) nil 'move))
-                                   (unless (< (point) (or start -1))
-                                     (if (cdr-safe ,re)
-                                         ;; Avoid parsing when we are 100%
-                                         ;; sure that regexp is good enough
-                                         ;; to find new START.
-                                         (setq start (match-beginning 0))
-                                       (setq start (max (or start -1)
-                                                        (org-element-property 
:begin (element-match-at-point)))))
-                                     (when (>= start to-pos) 
(cache-walk-abort)))
-                                 (cache-walk-abort))))
+                        (re) `(save-match-data
+                                (if (or (not ,re) (re-search-forward (or 
(car-safe ,re) ,re) nil 'move))
+                                    (unless (or (< (point) (or start -1))
+                                                (and data
+                                                     (< (point) 
(org-element-property :begin data))))
+                                      (if (cdr-safe ,re)
+                                          ;; Avoid parsing when we are 100%
+                                          ;; sure that regexp is good enough
+                                          ;; to find new START.
+                                          (setq start (match-beginning 0))
+                                        (setq start (max (or start -1)
+                                                         (or 
(org-element-property :begin data) -1)
+                                                         (org-element-property 
:begin (element-match-at-point)))))
+                                      (when (>= start to-pos) 
(cache-walk-abort)))
+                                  (cache-walk-abort))))
                       ;; Find expected begin position of an element after
                       ;; DATA.
                       (next-element-start
-                       (data) `(let (next-start)
-                                 (if (memq granularity '(headline 
headline+inlinetask))
-                                     (setq next-start (or (when (memq 
(org-element-type data) '(headline org-data))
-                                                            
(org-element-property :contents-begin data))
-                                                          
(org-element-property :end data)))
-                                  (setq next-start (or (when (memq 
(org-element-type data) org-element-greater-elements)
-                                                          
(org-element-property :contents-begin data))
-                                                        (org-element-property 
:end data))))
-                                 ;; DATA end may be the last element inside
-                                 ;; i.e. source block.  Skip up to the end
-                                 ;; of parent in such case.
-                                 (let ((parent data))
-                                  (catch :exit
-                                     (when (eq next-start 
(org-element-property :contents-end parent))
-                                      (setq start (org-element-property :end 
parent)))
-                                    (while (setq parent (org-element-property 
:parent parent))
-                                      (if (eq next-start (org-element-property 
:contents-end parent))
-                                          (setq next-start 
(org-element-property :end parent))
-                                         (throw :exit t)))))
-                                 next-start)))
+                        (data) `(let (next-start)
+                                  (if (memq granularity '(headline 
headline+inlinetask))
+                                      (setq next-start (or (when (memq 
(org-element-type data) '(headline org-data))
+                                                             
(org-element-property :contents-begin data))
+                                                           
(org-element-property :end data)))
+                                   (setq next-start (or (when (memq 
(org-element-type data) org-element-greater-elements)
+                                                           
(org-element-property :contents-begin data))
+                                                         (org-element-property 
:end data))))
+                                  ;; DATA end may be the last element inside
+                                  ;; i.e. source block.  Skip up to the end
+                                  ;; of parent in such case.
+                                  (let ((parent data))
+                                   (catch :exit
+                                      (when (eq next-start 
(org-element-property :contents-end parent))
+                                       (setq next-start (org-element-property 
:end parent)))
+                                     (while (setq parent (org-element-property 
:parent parent))
+                                       (if (eq next-start 
(org-element-property :contents-end parent))
+                                           (setq next-start 
(org-element-property :end parent))
+                                          (throw :exit t)))))
+                                  next-start))
+                      ;; Check if cache does not have gaps.
+                      (cache-gapless-p
+                        () `(eq org-element--cache-change-tic
+                                (alist-get granularity 
org-element--cache-gapless))))
           ;; The core algorithm is simple walk along binary tree.  However,
           ;; instead of checking all the tree elements from first to last
           ;; (like in `avl-tree-mapcar'), we begin from FROM-POS skipping
@@ -7095,9 +7108,13 @@ of FUNC.  Changes to elements made in FUNC will also 
alter the cache."
                  ;; sure that we do not try to search it again.
                  (prev after-element)
                  (node (cache-root))
+                 data
                  (stack (list nil))
                  (leftp t)
                  result
+                 ;; Whether previous element matched FUNC (FUNC
+                 ;; returned non-nil).
+                 (last-match t)
                  continue-flag
                  ;; Byte-compile FUNC making sure that it is as performant
                  ;; as it could be.
@@ -7174,58 +7191,89 @@ of FUNC.  Changes to elements made in FUNC will also 
alter the cache."
                  (predicate-time 0)
                  (count-predicate-calls-match 0)
                  (count-predicate-calls-fail 0))
-            ;; Skip over to the first potential match.
-            (when next-re
-              (goto-char (or start (point-min)))
-              (move-start-to-next-match next-re))
-            (when next-element-re
-              (goto-char (or start (point-min)))
-              (move-start-to-next-match next-element-re))
+            ;; Skip to first element within region.
+            (goto-char (or start (point-min)))
+            (move-start-to-next-match next-element-re)
             (unless (and start (>= start to-pos))
+              ;; Pre-process cache filling all the gaps.
+              (unless (or org-element-cache-map--recurse
+                          (cache-gapless-p)
+                          ;; Pre-processing all the elements in large
+                          ;; buffers when NEXT-RE/FAIL-RE are provided
+                          ;; may be much slower compared to using
+                          ;; regexp.
+                          (and (eq granularity 'element)
+                               (or next-re fail-re)))
+                (let ((org-element-cache-map--recurse t))
+                  (org-element-cache-map
+                   #'ignore
+                   :granularity granularity)
+                  ;; Re-assign the cache root after filling the cache
+                  ;; gaps.
+                  (setq node (cache-root)))
+                (setf (alist-get granularity org-element--cache-gapless)
+                      org-element--cache-change-tic))
               (while node
-                (let ((data (avl-tree--node-data node)))
-                  (if (and leftp (avl-tree--node-left node) ; Left branch.
-                           ;; Do not move to left branch when we are before
-                           ;; PREV.
-                          (or (not prev)
-                              (not (org-element--cache-key-less-p
-                                    (org-element--cache-key data)
-                                    (org-element--cache-key prev))))
-                           ;; ... or when we are before START.
-                           (or (not start)
-                               (not (> start (org-element-property :begin 
data)))))
-                     (progn (push node stack)
-                            (setq node (avl-tree--node-left node)))
-                    ;; The whole tree left to DATA is before START and
-                    ;; PREV.  DATA may still be before START (i.e. when
-                    ;; DATA is the root or when START moved), at START, or
-                    ;; after START.
-                    ;;
-                    ;; If DATA is before start, skip it over and move to
-                    ;; subsequent elements.
-                    ;; If DATA is at start, run FUNC if necessary and
-                    ;; update START according and NEXT-RE, FAIL-RE,
-                    ;; NEXT-ELEMENT-RE.
-                    ;; If DATA is after start, we have found a cache gap
-                    ;; and need to fill it.
-                    (unless (or (and start (< (org-element-property :begin 
data) start))
-                               (and prev (not (org-element--cache-key-less-p
-                                               (org-element--cache-key prev)
-                                               (org-element--cache-key 
data)))))
-                      ;; DATA is at of after START and PREV.
-                     (if (or (not start) (= (org-element-property :begin data) 
start))
-                          ;; DATA is at START.  Match it.
-                          ;; In the process, we may alter the buffer,
-                          ;; so also keep track of the cache state.
-                          (let ((modified-tic org-element--cache-change-tic)
-                                (cache-size (cache-size)))
+                (setq data (avl-tree--node-data node))
+                (if (and leftp (avl-tree--node-left node) ; Left branch.
+                         ;; Do not move to left branch when we are before
+                         ;; PREV.
+                        (or (not prev)
+                            (not (org-element--cache-key-less-p
+                                (org-element--cache-key data)
+                                (org-element--cache-key prev))))
+                         ;; ... or when we are before START.
+                         (or (not start)
+                             (not (> start (org-element-property :begin 
data)))))
+                   (progn (push node stack)
+                          (setq node (avl-tree--node-left node)))
+                  ;; The whole tree left to DATA is before START and
+                  ;; PREV.  DATA may still be before START (i.e. when
+                  ;; DATA is the root or when START moved), at START, or
+                  ;; after START.
+                  ;;
+                  ;; If DATA is before start, skip it over and move to
+                  ;; subsequent elements.
+                  ;; If DATA is at start, run FUNC if necessary and
+                  ;; update START according and NEXT-RE, FAIL-RE,
+                  ;; NEXT-ELEMENT-RE.
+                  ;; If DATA is after start, we have found a cache gap
+                  ;; and need to fill it.
+                  (unless (or (and start (< (org-element-property :begin data) 
start))
+                             (and prev (not (org-element--cache-key-less-p
+                                           (org-element--cache-key prev)
+                                           (org-element--cache-key data)))))
+                    ;; DATA is at of after START and PREV.
+                   (if (or (not start) (= (org-element-property :begin data) 
start))
+                        ;; DATA is at START.  Match it.
+                        ;; In the process, we may alter the buffer,
+                        ;; so also keep track of the cache state.
+                        (let ((modified-tic org-element--cache-change-tic)
+                              (cache-size (cache-size)))
+                          ;; When NEXT-RE/FAIL-RE is provided, skip to
+                          ;; next regexp match after :begin of the current
+                          ;; element.
+                          (when (if last-match next-re fail-re)
+                            (goto-char (org-element-property :begin data))
+                            (move-start-to-next-match
+                             (if last-match next-re fail-re)))
+                          (when (and (or (not start) (eq (org-element-property 
:begin data) start))
+                                     (< (org-element-property :begin data) 
to-pos)) 
                             ;; Calculate where next possible element
                             ;; starts and update START if needed.
                            (setq start (next-element-start data))
+                            (goto-char start)
                             ;; Move START further if possible.
-                            (when next-element-re
-                              (goto-char start)
-                              (move-start-to-next-match next-element-re))
+                            (when (and next-element-re
+                                       ;; Do not move if we know for
+                                       ;; sure that cache does not
+                                       ;; contain gaps.  Regexp
+                                       ;; searches are not cheap.
+                                       (not (cache-gapless-p)))
+                              (move-start-to-next-match next-element-re)
+                              ;; Make sure that point is at START
+                              ;; before running FUNC.
+                              (goto-char start))
                             ;; Try FUNC if DATA matches all the
                             ;; restrictions.  Calculate new START.
                             (when (or (not restrict-elements)
@@ -7244,13 +7292,23 @@ of FUNC.  Changes to elements made in FUNC will also 
alter the cache."
                                       (cl-incf count-predicate-calls-fail)))
                                 (push (funcall func data) result)
                                 (when (car result) (cl-incf 
count-predicate-calls-match)))
-                              ;; Use NEXT-RE/FAIL-RE to skip
-                              ;; forward to next match.
-                              (goto-char (max start (point) ))
-                              (move-start-to-next-match
-                               (if (car result) next-re fail-re))
+                              ;; Set `last-match'.
+                              (setq last-match (car result))
+                              ;; If FUNC moved point forward, update
+                              ;; START.
+                              (when (> (point) start)
+                                (move-start-to-next-match nil))
                               ;; Drop nil.
                               (unless (car result) (pop result)))
+                            ;; If FUNC did not move the point and we
+                            ;; know for sure that cache does not contain
+                            ;; gaps, do not try to calculate START in
+                            ;; advance but simply loop to the next cache
+                            ;; element.
+                            (when (and (cache-gapless-p)
+                                       (eq (next-element-start data)
+                                           start))
+                              (setq start nil))
                             ;; Check if the buffer has been modified.
                             (unless (and (eq modified-tic 
org-element--cache-change-tic)
                                          (eq cache-size (cache-size)))
@@ -7281,35 +7339,35 @@ of FUNC.  Changes to elements made in FUNC will also 
alter the cache."
                               (cache-walk-abort))
                             (if (org-element-property :cached data)
                                (setq prev data)
-                              (setq prev nil)))
-                        ;; DATA is after START.  Fill the gap.
-                        (if (memq (org-element-type (org-element--parse-to 
start)) '(plain-list table))
-                            ;; Tables and lists are special, we need a
-                            ;; trickery to make items/rows be populated
-                            ;; into cache.
-                            (org-element--parse-to (1+ start)))
-                        ;; Restart tree traversal as AVL tree is
-                        ;; re-balanced upon adding elements.  We can no
-                        ;; longer trust STACK.
-                        (cache-walk-restart)))
-                    ;; Second, move to the right branch of the tree or skip
-                    ;; it alltogether.
-                    (if continue-flag
-                       (setq continue-flag nil)
-                     (setq node (if (and (car stack)
-                                          ;; If START advanced beyond stack 
parent, skip the right branch.
-                                          (or (and start (< 
(org-element-property :begin (avl-tree--node-data (car stack))) start))
-                                             (and prev 
(org-element--cache-key-less-p
-                                                        
(org-element--cache-key (avl-tree--node-data (car stack)))
-                                                         
(org-element--cache-key prev)))))
-                                     (progn
-                                       (setq leftp nil)
-                                       (pop stack))
-                                   ;; Otherwise, move ahead into the right
-                                   ;; branch when it exists.
-                                   (if (setq leftp (avl-tree--node-right node))
-                                      (avl-tree--node-right node)
-                                    (pop stack)))))))))
+                              (setq prev nil))))
+                      ;; DATA is after START.  Fill the gap.
+                      (if (memq (org-element-type (org-element--parse-to 
start)) '(plain-list table))
+                          ;; Tables and lists are special, we need a
+                          ;; trickery to make items/rows be populated
+                          ;; into cache.
+                          (org-element--parse-to (1+ start)))
+                      ;; Restart tree traversal as AVL tree is
+                      ;; re-balanced upon adding elements.  We can no
+                      ;; longer trust STACK.
+                      (cache-walk-restart)))
+                  ;; Second, move to the right branch of the tree or skip
+                  ;; it alltogether.
+                  (if continue-flag
+                     (setq continue-flag nil)
+                   (setq node (if (and (car stack)
+                                        ;; If START advanced beyond stack 
parent, skip the right branch.
+                                        (or (and start (< 
(org-element-property :begin (avl-tree--node-data (car stack))) start))
+                                           (and prev 
(org-element--cache-key-less-p
+                                                      (org-element--cache-key 
(avl-tree--node-data (car stack)))
+                                                       (org-element--cache-key 
prev)))))
+                                   (progn
+                                     (setq leftp nil)
+                                     (pop stack))
+                                 ;; Otherwise, move ahead into the right
+                                 ;; branch when it exists.
+                                 (if (setq leftp (avl-tree--node-right node))
+                                    (avl-tree--node-right node)
+                                  (pop stack))))))))
             (when (and org-element--cache-map-statistics
                        (or (not org-element--cache-map-statistics-threshold)
                            (> (- (float-time) time) 
org-element--cache-map-statistics-threshold)))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]