emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/ebdb bb01b5c 13/15: Mostly re-write ebdb-snarf-collect


From: Eric Abrahamsen
Subject: [elpa] externals/ebdb bb01b5c 13/15: Mostly re-write ebdb-snarf-collect
Date: Thu, 22 Mar 2018 00:58:57 -0400 (EDT)

branch: externals/ebdb
commit bb01b5c7ecd29b70797f76f83fbb5797818e26c6
Author: Eric Abrahamsen <address@hidden>
Commit: Eric Abrahamsen <address@hidden>

    Mostly re-write ebdb-snarf-collect
    
    * ebdb-snarf.el (ebdb-snarf-collect): Instead of searching for one
      likely chunk of field data, and then progressively searching on
      after that (which could skip field data depending on the order of
      ebdb-snarf-routines), now find all contiguous field data in a single
      search, then loop each search routine regexp over that whole block
      of text. Seems to work a lot better, though there will be a million
      edge cases to adjust for.
    
    Addresses #58
---
 ebdb-snarf.el | 131 +++++++++++++++++++++++++++-------------------------------
 1 file changed, 62 insertions(+), 69 deletions(-)

diff --git a/ebdb-snarf.el b/ebdb-snarf.el
index 505fc10..6f85a0e 100644
--- a/ebdb-snarf.el
+++ b/ebdb-snarf.el
@@ -149,17 +149,15 @@ list of other field instances.  Any element can be nil."
 
        ;; (Also see the snarfing tests in ebdb-test.el.)
 
-       ;; The tactic is: Make a big regexp that finds any probable
-       ;; field data.  Once there's a hit, search *backwards* for a
-       ;; name, and *forwards* for more fields.  All contiguous field
-       ;; data is grouped into the same bundle.  If the first field
-       ;; found is at bol, assume "block" style data, as in the third
-       ;; example above.  If it is not at bol, assume "inline" style
-       ;; data, as in the second example.
+       ;; The tactic is: Make a big regexp that finds a big blob of
+       ;; probable field data.  Once there's a hit, search
+       ;; *backwards* for a name, and *forwards* for more fields.
+       ;; All contiguous field data is grouped into the same bundle.
 
        ;; Snarfing mail message data is very common, it would be nice
        ;; to somehow disregard left-hand quotation characters and
-       ;; indendation.  A problem for another day.
+       ;; indentation.  See `mail-citation-prefix-regexp'.  A problem
+       ;; for another day.
        (big-re
         (concat
          "\\(?:"
@@ -169,73 +167,68 @@ list of other field instances.  Any element can be nil."
                 (cadr r)
               (mapconcat #'identity (cadr r) "\\|")))
           ebdb-snarf-routines
-          "\\|")
-         "\\)"))
-       bundle block name)
+          "\\|*")
+         "\\)+"))
+       (name-re (concat
+                 "\\("
+                 (mapconcat #'identity
+                            ebdb-snarf-name-re "\\|")
+                 "\\)[-\n ,:]*")))
 
     (with-temp-buffer
       (insert str)
       (goto-char (point-min))
       (while (re-search-forward big-re nil t)
-       (goto-char (match-beginning 0))
-       (setq block (= (point) (point-at-bol)))
-       (when (setq name
-                   (save-excursion
-                     (when (re-search-backward
-                            (concat
-                             "\\("
-                             (mapconcat #'identity
-                                        ebdb-snarf-name-re "\\|")
-                             "\\)")
-                            (save-excursion
-                              (if block
-                                  (progn (forward-line -1)
-                                         (line-beginning-position))
-                                (point-at-bol)))
-                            t)
-                       ;; If something goes wrong with the
-                       ;; name, don't worry about it.
-                       (ignore-errors
-                         (ebdb-parse
-                          'ebdb-field-name
-                          (string-trim (match-string-no-properties 0)))))))
-         ;; If NAME matches one of the records that are already in
-         ;; BUNDLES, then assume we should be working with that record.
-         (dolist (b bundles)
-           (when (and (aref b 0)
-                      (string-match-p (ebdb-string name)
-                                      (ebdb-string (aref b 0))))
-             (setq bundle b))))
-
-       (unless bundle
-         (setq bundle (make-vector 3 nil))
-         (when name
-           (push name (aref bundle 1))))
-
-       (dolist (class ebdb-snarf-routines)
-         (dolist (re (cdr class))
-           (while (re-search-forward re (if block
-                                            (save-excursion
-                                              (forward-line)
-                                              (line-end-position))
-                                          (point-at-eol))
-                                     t)
-             (condition-case nil
-                 (push (ebdb-parse
-                        (car class)
-                        (match-string-no-properties 1))
-                       (aref bundle 2))
-
-               ;; If a regular expression matches but the result is
-               ;; unparseable, that means the regexp is bad and should be
-               ;; changed.  Later, report these errors if `ebdb-debug' is
-               ;; true.
-               (ebdb-unparseable nil)))))
-       (when bundle
+       (let* ((start (goto-char (match-beginning 0)))
+              (bound (match-end 0))
+              (name (save-excursion
+                      (when (re-search-backward
+                             name-re
+                             (if (bolp)
+                                 (line-beginning-position 0)
+                               (point-at-bol))
+                             t)
+                        ;; If something goes wrong with the
+                        ;; name, don't worry about it.
+                        (ignore-errors
+                          (ebdb-parse
+                           'ebdb-field-name
+                           (string-trim (match-string-no-properties 0)))))))
+              (bundle (or (and
+                           name
+                           ;; If NAME matches one of the records that
+                           ;; are already in BUNDLES, then assume we
+                           ;; should be working with that record.
+                           (catch 'match
+                             (dolist (b bundles)
+                               (when (and (aref b 0)
+                                          (ebdb-record-search
+                                           (aref b 0)
+                                           'ebdb-field-name
+                                           (ebdb-string name)))
+                                 (throw 'match b)))))
+                          (make-vector 3 nil))))
+
+         (when (and name (null (aref bundle 0)))
+           (push name (aref bundle 1)))
+
+         (dolist (class ebdb-snarf-routines)
+           (dolist (re (cdr class))
+             (save-excursion
+               (while (re-search-forward re bound t)
+                 (condition-case nil
+                     (push (ebdb-parse
+                            (car class)
+                            (match-string-no-properties 1))
+                           (aref bundle 2))
+
+                   ;; If a regular expression matches but the result is
+                   ;; unparseable, that means the regexp is bad and should be
+                   ;; changed.  Later, report these errors if `ebdb-debug' is
+                   ;; true.
+                   (ebdb-unparseable nil))))))
          (push bundle bundles)
-         (setq bundle nil))
-       (when block
-         (beginning-of-line 2))))
+         (goto-char bound))))
     bundles))
 
 (defun ebdb-snarf-collapse (input)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]