emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/ebdb da1c6f6 01/12: Improve name parsing


From: Eric Abrahamsen
Subject: [elpa] externals/ebdb da1c6f6 01/12: Improve name parsing
Date: Tue, 12 Jun 2018 22:06:08 -0400 (EDT)

branch: externals/ebdb
commit da1c6f677804483d155b08a0ac63e4825f49e7b4
Author: Eric Abrahamsen <address@hidden>
Commit: Eric Abrahamsen <address@hidden>

    Improve name parsing
    
    * ebdb.el (ebdb-lastname-re): Better regexp for matching surnames with
      hyphens and apostrophes.
      (ebdb-divide-name): Check for all-caps UN-style surname. Also, now
      returns given names as a list.
      (ebdb-parse): Complex name method assumes given-names are
      already a list.
    * ebdb-test.el (ebdb-parse-name): Add more test cases.
---
 ebdb-test.el | 32 +++++++++++++++++++++++++++++++-
 ebdb.el      | 56 +++++++++++++++++++++++++++++++++++---------------------
 2 files changed, 66 insertions(+), 22 deletions(-)

diff --git a/ebdb-test.el b/ebdb-test.el
index a5c4c86..2e856ce 100644
--- a/ebdb-test.el
+++ b/ebdb-test.el
@@ -347,7 +347,37 @@ If it doesn't exist, raise `ebdb-related-unfound'."
           (slot-value
            (ebdb-parse 'ebdb-field-name-complex "Eric Abrahamsen, III")
            'suffix)
-          "III")))
+          "III"))
+  (should (equal
+          (slot-value
+           (ebdb-parse 'ebdb-field-name-complex "Albus Percival Wulfric Brian Dumbledore")
+           'given-names)
+          '("Albus" "Percival" "Wulfric" "Brian")))
+  (should (equal
+          (slot-value
+           (ebdb-parse 'ebdb-field-name-complex "MURAKAMI Haruki")
+           'surname)
+          "Murakami"))
+  (should (equal
+          (slot-value
+           (ebdb-parse 'ebdb-field-name-complex "Fintan O'Toole")
+           'surname)
+          "O'Toole"))
+  (should (equal
+          (slot-value
+           (ebdb-parse 'ebdb-field-name-complex "O'Toole, Fintan")
+           'surname)
+          "O'Toole"))
+  (should (equal
+          (slot-value
+           (ebdb-parse 'ebdb-field-name-complex "O'TOOLE Fintan")
+           'surname)
+          "O'Toole"))
+  (should (equal
+          (slot-value
+           (ebdb-parse 'ebdb-field-name-complex "Daniel Micahel Blake Day-Lewis")
+           'surname)
+          "Day-Lewis")))
 
 ;; Snarf testing.
 
diff --git a/ebdb.el b/ebdb.el
index 5a95a51..3cacfbb 100644
--- a/ebdb.el
+++ b/ebdb.el
@@ -578,8 +578,8 @@ Case is ignored."
 (defcustom ebdb-lastname-re
   (concat "[- \t]*\\(\\(?:\\<"
           (regexp-opt ebdb-lastname-prefixes)
-          ;; multiple last names concatenated by `-'
-          "\\>[- \t]+\\)?\\(?:\\w+[ \t]*-[ \t]*\\)*\\w+\\)\\'")
+          ;; Last names can contain hyphens and apostrophes.
+          "\\>[- \t]+\\)?\\w[[:word:]'-]+\\)\\>")
   "Regexp matching the last name of a full name.
 Its first parenthetical subexpression becomes the last name."
   :group 'ebdb-record-edit
@@ -1296,8 +1296,7 @@ first one."
               (ebdb-divide-name str)))
     (unless (plist-get slots :given-names)
       (setq slots (plist-put slots :given-names
-                            (when given-names
-                              (split-string given-names nil t)))))
+                            given-names)))
     (unless (plist-get slots :surname)
       (setq slots (plist-put slots :surname
                             (or surname ""))))
@@ -4866,27 +4865,42 @@ also be one of the special symbols below.
 
 (defun ebdb-divide-name (string)
   "Divide STRING into its component parts.
-Case is ignored.  Return name as a list of (LAST FIRST SUFFIX).
-LAST is always a string (possibly empty).  FIRST and SUFFIX may
-be nil."
+Return name as a list of (SURNAME GIVEN-NAMES SUFFIX).  SURNAME
+is always a string (possibly empty).  GIVEN-NAMES, if present, is
+a list of first names.  GIVEN-NAMES and SUFFIX may be nil.
+
+During parsing `case-fold-search' is non-nil, with the exception
+that a string of all-upper-case letters will be assumed (a la UN
+usage) to represent the surname."
   (let ((case-fold-search t)
-        first suffix)
+       given suffix)
     ;; Separate a suffix.
-    (if (string-match ebdb-lastname-suffix-re string)
-        (setq suffix (match-string 1 string)
-              string (substring string 0 (match-beginning 0))))
-    (cond ((string-match "\\`\\(.+\\),[ \t\n]*\\(.+\\)\\'" string)
-           ;; If STRING contains a comma, this probably means that STRING
-           ;; is of the form "Last, First".
-           (setq first (match-string 2 string)
-                 string (match-string 1 string)))
-          ((string-match ebdb-lastname-re string)
-           (setq first (and (not (zerop (match-beginning 0)))
-                            (substring string 0 (match-beginning 0)))
-                 string (match-string 1 string))))
+    (when (string-match ebdb-lastname-suffix-re string)
+      (setq suffix (match-string 1 string)
+            string (substring string 0 (match-beginning 0))))
+    (if (let ((case-fold-search nil))
+         ;; If there's an all-upper-case word, it's the last name.
+         (string-match
+          "[ \t\n]*\\([[:upper:]]+[[:upper:]-']+\\)\\>[ \t\n]*"
+          string))
+       (setq given (concat (substring string 0 (match-beginning 1))
+                           " "
+                           (substring string (match-end 1)))
+             string (capitalize (match-string 1 string)))
+      (cond ((string-match
+             (concat "\\`" ebdb-lastname-re ",[ \t\n]*\\(.+\\)\\'")
+             string)
+             ;; If STRING contains a comma, this probably means that STRING
+             ;; is of the form "Last, First".
+             (setq given (match-string 2 string)
+                   string (match-string 1 string)))
+            ((string-match (concat ebdb-lastname-re "[ ,]*\\'") string)
+             (setq given (and (not (zerop (match-beginning 0)))
+                              (substring string 0 (match-beginning 0)))
+                   string (match-string 1 string)))))
     (delq nil
          (list (ebdb-string-trim string)
-               (and first (ebdb-string-trim first))
+               (and given (split-string given nil t))
                suffix))))
 
 (defsubst ebdb-record-lessp (record1 record2)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]