[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/ebdb da1c6f6 01/12: Improve name parsing
From: Eric Abrahamsen
Subject: [elpa] externals/ebdb da1c6f6 01/12: Improve name parsing
Date: Tue, 12 Jun 2018 22:06:08 -0400 (EDT)
branch: externals/ebdb
commit da1c6f677804483d155b08a0ac63e4825f49e7b4
Author: Eric Abrahamsen <address@hidden>
Commit: Eric Abrahamsen <address@hidden>
Improve name parsing
* ebdb.el (ebdb-lastname-re): Better regexp for matching surnames with
hyphens and apostrophes.
(ebdb-divide-name): Check for all-caps UN-style surname. Also, now
returns given names as a list.
(ebdb-parse): Complex name method assumes given-names are
already a list.
* ebdb-test.el (ebdb-parse-name): Add more test cases.
---
ebdb-test.el | 32 +++++++++++++++++++++++++++++++-
ebdb.el | 56 +++++++++++++++++++++++++++++++++++---------------------
2 files changed, 66 insertions(+), 22 deletions(-)
diff --git a/ebdb-test.el b/ebdb-test.el
index a5c4c86..2e856ce 100644
--- a/ebdb-test.el
+++ b/ebdb-test.el
@@ -347,7 +347,37 @@ If it doesn't exist, raise `ebdb-related-unfound'."
(slot-value
(ebdb-parse 'ebdb-field-name-complex "Eric Abrahamsen, III")
'suffix)
- "III")))
+ "III"))
+ (should (equal
+ (slot-value
+ (ebdb-parse 'ebdb-field-name-complex "Albus Percival Wulfric Brian Dumbledore")
+ 'given-names)
+ '("Albus" "Percival" "Wulfric" "Brian")))
+ (should (equal
+ (slot-value
+ (ebdb-parse 'ebdb-field-name-complex "MURAKAMI Haruki")
+ 'surname)
+ "Murakami"))
+ (should (equal
+ (slot-value
+ (ebdb-parse 'ebdb-field-name-complex "Fintan O'Toole")
+ 'surname)
+ "O'Toole"))
+ (should (equal
+ (slot-value
+ (ebdb-parse 'ebdb-field-name-complex "O'Toole, Fintan")
+ 'surname)
+ "O'Toole"))
+ (should (equal
+ (slot-value
+ (ebdb-parse 'ebdb-field-name-complex "O'TOOLE Fintan")
+ 'surname)
+ "O'Toole"))
+ (should (equal
+ (slot-value
+ (ebdb-parse 'ebdb-field-name-complex "Daniel Micahel Blake Day-Lewis")
+ 'surname)
+ "Day-Lewis")))
;; Snarf testing.
diff --git a/ebdb.el b/ebdb.el
index 5a95a51..3cacfbb 100644
--- a/ebdb.el
+++ b/ebdb.el
@@ -578,8 +578,8 @@ Case is ignored."
(defcustom ebdb-lastname-re
(concat "[- \t]*\\(\\(?:\\<"
(regexp-opt ebdb-lastname-prefixes)
- ;; multiple last names concatenated by `-'
- "\\>[- \t]+\\)?\\(?:\\w+[ \t]*-[ \t]*\\)*\\w+\\)\\'")
+ ;; Last names can contain hyphens and apostrophes.
+ "\\>[- \t]+\\)?\\w[[:word:]'-]+\\)\\>")
"Regexp matching the last name of a full name.
Its first parenthetical subexpression becomes the last name."
:group 'ebdb-record-edit
@@ -1296,8 +1296,7 @@ first one."
(ebdb-divide-name str)))
(unless (plist-get slots :given-names)
(setq slots (plist-put slots :given-names
- (when given-names
- (split-string given-names nil t)))))
+ given-names)))
(unless (plist-get slots :surname)
(setq slots (plist-put slots :surname
(or surname ""))))
@@ -4866,27 +4865,42 @@ also be one of the special symbols below.
(defun ebdb-divide-name (string)
"Divide STRING into its component parts.
-Case is ignored. Return name as a list of (LAST FIRST SUFFIX).
-LAST is always a string (possibly empty). FIRST and SUFFIX may
-be nil."
+Return name as a list of (SURNAME GIVEN-NAMES SUFFIX). SURNAME
+is always a string (possibly empty). GIVEN-NAMES, if present, is
+a list of first names. GIVEN-NAMES and SUFFIX may be nil.
+
+During parsing `case-fold-search' is non-nil, with the exception
+that a string of all-upper-case letters will be assumed (a la UN
+usage) to represent the surname."
(let ((case-fold-search t)
- first suffix)
+ given suffix)
;; Separate a suffix.
- (if (string-match ebdb-lastname-suffix-re string)
- (setq suffix (match-string 1 string)
- string (substring string 0 (match-beginning 0))))
- (cond ((string-match "\\`\\(.+\\),[ \t\n]*\\(.+\\)\\'" string)
- ;; If STRING contains a comma, this probably means that STRING
- ;; is of the form "Last, First".
- (setq first (match-string 2 string)
- string (match-string 1 string)))
- ((string-match ebdb-lastname-re string)
- (setq first (and (not (zerop (match-beginning 0)))
- (substring string 0 (match-beginning 0)))
- string (match-string 1 string))))
+ (when (string-match ebdb-lastname-suffix-re string)
+ (setq suffix (match-string 1 string)
+ string (substring string 0 (match-beginning 0))))
+ (if (let ((case-fold-search nil))
+ ;; If there's an all-upper-case word, it's the last name.
+ (string-match
+ "[ \t\n]*\\([[:upper:]]+[[:upper:]-']+\\)\\>[ \t\n]*"
+ string))
+ (setq given (concat (substring string 0 (match-beginning 1))
+ " "
+ (substring string (match-end 1)))
+ string (capitalize (match-string 1 string)))
+ (cond ((string-match
+ (concat "\\`" ebdb-lastname-re ",[ \t\n]*\\(.+\\)\\'")
+ string)
+ ;; If STRING contains a comma, this probably means that STRING
+ ;; is of the form "Last, First".
+ (setq given (match-string 2 string)
+ string (match-string 1 string)))
+ ((string-match (concat ebdb-lastname-re "[ ,]*\\'") string)
+ (setq given (and (not (zerop (match-beginning 0)))
+ (substring string 0 (match-beginning 0)))
+ string (match-string 1 string)))))
(delq nil
(list (ebdb-string-trim string)
- (and first (ebdb-string-trim first))
+ (and given (split-string given nil t))
suffix))))
(defsubst ebdb-record-lessp (record1 record2)
- [elpa] externals/ebdb updated (cadffb1 -> 60f8828), Eric Abrahamsen, 2018/06/12
- [elpa] externals/ebdb da1c6f6 01/12: Improve name parsing, Eric Abrahamsen <=
- [elpa] externals/ebdb 365fa1c 04/12: Add nicknames to person record cache 'alt-names, Eric Abrahamsen, 2018/06/12
- [elpa] externals/ebdb 9febf18 02/12: New command ebdb-save-ebdb, Eric Abrahamsen, 2018/06/12
- [elpa] externals/ebdb 91f0953 05/12: Let the typos begin!, Eric Abrahamsen, 2018/06/12
- [elpa] externals/ebdb b2b56f8 07/12: Fix to "add nicknames to person record cache", Eric Abrahamsen, 2018/06/12
- [elpa] externals/ebdb 60f8828 12/12: Bump and flush: 0.5.3, Eric Abrahamsen, 2018/06/12
- [elpa] externals/ebdb 140381f 09/12: Update README.org (#70), Eric Abrahamsen, 2018/06/12
- [elpa] externals/ebdb 49a2e8d 03/12: Make "record" argument to field init and delete non-optional, Eric Abrahamsen, 2018/06/12
- [elpa] externals/ebdb 5f00a46 10/12: Tweak default formatter settings, Eric Abrahamsen, 2018/06/12
- [elpa] externals/ebdb 7d20db4 06/12: Further improvements to snarfing, Eric Abrahamsen, 2018/06/12
- [elpa] externals/ebdb b89a009 08/12: Move defmethods beneath all classes used as specializers, Eric Abrahamsen, 2018/06/12