emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim 42bc6092ed 10/41: Add pyim-candidates-create-get-dcache-words


From: ELPA Syncer
Subject: [elpa] externals/pyim 42bc6092ed 10/41: Add pyim-candidates-create-get-dcache-words
Date: Sat, 4 Jun 2022 09:57:45 -0400 (EDT)

branch: externals/pyim
commit 42bc6092edb378654cc5dd82e9216785efb8bf42
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    Add pyim-candidates-create-get-dcache-words
---
 pyim-candidates.el  | 104 +++++++++++++++++++++++++++-------------------------
 pyim-dcache.el      |   3 +-
 tests/pyim-tests.el |  28 +++++++++++++-
 3 files changed, 83 insertions(+), 52 deletions(-)

diff --git a/pyim-candidates.el b/pyim-candidates.el
index 89279f0afa..320484f1ae 100644
--- a/pyim-candidates.el
+++ b/pyim-candidates.el
@@ -165,54 +165,15 @@
 
 (defun pyim-candidates-create-quanpin (imobjs scheme &optional fast-search)
   "`pyim-candidates-create' 内部使用的函数。"
-  (let ((znabc-words (pyim-candidates-create-like-znabc imobjs scheme fast-search))
-        (jianpin-words (pyim-candidates-create-like-jianpin imobjs scheme))
-        personal-words common-words
-        pinyin-chars-1 pinyin-chars-2 chief-word)
+  (let* ((znabc-words (pyim-candidates-create-like-znabc imobjs scheme fast-search))
+         (jianpin-words (pyim-candidates-create-like-jianpin imobjs scheme))
+         (dcache-words (pyim-candidates-create-get-dcache-words imobjs scheme))
+         (personal-words (nth 0 dcache-words))
+         (common-words (nth 1 dcache-words))
+         (pinyin-chars-1 (nth 2 dcache-words))
+         (pinyin-chars-2 (nth 3 dcache-words))
+         chief-word)
 
-    ;; 获取个人词条,词库词条和第一汉字列表。
-    (dolist (imobj imobjs)
-      (let* (;; 个人词条
-             (w1 (pyim-dcache-get
-                  (string-join (pyim-codes-create imobj scheme) "-")
-                  (if pyim-enable-shortcode
-                      '(icode2word ishortcode2word)
-                    '(icode2word))))
-             ;; 词库词条
-             (w2 (pyim-dcache-get
-                  (string-join (pyim-codes-create imobj scheme) "-")
-                  (if pyim-enable-shortcode
-                      '(code2word shortcode2word)
-                    '(code2word))))
-             ;; 第一个汉字
-             (w3 (pyim-dcache-get
-                  (car (pyim-codes-create imobj scheme))))
-             ;; 如果 w3 找不到第一个拼音对应的汉字,那就进一步使用
-             ;; `pyim-pymap-py2cchar-get' 来查找,这个函数支持声母搜索。可以得到
-             ;; 更多的词条。
-             (w4 (unless w3
-                   (mapcar #'char-to-string
-                           (pyim-zip
-                            (mapcar (lambda (x)
-                                      ;; NOTE: 这里只取最常用的汉字,太多的汉字会带来后续处理压力,可能拖慢输入法。不过
-                                      ;; 这个结论只是猜测。
-                                      (car (split-string x "|")))
-                                    (pyim-pymap-py2cchar-get
-                                     (car (pyim-codes-create imobj scheme)))))))))
-        (push w1 personal-words)
-        (push w2 common-words)
-        (push w3 pinyin-chars-1)
-        (push w4 pinyin-chars-2)))
-
-    (setq jianpin-words (pyim-zip (nreverse jianpin-words) fast-search))
-    (setq personal-words (pyim-zip (nreverse personal-words) fast-search))
-    (setq common-words (pyim-zip (nreverse common-words) fast-search))
-    (setq pinyin-chars-1 (pyim-zip (nreverse pinyin-chars-1) fast-search))
-    (setq pinyin-chars-2 (pyim-zip (nreverse pinyin-chars-2) fast-search))
-
-    ;; 个人词条排序:使用词频信息对个人词库得到的候选词排序,第一个词条的位置
-    ;; 比较特殊,不参与排序,具体原因请参考 `pyim-page-select-word' 中的
-    ;; comment.
     (setq personal-words (pyim-candidates-sort personal-words))
     (setq chief-word (pyim-candidates-get-chief scheme personal-words))
 
@@ -253,7 +214,7 @@
                       (pyim-zip codes))
               fast-search)))
 
-(defun pyim-candidates-create-like-jianpin (imobjs scheme)
+(defun pyim-candidates-create-like-jianpin (imobjs scheme &optional fast-search)
   "简拼模式。
 
  假如输入 \"nih\" ,那么搜索 code 为 \"n-h\" 的词条,然后筛选出所
@@ -282,7 +243,52 @@
                       (string-match-p regexp1 (pyim-cstring-to-pinyin cstr nil "-")))
                     w1)))
           (push (delete-dups (append w2 w1)) jianpin-words)))
-      jianpin-words)))
+      (pyim-zip (nreverse jianpin-words) fast-search))))
+
+(defun pyim-candidates-create-get-dcache-words (imobjs scheme &optional fast-search ignore-pymap-chars)
+  "Return a list of personal words, dictionary words and two first-character lists for IMOBJS."
+  (let (personal-words common-words pinyin-chars-1 pinyin-chars-2)
+    (dolist (imobj imobjs)
+      (let* (;; Personal entries.
+             (w1 (pyim-dcache-get
+                  (string-join (pyim-codes-create imobj scheme) "-")
+                  (if pyim-enable-shortcode
+                      '(icode2word ishortcode2word)
+                    '(icode2word))))
+             ;; Dictionary entries.
+             (w2 (pyim-dcache-get
+                  (string-join (pyim-codes-create imobj scheme) "-")
+                  (if pyim-enable-shortcode
+                      '(code2word shortcode2word)
+                    '(code2word))))
+             ;; Entries for the first code only (the first character).
+             (w3 (pyim-dcache-get
+                  (car (pyim-codes-create imobj scheme))))
+             ;; When w3 is empty and IGNORE-PYMAP-CHARS is nil, fall back to a pymap
+             ;; lookup of the first code (via `pyim-pymap-py2cchar-get'); per the original
+             ;; note that lookup supports initial-only search and can yield more entries.
+             (w4 (when (and (not w3) (not ignore-pymap-chars))
+                   (pyim-candidates-create-get-pymap-chars
+                    (car (pyim-codes-create imobj scheme))))))
+        (push w1 personal-words)
+        (push w2 common-words)
+        (push w3 pinyin-chars-1)
+        (push w4 pinyin-chars-2)))
+    (setq personal-words (pyim-zip (nreverse personal-words) fast-search)) ; undo `push' order; FAST-SEARCH goes to `pyim-zip'
+    (setq common-words (pyim-zip (nreverse common-words) fast-search))
+    (setq pinyin-chars-1 (pyim-zip (nreverse pinyin-chars-1) fast-search))
+    (setq pinyin-chars-2 (pyim-zip (nreverse pinyin-chars-2) fast-search))
+    (list personal-words common-words pinyin-chars-1 pinyin-chars-2)))
+
+(defun pyim-candidates-create-get-pymap-chars (pinyin)
+  "Return the characters that the pymap table gives for PINYIN, as a list of strings."
+  (mapcar #'char-to-string
+          (pyim-zip
+           (mapcar (lambda (x)
+                     ;; NOTE: keep only the part of each entry before the first "|" (the most common
+                     ;; characters); too many might slow the input method down.  This is only a guess.
+                     (car (split-string x "|")))
+                   (pyim-pymap-py2cchar-get pinyin)))))
 
 (cl-defmethod pyim-candidates-create (_imobjs (_scheme pyim-scheme-shuangpin))
   "按照 SCHEME, 从 IMOBJS 获得候选词条,用于双拼输入法。"
diff --git a/pyim-dcache.el b/pyim-dcache.el
index 0a9885569d..59e37922e6 100644
--- a/pyim-dcache.el
+++ b/pyim-dcache.el
@@ -272,8 +272,7 @@ non-nil,文件存在时将会提示用户是否覆盖,默认为覆盖模式"
 当词库文件加载完成后,pyim 就可以用这个函数从词库缓存中搜索某个
 code 对应的中文词条了."
   (when code
-    `(,@(pyim-dcache-call-api 'get code from)
-      ,@(pyim-pymap-py2cchar-get code t t))))
+    (pyim-dcache-call-api 'get code from)))
 
 ;; * Footer
 (provide 'pyim-dcache)
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index cbc4a4bfda..dac7c90dbe 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -529,14 +529,40 @@
 (ert-deftest pyim-tests-pyim-candidates-create-like-jianpin ()
   (let* ((pyim-dhashcache-code2word (make-hash-table :test #'equal))
          (pyim-dhashcache-icode2word (make-hash-table :test #'equal))
+         (pyim-dhashcache-ishortcode2word (make-hash-table :test #'equal))
          (quanpin (pyim-scheme-get 'quanpin))
          (imobjs1 (pyim-imobjs-create "nih" quanpin))
          (imobjs2 (pyim-imobjs-create "ni" quanpin)))
     (puthash "n-h" (list "你好" "你坏" "尼耗" "南好" "内核" "内河") pyim-dhashcache-ishortcode2word)
     (should (equal (pyim-candidates-create-like-jianpin imobjs1 quanpin)
-                   '(("你好" "你坏" "尼耗"))))
+                   '("你好" "你坏" "尼耗")))
     (should-not (pyim-candidates-create-like-jianpin imobjs2 quanpin))))
 
+(ert-deftest pyim-tests-pyim-candidates-get-dcache-words ()
+  (let* ((pyim-dhashcache-code2word (make-hash-table :test #'equal)) ; rebind the four tables to fresh, empty ones
+         (pyim-dhashcache-icode2word (make-hash-table :test #'equal))
+         (pyim-dhashcache-shortcode2word (make-hash-table :test #'equal))
+         (pyim-dhashcache-ishortcode2word (make-hash-table :test #'equal))
+         (quanpin (pyim-scheme-get 'quanpin))
+         (imobjs1 (pyim-imobjs-create "n" quanpin))
+         (imobjs2 (pyim-imobjs-create "ni-hao" quanpin))
+         (imobjs3 (pyim-imobjs-create "ni" quanpin)))
+    (puthash "n" (list "你" "您" "妮") pyim-dhashcache-ishortcode2word)
+    (puthash "ni" (list "你" "尼") pyim-dhashcache-icode2word)
+    (puthash "ni" (list "你" "尼") pyim-dhashcache-code2word)
+    (puthash "ni-hao" (list "你好" "尼耗") pyim-dhashcache-code2word)
+    (puthash "n-h" (list "你好" "你坏" "尼耗" "南好" "内核" "内河") pyim-dhashcache-ishortcode2word)
+    (should (equal (pyim-candidates-create-get-dcache-words imobjs1 quanpin nil t) ; 4th arg t = IGNORE-PYMAP-CHARS
+                   '(("你" "您" "妮") nil nil nil)))
+    (should (equal (pyim-candidates-create-get-dcache-words imobjs2 quanpin nil t)
+                   '(nil ("你好" "尼耗") ("你好" "尼耗") nil)))
+    (should (equal (pyim-candidates-create-get-dcache-words imobjs3 quanpin nil t)
+                   '(("你" "尼") ("你" "尼") ("你" "尼" "你" "尼") nil))))) ; last element nil: pymap fallback is disabled
+
+(ert-deftest pyim-tests-pyim-candidates-create-get-pymap-chars ()
+  (should (equal (cl-subseq (pyim-candidates-create-get-pymap-chars "ni") 0 10) ; compare only the first 10 results
+                 '("你" "年" "娘" "鸟" "摄" "您" "宁" "牛" "尼" "念"))))
+
 (ert-deftest pyim-tests-pyim-candidates-search-buffer ()
   (with-temp-buffer
     (insert "你好你好你坏你坏你话牛蛤牛和牛蛤牛蛤牛蛤牛蛤牛蛤")



reply via email to

[Prev in Thread] Current Thread [Next in Thread]