emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim 0b73690671 2/4: 优化 pyim-candidates-get-chief 的速度。


From: ELPA Syncer
Subject: [elpa] externals/pyim 0b73690671 2/4: 优化 pyim-candidates-get-chief 的速度。
Date: Thu, 6 Jan 2022 22:57:48 -0500 (EST)

branch: externals/pyim
commit 0b73690671cbc59291f35dc183dfed14d3799503
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    优化 pyim-candidates-get-chief 的速度。
    
            * pyim-process.el (pyim-process-create-word): simplify.
    
            * pyim-dhashcache.el (pyim-dhashcache-iword2count-recent1)
            (pyim-dhashcache-iword2count-recent2): new variable.
            (pyim-dhashcache-init-count-variables)
            (pyim-dhashcache-save-personal-dcache-to-file): Use the above two new variables.
            (pyim-dhashcache-update-iword2count-recent): new function.
            (pyim-dhashcache-update-iword2count): handle the above two variables.
    
            * pyim-candidates.el (pyim-candidates-possible-chiefs)
            (pyim-candidates-add-possible-chief): removed.
            (pyim-candidates-get-chief): simplify.
---
 pyim-candidates.el | 64 ++++++++++++++++++++----------------------------------
 pyim-dhashcache.el | 29 ++++++++++++++++++++++++-
 pyim-process.el    |  1 -
 3 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/pyim-candidates.el b/pyim-candidates.el
index 639ecd67f5..2d93cbb95a 100644
--- a/pyim-candidates.el
+++ b/pyim-candidates.el
@@ -52,9 +52,6 @@
 
 细节信息请参考 `pyim-page-refresh' 的 docstring.")
 
-(defvar pyim-candidates-possible-chiefs nil
-  "可能做第一位候选词的词条列表。")
-
 (pyim-register-local-variables
  '(pyim-candidates pyim-candidate-position))
 
@@ -72,45 +69,32 @@ IMOBJS 获得候选词条。"
         (funcall (intern (format "pyim-candidates-create:%S" class))
                  imobjs scheme-name async)))))
 
-(defun pyim-candidates-add-possible-chief (word)
-  "将 WORD 添加到 `pyim-candidates-possible-chiefs'."
-  (push word pyim-candidates-possible-chiefs)
-  (setq pyim-candidates-possible-chiefs
-        (cl-subseq pyim-candidates-possible-chiefs 0
-                   (min 100 (length pyim-candidates-possible-chiefs)))))
-
 (defun pyim-candidates-get-chief (scheme-name &optional personal-words common-words)
   "选取第一位候选词。"
-  (let* ((class (pyim-scheme-get-option scheme-name :class))
-         (words pyim-candidates-possible-chiefs)
-         (length (length words))
-         ;; NOTE: 网上传言,一段话平均70个字,按照一个词两个字估算,100个词大概
-         ;; 为两段话。
-         (words100 (cl-subseq words 0 (min 100 length)))
-         ;; NOTE: 10个词大概1句话。
-         (words10 (cl-subseq words 0 (min 10 length))))
-    (cond
-     ((equal class 'xingma)
-      (or
-       ;; 如果从公共词库里面获取到的第一个词条是汉字,就选择它。
-       (when (= (length (car common-words)) 1)
-         (car common-words))
-       ;; 从个人词库里面按排列的先后顺序,获取一个汉字。
-       (cl-find-if
-        (lambda (word)
-          (= (length word) 1))
-        personal-words)))
-     (t (or
-         ;; 最近输入的10个词中出现一次以上。
-         (cl-find-if (lambda (word)
-                       (> (cl-count word words10 :test #'equal) 1))
-                     personal-words)
-         ;; 最近输入的100个词中出现过三次以上。
-         (cl-find-if (lambda (word)
-                       (> (cl-count word words100 :test #'equal) 3))
-                     personal-words)
-         ;; 个人词条中的第一个词。
-         (car personal-words))))))
+  (let ((class (pyim-scheme-get-option scheme-name :class)))
+    (cond ((equal class 'xingma)
+           (or
+            ;; 如果从公共词库里面获取到的第一个词条是汉字,就选择它。
+            (when (= (length (car common-words)) 1)
+              (car common-words))
+            ;; 从个人词库里面按排列的先后顺序,获取一个汉字。
+            (cl-find-if
+             (lambda (word)
+               (= (length word) 1))
+             personal-words)))
+          (t (or
+              ;; 最近输入的10个不同的词中出现一次以上。
+              (cl-find-if
+               (lambda (word)
+                 (> (or (car (pyim-dcache-get word 'iword2count-recent1)) 0) 1))
+               personal-words)
+              ;; 最近输入的50个不同的词中出现过三次以上。
+              (cl-find-if
+               (lambda (word)
+                 (> (or (car (pyim-dcache-get word 'iword2count-recent2)) 0) 3))
+               personal-words)
+              ;; 个人词条中的第一个词。
+              (car personal-words))))))
 
 (defun pyim-candidates-create:xingma (imobjs scheme-name &optional async)
   "`pyim-candidates-create' 处理五笔仓颉等形码输入法的函数."
diff --git a/pyim-dhashcache.el b/pyim-dhashcache.el
index 1f3796403a..83ee86f071 100644
--- a/pyim-dhashcache.el
+++ b/pyim-dhashcache.el
@@ -59,6 +59,8 @@
 (defvar pyim-dhashcache-word2code nil)
 (defvar pyim-dhashcache-iword2count nil)
 (defvar pyim-dhashcache-iword2count-log nil)
+(defvar pyim-dhashcache-iword2count-recent1 nil)
+(defvar pyim-dhashcache-iword2count-recent2 nil)
 (defvar pyim-dhashcache-shortcode2word nil)
 (defvar pyim-dhashcache-icode2word nil)
 (defvar pyim-dhashcache-ishortcode2word nil)
@@ -469,7 +471,9 @@ code 对应的中文词条了。
 (defun pyim-dhashcache-init-count-variables ()
   "初始化 count 相关的变量。"
   (pyim-dcache-init-variable pyim-dhashcache-iword2count)
-  (pyim-dcache-init-variable pyim-dhashcache-iword2count-log))
+  (pyim-dcache-init-variable pyim-dhashcache-iword2count-log)
+  (pyim-dcache-init-variable pyim-dhashcache-iword2count-recent1)
+  (pyim-dcache-init-variable pyim-dhashcache-iword2count-recent2))
 
 (defun pyim-dhashcache-save-personal-dcache-to-file ()
   ;; 用户选择过的词
@@ -501,8 +505,31 @@ code 对应的中文词条了。
        (setq ,new-value (progn ,@body))
        (puthash ,key ,new-value ,table))))
 
+(defun pyim-dhashcache-update-iword2count-recent (word n hash-table)
+  (let (words-need-remove)
+    (pyim-dhashcache-put
+      hash-table :all-words
+      (setq orig-value (remove word orig-value))
+      (push word orig-value)
+      (if (<= (length orig-value) n)
+          orig-value
+        (setq words-need-remove (nthcdr n orig-value))
+        (cl-subseq orig-value 0 n)))
+    (dolist (w words-need-remove)
+      (remhash w hash-table))
+    (pyim-dhashcache-put
+      hash-table word
+      (+ (or orig-value 0) 1))
+    hash-table))
+
 (defun pyim-dhashcache-update-iword2count (word &optional wordcount-handler)
   "保存词频到缓存."
+  (setq pyim-dhashcache-iword2count-recent1
+        (pyim-dhashcache-update-iword2count-recent
+         word 10 pyim-dhashcache-iword2count-recent1))
+  (setq pyim-dhashcache-iword2count-recent2
+        (pyim-dhashcache-update-iword2count-recent
+         word 50 pyim-dhashcache-iword2count-recent2))
   (pyim-dhashcache-put
     pyim-dhashcache-iword2count word
     (cond
diff --git a/pyim-process.el b/pyim-process.el
index 5bbcd6f6bc..7eb1131441 100644
--- a/pyim-process.el
+++ b/pyim-process.el
@@ -575,7 +575,6 @@ BUG:拼音无法有效地处理多音字。"
            (codes (pyim-cstring-to-codes
                    word scheme-name
                    (or criteria pyim-cstring-to-code-criteria))))
-      (pyim-candidates-add-possible-chief word)
       ;; 保存对应词条的词频
       (when (> (length word) 0)
         (pyim-dcache-update-wordcount word (or wordcount-handler #'1+)))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]