[elpa] externals/pyim 7e8937010c 4/4: Merge pull request #432 from tumashu/count-log

emacs-elpa-diffs
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/pyim 7e8937010c 4/4: Merge pull request #432 from tumashu/count-log

From:	ELPA Syncer
Subject:	[elpa] externals/pyim 7e8937010c 4/4: Merge pull request #432 from tumashu/count-log
Date:	Thu, 6 Jan 2022 22:57:48 -0500 (EST)
branch: externals/pyim
commit 7e8937010ca04224672d3ac97f3a58b0f6ca0607
Merge: 19a18b414b 1c864717d5
Author: tumashu <tumashu@163.com>
Commit: GitHub <noreply@github.com>

    Merge pull request #432 from tumashu/count-log
    
    Count log
---
 pyim-candidates.el  |  89 +++++++---------------
 pyim-dcache.el      |   1 +
 pyim-dhashcache.el  | 215 ++++++++++++++++++++++++++++++++++++++++++++--------
 pyim-dregcache.el   |  19 ++---
 pyim-process.el     |   1 -
 tests/pyim-tests.el |  44 ++++++-----
 6 files changed, 240 insertions(+), 129 deletions(-)

diff --git a/pyim-candidates.el b/pyim-candidates.el
index 063c733a4d..2d93cbb95a 100644
--- a/pyim-candidates.el
+++ b/pyim-candidates.el
@@ -52,29 +52,13 @@
 
 细节信息请参考 `pyim-page-refresh' 的 docstring.")
 
-(defvar pyim-candidates-possible-chiefs nil
-  "可能做第一位候选词的词条列表。")
-
 (pyim-register-local-variables
  '(pyim-candidates pyim-candidate-position))
 
 ;; ** 获取备选词列表
-(defun pyim-candidates-create-weight-table (words)
-  "基于 WORDS 的先后顺序，创建一个用于候选词排序的 count 权重表。
-count 和 count 权重结合起来确定词条的先后顺序。"
-  (let ((table (make-hash-table :test #'equal))
-        ;; FIXME: 这个权重列表是想当然的数字，因为目前我也不知道这个合理的权重是
-        ;; 什么，希望以后通过实际使用，可以总结出更合理的数字。
-        (weights (list 1.3 1.2 1.1)))
-    (dolist (weight weights)
-      (let ((word (pop words)))
-        (when word
-          (puthash word weight table))))
-    table))
-
-(defun pyim-candidates-sort (candidates &optional weight-table)
+(defun pyim-candidates-sort (candidates)
   "对 CANDIDATES 进行排序。"
-  (pyim-dcache-call-api 'sort-words candidates nil weight-table))
+  (pyim-dcache-call-api 'sort-words candidates))
 
 (defun pyim-candidates-create (imobjs scheme-name &optional async)
   "按照 SCHEME-NAME 对应的输入法方案， 从输入法内部对象列表:
@@ -85,45 +69,32 @@ IMOBJS 获得候选词条。"
         (funcall (intern (format "pyim-candidates-create:%S" class))
                  imobjs scheme-name async)))))
 
-(defun pyim-candidates-add-possible-chief (word)
-  "将 WORD 添加到 `pyim-candidates-possible-chiefs'."
-  (push word pyim-candidates-possible-chiefs)
-  (setq pyim-candidates-possible-chiefs
-        (cl-subseq pyim-candidates-possible-chiefs 0
-                   (min 100 (length pyim-candidates-possible-chiefs)))))
-
 (defun pyim-candidates-get-chief (scheme-name &optional personal-words common-words)
   "选取第一位候选词。"
-  (let* ((class (pyim-scheme-get-option scheme-name :class))
-         (words pyim-candidates-possible-chiefs)
-         (length (length words))
-         ;; NOTE: 网上传言，一段话平均70个字，按照一个词两个字估算，100个词大概
-         ;; 为两段话。
-         (words100 (cl-subseq words 0 (min 100 length)))
-         ;; NOTE: 10个词大概1句话。
-         (words10 (cl-subseq words 0 (min 10 length))))
-    (cond
-     ((equal class 'xingma)
-      (or
-       ;; 如果从公共词库里面获取到的第一个词条是汉字，就选择它。
-       (when (= (length (car common-words)) 1)
-         (car common-words))
-       ;; 从个人词库里面按排列的先后顺序，获取一个汉字。
-       (cl-find-if
-        (lambda (word)
-          (= (length word) 1))
-        personal-words)))
-     (t (or
-         ;; 最近输入的10个词中出现一次以上。
-         (cl-find-if (lambda (word)
-                       (> (cl-count word words10 :test #'equal) 1))
-                     personal-words)
-         ;; 最近输入的100个词中出现过三次以上。
-         (cl-find-if (lambda (word)
-                       (> (cl-count word words100 :test #'equal) 3))
-                     personal-words)
-         ;; 个人词条中的第一个词。
-         (car personal-words))))))
+  (let ((class (pyim-scheme-get-option scheme-name :class)))
+    (cond ((equal class 'xingma)
+           (or
+            ;; 如果从公共词库里面获取到的第一个词条是汉字，就选择它。
+            (when (= (length (car common-words)) 1)
+              (car common-words))
+            ;; 从个人词库里面按排列的先后顺序，获取一个汉字。
+            (cl-find-if
+             (lambda (word)
+               (= (length word) 1))
+             personal-words)))
+          (t (or
+              ;; 最近输入的10个不同的词中出现一次以上。
+              (cl-find-if
+               (lambda (word)
+                 (> (or (car (pyim-dcache-get word 'iword2count-recent1)) 0) 1))
+               personal-words)
+              ;; 最近输入的50个不同的词中出现过三次以上。
+              (cl-find-if
+               (lambda (word)
+                 (> (or (car (pyim-dcache-get word 'iword2count-recent2)) 0) 3))
+               personal-words)
+              ;; 个人词条中的第一个词。
+              (car personal-words))))))
 
 (defun pyim-candidates-create:xingma (imobjs scheme-name &optional async)
   "`pyim-candidates-create' 处理五笔仓颉等形码输入法的函数."
@@ -158,10 +129,7 @@ IMOBJS 获得候选词条。"
                 ;; 1. 第一个词选择公共词库中的第一个词。
                 ;; 2. 剩下的分成常用字和词，常用字优先排，字和词各按 count 大小排序。
                 (let* ((personal-words (pyim-dcache-get last-code '(icode2word)))
-                       (weight-table (pyim-candidates-create-weight-table personal-words))
-                       (personal-words
-                        (pyim-candidates-sort
-                         personal-words weight-table))
+                       (personal-words (pyim-candidates-sort personal-words))
                        (common-words (pyim-dcache-get last-code '(code2word)))
                        (chief-word (pyim-candidates-get-chief scheme-name personal-words common-words))
                        (common-words (pyim-candidates-sort common-words weight-table))
@@ -314,8 +282,7 @@ IMOBJS 获得候选词条。"
     ;; 个人词条排序：使用词频信息对个人词库得到的候选词排序，第一个词条的位置
     ;; 比较特殊，不参与排序，具体原因请参考 `pyim-page-select-word' 中的
     ;; comment.
-    (setq weight-table (pyim-candidates-create-weight-table personal-words))
-    (setq personal-words (pyim-candidates-sort personal-words weight-table))
+    (setq personal-words (pyim-candidates-sort personal-words))
     (setq chief-word (pyim-candidates-get-chief scheme-name personal-words))
 
     ;; 调试输出
diff --git a/pyim-dcache.el b/pyim-dcache.el
index 6c8f537714..343af278b4 100644
--- a/pyim-dcache.el
+++ b/pyim-dcache.el
@@ -220,6 +220,7 @@ non-nil，文件存在时将会提示用户是否覆盖，默认为覆盖模式"
 如果 FORCE 为真，强制加载。"
   (pyim-dcache-init-variables)
   (when pyim-dcache-auto-update
+    (pyim-dcache-call-api 'update-iword2priority force)
     (pyim-dcache-call-api 'update-personal-words force)
     (let* ((dict-files (mapcar (lambda (x)
                                  (unless (plist-get x :disable)
diff --git a/pyim-dhashcache.el b/pyim-dhashcache.el
index 14d42c02de..0a519c9015 100644
--- a/pyim-dhashcache.el
+++ b/pyim-dhashcache.el
@@ -40,35 +40,92 @@
 (require 'pyim-dcache)
 (require 'pyim-scheme)
 
+(defvar pyim-dhashcache-count-types
+  '((day
+     ;; 用于生成类似 :20220206 这样的 key.
+     :format ":%Y%m%d"
+     ;; 最多保存七天 count 到缓存。
+     :max-save-length 7
+     ;; 计算排序综合指标时，最近七天 count 对应的权重。
+     :weights (0.396 0.245 0.151 0.094 0.057 0.038 0.019)
+     ;; 获取前一天需要减去的天数。
+     :delta -1
+     ;; 计算日平均 count 需要乘的数字。
+     :factor 0.143))
+  "计算排序综合指数时，用到的基本信息。")
+
 (defvar pyim-dhashcache-code2word nil)
 (defvar pyim-dhashcache-code2word-md5 nil)
 (defvar pyim-dhashcache-word2code nil)
 (defvar pyim-dhashcache-iword2count nil)
+(defvar pyim-dhashcache-iword2count-log nil)
+(defvar pyim-dhashcache-iword2count-recent1 nil)
+(defvar pyim-dhashcache-iword2count-recent2 nil)
+(defvar pyim-dhashcache-iword2priority nil)
 (defvar pyim-dhashcache-shortcode2word nil)
 (defvar pyim-dhashcache-icode2word nil)
 (defvar pyim-dhashcache-ishortcode2word nil)
 (defvar pyim-dhashcache-update-shortcode2word-p nil)
 (defvar pyim-dhashcache-update-ishortcode2word-p nil)
 (defvar pyim-dhashcache-update-icode2word-p nil)
+(defvar pyim-dhashcache-update-iword2priority-p nil)
 (defvar pyim-dhashcache-update-code2word-running-p nil)
 
-(defun pyim-dhashcache-sort-words (words-list &optional iword2count count-weight-table)
-  "对 WORDS-LIST 排序，词频大的排在前面.
-
-如果 IWORD2COUNT 为 nil, 排序将使用 `pyim-dhashcache-iword2count'
-中记录的词频信息
-
-COUNT-WEIGHT-TABLE 是一个哈希表，保存词条的 count 权重，在排序过
-程中， ‘count * 权重’ 的取值决定了排序先后顺序, 权重是一个不小于1
-的数字。"
-  (let ((iword2count (or iword2count pyim-dhashcache-iword2count))
-        (count-weight-table (or count-weight-table (make-hash-table :test #'equal))))
+(defun pyim-dhashcache-sort-words (words-list)
+  "对 WORDS-LIST 排序"
+  (let ((iword2count pyim-dhashcache-iword2count)
+        (iword2priority pyim-dhashcache-iword2priority))
     (sort words-list
           (lambda (a b)
-            (> (* (or (gethash a iword2count) 0)
-                  (or (gethash a count-weight-table) 1))
-               (* (or (or (gethash b iword2count) 0)
-                      (or (gethash b count-weight-table) 1))))))))
+            (let ((n1 (or (gethash a iword2priority) 0))
+                  (n2 (or (gethash b iword2priority) 0)))
+              (if (= n1 n2)
+                  (let ((n3 (or (gethash a iword2count) 0))
+                        (n4 (or (gethash b iword2count) 0)))
+                    (> n3 n4))
+                (> n1 n2)))))))
+
+(defun pyim-dhashcache-get-counts-from-log (log-info &optional time)
+  "从 LOG-INFO 中获取所有的 count 值。
+
+比如： ((day :20220205 10
+             :20220204 6   => ((day 10 6 0 3 ...))
+             :20220202 3
+             ...))"
+  (mapcar (lambda (x)
+            (let* ((label (car x))
+                   (plist (cdr x))
+                   (format (plist-get plist :format))
+                   (n (plist-get plist :max-save-length))
+                   (delta (plist-get plist :delta))
+                   (time (or time (current-time)))
+                   output)
+              (dotimes (i n)
+                (let* ((time (time-add time (days-to-time (* i delta))))
+                       (key (intern (format-time-string format time)))
+                       (plist (cdr (assoc label log-info))))
+                  (push (or (plist-get plist key) 0) output)))
+              `(,label ,@(reverse output))))
+          pyim-dhashcache-count-types))
+
+(defun pyim-dhashcache-calculate-priority (counts-info)
+  "根据 COUNTS-INFO 计算一个优先级指标，用于对词条进行排序。
+COUNTS-INFO 是一个 alist, 其结构类似：
+
+      ((day n1 n2 n3 ...))
+
+其中 (n1 n2 n3 ...) 代表从当前日期逐日倒推，每日 count 所组成的列表。"
+  (apply #'+ (mapcar (lambda (x)
+                       (let* ((label (car x))
+                              (plist (cdr x))
+                              (weights (plist-get plist :weights))
+                              (factor (plist-get plist :factor)))
+                         (* (apply #'+ (cl-mapcar (lambda (a b)
+                                                    (* (or a 0) b))
+                                                  (cdr (assoc label counts-info))
+                                                  weights))
+                            factor)))
+                     pyim-dhashcache-count-types)))
 
 (defun pyim-dhashcache-get-shortcodes (code)
   "获取 CODE 所有的 shortcodes.
@@ -124,16 +181,15 @@ COUNT-WEIGHT-TABLE 是一个哈希表，保存词条的 count 权重，在排序
         ,@(pyim-dhashcache-async-inject-variables)
         (require 'pyim-dhashcache)
         (pyim-dcache-init-variable pyim-dhashcache-icode2word)
-        (pyim-dcache-init-variable pyim-dhashcache-iword2count)
+        (pyim-dhashcache-init-count-and-priority-variables)
         (pyim-dcache-save-variable
          'pyim-dhashcache-ishortcode2word
          (pyim-dhashcache-update-ishortcode2word-1
-          pyim-dhashcache-icode2word
-          pyim-dhashcache-iword2count)))
+          pyim-dhashcache-icode2word)))
      (lambda (_)
        (pyim-dcache-reload-variable pyim-dhashcache-ishortcode2word)))))
 
-(defun pyim-dhashcache-update-ishortcode2word-1 (icode2word iword2count)
+(defun pyim-dhashcache-update-ishortcode2word-1 (icode2word)
   "`pyim-dhashcache-update-ishortcode2word' 内部函数."
   (let ((ishortcode2word (make-hash-table :test #'equal)))
     (maphash
@@ -147,7 +203,7 @@ COUNT-WEIGHT-TABLE 是一个哈希表，保存词条的 count 权重，在排序
      icode2word)
     (maphash
      (lambda (key value)
-       (puthash key (pyim-dhashcache-sort-words value iword2count)
+       (puthash key (pyim-dhashcache-sort-words value)
                 ishortcode2word))
      ishortcode2word)
     ishortcode2word))
@@ -166,16 +222,15 @@ COUNT-WEIGHT-TABLE 是一个哈希表，保存词条的 count 权重，在排序
         ,@(pyim-dhashcache-async-inject-variables)
         (require 'pyim-dhashcache)
         (pyim-dcache-init-variable pyim-dhashcache-code2word)
-        (pyim-dcache-init-variable pyim-dhashcache-iword2count)
+        (pyim-dhashcache-init-count-and-priority-variables)
         (pyim-dcache-save-variable
          'pyim-dhashcache-shortcode2word
          (pyim-dhashcache-update-shortcode2word-1
-          pyim-dhashcache-code2word
-          pyim-dhashcache-iword2count)))
+          pyim-dhashcache-code2word)))
      (lambda (_)
        (pyim-dcache-reload-variable pyim-dhashcache-shortcode2word)))))
 
-(defun pyim-dhashcache-update-shortcode2word-1 (code2word iword2count)
+(defun pyim-dhashcache-update-shortcode2word-1 (code2word)
   "`pyim-dhashcache-update-shortcode2word' 的内部函数"
   (let ((shortcode2word (make-hash-table :test #'equal)))
     (maphash
@@ -197,7 +252,7 @@ COUNT-WEIGHT-TABLE 是一个哈希表，保存词条的 count 权重，在排序
      code2word)
     (maphash
      (lambda (key value)
-       (puthash key (pyim-dhashcache-sort-words value iword2count)
+       (puthash key (pyim-dhashcache-sort-words value)
                 shortcode2word))
      shortcode2word)
     shortcode2word))
@@ -349,10 +404,10 @@ code 对应的中文词条了。
         ,@(pyim-dhashcache-async-inject-variables)
         (require 'pyim-dhashcache)
         (pyim-dcache-init-variable pyim-dhashcache-icode2word)
-        (pyim-dcache-init-variable pyim-dhashcache-iword2count)
+        (pyim-dhashcache-init-count-and-priority-variables)
         (maphash
          (lambda (key value)
-           (puthash key (pyim-dhashcache-sort-words value pyim-dhashcache-iword2count)
+           (puthash key (pyim-dhashcache-sort-words value)
                     pyim-dhashcache-icode2word))
          pyim-dhashcache-icode2word)
         (pyim-dcache-save-variable
@@ -404,22 +459,38 @@ code 对应的中文词条了。
              (directory-files pyim-dcache-directory nil "-backup-"))
     (message "PYIM: 在 %S 目录中发现备份文件的存在，可能是词库缓存文件损坏导致，请抓紧检查处理！！！"
              pyim-dcache-directory))
-  (pyim-dcache-init-variable pyim-dhashcache-iword2count)
+  (pyim-dhashcache-init-count-and-priority-variables)
   (pyim-dcache-init-variable pyim-dhashcache-code2word)
   (pyim-dcache-init-variable pyim-dhashcache-word2code)
   (pyim-dcache-init-variable pyim-dhashcache-shortcode2word)
   (pyim-dcache-init-variable pyim-dhashcache-icode2word)
   (pyim-dcache-init-variable pyim-dhashcache-ishortcode2word))
 
+(defun pyim-dhashcache-init-count-and-priority-variables ()
+  "初始化 count 相关的变量。"
+  (pyim-dcache-init-variable pyim-dhashcache-iword2count)
+  (pyim-dcache-init-variable pyim-dhashcache-iword2count-log)
+  (pyim-dcache-init-variable pyim-dhashcache-iword2count-recent1)
+  (pyim-dcache-init-variable pyim-dhashcache-iword2count-recent2)
+  (pyim-dcache-init-variable pyim-dhashcache-iword2priority))
+
 (defun pyim-dhashcache-save-personal-dcache-to-file ()
   ;; 用户选择过的词
   (pyim-dcache-save-variable
    'pyim-dhashcache-icode2word
    pyim-dhashcache-icode2word 0.8)
-  ;; 词频
+  ;; 词条总 count
   (pyim-dcache-save-variable
    'pyim-dhashcache-iword2count
-   pyim-dhashcache-iword2count 0.8))
+   pyim-dhashcache-iword2count 0.8)
+  ;; 词条 count 日志
+  (pyim-dcache-save-variable
+   'pyim-dhashcache-iword2count-log
+   pyim-dhashcache-iword2count-log 0.8)
+  ;; 词条优先级
+  (pyim-dcache-save-variable
+   'pyim-dhashcache-iword2priority
+   pyim-dhashcache-iword2priority 0.8))
 
 (defmacro pyim-dhashcache-put (cache code &rest body)
   "将 BODY 的返回值保存到 CACHE 对应的 CODE 中。
@@ -437,8 +508,34 @@ code 对应的中文词条了。
        (setq ,new-value (progn ,@body))
        (puthash ,key ,new-value ,table))))
 
+(defun pyim-dhashcache-update-iword2count-recent (word n hash-table)
+  (let (words-need-remove)
+    (pyim-dhashcache-put
+      hash-table :all-words
+      (setq orig-value (remove word orig-value))
+      (push word orig-value)
+      (if (<= (length orig-value) n)
+          orig-value
+        (setq words-need-remove (nthcdr n orig-value))
+        (cl-subseq orig-value 0 n)))
+    (dolist (w words-need-remove)
+      (remhash w hash-table))
+    (pyim-dhashcache-put
+      hash-table word
+      (+ (or orig-value 0) 1))
+    hash-table))
+
 (defun pyim-dhashcache-update-iword2count (word &optional wordcount-handler)
   "保存词频到缓存."
+  ;; 更新最近输入 10 个词条的 count 表
+  (setq pyim-dhashcache-iword2count-recent1
+        (pyim-dhashcache-update-iword2count-recent
+         word 10 pyim-dhashcache-iword2count-recent1))
+  ;; 更新最近输入 50 个词条的 count 表
+  (setq pyim-dhashcache-iword2count-recent2
+        (pyim-dhashcache-update-iword2count-recent
+         word 50 pyim-dhashcache-iword2count-recent2))
+  ;; 更新总 count 表
   (pyim-dhashcache-put
     pyim-dhashcache-iword2count word
     (cond
@@ -446,7 +543,59 @@ code 对应的中文词条了。
       (funcall wordcount-handler (or orig-value 0)))
      ((numberp wordcount-handler)
       wordcount-handler)
-     (t (or orig-value 0)))))
+     (t (or orig-value 0))))
+  ;; 更新 count 日志表。
+  (pyim-dhashcache-put
+    pyim-dhashcache-iword2count-log word
+    (let (out)
+      (dolist (x pyim-dhashcache-count-types)
+        (let* ((label (car x))
+               (key (intern (format-time-string (plist-get (cdr x) :format))))
+               (n (plist-get (cdr x) :max-save-length))
+               (plist (cdr (assoc label orig-value)))
+               (value (plist-get plist key))
+               (output (if value
+                           (plist-put plist key (+ 1 value))
+                         (append (list key 1) plist)))
+               (length (length output))
+               (output (cl-subseq output 0 (min length (* 2 n)))))
+          (push `(,label ,@output) out)))
+      out))
+  ;; 更新优先级表
+  (pyim-dhashcache-put
+    pyim-dhashcache-iword2priority word
+    ;; Fix warn
+    (ignore orig-value)
+    (pyim-dhashcache-calculate-priority
+     (pyim-dhashcache-get-counts-from-log
+      (gethash word pyim-dhashcache-iword2count-log)))))
+
+(defun pyim-dhashcache-update-iword2priority (&optional force)
+  "更新词条优先级表，如果 FORCE 为真，强制更新。"
+  (interactive)
+  (when (or force (not pyim-dhashcache-update-iword2priority-p))
+    ;; NOTE: 这个变量按理说应该在回调函数里面设置，但 async 在某些情况下会卡死，
+    ;; 这个变量无法设置为 t, 导致后续产生大量的 emacs 进程，极其影响性能。
+    (setq pyim-dhashcache-update-iword2priority-p t)
+    (async-start
+     `(lambda ()
+        ,@(pyim-dhashcache-async-inject-variables)
+        (require 'pyim-dhashcache)
+        (pyim-dhashcache-init-count-and-priority-variables)
+        (maphash
+         (lambda (key value)
+           (puthash key
+                    (pyim-dhashcache-calculate-priority
+                     (pyim-dhashcache-get-counts-from-log
+                      value))
+                    pyim-dhashcache-iword2priority))
+         pyim-dhashcache-iword2count-log)
+        (pyim-dcache-save-variable
+         'pyim-dhashcache-iword2priority
+         pyim-dhashcache-iword2priority)
+        nil)
+     (lambda (_)
+       (pyim-dcache-reload-variable pyim-dhashcache-iword2priority)))))
 
 (defun pyim-dhashcache-delete-word (word)
   "将中文词条 WORD 从个人词库中删除"
@@ -467,7 +616,9 @@ code 对应的中文词条了。
              (puthash key new-value pyim-dhashcache-ishortcode2word)
            (remhash key pyim-dhashcache-ishortcode2word)))))
    pyim-dhashcache-ishortcode2word)
-  (remhash word pyim-dhashcache-iword2count))
+  (remhash word pyim-dhashcache-iword2count)
+  (remhash word pyim-dhashcache-iword2count-log)
+  (remhash word pyim-dhashcache-iword2priority))
 
 (defun pyim-dhashcache-insert-word-into-icode2word (word code prepend)
   "将词条 WORD 插入到 icode2word 词库缓存 CODE 键对应的位置.
diff --git a/pyim-dregcache.el b/pyim-dregcache.el
index 4a7825163c..94c7764cdf 100644
--- a/pyim-dregcache.el
+++ b/pyim-dregcache.el
@@ -67,24 +67,15 @@
         (insert-file-contents file)
         (buffer-string)))))
 
-(defun pyim-dregcache-sort-words (words-list &optional iword2count count-weight-table)
-  "对 WORDS-LIST 排序，词频大的排在前面.
-
-如果 IWORD2COUNT 为 nil, 排序将使用 `pyim-dregcache-iword2count'
-中记录的词频信息
-
-COUNT-WEIGHT-TABLE 是一个哈希表，保存词条的 count 权重，在排序过
-程中， ‘count * 权重’ 的取值决定了排序先后顺序。"
-  (let ((iword2count (or iword2count pyim-dregcache-iword2count))
-        (count-weight-table (or count-weight-table (make-hash-table :test #'equal))))
+(defun pyim-dregcache-sort-words (words-list)
+  "对 WORDS-LIST 排序，词频大的排在前面."
+  (let ((iword2count pyim-dregcache-iword2count))
     (sort words-list
           (lambda (a b)
             (let ((a (car (split-string a ":")))
                   (b (car (split-string b ":"))))
-              (> (* (or (gethash a iword2count) 0)
-                    (or (gethash a count-weight-table) 1))
-                 (* (or (or (gethash b iword2count) 0)
-                        (or (gethash b count-weight-table) 1)))))))))
+              (> (or (gethash a iword2count) 0)
+                 (or (gethash b iword2count) 0)))))))
 
 (defun pyim-dregcache-sort-icode2word ()
   "对个人词库排序."
diff --git a/pyim-process.el b/pyim-process.el
index 5bbcd6f6bc..7eb1131441 100644
--- a/pyim-process.el
+++ b/pyim-process.el
@@ -575,7 +575,6 @@ BUG：拼音无法有效地处理多音字。"
            (codes (pyim-cstring-to-codes
                    word scheme-name
                    (or criteria pyim-cstring-to-code-criteria))))
-      (pyim-candidates-add-possible-chief word)
       ;; 保存对应词条的词频
       (when (> (length word) 0)
         (pyim-dcache-update-wordcount word (or wordcount-handler #'1+)))
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index 2bec8f6707..ece136030d 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -742,8 +742,8 @@ zuo-zuo-you-mang 作作有芒")
     (should (equal (gethash "啊啊" output2) nil))))
 
 (ert-deftest pyim-tests-pyim-dhashcache-update-shortcode2word ()
-  (let ((code2word (make-hash-table :test #'equal))
-        (iword2count (make-hash-table :test #'equal))
+  (let ((pyim-dhashcache-iword2count (make-hash-table :test #'equal))
+        (code2word (make-hash-table :test #'equal))
         (shortcode2word (make-hash-table :test #'equal))
         output)
 
@@ -755,7 +755,7 @@ zuo-zuo-you-mang 作作有芒")
     (puthash "wubi/aaae" '("𧝣") code2word)
 
     (setq shortcode2word
-          (pyim-dhashcache-update-shortcode2word-1 code2word iword2count))
+          (pyim-dhashcache-update-shortcode2word-1 code2word))
 
     (should (equal (gethash "wubi/aa" shortcode2word)
                    '(#("工" 0 1 (:comment "a"))
@@ -769,8 +769,8 @@ zuo-zuo-you-mang 作作有芒")
                      #("𧝣" 0 1 (:comment "e")))))))
 
 (ert-deftest pyim-tests-pyim-dhashcache-update-ishortcode2word ()
-  (let ((icode2word (make-hash-table :test #'equal))
-        (iword2count (make-hash-table :test #'equal))
+  (let ((pyim-dhashcache-iword2count (make-hash-table :test #'equal))
+        (icode2word (make-hash-table :test #'equal))
         ishortcode2word)
 
     (puthash "ni" '("你" "呢") icode2word)
@@ -778,8 +778,7 @@ zuo-zuo-you-mang 作作有芒")
     (puthash "ni-huai" '("你坏") icode2word)
 
     (setq ishortcode2word
-          (pyim-dhashcache-update-ishortcode2word-1
-           icode2word iword2count))
+          (pyim-dhashcache-update-ishortcode2word-1 icode2word))
 
     (should (equal (gethash "n-h" ishortcode2word)
                    '("你好" "呢耗" "你坏")))
@@ -860,27 +859,30 @@ yin-xing 因行
 
 (ert-deftest pyim-tests-pyim-dhashcache-sort-words ()
   (let ((pyim-dhashcache-iword2count (make-hash-table :test #'equal))
-        (weight-table (make-hash-table :test #'equal))
         words)
     (puthash "你好" 3 pyim-dhashcache-iword2count)
     (puthash "呢耗" 2 pyim-dhashcache-iword2count)
     (puthash "你豪" 1 pyim-dhashcache-iword2count)
 
-    (puthash "你好" 0.1 weight-table)
-    (puthash "呢耗" 0.3 weight-table)
-    (puthash "你豪" 5   weight-table)
-
     (setq words (list "呢耗" "你豪" "你好"))
     (should (equal (pyim-dhashcache-sort-words words)
-                   '("你好" "呢耗" "你豪")))
-
-    (setq words (list "呢耗" "你豪" "你好"))
-    (should (equal (pyim-dhashcache-sort-words words pyim-dhashcache-iword2count)
-                   '("你好" "呢耗" "你豪")))
-
-    (setq words (list "呢耗" "你豪" "你好"))
-    (should  (equal (pyim-dhashcache-sort-words words nil weight-table)
-                    '("你豪" "呢耗" "你好")))))
+                   '("你好" "呢耗" "你豪")))))
+
+(ert-deftest pyim-tests-pyim-dhashcache-get-counts-from-log ()
+  (should (member (pyim-dhashcache-get-counts-from-log
+                   '((day :20220107 10
+                          :20220106 6
+                          :20220104 3
+                          :20220103 3))
+                   ;; (date-to-time "2022-01-07")
+                   '(25047 4608))
+                  '(((day 6 0 3 3 0 0 0)) ;Fixme: In github-ci will result this value, why?
+                    ((day 10 6 0 3 3 0 0))))))
+
+(ert-deftest pyim-tests-pyim-dhashcache-calculate-priority ()
+  (should (equal (pyim-dhashcache-calculate-priority
+                  '((day 3 7 6 4 5 9 1)))
+                 0.690833)))
 
 ;; ** pyim-dregcache 相关单元测试
 (ert-deftest pyim-tests-pyim-general ()
[Prev in Thread]
Current Thread
[Next in Thread]
[elpa] externals/pyim updated (19a18b414b -> 7e8937010c), ELPA Syncer, 2022/01/06
- [elpa] externals/pyim 0b73690671 2/4: 优化 pyim-candidates-get-chief 的速度。, ELPA Syncer, 2022/01/06
- [elpa] externals/pyim 1c864717d5 3/4: Add pyim-dhashcache-iword2priority., ELPA Syncer, 2022/01/06
- [elpa] externals/pyim 7e8937010c 4/4: Merge pull request #432 from tumashu/count-log, ELPA Syncer <=
- [elpa] externals/pyim a7f178a736 1/4: 添加每日词频日志相关功能。, ELPA Syncer, 2022/01/06
Prev by Date: [elpa] externals/pyim 1c864717d5 3/4: Add pyim-dhashcache-iword2priority.
Next by Date: [elpa] externals/pyim a7f178a736 1/4: 添加每日词频日志相关功能。
Previous by thread: [elpa] externals/pyim 1c864717d5 3/4: Add pyim-dhashcache-iword2priority.
Next by thread: [elpa] externals/pyim a7f178a736 1/4: 添加每日词频日志相关功能。
Index(es):
- Date
- Thread