emacs-orgmode
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Orgmode] [PATCH 13/16] Refactor unescaping functions


From: David Maus
Subject: [Orgmode] [PATCH 13/16] Refactor unescaping functions
Date: Sun, 13 Feb 2011 13:01:15 +0100

* org.el (org-link-unescape): Simpler algorithm for replacing percent
escapes.
(org-link-unescape-compound): Use cond statements instead of nested
if, convert hex string with string-to-number, save match data.
(org-link-unescape-single-byte-sequence): Use mapconcat and
string-to-number for unescaping single byte sequence.
---
 lisp/org.el |  102 ++++++++++++++++++++++------------------------------------
 1 files changed, 39 insertions(+), 63 deletions(-)

diff --git a/lisp/org.el b/lisp/org.el
index fcd421f..f35f898 100644
--- a/lisp/org.el
+++ b/lisp/org.el
@@ -8584,77 +8584,53 @@ If optional argument MERGE is set, merge TABLE into
 (defun org-link-unescape (str)
   "Unhex hexified unicode strings as returned from the JavaScript function
 encodeURIComponent. E.g. `%C3%B6' is the german Umlaut `ö'."
-  (setq str (or str ""))
-  (let ((tmp "")
-       (case-fold-search t))
-    (while (string-match "\\(%[0-9a-f][0-9a-f]\\)+" str)
-      (let* ((start (match-beginning 0))
-            (end (match-end 0))
-            (hex (match-string 0 str))
-            (replacement (org-link-unescape-compound (upcase hex))))
-       (setq tmp (concat tmp (substring str 0 start) replacement))
-       (setq str (substring str end))))
-    (setq tmp (concat tmp str))
-    tmp))
+  (unless (or (null str) (string= "" str)) ; nil or empty: nothing to decode
+    (let ((pos 0) (case-fold-search t) unhexed) ; fold case so A-F digits match too
+      (while (setq pos (string-match "\\(%[0-9a-f][0-9a-f]\\)+" str pos)) ; next run of %XX escapes
+       (setq unhexed (org-link-unescape-compound (match-string 0 str)))
+       (setq str (replace-match unhexed t t str)) ; t t: insert the decoded text verbatim
+       (setq pos (+ pos (length unhexed))))))     ; resume after it so output is not rescanned
+  str)                                  ; decoded string, or STR unchanged if nil/empty
 
 (defun org-link-unescape-compound (hex)
   "Unhexify unicode hex-chars. E.g. `%C3%B6' is the German Umlaut `ö'.
 Note: this function also decodes single byte encodings like
 `%E1' (\"á\") if not followed by another `%[A-F0-9]{2}' group."
-  (let* ((bytes (remove "" (split-string hex "%")))
-        (ret "")
-        (eat 0)
-        (sum 0))
-    (while bytes
-      (let* ((b (pop bytes))
-            (a (elt b 0))
-            (b (elt b 1))
-            (c1 (if (> a ?9) (+ 10 (- a ?A)) (- a ?0)))
-            (c2 (if (> b ?9) (+ 10 (- b ?A)) (- b ?0)))
-            (val (+ (lsh c1 4) c2))
-            (shift
-             (if (= 0 eat) ;; new byte
-                 (if (>= val 252) 6
-                   (if (>= val 248) 5
-                     (if (>= val 240) 4
-                       (if (>= val 224) 3
-                         (if (>= val 192) 2 0)))))
-               6))
-            (xor
-             (if (= 0 eat) ;; new byte
-                 (if (>= val 252) 252
-                   (if (>= val 248) 248
-                     (if (>= val 240) 240
-                       (if (>= val 224) 224
-                         (if (>= val 192) 192 0)))))
-               128)))
-       (if (>= val 192) (setq eat shift))
-       (setq val (logxor val xor))
-       (setq sum (+ (lsh sum shift) val))
-       (if (> eat 0) (setq eat (- eat 1)))
-       (cond
-        ((= 0 eat)                         ;multi byte
-         (setq ret (concat ret (org-char-to-string sum)))
-         (setq sum 0))
-        ((not bytes)                       ; single byte(s)
-         (setq ret (org-link-unescape-single-byte-sequence hex))))
-       )) ;; end (while bytes
-    ret ))
+  (save-match-data                      ; split-string changes match data a caller may still need
+    (let* ((bytes (cdr (split-string hex "%"))) ; hex byte strings; cdr drops the piece before the first "%"
+          (ret "")                      ; decoded text built up so far
+          (eat 0)                       ; bytes still expected to finish the current character
+          (sum 0))                      ; character code being assembled
+      (while bytes
+       (let* ((val (string-to-number (pop bytes) 16)) ; next byte as an integer
+              (shift-xor                ; (SHIFT . MASK) pair for this byte
+               (if (= 0 eat)            ; not inside a sequence: classify VAL as a possible lead byte
+                   (cond
+                    ((>= val 252) (cons 6 252))
+                    ((>= val 248) (cons 5 248))
+                    ((>= val 240) (cons 4 240))
+                    ((>= val 224) (cons 3 224))
+                    ((>= val 192) (cons 2 192))
+                    (t (cons 0 0)))     ; below 192: the byte value is used as-is
+                 (cons 6 128))))        ; inside a sequence: shift SUM by 6 bits, xor VAL with 128
+         (if (>= val 192) (setq eat (car shift-xor))) ; SHIFT doubles as the sequence's byte count
+         (setq val (logxor val (cdr shift-xor)))      ; strip the marker bits selected by MASK
+         (setq sum (+ (lsh sum (car shift-xor)) val)) ; append VAL's remaining bits to SUM
+         (if (> eat 0) (setq eat (- eat 1)))          ; this byte has been consumed
+         (cond
+          ((= 0 eat)                   ; character complete: emit it and reset SUM
+           (setq ret (concat ret (org-char-to-string sum)))
+           (setq sum 0))
+          ((not bytes)                 ; input ended mid-sequence: redo all of HEX byte by byte
+           (setq ret (org-link-unescape-single-byte-sequence hex))))
+         )) ;; end (while bytes
+      ret )))
 
 (defun org-link-unescape-single-byte-sequence (hex)
   "Unhexify hex-encoded single byte character sequences."
-  (let ((bytes (remove "" (split-string hex "%")))
-       (ret ""))
-    (while bytes
-      (let* ((b (pop bytes))
-            (a (elt b 0))
-            (b (elt b 1))
-            (c1 (if (> a ?9) (+ 10 (- a ?A)) (- a ?0)))
-            (c2 (if (> b ?9) (+ 10 (- b ?A)) (- b ?0))))
-       (setq ret
-             (concat ret (char-to-string
-                          (+ (lsh c1 4) c2))))))
-    ret))
+  (mapconcat (lambda (byte)             ; BYTE is one hex string between "%" separators, e.g. "E1"
+              (char-to-string (string-to-number byte 16))) ; hex digits -> character code -> string
+            (cdr (split-string hex "%")) "")  ; cdr skips the piece before the first "%"; join with no separator
 
 (defun org-xor (a b)
   "Exclusive or."
-- 
1.7.2.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]