emacs-orgmode
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] Re: Bug: Plain https links with brackets are not recognised


From: Ihor Radchenko
Subject: Re: [PATCH] Re: Bug: Plain https links with brackets are not recognised [9.4.4 (release_9.4.4-625-g763c7a @ /home/yantar92/.emacs.d/straight/build/org/)]
Date: Tue, 16 Mar 2021 20:35:16 +0800

Maxim Nikulin <manikulin@gmail.com> writes:

> I have tried to compare current, your, and a little modified your 
> regexps on several synthetic examples:

Thanks! Your version is indeed cleaner. I have updated the patch. It now
uses your version, with one group specification that I missed earlier. I
also added tests from your email, from the post introducing the regexp,
and a few more examples I came up with.

I have been testing the new regexp for a few days now. Because the regexp
is quite complex and because font-lock apparently fontifies even invisible
(folded) text, the loading time of large Org files with many links has
become noticeably longer. That said, the file in question was 7.2 MB with
~13k links in it.

Best,
Ihor

>From 6eb2208a67745e3150024e7b72509115b97fcfa3 Mon Sep 17 00:00:00 2001
From: Ihor Radchenko <yantar92@gmail.com>
Date: Tue, 16 Mar 2021 20:20:32 +0800
Subject: [PATCH] Improve org-link-plain-re

(org-link-plain-re): Update docstring.  Now, the docstring explicitly
mentions that the regexp must contain groups for the link type and the
path.

* lisp/ol.el (org-link-make-regexps): Allow URLs with up to two levels
of nested brackets.  Now, URLs like [1] can be matched.  The new
regexp is based on [2].

[1] https://doi.org/10.1016/0160-791x(79)90023-x
[2] https://daringfireball.net/2010/07/improved_regex_for_matching_urls

* testing/lisp/test-ol.el: Add tests for plain links.
---
 lisp/ol.el              |  61 +++++++++++++------
 testing/lisp/test-ol.el | 132 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 173 insertions(+), 20 deletions(-)

diff --git a/lisp/ol.el b/lisp/ol.el
index b8bd7d234..d3a07a3ed 100644
--- a/lisp/ol.el
+++ b/lisp/ol.el
@@ -519,7 +519,10 @@ links more efficient."
   "Matches link with angular brackets, spaces are allowed.")
 
 (defvar org-link-plain-re nil
-  "Matches plain link, without spaces.")
+  "Matches plain link, without spaces.
+Group 1 must contain the link type (e.g. https).
+Group 2 must contain the link path (e.g. //example.com).
+Used by `org-element-link-parser'.")
 
 (defvar org-link-bracket-re nil
   "Matches a link in double brackets.")
@@ -807,26 +810,44 @@ This should be called after the variable `org-link-parameters' has changed."
          (format "<%s:\\([^>\n]*\\(?:\n[ \t]*[^> \t\n][^>\n]*\\)*\\)>"
                  types-re)
          org-link-plain-re
-         (concat
-          "\\<" types-re ":"
-          "\\([^][ \t\n()<>]+\\(?:([[:word:]0-9_]+)\\|\\([^[:punct:] \t\n]\\|/\\)\\)\\)")
-         ;;     "\\([^]\t\n\r<>() ]+[^]\t\n\r<>,.;() ]\\)")
-         org-link-bracket-re
-         (rx (seq "[["
-                  ;; URI part: match group 1.
-                  (group
-                   (one-or-more
+          (let* ((non-space-bracket "[^][ \t\n()<>]")
+                (parenthesis
+                 `(seq "("
+                       (0+ (or (regex ,non-space-bracket)
+                               (seq "("
+                                    (0+ (regex ,non-space-bracket))
+                                    ")")))
+                       ")")))
+           ;; Heuristics for an URL link inspired by
+           ;; https://daringfireball.net/2010/07/improved_regex_for_matching_urls
+           (rx-to-string
+            `(seq word-start
+                   ;; Link type: match group 1.
+                  (regexp ,types-re)
+                  ":"
+                   ;; Link path: match group 2.
+                   (group
+                   (1+ (or (regex ,non-space-bracket)
+                           ,parenthesis))
+                   (or (regexp "[^[:punct:] \t\n]")
+                       ?/
+                       ,parenthesis)))))
+          org-link-bracket-re
+          (rx (seq "[["
+                  ;; URI part: match group 1.
+                  (group
+                   (one-or-more
                      (or (not (any "[]\\"))
-                        (and "\\" (zero-or-more "\\\\") (any "[]"))
-                        (and (one-or-more "\\") (not (any "[]"))))))
-                  "]"
-                  ;; Description (optional): match group 2.
-                  (opt "[" (group (+? anything)) "]")
-                  "]"))
-         org-link-any-re
-         (concat "\\(" org-link-bracket-re "\\)\\|\\("
-                 org-link-angle-re "\\)\\|\\("
-                 org-link-plain-re "\\)"))))
+                        (and "\\" (zero-or-more "\\\\") (any "[]"))
+                        (and (one-or-more "\\") (not (any "[]"))))))
+                  "]"
+                  ;; Description (optional): match group 2.
+                  (opt "[" (group (+? anything)) "]")
+                  "]"))
+          org-link-any-re
+          (concat "\\(" org-link-bracket-re "\\)\\|\\("
+                 org-link-angle-re "\\)\\|\\("
+                 org-link-plain-re "\\)"))))
 
 (defun org-link-complete-file (&optional arg)
   "Create a file link using completion."
diff --git a/testing/lisp/test-ol.el b/testing/lisp/test-ol.el
index 5b7dc513b..e6208cd38 100644
--- a/testing/lisp/test-ol.el
+++ b/testing/lisp/test-ol.el
@@ -491,5 +491,137 @@
              (org-previous-link))
            (buffer-substring (point) (line-end-position))))))
 
+
+;;; Link regexps
+
+(ert-deftest test-ol/plain-link-re ()
+  "Test `org-link-plain-re'."
+  (should
+   (equal
+    '("https" "//example.com/qwe()")
+    (org-test-with-temp-text
+        "(Some text in parenthesis followed by link with brackets <point>https://example.com/qwe())"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("https" "//doi.org/10.1016/0160-791x(79)90023-x")
+    (org-test-with-temp-text
+        "<point>https://doi.org/10.1016/0160-791x(79)90023-x"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "aa")
+    (org-test-with-temp-text
+        "The <point>file:aa link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "a(b)c")
+    (org-test-with-temp-text
+        "The <point>file:a(b)c link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "a()")
+    (org-test-with-temp-text
+        "The <point>file:a() link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "aa((a))")
+    (org-test-with-temp-text
+        "The <point>file:aa((a)) link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "aa(())")
+    (org-test-with-temp-text
+        "The <point>file:aa(()) link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "/a")
+    (org-test-with-temp-text
+        "The <point>file:/a link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "/a/")
+    (org-test-with-temp-text
+        "The <point>file:/a/ link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("http" "//")
+    (org-test-with-temp-text
+        "The <point>http:// link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "ab")
+    (org-test-with-temp-text
+        "The (some <point>file:ab) link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "aa")
+    (org-test-with-temp-text
+        "The <point>file:aa) link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "aa")
+    (org-test-with-temp-text
+        "The <point>file:aa( link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("http" "//foo.com/more_(than)_one_(parens)")
+    (org-test-with-temp-text
+        "The <point>http://foo.com/more_(than)_one_(parens) link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("http" "//foo.com/blah_(wikipedia)#cite-1")
+    (org-test-with-temp-text
+        "The <point>http://foo.com/blah_(wikipedia)#cite-1 link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("http" "//foo.com/blah_(wikipedia)_blah#cite-1")
+    (org-test-with-temp-text
+        "The <point>http://foo.com/blah_(wikipedia)_blah#cite-1 link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("http" "//foo.com/unicode_(✪)_in_parens")
+    (org-test-with-temp-text
+        "The <point>http://foo.com/unicode_(✪)_in_parens link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("http" "//foo.com/(something)?after=parens")
+    (org-test-with-temp-text
+        "The <point>http://foo.com/(something)?after=parens link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser)))))))
+
 (provide 'test-ol)
 ;;; test-ol.el ends here
-- 
2.26.2


reply via email to

[Prev in Thread] Current Thread [Next in Thread]