From 6c630bd5b001243d6b7115380088909a7a180ddb Mon Sep 17 00:00:00 2001 From: Philipp Stephani Date: Mon, 24 Oct 2016 21:54:51 +0200 Subject: [PATCH] Fix encoding of JSON surrogate pairs JSON requires that such pairs be treated as UTF-16 surrogate pairs, not individual code points; cf. Bug #24784. * lisp/json.el (json-read-escaped-char): Fix decoding of surrogate pairs. (json--decode-utf-16-surrogates): New defsubst. * test/lisp/json-tests.el (test-json-read-string): Add test for surrogate pairs. (test-json-encode-string): Add test for non-BMP character encoding. --- lisp/json.el | 13 +++++++++++++ test/lisp/json-tests.el | 7 +++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/lisp/json.el b/lisp/json.el index fdac8d9..5bfdfd4 100644 --- a/lisp/json.el +++ b/lisp/json.el @@ -363,6 +363,10 @@ json-special-chars ;; String parsing +(defsubst json--decode-utf-16-surrogates (high low) + "Return the code point represented by the UTF-16 surrogates HIGH and LOW." + (+ (lsh (- high #xD800) 10) (- low #xDC00) #x10000)) + (defun json-read-escaped-char () "Read the JSON string escaped character at point." ;; Skip over the '\' @@ -372,6 +376,15 @@ json-read-escaped-char (cond (special (cdr special)) ((not (eq char ?u)) char) + ;; Special-case UTF-16 surrogate pairs, + ;; cf. https://tools.ietf.org/html/rfc7159#section-7 + ((looking-at + (rx (group (any "Dd") (any "89ABab") (= 2 (any "0-9A-Fa-f"))) + "\\u" (group (any "Dd") (any "C-Fc-f") (= 2 (any "0-9A-Fa-f"))))) + (json-advance 10) + (json--decode-utf-16-surrogates + (string-to-number (match-string 1) 16) + (string-to-number (match-string 2) 16))) ((looking-at "[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]") (let ((hex (match-string 0))) (json-advance 4) diff --git a/test/lisp/json-tests.el b/test/lisp/json-tests.el index 78cebb4..8958000 100644 --- a/test/lisp/json-tests.el +++ b/test/lisp/json-tests.el @@ -167,14 +167,17 @@ json-tests--with-temp-buffer (should (equal (json-read-string) "abcαβγ"))) (json-tests--with-temp-buffer "\"\\nasd\\u0444\\u044b\\u0432fgh\\t\"" (should (equal (json-read-string) "\nasdфывfgh\t"))) + ;; Bug#24784 + (json-tests--with-temp-buffer "\"\\uD834\\uDD1E\"" + (should (equal (json-read-string) "\U0001D11E"))) (json-tests--with-temp-buffer "foo" (should-error (json-read-string) :type 'json-string-format))) (ert-deftest test-json-encode-string () (should (equal (json-encode-string "foo") "\"foo\"")) (should (equal (json-encode-string "a\n\fb") "\"a\\n\\fb\"")) - (should (equal (json-encode-string "\nasdфыв\u001f\u007ffgh\t") - "\"\\nasdфыв\\u001f\u007ffgh\\t\""))) + (should (equal (json-encode-string "\nasdфыв𝄞\u001f\u007ffgh\t") + "\"\\nasdфыв𝄞\\u001f\u007ffgh\\t\""))) (ert-deftest test-json-encode-key () (should (equal (json-encode-key "foo") "\"foo\"")) -- 2.10.1