gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnunet-scheme] 25/49: web/form: New module.


From: gnunet
Subject: [gnunet-scheme] 25/49: web/form: New module.
Date: Sat, 25 Dec 2021 23:00:02 +0100

This is an automated email from the git hooks/post-receive script.

maxime-devos pushed a commit to branch master
in repository gnunet-scheme.

commit 2e0d4723264bcf620742316ddaa85e0b0feef61d
Author: Maxime Devos <maximedevos@telenet.be>
AuthorDate: Sun Sep 26 21:50:42 2021 +0200

    web/form: New module.
    
    * web/form.scm: New module.
    * tests/form.scm: New tests.
    * Makefile.am
      (modules): Add the new module.
      (SCM_TESTS): Add the corresponding tests.
---
 Makefile.am    |   3 ++
 tests/form.scm |  93 +++++++++++++++++++++++++++++++++++++++++++++
 web/form.scm   | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 214 insertions(+)

diff --git a/Makefile.am b/Makefile.am
index 770ab43..9b4b051 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -29,6 +29,8 @@ SUFFIXES = .scm .go
 
 # Scheme code that works
 modules = \
+  web/form.scm \
+  \
   gnu/extractor/enum.scm \
   \
   gnu/gnunet/scripts/download-store.scm \
@@ -173,6 +175,7 @@ SCM_TESTS = \
   tests/config-expand.scm \
   tests/config-db.scm \
   tests/config-fs.scm \
+  tests/form.scm \
   tests/netstruct.scm \
   tests/time.scm \
   tests/tokeniser.scm
diff --git a/tests/form.scm b/tests/form.scm
new file mode 100644
index 0000000..7edd7e2
--- /dev/null
+++ b/tests/form.scm
@@ -0,0 +1,93 @@
+;; This file is part of scheme-GNUnet. -*- coding: utf-8 -*-
+;; Copyright (C) 2021 GNUnet e.V.
+;;
+;; scheme-GNUnet is free software: you can redistribute it and/or modify it
+;; under the terms of the GNU Affero General Public License as published
+;; by the Free Software Foundation, either version 3 of the License,
+;; or (at your option) any later version.
+;;
+;; scheme-GNUnet is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; Affero General Public License for more details.
+;;
+;; You should have received a copy of the GNU Affero General Public License
+;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
+;;
+;; SPDX-License-Identifier: AGPL3.0-or-later
+
+(use-modules (web form)
+            (rnrs bytevectors)
+            (srfi srfi-64))
+
+(define (urlencoded-string->alist string)
+  (urlencoded->alist (string->utf8 string)))
+
+(define-syntax-rule (test-decode name from to)
+  (test-equal name (list to) (list (urlencoded-string->alist from))))
+
+(test-begin "w-www-form-urlencoded")
+
+(test-decode "empty list" "" '())
+(test-decode "one field" "x=y" '(("x" . "y")))
+(test-decode "two fields" "x=y&z=w" '(("x" . "y") ("z" . "w")))
+(test-decode "spaces" "x+x+x=z+z+z" '(("x x x" . "z z z")))
+(test-decode "forgot to encode spaces" "x x x=z z z" #f)
+(test-decode "%-encoding" "x%01%02=x%03z" '(("x\x01\x02" . "x\x03z")))
+(test-decode "%-encoding (NULL)" "%00x%01%02=x%03z" '(("\x00x\x01\x02" . 
"x\x03z")))
+(test-decode "= in keys and values" "x%3Dz=0%3D1" '(("x=z" . "0=1")))
+
+(test-decode "zero-length values" "x=&y=" '(("x" . "") ("y" . "")))
+(test-decode "zero-length keys" "=z" '(("" . "z")))
+
+;; IceCat 78.14.0 (a Firefox derivative) doesn't encode - and _, even though 
they should
+;; be according to RFC 1866.
+(test-decode "Firefox compatibility" "x-yz_w=0-12_3" '(("x-yz_w" . "0-12_3")))
+(test-decode "Correct %-encoding of - and _" "%5F=%2D" '(("_" . "-")))
+
+;; The specification uses uppercase letters.
+(test-decode "no lowercase % (0)" "%aA=0" #false)
+(test-decode "no lowercase % (1)" "%Aa=0" #false)
+
+(test-decode "no %-encoding of A" "%41=0" #false)
+(test-decode "no %-encoding of Z" "%5A=0" #false)
+(test-decode "no %-encoding of a" "%61=0" #false)
+(test-decode "no %-encoding of z" "%7A=0" #false)
+(test-decode "no %-encoding of 0" "%30=0" #false)
+(test-decode "no %-encoding of 9" "%39=0" #false)
+
+;; While it might not be advisable, RFC 1866 does not forbid duplicate
+;; field names.
+(test-decode "duplicate field names" "field=value&field=value2"
+            '(("field" . "value") ("field" . "value2")))
+
+(test-decode "leading &" "&oop=s" #false)
+(test-decode "trailing &" "oop=s&" #false)
+(test-decode "duplicated &" "o=o&&p=s" #false)
+(test-decode "duplicated =" "oo==ps" #false)
+(test-decode "too many =" "o=o=ps" #false)
+
+;; RFC 1866 doesn't specify any character encoding, so assume UTF-8.
+(define unicode-input "%C3%A9=%F0%9F%AA%82")
+(define unicode-output '(("é" . "🪂")))
+(test-decode "non-ASCII" unicode-input unicode-output)
+(test-decode "bogus UTF-8" "%ED%9F%C0=z" #f)
+
+(define (test-decode-with-encoding encoding)
+  (parameterize (((fluid->parameter %default-port-encoding) encoding))
+    (test-decode (string-append "non-ASCII, with " encoding
+                               " default port encoding")
+                unicode-input unicode-output)))
+
+;; 'unescape' calls 'call-with-output-bytevector' without explicitely setting
+;; the port encoding appropriately
+(test-decode-with-encoding "UTF-8")
+(test-decode-with-encoding "ISO-88519") ; doesn't support Unicode
+(test-decode-with-encoding "UTF-16") ; two to four bytes per character
+(test-decode-with-encoding "EBCDIC") ; non-ASCII compatible, doesn't support 
Unicode
+
+(test-decode "non-ASCII input" "é=é" #f)
+(test-assert "bogus UTF-8 (before decoding)"
+  (not (urlencoded->alist #vu8(237 159 192 61 49))))
+
+(test-end "w-www-form-urlencoded")
diff --git a/web/form.scm b/web/form.scm
new file mode 100644
index 0000000..1a191ae
--- /dev/null
+++ b/web/form.scm
@@ -0,0 +1,118 @@
+;; This file is part of scheme-GNUnet
+;; Copyright (C) 2021 GNUnet e.V.
+;;
+;; scheme-GNUnet is free software: you can redistribute it and/or modify it
+;; under the terms of the GNU Affero General Public License as published
+;; by the Free Software Foundation, either version 3 of the License,
+;; or (at your option) any later version.
+;;
+;; GNUnet is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; Affero General Public License for more details.
+;;
+;; You should have received a copy of the GNU Affero General Public License
+;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
+;;
+;; SPDX-License-Identifier: AGPL-3.0-or-later
+
+;; TODO: look into integrating this into Guile proper.
+(define-module (web form)
+  #:use-module (rnrs bytevectors)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 binary-ports)
+  #:use-module (ice-9 textual-ports)
+  #:use-module (ice-9 control)
+  #:use-module (ice-9 string-fun)
+  #:export (urlencoded->alist))
+
+
+;; application/x-www-form-urlencoded, documented in 8.2.1.
+;; of RFC 1866
+
+;; 8.2.1 ‘[...] space characters are replaced by #\+ [...]’
+;;
+;; Presumably only #\  is meant here and not the non-breaking space (NBSP),
+;; otherwise NBSP could not be distinguished from the regular space character
+;; #\ .
+;;
+;; 8.2.1 ‘[...] [non-alphanumeric] characters are replaced by %HH [...]’.
+;;
+;; Presumably with ‘non-alphanumeric’, ‘non-alphanumeric or non-ASCII’
+;; is meant here, otherwise the validity of application/x-www-form-urlencoded
+;; data could depend on the Unicode standard used.
+;;
+;; In practice, Firefox doesn't escape - and _, so include those as well
+;; for compatibility.
+
+;; TODO: isn't a-zA-Z0-9 problematic under some locales?
+(define encoded-pat "^(\\+|[a-zA-Z0-9_-]|%[0-9A-F][0-9A-F])*$")
+(define encoded-regex (make-regexp encoded-pat))
+
+(define (try-utf8->string bv)
+  "Like utf8->string, but return #false instead of raising an error if
+@var{bv} is not valid UTF-8."
+  (catch 'decoding-error
+    ;; RFC 1866 doesn't specify the character encoding, so assume UTF-8.
+    (lambda () (utf8->string bv))
+    (lambda _ #false)))
+
+(define (urlencoded-string->alist string)
+  (let/ec return
+    (let ()
+      (define (oops)
+       (return #false))
+      (when (string-null? string)
+       (return '()))
+      (define fields (string-split string #\&))
+      (define (unescape string)
+       ;; Validate the syntax of STRING ...
+       (unless (regexp-exec encoded-regex string)
+         (oops))
+       ;; ... replace #\+ with #\  ...
+       (define string-with-space (string-replace-substring string "+" " "))
+       (define bv
+         (call-with-output-bytevector
+          (lambda (port)
+            ;; ... and undo % escapes.
+            (define (search remainder)
+              (define next-% (string-index remainder #\%))
+              (if next-%
+                  (begin
+                    (put-string port (substring remainder 0 next-%))
+                    (undo-% (substring remainder next-%)))
+                  (put-string port remainder)))
+            (define (undo-% remainder)
+              (define octet
+                (string->number (substring remainder 1 3) 16))
+              ;; 8.2.1 ‘[...] [non-alphanumeric] characters are replaced by
+              ;; %HH [...]’.
+              ;;
+              ;; The syntax of application/x-www-form-urlencoded is given in
+              ;; terms of how to encode the fields, and alphanumeric characters
+              ;; are not included there, thus alphanumeric characters are
+              ;; forbidden.
+              (when (or (<= (char->integer #\a) octet (char->integer #\z))
+                        (<= (char->integer #\A) octet (char->integer #\Z))
+                        (<= (char->integer #\0) octet (char->integer #\9)))
+                (oops))
+              (put-u8 port octet)
+              (search (substring remainder 3)))
+            (search string-with-space))))
+       ;; RFC 1866 doesn't specify the character encoding, so assume UTF-8.
+       ;; The resulting bytevector could be bogus UTF-8, so catch
+       ;; 'decoding-error'.
+       (or (try-utf8->string bv)
+           (oops)))
+      (define (decode-field field)
+       (match (string-split field #\=)
+         ((escaped-field-name escaped-field-value)
+          (cons (unescape escaped-field-name) (unescape escaped-field-value)))
+         (_ (oops))))
+      (map decode-field fields))))
+
+(define (urlencoded->alist body)
+  "Decode body, a bytevector holding a application/x-www-form-urlencoded,
+to an association list of string-valued key-value pairs.  Return #false
+if the bytevector could not be parsed."
+  (and=> (try-utf8->string body) urlencoded-string->alist))

-- 
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]