From 6a4de050d3d9407ca0b3de48e4fb4a6a2b3c2eb1 Mon Sep 17 00:00:00 2001
From: Richard Hansen
Date: Sun, 5 Jun 2022 23:54:11 -0400
Subject: [PATCH 2/2] bindat (str, strz): Convert to unibyte when packing

* lisp/emacs-lisp/bindat.el (str) (strz): Allow callers to pack a
multibyte string if it only contains ASCII and `eight-bit' characters.
* doc/lispref/processes.texi (Bindat Types): Update documentation.
* test/lisp/emacs-lisp/bindat-tests.el (str) (strz): Update tests.
---
Review notes (this region is ignored by git-am):
    The line structure of this patch was restored from a copy whose
    newlines and runs of spaces had been collapsed.  Hunk line counts
    match the @@ headers and the diffstat, but intra-line spacing
    (Lisp indentation, two spaces after sentence-ending periods in the
    Texinfo context lines) was reconstructed by convention -- verify
    against the target tree, or apply with --ignore-whitespace.
    The From: header above carries no e-mail address; it must be
    supplied before git-am will accept the patch.

 doc/lispref/processes.texi           | 14 ++++++++++----
 lisp/emacs-lisp/bindat.el            | 14 ++++++--------
 test/lisp/emacs-lisp/bindat-tests.el | 10 ++++++----
 3 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/doc/lispref/processes.texi b/doc/lispref/processes.texi
index 55fb93ec5a..fbf285c1cc 100644
--- a/doc/lispref/processes.texi
+++ b/doc/lispref/processes.texi
@@ -3484,8 +3484,11 @@ Bindat Types
 to the packed output.  If the input string is shorter than @var{len},
 the remaining bytes will be null (zero) unless a pre-allocated string
 was provided to @code{bindat-pack}, in which case the remaining bytes
-are left unmodified.  When unpacking, any null bytes in the packed
-input string will appear in the unpacked output.
+are left unmodified.  If the input string is multibyte with only ASCII
+and @code{eight-bit} characters, it is converted to unibyte before it
+is packed; other multibyte strings signal an error.  When unpacking,
+any null bytes in the packed input string will appear in the unpacked
+output.
 
 @item strz &optional @var{len}
 If @var{len} is not provided: Variable-length null-terminated unibyte
@@ -3495,8 +3498,11 @@ Bindat Types
 @code{bindat-pack}, in which case that byte is left unmodified.  The
 length of the packed output is the length of the input string plus one
 (for the null terminator).  The input string must not contain any null
-bytes.  When unpacking, the resulting string contains all bytes up to
-(but excluding) the null byte.
+bytes.  If the input string is multibyte with only ASCII and
+@code{eight-bit} characters, it is converted to unibyte before it is
+packed; other multibyte strings signal an error.  When unpacking, the
+resulting string contains all bytes up to (but excluding) the null
+byte.
 
 @quotation Caution
 If a pre-allocated string is provided to @code{bindat-pack}, the
diff --git a/lisp/emacs-lisp/bindat.el b/lisp/emacs-lisp/bindat.el
index 9ac24fa008..04ad09abc1 100644
--- a/lisp/emacs-lisp/bindat.el
+++ b/lisp/emacs-lisp/bindat.el
@@ -435,16 +435,14 @@ bindat--pack-u64r
   (bindat--pack-u32r (ash v -32)))
 
 (defun bindat--pack-str (len v)
-  (if (multibyte-string-p v)
-      (signal 'wrong-type-argument `(multibyte-string-p ,v)))
-  (dotimes (i (min len (length v)))
-    (aset bindat-raw (+ bindat-idx i) (aref v i)))
-  (setq bindat-idx (+ bindat-idx len)))
+  (let ((v (string-to-unibyte v)))
+    (dotimes (i (min len (length v)))
+      (aset bindat-raw (+ bindat-idx i) (aref v i)))
+    (setq bindat-idx (+ bindat-idx len))))
 
 (defun bindat--pack-strz (v)
-  (if (multibyte-string-p v)
-      (signal 'wrong-type-argument `(multibyte-string-p ,v)))
-  (let ((len (length v)))
+  (let* ((v (string-to-unibyte v))
+         (len (length v)))
     (dotimes (i len)
       (aset bindat-raw (+ bindat-idx i) (aref v i)))
     (setq bindat-idx (+ bindat-idx len 1))))
diff --git a/test/lisp/emacs-lisp/bindat-tests.el b/test/lisp/emacs-lisp/bindat-tests.el
index da688d1e82..d33f1c01a2 100644
--- a/test/lisp/emacs-lisp/bindat-tests.el
+++ b/test/lisp/emacs-lisp/bindat-tests.el
@@ -193,13 +193,15 @@ bindat-test--str-strz-multibyte
   (dolist (spec (list (bindat-type str 2)
                       (bindat-type strz 2)
                       (bindat-type strz)))
-    (should-error (bindat-pack spec (string-to-multibyte "x")))
-    (should-error (bindat-pack spec (string-to-multibyte "\xff")))
+    (should (equal (bindat-pack spec (string-to-multibyte "x")) "x\0"))
+    (should (equal (bindat-pack spec (string-to-multibyte "\xff")) "\xff\0"))
     (should-error (bindat-pack spec "💩"))
     (should-error (bindat-pack spec "\N{U+ff}")))
   (dolist (spec (list '((x str 2)) '((x strz 2))))
-    (should-error (bindat-pack spec `((x . ,(string-to-multibyte "x")))))
-    (should-error (bindat-pack spec `((x . ,(string-to-multibyte "\xff")))))
+    (should (equal (bindat-pack spec `((x . ,(string-to-multibyte "x"))))
+                   "x\0"))
+    (should (equal (bindat-pack spec `((x . ,(string-to-multibyte "\xff"))))
+                   "\xff\0"))
     (should-error (bindat-pack spec '((x . "💩"))))
     (should-error (bindat-pack spec '((x . "\N{U+ff}"))))))
 
-- 
2.36.1