From 413b5e1ade51d9d4f1013649e03330d880004f23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Engdeg=C3=A5rd?= Date: Sun, 25 Oct 2020 12:40:36 +0100 Subject: [PATCH] Parse GDB/MI results directly instead of going via JSON (bug#44173) Translating GDB/MI into JSON is an unnecessary and fragile detour that made it hard to deal with octal escapes in strings correctly. Parse GDB/MI directly instead. * lisp/progmodes/gdb-mi.el (gdb-mi-decode-strings): Adjust doc string. (gdb-mi-decode, gud-gdbmi-marker-filter): Remove gdb-mi-decode. (gdb-jsonify-buffer): Remove. (gdb-mi--parse-tuple-or-list, gdb-mi--parse-c-string) (gdb-mi--parse-value, gdb-mi--parse-result-or-value) (gdb-mi--parse-results, gdb-mi--fix-key, gdb-mi--extend-fullname): New. (gdb-json-read-buffer, gdb-json-string, gdb-json-partial-output): Rename to gdb-mi--read-buffer, gdb-mi--from-string and gdb-mi--partial-output respectively. Remove useless FIX-LIST argument. FIX-KEY is now a symbol, not a string. All callers updated. * test/lisp/progmodes/gdb-mi-tests.el: New file. --- lisp/progmodes/gdb-mi.el | 290 +++++++++++++++------------- test/lisp/progmodes/gdb-mi-tests.el | 44 +++++ 2 files changed, 200 insertions(+), 134 deletions(-) create mode 100644 test/lisp/progmodes/gdb-mi-tests.el diff --git a/lisp/progmodes/gdb-mi.el b/lisp/progmodes/gdb-mi.el index 9e8af84a60..d31aef5790 100644 --- a/lisp/progmodes/gdb-mi.el +++ b/lisp/progmodes/gdb-mi.el @@ -89,7 +89,6 @@ ;;; Code: (require 'gud) -(require 'json) (require 'bindat) (require 'cl-lib) (require 'cl-seq) @@ -167,7 +166,7 @@ gdb-threads-list "Associative list of threads provided by \"-thread-info\" MI command. Keys are thread numbers (in strings) and values are structures as -returned from -thread-info by `gdb-json-partial-output'. Updated in +returned from -thread-info by `gdb-mi--partial-output'. Updated in `gdb-thread-list-handler-custom'.") (defvar gdb-running-threads-count nil @@ -186,7 +185,7 @@ gdb-breakpoints-list "Associative list of breakpoints provided by \"-break-list\" MI command. Keys are breakpoint numbers (in string) and values are structures -as returned from \"-break-list\" by `gdb-json-partial-output' +as returned from \"-break-list\" by `gdb-mi--partial-output' \(\"body\" field is used). Updated in `gdb-breakpoints-list-handler-custom'.") @@ -1266,7 +1265,7 @@ gud-watch (message "gud-watch is a no-op in this mode.")))) (defun gdb-var-create-handler (expr) - (let* ((result (gdb-json-partial-output))) + (let* ((result (gdb-mi--partial-output))) (if (not (bindat-get-field result 'msg)) (let ((var (list (bindat-get-field result 'name) @@ -1317,7 +1316,7 @@ gdb-var-list-children (defun gdb-var-list-children-handler (varnum) (let* ((var-list nil) - (output (gdb-json-partial-output "child")) + (output (gdb-mi--partial-output 'child)) (children (bindat-get-field output 'children))) (catch 'child-already-watched (dolist (var gdb-var-list) @@ -1392,7 +1391,7 @@ gdb-var-update 'gdb-var-update)) (defun gdb-var-update-handler () - (let ((changelist (bindat-get-field (gdb-json-partial-output) 'changelist))) + (let ((changelist (bindat-get-field (gdb-mi--partial-output) 'changelist))) (dolist (var gdb-var-list) (setcar (nthcdr 5 var) nil)) (let ((temp-var-list gdb-var-list)) @@ -2436,7 +2435,7 @@ gdbmi-bnf-incomplete-record-result is-complete))) -; The following grammar rules are not yet implemented by this GDBMI-BNF parser. +; The following grammar rules are not parsed directly by this GDBMI-BNF parser. ; The handling of those rules is currently done by the handlers registered ; in gdbmi-bnf-result-state-configs ; @@ -2458,19 +2457,17 @@ gdbmi-bnf-incomplete-record-result ; list ==> ; "[]" | "[" value ( "," value )* "]" | "[" result ( "," result )* "]" +;; FIXME: This is fragile: it relies on the assumption that all the +;; non-ASCII strings output by GDB, including names of the source +;; files, values of string variables in the inferior, etc., are all +;; encoded in the same encoding. + (defcustom gdb-mi-decode-strings nil "When non-nil, decode octal escapes in GDB output into non-ASCII text. If the value is a coding-system, use that coding-system to decode the bytes reconstructed from octal escapes. Any other non-nil value -means to decode using the coding-system set for the GDB process. - -Warning: setting this non-nil might mangle strings reported by GDB -that have literal substrings which match the \\nnn octal escape -patterns, where nnn is an octal number between 200 and 377. So -we only recommend to set this variable non-nil if the program you -are debugging really reports non-ASCII text, or some of its source -file names include non-ASCII characters." +means to decode using the coding-system set for the GDB process." :type '(choice (const :tag "Don't decode" nil) (const :tag "Decode using default coding-system" t) @@ -2478,47 +2475,9 @@ gdb-mi-decode-strings :group 'gdb :version "25.1") -;; The idea of the following function was suggested -;; by Kenichi Handa . -;; -;; FIXME: This is fragile: it relies on the assumption that all the -;; non-ASCII strings output by GDB, including names of the source -;; files, values of string variables in the inferior, etc., are all -;; encoded in the same encoding. It also assumes that the \nnn -;; sequences are not split between chunks of output of the GDB process -;; due to buffering, and arrive together. Finally, if some string -;; included literal \nnn strings (as opposed to non-ASCII characters -;; converted by GDB/MI to octal escapes), this decoding will mangle -;; those strings. When/if GDB acquires the ability to not -;; escape-protect non-ASCII characters in its MI output, this kludge -;; should be removed. -(defun gdb-mi-decode (string) - "Decode octal escapes in MI output STRING into multibyte text." - (let ((coding - (if (coding-system-p gdb-mi-decode-strings) - gdb-mi-decode-strings - (with-current-buffer - (gdb-get-buffer-create 'gdb-partial-output-buffer) - buffer-file-coding-system)))) - (with-temp-buffer - (set-buffer-multibyte nil) - (prin1 string (current-buffer)) - (goto-char (point-min)) - ;; prin1 quotes the octal escapes as well, which interferes with - ;; their interpretation by 'read' below. Remove the extra - ;; backslashes to countermand that. - (while (re-search-forward "\\\\\\(\\\\[2-3][0-7][0-7]\\)" nil t) - (replace-match "\\1" nil nil)) - (goto-char (point-min)) - (decode-coding-string (read (current-buffer)) coding)))) - (defun gud-gdbmi-marker-filter (string) "Filter GDB/MI output." - ;; If required, decode non-ASCII text encoded with octal escapes. - (or (null gdb-mi-decode-strings) - (setq string (gdb-mi-decode string))) - ;; Record transactions if logging is enabled. (when gdb-enable-debug (push (cons 'recv string) gdb-debug-log) @@ -2565,7 +2524,7 @@ gdb-thread-created (defun gdb-thread-exited (_token output-field) "Handle =thread-exited async record. Unset `gdb-thread-number' if current thread exited and update threads list." - (let* ((thread-id (bindat-get-field (gdb-json-string output-field) 'id))) + (let* ((thread-id (bindat-get-field (gdb-mi--from-string output-field) 'id))) (if (string= gdb-thread-number thread-id) (gdb-setq-thread-number nil)) ;; When we continue current thread and it quickly exits, @@ -2579,7 +2538,7 @@ gdb-thread-selected "Handler for =thread-selected MI output record. Sets `gdb-thread-number' to new id." - (let* ((result (gdb-json-string output-field)) + (let* ((result (gdb-mi--from-string output-field)) (thread-id (bindat-get-field result 'id))) (gdb-setq-thread-number thread-id) ;; Typing `thread N' in GUD buffer makes GDB emit `^done' followed @@ -2595,7 +2554,7 @@ gdb-thread-selected (defun gdb-running (_token output-field) (let* ((thread-id - (bindat-get-field (gdb-json-string output-field) 'thread-id))) + (bindat-get-field (gdb-mi--from-string output-field) 'thread-id))) ;; We reset gdb-frame-number to nil if current thread has gone ;; running. This can't be done in gdb-thread-list-handler-custom ;; because we need correct gdb-frame-number by the time @@ -2624,7 +2583,7 @@ gdb-stopped "Given the contents of *stopped MI async record, select new current thread and update GDB buffers." ;; Reason is available with target-async only - (let* ((result (gdb-json-string output-field)) + (let* ((result (gdb-mi--from-string output-field)) (reason (bindat-get-field result 'reason)) (thread-id (bindat-get-field result 'thread-id)) (retval (bindat-get-field result 'return-value)) @@ -2780,83 +2739,146 @@ gdb-clear-partial-output (with-current-buffer (gdb-get-buffer-create 'gdb-partial-output-buffer) (erase-buffer))) -(defun gdb-jsonify-buffer (&optional fix-key fix-list) - "Prepare GDB/MI output in current buffer for parsing with `json-read'. - -Field names are wrapped in double quotes and equal signs are -replaced with semicolons. - -If FIX-KEY is non-nil, strip all \"FIX-KEY=\" occurrences from -partial output. This is used to get rid of useless keys in lists -in MI messages, e.g.: [key=.., key=..]. -stack-list-frames and --break-info are examples of MI commands which issue such -responses. - -If FIX-LIST is non-nil, \"FIX-LIST={..}\" is replaced with -\"FIX-LIST=[..]\" prior to parsing. This is used to fix broken --break-info output when it contains breakpoint script field -incompatible with GDB/MI output syntax. - -If `default-directory' is remote, full file names are adapted accordingly." - (save-excursion +;; Parse GDB/MI result records: this process converts +;; list [...] -> list +;; tuple {...} -> list +;; result KEY=VALUE -> (KEY . VALUE) where KEY is a symbol +;; c-string "..." -> string + +(defun gdb-mi--parse-tuple-or-list (end-char) + "Parse a tuple or list, either returned as a Lisp list. +END-CHAR is the ending delimiter; will stop at end-of-buffer otherwise." + (let ((items nil)) + (while (not (or (eobp) + (eq (following-char) end-char))) + (let ((item (gdb-mi--parse-result-or-value))) + (push item items) + (when (eq (following-char) ?,) + (forward-char)))) + (when (eq (following-char) end-char) + (forward-char)) + (nreverse items))) + +(defun gdb-mi--parse-c-string () + "Parse a c-string." + (let ((start (point)) + (pieces nil) + (octals-used nil)) + (while (and (re-search-forward (rx (or ?\\ ?\"))) + (not (eq (preceding-char) ?\"))) + (push (buffer-substring start (1- (point))) pieces) + (cond + ((looking-at (rx (any "0-7") (? (any "0-7") (? (any "0-7"))))) + (push (unibyte-string (string-to-number (match-string 0) 8)) pieces) + (setq octals-used t) + (goto-char (match-end 0))) + ((looking-at (rx (any "ntrvfab\"\\"))) + (push (cdr (assq (following-char) + '((?n . "\n") + (?t . "\t") + (?r . "\r") + (?v . "\v") + (?f . "\f") + (?a . "\a") + (?b . "\b") + (?\" . "\"") + (?\\ . "\\")))) + pieces) + (forward-char)) + (t + (warn "Unrecognised escape char: %c" (following-char)))) + (setq start (point))) + (push (buffer-substring start (1- (point))) pieces) + (let ((s (apply #'concat (nreverse pieces)))) + (if (and octals-used gdb-mi-decode-strings) + (let ((coding + (if (coding-system-p gdb-mi-decode-strings) + gdb-mi-decode-strings + (buffer-local-value + 'buffer-file-coding-system + ;; FIXME: This is somewhat expensive. + (gdb-get-buffer-create 'gdb-partial-output-buffer))))) + (decode-coding-string s coding)) + s)))) + +(defun gdb-mi--parse-value () + "Parse a value." + (cond + ((eq (following-char) ?\{) + (forward-char) + (gdb-mi--parse-tuple-or-list ?\})) + ((eq (following-char) ?\[) + (forward-char) + (gdb-mi--parse-tuple-or-list ?\])) + ((eq (following-char) ?\") + (forward-char) + (gdb-mi--parse-c-string)) + (t (error "Bad start of result or value: %c" (following-char))))) + +(defun gdb-mi--parse-result-or-value () + "Parse a result (key=value) or value." + (if (looking-at (rx (group (+ (any "a-zA-Z" ?_ ?-))) "=")) + (progn + (goto-char (match-end 0)) + (let* ((variable (intern (match-string 1))) + (value (gdb-mi--parse-value))) + (cons variable value))) + (gdb-mi--parse-value))) + +(defun gdb-mi--parse-results () + "Parse zero or more result productions as a list." + (gdb-mi--parse-tuple-or-list nil)) + +(defun gdb-mi--fix-key (key value) + "Convert any result (key-value pair) in VALUE whose key is KEY to its value." + (cond + ((atom value) value) + ((symbolp (car value)) + (if (eq (car value) key) + (cdr value) + (cons (car value) (gdb-mi--fix-key key (cdr value))))) + (t (mapcar (lambda (x) (gdb-mi--fix-key key x)) value)))) + +(defun gdb-mi--extend-fullname (remote value) + "Prepend REMOTE to any result string with `fullname' as the key in VALUE." + (cond + ((atom value) value) + ((symbolp (car value)) + (if (and (eq (car value) 'fullname) + (stringp (cdr value))) + (cons 'fullname (concat remote (cdr value))) + (cons (car value) (gdb-mi--extend-fullname remote (cdr value))))) + (t (mapcar (lambda (x) (gdb-mi--extend-fullname remote x)) value)))) + +(defun gdb-mi--read-buffer (fix-key) + "Parse the current buffer as a list of result productions. +If FIX-KEY is a non-nil symbol, convert all FIX-KEY=VALUE results into VALUE. +This is used to get rid of useless keys in lists in MI messages; +eg, [key=.., key=..]. -stack-list-frames and -break-info are +examples of MI commands which issue such responses." + (goto-char (point-min)) + (let ((results (gdb-mi--parse-results))) (let ((remote (file-remote-p default-directory))) (when remote - (goto-char (point-min)) - (while (re-search-forward "[\\[,]fullname=\"\\(.+\\)\"" nil t) - (replace-match (concat remote "\\1") nil nil nil 1)))) - (goto-char (point-min)) + (setq results (gdb-mi--extend-fullname remote results)))) (when fix-key - (save-excursion - (while (re-search-forward (concat "[\\[,]\\(" fix-key "=\\)") nil t) - (replace-match "" nil nil nil 1)))) - (when fix-list - (save-excursion - ;; Find positions of braces which enclose broken list - (while (re-search-forward (concat fix-list "={\"") nil t) - (let ((p1 (goto-char (- (point) 2))) - (p2 (progn (forward-sexp) - (1- (point))))) - ;; Replace braces with brackets - (save-excursion - (goto-char p1) - (delete-char 1) - (insert "[") - (goto-char p2) - (delete-char 1) - (insert "]")))))) - (goto-char (point-min)) - (insert "{") - (let ((re (concat "\\([[:alnum:]_-]+\\)="))) - (while (re-search-forward re nil t) - (replace-match "\"\\1\":" nil nil) - (if (eq (char-after) ?\") (forward-sexp) (forward-char)))) - (goto-char (point-max)) - (insert "}"))) - -(defun gdb-json-read-buffer (&optional fix-key fix-list) - "Prepare and parse GDB/MI output in current buffer with `json-read'. - -FIX-KEY and FIX-LIST work as in `gdb-jsonify-buffer'." - (gdb-jsonify-buffer fix-key fix-list) - (save-excursion - (goto-char (point-min)) - (let ((json-array-type 'list)) - (json-read)))) + (setq results (gdb-mi--fix-key fix-key results))) + results)) -(defun gdb-json-string (string &optional fix-key fix-list) - "Prepare and parse STRING containing GDB/MI output with `json-read'. +(defun gdb-mi--from-string (string &optional fix-key) + "Prepare and parse STRING containing GDB/MI output. -FIX-KEY and FIX-LIST work as in `gdb-jsonify-buffer'." +FIX-KEY works as in `gdb-mi--read-buffer'." (with-temp-buffer (insert string) - (gdb-json-read-buffer fix-key fix-list))) + (gdb-mi--read-buffer fix-key))) -(defun gdb-json-partial-output (&optional fix-key fix-list) - "Prepare and parse gdb-partial-output-buffer with `json-read'. +(defun gdb-mi--partial-output (&optional fix-key) + "Prepare and parse gdb-partial-output-buffer. -FIX-KEY and FIX-KEY work as in `gdb-jsonify-buffer'." +FIX-KEY works as in `gdb-mi--read-buffer'." (with-current-buffer (gdb-get-buffer-create 'gdb-partial-output-buffer) - (gdb-json-read-buffer fix-key fix-list))) + (gdb-mi--read-buffer fix-key))) (defun gdb-line-posns (line) "Return a pair of LINE beginning and end positions." @@ -3033,7 +3055,7 @@ def-gdb-trigger-and-handler (defun gdb-breakpoints-list-handler-custom () (let ((breakpoints-list (bindat-get-field - (gdb-json-partial-output "bkpt" "script") + (gdb-mi--partial-output 'bkpt) 'BreakpointTable 'body)) (table (make-gdb-table))) (setq gdb-breakpoints-list nil) @@ -3355,7 +3377,7 @@ gdb-threads-mode 'gdb-invalidate-threads) (defun gdb-thread-list-handler-custom () - (let ((threads-list (bindat-get-field (gdb-json-partial-output) 'threads)) + (let ((threads-list (bindat-get-field (gdb-mi--partial-output) 'threads)) (table (make-gdb-table)) (marked-line nil)) (setq gdb-threads-list nil) @@ -3592,7 +3614,7 @@ gdb-memory-column-width (error "Unknown format")))) (defun gdb-read-memory-custom () - (let* ((res (gdb-json-partial-output)) + (let* ((res (gdb-mi--partial-output)) (err-msg (bindat-get-field res 'msg))) (if (not err-msg) (let ((memory (bindat-get-field res 'memory))) @@ -4001,7 +4023,7 @@ gdb-disassembly-mode 'gdb-invalidate-disassembly) (defun gdb-disassembly-handler-custom () - (let* ((instructions (bindat-get-field (gdb-json-partial-output) 'asm_insns)) + (let* ((instructions (bindat-get-field (gdb-mi--partial-output) 'asm_insns)) (address (bindat-get-field (gdb-current-buffer-frame) 'addr)) (table (make-gdb-table)) (marked-line nil)) @@ -4142,7 +4164,7 @@ gdb-frame-location (if res (concat " of " res) "")))) (defun gdb-stack-list-frames-custom () - (let ((stack (bindat-get-field (gdb-json-partial-output "frame") 'stack)) + (let ((stack (bindat-get-field (gdb-mi--partial-output 'frame) 'stack)) (table (make-gdb-table))) (set-marker gdb-stack-position nil) (dolist (frame stack) @@ -4270,7 +4292,7 @@ gdb-edit-locals-value ;; Don't display values of arrays or structures. ;; These can be expanded using gud-watch. (defun gdb-locals-handler-custom () - (let ((locals-list (bindat-get-field (gdb-json-partial-output) 'locals)) + (let ((locals-list (bindat-get-field (gdb-mi--partial-output) 'locals)) (table (make-gdb-table))) (dolist (local locals-list) (let ((name (bindat-get-field local 'name)) @@ -4367,7 +4389,7 @@ gdb-frame-locals-buffer (defun gdb-registers-handler-custom () (when gdb-register-names (let ((register-values - (bindat-get-field (gdb-json-partial-output) 'register-values)) + (bindat-get-field (gdb-mi--partial-output) 'register-values)) (table (make-gdb-table))) (dolist (register register-values) (let* ((register-number (bindat-get-field register 'number)) @@ -4457,7 +4479,7 @@ gdb-get-changed-registers (defun gdb-changed-registers-handler () (setq gdb-changed-registers nil) (dolist (register-number - (bindat-get-field (gdb-json-partial-output) 'changed-registers)) + (bindat-get-field (gdb-mi--partial-output) 'changed-registers)) (push register-number gdb-changed-registers))) (defun gdb-register-names-handler () @@ -4465,7 +4487,7 @@ gdb-register-names-handler ;; only once (in gdb-init-1) (setq gdb-register-names nil) (dolist (register-name - (bindat-get-field (gdb-json-partial-output) 'register-names)) + (bindat-get-field (gdb-mi--partial-output) 'register-names)) (push register-name gdb-register-names)) (setq gdb-register-names (reverse gdb-register-names))) @@ -4492,7 +4514,7 @@ gdb-get-main-selected-frame (defun gdb-frame-handler () "Set `gdb-selected-frame' and `gdb-selected-file' to show overlay arrow in source buffer." - (let ((frame (bindat-get-field (gdb-json-partial-output) 'frame))) + (let ((frame (bindat-get-field (gdb-mi--partial-output) 'frame))) (when frame (setq gdb-selected-frame (bindat-get-field frame 'func)) (setq gdb-selected-file (bindat-get-field frame 'fullname)) diff --git a/test/lisp/progmodes/gdb-mi-tests.el b/test/lisp/progmodes/gdb-mi-tests.el new file mode 100644 index 0000000000..79493a571b --- /dev/null +++ b/test/lisp/progmodes/gdb-mi-tests.el @@ -0,0 +1,44 @@ +;;; gdb-mi-tests.el --- tests for gdb-mi.el -*- lexical-binding: t -*- + +;; Copyright (C) 2020 Free Software Foundation, Inc. + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see . + +(require 'ert) +(require 'gdb-mi) + +(ert-deftest gdb-mi-parse-value () + ;; Test the GDB/MI result/value parser. + (should (equal + (gdb-mi--from-string + "alpha=\"ab\\ncd\",beta=[\"x\",{gamma=\"y\",delta=[]}]") + '((alpha . "ab\ncd") + (beta . ("x" ((gamma . "y") (delta . ()))))))) + (should (equal + (gdb-mi--from-string + "alpha=\"ab\\ncd\",beta=[\"x\",{gamma=\"y\",delta=[]}]" + 'gamma) + '((alpha . "ab\ncd") + (beta . ("x" ("y" (delta . ()))))))) + + (should (equal (gdb-mi--from-string "alpha=\"a\\303\\245b\"") + `((alpha . ,(string-to-multibyte "a\303\245b"))))) + (let ((gdb-mi-decode-strings 'utf-8)) + (should (equal (gdb-mi--from-string "alpha=\"a\\303\\245b\"") + '((alpha . "aåb"))))) + ) + +(provide 'gdb-mi-tests) -- 2.26.2