[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v1 1/8] target/avr: Add instruction decoder
From: |
Sarah Harris |
Subject: |
[Qemu-devel] [PATCH v1 1/8] target/avr: Add instruction decoder |
Date: |
Sat, 4 May 2019 09:36:31 +0100 |
This utility module builds a decision tree to decode instructions, starting
from a human readable list of instruction bit patterns.
Automatic tree generation will hopefully be more efficient and more
maintainable than a hand-designed opcode parser.
Tree generation happens at startup because this seemed simpler to implement
than adding a new build step.
Signed-off-by: Sarah Harris <address@hidden>
---
target/avr/decode.c | 441 ++++++++++++++++++++++++++++++++++++++++++++
target/avr/decode.h | 68 +++++++
2 files changed, 509 insertions(+)
create mode 100644 target/avr/decode.c
create mode 100644 target/avr/decode.h
diff --git a/target/avr/decode.c b/target/avr/decode.c
new file mode 100644
index 0000000000..a984806d96
--- /dev/null
+++ b/target/avr/decode.c
@@ -0,0 +1,441 @@
+/*
+ * AVR instruction decoder.
+ *
+ * Copyright (c) 2019 University of Kent
+ * Author: Sarah Harris
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * # Why is this here?
+ * This decoder takes a list of human readable descriptions of instructions
+ * and uses it to build a binary decision tree used to choose translation
+ * functions for opcodes.
+ * It's built like this because figuring out the structure of AVR instructions
+ * was too hard and writing a Big Nested Switch by hand seemed too painful.
+ * This seems to be the simplest answer that doesn't use loads (>0.5MB) of RAM.
+ *
+ * # How does it work?
+ * This is based J. R. Quinlan's ID3 algorithm, tweaked to add weights to each
+ * instruction.
+ * Having a binary tree branch on opcode bits seems obvious, but the awkward
+ * part is deciding which order to test the bits.
+ * Getting the order right means that redundant bits can be ignored and fewer
+ * branches are needed; i.e. less memory and faster lookups.
+ * Here, the tests are ordered by an estimate of information gain based on
+ * Shannon Entropy.
+ * In short, we guess how much each bit tells us and pick the one that gives
+ * us most progress toward knowing which instruction we're seeing.
+ * The weights are currently only used to prioritise legal opcodes over
+ * illegal opcodes, which significantly reduces the tree size.
+ *
+ * # Why are you doing this at run time?
+ * It was easier than building and running a special purpose tool during
+ * QEMU's build process.
+ * The tree is only built once, during startup, and hopefully doesn't take long
+ * enough to be noticeable.
+ */
+
+#include <math.h>
+#include "qemu/osdep.h"
+#include "qemu/bitops.h"
+#include "qemu/error-report.h"
+#include "decode.h"
+
+/* #define DEBUG_DECODER */
+
+/* Wide enough for the largest AVR instruction. */
+#define OPCODE_T uint16_t
+#define OPCODE_SIZE 16
+
+/*
+ * Probability estimate for each instruction.
+ * Larger values mean higher priority.
+ */
+#define WEIGHT_T uint64_t
+#define WEIGHT_LEGAL (1 << 16)
+#define WEIGHT_ILLEGAL 1
+
+typedef union Tree Tree;
+
+typedef struct {
+ bool is_leaf;
+ /* Bit to test */
+ uint bit;
+ const Tree *zero;
+ const Tree *one;
+} Branch;
+
+typedef struct {
+ bool is_leaf;
+ TranslateFn decoder;
+ /* Instruction length in bits */
+ uint32_t length;
+ const char *name;
+} Leaf;
+
+union Tree {
+ bool is_leaf;
+ Branch branch;
+ Leaf leaf;
+};
+
+/* Additional (generated) instruction data */
+typedef struct {
+ const Instruction *instruction;
+ /* Instruction length in bits */
+ uint32_t length;
+ WEIGHT_T weight;
+ /*
+ * Bit pattern matched in opcodes.
+ * For each 1 in mask, the same bit in the opcode must match that from
bits.
+ */
+ OPCODE_T bits;
+ OPCODE_T mask;
+} Pattern;
+
+/* Cached decoding tree */
+Tree *cache;
+
+/* Return calculated bit pattern and length for an instruction */
+static Pattern get_info(const Instruction *instruction)
+{
+ OPCODE_T bit = 1 << (OPCODE_SIZE - 1);
+ OPCODE_T bits = 0;
+ OPCODE_T mask = 0;
+ uint32_t length = 0;
+ const char *c = instruction->pattern;
+ while (*c != '\0') {
+ switch (*c) {
+ case '0':
+ mask |= bit;
+ bit >>= 1;
+ length++;
+ break;
+ case '1':
+ bits |= bit;
+ mask |= bit;
+ bit >>= 1;
+ length++;
+ break;
+ case '*':
+ bit >>= 1;
+ length++;
+ break;
+ case '_':
+ /* NOP */
+ break;
+ default:
+ assert(0);
+ }
+ c++;
+ }
+ const Pattern pattern = {
+ .instruction = instruction,
+ .length = length,
+ .weight = WEIGHT_LEGAL,
+ .bits = bits,
+ .mask = mask
+ };
+ return pattern;
+}
+
+/* Return true if an instruction matches a pattern of known/unknown bits */
+static bool matches(const Pattern *const pattern, const OPCODE_T bits,
+ const OPCODE_T mask)
+{
+ OPCODE_T overlap = pattern->mask & mask;
+ return (pattern->bits & overlap) == (bits & overlap);
+}
+
+/* Return number of instructions that match a pattern of known/unknown bits */
+static size_t count_legal(const Pattern *patterns, const size_t size,
+ const OPCODE_T bits, const OPCODE_T mask)
+{
+ size_t sum = 0;
+ size_t i;
+ for (i = 0; i < size; i++) {
+ if (matches(&patterns[i], bits, mask)) {
+ sum++;
+ }
+ }
+ return sum;
+}
+
+/* Return the number of opcodes that could match a bit pattern */
+static uint64_t count_opcodes(const OPCODE_T _bits, OPCODE_T mask)
+{
+ uint64_t matches = 1;
+ int i;
+ assert(sizeof(matches) * 8 > OPCODE_SIZE);
+ for (i = 0; i < OPCODE_SIZE; i++) {
+ if (!(mask & 1)) {
+ matches <<= 1;
+ }
+ mask >>= 1;
+ }
+ return matches;
+}
+
+/*
+ * Return a known/unknown bit pattern that only matches opcodes matched by both
+ * of the given patterns.
+ */
+static void intersection(const OPCODE_T a_bits, const OPCODE_T a_mask,
+ const OPCODE_T b_bits, const OPCODE_T b_mask,
+ OPCODE_T *const out_bits, OPCODE_T *const out_mask)
+{
+ const OPCODE_T overlap = a_mask & b_mask;
+ /* The two patterns mustn't have conflicting requirements */
+ assert((a_bits & overlap) == (b_bits & overlap));
+ *out_bits = (a_bits & a_mask) | (b_bits & b_mask);
+ *out_mask = a_mask | b_mask;
+}
+
+/*
+ * Return one if any opcode allowed by a pattern of known/unknown bits is
+ * illegal.
+ */
+static size_t count_illegal(const Pattern *patterns, const size_t size,
+ const OPCODE_T bits, const OPCODE_T mask)
+{
+ size_t i;
+ const uint64_t no_opcodes = count_opcodes(bits, mask);
+ uint64_t no_legal = 0;
+
+ /* Count opcodes that match instructions */
+ for (i = 0; i < size; i++) {
+ const Pattern *pattern = &patterns[i];
+ if (matches(pattern, bits, mask)) {
+ OPCODE_T both_bits, both_mask;
+ intersection(
+ bits, mask,
+ pattern->bits, pattern->mask,
+ &both_bits, &both_mask
+ );
+ no_legal += count_opcodes(both_bits, both_mask);
+ }
+ }
+
+ assert(no_legal <= no_opcodes);
+ if (no_legal == no_opcodes) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
+/* Return the first matching instruction for a pattern of known/unknown bits */
+static const Pattern *find_match(const Pattern *patterns, const size_t size,
+ const OPCODE_T bits, const OPCODE_T mask)
+{
+ size_t i;
+ for (i = 0; i < size; i++) {
+ if (matches(&patterns[i], bits, mask)) {
+ return &patterns[i];
+ }
+ }
+ return NULL;
+}
+
+/* Return sum of weights of instructions that match a bit pattern */
+static WEIGHT_T weigh_matches(const Pattern *patterns, const size_t size,
+ const OPCODE_T bits, const OPCODE_T mask)
+{
+ size_t i;
+ WEIGHT_T illegal = (WEIGHT_T)count_illegal(patterns, size, bits, mask)
+ * WEIGHT_ILLEGAL;
+ WEIGHT_T legal = 0;
+ for (i = 0; i < size; i++) {
+ if (matches(&patterns[i], bits, mask)) {
+ legal += patterns[i].weight;
+ }
+ }
+ return legal + illegal;
+}
+
+/*
+ * Return "effort" (estimated information needed) to decide tree outcome.
+ * bits and mask give the opcode bits already decided by the parent tree.
+ * parent_weight gives the sum of the weights of instructions that the
+ * parent tree matches.
+ */
+static float subtree_effort(const Pattern *patterns, const size_t size,
+ const OPCODE_T bits, const OPCODE_T mask, const WEIGHT_T parent_weight)
+{
+ const WEIGHT_T weight = weigh_matches(patterns, size, bits, mask);
+ float entropy_legal, entropy_illegal, probability;
+ size_t i;
+
+ /* Sum information needed to decide legal instructions */
+ entropy_legal = 0.0;
+ for (i = 0; i < size; i++) {
+ const Pattern *const pattern = &patterns[i];
+ if (matches(pattern, bits, mask)) {
+ probability = (float)pattern->weight / (float)weight;
+ entropy_legal += -probability * log2(probability);
+ }
+ }
+
+ /* Sum information needed to decide illegal instructions */
+ probability = (float)WEIGHT_ILLEGAL / (float)weight;
+ entropy_illegal = -probability * log2(probability) *
+ (float)count_illegal(patterns, size, bits, mask);
+
+ return ((float)weight / (float)parent_weight) *
+ (entropy_legal + entropy_illegal);
+}
+
+/* Return recursively built binary tree for decoding an opcode to instruction
*/
+static Tree *build_tree(const Pattern *patterns, const size_t size,
+ OPCODE_T bits, OPCODE_T mask)
+{
+ /* Check if we've reached a leaf */
+ size_t matching_illegal = count_illegal(patterns, size, bits, mask);
+ size_t matching_legal = count_legal(patterns, size, bits, mask);
+ size_t matching = matching_illegal + matching_legal;
+ assert(matching > 0); /* At last an illegal instruction should match */
+ if (matching_legal == 0) {
+ /* Illegal instruction */
+ Leaf *leaf = g_new0(Leaf, 1);
+ leaf->is_leaf = true;
+ leaf->decoder = NULL;
+ leaf->length = 16;
+ leaf->name = "illegal";
+ return (Tree *)leaf;
+ }
+ if (matching_legal == 1 && matching_illegal == 0) {
+ /* Legal instruction */
+ const Pattern *pattern = find_match(patterns, size, bits, mask);
+ assert(pattern != NULL);
+ const Instruction *const instruction = pattern->instruction;
+ Leaf *leaf = g_new0(Leaf, 1);
+ leaf->is_leaf = true;
+ leaf->decoder = instruction->decoder;
+ leaf->length = pattern->length;
+ leaf->name = instruction->name;
+ return (Tree *)leaf;
+ }
+
+ /* Work out which bit to branch on */
+ const WEIGHT_T tree_weight = weigh_matches(patterns, size, bits, mask);
+ float min_effort = 0.0;
+ ssize_t min_bit = -1;
+ size_t i;
+ for (i = 0; i < OPCODE_SIZE; i++) {
+ float effort;
+ const OPCODE_T bit = 1 << i;
+ if (mask & bit) {
+ /* This bit already branched on, skip */
+ continue;
+ }
+ effort = subtree_effort(patterns, size, bits, mask | bit, tree_weight)
+
+ subtree_effort(patterns, size, bits | bit, mask | bit,
tree_weight);
+ if (min_bit < 0 || effort < min_effort) {
+ min_bit = i;
+ min_effort = effort;
+ }
+ }
+
+ /*
+ * Setup branch on bit that gives most information gain.
+ * (AKA minimum information/effort needed to decide remaining branches.
+ */
+ assert(min_bit >= 0); /* Probably multiple instructions match one opcode */
+ const OPCODE_T bit = 1 << min_bit;
+ const Tree *zero = build_tree(patterns, size, bits, mask | bit);
+ const Tree *one = build_tree(patterns, size, bits | bit, mask | bit);
+ Branch *branch = g_new0(Branch, 1);
+ branch->is_leaf = false;
+ branch->bit = min_bit;
+ branch->zero = zero;
+ branch->one = one;
+ return (Tree *)branch;
+}
+
+#ifdef DEBUG_DECODER
+static size_t depth(const Tree *const tree);
+static size_t depth(const Tree *const tree)
+{
+ if (tree->is_leaf) {
+ return 1;
+ }
+ size_t zero = depth(tree->branch.zero);
+ size_t one = depth(tree->branch.one);
+ if (zero > one) {
+ return zero + 1;
+ }
+ return one + 1;
+}
+static size_t count(const Tree *const tree);
+static size_t count(const Tree *const tree)
+{
+ if (tree->is_leaf) {
+ return 1;
+ }
+ return 1 + count(tree->branch.zero) + count(tree->branch.one);
+}
+#endif
+
+void avr_decoder_init(const Instruction instructions[], const size_t size)
+{
+ assert(cache == NULL); /* Shouldn't be initialised more than once */
+ Pattern *const patterns = g_new0(Pattern, size);
+ size_t i;
+ for (i = 0; i < size; i++) {
+ patterns[i] = get_info(&instructions[i]);
+ }
+ cache = build_tree(patterns, size, 0, 0);
+ g_free(patterns);
+#ifdef DEBUG_DECODER
+ printf("AVR decoder init, depth=%lu, size=%lu\n",
+ depth(cache), count(cache));
+#endif
+}
+
+TranslateFn avr_decode(const uint32_t opcode, uint32_t *const length_out)
+{
+ assert(cache != NULL); /* Must be initialised */
+ const Tree *node = cache;
+ while (1) {
+ assert(node != NULL);
+ if (node->is_leaf) {
+ const Leaf *const leaf = &node->leaf;
+ if (leaf->decoder == NULL) {
+ /* Illegal instruction */
+ error_report("Illegal AVR instruction");
+ exit(1);
+ }
+#ifdef DEBUG_DECODER
+ printf("AVR decoder: %s\n", leaf->name);
+#endif
+ *length_out = leaf->length;
+ return leaf->decoder;
+ } else {
+ const Branch *const branch = &node->branch;
+ const OPCODE_T mask = 1 << branch->bit;
+ if (opcode & mask) {
+ node = branch->one;
+ } else {
+ node = branch->zero;
+ }
+ }
+ }
+}
diff --git a/target/avr/decode.h b/target/avr/decode.h
new file mode 100644
index 0000000000..415406544a
--- /dev/null
+++ b/target/avr/decode.h
@@ -0,0 +1,68 @@
+/*
+ * AVR instruction decoder.
+ *
+ * Copyright (c) 2019 University of Kent
+ * Author: Sarah Harris
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef AVR_DECODER_H
+#define AVR_DECODER_H
+
+/* Pointer to functions used to do final decoding step from opcode to TCG. */
+typedef struct DisasContext DisasContext;
+typedef int (*TranslateFn)(DisasContext *ctx, uint32_t opcode);
+
+/*
+ * Human readable instruction descriptions used to generate decoder.
+ * Doing this at runtime avoids a complicated new build step.
+ */
+typedef struct {
+ /* Instruction mnemonic for debugging */
+ const char *name;
+ /*
+ * Bit pattern describing the instruction's opcode.
+ * Each character represents a bit:
+ * - '1' means bit must be set
+ * - '0' means bit must be cleared
+ * - '*' means don't care
+ * - '_' is ignored (i.e. whitespace), please use to aid readability
+ */
+ const char *pattern;
+ /* Function used to translate this instruction to TCG */
+ TranslateFn decoder;
+} Instruction;
+
+/*
+ * Converts a list of instruction descriptions to a decoding tree
+ * and caches it.
+ * Must only be called once.
+ * `size` is the number of instructions in the given list.
+ */
+void avr_decoder_init(const Instruction instructions[], const size_t size);
+
+/*
+ * Returns the translation function and length of an instruction, given
+ * the opcode.
+ * avr_decoder_init() must be called first to build the decoding tree.
+ */
+TranslateFn avr_decode(const uint32_t opcode, uint32_t *const length_out);
+
+#endif /* AVR_DECODER_H */
--
2.21.0
- [Qemu-devel] [PATCH v1 0/8] DRAFT AVR Patches, Sarah Harris, 2019/05/04
- [Qemu-devel] [PATCH v1 7/8] target/avr: Add example board configuration, Sarah Harris, 2019/05/04
- [Qemu-devel] [PATCH v1 4/8] target/avr: Add instruction helpers, Sarah Harris, 2019/05/04
- [Qemu-devel] [PATCH v1 1/8] target/avr: Add instruction decoder,
Sarah Harris <=
- [Qemu-devel] [PATCH v1 5/8] target/avr: Add instruction translation, Sarah Harris, 2019/05/04
- [Qemu-devel] [PATCH v1 6/8] target/avr: Add limited support for USART and 16 bit timer peripherals, Sarah Harris, 2019/05/04
- [Qemu-devel] [PATCH v1 8/8] target/avr: Register AVR support with the rest of QEMU, the build system, and the MAINTAINERS file, Sarah Harris, 2019/05/04
- Re: [Qemu-devel] [PATCH v1 8/8] target/avr: Register AVR support with the rest of QEMU, the build system, and the MAINTAINERS file, Richard Henderson, 2019/05/05
- Re: [Qemu-devel] [PATCH v1 8/8] target/avr: Register AVR support with the rest of QEMU, the build system, and the MAINTAINERS file, Michael Rolnik, 2019/05/05
- Re: [Qemu-devel] [PATCH v1 8/8] target/avr: Register AVR support with the rest of QEMU, the build system, and the MAINTAINERS file, Sarah Harris, 2019/05/07
- Re: [Qemu-devel] [PATCH v1 8/8] target/avr: Register AVR support with the rest of QEMU, the build system, and the MAINTAINERS file, Sarah Harris, 2019/05/10
- Re: [Qemu-devel] [PATCH v1 8/8] target/avr: Register AVR support with the rest of QEMU, the build system, and the MAINTAINERS file, Eric Blake, 2019/05/06