qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH v1 1/8] target/avr: Add instruction decoder


From: Sarah Harris
Subject: [Qemu-devel] [PATCH v1 1/8] target/avr: Add instruction decoder
Date: Sat, 4 May 2019 09:36:31 +0100

This utility module builds a decision tree to decode instructions, starting 
from a human readable list of instruction bit patterns.
Automatic tree generation will hopefully be more efficient and more 
maintainable than a hand-designed opcode parser.

Tree generation happens at startup because this seemed simpler to implement 
than adding a new build step.

Signed-off-by: Sarah Harris <address@hidden>
---
 target/avr/decode.c | 441 ++++++++++++++++++++++++++++++++++++++++++++
 target/avr/decode.h |  68 +++++++
 2 files changed, 509 insertions(+)
 create mode 100644 target/avr/decode.c
 create mode 100644 target/avr/decode.h

diff --git a/target/avr/decode.c b/target/avr/decode.c
new file mode 100644
index 0000000000..a984806d96
--- /dev/null
+++ b/target/avr/decode.c
@@ -0,0 +1,441 @@
+/*
+ * AVR instruction decoder.
+ *
+ * Copyright (c) 2019 University of Kent
+ * Author: Sarah Harris
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * # Why is this here?
+ * This decoder takes a list of human readable descriptions of instructions
+ * and uses it to build a binary decision tree used to choose translation
+ * functions for opcodes.
+ * It's built like this because figuring out the structure of AVR instructions
+ * was too hard and writing a Big Nested Switch by hand seemed too painful.
+ * This seems to be the simplest answer that doesn't use loads (>0.5MB) of RAM.
+ *
+ * # How does it work?
+ * This is based J. R. Quinlan's ID3 algorithm, tweaked to add weights to each
+ * instruction.
+ * Having a binary tree branch on opcode bits seems obvious, but the awkward
+ * part is deciding which order to test the bits.
+ * Getting the order right means that redundant bits can be ignored and fewer
+ * branches are needed; i.e. less memory and faster lookups.
+ * Here, the tests are ordered by an estimate of information gain based on
+ * Shannon Entropy.
+ * In short, we guess how much each bit tells us and pick the one that gives
+ * us most progress toward knowing which instruction we're seeing.
+ * The weights are currently only used to prioritise legal opcodes over
+ * illegal opcodes, which significantly reduces the tree size.
+ *
+ * # Why are you doing this at run time?
+ * It was easier than building and running a special purpose tool during
+ * QEMU's build process.
+ * The tree is only built once, during startup, and hopefully doesn't take long
+ * enough to be noticeable.
+ */
+
+#include <math.h>
+#include "qemu/osdep.h"
+#include "qemu/bitops.h"
+#include "qemu/error-report.h"
+#include "decode.h"
+
+/* #define DEBUG_DECODER */
+
+/* Wide enough for the largest AVR instruction. */
+#define OPCODE_T uint16_t
+#define OPCODE_SIZE 16
+
+/*
+ * Probability estimate for each instruction.
+ * Larger values mean higher priority.
+ */
+#define WEIGHT_T uint64_t
+#define WEIGHT_LEGAL (1 << 16)
+#define WEIGHT_ILLEGAL 1
+
+typedef union Tree Tree;
+
+typedef struct {
+    bool is_leaf;
+    /* Bit to test */
+    uint bit;
+    const Tree *zero;
+    const Tree *one;
+} Branch;
+
+typedef struct {
+    bool is_leaf;
+    TranslateFn decoder;
+    /* Instruction length in bits */
+    uint32_t length;
+    const char *name;
+} Leaf;
+
+union Tree {
+    bool is_leaf;
+    Branch branch;
+    Leaf leaf;
+};
+
+/* Additional (generated) instruction data */
+typedef struct {
+    const Instruction *instruction;
+    /* Instruction length in bits */
+    uint32_t length;
+    WEIGHT_T weight;
+    /*
+     * Bit pattern matched in opcodes.
+     * For each 1 in mask, the same bit in the opcode must match that from 
bits.
+     */
+    OPCODE_T bits;
+    OPCODE_T mask;
+} Pattern;
+
+/* Cached decoding tree */
+Tree *cache;
+
+/* Return calculated bit pattern and length for an instruction */
+static Pattern get_info(const Instruction *instruction)
+{
+    OPCODE_T bit = 1 << (OPCODE_SIZE - 1);
+    OPCODE_T bits = 0;
+    OPCODE_T mask = 0;
+    uint32_t length = 0;
+    const char *c = instruction->pattern;
+    while (*c != '\0') {
+        switch (*c) {
+        case '0':
+            mask |= bit;
+            bit >>= 1;
+            length++;
+            break;
+        case '1':
+            bits |= bit;
+            mask |= bit;
+            bit >>= 1;
+            length++;
+            break;
+        case '*':
+            bit >>= 1;
+            length++;
+            break;
+        case '_':
+            /* NOP */
+            break;
+        default:
+            assert(0);
+        }
+        c++;
+    }
+    const Pattern pattern = {
+        .instruction = instruction,
+        .length = length,
+        .weight = WEIGHT_LEGAL,
+        .bits = bits,
+        .mask = mask
+    };
+    return pattern;
+}
+
+/* Return true if an instruction matches a pattern of known/unknown bits */
+static bool matches(const Pattern *const pattern, const OPCODE_T bits,
+    const OPCODE_T mask)
+{
+    OPCODE_T overlap = pattern->mask & mask;
+    return (pattern->bits & overlap) == (bits & overlap);
+}
+
+/* Return number of instructions that match a pattern of known/unknown bits */
+static size_t count_legal(const Pattern *patterns, const size_t size,
+    const OPCODE_T bits, const OPCODE_T mask)
+{
+    size_t sum = 0;
+    size_t i;
+    for (i = 0; i < size; i++) {
+        if (matches(&patterns[i], bits, mask)) {
+            sum++;
+        }
+    }
+    return sum;
+}
+
+/* Return the number of opcodes that could match a bit pattern */
+static uint64_t count_opcodes(const OPCODE_T _bits, OPCODE_T mask)
+{
+    uint64_t matches = 1;
+    int i;
+    assert(sizeof(matches) * 8 > OPCODE_SIZE);
+    for (i = 0; i < OPCODE_SIZE; i++) {
+        if (!(mask & 1)) {
+            matches <<= 1;
+        }
+        mask >>= 1;
+    }
+    return matches;
+}
+
+/*
+ * Return a known/unknown bit pattern that only matches opcodes matched by both
+ * of the given patterns.
+ */
+static void intersection(const OPCODE_T a_bits, const OPCODE_T a_mask,
+    const OPCODE_T b_bits, const OPCODE_T b_mask,
+    OPCODE_T *const out_bits, OPCODE_T *const out_mask)
+{
+    const OPCODE_T overlap = a_mask & b_mask;
+    /* The two patterns mustn't have conflicting requirements */
+    assert((a_bits & overlap) == (b_bits & overlap));
+    *out_bits = (a_bits & a_mask) | (b_bits & b_mask);
+    *out_mask = a_mask | b_mask;
+}
+
+/*
+ * Return one if any opcode allowed by a pattern of known/unknown bits is
+ * illegal.
+ */
+static size_t count_illegal(const Pattern *patterns, const size_t size,
+    const OPCODE_T bits, const OPCODE_T mask)
+{
+    size_t i;
+    const uint64_t no_opcodes = count_opcodes(bits, mask);
+    uint64_t no_legal = 0;
+
+    /* Count opcodes that match instructions */
+    for (i = 0; i < size; i++) {
+        const Pattern *pattern = &patterns[i];
+        if (matches(pattern, bits, mask)) {
+            OPCODE_T both_bits, both_mask;
+            intersection(
+                bits, mask,
+                pattern->bits, pattern->mask,
+                &both_bits, &both_mask
+            );
+            no_legal += count_opcodes(both_bits, both_mask);
+        }
+    }
+
+    assert(no_legal <= no_opcodes);
+    if (no_legal == no_opcodes) {
+        return 0;
+    } else {
+        return 1;
+    }
+}
+
+/* Return the first matching instruction for a pattern of known/unknown bits */
+static const Pattern *find_match(const Pattern *patterns, const size_t size,
+    const OPCODE_T bits, const OPCODE_T mask)
+{
+    size_t i;
+    for (i = 0; i < size; i++) {
+        if (matches(&patterns[i], bits, mask)) {
+            return &patterns[i];
+        }
+    }
+    return NULL;
+}
+
+/* Return sum of weights of instructions that match a bit pattern */
+static WEIGHT_T weigh_matches(const Pattern *patterns, const size_t size,
+    const OPCODE_T bits, const OPCODE_T mask)
+{
+    size_t i;
+    WEIGHT_T illegal = (WEIGHT_T)count_illegal(patterns, size, bits, mask)
+        * WEIGHT_ILLEGAL;
+    WEIGHT_T legal = 0;
+    for (i = 0; i < size; i++) {
+        if (matches(&patterns[i], bits, mask)) {
+            legal += patterns[i].weight;
+        }
+    }
+    return legal + illegal;
+}
+
+/*
+ * Return "effort" (estimated information needed) to decide tree outcome.
+ * bits and mask give the opcode bits already decided by the parent tree.
+ * parent_weight gives the sum of the weights of instructions that the
+ * parent tree matches.
+ */
+static float subtree_effort(const Pattern *patterns, const size_t size,
+    const OPCODE_T bits, const OPCODE_T mask, const WEIGHT_T parent_weight)
+{
+    const WEIGHT_T weight = weigh_matches(patterns, size, bits, mask);
+    float entropy_legal, entropy_illegal, probability;
+    size_t i;
+
+    /* Sum information needed to decide legal instructions */
+    entropy_legal = 0.0;
+    for (i = 0; i < size; i++) {
+        const Pattern *const pattern = &patterns[i];
+        if (matches(pattern, bits, mask)) {
+            probability = (float)pattern->weight / (float)weight;
+            entropy_legal += -probability * log2(probability);
+        }
+    }
+
+    /* Sum information needed to decide illegal instructions */
+    probability = (float)WEIGHT_ILLEGAL / (float)weight;
+    entropy_illegal = -probability * log2(probability) *
+        (float)count_illegal(patterns, size, bits, mask);
+
+    return ((float)weight / (float)parent_weight) *
+        (entropy_legal + entropy_illegal);
+}
+
+/* Return recursively built binary tree for decoding an opcode to instruction 
*/
+static Tree *build_tree(const Pattern *patterns, const size_t size,
+    OPCODE_T bits, OPCODE_T mask)
+{
+    /* Check if we've reached a leaf */
+    size_t matching_illegal = count_illegal(patterns, size, bits, mask);
+    size_t matching_legal = count_legal(patterns, size, bits, mask);
+    size_t matching = matching_illegal + matching_legal;
+    assert(matching > 0); /* At last an illegal instruction should match */
+    if (matching_legal == 0) {
+        /* Illegal instruction */
+        Leaf *leaf = g_new0(Leaf, 1);
+        leaf->is_leaf = true;
+        leaf->decoder = NULL;
+        leaf->length = 16;
+        leaf->name = "illegal";
+        return (Tree *)leaf;
+    }
+    if (matching_legal == 1 && matching_illegal == 0) {
+        /* Legal instruction */
+        const Pattern *pattern = find_match(patterns, size, bits, mask);
+        assert(pattern != NULL);
+        const Instruction *const instruction = pattern->instruction;
+        Leaf *leaf = g_new0(Leaf, 1);
+        leaf->is_leaf = true;
+        leaf->decoder = instruction->decoder;
+        leaf->length = pattern->length;
+        leaf->name = instruction->name;
+        return (Tree *)leaf;
+    }
+
+    /* Work out which bit to branch on */
+    const WEIGHT_T tree_weight = weigh_matches(patterns, size, bits, mask);
+    float min_effort = 0.0;
+    ssize_t min_bit = -1;
+    size_t i;
+    for (i = 0; i < OPCODE_SIZE; i++) {
+        float effort;
+        const OPCODE_T bit = 1 << i;
+        if (mask & bit) {
+            /* This bit already branched on, skip */
+            continue;
+        }
+        effort = subtree_effort(patterns, size, bits, mask | bit, tree_weight) 
+
+            subtree_effort(patterns, size, bits | bit, mask | bit, 
tree_weight);
+        if (min_bit < 0 || effort < min_effort) {
+            min_bit = i;
+            min_effort = effort;
+        }
+    }
+
+    /*
+     * Setup branch on bit that gives most information gain.
+     * (AKA minimum information/effort needed to decide remaining branches.
+     */
+    assert(min_bit >= 0); /* Probably multiple instructions match one opcode */
+    const OPCODE_T bit = 1 << min_bit;
+    const Tree *zero = build_tree(patterns, size, bits, mask | bit);
+    const Tree *one = build_tree(patterns, size, bits | bit, mask | bit);
+    Branch *branch = g_new0(Branch, 1);
+    branch->is_leaf = false;
+    branch->bit = min_bit;
+    branch->zero = zero;
+    branch->one = one;
+    return (Tree *)branch;
+}
+
+#ifdef DEBUG_DECODER
+static size_t depth(const Tree *const tree);
+static size_t depth(const Tree *const tree)
+{
+    if (tree->is_leaf) {
+        return 1;
+    }
+    size_t zero = depth(tree->branch.zero);
+    size_t one = depth(tree->branch.one);
+    if (zero > one) {
+        return zero + 1;
+    }
+    return one + 1;
+}
+static size_t count(const Tree *const tree);
+static size_t count(const Tree *const tree)
+{
+    if (tree->is_leaf) {
+        return 1;
+    }
+    return 1 + count(tree->branch.zero) + count(tree->branch.one);
+}
+#endif
+
+void avr_decoder_init(const Instruction instructions[], const size_t size)
+{
+    assert(cache == NULL); /* Shouldn't be initialised more than once */
+    Pattern *const patterns = g_new0(Pattern, size);
+    size_t i;
+    for (i = 0; i < size; i++) {
+        patterns[i] = get_info(&instructions[i]);
+    }
+    cache = build_tree(patterns, size, 0, 0);
+    g_free(patterns);
+#ifdef DEBUG_DECODER
+    printf("AVR decoder init, depth=%lu, size=%lu\n",
+        depth(cache), count(cache));
+#endif
+}
+
+TranslateFn avr_decode(const uint32_t opcode, uint32_t *const length_out)
+{
+    assert(cache != NULL); /* Must be initialised */
+    const Tree *node = cache;
+    while (1) {
+        assert(node != NULL);
+        if (node->is_leaf) {
+            const Leaf *const leaf = &node->leaf;
+            if (leaf->decoder == NULL) {
+                /* Illegal instruction */
+                error_report("Illegal AVR instruction");
+                exit(1);
+            }
+#ifdef DEBUG_DECODER
+            printf("AVR decoder: %s\n", leaf->name);
+#endif
+            *length_out = leaf->length;
+            return leaf->decoder;
+        } else {
+            const Branch *const branch = &node->branch;
+            const OPCODE_T mask = 1 << branch->bit;
+            if (opcode & mask) {
+                node = branch->one;
+            } else {
+                node = branch->zero;
+            }
+        }
+    }
+}
diff --git a/target/avr/decode.h b/target/avr/decode.h
new file mode 100644
index 0000000000..415406544a
--- /dev/null
+++ b/target/avr/decode.h
@@ -0,0 +1,68 @@
+/*
+ * AVR instruction decoder.
+ *
+ * Copyright (c) 2019 University of Kent
+ * Author: Sarah Harris
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef AVR_DECODER_H
+#define AVR_DECODER_H
+
+/* Pointer to functions used to do final decoding step from opcode to TCG. */
+typedef struct DisasContext DisasContext;
+typedef int (*TranslateFn)(DisasContext *ctx, uint32_t opcode);
+
+/*
+ * Human readable instruction descriptions used to generate decoder.
+ * Doing this at runtime avoids a complicated new build step.
+ */
+typedef struct {
+    /* Instruction mnemonic for debugging */
+    const char *name;
+    /*
+     * Bit pattern describing the instruction's opcode.
+     * Each character represents a bit:
+     * - '1' means bit must be set
+     * - '0' means bit must be cleared
+     * - '*' means don't care
+     * - '_' is ignored (i.e. whitespace), please use to aid readability
+     */
+    const char *pattern;
+    /* Function used to translate this instruction to TCG */
+    TranslateFn decoder;
+} Instruction;
+
+/*
+ * Converts a list of instruction descriptions to a decoding tree
+ * and caches it.
+ * Must only be called once.
+ * `size` is the number of instructions in the given list.
+ */
+void avr_decoder_init(const Instruction instructions[], const size_t size);
+
+/*
+ * Returns the translation function and length of an instruction, given
+ * the opcode.
+ * avr_decoder_init() must be called first to build the decoding tree.
+ */
+TranslateFn avr_decode(const uint32_t opcode, uint32_t *const length_out);
+
+#endif /* AVR_DECODER_H */
-- 
2.21.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]