qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 2/7] nios2: Add architecture emulation support


From: Richard Henderson
Subject: Re: [Qemu-devel] [PATCH 2/7] nios2: Add architecture emulation support
Date: Wed, 28 Sep 2016 18:05:03 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.3.0

 hw/nios2/cpu_pic.c         |   70 +++

Why is this in this patch?

 target-nios2/instruction.c | 1427 ++++++++++++++++++++++++++++++++++++++++++++
 target-nios2/instruction.h |  279 +++++++++
 target-nios2/translate.c   |  242 ++++++++

Why are these files separate?

+    if (n < 32)             /* GP regs */
+        return gdb_get_reg32(mem_buf, env->regs[n]);
+    else if (n == 32)  /* PC */
+        return gdb_get_reg32(mem_buf, env->regs[R_PC]);
+    else if (n < 49)        /* Status regs */
+        return gdb_get_reg32(mem_buf, env->regs[n - 1]);

Use checkpatch.pl; the formatting is wrong.

There's no particular reason why R_PC needs to be 64; if you change it to 32, you can simplify this.

+struct CPUNios2State {
+    uint32_t regs[NUM_CORE_REGS];
+
+    /* Addresses that are hard-coded in the FPGA build settings */
+    uint32_t reset_addr;
+    uint32_t exception_addr;
+    uint32_t fast_tlb_miss_addr;

These three should go after ...

+
+#if !defined(CONFIG_USER_ONLY)
+    Nios2MMU mmu;
+
+    uint32_t irq_pending;
+#endif
+
+    CPU_COMMON

... here, or even into ...

+};
+
+/**
+ * Nios2CPU:
+ * @env: #CPUNios2State
+ *
+ * A Nios2 CPU.
+ */
+typedef struct Nios2CPU {
+    /*< private >*/
+    CPUState parent_obj;
+    /*< public >*/
+
+    CPUNios2State env;
+    bool mmu_present;

... here.

+static inline void t_gen_helper_raise_exception(DisasContext *dc,
+                                                uint32_t index)

Remove all of the inline markers and let the compiler decide.

+static void gen_check_supervisor(DisasContext *dc, TCGLabel *label)
+{
+    TCGLabel *l1 = gen_new_label();
+
+    TCGv_i32 tmp = tcg_temp_new();
+    tcg_gen_andi_tl(tmp, dc->cpu_R[CR_STATUS], CR_STATUS_U);
+    tcg_gen_brcond_tl(TCG_COND_EQ, dc->cpu_R[R_ZERO], tmp, l1);
+    t_gen_helper_raise_exception(dc, EXCP_SUPERI);
+    tcg_gen_br(label);

The supervisor check should be done at translate time by checking dc->tb->flags & CR_STATUS_U.

+#ifdef CALL_TRACING
+    TCGv_i32 tmp = tcg_const_i32(dc->pc);
+    TCGv_i32 tmp2 = tcg_const_i32((dc->pc & 0xF0000000) | (instr->imm26 * 4));
+    gen_helper_call_status(tmp, tmp2);
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i32(tmp2);
+#endif

What's the point of this?

+/*
+ * I-Type instructions
+ */
+
+/* rB <- 0x000000 : Mem8[rA + @(IMM16)] */
+static void ldbu(DisasContext *dc, uint32_t code)
+{
+    I_TYPE(instr, code);
+
+    TCGv addr = tcg_temp_new();
+    tcg_gen_addi_tl(addr, dc->cpu_R[instr->a],
+                    (int32_t)((int16_t)instr->imm16));
+
+    tcg_gen_qemu_ld8u(dc->cpu_R[instr->b], addr, dc->mem_idx);
+
+    tcg_temp_free(addr);
+}

You should have helper functions so that you don't have to replicate 12 line functions. Use the tcg_gen_qemu_ld_tl function, and the TCGMemOp flags to help merge all load instructions, and all store instructions.

+/* rB <- rA + IMM16 */
+static void addi(DisasContext *dc, uint32_t code)
+{
+    I_TYPE(instr, code);
+
+    TCGv imm = tcg_temp_new();
+    tcg_gen_addi_tl(dc->cpu_R[instr->b], dc->cpu_R[instr->a],
+                    (int32_t)((int16_t)instr->imm16));

The double cast is pointless, as are the extra parenthesis.

You're not doing anything to make sure that r0 reads as 0, and ignores writes. You need to look at target-alpha, target-mips, or target-sparc to see various ways in which a zero register may be handled.

You should handle the special case of movi, documented as addi rb, r0, imm, so that the common method of loading a small value does not require extra tcg opcodes.

Similarly for the other (common) aliases, like mov and nop, which are both aliased to add.

+/* Initializes the data cache line currently caching address rA + @(IMM16) */
+static void initda(DisasContext *dc __attribute__((unused)),
+                   uint32_t code __attribute__((unused)))
+{
+    /* TODO */
+}

This will always be a nop for qemu.

+static void blt(DisasContext *dc, uint32_t code)
+{
+    I_TYPE(instr, code);
+
+    TCGLabel *l1 = gen_new_label();
+
+    tcg_gen_brcond_tl(TCG_COND_LT, dc->cpu_R[instr->a],
+                      dc->cpu_R[instr->b], l1);
+
+    gen_goto_tb(dc, 0, dc->pc + 4);
+
+    gen_set_label(l1);
+
+    gen_goto_tb(dc, 1, dc->pc + 4 + (int16_t)(instr->imm16 & 0xFFFC));
+
+    dc->is_jmp = DISAS_TB_JUMP;
+}

These really require a helper function.

+/* rB <- 0x000000 : Mem8[rA + @(IMM16)] (bypassing cache) */
+static void ldbuio(DisasContext *dc, uint32_t code)

Reuse ldbu; qemu will never implement caches.

+/* rB <- (rA * @(IMM16))(31..0) */
+static void muli(DisasContext *dc, uint32_t code)
+{
+    I_TYPE(instr, code);
+
+    TCGv imm = tcg_temp_new();
+    tcg_gen_muli_i32(dc->cpu_R[instr->b], dc->cpu_R[instr->a],
+                     (int32_t)((int16_t)instr->imm16));
+    tcg_temp_free(imm);

imm is unused.

+static const Nios2Instruction i_type_instructions[I_TYPE_COUNT] = {
+    [CALL]    = INSTRUCTION(call),
+    [JMPI]    = INSTRUCTION(jmpi),
+    [0x02]    = INSTRUCTION_ILLEGAL(),
+    [LDBU]    = INSTRUCTION(ldbu),
+    [ADDI]    = INSTRUCTION(addi),
+    [STB]     = INSTRUCTION(stb),
+    [BR]      = INSTRUCTION(br),
+    [LDB]     = INSTRUCTION(ldb),
+    [CMPGEI]  = INSTRUCTION(cmpgei),
+    [0x09]    = INSTRUCTION_ILLEGAL(),
+    [0x0a]    = INSTRUCTION_ILLEGAL(),
+    [LDHU]    = INSTRUCTION(ldhu),
+    [ANDI]    = INSTRUCTION(andi),
+    [STH]     = INSTRUCTION(sth),
+    [BGE]     = INSTRUCTION(bge),
+    [LDH]     = INSTRUCTION(ldh),
+    [CMPLTI]  = INSTRUCTION(cmplti),
+    [0x11]    = INSTRUCTION_ILLEGAL(),
+    [0x12]    = INSTRUCTION_ILLEGAL(),
+    [INITDA]  = INSTRUCTION(initda),
+    [ORI]     = INSTRUCTION(ori),
+    [STW]     = INSTRUCTION(stw),
+    [BLT]     = INSTRUCTION(blt),
+    [LDW]     = INSTRUCTION(ldw),
+    [CMPNEI]  = INSTRUCTION(cmpnei),
+    [0x19]    = INSTRUCTION_ILLEGAL(),
+    [0x1a]    = INSTRUCTION_ILLEGAL(),
+    [FLUSHDA] = INSTRUCTION_NOP(flushda),
+    [XORI]    = INSTRUCTION(xori),
+    [0x1d]    = INSTRUCTION_ILLEGAL(),
+    [BNE]     = INSTRUCTION(bne),
+    [0x1f]    = INSTRUCTION_ILLEGAL(),
+    [CMPEQI]  = INSTRUCTION(cmpeqi),
+    [0x21]    = INSTRUCTION_ILLEGAL(),
+    [0x22]    = INSTRUCTION_ILLEGAL(),
+    [LDBUIO]  = INSTRUCTION(ldbuio),
+    [MULI]    = INSTRUCTION(muli),
+    [STBIO]   = INSTRUCTION(stbio),
+    [BEQ]     = INSTRUCTION(beq),
+    [LDBIO]   = INSTRUCTION(ldbio),
+    [CMPGEUI] = INSTRUCTION(cmpgeui),
+    [0x29]    = INSTRUCTION_ILLEGAL(),
+    [0x2a]    = INSTRUCTION_ILLEGAL(),
+    [LDHUIO]  = INSTRUCTION(ldhuio),
+    [ANDHI]   = INSTRUCTION(andhi),
+    [STHIO]   = INSTRUCTION(sthio),
+    [BGEU]    = INSTRUCTION(bgeu),
+    [LDHIO]   = INSTRUCTION(ldhio),
+    [CMPLTUI] = INSTRUCTION(cmpltui),
+    [0x31]    = INSTRUCTION_ILLEGAL(),
+    [CUSTOM]  = INSTRUCTION_UNIMPLEMENTED(custom),
+    [INITD]   = INSTRUCTION(initd),
+    [ORHI]    = INSTRUCTION(orhi),
+    [STWIO]   = INSTRUCTION(stwio),
+    [BLTU]    = INSTRUCTION(bltu),
+    [LDWIO]   = INSTRUCTION(ldwio),
+    [RDPRS]   = INSTRUCTION_UNIMPLEMENTED(rdprs),
+    [0x39]    = INSTRUCTION_ILLEGAL(),
+    [R_TYPE]  = { "<R-type instruction>", handle_r_type_instr },
+    [FLUSHD]  = INSTRUCTION_NOP(flushd),
+    [XORHI]   = INSTRUCTION(xorhi),
+    [0x3d]    = INSTRUCTION_ILLEGAL(),
+    [0x3e]    = INSTRUCTION_ILLEGAL(),
+    [0x3f]    = INSTRUCTION_ILLEGAL(),

Is there really any point to using a table, as opposed to a switch statement? At least with a switch you can use default to catch all of the illegals at once.

+/* rC <- ((unsigned)rA * (unsigned)rB))(31..0) */
+static void mulxuu(DisasContext *dc, uint32_t code)
+{
+    R_TYPE(instr, code);
+
+    TCGv_i64 t0, t1;
+
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+
+    tcg_gen_extu_i32_i64(t0, dc->cpu_R[instr->a]);
+    tcg_gen_extu_i32_i64(t1, dc->cpu_R[instr->b]);
+    tcg_gen_mul_i64(t0, t0, t1);
+
+    tcg_gen_shri_i64(t0, t0, 32);
+    tcg_gen_extrl_i64_i32(dc->cpu_R[instr->c], t0);

  TCGv t0 = tcg_temp_new();
  tcg_gen_mulu2_tl(t0, dc->cpu_R[instr->c],
                   dc->cpu_R[instr->a],
                   dc->cpu_R[instr->b]);
  tcg_temp_free(t0);

+/* rC <- ((signed)rA * (unsigned)rB))(31..0) */
+static void mulxsu(DisasContext *dc, uint32_t code)
+{
+    R_TYPE(instr, code);
+
+    TCGv_i64 t0, t1;
+
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+
+    tcg_gen_ext_i32_i64(t0, dc->cpu_R[instr->a]);
+    tcg_gen_extu_i32_i64(t1, dc->cpu_R[instr->b]);
+    tcg_gen_mul_i64(t0, t0, t1);
+
+    tcg_gen_shri_i64(t0, t0, 32);
+    tcg_gen_extrl_i64_i32(dc->cpu_R[instr->c], t0);

Similarly, using tcg_gen_mulsu2_tl. To be fair, a patch for this was posted yesterday, and will be included in a tcg pull soon. See

  https://patchwork.ozlabs.org/patch/675859/

+static void callr(DisasContext *dc, uint32_t code)
+{
+    R_TYPE(instr, code);
+
+#ifdef CALL_TRACING
+    TCGv_i32 tmp = tcg_const_i32(dc->pc);
+    gen_helper_call_status(tmp, dc->cpu_R[instr->a]);
+    tcg_temp_free_i32(tmp);
+#endif
+
+    tcg_gen_movi_tl(dc->cpu_R[R_RA], dc->pc + 4);
+    tcg_gen_mov_tl(dc->cpu_R[R_PC], dc->cpu_R[instr->a]);

Does the hardware actually prevent rA == RA like this? The documentation is unclear. If the hardware actually performs the described actions in parallel, then you need to swap these two lines.

+/* rC <- ((signed)rA * (signed)rB))(31..0) */
+static void mulxss(DisasContext *dc, uint32_t code)
+{
+    R_TYPE(instr, code);
+
+    TCGv_i64 t0, t1;
+
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+
+    tcg_gen_ext_i32_i64(t0, dc->cpu_R[instr->a]);
+    tcg_gen_ext_i32_i64(t1, dc->cpu_R[instr->b]);
+    tcg_gen_mul_i64(t0, t0, t1);

Similarly, tcg_gen_muls2_tl.

+/* rC <- rA / rB */
+static void divu(DisasContext *dc, uint32_t code)
+{
+    R_TYPE(instr, code);
+
+    gen_helper_divu(dc->cpu_R[instr->c], dc->cpu_R[instr->a],
+                    dc->cpu_R[instr->b]);
+}

You can perform this inline.  C.f. target-mips

Since divide-by-zero is undefined, this can be done with

  TCGv t0 = tcg_const_tl(0);
  tcg_gen_setcond_tl(TCG_COND_EQ, t0, dc->cpu_R[instr->b], t0);
  tcg_gen_or_tl(t0, t0, dc->cpu_R[instr->b]);
  tcg_gen_divu_tl(dc->cpu_R[instr->c], dc->cpu_R[instr->a], t0);
  tcg_temp_free_tl(t0);

+    gen_helper_divs(dc->cpu_R[instr->c], dc->cpu_R[instr->a],
+                    dc->cpu_R[instr->b]);

Similarly.

+/*
+ * bstatus ← status
+ * PIE <- 0
+ * U <- 0
+ * ba <- PC + 4
+ * PC <- break handler address
+ */
+static void __break(DisasContext *dc, uint32_t code __attribute__((unused)))

This symbol is in the system namespace.  Just use break_insn or something.

+static const Nios2Instruction r_type_instructions[R_TYPE_COUNT] = {
+    [0x00]   = INSTRUCTION_ILLEGAL(),
+    [ERET]   = INSTRUCTION(eret),
+    [ROLI]   = INSTRUCTION(roli),
+    [ROL]    = INSTRUCTION(rol),
+    [FLUSHP] = INSTRUCTION(flushp),
+    [RET]    = INSTRUCTION(ret),
+    [NOR]    = INSTRUCTION(nor),
+    [MULXUU] = INSTRUCTION(mulxuu),
+    [CMPGE]  = INSTRUCTION(cmpge),
+    [BRET]   = INSTRUCTION(bret),
+    [0x0a]   = INSTRUCTION_ILLEGAL(),
+    [ROR]    = INSTRUCTION(ror),
+    [FLUSHI] = INSTRUCTION(flushi),
+    [JMP]    = INSTRUCTION(jmp),
+    [AND]    = INSTRUCTION(and),
+    [0x0f]   = INSTRUCTION_ILLEGAL(),
+    [CMPLT]  = INSTRUCTION(cmplt),
+    [0x11]   = INSTRUCTION_ILLEGAL(),
+    [SLLI]   = INSTRUCTION(slli),
+    [SLL]    = INSTRUCTION(sll),
+    [WRPRS]  = INSTRUCTION_UNIMPLEMENTED(wrprs),
+    [0x15]   = INSTRUCTION_ILLEGAL(),
+    [OR]     = INSTRUCTION(or),
+    [MULXSU] = INSTRUCTION(mulxsu),
+    [CMPNE]  = INSTRUCTION(cmpne),
+    [0x19]   = INSTRUCTION_ILLEGAL(),
+    [SRLI]   = INSTRUCTION(srli),
+    [SRL]    = INSTRUCTION(srl),
+    [NEXTPC] = INSTRUCTION(nextpc),
+    [CALLR]  = INSTRUCTION(callr),
+    [XOR]    = INSTRUCTION(xor),
+    [MULXSS] = INSTRUCTION(mulxss),
+    [CMPEQ]  = INSTRUCTION(cmpeq),
+    [0x21]   = INSTRUCTION_ILLEGAL(),
+    [0x22]   = INSTRUCTION_ILLEGAL(),
+    [0x23]   = INSTRUCTION_ILLEGAL(),
+    [DIVU]   = INSTRUCTION(divu),
+    [DIV]    = { "div", _div },
+    [RDCTL]  = INSTRUCTION(rdctl),
+    [MUL]    = INSTRUCTION(mul),
+    [CMPGEU] = INSTRUCTION(cmpgeu),
+    [INITI]  = INSTRUCTION(initi),
+    [0x2a]   = INSTRUCTION_ILLEGAL(),
+    [0x2b]   = INSTRUCTION_ILLEGAL(),
+    [0x2c]   = INSTRUCTION_ILLEGAL(),
+    [TRAP]   = INSTRUCTION(trap),
+    [WRCTL]  = INSTRUCTION(wrctl),
+    [0x2f]   = INSTRUCTION_ILLEGAL(),
+    [CMPLTU] = INSTRUCTION(cmpltu),
+    [ADD]    = INSTRUCTION(add),
+    [0x32]   = INSTRUCTION_ILLEGAL(),
+    [0x33]   = INSTRUCTION_ILLEGAL(),
+    [BREAK]  = { "break", __break },
+    [0x35]   = INSTRUCTION_ILLEGAL(),
+    [SYNC]   = INSTRUCTION(nop),
+    [0x37]   = INSTRUCTION_ILLEGAL(),
+    [0x38]   = INSTRUCTION_ILLEGAL(),
+    [SUB]    = INSTRUCTION(sub),
+    [SRAI]   = INSTRUCTION(srai),
+    [SRA]    = INSTRUCTION(sra),
+    [0x3c]   = INSTRUCTION_ILLEGAL(),
+    [0x3d]   = INSTRUCTION_ILLEGAL(),
+    [0x3e]   = INSTRUCTION_ILLEGAL(),
+    [0x3f]   = INSTRUCTION_ILLEGAL(),
+};

Similar comment wrt the I-type table.

+const char *instruction_get_string(uint32_t code)
+{
+    uint32_t op = get_opcode(code);
+
+    if (unlikely(op >= I_TYPE_COUNT)) {
+        return "";
+    } else if (op == R_TYPE) {
+        uint32_t opx = get_opxcode(code);
+        if (unlikely(opx >= R_TYPE_COUNT)) {
+            return "";
+        }
+        return r_type_instructions[opx].name;
+    } else {
+        return i_type_instructions[op].name;
+    }
+}

What is this function used for?

+/* I-Type instruction */
+typedef struct Nios2IType {
+    uint32_t op:6;
+    uint32_t imm16:16;
+    uint32_t b:5;
+    uint32_t a:5;
+} QEMU_PACKED Nios2IType;

These bitfields are a non-starter. Layout of them is non-portable in more ways than is worth going into here. You must use extract32 and sextract32 to extract the fields.

+
+union i_type_u {
+    uint32_t      v;
+    Nios2IType i;
+};
+
+#define I_TYPE(instr, op) \
+    union i_type_u instr_u = { .v = op }; \
+    Nios2IType *instr = &instr_u.i

You could probably still hide everything behind this macro, with an inline function returning a structure (defined *without* bitfields).

+/*
+ * Return values for instruction handlers
+ */
+#define INSTR_UNIMPL     -2  /* Unimplemented instruction */
+#define INSTR_ERR        -1  /* Error in instruction */
+#define PC_INC_NORMAL     0  /* Normal PC increment after instruction */
+#define PC_INC_BY_INSTR   1  /* PC got incremented by instruction */
+#define INSTR_BREAK       2  /* Break encountered */
+#define INSTR_EXCEPTION 255  /* Instruction generated an exception
+                                (the exception cause will be stored
+                                in struct nios2 */

What's the gain over using a simple enum with sequential values?

+/*
+ * FIXME: Convert to VMstate
+ */
+
+#include "qemu/osdep.h"
+#include "hw/hw.h"
+#include "hw/boards.h"
+
+void cpu_save(QEMUFile *f, void *opaque)
+{
+    /* TODO */
+}
+
+int cpu_load(QEMUFile *f, void *opaque, int version_id)
+{
+    /* TODO */
+    return 0;
+}

Fixing this is no longer optional.

+void helper_memalign(CPUNios2State *env, uint32_t addr, uint32_t dr, uint32_t 
wr, uint32_t mask)
+{
+    if (addr & mask) {
+        qemu_log("unaligned access addr=%x mask=%x, wr=%d dr=r%d\n",
+                 addr, mask, wr, dr);
+        env->regs[CR_BADADDR] = addr;
+        env->regs[CR_EXCEPTION] = EXCP_UNALIGN << 2;
+        helper_raise_exception(env, EXCP_UNALIGN);
+    }
+}

What is this doing that cc->do_unaligned_access doesn't?

+    /* Initialize DC */
+    dc->cpu_env = cpu_env;
+    dc->cpu_R   = cpu_R;

What is this assignment for? Are you planning to implement shadow registers at some point?


r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]