Re: [Qemu-devel] [PATCH 2/7] nios2: Add architecture emulation support

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 2/7] nios2: Add architecture emulation support

From:	Richard Henderson
Subject:	Re: [Qemu-devel] [PATCH 2/7] nios2: Add architecture emulation support
Date:	Wed, 28 Sep 2016 18:05:03 -0700
User-agent:	Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.3.0

 hw/nios2/cpu_pic.c         |   70 +++


Why is this in this patch?

 target-nios2/instruction.c | 1427 ++++++++++++++++++++++++++++++++++++++++++++
 target-nios2/instruction.h |  279 +++++++++
 target-nios2/translate.c   |  242 ++++++++


Why are these files separate?

+    if (n < 32)             /* GP regs */
+        return gdb_get_reg32(mem_buf, env->regs[n]);
+    else if (n == 32)  /* PC */
+        return gdb_get_reg32(mem_buf, env->regs[R_PC]);
+    else if (n < 49)        /* Status regs */
+        return gdb_get_reg32(mem_buf, env->regs[n - 1]);


Use checkpatch.pl; the formatting is wrong.

There's no particular reason why R_PC needs to be 64; if you change it to 32,you can simplify this.

+struct CPUNios2State {
+    uint32_t regs[NUM_CORE_REGS];
+
+    /* Addresses that are hard-coded in the FPGA build settings */
+    uint32_t reset_addr;
+    uint32_t exception_addr;
+    uint32_t fast_tlb_miss_addr;


These three should go after ...

+
+#if !defined(CONFIG_USER_ONLY)
+    Nios2MMU mmu;
+
+    uint32_t irq_pending;
+#endif
+
+    CPU_COMMON


... here, or even into ...

+};
+
+/**
+ * Nios2CPU:
+ * @env: #CPUNios2State
+ *
+ * A Nios2 CPU.
+ */
+typedef struct Nios2CPU {
+    /*< private >*/
+    CPUState parent_obj;
+    /*< public >*/
+
+    CPUNios2State env;
+    bool mmu_present;


... here.

+static inline void t_gen_helper_raise_exception(DisasContext *dc,
+                                                uint32_t index)


Remove all of the inline markers and let the compiler decide.

+static void gen_check_supervisor(DisasContext *dc, TCGLabel *label)
+{
+    TCGLabel *l1 = gen_new_label();
+
+    TCGv_i32 tmp = tcg_temp_new();
+    tcg_gen_andi_tl(tmp, dc->cpu_R[CR_STATUS], CR_STATUS_U);
+    tcg_gen_brcond_tl(TCG_COND_EQ, dc->cpu_R[R_ZERO], tmp, l1);
+    t_gen_helper_raise_exception(dc, EXCP_SUPERI);
+    tcg_gen_br(label);

The supervisor check should be done at translate time by checking dc->tb->flags& CR_STATUS_U.

+#ifdef CALL_TRACING
+    TCGv_i32 tmp = tcg_const_i32(dc->pc);
+    TCGv_i32 tmp2 = tcg_const_i32((dc->pc & 0xF0000000) | (instr->imm26 * 4));
+    gen_helper_call_status(tmp, tmp2);
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i32(tmp2);
+#endif


What's the point of this?

+/*
+ * I-Type instructions
+ */
+
+/* rB <- 0x000000 : Mem8[rA + @(IMM16)] */
+static void ldbu(DisasContext *dc, uint32_t code)
+{
+    I_TYPE(instr, code);
+
+    TCGv addr = tcg_temp_new();
+    tcg_gen_addi_tl(addr, dc->cpu_R[instr->a],
+                    (int32_t)((int16_t)instr->imm16));
+
+    tcg_gen_qemu_ld8u(dc->cpu_R[instr->b], addr, dc->mem_idx);
+
+    tcg_temp_free(addr);
+}

You should have helper functions so that you don't have to replicate 12 linefunctions. Use the tcg_gen_qemu_ld_tl function, and the TCGMemOp flags to helpmerge all load instructions, and all store instructions.

+/* rB <- rA + IMM16 */
+static void addi(DisasContext *dc, uint32_t code)
+{
+    I_TYPE(instr, code);
+
+    TCGv imm = tcg_temp_new();
+    tcg_gen_addi_tl(dc->cpu_R[instr->b], dc->cpu_R[instr->a],
+                    (int32_t)((int16_t)instr->imm16));


The double cast is pointless, as are the extra parenthesis.

You're not doing anything to make sure that r0 reads as 0, and ignores writes.You need to look at target-alpha, target-mips, or target-sparc to see variousways in which a zero register may be handled.

You should handle the special case of movi, documented as addi rb, r0, imm, sothat the common method of loading a small value does not require extra tcg opcodes.

Similarly for the other (common) aliases, like mov and nop, which are bothaliased to add.

+/* Initializes the data cache line currently caching address rA + @(IMM16) */
+static void initda(DisasContext *dc __attribute__((unused)),
+                   uint32_t code __attribute__((unused)))
+{
+    /* TODO */
+}


This will always be a nop for qemu.

+static void blt(DisasContext *dc, uint32_t code)
+{
+    I_TYPE(instr, code);
+
+    TCGLabel *l1 = gen_new_label();
+
+    tcg_gen_brcond_tl(TCG_COND_LT, dc->cpu_R[instr->a],
+                      dc->cpu_R[instr->b], l1);
+
+    gen_goto_tb(dc, 0, dc->pc + 4);
+
+    gen_set_label(l1);
+
+    gen_goto_tb(dc, 1, dc->pc + 4 + (int16_t)(instr->imm16 & 0xFFFC));
+
+    dc->is_jmp = DISAS_TB_JUMP;
+}


These really require a helper function.

+/* rB <- 0x000000 : Mem8[rA + @(IMM16)] (bypassing cache) */
+static void ldbuio(DisasContext *dc, uint32_t code)


Reuse ldbu; qemu will never implement caches.

+/* rB <- (rA * @(IMM16))(31..0) */
+static void muli(DisasContext *dc, uint32_t code)
+{
+    I_TYPE(instr, code);
+
+    TCGv imm = tcg_temp_new();
+    tcg_gen_muli_i32(dc->cpu_R[instr->b], dc->cpu_R[instr->a],
+                     (int32_t)((int16_t)instr->imm16));
+    tcg_temp_free(imm);


imm is unused.

+static const Nios2Instruction i_type_instructions[I_TYPE_COUNT] = {
+    [CALL]    = INSTRUCTION(call),
+    [JMPI]    = INSTRUCTION(jmpi),
+    [0x02]    = INSTRUCTION_ILLEGAL(),
+    [LDBU]    = INSTRUCTION(ldbu),
+    [ADDI]    = INSTRUCTION(addi),
+    [STB]     = INSTRUCTION(stb),
+    [BR]      = INSTRUCTION(br),
+    [LDB]     = INSTRUCTION(ldb),
+    [CMPGEI]  = INSTRUCTION(cmpgei),
+    [0x09]    = INSTRUCTION_ILLEGAL(),
+    [0x0a]    = INSTRUCTION_ILLEGAL(),
+    [LDHU]    = INSTRUCTION(ldhu),
+    [ANDI]    = INSTRUCTION(andi),
+    [STH]     = INSTRUCTION(sth),
+    [BGE]     = INSTRUCTION(bge),
+    [LDH]     = INSTRUCTION(ldh),
+    [CMPLTI]  = INSTRUCTION(cmplti),
+    [0x11]    = INSTRUCTION_ILLEGAL(),
+    [0x12]    = INSTRUCTION_ILLEGAL(),
+    [INITDA]  = INSTRUCTION(initda),
+    [ORI]     = INSTRUCTION(ori),
+    [STW]     = INSTRUCTION(stw),
+    [BLT]     = INSTRUCTION(blt),
+    [LDW]     = INSTRUCTION(ldw),
+    [CMPNEI]  = INSTRUCTION(cmpnei),
+    [0x19]    = INSTRUCTION_ILLEGAL(),
+    [0x1a]    = INSTRUCTION_ILLEGAL(),
+    [FLUSHDA] = INSTRUCTION_NOP(flushda),
+    [XORI]    = INSTRUCTION(xori),
+    [0x1d]    = INSTRUCTION_ILLEGAL(),
+    [BNE]     = INSTRUCTION(bne),
+    [0x1f]    = INSTRUCTION_ILLEGAL(),
+    [CMPEQI]  = INSTRUCTION(cmpeqi),
+    [0x21]    = INSTRUCTION_ILLEGAL(),
+    [0x22]    = INSTRUCTION_ILLEGAL(),
+    [LDBUIO]  = INSTRUCTION(ldbuio),
+    [MULI]    = INSTRUCTION(muli),
+    [STBIO]   = INSTRUCTION(stbio),
+    [BEQ]     = INSTRUCTION(beq),
+    [LDBIO]   = INSTRUCTION(ldbio),
+    [CMPGEUI] = INSTRUCTION(cmpgeui),
+    [0x29]    = INSTRUCTION_ILLEGAL(),
+    [0x2a]    = INSTRUCTION_ILLEGAL(),
+    [LDHUIO]  = INSTRUCTION(ldhuio),
+    [ANDHI]   = INSTRUCTION(andhi),
+    [STHIO]   = INSTRUCTION(sthio),
+    [BGEU]    = INSTRUCTION(bgeu),
+    [LDHIO]   = INSTRUCTION(ldhio),
+    [CMPLTUI] = INSTRUCTION(cmpltui),
+    [0x31]    = INSTRUCTION_ILLEGAL(),
+    [CUSTOM]  = INSTRUCTION_UNIMPLEMENTED(custom),
+    [INITD]   = INSTRUCTION(initd),
+    [ORHI]    = INSTRUCTION(orhi),
+    [STWIO]   = INSTRUCTION(stwio),
+    [BLTU]    = INSTRUCTION(bltu),
+    [LDWIO]   = INSTRUCTION(ldwio),
+    [RDPRS]   = INSTRUCTION_UNIMPLEMENTED(rdprs),
+    [0x39]    = INSTRUCTION_ILLEGAL(),
+    [R_TYPE]  = { "<R-type instruction>", handle_r_type_instr },
+    [FLUSHD]  = INSTRUCTION_NOP(flushd),
+    [XORHI]   = INSTRUCTION(xorhi),
+    [0x3d]    = INSTRUCTION_ILLEGAL(),
+    [0x3e]    = INSTRUCTION_ILLEGAL(),
+    [0x3f]    = INSTRUCTION_ILLEGAL(),

Is there really any point to using a table, as opposed to a switch statement?At least with a switch you can use default to catch all of the illegals at once.

+/* rC <- ((unsigned)rA * (unsigned)rB))(31..0) */
+static void mulxuu(DisasContext *dc, uint32_t code)
+{
+    R_TYPE(instr, code);
+
+    TCGv_i64 t0, t1;
+
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+
+    tcg_gen_extu_i32_i64(t0, dc->cpu_R[instr->a]);
+    tcg_gen_extu_i32_i64(t1, dc->cpu_R[instr->b]);
+    tcg_gen_mul_i64(t0, t0, t1);
+
+    tcg_gen_shri_i64(t0, t0, 32);
+    tcg_gen_extrl_i64_i32(dc->cpu_R[instr->c], t0);


  TCGv t0 = tcg_temp_new();
  tcg_gen_mulu2_tl(t0, dc->cpu_R[instr->c],
                   dc->cpu_R[instr->a],
                   dc->cpu_R[instr->b]);
  tcg_temp_free(t0);

+/* rC <- ((signed)rA * (unsigned)rB))(31..0) */
+static void mulxsu(DisasContext *dc, uint32_t code)
+{
+    R_TYPE(instr, code);
+
+    TCGv_i64 t0, t1;
+
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+
+    tcg_gen_ext_i32_i64(t0, dc->cpu_R[instr->a]);
+    tcg_gen_extu_i32_i64(t1, dc->cpu_R[instr->b]);
+    tcg_gen_mul_i64(t0, t0, t1);
+
+    tcg_gen_shri_i64(t0, t0, 32);
+    tcg_gen_extrl_i64_i32(dc->cpu_R[instr->c], t0);

Similarly, using tcg_gen_mulsu2_tl. To be fair, a patch for this was postedyesterday, and will be included in a tcg pull soon. See


  https://patchwork.ozlabs.org/patch/675859/

+static void callr(DisasContext *dc, uint32_t code)
+{
+    R_TYPE(instr, code);
+
+#ifdef CALL_TRACING
+    TCGv_i32 tmp = tcg_const_i32(dc->pc);
+    gen_helper_call_status(tmp, dc->cpu_R[instr->a]);
+    tcg_temp_free_i32(tmp);
+#endif
+
+    tcg_gen_movi_tl(dc->cpu_R[R_RA], dc->pc + 4);
+    tcg_gen_mov_tl(dc->cpu_R[R_PC], dc->cpu_R[instr->a]);

Does the hardware actually prevent rA == RA like this? The documentation isunclear. If the hardware actually performs the described actions in parallel,then you need to swap these two lines.

+/* rC <- ((signed)rA * (signed)rB))(31..0) */
+static void mulxss(DisasContext *dc, uint32_t code)
+{
+    R_TYPE(instr, code);
+
+    TCGv_i64 t0, t1;
+
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+
+    tcg_gen_ext_i32_i64(t0, dc->cpu_R[instr->a]);
+    tcg_gen_ext_i32_i64(t1, dc->cpu_R[instr->b]);
+    tcg_gen_mul_i64(t0, t0, t1);


Similarly, tcg_gen_muls2_tl.

+/* rC <- rA / rB */
+static void divu(DisasContext *dc, uint32_t code)
+{
+    R_TYPE(instr, code);
+
+    gen_helper_divu(dc->cpu_R[instr->c], dc->cpu_R[instr->a],
+                    dc->cpu_R[instr->b]);
+}


You can perform this inline.  C.f. target-mips

Since divide-by-zero is undefined, this can be done with

  TCGv t0 = tcg_const_tl(0);
  tcg_gen_setcond_tl(TCG_COND_EQ, t0, dc->cpu_R[instr->b], t0);
  tcg_gen_or_tl(t0, t0, dc->cpu_R[instr->b]);
  tcg_gen_divu_tl(dc->cpu_R[instr->c], dc->cpu_R[instr->a], t0);
  tcg_temp_free_tl(t0);

+    gen_helper_divs(dc->cpu_R[instr->c], dc->cpu_R[instr->a],
+                    dc->cpu_R[instr->b]);


Similarly.

+/*
+ * bstatus ← status
+ * PIE <- 0
+ * U <- 0
+ * ba <- PC + 4
+ * PC <- break handler address
+ */
+static void __break(DisasContext *dc, uint32_t code __attribute__((unused)))


This symbol is in the system namespace.  Just use break_insn or something.

+static const Nios2Instruction r_type_instructions[R_TYPE_COUNT] = {
+    [0x00]   = INSTRUCTION_ILLEGAL(),
+    [ERET]   = INSTRUCTION(eret),
+    [ROLI]   = INSTRUCTION(roli),
+    [ROL]    = INSTRUCTION(rol),
+    [FLUSHP] = INSTRUCTION(flushp),
+    [RET]    = INSTRUCTION(ret),
+    [NOR]    = INSTRUCTION(nor),
+    [MULXUU] = INSTRUCTION(mulxuu),
+    [CMPGE]  = INSTRUCTION(cmpge),
+    [BRET]   = INSTRUCTION(bret),
+    [0x0a]   = INSTRUCTION_ILLEGAL(),
+    [ROR]    = INSTRUCTION(ror),
+    [FLUSHI] = INSTRUCTION(flushi),
+    [JMP]    = INSTRUCTION(jmp),
+    [AND]    = INSTRUCTION(and),
+    [0x0f]   = INSTRUCTION_ILLEGAL(),
+    [CMPLT]  = INSTRUCTION(cmplt),
+    [0x11]   = INSTRUCTION_ILLEGAL(),
+    [SLLI]   = INSTRUCTION(slli),
+    [SLL]    = INSTRUCTION(sll),
+    [WRPRS]  = INSTRUCTION_UNIMPLEMENTED(wrprs),
+    [0x15]   = INSTRUCTION_ILLEGAL(),
+    [OR]     = INSTRUCTION(or),
+    [MULXSU] = INSTRUCTION(mulxsu),
+    [CMPNE]  = INSTRUCTION(cmpne),
+    [0x19]   = INSTRUCTION_ILLEGAL(),
+    [SRLI]   = INSTRUCTION(srli),
+    [SRL]    = INSTRUCTION(srl),
+    [NEXTPC] = INSTRUCTION(nextpc),
+    [CALLR]  = INSTRUCTION(callr),
+    [XOR]    = INSTRUCTION(xor),
+    [MULXSS] = INSTRUCTION(mulxss),
+    [CMPEQ]  = INSTRUCTION(cmpeq),
+    [0x21]   = INSTRUCTION_ILLEGAL(),
+    [0x22]   = INSTRUCTION_ILLEGAL(),
+    [0x23]   = INSTRUCTION_ILLEGAL(),
+    [DIVU]   = INSTRUCTION(divu),
+    [DIV]    = { "div", _div },
+    [RDCTL]  = INSTRUCTION(rdctl),
+    [MUL]    = INSTRUCTION(mul),
+    [CMPGEU] = INSTRUCTION(cmpgeu),
+    [INITI]  = INSTRUCTION(initi),
+    [0x2a]   = INSTRUCTION_ILLEGAL(),
+    [0x2b]   = INSTRUCTION_ILLEGAL(),
+    [0x2c]   = INSTRUCTION_ILLEGAL(),
+    [TRAP]   = INSTRUCTION(trap),
+    [WRCTL]  = INSTRUCTION(wrctl),
+    [0x2f]   = INSTRUCTION_ILLEGAL(),
+    [CMPLTU] = INSTRUCTION(cmpltu),
+    [ADD]    = INSTRUCTION(add),
+    [0x32]   = INSTRUCTION_ILLEGAL(),
+    [0x33]   = INSTRUCTION_ILLEGAL(),
+    [BREAK]  = { "break", __break },
+    [0x35]   = INSTRUCTION_ILLEGAL(),
+    [SYNC]   = INSTRUCTION(nop),
+    [0x37]   = INSTRUCTION_ILLEGAL(),
+    [0x38]   = INSTRUCTION_ILLEGAL(),
+    [SUB]    = INSTRUCTION(sub),
+    [SRAI]   = INSTRUCTION(srai),
+    [SRA]    = INSTRUCTION(sra),
+    [0x3c]   = INSTRUCTION_ILLEGAL(),
+    [0x3d]   = INSTRUCTION_ILLEGAL(),
+    [0x3e]   = INSTRUCTION_ILLEGAL(),
+    [0x3f]   = INSTRUCTION_ILLEGAL(),
+};


Similar comment wrt the I-type table.

+const char *instruction_get_string(uint32_t code)
+{
+    uint32_t op = get_opcode(code);
+
+    if (unlikely(op >= I_TYPE_COUNT)) {
+        return "";
+    } else if (op == R_TYPE) {
+        uint32_t opx = get_opxcode(code);
+        if (unlikely(opx >= R_TYPE_COUNT)) {
+            return "";
+        }
+        return r_type_instructions[opx].name;
+    } else {
+        return i_type_instructions[op].name;
+    }
+}


What is this function used for?

+/* I-Type instruction */
+typedef struct Nios2IType {
+    uint32_t op:6;
+    uint32_t imm16:16;
+    uint32_t b:5;
+    uint32_t a:5;
+} QEMU_PACKED Nios2IType;

These bitfields are a non-starter. Layout of them is non-portable in more waysthan is worth going into here. You must use extract32 and sextract32 toextract the fields.

+
+union i_type_u {
+    uint32_t      v;
+    Nios2IType i;
+};
+
+#define I_TYPE(instr, op) \
+    union i_type_u instr_u = { .v = op }; \
+    Nios2IType *instr = &instr_u.i

You could probably still hide everything behind this macro, with an inlinefunction returning a structure (defined *without* bitfields).

+/*
+ * Return values for instruction handlers
+ */
+#define INSTR_UNIMPL     -2  /* Unimplemented instruction */
+#define INSTR_ERR        -1  /* Error in instruction */
+#define PC_INC_NORMAL     0  /* Normal PC increment after instruction */
+#define PC_INC_BY_INSTR   1  /* PC got incremented by instruction */
+#define INSTR_BREAK       2  /* Break encountered */
+#define INSTR_EXCEPTION 255  /* Instruction generated an exception
+                                (the exception cause will be stored
+                                in struct nios2 */


What's the gain over using a simple enum with sequential values?

+/*
+ * FIXME: Convert to VMstate
+ */
+
+#include "qemu/osdep.h"
+#include "hw/hw.h"
+#include "hw/boards.h"
+
+void cpu_save(QEMUFile *f, void *opaque)
+{
+    /* TODO */
+}
+
+int cpu_load(QEMUFile *f, void *opaque, int version_id)
+{
+    /* TODO */
+    return 0;
+}


Fixing this is no longer optional.

+void helper_memalign(CPUNios2State *env, uint32_t addr, uint32_t dr, uint32_t 
wr, uint32_t mask)
+{
+    if (addr & mask) {
+        qemu_log("unaligned access addr=%x mask=%x, wr=%d dr=r%d\n",
+                 addr, mask, wr, dr);
+        env->regs[CR_BADADDR] = addr;
+        env->regs[CR_EXCEPTION] = EXCP_UNALIGN << 2;
+        helper_raise_exception(env, EXCP_UNALIGN);
+    }
+}


What is this doing that cc->do_unaligned_access doesn't?

+    /* Initialize DC */
+    dc->cpu_env = cpu_env;
+    dc->cpu_R   = cpu_R;

What is this assignment for? Are you planning to implement shadow registers atsome point?

r~

[Prev in Thread]

Current Thread

[Next in Thread]

[Qemu-devel] [PATCH V2 1/7] nios2: Add disas entries, Marek Vasut, 2016/09/27
- [Qemu-devel] [PATCH 5/7] nios2: Add periodic timer emulation, Marek Vasut, 2016/09/27
- [Qemu-devel] [PATCH 4/7] nios2: Add IIC interrupt controller emulation, Marek Vasut, 2016/09/27
- [Qemu-devel] [PATCH 3/7] nios2: Add usermode binaries emulation, Marek Vasut, 2016/09/27
- [Qemu-devel] [PATCH 7/7] nios2: Add support for Nios-II R1, Marek Vasut, 2016/09/27
- [Qemu-devel] [PATCH 6/7] nios2: Add Altera 10M50 GHRD emulation, Marek Vasut, 2016/09/27
- [Qemu-devel] [PATCH 2/7] nios2: Add architecture emulation support, Marek Vasut, 2016/09/27
  - Re: [Qemu-devel] [PATCH 2/7] nios2: Add architecture emulation support, Richard Henderson <=

Prev by Date: Re: [Qemu-devel] [PATCH v2] target-i386: fix losing XCR0 processor state component bits
Next by Date: Re: [Qemu-devel] [PATCH 1/3] hw/ppc/spapr: Move code related to "ibm, pa-features" to a separate function
Previous by thread: [Qemu-devel] [PATCH 2/7] nios2: Add architecture emulation support
Next by thread: [Qemu-devel] [PATCHv2] m68k: change default system clock for m5208evb
Index(es):
- Date
- Thread