qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 05/18] target-riscv: Add Arithmetic instructions


From: Richard Henderson
Subject: Re: [Qemu-devel] [PATCH 05/18] target-riscv: Add Arithmetic instructions
Date: Mon, 26 Sep 2016 10:31:27 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.3.0

On 09/26/2016 03:56 AM, Sagar Karandikar wrote:
+#if defined(TARGET_RISCV64)
+target_ulong helper_mulhsu(CPURISCVState *env, target_ulong arg1,
+                          target_ulong arg2)
+{
+    int64_t a = arg1;
+    uint64_t b = arg2;
+    return (int64_t)((__int128_t)a * b >> 64);
+}
+#endif

This won't compile on a 32-bit host, or indeed a 64-bit host without 
CONFIG_INT128.

But what you should actually be using is tcg_gen_mulu2_i64, with a fixup afterward for the one signed argument. See tcg_gen_muls2_i64 in tcg/tcg-op.c for an example of fixing up an unsigned multiply for two signed inputs; you would need only half of that for a single signed input.

+/* Wrapper for setting reg values - need to check of reg is zero since
+ * cpu_gpr[0] is not actually allocated. this is more for safety purposes,
+ * since we usually avoid calling the OP_TYPE_gen function if we see a write to
+ * $zero
+ */
+static inline void gen_set_gpr(int reg_num_dst, TCGv t)
+{
+    if (reg_num_dst != 0) {
+        tcg_gen_mov_tl(cpu_gpr[reg_num_dst], t);
+    }
+}

FWIW, target-alpha used to have lots and lots of checks for the zero register. In the end it was much cleaner to simply allocate a scratch temporary for the zero-register sink. Aside from known patterns, such as canonical nop formations, you'll almost never see such instructions. While it's true that you must do something in order to be architecturally correct, it's better to do something that minimizes the impact on the rest of the translator.


+    tcg_gen_shri_i64(t0, t0, 32);
+    tcg_gen_extrl_i64_i32(ret, t0);

This shift-right-by-32 followed by a low-half extract would be a single tcg_gen_extrh_i64_i32.

+static inline void gen_arith(DisasContext *ctx, uint32_t opc, int rd, int rs1,
+        int rs2)
+{
+    TCGv source1, source2, cond1, cond2, zeroreg, resultopt1;
+    cond1 = tcg_temp_new();
+    cond2 = tcg_temp_new();
+    source1 = tcg_temp_new();
+    source2 = tcg_temp_new();
+    zeroreg = tcg_temp_new();
+    resultopt1 = tcg_temp_new();
+    gen_get_gpr(source1, rs1);
+    gen_get_gpr(source2, rs2);
+    tcg_gen_movi_tl(zeroreg, 0); /* hardcoded zero for compare in DIV, etc */

It would be far preferable to allocate this only when needed.

+
+    switch (opc) {
+#if defined(TARGET_RISCV64)
+    case OPC_RISC_ADDW:
+#endif
+    case OPC_RISC_ADD:

Can we avoid sprinkling so many ifdefs?  Perhaps with something akin to

#ifdef TARGET_RISCV64
#define CASE_OP_32_64(X) case X: case glue(X, W)
#else
#define CASE_OP_32_64(X) case X
#endif


+#if defined(TARGET_RISCV64)
+    case OPC_RISC_SLLW:
+        tcg_gen_andi_tl(source2, source2, 0x1F);
+        /* fall through to SLL */
+#endif
+    case OPC_RISC_SLL:
+        tcg_gen_andi_tl(source2, source2, TARGET_LONG_BITS - 1);
+        tcg_gen_shl_tl(source1, source1, source2);

Better to not fall through at this point, to avoid masking source2 with AND twice.

+#if defined(TARGET_RISCV64)
+    case OPC_RISC_SRLW:
+        /* clear upper 32 */
+        tcg_gen_andi_tl(source1, source1, 0x00000000FFFFFFFFLL);
+        tcg_gen_andi_tl(source2, source2, 0x1F);
+        /* fall through to SRL */
+#endif
+    case OPC_RISC_SRL:
+        tcg_gen_andi_tl(source2, source2, TARGET_LONG_BITS - 1);
+        tcg_gen_shr_tl(source1, source1, source2);

Likewise.  Also, tcg_gen_ext32u_tl to clear upper 32.

+        break;
+#if defined(TARGET_RISCV64)
+    case OPC_RISC_SRAW:
+        /* first, trick to get it to act like working on 32 bits (get rid of
+        upper 32, sign extend to fill space) */
+        tcg_gen_shli_tl(source1, source1, 32);
+        tcg_gen_sari_tl(source1, source1, 32);
+        tcg_gen_andi_tl(source2, source2, 0x1F);
+        /* fall through to SRA */
+#endif
+    case OPC_RISC_SRA:
+        tcg_gen_andi_tl(source2, source2, TARGET_LONG_BITS - 1);

Likewise.  Also, tcg_gen_ext32s_tl to sign-extend.

+#if defined(TARGET_RISCV64)
+    case OPC_RISC_MULW:
+#endif
+    case OPC_RISC_MUL:
+        tcg_gen_muls2_tl(source1, source2, source1, source2);

tcg_gen_mul_tl, since source2 is dead.

+#if defined(TARGET_RISCV64)
+    case OPC_RISC_DIVW:
+        tcg_gen_ext32s_tl(source1, source1);
+        tcg_gen_ext32s_tl(source2, source2);
+        /* fall through to DIV */
+#endif
+    case OPC_RISC_DIV:
+        /* Handle by altering args to tcg_gen_div to produce req'd results:
+         * For overflow: want source1 in source1 and 1 in source2
+         * For div by zero: want -1 in source1 and 1 in source2 -> -1 result */
+        tcg_gen_movi_tl(resultopt1, (target_ulong)0xFFFFFFFFFFFFFFFF);

You'd need ULL for a constant with so many F's, but a plain -1 works just fine.

+        tcg_gen_setcondi_tl(TCG_COND_EQ, cond2, source2, (target_ulong)(~0L));

Likewise -1.

+        tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source1,
+                            1L << (TARGET_LONG_BITS - 1));

ULL, not L, or better as (target_ulong)1.


+    case OPC_RISC_DIVU:
+        tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source2, 0);
+        tcg_gen_movi_tl(resultopt1, (target_ulong)(~0L));

-1 again.

+    case OPC_RISC_REM:
...
+    case OPC_RISC_REMU:

Similarly.

+static inline void gen_arith_imm(DisasContext *ctx, uint32_t opc, int rd,
+        int rs1, int16_t imm)
+{
+    TCGv source1;
+    source1 = tcg_temp_new();
+    gen_get_gpr(source1, rs1);
+    /* lower 12 bits of imm are valid */
+    target_long uimm = (target_long)imm; /* sign ext 16->64 bits */

Better to just make the function argument target_long, surely. I don't understand the "uimm", as this isn't unsigned...

+#if defined(TARGET_RISCV64)
+    case OPC_RISC_SLLIW:
+         if ((uimm >= 32)) {
+            kill_unknown(ctx, RISCV_EXCP_ILLEGAL_INST);
+         }
+        /* fall through to SLLI */

Break after the known exception, rather than falling through to SLLI.


+#if defined(TARGET_RISCV64)
+    case OPC_RISC_SHIFT_RIGHT_IW:
+        if ((uimm & 0x3ff) >= 32) {
+            kill_unknown(ctx, RISCV_EXCP_ILLEGAL_INST);
+        }
+        tcg_gen_shli_tl(source1, source1, 32);
+        extra_shamt = 32;

tcg_gen_ext32{u,s}_tl is better than extra shifts.


r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]