Re: [Qemu-devel] [PATCH] tcg/mips: Add support for mips64el backend


From: Richard Henderson
Subject: Re: [Qemu-devel] [PATCH] tcg/mips: Add support for mips64el backend
Date: Sun, 13 Nov 2016 08:56:15 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.4.0

On 11/11/2016 08:20 AM, address@hidden wrote:
From: Jin Guojie <address@hidden>

This patch implements a TCG mips64r2 (little-endian) translation backend.
Tested on a Loongson 3A2000 (a MIPS64-compatible CPU) running Fedora Linux 21 Remix;
linux-0.2.img.bz2 runs well.
Performance is nearly 10 times that of TCI mode.

https://en.wikipedia.org/wiki/Loongson
http://www.loongnix.org/index.php/Loongnix

Cc: Aurelien Jarno <address@hidden>
Signed-off-by: Jin Guojie <address@hidden>

Have you seen

https://lists.nongnu.org/archive/html/qemu-devel/2016-02/msg01910.html

? I know there are bugs in that patch set, but I would like any mips64 support to look like that. In particular, reduce the use of #if to an absolute minimum.
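
For illustration, the main trick in that series is to test TCG_TARGET_REG_BITS in plain C rather than in the preprocessor; the constant folds away, and both arms stay visible to the compiler on every host. A minimal sketch (not lifted from the linked patches), using the slow-path hunk that appears further down:

    /* Sketch: TCG_TARGET_REG_BITS is a compile-time constant, so a
       plain 'if' compiles to the same code as an #if while keeping
       both arms syntactically checked on 32- and 64-bit hosts.  */
    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
        /* 32-bit host: a 64-bit result lives in a register pair.  */
        tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1);
    }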

+#if UINTPTR_MAX == UINT32_MAX
+# define TCG_TARGET_REG_BITS 32
+#elif UINTPTR_MAX == UINT64_MAX
+# define TCG_TARGET_REG_BITS 64
+#endif

There are two mips64 ABIs. You're only allowing 64-bit code to be generated for n64, and not n32.
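
For example, the usual way to tell them apart is the compiler-provided _MIPS_SIM macro; a sketch (whether n32 should also get 64-bit code generation, given that its pointers are 32-bit but its registers are 64-bit, is exactly the question being raised):

    /* Sketch: select the register width from the ABI rather than
       from the pointer size alone.  */
    #if defined(_ABI64) && _MIPS_SIM == _ABI64
    # define TCG_TARGET_REG_BITS 64      /* n64 */
    #elif defined(_ABIN32) && _MIPS_SIM == _ABIN32
    # define TCG_TARGET_REG_BITS 64      /* n32: 64-bit regs, 32-bit ptrs */
    #else
    # define TCG_TARGET_REG_BITS 32      /* o32 */
    #endif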

+#undef use_movnz_instructions
+#undef use_mips32_instructions
+#undef use_mips32r6_instructions
+
+#define use_movnz_instructions  0
+#define use_mips32_instructions  0
+#define use_mips32r6_instructions  0

Why?  Certainly we should be able to generate code for mips64r2 and mips64r6.
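
As a sketch of why zeroing them is unneeded: the existing guards key off __mips_isa_rev, which the compiler defines for r2/r6 whether the target is mips32 or mips64, so the flags can be left exactly as the 32-bit backend computes them (the guard below is illustrative, not the file's verbatim text):

    /* Sketch: __mips_isa_rev >= 6 holds when compiling for mips64r6
       just as for mips32r6, so no 64-bit special case is needed.  */
    #if defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
    # define use_mips32r6_instructions  1
    #else
    # define use_mips32r6_instructions  0
    #endif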

+#if TCG_TARGET_REG_BITS == 64
+static const TCGReg tcg_target_call_oarg_regs[1] = {
+    TCG_REG_V0,
+};
+#else
 static const TCGReg tcg_target_call_oarg_regs[2] = {
     TCG_REG_V0,
     TCG_REG_V1
 };
+#endif

This change would be incorrect if we ever enhance tcg to handle __int128_t. In the meantime it doesn't matter, and can be left unchanged.

@@ -459,7 +502,15 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type,
 {
     /* Simple reg-reg move, optimising out the 'do nothing' case */
     if (ret != arg) {
+#if TCG_TARGET_REG_BITS == 64
+        if (type == TCG_TYPE_I32) {
+            tcg_out_opc_reg(s, OPC_ADDU, ret, arg, TCG_REG_ZERO);
+        } else {
+            tcg_out_opc_reg(s, OPC_DADDU, ret, arg, TCG_REG_ZERO);
+        }
+#else
         tcg_out_opc_reg(s, OPC_ADDU, ret, arg, TCG_REG_ZERO);
+#endif
     }

This is why a proper MIPS assembler uses OPC_OR for register moves: OR needs no 32/64-bit distinction.

 }
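
A minimal sketch of the OR-based move: OR with $zero copies the whole register, so one encoding serves both widths and the #if disappears:

    static inline void tcg_out_mov(TCGContext *s, TCGType type,
                                   TCGReg ret, TCGReg arg)
    {
        /* Simple reg-reg move, still optimising out the no-op case;
           OR is width-agnostic, so no type test is required.  */
        if (ret != arg) {
            tcg_out_opc_reg(s, OPC_OR, ret, arg, TCG_REG_ZERO);
        }
    }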

@@ -470,12 +521,21 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
         tcg_out_opc_imm(s, OPC_ADDIU, reg, TCG_REG_ZERO, arg);
     } else if (arg == (uint16_t)arg) {
         tcg_out_opc_imm(s, OPC_ORI, reg, TCG_REG_ZERO, arg);
-    } else {
+    } else if (arg == (int32_t)arg) {
         tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, arg >> 16);
         if (arg & 0xffff) {
             tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff);
         }
     }
+#if TCG_TARGET_REG_BITS == 64
+    /* 64-bit imm */
+    else {
+        tcg_out_opc_imm(s, OPC_LUI, reg, 0, (arg >> 32) & 0xffff);
+        tcg_out_opc_imm(s, OPC_ORI, reg, reg, (arg >> 16) & 0xffff);
+        tcg_out_opc_imm_64(s, OPC_DSLL, reg, reg, 16);
+        tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff);
+    }
+#endif

This only materializes a 48-bit immediate: after the final ORI, bits 63:48 hold sign copies of bit 47, not the top of arg.

 }
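
For comparison, materializing all 64 bits takes one more shift/or pair; a sketch reusing the patch's own emitters (the short-constant special cases above are omitted here):

    /* Sketch: build the constant in four 16-bit pieces, high to low.
       The sign-extension that LUI leaves in the upper bits is
       shifted out by the two DSLLs.  */
    tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, (arg >> 48) & 0xffff);
    tcg_out_opc_imm(s, OPC_ORI, reg, reg, (arg >> 32) & 0xffff);
    tcg_out_opc_imm_64(s, OPC_DSLL, reg, reg, 16);
    tcg_out_opc_imm(s, OPC_ORI, reg, reg, (arg >> 16) & 0xffff);
    tcg_out_opc_imm_64(s, OPC_DSLL, reg, reg, 16);
    tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff);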

 static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg)
@@ -566,7 +626,11 @@ static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data,
     if (ofs != lo) {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo);
         if (addr != TCG_REG_ZERO) {
+#if TCG_TARGET_REG_BITS == 64
+            tcg_out_opc_reg(s, OPC_DADDU, TCG_TMP0, TCG_TMP0, addr);
+#else
             tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, TCG_TMP0, addr);
+#endif

See my patchset where I introduce OPC_PADDU to avoid this and other similar 
ifdefs.
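
The gist of that approach, sketched here (the alias name and its exact spelling in the series are assumptions): define one width-selected opcode alias in the MIPSInsn enum, then use it at every address computation:

    /* Sketch: inside the MIPSInsn enum, a 'pointer add' alias chosen
       once by register width.  */
    ALIAS_PADDU = (TCG_TARGET_REG_BITS == 32 ? OPC_ADDU : OPC_DADDU),

    /* Call sites then carry no conditional at all:  */
    tcg_out_opc_reg(s, ALIAS_PADDU, TCG_TMP0, TCG_TMP0, addr);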

@@ -1163,6 +1276,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
     tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);

     v0 = l->datalo_reg;
+#if TCG_TARGET_REG_BITS == 32
     if ((opc & MO_SIZE) == MO_64) {
         /* We eliminated V0 from the possible output registers, so it
            cannot be clobbered here.  So we must move V1 first.  */
@@ -1173,11 +1287,21 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
             tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1);
         }
     }
+#endif

     reloc_pc16(s->code_ptr, l->raddr);
     tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
     /* delay slot */
+#if TCG_TARGET_REG_BITS == 32
     tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0);
+#else
+    /* ext unsigned long(32) -> 64-bit */
+    if ((opc & MO_SIZE) == MO_32) {
+        tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V0);
+    } else {
+        tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0);
+    }
+#endif

This is incorrect, as you're not passing down whether the operation is a 32-bit load into a 32-bit temporary, or a 32-bit load into a 64-bit temporary. I.e. the difference between

  unsigned int x = *(unsigned int *)ptr;
and
  unsigned long long x = *(unsigned int *)ptr;
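
One way to carry that distinction, sketched against the label's type field (assuming TCGLabelQemuLdst records the destination's TCG type; the final series may do this differently):

    /* delay slot */
    /* Sketch: choose the extension from the destination type, not
       from MO_SIZE.  MIPS64 keeps 32-bit values sign-extended, and
       SLL with shift amount 0 re-canonicalizes exactly that form.  */
    if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) {
        tcg_out_opc_sa(s, OPC_SLL, v0, TCG_REG_V0, 0);
    } else {
        tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO);
    }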


r~


