qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH for-4.0 04/17] tcg/i386: Force qemu_ld/st arguments


From: Richard Henderson
Subject: [Qemu-devel] [PATCH for-4.0 04/17] tcg/i386: Force qemu_ld/st arguments into fixed registers
Date: Mon, 12 Nov 2018 22:44:50 +0100

This is an incremental step toward moving the qemu_ld/st
code sequence out of line.

Signed-off-by: Richard Henderson <address@hidden>
---
 tcg/i386/tcg-target.inc.c | 193 +++++++++++++++++++++++++++++++-------
 1 file changed, 159 insertions(+), 34 deletions(-)

diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 2a96ca4274..8a3e7690b6 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -171,6 +171,56 @@ static bool have_lzcnt;
 
 static tcg_insn_unit *tb_ret_addr;
 
+#ifdef CONFIG_SOFTMMU
+/*
+ * Constraint to choose a particular register.  This is used for softmmu
+ * loads and stores.  Registers with no assignment get an empty string.
+ */
+static const char * const one_reg_constraint[TCG_TARGET_NB_REGS] = {
+    [TCG_REG_EAX] = "a",
+    [TCG_REG_EBX] = "b",
+    [TCG_REG_ECX] = "c",
+    [TCG_REG_EDX] = "d",
+    [TCG_REG_ESI] = "S",
+    [TCG_REG_EDI] = "D",
+#if TCG_TARGET_REG_BITS == 64
+    [TCG_REG_R8]  = "E",
+    [TCG_REG_R9]  = "N",
+#endif
+};
+
+/*
+ * Calling convention for the softmmu load and store thunks.
+ *
+ * For 64-bit, we mostly use the host calling convention, therefore the
+ * real first argument is reserved for the ENV parameter that is passed
+ * on to the slow path helpers.
+ *
+ * For 32-bit, the host calling convention is stack based; we invent a
+ * private convention that uses 4 of the 6 available host registers, and
+ * we reserve EAX and EDX as temporaries for use by the thunk.
+ */
+static inline TCGReg softmmu_arg(unsigned n)
+{
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_debug_assert(n < ARRAY_SIZE(tcg_target_call_iarg_regs) - 1);
+        return tcg_target_call_iarg_regs[n + 1];
+    } else {
+        static const TCGReg local_order[] = {
+            TCG_REG_ESI, TCG_REG_EDI, TCG_REG_ECX, TCG_REG_EBX
+        };
+        tcg_debug_assert(n < ARRAY_SIZE(local_order));
+        return local_order[n];
+    }
+}
+
+#define qemu_memop_arg(N)          one_reg_constraint[softmmu_arg(N)]
+#define qemu_memop_ret(N)          (N ? "d" : "a")
+#else
+#define qemu_memop_arg(N)          "L"
+#define qemu_memop_ret(N)          "L"
+#endif /* CONFIG_SOFTMMU */
+
 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
@@ -1677,11 +1727,15 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg 
addrlo, TCGReg addrhi,
        copies the entire guest address for the slow path, while truncation
        for the 32-bit host happens with the fastpath ADDL below.  */
     if (TCG_TARGET_REG_BITS == 64) {
-        base = tcg_target_call_iarg_regs[1];
+        tcg_debug_assert(addrlo == tcg_target_call_iarg_regs[1]);
+        if (TARGET_LONG_BITS == 32) {
+            tcg_out_ext32u(s, addrlo, addrlo);
+        }
+        base = addrlo;
     } else {
         base = r1;
+        tcg_out_mov(s, ttype, base, addrlo);
     }
-    tcg_out_mov(s, ttype, base, addrlo);
 
     /* jne slow_path */
     tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
@@ -2006,16 +2060,22 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, 
TCGReg datalo, TCGReg datahi,
    common. */
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 {
-    TCGReg datalo, datahi, addrlo;
-    TCGReg addrhi __attribute__((unused));
+    TCGReg datalo, addrlo;
+    TCGReg datahi __attribute__((unused)) = -1;
+    TCGReg addrhi __attribute__((unused)) = -1;
     TCGMemOpIdx oi;
     TCGMemOp opc;
+    int i = -1;
 
-    datalo = *args++;
-    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
-    addrlo = *args++;
-    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
-    oi = *args++;
+    datalo = args[++i];
+    if (TCG_TARGET_REG_BITS == 32 && is64) {
+        datahi = args[++i];
+    }
+    addrlo = args[++i];
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        addrhi = args[++i];
+    }
+    oi = args[++i];
     opc = get_memop(oi);
 
 #if defined(CONFIG_SOFTMMU)
@@ -2024,6 +2084,15 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, bool is64)
         tcg_insn_unit *label_ptr[2];
         TCGReg base;
 
+        tcg_debug_assert(datalo == tcg_target_call_oarg_regs[0]);
+        if (TCG_TARGET_REG_BITS == 32 && is64) {
+            tcg_debug_assert(datahi == tcg_target_call_oarg_regs[1]);
+        }
+        tcg_debug_assert(addrlo == softmmu_arg(0));
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            tcg_debug_assert(addrhi == softmmu_arg(1));
+        }
+
         base = tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                                 label_ptr, offsetof(CPUTLBEntry, addr_read));
 
@@ -2146,16 +2215,22 @@ static void tcg_out_qemu_st_direct(TCGContext *s, 
TCGReg datalo, TCGReg datahi,
 
 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 {
-    TCGReg datalo, datahi, addrlo;
-    TCGReg addrhi __attribute__((unused));
+    TCGReg datalo, addrlo;
+    TCGReg datahi __attribute__((unused)) = -1;
+    TCGReg addrhi __attribute__((unused)) = -1;
     TCGMemOpIdx oi;
     TCGMemOp opc;
+    int i = -1;
 
-    datalo = *args++;
-    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
-    addrlo = *args++;
-    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
-    oi = *args++;
+    datalo = args[++i];
+    if (TCG_TARGET_REG_BITS == 32 && is64) {
+        datahi = args[++i];
+    }
+    addrlo = args[++i];
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        addrhi = args[++i];
+    }
+    oi = args[++i];
     opc = get_memop(oi);
 
 #if defined(CONFIG_SOFTMMU)
@@ -2164,6 +2239,16 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
*args, bool is64)
         tcg_insn_unit *label_ptr[2];
         TCGReg base;
 
+        i = -1;
+        tcg_debug_assert(addrlo == softmmu_arg(++i));
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            tcg_debug_assert(addrhi == softmmu_arg(++i));
+        }
+        tcg_debug_assert(datalo == softmmu_arg(++i));
+        if (TCG_TARGET_REG_BITS == 32 && is64) {
+            tcg_debug_assert(datahi == softmmu_arg(++i));
+        }
+
         base = tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                                 label_ptr, offsetof(CPUTLBEntry, addr_write));
 
@@ -2833,15 +2918,6 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode 
op)
     static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } };
     static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } };
     static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
-    static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
-    static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
-    static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
-    static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
-    static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
-    static const TCGTargetOpDef r_r_L_L
-        = { .args_ct_str = { "r", "r", "L", "L" } };
-    static const TCGTargetOpDef L_L_L_L
-        = { .args_ct_str = { "L", "L", "L", "L" } };
     static const TCGTargetOpDef x_x = { .args_ct_str = { "x", "x" } };
     static const TCGTargetOpDef x_x_x = { .args_ct_str = { "x", "x", "x" } };
     static const TCGTargetOpDef x_x_x_x
@@ -3023,17 +3099,66 @@ static const TCGTargetOpDef 
*tcg_target_op_def(TCGOpcode op)
         }
 
     case INDEX_op_qemu_ld_i32:
-        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
-    case INDEX_op_qemu_st_i32:
-        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
+        {
+            static TCGTargetOpDef ld32;
+            ld32.args_ct_str[0] = qemu_memop_ret(0);
+            ld32.args_ct_str[1] = qemu_memop_arg(0);
+            if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+                ld32.args_ct_str[2] = qemu_memop_arg(1);
+            }
+            return &ld32;
+        }
     case INDEX_op_qemu_ld_i64:
-        return (TCG_TARGET_REG_BITS == 64 ? &r_L
-                : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
-                : &r_r_L_L);
+        {
+            static TCGTargetOpDef ld64;
+            if (TCG_TARGET_REG_BITS == 64) {
+                ld64.args_ct_str[0] = qemu_memop_ret(0);
+                ld64.args_ct_str[1] = qemu_memop_arg(0);
+            } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
+                ld64.args_ct_str[0] = qemu_memop_ret(0);
+                ld64.args_ct_str[1] = qemu_memop_ret(1);
+                ld64.args_ct_str[2] = qemu_memop_arg(0);
+            } else {
+                ld64.args_ct_str[0] = qemu_memop_ret(0);
+                ld64.args_ct_str[1] = qemu_memop_ret(1);
+                ld64.args_ct_str[2] = qemu_memop_arg(0);
+                ld64.args_ct_str[3] = qemu_memop_arg(1);
+            }
+            return &ld64;
+        }
+
+    /* Recall the store value comes before addr in the opcode args
+       and after addr in helper args.  */
+    case INDEX_op_qemu_st_i32:
+        {
+            static TCGTargetOpDef st32;
+            st32.args_ct_str[1] = qemu_memop_arg(0);
+            if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
+                st32.args_ct_str[0] = qemu_memop_arg(1);
+            } else {
+                st32.args_ct_str[2] = qemu_memop_arg(1);
+                st32.args_ct_str[0] = qemu_memop_arg(2);
+            }
+            return &st32;
+        }
     case INDEX_op_qemu_st_i64:
-        return (TCG_TARGET_REG_BITS == 64 ? &L_L
-                : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L_L
-                : &L_L_L_L);
+        {
+            static TCGTargetOpDef st64;
+            if (TCG_TARGET_REG_BITS == 64) {
+                st64.args_ct_str[1] = qemu_memop_arg(0);
+                st64.args_ct_str[0] = qemu_memop_arg(1);
+            } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
+                st64.args_ct_str[2] = qemu_memop_arg(0);
+                st64.args_ct_str[0] = qemu_memop_arg(1);
+                st64.args_ct_str[1] = qemu_memop_arg(2);
+            } else {
+                st64.args_ct_str[2] = qemu_memop_arg(0);
+                st64.args_ct_str[3] = qemu_memop_arg(1);
+                st64.args_ct_str[0] = qemu_memop_arg(2);
+                st64.args_ct_str[1] = qemu_memop_arg(3);
+            }
+            return &st64;
+        }
 
     case INDEX_op_brcond2_i32:
         {
-- 
2.17.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]