From: Richard Henderson
Subject: [Qemu-devel] [PATCH for-4.0 v2 08/37] tcg/i386: Force qemu_ld/st arguments into fixed registers
Date: Fri, 23 Nov 2018 15:45:29 +0100

This is an incremental step toward moving the qemu_ld/st
code sequence out of line.

Signed-off-by: Richard Henderson <address@hidden>
---
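For illustration, assuming the SysV ordering of tcg_target_call_iarg_regs
(RDI, RSI, RDX, ...) and EAX/EDX as the call-output registers, the softmmu
constraint strings built by tcg_target_op_def() below work out to:

  x86_64 host:              qemu_ld_{i32,i64} -> { "a", "S" }
                            qemu_st_{i32,i64} -> { "d", "S" }
  i386 host, 64-bit guest:  qemu_ld_i64       -> { "a", "d", "S", "D" }
                            qemu_st_i64       -> { "c", "b", "S", "D" }

i.e. the register allocator is forced to materialize the value and address
operands exactly where the out-of-line thunks will expect them.
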
 tcg/i386/tcg-target.inc.c | 203 +++++++++++++++++++++++++++++++-------
 1 file changed, 169 insertions(+), 34 deletions(-)

diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 07df4b2b12..50e5dc31b3 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -171,6 +171,80 @@ static bool have_lzcnt;
 
 static tcg_insn_unit *tb_ret_addr;
 
+typedef enum {
+    ARG_ADDR,
+    ARG_STVAL,
+    ARG_LDVAL,
+} QemuMemArgType;
+
+#ifdef CONFIG_SOFTMMU
+/*
+ * Constraint to choose a particular register.  This is used for softmmu
+ * loads and stores.  Registers with no assignment are left NULL.
+ */
+static const char * const one_reg_constraint[TCG_TARGET_NB_REGS] = {
+    [TCG_REG_EAX] = "a",
+    [TCG_REG_EBX] = "b",
+    [TCG_REG_ECX] = "c",
+    [TCG_REG_EDX] = "d",
+    [TCG_REG_ESI] = "S",
+    [TCG_REG_EDI] = "D",
+#if TCG_TARGET_REG_BITS == 64
+    [TCG_REG_R8]  = "E",
+    [TCG_REG_R9]  = "N",
+#endif
+};
+
+/*
+ * Calling convention for the softmmu load and store thunks.
+ *
+ * For 64-bit, we mostly use the host calling convention; therefore the
+ * real first argument is reserved for the ENV parameter that is passed
+ * on to the slow path helpers.
+ *
+ * For 32-bit, the host calling convention is stack based; we invent a
+ * private convention that uses 4 of the 6 available host registers.
+ * We reserve EAX and EDX as temporaries for use by the thunk; we require
+ * INDEX_op_qemu_st_i32 to have a 'q' register from which to store, and
+ * further complicate this last by wanting a call-clobbered register for
+ * that store.
+ * The 'q' requirement allows MO_8 stores at all; the call-clobbered part
+ * allows bswap to operate in-place, clobbering the input.
+ */
+static TCGReg softmmu_arg(QemuMemArgType type, bool is_64, int hi)
+{
+    switch (type) {
+    case ARG_ADDR:
+        tcg_debug_assert(!hi || TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
+        if (TCG_TARGET_REG_BITS == 64) {
+            return tcg_target_call_iarg_regs[1];
+        } else {
+            return hi ? TCG_REG_EDI : TCG_REG_ESI;
+        }
+    case ARG_STVAL:
+        tcg_debug_assert(!hi || (TCG_TARGET_REG_BITS == 32 && is_64));
+        if (TCG_TARGET_REG_BITS == 64) {
+            return tcg_target_call_iarg_regs[2];
+        } else {
+            return hi ? TCG_REG_EBX : TCG_REG_ECX;
+        }
+    case ARG_LDVAL:
+        tcg_debug_assert(!hi || (TCG_TARGET_REG_BITS == 32 && is_64));
+        return tcg_target_call_oarg_regs[hi];
+    }
+    g_assert_not_reached();
+}
+
+static const char *constrain_memop_arg(QemuMemArgType type, bool is_64, int hi)
+{
+    return one_reg_constraint[softmmu_arg(type, is_64, hi)];
+}
+#else
+static const char *constrain_memop_arg(QemuMemArgType type, bool is_64, int hi)
+{
+    return "L";
+}
+#endif /* CONFIG_SOFTMMU */
+
 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
@@ -1680,11 +1754,15 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
        copies the entire guest address for the slow path, while truncation
        for the 32-bit host happens with the fastpath ADDL below.  */
     if (TCG_TARGET_REG_BITS == 64) {
-        base = tcg_target_call_iarg_regs[1];
+        tcg_debug_assert(addrlo == tcg_target_call_iarg_regs[1]);
+        if (TARGET_LONG_BITS == 32) {
+            tcg_out_ext32u(s, addrlo, addrlo);
+        }
+        base = addrlo;
     } else {
         base = r1;
+        tcg_out_mov(s, ttype, base, addrlo);
     }
-    tcg_out_mov(s, ttype, base, addrlo);
 
     /* jne slow_path */
     tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
@@ -2009,16 +2087,22 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
    common. */
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 {
-    TCGReg datalo, datahi, addrlo;
-    TCGReg addrhi __attribute__((unused));
+    TCGReg datalo, addrlo;
+    TCGReg datahi __attribute__((unused)) = -1;
+    TCGReg addrhi __attribute__((unused)) = -1;
     TCGMemOpIdx oi;
     TCGMemOp opc;
+    int i = -1;
 
-    datalo = *args++;
-    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
-    addrlo = *args++;
-    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
-    oi = *args++;
+    datalo = args[++i];
+    if (TCG_TARGET_REG_BITS == 32 && is64) {
+        datahi = args[++i];
+    }
+    addrlo = args[++i];
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        addrhi = args[++i];
+    }
+    oi = args[++i];
     opc = get_memop(oi);
 
 #if defined(CONFIG_SOFTMMU)
@@ -2027,6 +2111,15 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
         tcg_insn_unit *label_ptr[2];
         TCGReg base;
 
+        tcg_debug_assert(datalo == softmmu_arg(ARG_LDVAL, is64, 0));
+        if (TCG_TARGET_REG_BITS == 32 && is64) {
+            tcg_debug_assert(datahi == softmmu_arg(ARG_LDVAL, is64, 1));
+        }
+        tcg_debug_assert(addrlo == softmmu_arg(ARG_ADDR, 0, 0));
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            tcg_debug_assert(addrhi == softmmu_arg(ARG_ADDR, 0, 1));
+        }
+
         base = tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                                 label_ptr, offsetof(CPUTLBEntry, addr_read));
 
@@ -2149,16 +2242,22 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
 
 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 {
-    TCGReg datalo, datahi, addrlo;
-    TCGReg addrhi __attribute__((unused));
+    TCGReg datalo, addrlo;
+    TCGReg datahi __attribute__((unused)) = -1;
+    TCGReg addrhi __attribute__((unused)) = -1;
     TCGMemOpIdx oi;
     TCGMemOp opc;
+    int i = -1;
 
-    datalo = *args++;
-    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
-    addrlo = *args++;
-    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
-    oi = *args++;
+    datalo = args[++i];
+    if (TCG_TARGET_REG_BITS == 32 && is64) {
+        datahi = args[++i];
+    }
+    addrlo = args[++i];
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        addrhi = args[++i];
+    }
+    oi = args[++i];
     opc = get_memop(oi);
 
 #if defined(CONFIG_SOFTMMU)
@@ -2167,6 +2266,15 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
         tcg_insn_unit *label_ptr[2];
         TCGReg base;
 
+        tcg_debug_assert(datalo == softmmu_arg(ARG_STVAL, is64, 0));
+        if (TCG_TARGET_REG_BITS == 32 && is64) {
+            tcg_debug_assert(datahi == softmmu_arg(ARG_STVAL, is64, 1));
+        }
+        tcg_debug_assert(addrlo == softmmu_arg(ARG_ADDR, 0, 0));
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            tcg_debug_assert(addrhi == softmmu_arg(ARG_ADDR, 0, 1));
+        }
+
         base = tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                                 label_ptr, offsetof(CPUTLBEntry, addr_write));
 
@@ -2836,15 +2944,6 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
     static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } };
     static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } };
     static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
-    static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
-    static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
-    static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
-    static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
-    static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
-    static const TCGTargetOpDef r_r_L_L
-        = { .args_ct_str = { "r", "r", "L", "L" } };
-    static const TCGTargetOpDef L_L_L_L
-        = { .args_ct_str = { "L", "L", "L", "L" } };
     static const TCGTargetOpDef x_x = { .args_ct_str = { "x", "x" } };
     static const TCGTargetOpDef x_x_x = { .args_ct_str = { "x", "x", "x" } };
     static const TCGTargetOpDef x_x_x_x
@@ -3026,17 +3125,53 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
         }
 
     case INDEX_op_qemu_ld_i32:
-        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
-    case INDEX_op_qemu_st_i32:
-        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
+        {
+            static TCGTargetOpDef ld32;
+            int i;
+
+            ld32.args_ct_str[0] = constrain_memop_arg(ARG_LDVAL, 0, 0);
+            for (i = 0; i * TCG_TARGET_REG_BITS < TARGET_LONG_BITS; ++i) {
+                ld32.args_ct_str[i + 1] = constrain_memop_arg(ARG_ADDR, 0, i);
+            }
+            return &ld32;
+        }
     case INDEX_op_qemu_ld_i64:
-        return (TCG_TARGET_REG_BITS == 64 ? &r_L
-                : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
-                : &r_r_L_L);
+        {
+            static TCGTargetOpDef ld64;
+            int i, j = 0;
+
+            for (i = 0; i * TCG_TARGET_REG_BITS < 64; ++i) {
+                ld64.args_ct_str[j++] = constrain_memop_arg(ARG_LDVAL, 1, i);
+            }
+            for (i = 0; i * TCG_TARGET_REG_BITS < TARGET_LONG_BITS; ++i) {
+                ld64.args_ct_str[j++] = constrain_memop_arg(ARG_ADDR, 0, i);
+            }
+            return &ld64;
+        }
+    case INDEX_op_qemu_st_i32:
+        {
+            static TCGTargetOpDef st32;
+            int i;
+
+            st32.args_ct_str[0] = constrain_memop_arg(ARG_STVAL, 0, 0);
+            for (i = 0; i * TCG_TARGET_REG_BITS < TARGET_LONG_BITS; ++i) {
+                st32.args_ct_str[i + 1] = constrain_memop_arg(ARG_ADDR, 0, i);
+            }
+            return &st32;
+        }
     case INDEX_op_qemu_st_i64:
-        return (TCG_TARGET_REG_BITS == 64 ? &L_L
-                : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L_L
-                : &L_L_L_L);
+        {
+            static TCGTargetOpDef st64;
+            int i, j = 0;
+
+            for (i = 0; i * TCG_TARGET_REG_BITS < 64; ++i) {
+                st64.args_ct_str[j++] = constrain_memop_arg(ARG_STVAL, 1, i);
+            }
+            for (i = 0; i * TCG_TARGET_REG_BITS < TARGET_LONG_BITS; ++i) {
+                st64.args_ct_str[j++] = constrain_memop_arg(ARG_ADDR, 0, i);
+            }
+            return &st64;
+        }
 
     case INDEX_op_brcond2_i32:
         {
-- 
2.17.2