[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH for-4.0 v2 08/37] tcg/i386: Force qemu_ld/st arguments into fixed registers
From: Richard Henderson
Subject: [Qemu-devel] [PATCH for-4.0 v2 08/37] tcg/i386: Force qemu_ld/st arguments into fixed registers
Date: Fri, 23 Nov 2018 15:45:29 +0100
This is an incremental step toward moving the qemu_ld/st
code sequence out of line.
Signed-off-by: Richard Henderson <address@hidden>
---
tcg/i386/tcg-target.inc.c | 203 +++++++++++++++++++++++++++++++-------
1 file changed, 169 insertions(+), 34 deletions(-)
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 07df4b2b12..50e5dc31b3 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -171,6 +171,80 @@ static bool have_lzcnt;
static tcg_insn_unit *tb_ret_addr;
+typedef enum {
+ ARG_ADDR,
+ ARG_STVAL,
+ ARG_LDVAL,
+} QemuMemArgType;
+
+#ifdef CONFIG_SOFTMMU
+/*
+ * Constraint to choose a particular register. This is used for softmmu
+ * loads and stores. Registers with no assignment get an empty string.
+ */
+static const char * const one_reg_constraint[TCG_TARGET_NB_REGS] = {
+ [TCG_REG_EAX] = "a",
+ [TCG_REG_EBX] = "b",
+ [TCG_REG_ECX] = "c",
+ [TCG_REG_EDX] = "d",
+ [TCG_REG_ESI] = "S",
+ [TCG_REG_EDI] = "D",
+#if TCG_TARGET_REG_BITS == 64
+ [TCG_REG_R8] = "E",
+ [TCG_REG_R9] = "N",
+#endif
+};
+
+/*
+ * Calling convention for the softmmu load and store thunks.
+ *
+ * For 64-bit, we mostly use the host calling convention, therefore the
+ * real first argument is reserved for the ENV parameter that is passed
+ * on to the slow path helpers.
+ *
+ * For 32-bit, the host calling convention is stack based; we invent a
+ * private convention that uses 4 of the 6 available host registers.
+ * We reserve EAX and EDX as temporaries for use by the thunk, we require
+ * INDEX_op_qemu_st_i32 to have a 'q' register from which to store, and
+ * further complicate this last by wanting a call-clobbered for that store.
+ * The 'q' requirement allows MO_8 stores at all; the call-clobbered part
+ * allows bswap to operate in-place, clobbering the input.
+ */
+static TCGReg softmmu_arg(QemuMemArgType type, bool is_64, int hi)
+{
+ switch (type) {
+ case ARG_ADDR:
+ tcg_debug_assert(!hi || TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
+ if (TCG_TARGET_REG_BITS == 64) {
+ return tcg_target_call_iarg_regs[1];
+ } else {
+ return hi ? TCG_REG_EDI : TCG_REG_ESI;
+ }
+ case ARG_STVAL:
+ tcg_debug_assert(!hi || (TCG_TARGET_REG_BITS == 32 && is_64));
+ if (TCG_TARGET_REG_BITS == 64) {
+ return tcg_target_call_iarg_regs[2];
+ } else {
+ return hi ? TCG_REG_EBX : TCG_REG_ECX;
+ }
+ case ARG_LDVAL:
+ tcg_debug_assert(!hi || (TCG_TARGET_REG_BITS == 32 && is_64));
+ return tcg_target_call_oarg_regs[hi];
+ }
+ g_assert_not_reached();
+}
+
+static const char *constrain_memop_arg(QemuMemArgType type, bool is_64, int hi)
+{
+ return one_reg_constraint[softmmu_arg(type, is_64, hi)];
+}
+#else
+static const char *constrain_memop_arg(QemuMemArgType type, bool is_64, int hi)
+{
+ return "L";
+}
+#endif /* CONFIG_SOFTMMU */
+
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
{
@@ -1680,11 +1754,15 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg
addrlo, TCGReg addrhi,
copies the entire guest address for the slow path, while truncation
for the 32-bit host happens with the fastpath ADDL below. */
if (TCG_TARGET_REG_BITS == 64) {
- base = tcg_target_call_iarg_regs[1];
+ tcg_debug_assert(addrlo == tcg_target_call_iarg_regs[1]);
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_ext32u(s, addrlo, addrlo);
+ }
+ base = addrlo;
} else {
base = r1;
+ tcg_out_mov(s, ttype, base, addrlo);
}
- tcg_out_mov(s, ttype, base, addrlo);
/* jne slow_path */
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
@@ -2009,16 +2087,22 @@ static void tcg_out_qemu_ld_direct(TCGContext *s,
TCGReg datalo, TCGReg datahi,
common. */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
- TCGReg datalo, datahi, addrlo;
- TCGReg addrhi __attribute__((unused));
+ TCGReg datalo, addrlo;
+ TCGReg datahi __attribute__((unused)) = -1;
+ TCGReg addrhi __attribute__((unused)) = -1;
TCGMemOpIdx oi;
TCGMemOp opc;
+ int i = -1;
- datalo = *args++;
- datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
- addrlo = *args++;
- addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
- oi = *args++;
+ datalo = args[++i];
+ if (TCG_TARGET_REG_BITS == 32 && is64) {
+ datahi = args[++i];
+ }
+ addrlo = args[++i];
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ addrhi = args[++i];
+ }
+ oi = args[++i];
opc = get_memop(oi);
#if defined(CONFIG_SOFTMMU)
@@ -2027,6 +2111,15 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg
*args, bool is64)
tcg_insn_unit *label_ptr[2];
TCGReg base;
+ tcg_debug_assert(datalo == softmmu_arg(ARG_LDVAL, is64, 0));
+ if (TCG_TARGET_REG_BITS == 32 && is64) {
+ tcg_debug_assert(datahi == softmmu_arg(ARG_LDVAL, is64, 1));
+ }
+ tcg_debug_assert(addrlo == softmmu_arg(ARG_ADDR, 0, 0));
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_debug_assert(addrhi == softmmu_arg(ARG_ADDR, 0, 1));
+ }
+
base = tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
label_ptr, offsetof(CPUTLBEntry, addr_read));
@@ -2149,16 +2242,22 @@ static void tcg_out_qemu_st_direct(TCGContext *s,
TCGReg datalo, TCGReg datahi,
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
- TCGReg datalo, datahi, addrlo;
- TCGReg addrhi __attribute__((unused));
+ TCGReg datalo, addrlo;
+ TCGReg datahi __attribute__((unused)) = -1;
+ TCGReg addrhi __attribute__((unused)) = -1;
TCGMemOpIdx oi;
TCGMemOp opc;
+ int i = -1;
- datalo = *args++;
- datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
- addrlo = *args++;
- addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
- oi = *args++;
+ datalo = args[++i];
+ if (TCG_TARGET_REG_BITS == 32 && is64) {
+ datahi = args[++i];
+ }
+ addrlo = args[++i];
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ addrhi = args[++i];
+ }
+ oi = args[++i];
opc = get_memop(oi);
#if defined(CONFIG_SOFTMMU)
@@ -2167,6 +2266,15 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg
*args, bool is64)
tcg_insn_unit *label_ptr[2];
TCGReg base;
+ tcg_debug_assert(datalo == softmmu_arg(ARG_STVAL, is64, 0));
+ if (TCG_TARGET_REG_BITS == 32 && is64) {
+ tcg_debug_assert(datahi == softmmu_arg(ARG_STVAL, is64, 1));
+ }
+ tcg_debug_assert(addrlo == softmmu_arg(ARG_ADDR, 0, 0));
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_debug_assert(addrhi == softmmu_arg(ARG_ADDR, 0, 1));
+ }
+
base = tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
label_ptr, offsetof(CPUTLBEntry, addr_write));
@@ -2836,15 +2944,6 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode
op)
static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } };
static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } };
static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
- static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
- static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
- static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
- static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
- static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
- static const TCGTargetOpDef r_r_L_L
- = { .args_ct_str = { "r", "r", "L", "L" } };
- static const TCGTargetOpDef L_L_L_L
- = { .args_ct_str = { "L", "L", "L", "L" } };
static const TCGTargetOpDef x_x = { .args_ct_str = { "x", "x" } };
static const TCGTargetOpDef x_x_x = { .args_ct_str = { "x", "x", "x" } };
static const TCGTargetOpDef x_x_x_x
@@ -3026,17 +3125,53 @@ static const TCGTargetOpDef
*tcg_target_op_def(TCGOpcode op)
}
case INDEX_op_qemu_ld_i32:
- return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
- case INDEX_op_qemu_st_i32:
- return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
+ {
+ static TCGTargetOpDef ld32;
+ int i;
+
+ ld32.args_ct_str[0] = constrain_memop_arg(ARG_LDVAL, 0, 0);
+ for (i = 0; i * TCG_TARGET_REG_BITS < TARGET_LONG_BITS; ++i) {
+ ld32.args_ct_str[i + 1] = constrain_memop_arg(ARG_ADDR, 0, i);
+ }
+ return &ld32;
+ }
case INDEX_op_qemu_ld_i64:
- return (TCG_TARGET_REG_BITS == 64 ? &r_L
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
- : &r_r_L_L);
+ {
+ static TCGTargetOpDef ld64;
+ int i, j = 0;
+
+ for (i = 0; i * TCG_TARGET_REG_BITS < 64; ++i) {
+ ld64.args_ct_str[j++] = constrain_memop_arg(ARG_LDVAL, 1, i);
+ }
+ for (i = 0; i * TCG_TARGET_REG_BITS < TARGET_LONG_BITS; ++i) {
+ ld64.args_ct_str[j++] = constrain_memop_arg(ARG_ADDR, 0, i);
+ }
+ return &ld64;
+ }
+ case INDEX_op_qemu_st_i32:
+ {
+ static TCGTargetOpDef st32;
+ int i;
+
+ st32.args_ct_str[0] = constrain_memop_arg(ARG_STVAL, 0, 0);
+ for (i = 0; i * TCG_TARGET_REG_BITS < TARGET_LONG_BITS; ++i) {
+ st32.args_ct_str[i + 1] = constrain_memop_arg(ARG_ADDR, 0, i);
+ }
+ return &st32;
+ }
case INDEX_op_qemu_st_i64:
- return (TCG_TARGET_REG_BITS == 64 ? &L_L
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L_L
- : &L_L_L_L);
+ {
+ static TCGTargetOpDef st64;
+ int i, j = 0;
+
+ for (i = 0; i * TCG_TARGET_REG_BITS < 64; ++i) {
+ st64.args_ct_str[j++] = constrain_memop_arg(ARG_STVAL, 1, i);
+ }
+ for (i = 0; i * TCG_TARGET_REG_BITS < TARGET_LONG_BITS; ++i) {
+ st64.args_ct_str[j++] = constrain_memop_arg(ARG_ADDR, 0, i);
+ }
+ return &st64;
+ }
case INDEX_op_brcond2_i32:
{
--
2.17.2
- [Qemu-devel] [PATCH for-4.0 v2 10/37] tcg/aarch64: Add constraints for x0, x1, x2, (continued)
- [Qemu-devel] [PATCH for-4.0 v2 10/37] tcg/aarch64: Add constraints for x0, x1, x2, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 11/37] tcg/aarch64: Parameterize the temps for tcg_out_tlb_read, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 12/37] tcg/aarch64: Parameterize the temp for tcg_out_goto_long, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 13/37] tcg/aarch64: Use B not BL for tcg_out_goto_long, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 09/37] tcg/i386: Use TCG_TARGET_NEED_LDST_OOL_LABELS, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 08/37] tcg/i386: Force qemu_ld/st arguments into fixed registers,
Richard Henderson <=
- [Qemu-devel] [PATCH for-4.0 v2 14/37] tcg/aarch64: Use TCG_TARGET_NEED_LDST_OOL_LABELS, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 16/37] tcg/arm: Add constraints for R0-R5, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 15/37] tcg/arm: Parameterize the temps for tcg_out_tlb_read, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 29/37] tcg: Add TCG_TARGET_HAS_MEMORY_BSWAP, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 27/37] tcg: Clean up generic bswap64, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 24/37] tcg/ppc: Force qemu_ld/st arguments into fixed registers, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 18/37] tcg/arm: Force qemu_ld/st arguments into fixed registers, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 17/37] tcg/arm: Reduce the number of temps for tcg_out_tlb_read, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 22/37] tcg/ppc: Add constraints for R7-R8, Richard Henderson, 2018/11/23