[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 19/19] tcg-ia64: Move part of softmmu slow path out
From: Richard Henderson
Subject: [Qemu-devel] [PATCH 19/19] tcg-ia64: Move part of softmmu slow path out of line
Date: Thu, 5 Sep 2013 23:50:41 -0700
Signed-off-by: Richard Henderson <address@hidden>
---
tcg/ia64/tcg-target.c | 156 +++++++++++++++++++++++++++++++-------------------
tcg/ia64/tcg-target.h | 2 +-
2 files changed, 97 insertions(+), 61 deletions(-)
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index ea24e83..9fd176d 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -219,6 +219,7 @@ enum {
OPC_ALLOC_M34 = 0x02c00000000ull,
OPC_BR_DPTK_FEW_B1 = 0x08400000000ull,
OPC_BR_SPTK_MANY_B1 = 0x08000001000ull,
+ OPC_BR_CALL_SPNT_FEW_B3 = 0x0a200000000ull,
OPC_BR_SPTK_MANY_B4 = 0x00100001000ull,
OPC_BR_CALL_SPTK_MANY_B5 = 0x02100001000ull,
OPC_BR_RET_SPTK_MANY_B4 = 0x00108001100ull,
@@ -355,6 +356,15 @@ static inline uint64_t tcg_opc_b1(int qp, uint64_t opc,
uint64_t imm)
| (qp & 0x3f);
}
+static inline uint64_t tcg_opc_b3(int qp, uint64_t opc, int b1, uint64_t imm)
+{
+ return opc
+ | ((imm & 0x100000) << 16) /* s */
+ | ((imm & 0x0fffff) << 13) /* imm20b */
+ | ((b1 & 0x7) << 6)
+ | (qp & 0x3f);
+}
+
static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2)
{
return opc
@@ -1633,14 +1643,70 @@ static inline void tcg_out_qemu_tlb(TCGContext *s,
TCGReg addr_reg,
bswap2);
}
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
- int mmu_idx, uintptr_t retaddr) */
-static const void * const qemu_ld_helpers[4] = {
- helper_ret_ldub_mmu,
- helper_le_lduw_mmu,
- helper_le_ldul_mmu,
- helper_le_ldq_mmu,
-};
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
+ uint8_t *label_ptr)
+{
+ TCGLabelQemuLdst *l = &s->qemu_ldst_labels[s->nb_qemu_ldst_labels++];
+
+ assert(s->nb_qemu_ldst_labels <= TCG_MAX_QEMU_LDST);
+
+ /* We don't need most of the items in the generic structure. */
+ memset(l, 0, sizeof(*l));
+ l->is_ld = is_ld;
+ l->opc = opc & MO_SIZE;
+ l->label_ptr[0] = label_ptr;
+}
+
+void tcg_out_tb_finalize(TCGContext *s)
+{
+ static const void * const helpers[8] = {
+ helper_ret_stb_mmu,
+ helper_le_stw_mmu,
+ helper_le_stl_mmu,
+ helper_le_stq_mmu,
+ helper_ret_ldub_mmu,
+ helper_le_lduw_mmu,
+ helper_le_ldul_mmu,
+ helper_le_ldq_mmu,
+ };
+ uintptr_t thunks[8] = { };
+ size_t i, n = s->nb_qemu_ldst_labels;
+
+ for (i = 0; i < n; i++) {
+ TCGLabelQemuLdst *l = &s->qemu_ldst_labels[i];
+ long x = l->is_ld * 4 + l->opc;
+ uintptr_t dest = thunks[x];
+
+ /* The out-of-line thunks are all the same; load the return address
+ from B0, load the GP, and branch to the code. Note that we are
+ always post-call, so the register window has rolled, so we're
+ using incoming parameter register numbers, not outgoing. */
+ if (dest == 0) {
+ uintptr_t disp, *desc = (uintptr_t *)helpers[x];
+
+ thunks[x] = dest = (uintptr_t)s->code_ptr;
+
+ tcg_out_bundle(s, mlx,
+ INSN_NOP_M,
+ tcg_opc_l2 (desc[1]),
+ tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2,
+ TCG_REG_R1, desc[1]));
+ tcg_out_bundle(s, mii,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22,
+ l->is_ld ? TCG_REG_R35 : TCG_REG_R36,
+ TCG_REG_B0));
+ disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4;
+ tcg_out_bundle(s, mLX,
+ INSN_NOP_M,
+ tcg_opc_l3 (disp),
+ tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3,
+ disp));
+ }
+
+ reloc_pcrel21b(l->label_ptr[0], dest);
+ }
+}
static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
TCGMemOp opc)
@@ -1650,7 +1716,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const
TCGArg *args,
};
int addr_reg, data_reg, mem_index;
TCGMemOp s_bits;
- uint64_t fin1, fin2, *desc, func, gp, here;
+ uint64_t fin1, fin2;
+ uint8_t *label_ptr;
data_reg = *args++;
addr_reg = *args++;
@@ -1677,31 +1744,20 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const
TCGArg *args,
fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8);
}
- desc = (uintptr_t *)qemu_ld_helpers[s_bits];
- func = desc[0];
- gp = desc[1];
- here = (uintptr_t)s->code_ptr;
-
- tcg_out_bundle(s, mlx,
+ tcg_out_bundle(s, mmI,
tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
- tcg_opc_l2 (here),
- tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R59, here));
- tcg_out_bundle(s, mLX,
tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
TCG_REG_R2, TCG_REG_R57),
- tcg_opc_l2 (gp),
- tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp));
- tcg_out_bundle(s, mmi,
+ tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index));
+ label_ptr = s->code_ptr + 2;
+ tcg_out_bundle(s, miB,
tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
TCG_REG_R8, TCG_REG_R2),
- tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
- INSN_NOP_I);
- func -= (uintptr_t)s->code_ptr;
- tcg_out_bundle(s, mLX,
- INSN_NOP_M,
- tcg_opc_l4 (func >> 4),
- tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4,
- TCG_REG_B0, func >> 4));
+ INSN_NOP_I,
+ tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0,
+ get_reloc_pcrel21b(label_ptr)));
+
+ add_qemu_ldst_label(s, 1, opc, label_ptr);
/* Note that we always use LE helper functions, so the bswap insns
here for the fast path also apply to the slow path. */
@@ -1711,15 +1767,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const
TCGArg *args,
fin2 ? fin2 : INSN_NOP_I);
}
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
- uintxx_t val, int mmu_idx, uintptr_t retaddr) */
-static const void * const qemu_st_helpers[4] = {
- helper_ret_stb_mmu,
- helper_le_stw_mmu,
- helper_le_stl_mmu,
- helper_le_stq_mmu,
-};
-
static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
TCGMemOp opc)
{
@@ -1728,8 +1775,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const
TCGArg *args,
};
TCGReg addr_reg, data_reg;
int mem_index;
- uint64_t pre1, pre2, *desc, func, gp, here;
+ uint64_t pre1, pre2;
TCGMemOp s_bits;
+ uint8_t *label_ptr;
data_reg = *args++;
addr_reg = *args++;
@@ -1758,32 +1806,20 @@ static inline void tcg_out_qemu_st(TCGContext *s, const
TCGArg *args,
pre1, pre2);
/* P6 is the fast path, and P7 the slow path */
-
- desc = (uintptr_t *)qemu_st_helpers[s_bits];
- func = desc[0];
- gp = desc[1];
- here = (uintptr_t)s->code_ptr;
-
- tcg_out_bundle(s, mlx,
+ tcg_out_bundle(s, mmI,
tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
- tcg_opc_l2 (here),
- tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R60, here));
- tcg_out_bundle(s, mLX,
tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
TCG_REG_R2, TCG_REG_R57),
- tcg_opc_l2 (gp),
- tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp));
- tcg_out_bundle(s, mmi,
+ tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index));
+ label_ptr = s->code_ptr + 2;
+ tcg_out_bundle(s, miB,
tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
TCG_REG_R58, TCG_REG_R2),
- tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
- INSN_NOP_I);
- func -= (uintptr_t)s->code_ptr;
- tcg_out_bundle(s, mLX,
- INSN_NOP_M,
- tcg_opc_l4 (func >> 4),
- tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4,
- TCG_REG_B0, func >> 4));
+ INSN_NOP_I,
+ tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0,
+ get_reloc_pcrel21b(label_ptr)));
+
+ add_qemu_ldst_label(s, 0, opc, label_ptr);
}
#else /* !CONFIG_SOFTMMU */
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 65897f2..9e1e8ba 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -25,7 +25,7 @@
#ifndef TCG_TARGET_IA64
#define TCG_TARGET_IA64 1
-#undef TCG_QEMU_LDST_OPTIMIZATION
+#define TCG_QEMU_LDST_OPTIMIZATION
/* We only map the first 64 registers */
#define TCG_TARGET_NB_REGS 64
--
1.8.3.1
- [Qemu-devel] [PATCH 09/19] tcg-ia64: Use A3 form of logical operations, (continued)
- [Qemu-devel] [PATCH 09/19] tcg-ia64: Use A3 form of logical operations, Richard Henderson, 2013/09/06
- [Qemu-devel] [PATCH 10/19] tcg-ia64 Introduce tcg_opc_mov_a, Richard Henderson, 2013/09/06
- [Qemu-devel] [PATCH 11/19] tcg-ia64 Introduce tcg_opc_movi_a, Richard Henderson, 2013/09/06
- [Qemu-devel] [PATCH 12/19] tcg-ia64 Introduce tcg_opc_ext_i, Richard Henderson, 2013/09/06
- [Qemu-devel] [PATCH 13/19] tcg-ia64 Introduce tcg_opc_bswap64_i, Richard Henderson, 2013/09/06
- [Qemu-devel] [PATCH 14/19] tcg-ia64: Re-bundle the tlb load, Richard Henderson, 2013/09/06
- [Qemu-devel] [PATCH 15/19] tcg-ia64: Move bswap for store into tlb load, Richard Henderson, 2013/09/06
- [Qemu-devel] [PATCH 16/19] tcg-ia64: Move tlb addend load into tlb read, Richard Henderson, 2013/09/06
- [Qemu-devel] [PATCH 17/19] tcg-i64: Reduce code duplication in tcg_out_qemu_ld, Richard Henderson, 2013/09/06
- [Qemu-devel] [PATCH 18/19] tcg-ia64: Convert to new ldst helpers, Richard Henderson, 2013/09/06
- [Qemu-devel] [PATCH 19/19] tcg-ia64: Move part of softmmu slow path out of line,
Richard Henderson <=