[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 35/35] tcg/arm: Use LDRD to load tlb mask+table
From: |
Richard Henderson |
Subject: |
[Qemu-devel] [PATCH 35/35] tcg/arm: Use LDRD to load tlb mask+table |
Date: |
Sat, 23 Mar 2019 12:09:25 -0700 |
Signed-off-by: Richard Henderson <address@hidden>
---
tcg/arm/tcg-target.inc.c | 109 +++++++++++++++++++--------------------
1 file changed, 52 insertions(+), 57 deletions(-)
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 4a8c12e9a4..e2fd42e218 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -278,6 +278,7 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
break;
@@ -1253,75 +1254,69 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP, TCG_AREG0, mask_off);
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R2, TCG_AREG0, table_off);
-
- /* Extract the tlb index from the address into TMP. */
- tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, addrlo,
- SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
-
/*
- * Add the tlb_table pointer, creating the CPUTLBEntry address in R2.
- * Load the tlb comparator into R0/R1 and the fast path addend into R2.
+ * We don't support inline unaligned accesses, but we can easily
+ * support overalignment checks.
*/
- if (cmp_off == 0) {
- if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
- tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
- } else {
- tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
- }
- } else {
- tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
- TCG_REG_R2, TCG_REG_R2, TCG_REG_TMP, 0);
- if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
- } else {
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
- }
- }
- if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
- }
-
- /* Load the tlb addend. */
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2,
- offsetof(CPUTLBEntry, addend));
-
- /* Check alignment. We don't support inline unaligned acceses,
- but we can easily support overalignment checks. */
if (a_bits < s_bits) {
a_bits = s_bits;
}
- if (use_armv7_instructions) {
- tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
- int rot = encode_imm(mask);
-
- if (rot >= 0) {
- tcg_out_dat_imm(s, COND_AL, ARITH_BIC, TCG_REG_TMP, addrlo,
- rotl(mask, rot) | (rot << 7));
- } else {
- tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
- tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
- addrlo, TCG_REG_TMP, 0);
- }
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R0, TCG_REG_TMP, 0);
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
+ if (use_armv6_instructions) {
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
} else {
- if (a_bits) {
- tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
- (1 << a_bits) - 1);
- }
- tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
- 0, TCG_REG_R0, TCG_REG_TMP,
- SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R0, TCG_AREG0, mask_off);
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R1, TCG_AREG0, table_off);
}
+ /* Extract the tlb index from the address into R0. */
+ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
+
+ /*
+ * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
+ * Load the tlb comparator into R2/R3 and the fast path addend into R1.
+ */
+ if (cmp_off == 0) {
+ if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
+ } else {
+ tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
+ }
+ } else {
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
+ TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
+ if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+ } else {
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+ }
+ }
+ if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R3, TCG_REG_R1, cmp_off + 4);
+ }
+
+ /* Shift the non-page bits out. */
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_R0, 0, addrlo,
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+
+ /* Load the tlb addend. */
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
+ offsetof(CPUTLBEntry, addend));
+
+ /* Check alignment, check comparators. */
+ if (a_bits) {
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1);
+ }
+ tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
+ TCG_REG_R2, TCG_REG_R0, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+
if (TARGET_LONG_BITS == 64) {
- tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addrhi, 0);
+ tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
}
- return TCG_REG_R2;
+ return TCG_REG_R1;
}
/* Record the context of a call to the out of line helper code for the slow
--
2.17.1
- [Qemu-devel] [PATCH for-4.1 00/35] tcg: Move the softmmu tlb to CPUNegativeOffsetState, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 16/35] target/mips: Use env_cpu, env_archcpu, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 30/35] cpu: Introduce CPUNegativeOffsetState, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 32/35] cpu: Move the softmmu tlb to CPUNegativeOffsetState, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 29/35] cpu: Move ENV_OFFSET to exec/gen-icount.h, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 25/35] target/tilegx: Use env_cpu, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 33/35] cpu: Remove CPU_COMMON, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 31/35] cpu: Move icount_decr to CPUNegativeOffsetState, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 27/35] target/unicore32: Use env_cpu, env_archcpu, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 35/35] tcg/arm: Use LDRD to load tlb mask+table,
Richard Henderson <=
- [Qemu-devel] [PATCH 28/35] target/xtensa: Use env_cpu, env_archcpu, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 20/35] target/ppc: Use env_cpu, env_archcpu, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 34/35] tcg/aarch64: Use LDP to load tlb mask+table, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 22/35] target/s390x: Use env_cpu, env_archcpu, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 17/35] target/moxie: Use env_cpu, env_archcpu, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 19/35] target/openrisc: Use env_cpu, env_archcpu, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 18/35] target/nios2: Use env_cpu, env_archcpu, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 24/35] target/sparc: Use env_cpu, env_archcpu, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 23/35] target/sh4: Use env_cpu, env_archcpu, Richard Henderson, 2019/03/23
- [Qemu-devel] [PATCH 21/35] target/riscv: Use env_cpu, env_archcpu, Richard Henderson, 2019/03/23