[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v3 14/20] tcg-arm: Cleanup goto_tb handling
From: |
Richard Henderson |
Subject: |
[Qemu-devel] [PATCH v3 14/20] tcg-arm: Cleanup goto_tb handling |
Date: |
Thu, 28 Mar 2013 08:32:55 -0700 |
Eliminate 2 disabled code blocks. Choose the load-to-pc method of
jumping so that we can eliminate the 16M code_gen_buffer limitation.
Remove a test in the indirect jump method that is always true.
Signed-off-by: Richard Henderson <address@hidden>
---
include/exec/exec-all.h | 23 +++--------------------
tcg/arm/tcg-target.c | 26 ++++++--------------------
translate-all.c | 2 --
3 files changed, 9 insertions(+), 42 deletions(-)
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index e856191..190effa 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -233,26 +233,9 @@ static inline void tb_set_jmp_target1(uintptr_t jmp_addr,
uintptr_t addr)
#elif defined(__arm__)
static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
{
-#if !QEMU_GNUC_PREREQ(4, 1)
- register unsigned long _beg __asm ("a1");
- register unsigned long _end __asm ("a2");
- register unsigned long _flg __asm ("a3");
-#endif
-
- /* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */
- *(uint32_t *)jmp_addr =
- (*(uint32_t *)jmp_addr & ~0xffffff)
- | (((addr - (jmp_addr + 8)) >> 2) & 0xffffff);
-
-#if QEMU_GNUC_PREREQ(4, 1)
- __builtin___clear_cache((char *) jmp_addr, (char *) jmp_addr + 4);
-#else
- /* flush icache */
- _beg = jmp_addr;
- _end = jmp_addr + 4;
- _flg = 0;
- __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg));
-#endif
+ /* We're using "ldr pc, [pc,#-4]", so we can just store the raw
+ address, without caring for flushing the icache. */
+ *(uint32_t *)jmp_addr = addr;
}
#elif defined(__sparc__)
void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr);
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 7bcba19..6de5e90 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1595,30 +1595,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode
opc,
break;
case INDEX_op_goto_tb:
if (s->tb_jmp_offset) {
- /* Direct jump method */
-#if defined(USE_DIRECT_JUMP)
- s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
- tcg_out_b_noaddr(s, COND_AL);
-#else
+ /* "Direct" jump method. Rather than limit the code gen buffer
+ to 16M, load the destination from the next word. */
tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
- tcg_out32(s, 0);
-#endif
+ s->code_ptr += 4;
} else {
- /* Indirect jump method */
-#if 1
- c = (int) (s->tb_next + args[0]) - ((int) s->code_ptr + 8);
- if (c > 0xfff || c < -0xfff) {
- tcg_out_movi32(s, COND_AL, TCG_REG_R0,
- (tcg_target_long) (s->tb_next + args[0]));
- tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0);
- } else
- tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, c);
-#else
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
+ /* Indirect jump method. */
+ tcg_out_movi32(s, COND_AL, TCG_REG_R0,
+ (tcg_target_long) (s->tb_next + args[0]));
tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0);
- tcg_out32(s, (tcg_target_long) (s->tb_next + args[0]));
-#endif
}
s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
break;
diff --git a/translate-all.c b/translate-all.c
index a98c646..3ca839f 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -460,8 +460,6 @@ static inline PageDesc *page_find(tb_page_addr_t index)
# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
#elif defined(__sparc__)
# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
-#elif defined(__arm__)
-# define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
#elif defined(__s390x__)
/* We have a +- 4GB range on the branches; leave some slop. */
# define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
--
1.8.1.4
- Re: [Qemu-devel] [PATCH v3 07/20] tcg-arm: Fold epilogue into INDEX_op_exit_tb, (continued)
- [Qemu-devel] [PATCH v3 09/20] tcg-arm: Implement division instructions, Richard Henderson, 2013/03/28
- [Qemu-devel] [PATCH v3 10/20] tcg-arm: Use TCG_REG_TMP name for the tcg temporary, Richard Henderson, 2013/03/28
- [Qemu-devel] [PATCH v3 11/20] tcg-arm: Use R12 for the tcg temporary, Richard Henderson, 2013/03/28
- [Qemu-devel] [PATCH v3 12/20] tcg-arm: Cleanup multiply subroutines, Richard Henderson, 2013/03/28
- [Qemu-devel] [PATCH v3 13/20] tcg-arm: Cleanup tcg_out_goto_label, Richard Henderson, 2013/03/28
- [Qemu-devel] [PATCH v3 14/20] tcg-arm: Cleanup goto_tb handling,
Richard Henderson <=
[Qemu-devel] [PATCH v3 15/20] tcg-arm: Cleanup most primitive load store subroutines, Richard Henderson, 2013/03/28
[Qemu-devel] [PATCH v3 16/20] tcg-arm: Fix local stack frame, Richard Henderson, 2013/03/28
[Qemu-devel] [PATCH v3 17/20] tcg-arm: Split out tcg_out_tlb_read, Richard Henderson, 2013/03/28
[Qemu-devel] [PATCH v3 18/20] tcg-arm: Improve scheduling of tcg_out_tlb_read, Richard Henderson, 2013/03/28
[Qemu-devel] [PATCH v3 20/20] tcg-arm: Convert to CONFIG_QEMU_LDST_OPTIMIZATION, Richard Henderson, 2013/03/28