[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v6 17/21] accel/tcg: Add fast path for translator_ld*
From: |
Ilya Leoshkevich |
Subject: |
Re: [PATCH v6 17/21] accel/tcg: Add fast path for translator_ld* |
Date: |
Tue, 23 Aug 2022 01:15:44 +0200 |
User-agent: |
Evolution 3.36.5-0ubuntu1 |
On Thu, 2022-08-18 at 20:26 -0700, Richard Henderson wrote:
> Cache the translation from guest to host address, so we may
> use direct loads when we hit on the primary translation page.
>
> Look up the second translation page only once, during translation.
> This obviates another lookup of the second page within tb_gen_code
> after translation.
>
> Fixes a bug in that plugin_insn_append should be passed the bytes
> in the original memory order, not bswapped by pieces.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> include/exec/translator.h | 63 +++++++++++--------
> accel/tcg/translate-all.c | 26 +++-----
> accel/tcg/translator.c | 127 +++++++++++++++++++++++++++++-------
> --
> 3 files changed, 144 insertions(+), 72 deletions(-)
>
> diff --git a/include/exec/translator.h b/include/exec/translator.h
> index 69db0f5c21..329a42fe46 100644
> --- a/include/exec/translator.h
> +++ b/include/exec/translator.h
> @@ -81,24 +81,14 @@ typedef enum DisasJumpType {
> * Architecture-agnostic disassembly context.
> */
> typedef struct DisasContextBase {
> - const TranslationBlock *tb;
> + TranslationBlock *tb;
> target_ulong pc_first;
> target_ulong pc_next;
> DisasJumpType is_jmp;
> int num_insns;
> int max_insns;
> bool singlestep_enabled;
> -#ifdef CONFIG_USER_ONLY
> - /*
> - * Guest address of the last byte of the last protected page.
> - *
> - * Pages containing the translated instructions are made non-
> writable in
> - * order to achieve consistency in case another thread is
> modifying the
> - * code while translate_insn() fetches the instruction bytes
> piecemeal.
> - * Such writer threads are blocked on mmap_lock() in
> page_unprotect().
> - */
> - target_ulong page_protect_end;
> -#endif
> + void *host_addr[2];
> } DisasContextBase;
>
> /**
> @@ -183,24 +173,43 @@ bool translator_use_goto_tb(DisasContextBase
> *db, target_ulong dest);
> * the relevant information at translation time.
> */
>
> -#define GEN_TRANSLATOR_LD(fullname, type, load_fn,
> swap_fn) \
> - type fullname ## _swap(CPUArchState *env, DisasContextBase
> *dcbase, \
> - abi_ptr pc, bool
> do_swap); \
> - static inline type fullname(CPUArchState
> *env, \
> - DisasContextBase *dcbase, abi_ptr
> pc) \
> - {
> \
> - return fullname ## _swap(env, dcbase, pc,
> false); \
> +uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc);
> +uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc);
> +uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc);
> +uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc);
> +
> +static inline uint16_t
> +translator_lduw_swap(CPUArchState *env, DisasContextBase *db,
> + abi_ptr pc, bool do_swap)
> +{
> + uint16_t ret = translator_lduw(env, db, pc);
> + if (do_swap) {
> + ret = bswap16(ret);
> }
> + return ret;
> +}
>
> -#define
> FOR_EACH_TRANSLATOR_LD(F) \
> - F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap
> */) \
> - F(translator_lduw, uint16_t, cpu_lduw_code,
> bswap16) \
> - F(translator_ldl, uint32_t, cpu_ldl_code,
> bswap32) \
> - F(translator_ldq, uint64_t, cpu_ldq_code, bswap64)
> +static inline uint32_t
> +translator_ldl_swap(CPUArchState *env, DisasContextBase *db,
> + abi_ptr pc, bool do_swap)
> +{
> + uint32_t ret = translator_ldl(env, db, pc);
> + if (do_swap) {
> + ret = bswap32(ret);
> + }
> + return ret;
> +}
>
> -FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
> -
> -#undef GEN_TRANSLATOR_LD
> +static inline uint64_t
> +translator_ldq_swap(CPUArchState *env, DisasContextBase *db,
> + abi_ptr pc, bool do_swap)
> +{
> + uint64_t ret = translator_ldq_swap(env, db, pc, false);
> + if (do_swap) {
> + ret = bswap64(ret);
> + }
> + return ret;
> +}
>
> /*
> * Return whether addr is on the same page as where disassembly
> started.
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index b224f856d0..e44f40b234 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1385,10 +1385,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> {
> CPUArchState *env = cpu->env_ptr;
> TranslationBlock *tb, *existing_tb;
> - tb_page_addr_t phys_pc, phys_page2;
> - target_ulong virt_page2;
> + tb_page_addr_t phys_pc;
> tcg_insn_unit *gen_code_buf;
> int gen_code_size, search_size, max_insns;
> + void *host_pc;
> #ifdef CONFIG_PROFILER
> TCGProfile *prof = &tcg_ctx->prof;
> int64_t ti;
> @@ -1397,7 +1397,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> assert_memory_lock();
> qemu_thread_jit_write();
>
> - phys_pc = get_page_addr_code_hostp(env, pc, false, NULL);
> + phys_pc = get_page_addr_code_hostp(env, pc, false, &host_pc);
>
> if (phys_pc == -1) {
> /* Generate a one-shot TB with 1 insn in it */
> @@ -1428,6 +1428,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> tb->flags = flags;
> tb->cflags = cflags;
> tb->trace_vcpu_dstate = *cpu->trace_dstate;
> + tb->page_addr[0] = phys_pc;
> + tb->page_addr[1] = -1;
> tcg_ctx->tb_cflags = cflags;
> tb_overflow:
>
> @@ -1621,13 +1623,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> }
>
> /*
> - * If the TB is not associated with a physical RAM page then
> - * it must be a temporary one-insn TB, and we have nothing to do
> - * except fill in the page_addr[] fields. Return early before
> - * attempting to link to other TBs or add to the lookup table.
> + * If the TB is not associated with a physical RAM page then it
> must be
> + * a temporary one-insn TB, and we have nothing left to do.
> Return early
> + * before attempting to link to other TBs or add to the lookup
> table.
> */
> - if (phys_pc == -1) {
> - tb->page_addr[0] = tb->page_addr[1] = -1;
> + if (tb->page_addr[0] == -1) {
> return tb;
> }
>
> @@ -1638,17 +1638,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> */
> tcg_tb_insert(tb);
>
> - /* check next page if needed */
> - virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
> - phys_page2 = -1;
> - if ((pc & TARGET_PAGE_MASK) != virt_page2) {
> - phys_page2 = get_page_addr_code(env, virt_page2);
> - }
> /*
> * No explicit memory barrier is required -- tb_link_page()
> makes the
> * TB visible in a consistent state.
> */
> - existing_tb = tb_link_page(tb, phys_pc, phys_page2);
> + existing_tb = tb_link_page(tb, tb->page_addr[0], tb-
> >page_addr[1]);
> /* if the TB already exists, discard what we just translated */
> if (unlikely(existing_tb != tb)) {
> uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index 3eef30d93a..c8e9523e52 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -42,15 +42,6 @@ bool translator_use_goto_tb(DisasContextBase *db,
> target_ulong dest)
> return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0;
> }
>
> -static inline void translator_page_protect(DisasContextBase *dcbase,
> - target_ulong pc)
> -{
> -#ifdef CONFIG_USER_ONLY
> - dcbase->page_protect_end = pc | ~TARGET_PAGE_MASK;
> - page_protect(pc);
> -#endif
> -}
> -
> void translator_loop(CPUState *cpu, TranslationBlock *tb, int
> max_insns,
> target_ulong pc, void *host_pc,
> const TranslatorOps *ops, DisasContextBase *db)
> @@ -66,7 +57,12 @@ void translator_loop(CPUState *cpu,
> TranslationBlock *tb, int max_insns,
> db->num_insns = 0;
> db->max_insns = max_insns;
> db->singlestep_enabled = cflags & CF_SINGLE_STEP;
> - translator_page_protect(db, db->pc_next);
> + db->host_addr[0] = host_pc;
> + db->host_addr[1] = NULL;
> +
> +#ifdef CONFIG_USER_ONLY
> + page_protect(pc);
> +#endif
>
> ops->init_disas_context(db, cpu);
> tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
> @@ -151,31 +147,104 @@ void translator_loop(CPUState *cpu,
> TranslationBlock *tb, int max_insns,
> #endif
> }
>
> -static inline void translator_maybe_page_protect(DisasContextBase
> *dcbase,
> - target_ulong pc,
> size_t len)
> +static void *translator_access(CPUArchState *env, DisasContextBase
> *db,
> + target_ulong pc, size_t len)
> {
> -#ifdef CONFIG_USER_ONLY
> - target_ulong end = pc + len - 1;
> + void *host;
> + target_ulong base, end;
> + TranslationBlock *tb;
>
> - if (end > dcbase->page_protect_end) {
> - translator_page_protect(dcbase, end);
> + tb = db->tb;
> +
> + /* Use slow path if first page is MMIO. */
> + if (unlikely(tb->page_addr[0] == -1)) {
> + return NULL;
> }
> +
> + end = pc + len - 1;
> + if (likely(is_same_page(db, end))) {
> + host = db->host_addr[0];
> + base = db->pc_first;
> + } else {
> + host = db->host_addr[1];
> + base = TARGET_PAGE_ALIGN(db->pc_first);
> + if (host == NULL) {
> + tb->page_addr[1] =
> + get_page_addr_code_hostp(env, base, false,
> + &db->host_addr[1]);
> +#ifdef CONFIG_USER_ONLY
> + page_protect(end);
> #endif
> + /* We cannot handle MMIO as second page. */
> + assert(tb->page_addr[1] != -1);
> + host = db->host_addr[1];
> + }
> +
> + /* Use slow path when crossing pages. */
> + if (is_same_page(db, pc)) {
> + return NULL;
> + }
> + }
> +
> + tcg_debug_assert(pc >= base);
> + return host + (pc - base);
> }
>
> -#define GEN_TRANSLATOR_LD(fullname, type, load_fn,
> swap_fn) \
> - type fullname ## _swap(CPUArchState *env, DisasContextBase
> *dcbase, \
> - abi_ptr pc, bool
> do_swap) \
> - {
> \
> - translator_maybe_page_protect(dcbase, pc,
> sizeof(type)); \
> - type ret = load_fn(env,
> pc); \
> - if (do_swap)
> { \
> - ret =
> swap_fn(ret); \
> - }
> \
> - plugin_insn_append(pc, &ret,
> sizeof(ret)); \
> - return
> ret; \
> +uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc)
> +{
> + uint8_t ret;
> + void *p = translator_access(env, db, pc, sizeof(ret));
> +
> + if (p) {
> + plugin_insn_append(pc, p, sizeof(ret));
> + return ldub_p(p);
> }
> + ret = cpu_ldub_code(env, pc);
> + plugin_insn_append(pc, &ret, sizeof(ret));
> + return ret;
> +}
>
> -FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
> +uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc)
> +{
> + uint16_t ret, plug;
> + void *p = translator_access(env, db, pc, sizeof(ret));
>
> -#undef GEN_TRANSLATOR_LD
> + if (p) {
> + plugin_insn_append(pc, p, sizeof(ret));
> + return lduw_p(p);
> + }
> + ret = cpu_lduw_code(env, pc);
> + plug = tswap16(ret);
> + plugin_insn_append(pc, &plug, sizeof(ret));
> + return ret;
> +}
> +
> +uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc)
> +{
> + uint32_t ret, plug;
> + void *p = translator_access(env, db, pc, sizeof(ret));
> +
> + if (p) {
> + plugin_insn_append(pc, p, sizeof(ret));
> + return ldl_p(p);
> + }
> + ret = cpu_ldl_code(env, pc);
> + plug = tswap32(ret);
> + plugin_insn_append(pc, &plug, sizeof(ret));
> + return ret;
> +}
> +
> +uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc)
> +{
> + uint64_t ret, plug;
> + void *p = translator_access(env, db, pc, sizeof(ret));
> +
> + if (p) {
> + plugin_insn_append(pc, p, sizeof(ret));
> + return ldq_p(p);
> + }
> + ret = cpu_ldq_code(env, pc);
> + plug = tswap64(ret);
> + plugin_insn_append(pc, &plug, sizeof(ret));
> + return ret;
> +}
Hi,
I think you need the following fixup here:
--- a/tests/tcg/multiarch/noexec.c.inc
+++ b/tests/tcg/multiarch/noexec.c.inc
@@ -1,8 +1,5 @@
/*
* Common code for arch-specific MMU_INST_FETCH fault testing.
- *
- * Declare struct arch_noexec_test before including this file and
define
- * arch_check_mcontext() after that.
*/
#define _GNU_SOURCE
@@ -13,6 +10,7 @@
#include <stdlib.h>
#include <string.h>
#include <errno.h>
+#include <unistd.h>
#include <sys/mman.h>
#include <sys/ucontext.h>
After the simplifications the comment is no longer true or useful;
unistd.h is needed for getpagesize().
With that:
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
for the series.
Best regards,
Ilya
- [PATCH v6 12/21] accel/tcg: Use probe_access_internal for softmmu get_page_addr_code_hostp, (continued)
- [PATCH v6 12/21] accel/tcg: Use probe_access_internal for softmmu get_page_addr_code_hostp, Richard Henderson, 2022/08/18
- [PATCH v6 10/21] accel/tcg: Make tb_htable_lookup static, Richard Henderson, 2022/08/18
- [PATCH v6 14/21] accel/tcg: Raise PROT_EXEC exception early, Richard Henderson, 2022/08/18
- [PATCH v6 18/21] target/s390x: Make translator stop before the end of a page, Richard Henderson, 2022/08/18
- [PATCH v6 16/21] accel/tcg: Add pc and host_pc params to gen_intermediate_code, Richard Henderson, 2022/08/18
- [PATCH v6 19/21] target/i386: Make translator stop before the end of a page, Richard Henderson, 2022/08/18
- [PATCH v6 17/21] accel/tcg: Add fast path for translator_ld*, Richard Henderson, 2022/08/18
- Re: [PATCH v6 17/21] accel/tcg: Add fast path for translator_ld*,
Ilya Leoshkevich <=
- [PATCH v6 08/21] accel/tcg: Properly implement get_page_addr_code for user-only, Richard Henderson, 2022/08/18
- [PATCH v6 21/21] target/riscv: Make translator stop before the end of a page, Richard Henderson, 2022/08/18
- [PATCH v6 20/21] target/riscv: Add MAX_INSN_LEN and insn_len, Richard Henderson, 2022/08/18
- Re: [PATCH v6 00/21] linux-user: Fix siginfo_t contents when jumping to non-readable pages, Vivian Wang, 2022/08/19