Re: [Qemu-devel] [PATCH 03/35] tcg: Create struct CPUTLB

From: Alistair Francis
Date: Tue, 26 Mar 2019 13:35:48 -0700
On Sat, Mar 23, 2019 at 12:47 PM Richard Henderson
<address@hidden> wrote:
>
> Move all softmmu tlb data into this structure. Arrange the
> members so that we are able to place mask+table together and
> at a smaller absolute offset from ENV.
>
> Signed-off-by: Richard Henderson <address@hidden>
Acked-by: Alistair Francis <address@hidden>
Alistair
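
A quick orientation for anyone skimming the diff below: the patch collapses
the parallel per-mmu-index arrays that CPU_COMMON_TLB used to splice into
CPUArchState (tlb_mask[], tlb_table[], tlb_v_table[], iotlb[], iotlb_v[])
into one aggregate. A rough sketch of the resulting shape (the declarations
are from the patch, the field comments are mine):

    typedef struct CPUTLB {
        CPUTLBDescFast f[NB_MMU_MODES]; /* mask + table: the TCG fast path */
        CPUTLBDesc d[NB_MMU_MODES];     /* victim tlb, iotlb, resize state */
        CPUTLBCommon c;                 /* lock, dirty bits, flush counters */
    } CPUTLB;

Putting f[] first keeps mask and table adjacent and at the smallest possible
offset from ENV, which is what lets the tcg/*/tcg-target.inc.c hunks drop
their "tlb_mask is before/near tlb_table" build-time assertions.
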
> ---
> accel/tcg/softmmu_template.h | 4 +-
> include/exec/cpu-defs.h | 61 ++++++++-------
> include/exec/cpu_ldst.h | 6 +-
> accel/tcg/cputlb.c | 147 ++++++++++++++++++-----------------
> target/arm/translate-a64.c | 2 +-
> tcg/aarch64/tcg-target.inc.c | 10 +--
> tcg/arm/tcg-target.inc.c | 10 +--
> tcg/i386/tcg-target.inc.c | 4 +-
> tcg/mips/tcg-target.inc.c | 12 +--
> tcg/ppc/tcg-target.inc.c | 8 +-
> tcg/riscv/tcg-target.inc.c | 12 +--
> tcg/s390/tcg-target.inc.c | 8 +-
> tcg/sparc/tcg-target.inc.c | 12 +--
> 13 files changed, 135 insertions(+), 161 deletions(-)
>
> diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h
> index e970a8b378..fc6371aed1 100644
> --- a/accel/tcg/softmmu_template.h
> +++ b/accel/tcg/softmmu_template.h
> @@ -102,7 +102,7 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
> bool recheck,
> MMUAccessType access_type)
> {
> - CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
> + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
> return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, recheck,
> access_type, DATA_SIZE);
> }
> @@ -273,7 +273,7 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env,
> uintptr_t retaddr,
> bool recheck)
> {
> - CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
> + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
> return io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
> recheck, DATA_SIZE);
> }
> diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
> index 2694481769..fbe8945606 100644
> --- a/include/exec/cpu-defs.h
> +++ b/include/exec/cpu-defs.h
> @@ -78,6 +78,7 @@ typedef uint64_t target_ulong;
> #endif
>
> #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
> +
> /* use a fully associative victim tlb of 8 entries */
> #define CPU_VTLB_SIZE 8
>
> @@ -147,6 +148,10 @@ typedef struct CPUIOTLBEntry {
> MemTxAttrs attrs;
> } CPUIOTLBEntry;
>
> +/*
> + * Data elements that are per MMU mode, minus the bits accessed by
> + * the TCG fast path.
> + */
> typedef struct CPUTLBDesc {
> /*
> * Describe a region covering all of the large pages allocated
> @@ -160,16 +165,31 @@ typedef struct CPUTLBDesc {
> int64_t window_begin_ns;
> /* maximum number of entries observed in the window */
> size_t window_max_entries;
> + size_t n_used_entries;
> /* The next index to use in the tlb victim table. */
> size_t vindex;
> - size_t n_used_entries;
> + /* The tlb victim table, in two parts. */
> + CPUTLBEntry vtable[CPU_VTLB_SIZE];
> + CPUIOTLBEntry viotlb[CPU_VTLB_SIZE];
> + /* The iotlb. */
> + CPUIOTLBEntry *iotlb;
> } CPUTLBDesc;
>
> +/*
> + * Data elements that are per MMU mode, accessed by the fast path.
> + */
> +typedef struct CPUTLBDescFast {
> + /* Contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */
> + uintptr_t mask;
> + /* The array of tlb entries itself. */
> + CPUTLBEntry *table;
> +} CPUTLBDescFast;
> +
> /*
> * Data elements that are shared between all MMU modes.
> */
> typedef struct CPUTLBCommon {
> - /* Serialize updates to tlb_table and tlb_v_table, and others as noted. */
> + /* Serialize updates to tlb_table and vtable, and others as noted. */
> QemuSpin lock;
> /*
> * Within dirty, for each bit N, modifications have been made to
> @@ -187,35 +207,24 @@ typedef struct CPUTLBCommon {
> size_t elide_flush_count;
> } CPUTLBCommon;
>
> -# define CPU_TLB \
> - /* tlb_mask[i] contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */ \
> - uintptr_t tlb_mask[NB_MMU_MODES]; \
> - CPUTLBEntry *tlb_table[NB_MMU_MODES];
> -# define CPU_IOTLB \
> - CPUIOTLBEntry *iotlb[NB_MMU_MODES];
> -
> /*
> + * The entire softmmu tlb, for all MMU modes.
> * The meaning of each of the MMU modes is defined in the target code.
> - * Note that NB_MMU_MODES is not yet defined; we can only reference it
> - * within preprocessor defines that will be expanded later.
> */
> -#define CPU_COMMON_TLB \
> - CPUTLBCommon tlb_c; \
> - CPUTLBDesc tlb_d[NB_MMU_MODES]; \
> - CPU_TLB \
> - CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \
> - CPU_IOTLB \
> - CPUIOTLBEntry iotlb_v[NB_MMU_MODES][CPU_VTLB_SIZE];
> +typedef struct CPUTLB {
> + CPUTLBDescFast f[NB_MMU_MODES];
> + CPUTLBDesc d[NB_MMU_MODES];
> + CPUTLBCommon c;
> +} CPUTLB;
> +
> +/* There are target-specific members named "tlb". This is temporary. */
> +#define CPU_COMMON CPUTLB tlb_;
> +#define env_tlb(ENV) (&(ENV)->tlb_)
>
> #else
>
> -#define CPU_COMMON_TLB
> -
> -#endif
> -
> -
> -#define CPU_COMMON \
> - /* soft mmu support */ \
> - CPU_COMMON_TLB \
> +#define CPU_COMMON /* Nothing */
> +
> +#endif /* !CONFIG_USER_ONLY && CONFIG_TCG */
>
> #endif
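
With this layout the two fast-path fields for a given mmu_idx sit next to
each other, reachable from one small offset off ENV. A minimal standalone
sanity check of those assumptions (my sketch, not part of the patch;
QEMU_BUILD_BUG_ON is the existing compile-time assert macro):

    /* f[] must come first so its offset within CPUTLB is zero, and
     * table must directly follow mask inside CPUTLBDescFast. */
    QEMU_BUILD_BUG_ON(offsetof(CPUTLB, f) != 0);
    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table)
                      != offsetof(CPUTLBDescFast, mask) + sizeof(uintptr_t));
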
> diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
> index d78041d7a0..09abd95008 100644
> --- a/include/exec/cpu_ldst.h
> +++ b/include/exec/cpu_ldst.h
> @@ -139,21 +139,21 @@ static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
> static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
> target_ulong addr)
> {
> - uintptr_t size_mask = env->tlb_mask[mmu_idx] >> CPU_TLB_ENTRY_BITS;
> + uintptr_t size_mask = env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS;
>
> return (addr >> TARGET_PAGE_BITS) & size_mask;
> }
>
> static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
> {
> - return (env->tlb_mask[mmu_idx] >> CPU_TLB_ENTRY_BITS) + 1;
> + return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
> }
>
> /* Find the TLB entry corresponding to the mmu_idx + address pair. */
> static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
> target_ulong addr)
> {
> - return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)];
> + return &env_tlb(env)->f[mmu_idx].table[tlb_index(env, mmu_idx, addr)];
> }
>
> #ifdef MMU_MODE0_SUFFIX
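
The arithmetic above is worth spelling out: mask stores
(n_entries - 1) << CPU_TLB_ENTRY_BITS, so a single word encodes both the
current dynamic table size and a ready-made AND mask. A worked example (the
concrete numbers are illustrative only; CPU_TLB_ENTRY_BITS depends on the
target):

    /* Assume n_entries = 256 and CPU_TLB_ENTRY_BITS = 5 (32-byte entries). */
    uintptr_t mask = (256 - 1) << CPU_TLB_ENTRY_BITS;            /* 0x1fe0 */
    size_t index = (addr >> TARGET_PAGE_BITS)
                   & (mask >> CPU_TLB_ENTRY_BITS);               /* 0..255 */
    size_t n_entries = (mask >> CPU_TLB_ENTRY_BITS) + 1;         /* 256 */

The TCG backends apply the same mask directly to the shifted address, giving
a byte offset into the table with no extra scaling instruction.
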
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index 23586f9974..c28b6b6328 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -76,7 +76,7 @@ QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
>
> static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
> {
> - return env->tlb_mask[mmu_idx] + (1 << CPU_TLB_ENTRY_BITS);
> + return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
> }
>
> static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
> @@ -91,14 +91,14 @@ static void tlb_dyn_init(CPUArchState *env)
> int i;
>
> for (i = 0; i < NB_MMU_MODES; i++) {
> - CPUTLBDesc *desc = &env->tlb_d[i];
> + CPUTLBDesc *desc = &env_tlb(env)->d[i];
> size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
>
> tlb_window_reset(desc, get_clock_realtime(), 0);
> desc->n_used_entries = 0;
> - env->tlb_mask[i] = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
> - env->tlb_table[i] = g_new(CPUTLBEntry, n_entries);
> - env->iotlb[i] = g_new(CPUIOTLBEntry, n_entries);
> + env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
> + env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
> + env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
> }
> }
>
> @@ -144,7 +144,7 @@ static void tlb_dyn_init(CPUArchState *env)
> */
> static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
> {
> - CPUTLBDesc *desc = &env->tlb_d[mmu_idx];
> + CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
> size_t old_size = tlb_n_entries(env, mmu_idx);
> size_t rate;
> size_t new_size = old_size;
> @@ -187,14 +187,14 @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
> return;
> }
>
> - g_free(env->tlb_table[mmu_idx]);
> - g_free(env->iotlb[mmu_idx]);
> + g_free(env_tlb(env)->f[mmu_idx].table);
> + g_free(env_tlb(env)->d[mmu_idx].iotlb);
>
> tlb_window_reset(desc, now, 0);
> /* desc->n_used_entries is cleared by the caller */
> - env->tlb_mask[mmu_idx] = (new_size - 1) << CPU_TLB_ENTRY_BITS;
> - env->tlb_table[mmu_idx] = g_try_new(CPUTLBEntry, new_size);
> - env->iotlb[mmu_idx] = g_try_new(CPUIOTLBEntry, new_size);
> + env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
> + env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
> + env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
> /*
> * If the allocations fail, try smaller sizes. We just freed some
> * memory, so going back to half of new_size has a good chance of working.
> @@ -202,46 +202,47 @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
> * allocations to fail though, so we progressively reduce the allocation
> * size, aborting if we cannot even allocate the smallest TLB we support.
> */
> - while (env->tlb_table[mmu_idx] == NULL || env->iotlb[mmu_idx] == NULL) {
> + while (env_tlb(env)->f[mmu_idx].table == NULL ||
> + env_tlb(env)->d[mmu_idx].iotlb == NULL) {
> if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
> error_report("%s: %s", __func__, strerror(errno));
> abort();
> }
> new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
> - env->tlb_mask[mmu_idx] = (new_size - 1) << CPU_TLB_ENTRY_BITS;
> + env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
>
> - g_free(env->tlb_table[mmu_idx]);
> - g_free(env->iotlb[mmu_idx]);
> - env->tlb_table[mmu_idx] = g_try_new(CPUTLBEntry, new_size);
> - env->iotlb[mmu_idx] = g_try_new(CPUIOTLBEntry, new_size);
> + g_free(env_tlb(env)->f[mmu_idx].table);
> + g_free(env_tlb(env)->d[mmu_idx].iotlb);
> + env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
> + env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
> }
> }
>
> static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
> {
> tlb_mmu_resize_locked(env, mmu_idx);
> - memset(env->tlb_table[mmu_idx], -1, sizeof_tlb(env, mmu_idx));
> - env->tlb_d[mmu_idx].n_used_entries = 0;
> + memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
> + env_tlb(env)->d[mmu_idx].n_used_entries = 0;
> }
>
> static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
> {
> - env->tlb_d[mmu_idx].n_used_entries++;
> + env_tlb(env)->d[mmu_idx].n_used_entries++;
> }
>
> static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
> {
> - env->tlb_d[mmu_idx].n_used_entries--;
> + env_tlb(env)->d[mmu_idx].n_used_entries--;
> }
>
> void tlb_init(CPUState *cpu)
> {
> CPUArchState *env = cpu->env_ptr;
>
> - qemu_spin_init(&env->tlb_c.lock);
> + qemu_spin_init(&env_tlb(env)->c.lock);
>
> /* Ensure that cpu_reset performs a full flush. */
> - env->tlb_c.dirty = ALL_MMUIDX_BITS;
> + env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
>
> tlb_dyn_init(env);
> }
> @@ -273,9 +274,9 @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
> CPU_FOREACH(cpu) {
> CPUArchState *env = cpu->env_ptr;
>
> - full += atomic_read(&env->tlb_c.full_flush_count);
> - part += atomic_read(&env->tlb_c.part_flush_count);
> - elide += atomic_read(&env->tlb_c.elide_flush_count);
> + full += atomic_read(&env_tlb(env)->c.full_flush_count);
> + part += atomic_read(&env_tlb(env)->c.part_flush_count);
> + elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
> }
> *pfull = full;
> *ppart = part;
> @@ -285,10 +286,11 @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
> static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
> {
> tlb_table_flush_by_mmuidx(env, mmu_idx);
> - memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
> - env->tlb_d[mmu_idx].large_page_addr = -1;
> - env->tlb_d[mmu_idx].large_page_mask = -1;
> - env->tlb_d[mmu_idx].vindex = 0;
> + env_tlb(env)->d[mmu_idx].large_page_addr = -1;
> + env_tlb(env)->d[mmu_idx].large_page_mask = -1;
> + env_tlb(env)->d[mmu_idx].vindex = 0;
> + memset(env_tlb(env)->d[mmu_idx].vtable, -1,
> + sizeof(env_tlb(env)->d[0].vtable));
> }
>
> static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
> @@ -301,31 +303,31 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
>
> tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
>
> - qemu_spin_lock(&env->tlb_c.lock);
> + qemu_spin_lock(&env_tlb(env)->c.lock);
>
> - all_dirty = env->tlb_c.dirty;
> + all_dirty = env_tlb(env)->c.dirty;
> to_clean = asked & all_dirty;
> all_dirty &= ~to_clean;
> - env->tlb_c.dirty = all_dirty;
> + env_tlb(env)->c.dirty = all_dirty;
>
> for (work = to_clean; work != 0; work &= work - 1) {
> int mmu_idx = ctz32(work);
> tlb_flush_one_mmuidx_locked(env, mmu_idx);
> }
>
> - qemu_spin_unlock(&env->tlb_c.lock);
> + qemu_spin_unlock(&env_tlb(env)->c.lock);
>
> cpu_tb_jmp_cache_clear(cpu);
>
> if (to_clean == ALL_MMUIDX_BITS) {
> - atomic_set(&env->tlb_c.full_flush_count,
> - env->tlb_c.full_flush_count + 1);
> + atomic_set(&env_tlb(env)->c.full_flush_count,
> + env_tlb(env)->c.full_flush_count + 1);
> } else {
> - atomic_set(&env->tlb_c.part_flush_count,
> - env->tlb_c.part_flush_count + ctpop16(to_clean));
> + atomic_set(&env_tlb(env)->c.part_flush_count,
> + env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
> if (to_clean != asked) {
> - atomic_set(&env->tlb_c.elide_flush_count,
> - env->tlb_c.elide_flush_count +
> + atomic_set(&env_tlb(env)->c.elide_flush_count,
> + env_tlb(env)->c.elide_flush_count +
> ctpop16(asked & ~to_clean));
> }
> }
> @@ -410,11 +412,12 @@ static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
> static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
> target_ulong page)
> {
> + CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
> int k;
>
> assert_cpu_is_self(ENV_GET_CPU(env));
> for (k = 0; k < CPU_VTLB_SIZE; k++) {
> - if (tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page)) {
> + if (tlb_flush_entry_locked(&d->vtable[k], page)) {
> tlb_n_used_entries_dec(env, mmu_idx);
> }
> }
> @@ -423,8 +426,8 @@ static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
> static void tlb_flush_page_locked(CPUArchState *env, int midx,
> target_ulong page)
> {
> - target_ulong lp_addr = env->tlb_d[midx].large_page_addr;
> - target_ulong lp_mask = env->tlb_d[midx].large_page_mask;
> + target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
> + target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
>
> /* Check if we need to flush due to large pages. */
> if ((page & lp_mask) == lp_addr) {
> @@ -459,13 +462,13 @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
> tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
> addr, mmu_idx_bitmap);
>
> - qemu_spin_lock(&env->tlb_c.lock);
> + qemu_spin_lock(&env_tlb(env)->c.lock);
> for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
> if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
> tlb_flush_page_locked(env, mmu_idx, addr);
> }
> }
> - qemu_spin_unlock(&env->tlb_c.lock);
> + qemu_spin_unlock(&env_tlb(env)->c.lock);
>
> tb_flush_jmp_cache(cpu, addr);
> }
> @@ -609,22 +612,22 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
> int mmu_idx;
>
> env = cpu->env_ptr;
> - qemu_spin_lock(&env->tlb_c.lock);
> + qemu_spin_lock(&env_tlb(env)->c.lock);
> for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
> unsigned int i;
> unsigned int n = tlb_n_entries(env, mmu_idx);
>
> for (i = 0; i < n; i++) {
> - tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1,
> - length);
> + tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
> + start1, length);
> }
>
> for (i = 0; i < CPU_VTLB_SIZE; i++) {
> - tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1,
> - length);
> + tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
> + start1, length);
> }
> }
> - qemu_spin_unlock(&env->tlb_c.lock);
> + qemu_spin_unlock(&env_tlb(env)->c.lock);
> }
>
> /* Called with tlb_c.lock held */
> @@ -646,7 +649,7 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
> assert_cpu_is_self(cpu);
>
> vaddr &= TARGET_PAGE_MASK;
> - qemu_spin_lock(&env->tlb_c.lock);
> + qemu_spin_lock(&env_tlb(env)->c.lock);
> for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
> tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
> }
> @@ -654,10 +657,10 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
> for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
> int k;
> for (k = 0; k < CPU_VTLB_SIZE; k++) {
> - tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr);
> + tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
> }
> }
> - qemu_spin_unlock(&env->tlb_c.lock);
> + qemu_spin_unlock(&env_tlb(env)->c.lock);
> }
>
> /* Our TLB does not support large pages, so remember the area covered by
> @@ -665,7 +668,7 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
> static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
> target_ulong vaddr, target_ulong size)
> {
> - target_ulong lp_addr = env->tlb_d[mmu_idx].large_page_addr;
> + target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
> target_ulong lp_mask = ~(size - 1);
>
> if (lp_addr == (target_ulong)-1) {
> @@ -675,13 +678,13 @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
> /* Extend the existing region to include the new page.
> This is a compromise between unnecessary flushes and
> the cost of maintaining a full variable size TLB. */
> - lp_mask &= env->tlb_d[mmu_idx].large_page_mask;
> + lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
> while (((lp_addr ^ vaddr) & lp_mask) != 0) {
> lp_mask <<= 1;
> }
> }
> - env->tlb_d[mmu_idx].large_page_addr = lp_addr & lp_mask;
> - env->tlb_d[mmu_idx].large_page_mask = lp_mask;
> + env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
> + env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
> }
>
> /* Add a new TLB entry. At most one entry for a given virtual address
> @@ -757,10 +760,10 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> * a longer critical section, but this is not a concern since the TLB lock
> * is unlikely to be contended.
> */
> - qemu_spin_lock(&env->tlb_c.lock);
> + qemu_spin_lock(&env_tlb(env)->c.lock);
>
> /* Note that the tlb is no longer clean. */
> - env->tlb_c.dirty |= 1 << mmu_idx;
> + env_tlb(env)->c.dirty |= 1 << mmu_idx;
>
> /* Make sure there's no cached translation for the new page. */
> tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
> @@ -770,12 +773,12 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> * different page; otherwise just overwrite the stale data.
> */
> if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
> - unsigned vidx = env->tlb_d[mmu_idx].vindex++ % CPU_VTLB_SIZE;
> - CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx];
> + unsigned vidx = env_tlb(env)->d[mmu_idx].vindex++ % CPU_VTLB_SIZE;
> + CPUTLBEntry *tv = &env_tlb(env)->d[mmu_idx].vtable[vidx];
>
> /* Evict the old entry into the victim tlb. */
> copy_tlb_helper_locked(tv, te);
> - env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
> + env_tlb(env)->d[mmu_idx].viotlb[vidx] = env_tlb(env)->d[mmu_idx].iotlb[index];
> tlb_n_used_entries_dec(env, mmu_idx);
> }
>
> @@ -792,8 +795,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> * subtract here is that of the page base, and not the same as the
> * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
> */
> - env->iotlb[mmu_idx][index].addr = iotlb - vaddr_page;
> - env->iotlb[mmu_idx][index].attrs = attrs;
> + env_tlb(env)->d[mmu_idx].iotlb[index].addr = iotlb - vaddr_page;
> + env_tlb(env)->d[mmu_idx].iotlb[index].attrs = attrs;
>
> /* Now calculate the new entry */
> tn.addend = addend - vaddr_page;
> @@ -829,7 +832,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
>
> copy_tlb_helper_locked(te, &tn);
> tlb_n_used_entries_inc(env, mmu_idx);
> - qemu_spin_unlock(&env->tlb_c.lock);
> + qemu_spin_unlock(&env_tlb(env)->c.lock);
> }
>
> /* Add a new TLB entry, but without specifying the memory
> @@ -996,7 +999,7 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
>
> assert_cpu_is_self(ENV_GET_CPU(env));
> for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
> - CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
> + CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
> target_ulong cmp;
>
> /* elt_ofs might correspond to .addr_write, so use atomic_read */
> @@ -1008,16 +1011,16 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
>
> if (cmp == page) {
> /* Found entry in victim tlb, swap tlb and iotlb. */
> - CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
> + CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
>
> - qemu_spin_lock(&env->tlb_c.lock);
> + qemu_spin_lock(&env_tlb(env)->c.lock);
> copy_tlb_helper_locked(&tmptlb, tlb);
> copy_tlb_helper_locked(tlb, vtlb);
> copy_tlb_helper_locked(vtlb, &tmptlb);
> - qemu_spin_unlock(&env->tlb_c.lock);
> + qemu_spin_unlock(&env_tlb(env)->c.lock);
>
> - CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
> - CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
> + CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
> + CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
> tmpio = *io; *io = *vio; *vio = tmpio;
> return true;
> }
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 1959046343..9bd23f5cae 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -14163,7 +14163,7 @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s)
> * table entry even for that case.
> */
> return (tlb_hit(entry->addr_code, addr) &&
> - env->iotlb[mmu_idx][index].attrs.target_tlb_bit0);
> + env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0);
> #endif
> }
>
> diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
> index d57f9e500f..5e6af10faf 100644
> --- a/tcg/aarch64/tcg-target.inc.c
> +++ b/tcg/aarch64/tcg-target.inc.c
> @@ -1451,12 +1451,8 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
> label->label_ptr[0] = label_ptr;
> }
>
> -/* We expect tlb_mask to be before tlb_table. */
> -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
> - offsetof(CPUArchState, tlb_mask));
> -
> /* We expect to use a 24-bit unsigned offset from ENV. */
> -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
> +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table)
> > 0xffffff);
>
> /* Load and compare a TLB entry, emitting the conditional jump to the
> @@ -1467,8 +1463,8 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
> tcg_insn_unit **label_ptr, int mem_index,
> bool is_read)
> {
> - int mask_ofs = offsetof(CPUArchState, tlb_mask[mem_index]);
> - int table_ofs = offsetof(CPUArchState, tlb_table[mem_index]);
> + int mask_ofs = offsetof(CPUArchState, tlb_.f[mem_index].mask);
> + int table_ofs = offsetof(CPUArchState, tlb_.f[mem_index].table);
> unsigned a_bits = get_alignment_bits(opc);
> unsigned s_bits = opc & MO_SIZE;
> unsigned a_mask = (1u << a_bits) - 1;
> diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
> index 2245a8aeb9..04c2eebb41 100644
> --- a/tcg/arm/tcg-target.inc.c
> +++ b/tcg/arm/tcg-target.inc.c
> @@ -1235,12 +1235,8 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
>
> #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
>
> -/* We expect tlb_mask to be before tlb_table. */
> -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
> - offsetof(CPUArchState, tlb_mask));
> -
> /* We expect to use a 20-bit unsigned offset from ENV. */
> -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
> +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table)
> > 0xfffff);
>
> /* Load and compare a TLB entry, leaving the flags set. Returns the register
> @@ -1251,8 +1247,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
> {
> int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
> : offsetof(CPUTLBEntry, addr_write));
> - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]);
> - int table_off = offsetof(CPUArchState, tlb_table[mem_index]);
> + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask);
> + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table);
> TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
> unsigned s_bits = opc & MO_SIZE;
> unsigned a_bits = get_alignment_bits(opc);
> diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
> index e0670e5098..1bd33389c9 100644
> --- a/tcg/i386/tcg-target.inc.c
> +++ b/tcg/i386/tcg-target.inc.c
> @@ -1654,10 +1654,10 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
> TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
>
> tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, r0, TCG_AREG0,
> - offsetof(CPUArchState, tlb_mask[mem_index]));
> + offsetof(CPUArchState, tlb_.f[mem_index].mask));
>
> tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r0, TCG_AREG0,
> - offsetof(CPUArchState, tlb_table[mem_index]));
> + offsetof(CPUArchState, tlb_.f[mem_index].table));
>
> /* If the required alignment is at least as large as the access, simply
> copy the address and mask. For lesser alignments, check that we don't
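
The i386 sequence above is the clearest illustration of why mask and table
now share a struct: two back-to-back modrm loads from TCG_AREG0 (i.e. ENV)
fetch both fields. In plain C, the lookup the backend emits amounts to
something like the following (a hypothetical helper of mine mirroring the
generated code, not a function that exists in QEMU):

    static inline CPUTLBEntry *tlb_lookup_sketch(CPUArchState *env,
                                                 target_ulong addr,
                                                 int mem_index)
    {
        uintptr_t mask = env_tlb(env)->f[mem_index].mask;
        CPUTLBEntry *table = env_tlb(env)->f[mem_index].table;
        /* Shifting by TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS leaves the
         * page number pre-scaled, so the AND with mask yields a byte
         * offset into the table rather than an element index. */
        uintptr_t ofs = (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) & mask;
        return (CPUTLBEntry *)((uintptr_t)table + ofs);
    }
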
> diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c
> index 8a92e916dd..b827579317 100644
> --- a/tcg/mips/tcg-target.inc.c
> +++ b/tcg/mips/tcg-target.inc.c
> @@ -1201,14 +1201,6 @@ static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
> return i;
> }
>
> -/* We expect tlb_mask to be before tlb_table. */
> -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
> - offsetof(CPUArchState, tlb_mask));
> -
> -/* We expect tlb_mask to be "near" tlb_table. */
> -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) -
> - offsetof(CPUArchState, tlb_mask) >= 0x8000);
> -
> /*
> * Perform the tlb comparison operation.
> * The complete host address is placed in BASE.
> @@ -1222,8 +1214,8 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
> unsigned s_bits = opc & MO_SIZE;
> unsigned a_bits = get_alignment_bits(opc);
> int mem_index = get_mmuidx(oi);
> - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]);
> - int table_off = offsetof(CPUArchState, tlb_table[mem_index]);
> + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask);
> + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table);
> int add_off = offsetof(CPUTLBEntry, addend);
> int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
> : offsetof(CPUTLBEntry, addr_write));
> diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
> index 773690f1d9..1f717745c1 100644
> --- a/tcg/ppc/tcg-target.inc.c
> +++ b/tcg/ppc/tcg-target.inc.c
> @@ -1505,10 +1505,6 @@ static void * const qemu_st_helpers[16] = {
> [MO_BEQ] = helper_be_stq_mmu,
> };
>
> -/* We expect tlb_mask to be before tlb_table. */
> -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
> - offsetof(CPUArchState, tlb_mask));
> -
> /* Perform the TLB load and compare. Places the result of the comparison
> in CR7, loads the addend of the TLB into R3, and returns the register
> containing the guest address (zero-extended into R4). Clobbers R0 and R2. */
> @@ -1521,8 +1517,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
> = (is_read
> ? offsetof(CPUTLBEntry, addr_read)
> : offsetof(CPUTLBEntry, addr_write));
> - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]);
> - int table_off = offsetof(CPUArchState, tlb_table[mem_index]);
> + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask);
> + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table);
> TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
> unsigned s_bits = opc & MO_SIZE;
> unsigned a_bits = get_alignment_bits(opc);
> diff --git a/tcg/riscv/tcg-target.inc.c b/tcg/riscv/tcg-target.inc.c
> index b785f4acb7..c1f9c784bc 100644
> --- a/tcg/riscv/tcg-target.inc.c
> +++ b/tcg/riscv/tcg-target.inc.c
> @@ -961,14 +961,6 @@ static void * const qemu_st_helpers[16] = {
> /* We don't support oversize guests */
> QEMU_BUILD_BUG_ON(TCG_TARGET_REG_BITS < TARGET_LONG_BITS);
>
> -/* We expect tlb_mask to be before tlb_table. */
> -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
> - offsetof(CPUArchState, tlb_mask));
> -
> -/* We expect tlb_mask to be "near" tlb_table. */
> -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) -
> - offsetof(CPUArchState, tlb_mask) >= 0x800);
> -
> static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
> TCGReg addrh, TCGMemOpIdx oi,
> tcg_insn_unit **label_ptr, bool is_load)
> @@ -981,8 +973,8 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
> int mask_off, table_off;
> TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
>
> - mask_off = offsetof(CPUArchState, tlb_mask[mem_index]);
> - table_off = offsetof(CPUArchState, tlb_table[mem_index]);
> + mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask);
> + table_off = offsetof(CPUArchState, tlb_.f[mem_index].table);
> if (table_off > 0x7ff) {
> int mask_hi = mask_off - sextreg(mask_off, 0, 12);
> int table_hi = table_off - sextreg(table_off, 0, 12);
> diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c
> index 7db90b3bae..3a8794d9bd 100644
> --- a/tcg/s390/tcg-target.inc.c
> +++ b/tcg/s390/tcg-target.inc.c
> @@ -1538,9 +1538,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
> #include "tcg-ldst.inc.c"
>
> /* We're expecting to use a 20-bit signed offset on the tlb memory ops. */
> -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_mask[NB_MMU_MODES - 1])
> - > 0x7ffff);
> -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
> +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table)
> > 0x7ffff);
>
> /* Load and compare a TLB entry, leaving the flags set. Loads the TLB
> @@ -1552,8 +1550,8 @@ static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc,
> unsigned a_bits = get_alignment_bits(opc);
> unsigned s_mask = (1 << s_bits) - 1;
> unsigned a_mask = (1 << a_bits) - 1;
> - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]);
> - int table_off = offsetof(CPUArchState, tlb_table[mem_index]);
> + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask);
> + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table);
> int ofs, a_off;
> uint64_t tlb_mask;
>
> diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c
> index 7a61839dc1..be10124e11 100644
> --- a/tcg/sparc/tcg-target.inc.c
> +++ b/tcg/sparc/tcg-target.inc.c
> @@ -1074,19 +1074,11 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
> The result of the TLB comparison is in %[ix]cc. The sanitized address
> is in the returned register, maybe %o0. The TLB addend is in %o1. */
>
> -/* We expect tlb_mask to be before tlb_table. */
> -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
> - offsetof(CPUArchState, tlb_mask));
> -
> -/* We expect tlb_mask to be "near" tlb_table. */
> -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) -
> - offsetof(CPUArchState, tlb_mask) >= (1 << 13));
> -
> static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
> TCGMemOp opc, int which)
> {
> - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]);
> - int table_off = offsetof(CPUArchState, tlb_table[mem_index]);
> + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask);
> + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table);
> TCGReg base = TCG_AREG0;
> const TCGReg r0 = TCG_REG_O0;
> const TCGReg r1 = TCG_REG_O1;
> --
> 2.17.1
>
>