[Qemu-devel] [PATCH 2/4] exec: [tcg] Use multiple physical TB caches
From: Lluís Vilanova
Subject: [Qemu-devel] [PATCH 2/4] exec: [tcg] Use multiple physical TB caches
Date: Wed, 14 Sep 2016 23:23:28 +0200
User-agent: StGit/0.17.1-dirty
The physical TB cache is split into 2^E caches, where E is the number of
events with the "vcpu" property and without the "disable" property.

The virtual TB cache on each vCPU uses a (potentially) different
physical TB cache.

This is later exploited to support different tracing event states on a
per-vCPU basis (a standalone sketch of the indexing scheme follows the
diffstat below).
Signed-off-by: Lluís Vilanova <address@hidden>
---
cpu-exec.c | 5 ++++
include/exec/exec-all.h | 6 +++++
include/exec/tb-context.h | 2 +-
include/qom/cpu.h | 4 +++-
qom/cpu.c | 1 +
translate-all.c | 51 +++++++++++++++++++++++++++++++++++++--------
translate-all.h | 17 +++++++++++++++
translate-all.inc.h | 13 +++++++++++
8 files changed, 87 insertions(+), 12 deletions(-)
create mode 100644 translate-all.inc.h
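
To make the indexing scheme concrete before the diff: each vCPU keeps a
bitmap with one bit per qualifying trace event, and that bitmap, read as an
integer, is the index of the physical TB cache the vCPU uses. The following
is a minimal, self-contained C sketch of that mapping; NR_EVENTS,
tb_cache_index() and the other names are illustrative stand-ins, not
identifiers from the patch.

#include <stdio.h>

#define NR_EVENTS 3                    /* E: events with 'vcpu', without 'disable' */
#define NR_CACHES (1UL << NR_EVENTS)   /* 2^E physical TB caches */

/* The per-vCPU event bitmap *is* the cache index; this is only valid
 * because the patch asserts at compile time that E fits in one word. */
static unsigned long tb_cache_index(unsigned long dstate)
{
    return dstate;
}

int main(void)
{
    /* A vCPU with events 0 and 2 enabled selects cache 0b101 == 5. */
    unsigned long dstate = (1UL << 0) | (1UL << 2);
    printf("vCPU uses cache %lu of %lu\n", tb_cache_index(dstate), NR_CACHES);
    return 0;
}

Two vCPUs with identical event states thus share a physical cache, so
translated code is only duplicated when the states actually differ.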
diff --git a/cpu-exec.c b/cpu-exec.c
index 5d9710a..7b2d8c6 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -33,6 +33,7 @@
#include "hw/i386/apic.h"
#endif
#include "sysemu/replay.h"
+#include "translate-all.h"
/* -icount align implementation. */
@@ -267,6 +268,7 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
tb_page_addr_t phys_pc;
struct tb_desc desc;
uint32_t h;
+ struct qht *qht;
desc.env = (CPUArchState *)cpu->env_ptr;
desc.cs_base = cs_base;
@@ -275,7 +277,8 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
phys_pc = get_page_addr_code(desc.env, pc);
desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
h = tb_hash_func(phys_pc, pc, flags);
- return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
+ qht = tb_caches_get(&tcg_ctx.tb_ctx, cpu->tb_cache_idx);
+ return qht_lookup(qht, tb_cmp, &desc, h);
}
static TranslationBlock *tb_find_slow(CPUState *cpu,
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index e2124dc..4ae04f6 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -211,6 +211,10 @@ static inline void tlb_flush_by_mmuidx(CPUState *cpu, ...)
#define USE_DIRECT_JUMP
#endif
+/**
+ * TranslationBlock:
+ * @tb_cache_idx: Index of physical TB cache where this TB has been allocated.
+ */
struct TranslationBlock {
target_ulong pc; /* simulated PC corresponding to this block (EIP + CS
base) */
target_ulong cs_base; /* CS base for this block */
@@ -262,6 +266,8 @@ struct TranslationBlock {
*/
uintptr_t jmp_list_next[2];
uintptr_t jmp_list_first;
+
+ DECLARE_BITMAP(tb_cache_idx, TRACE_VCPU_EVENT_COUNT);
};
void tb_free(TranslationBlock *tb);
diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
index dce95d9..7728904 100644
--- a/include/exec/tb-context.h
+++ b/include/exec/tb-context.h
@@ -32,7 +32,7 @@ typedef struct TBContext TBContext;
struct TBContext {
TranslationBlock *tbs;
- struct qht htable;
+ struct qht *htables;
int nb_tbs;
/* any access to the tbs or the page table must use this lock */
QemuMutex tb_lock;
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index ce0c406..d870810 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -282,6 +282,7 @@ struct qemu_work_item {
* @kvm_fd: vCPU file descriptor for KVM.
* @work_mutex: Lock to prevent multiple access to queued_work_*.
* @queued_work_first: First asynchronous work pending.
+ * @tb_cache_idx: Index of current TB cache.
* @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask).
*
* State of one CPU core or thread.
@@ -350,7 +351,8 @@ struct CPUState {
struct KVMState *kvm_state;
struct kvm_run *kvm_run;
- /* Used for events with 'vcpu' and *without* the 'disabled' properties */
+ /* Used for events with 'vcpu' and *without* the 'disable' properties */
+ DECLARE_BITMAP(tb_cache_idx, TRACE_VCPU_EVENT_COUNT);
DECLARE_BITMAP(trace_dstate, TRACE_VCPU_EVENT_COUNT);
/* TODO Move common fields from CPUArchState here. */
diff --git a/qom/cpu.c b/qom/cpu.c
index 2553247..2225103 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -345,6 +345,7 @@ static void cpu_common_initfn(Object *obj)
qemu_mutex_init(&cpu->work_mutex);
QTAILQ_INIT(&cpu->breakpoints);
QTAILQ_INIT(&cpu->watchpoints);
+ bitmap_zero(cpu->tb_cache_idx, TRACE_VCPU_EVENT_COUNT);
bitmap_zero(cpu->trace_dstate, TRACE_VCPU_EVENT_COUNT);
}
diff --git a/translate-all.c b/translate-all.c
index ebd9fa0..c864eee 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -733,11 +733,22 @@ static inline void code_gen_alloc(size_t tb_size)
qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
}
+/*
+ * Ensure bitmaps can be used as indexes.
+ */
+void *__error__too_many_vcpu_events[
+ (TRACE_VCPU_EVENT_COUNT + 1) <= BITS_PER_LONG ? 0 : -1];
+
static void tb_htable_init(void)
{
+ int cache;
unsigned int mode = QHT_MODE_AUTO_RESIZE;
- qht_init(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE, mode);
+ tcg_ctx.tb_ctx.htables = g_malloc(
+ sizeof(tcg_ctx.tb_ctx.htables[0]) * tb_caches_count());
+ for (cache = 0; cache < tb_caches_count(); cache++) {
+ qht_init(&tcg_ctx.tb_ctx.htables[cache], CODE_GEN_HTABLE_SIZE, mode);
+ }
}
/* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -834,6 +845,8 @@ static void page_flush_tb(void)
/* XXX: tb_flush is currently not thread safe */
void tb_flush(CPUState *cpu)
{
+ int i;
+
if (!tcg_enabled()) {
return;
}
@@ -854,7 +867,9 @@ void tb_flush(CPUState *cpu)
tb_flush_jmp_cache_all(cpu);
}
- qht_reset_size(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
+ for (i = 0; i < tb_caches_count(); i++) {
+ qht_reset_size(&tcg_ctx.tb_ctx.htables[i], CODE_GEN_HTABLE_SIZE);
+ }
page_flush_tb();
tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
@@ -879,8 +894,12 @@ do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash, void *userp)
static void tb_invalidate_check(target_ulong address)
{
+ int i;
+
address &= TARGET_PAGE_MASK;
- qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_invalidate_check, &address);
+ for (i = 0; i < tb_caches_count(); i++) {
+ qht_iter(&tcg_ctx.tb_ctx.htables[i], do_tb_invalidate_check, &address);
+ }
}
static void
@@ -900,7 +919,10 @@ do_tb_page_check(struct qht *ht, void *p, uint32_t hash, void *userp)
/* verify that all the pages have correct rights for code */
static void tb_page_check(void)
{
- qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_page_check, NULL);
+ int i;
+ for (i = 0; i < tb_caches_count(); i++) {
+ qht_iter(&tcg_ctx.tb_ctx.htables[i], do_tb_page_check, NULL);
+ }
}
#endif
@@ -987,12 +1009,14 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
CPUState *cpu;
PageDesc *p;
uint32_t h;
+ struct qht *qht;
tb_page_addr_t phys_pc;
/* remove the TB from the hash list */
phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
h = tb_hash_func(phys_pc, tb->pc, tb->flags);
- qht_remove(&tcg_ctx.tb_ctx.htable, tb, h);
+ qht = tb_caches_get(&tcg_ctx.tb_ctx, tb->tb_cache_idx);
+ qht_remove(qht, tb, h);
/* remove the TB from the page list */
if (tb->page_addr[0] != page_addr) {
@@ -1122,10 +1146,12 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb_page_addr_t phys_page2)
{
uint32_t h;
+ struct qht *qht;
/* add in the hash table */
h = tb_hash_func(phys_pc, tb->pc, tb->flags);
- qht_insert(&tcg_ctx.tb_ctx.htable, tb, h);
+ qht = tb_caches_get(&tcg_ctx.tb_ctx, tb->tb_cache_idx);
+ qht_insert(qht, tb, h);
/* add in the page list */
tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
@@ -1175,6 +1201,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
+ bitmap_copy(tb->tb_cache_idx, ENV_GET_CPU(env)->tb_cache_idx,
+ TRACE_VCPU_EVENT_COUNT);
#ifdef CONFIG_PROFILER
tcg_ctx.tb_count1++; /* includes aborted translations because of
@@ -1636,6 +1664,8 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
pc = tb->pc;
cs_base = tb->cs_base;
flags = tb->flags;
+ /* XXX: It is OK to invalidate only this TB, as this is the one triggering
+ * the memory access */
tb_phys_invalidate(tb, -1);
if (tb->cflags & CF_NOCACHE) {
if (tb->orig_tb) {
@@ -1715,6 +1745,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
int direct_jmp_count, direct_jmp2_count, cross_page;
TranslationBlock *tb;
struct qht_stats hst;
+ int cache;
target_code_size = 0;
max_target_code_size = 0;
@@ -1766,9 +1797,11 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) /
tcg_ctx.tb_ctx.nb_tbs : 0);
- qht_statistics_init(&tcg_ctx.tb_ctx.htable, &hst);
- print_qht_statistics(f, cpu_fprintf, hst);
- qht_statistics_destroy(&hst);
+ for (cache = 0; cache < tb_caches_count(); cache++) {
+ qht_statistics_init(&tcg_ctx.tb_ctx.htables[cache], &hst);
+ print_qht_statistics(f, cpu_fprintf, hst);
+ qht_statistics_destroy(&hst);
+ }
cpu_fprintf(f, "\nStatistics:\n");
cpu_fprintf(f, "TB flush count %d\n", tcg_ctx.tb_ctx.tb_flush_count);
diff --git a/translate-all.h b/translate-all.h
index ba8e4d6..d39bf32 100644
--- a/translate-all.h
+++ b/translate-all.h
@@ -20,7 +20,21 @@
#define TRANSLATE_ALL_H
#include "exec/exec-all.h"
+#include "qemu/typedefs.h"
+/**
+ * tb_caches_count:
+ *
+ * Number of TB caches.
+ */
+static size_t tb_caches_count(void);
+
+/**
+ * tb_caches_get:
+ *
+ * Get the TB cache for the given bitmap index.
+ */
+static struct qht *tb_caches_get(TBContext *tb_ctx, unsigned long *bitmap);
/* translate-all.c */
void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len);
@@ -33,4 +47,7 @@ void tb_check_watchpoint(CPUState *cpu);
int page_unprotect(target_ulong address, uintptr_t pc);
#endif
+
+#include "translate-all.inc.h"
+
#endif /* TRANSLATE_ALL_H */
diff --git a/translate-all.inc.h b/translate-all.inc.h
new file mode 100644
index 0000000..c60a48e
--- /dev/null
+++ b/translate-all.inc.h
@@ -0,0 +1,13 @@
+/* Inline implementations for translate-all.h */
+
+static inline size_t tb_caches_count(void)
+{
+ return 1ULL << TRACE_VCPU_EVENT_COUNT;
+}
+
+static inline struct qht *tb_caches_get(TBContext *tb_ctx,
+ unsigned long *bitmap)
+{
+ unsigned long idx = *bitmap;
+ return &tb_ctx->htables[idx];
+}
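
A closing note on the __error__too_many_vcpu_events declaration in
translate-all.c above: it is a pre-C11 compile-time assertion, giving the
array a negative size (a hard compile error) when the condition fails. Here
is a standalone illustration of the idiom with generic stand-in names; note
the patch's variant uses a zero-size array on success, a GNU C extension.

/* Fails to compile when (EVENT_COUNT + 1) exceeds the bits in a long,
 * i.e. when the event bitmap would no longer fit in one machine word. */
#define BITS_PER_LONG (sizeof(long) * 8)
#define EVENT_COUNT   8   /* stand-in for TRACE_VCPU_EVENT_COUNT */

extern char build_assert_events_fit[
    (EVENT_COUNT + 1) <= BITS_PER_LONG ? 1 : -1];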