[Qemu-devel] [RFC PATCH V7 07/19] protect TBContext with tb_lock.
From: fred . konrad
Subject: [Qemu-devel] [RFC PATCH V7 07/19] protect TBContext with tb_lock.
Date: Mon, 10 Aug 2015 17:27:05 +0200
From: KONRAD Frederic <address@hidden>
This protects TBContext with tb_lock to make the tb_* functions thread safe.
We can still have an issue with tb_flush in the multithreaded TCG case:
another CPU can be executing code during a flush.
This can be fixed later by making all the other TCG threads exit before calling
tb_flush().
tb_find_slow is split into tb_find_slow and tb_find_physical, as the whole of
tb_find_slow doesn't need to hold the tb lock (see the lookup sketch after the
diffstat below).
Signed-off-by: KONRAD Frederic <address@hidden>
Changes:
V6 -> V7:
* Drop a tb_lock in the already locked restore_state_to_opc.
V5 -> V6:
* Drop a tb_lock around tb_find_fast in cpu-exec.c.
---
cpu-exec.c | 58 +++++++++++++-------
include/exec/exec-all.h | 1 +
target-arm/translate.c | 3 ++
tcg/tcg.h | 14 ++++-
translate-all.c | 137 +++++++++++++++++++++++++++++++++++++-----------
5 files changed, 162 insertions(+), 51 deletions(-)
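The new tb_find_slow below boils down to a check / lock / re-check pattern. A
minimal standalone sketch of that pattern (using a plain pthread mutex and
hypothetical find_block()/generate_block() helpers rather than the actual QEMU
functions) looks like this:

#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Hypothetical stand-ins for tb_find_physical() and tb_gen_code(). */
static void *find_block(unsigned long pc)
{
    (void)pc;
    return NULL;          /* pretend the block is not translated yet */
}

static void *generate_block(unsigned long pc)
{
    static int dummy;
    (void)pc;
    return &dummy;        /* pretend we generated a block */
}

static void *lookup_or_generate(unsigned long pc)
{
    /* Fast path: search the existing translations without the lock. */
    void *tb = find_block(pc);

    if (!tb) {
        pthread_mutex_lock(&lock);
        /* Re-check under the lock: another thread may have generated the
           block between our first lookup and taking the lock. */
        tb = find_block(pc);
        if (!tb) {
            tb = generate_block(pc);
        }
        pthread_mutex_unlock(&lock);
    }
    return tb;
}

int main(void)
{
    return lookup_or_generate(0x1000) ? 0 : 1;
}

The common case (block already translated) stays lock-free; the mutex is only
taken when a block may need to be generated, and the second lookup avoids
creating duplicate blocks.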
diff --git a/cpu-exec.c b/cpu-exec.c
index f3358a9..a012e9d 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -131,6 +131,8 @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
void cpu_loop_exit(CPUState *cpu)
{
cpu->current_tb = NULL;
+ /* Release the mutex before the longjmp so other threads can work. */
+ tb_lock_reset();
siglongjmp(cpu->jmp_env, 1);
}
@@ -143,6 +145,8 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc)
/* XXX: restore cpu registers saved in host registers */
cpu->exception_index = -1;
+ /* Release the mutex before the longjmp so other threads can work. */
+ tb_lock_reset();
siglongjmp(cpu->jmp_env, 1);
}
@@ -253,10 +257,8 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
tb_free(tb);
}
-static TranslationBlock *tb_find_slow(CPUState *cpu,
- target_ulong pc,
- target_ulong cs_base,
- uint64_t flags)
+static TranslationBlock *tb_find_physical(CPUState *cpu, target_ulong pc,
+ target_ulong cs_base, uint64_t flags)
{
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb, **ptb1;
@@ -273,8 +275,9 @@ static TranslationBlock *tb_find_slow(CPUState *cpu,
ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
for(;;) {
tb = *ptb1;
- if (!tb)
- goto not_found;
+ if (!tb) {
+ return tb;
+ }
if (tb->pc == pc &&
tb->page_addr[0] == phys_page1 &&
tb->cs_base == cs_base &&
@@ -282,28 +285,42 @@ static TranslationBlock *tb_find_slow(CPUState *cpu,
/* check next page if needed */
if (tb->page_addr[1] != -1) {
tb_page_addr_t phys_page2;
-
virt_page2 = (pc & TARGET_PAGE_MASK) +
TARGET_PAGE_SIZE;
phys_page2 = get_page_addr_code(env, virt_page2);
- if (tb->page_addr[1] == phys_page2)
- goto found;
+ if (tb->page_addr[1] == phys_page2) {
+ return tb;
+ }
} else {
- goto found;
+ return tb;
}
}
ptb1 = &tb->phys_hash_next;
}
- not_found:
- /* if no translated code available, then translate it now */
- tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
-
- found:
- /* Move the last found TB to the head of the list */
- if (likely(*ptb1)) {
- *ptb1 = tb->phys_hash_next;
- tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
- tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
+ return tb;
+}
+
+static TranslationBlock *tb_find_slow(CPUState *cpu, target_ulong pc,
+ target_ulong cs_base, uint64_t flags)
+{
+ /*
+ * First try to get the tb; if we don't find it we need to lock and
+ * compile it.
+ */
+ TranslationBlock *tb;
+
+ tb = tb_find_physical(cpu, pc, cs_base, flags);
+ if (!tb) {
+ tb_lock();
+ /*
+ * Retry to get the TB in case another CPU just translated it, to avoid
+ * having duplicated TBs in the pool.
+ */
+ tb = tb_find_physical(cpu, pc, cs_base, flags);
+ if (!tb) {
+ tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
+ }
+ tb_unlock();
}
/* we add the TB in the virtual pc hash table */
cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
@@ -326,6 +343,7 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu)
tb->flags != flags)) {
tb = tb_find_slow(cpu, pc, cs_base, flags);
}
+
return tb;
}
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 55a6ff2..9f1c1cb 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -74,6 +74,7 @@ typedef struct TranslationBlock TranslationBlock;
void gen_intermediate_code(CPUArchState *env, struct TranslationBlock *tb);
void gen_intermediate_code_pc(CPUArchState *env, struct TranslationBlock *tb);
+/* Called with tb_lock held. */
void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb,
int pc_pos);
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 69ac18c..960c75e 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -11166,6 +11166,8 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
dc->tb = tb;
+ tb_lock();
+
dc->is_jmp = DISAS_NEXT;
dc->pc = pc_start;
dc->singlestep_enabled = cs->singlestep_enabled;
@@ -11506,6 +11508,7 @@ done_generating:
tb->size = dc->pc - pc_start;
tb->icount = num_insns;
}
+ tb_unlock();
}
void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 231a781..1932323 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -590,21 +590,33 @@ static inline bool tcg_op_buf_full(void)
/* pool based memory allocation */
+/* tb_lock must be held for tcg_malloc_internal. */
void *tcg_malloc_internal(TCGContext *s, int size);
+
void tcg_pool_reset(TCGContext *s);
void tcg_pool_delete(TCGContext *s);
+void tb_lock(void);
+void tb_unlock(void);
+void tb_lock_reset(void);
+
static inline void *tcg_malloc(int size)
{
TCGContext *s = &tcg_ctx;
uint8_t *ptr, *ptr_end;
+ void *ret;
+
+ tb_lock();
size = (size + sizeof(long) - 1) & ~(sizeof(long) - 1);
ptr = s->pool_cur;
ptr_end = ptr + size;
if (unlikely(ptr_end > s->pool_end)) {
- return tcg_malloc_internal(&tcg_ctx, size);
+ ret = tcg_malloc_internal(&tcg_ctx, size);
+ tb_unlock();
+ return ret;
} else {
s->pool_cur = ptr_end;
+ tb_unlock();
return ptr;
}
}
diff --git a/translate-all.c b/translate-all.c
index 60a3d8b..046565c 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -129,6 +129,34 @@ static void *l1_map[V_L1_SIZE];
/* code generation context */
TCGContext tcg_ctx;
+/* translation block context */
+__thread volatile int have_tb_lock;
+
+void tb_lock(void)
+{
+ if (!have_tb_lock) {
+ qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
+ }
+ have_tb_lock++;
+}
+
+void tb_unlock(void)
+{
+ assert(have_tb_lock > 0);
+ have_tb_lock--;
+ if (!have_tb_lock) {
+ qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+ }
+}
+
+void tb_lock_reset(void)
+{
+ if (have_tb_lock) {
+ qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+ }
+ have_tb_lock = 0;
+}
+
static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb_page_addr_t phys_page2);
static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
@@ -217,6 +245,7 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
#ifdef CONFIG_PROFILER
ti = profile_getclock();
#endif
+ tb_lock();
tcg_func_start(s);
gen_intermediate_code_pc(env, tb);
@@ -230,8 +259,10 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
/* find opc index corresponding to search_pc */
tc_ptr = (uintptr_t)tb->tc_ptr;
- if (searched_pc < tc_ptr)
+ if (searched_pc < tc_ptr) {
+ tb_unlock();
return -1;
+ }
s->tb_next_offset = tb->tb_next_offset;
#ifdef USE_DIRECT_JUMP
@@ -243,8 +274,10 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
#endif
j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr,
searched_pc - tc_ptr);
- if (j < 0)
+ if (j < 0) {
+ tb_unlock();
return -1;
+ }
/* now find start of instruction before */
while (s->gen_opc_instr_start[j] == 0) {
j--;
@@ -257,6 +290,8 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
s->restore_time += profile_getclock() - ti;
s->restore_count++;
#endif
+
+ tb_unlock();
return 0;
}
@@ -675,6 +710,7 @@ static inline void code_gen_alloc(size_t tb_size)
CODE_GEN_AVG_BLOCK_SIZE;
tcg_ctx.tb_ctx.tbs =
g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
+ qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
}
/* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -699,16 +735,22 @@ bool tcg_enabled(void)
return tcg_ctx.code_gen_buffer != NULL;
}
-/* Allocate a new translation block. Flush the translation buffer if
- too many translation blocks or too much generated code. */
+/*
+ * Allocate a new translation block. Flush the translation buffer if
+ * too many translation blocks or too much generated code.
+ * tb_alloc is not thread safe, but tb_gen_code is protected by a mutex so
+ * this function is only called by one thread at a time.
+ */
static TranslationBlock *tb_alloc(target_ulong pc)
{
- TranslationBlock *tb;
+ TranslationBlock *tb = NULL;
if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >=
tcg_ctx.code_gen_buffer_max_size) {
- return NULL;
+ tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
+ tb->pc = pc;
+ tb->cflags = 0;
}
tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
tb->pc = pc;
@@ -721,11 +763,16 @@ void tb_free(TranslationBlock *tb)
/* In practice this is mostly used for single use temporary TB
Ignore the hard cases and just back up if this TB happens to
be the last one generated. */
+
+ tb_lock();
+
if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
tcg_ctx.code_gen_ptr = tb->tc_ptr;
tcg_ctx.tb_ctx.nb_tbs--;
}
+
+ tb_unlock();
}
static inline void invalidate_page_bitmap(PageDesc *p)
@@ -774,6 +821,8 @@ static void page_flush_tb(void)
/* XXX: tb_flush is currently not thread safe */
void tb_flush(CPUState *cpu)
{
+ tb_lock();
+
#if defined(DEBUG_FLUSH)
printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
(unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
@@ -798,6 +847,8 @@ void tb_flush(CPUState *cpu)
/* XXX: flush processor icache at this point if cache flush is
expensive */
tcg_ctx.tb_ctx.tb_flush_count++;
+
+ tb_unlock();
}
#ifdef DEBUG_TB_CHECK
@@ -807,6 +858,8 @@ static void tb_invalidate_check(target_ulong address)
TranslationBlock *tb;
int i;
+ tb_lock();
+
address &= TARGET_PAGE_MASK;
for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next)
{
@@ -818,6 +871,8 @@ static void tb_invalidate_check(target_ulong address)
}
}
}
+
+ tb_unlock();
}
/* verify that all the pages have correct rights for code */
@@ -826,6 +881,8 @@ static void tb_page_check(void)
TranslationBlock *tb;
int i, flags1, flags2;
+ tb_lock();
+
for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
tb = tb->phys_hash_next) {
@@ -837,6 +894,8 @@ static void tb_page_check(void)
}
}
}
+
+ tb_unlock();
}
#endif
@@ -917,6 +976,8 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
tb_page_addr_t phys_pc;
TranslationBlock *tb1, *tb2;
+ tb_lock();
+
/* remove the TB from the hash list */
phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
h = tb_phys_hash_func(phys_pc);
@@ -964,6 +1025,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
+ tb_unlock();
}
static void build_page_bitmap(PageDesc *p)
@@ -1005,6 +1067,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
target_ulong virt_page2;
int code_gen_size;
+ tb_lock();
+
phys_pc = get_page_addr_code(env, pc);
if (use_icount) {
cflags |= CF_USE_ICOUNT;
@@ -1033,6 +1097,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
phys_page2 = get_page_addr_code(env, virt_page2);
}
tb_link_page(tb, phys_pc, phys_page2);
+
+ tb_unlock();
return tb;
}
@@ -1331,13 +1397,15 @@ static inline void tb_alloc_page(TranslationBlock *tb,
}
/* add a new TB and link it to the physical page tables. phys_page2 is
- (-1) to indicate that only one page contains the TB. */
+ * (-1) to indicate that only one page contains the TB. */
static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb_page_addr_t phys_page2)
{
unsigned int h;
TranslationBlock **ptb;
+ tb_lock();
+
/* Grab the mmap lock to stop another thread invalidating this TB
before we are done. */
mmap_lock();
@@ -1371,6 +1439,8 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb_page_check();
#endif
mmap_unlock();
+
+ tb_unlock();
}
/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
@@ -1379,31 +1449,34 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
{
int m_min, m_max, m;
uintptr_t v;
- TranslationBlock *tb;
-
- if (tcg_ctx.tb_ctx.nb_tbs <= 0) {
- return NULL;
- }
- if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer ||
- tc_ptr >= (uintptr_t)tcg_ctx.code_gen_ptr) {
- return NULL;
- }
- /* binary search (cf Knuth) */
- m_min = 0;
- m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
- while (m_min <= m_max) {
- m = (m_min + m_max) >> 1;
- tb = &tcg_ctx.tb_ctx.tbs[m];
- v = (uintptr_t)tb->tc_ptr;
- if (v == tc_ptr) {
- return tb;
- } else if (tc_ptr < v) {
- m_max = m - 1;
- } else {
- m_min = m + 1;
+ TranslationBlock *tb = NULL;
+
+ tb_lock();
+
+ if ((tcg_ctx.tb_ctx.nb_tbs > 0)
+ && (tc_ptr >= (uintptr_t)tcg_ctx.code_gen_buffer &&
+ tc_ptr < (uintptr_t)tcg_ctx.code_gen_ptr)) {
+ /* binary search (cf Knuth) */
+ m_min = 0;
+ m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
+ while (m_min <= m_max) {
+ m = (m_min + m_max) >> 1;
+ tb = &tcg_ctx.tb_ctx.tbs[m];
+ v = (uintptr_t)tb->tc_ptr;
+ if (v == tc_ptr) {
+ tb_unlock();
+ return tb;
+ } else if (tc_ptr < v) {
+ m_max = m - 1;
+ } else {
+ m_min = m + 1;
+ }
}
+ tb = &tcg_ctx.tb_ctx.tbs[m_max];
}
- return &tcg_ctx.tb_ctx.tbs[m_max];
+
+ tb_unlock();
+ return tb;
}
#if !defined(CONFIG_USER_ONLY)
@@ -1565,6 +1638,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
int direct_jmp_count, direct_jmp2_count, cross_page;
TranslationBlock *tb;
+ tb_lock();
+
target_code_size = 0;
max_target_code_size = 0;
cross_page = 0;
@@ -1620,6 +1695,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
tcg_ctx.tb_ctx.tb_phys_invalidate_count);
cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
tcg_dump_info(f, cpu_fprintf);
+
+ tb_unlock();
}
void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)
--
1.9.0