[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC 28/38] cpu-exec: use RCU to perform lockless TB lookups
From: |
Emilio G. Cota |
Subject: |
[Qemu-devel] [RFC 28/38] cpu-exec: use RCU to perform lockless TB lookups |
Date: |
Sun, 23 Aug 2015 20:23:57 -0400 |
Only grab tb_lock when new code has to be generated.
Note that due to the RCU usage we lose the ability to move
recently-found TBs to the beginning of the slot's list.
We could in theory try to do something smart about this,
but given that each CPU has a private tb_jmp_cache, it
might be OK to just leave it alone.
Signed-off-by: Emilio G. Cota <address@hidden>
---
cpu-exec.c | 21 +++++++++-----------
include/exec/exec-all.h | 12 +++++++++---
translate-all.c | 52 ++++++++++++++++++++++++-------------------------
3 files changed, 43 insertions(+), 42 deletions(-)
diff --git a/cpu-exec.c b/cpu-exec.c
index 826ec25..ff08da8 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -24,6 +24,7 @@
#include "qemu/atomic.h"
#include "qemu/timer.h"
#include "exec/tb-hash.h"
+#include "qemu/rcu_queue.h"
#include "qemu/rcu.h"
#if !defined(CONFIG_USER_ONLY)
@@ -261,7 +262,8 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
uint64_t flags)
{
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
- TranslationBlock *tb, **ptb1;
+ TBPhysHashSlot *slot;
+ TranslationBlock *tb;
unsigned int h;
tb_page_addr_t phys_pc, phys_page1;
target_ulong virt_page2;
@@ -270,12 +272,9 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
phys_pc = get_page_addr_code(env, pc);
phys_page1 = phys_pc & TARGET_PAGE_MASK;
h = tb_phys_hash_func(phys_pc);
- ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
- for(;;) {
- tb = atomic_rcu_read(ptb1);
- if (!tb) {
- return NULL;
- }
+ slot = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+
+ QLIST_FOREACH_RCU(tb, &slot->list, slot_node) {
if (tb->pc == pc &&
tb->page_addr[0] == phys_page1 &&
tb->cs_base == cs_base &&
@@ -288,16 +287,14 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
TARGET_PAGE_SIZE;
phys_page2 = get_page_addr_code(env, virt_page2);
if (tb->page_addr[1] == phys_page2) {
- break;
+ return tb;
}
} else {
- break;
+ return tb;
}
}
- ptb1 = &tb->phys_hash_next;
}
-
- return tb;
+ return NULL;
}
static TranslationBlock *tb_find_slow(CPUState *cpu,
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 7e4aea7..050e820 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -155,8 +155,8 @@ struct TranslationBlock {
#define CF_USE_ICOUNT 0x20000
void *tc_ptr; /* pointer to the translated code */
- /* next matching tb for physical address. */
- struct TranslationBlock *phys_hash_next;
+ /* list node in slot of physically-indexed hash of translation blocks */
+ QLIST_ENTRY(TranslationBlock) slot_node;
/* original tb when cflags has CF_NOCACHE */
struct TranslationBlock *orig_tb;
/* first and second physical page containing code. The lower bit
@@ -183,12 +183,18 @@ struct TranslationBlock {
#include "qemu/thread.h"
+typedef struct TBPhysHashSlot TBPhysHashSlot;
+
+struct TBPhysHashSlot {
+ QLIST_HEAD(, TranslationBlock) list;
+};
+
typedef struct TBContext TBContext;
struct TBContext {
TranslationBlock *tbs;
- TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+ TBPhysHashSlot tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
int nb_tbs;
/* any access to the tbs or the page table must use this lock */
QemuMutex tb_lock;
diff --git a/translate-all.c b/translate-all.c
index 94adcd0..df65c83 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -60,6 +60,7 @@
#include "exec/cputlb.h"
#include "exec/tb-hash.h"
#include "translate-all.h"
+#include "qemu/rcu_queue.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/aie.h"
@@ -721,6 +722,17 @@ static inline void code_gen_alloc(size_t tb_size)
qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
}
+static void tb_ctx_init(void)
+{
+ int i;
+
+ for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+ TBPhysHashSlot *slot = &tcg_ctx.tb_ctx.tb_phys_hash[i];
+
+ QLIST_INIT(&slot->list);
+ }
+}
+
/* Must be called before using the QEMU cpus. 'tb_size' is the size
(in bytes) allocated to the translation buffer. Zero means default
size. */
@@ -731,6 +743,7 @@ void tcg_exec_init(unsigned long tb_size)
tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
tcg_register_jit(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size);
page_init();
+ tb_ctx_init();
aie_init();
#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
/* There's no guest base to take into account, so go ahead and
@@ -878,7 +891,7 @@ void tb_flush(CPUState *cpu)
}
tb_invalidate_all();
- memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
+ tb_ctx_init();
page_flush_tb();
tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
@@ -898,7 +911,9 @@ static void tb_invalidate_check(target_ulong address)
address &= TARGET_PAGE_MASK;
for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
- for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+ TBPhysHashSlot *slot = &tcg_ctx.tb_ctx.tb_phys_hash[i];
+
+ QLIST_FOREACH_RCU(tb, &slot->list, slot_node) {
if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
address >= tb->pc + tb->size)) {
printf("ERROR invalidate: address=" TARGET_FMT_lx
@@ -919,8 +934,9 @@ static void tb_page_check(void)
int i, flags1, flags2;
for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
- for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
- tb = tb->phys_hash_next) {
+ TBPhysHashSlot *slot = &tcg_ctx.tb_ctx.tb_phys_hash[i];
+
+ QLIST_FOREACH_RCU(tb, &slot->list, slot_node) {
flags1 = page_get_flags(tb->pc);
flags2 = page_get_flags(tb->pc + tb->size - 1);
if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
@@ -933,20 +949,6 @@ static void tb_page_check(void)
#endif
-static inline void tb_hash_remove(TranslationBlock **ptb, TranslationBlock *tb)
-{
- TranslationBlock *tb1;
-
- for (;;) {
- tb1 = *ptb;
- if (tb1 == tb) {
- *ptb = tb1->phys_hash_next;
- break;
- }
- ptb = &tb1->phys_hash_next;
- }
-}
-
static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
{
TranslationBlock *tb1;
@@ -1029,16 +1031,13 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
CPUState *cpu;
PageDesc *p;
- unsigned int h, n1;
- tb_page_addr_t phys_pc;
+ unsigned int n1;
TranslationBlock *tb1, *tb2;
/* Now remove the TB from the hash list, so that tb_find_slow
* cannot find it anymore.
*/
- phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
- h = tb_phys_hash_func(phys_pc);
- tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb);
+ QLIST_REMOVE_RCU(tb, slot_node);
/* remove the TB from the page list */
if (tb->page_addr[0] != page_addr) {
@@ -1485,13 +1484,12 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb_page_addr_t phys_page2)
{
unsigned int h;
- TranslationBlock **ptb;
+ TBPhysHashSlot *slot;
/* add in the physical hash table */
h = tb_phys_hash_func(phys_pc);
- ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
- tb->phys_hash_next = *ptb;
- atomic_rcu_set(ptb, tb);
+ slot = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+ QLIST_INSERT_HEAD_RCU(&slot->list, tb, slot_node);
/* add in the page list */
tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
--
1.9.1
- Re: [Qemu-devel] [RFC 31/38] cpu: protect l1_map with tb_lock in full-system mode, (continued)
- [Qemu-devel] [RFC 27/38] cpu-exec: convert tb_invalidated_flag into a per-TB flag, Emilio G. Cota, 2015/08/23
- [Qemu-devel] [RFC 33/38] cpu: introduce cpu_tcg_sched_work to run work while other CPUs sleep, Emilio G. Cota, 2015/08/23
- [Qemu-devel] [RFC 21/38] target-i386: emulate atomic instructions + barriers using AIE, Emilio G. Cota, 2015/08/23
- [Qemu-devel] [RFC 38/38] Revert "target-i386: yield to another VCPU on PAUSE", Emilio G. Cota, 2015/08/23
- [Qemu-devel] [RFC 37/38] cpus: remove async_run_safe_work_on_cpu, Emilio G. Cota, 2015/08/23
- [Qemu-devel] [RFC 32/38] cpu list: convert to RCU QLIST, Emilio G. Cota, 2015/08/23
- [Qemu-devel] [RFC 28/38] cpu-exec: use RCU to perform lockless TB lookups,
Emilio G. Cota <=
- Re: [Qemu-devel] [RFC 00/38] MTTCG: i386, user+system mode, Paolo Bonzini, 2015/08/24
- Re: [Qemu-devel] [RFC 00/38] MTTCG: i386, user+system mode, Artyom Tarasenko, 2015/08/24