[Qemu-devel] [RFC 08/10] Drop global lock during TCG code execution
From: fred . konrad
Subject: [Qemu-devel] [RFC 08/10] Drop global lock during TCG code execution
Date: Fri, 16 Jan 2015 18:19:55 +0100
From: Jan Kiszka <address@hidden>
This finally allows TCG to benefit from the iothread introduction: Drop
the global mutex while running pure TCG CPU code. Reacquire the lock
when entering MMIO or PIO emulation, or when leaving the TCG loop.
We have to revert a few optimizations for the current TCG threading
model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not
kicking it in qemu_cpu_kick. We also need to disable RAM block
reordering until we have a more efficient locking mechanism at hand.
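In outline the patch applies one pattern throughout: the vCPU thread gives up
qemu_global_mutex before running translated code and takes it back as soon as
emulation needs device state, i.e. around the MMIO/PIO slow paths. A minimal
sketch of that pattern, assuming the qemu_mutex_lock_iothread()/
qemu_mutex_unlock_iothread() helpers and cpu_exec() as used in the diff below
(simplified illustration, not the literal hunks; io_read_with_lock() is a
hypothetical wrapper):

static int tcg_cpu_exec(CPUArchState *env)
{
    int ret;

    /* Pure TCG code runs without the global lock... */
    qemu_mutex_unlock_iothread();
    ret = cpu_exec(env);
    /* ...but everything after the loop still expects to hold it. */
    qemu_mutex_lock_iothread();
    return ret;
}

/* The MMIO slow path re-acquires the lock only for the device access. */
static uint64_t io_read_with_lock(MemoryRegion *mr, hwaddr addr, unsigned size)
{
    uint64_t val = 0;

    qemu_mutex_lock_iothread();
    io_mem_read(mr, addr, &val, size);
    qemu_mutex_unlock_iothread();
    return val;
}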
I'm pretty sure some cases are still broken, definitely SMP (we no
longer perform round-robin scheduling "by chance"). Still, a Linux x86
UP guest and my Musicpal ARM model boot fine here. These numbers
demonstrate where we gain something:
20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm
20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm
The guest CPU was fully loaded, but the iothread could still run mostly
independently on a second core. Without the patch we don't get beyond
32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm
32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm
We don't benefit significantly, though, when the guest is not fully
loading a host CPU.
Note that this patch depends on
http://thread.gmane.org/gmane.comp.emulators.qemu/118657
Changes from Fred Konrad:
* Rebase on the current HEAD.
* Fixes a deadlock in qemu_devices_reset().
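The qemu_devices_reset() change at the end of this patch deals with a
re-entrancy problem the new locking introduces: the mutex is now taken on the
device-access paths, so a reset handler that ends up in one of them would try
to lock it while qemu_devices_reset(), called with the lock held, still owns
it. A sketch of the offending shape, with my_device_reset() as a purely
hypothetical handler:

static void my_device_reset(void *opaque)
{
    /* E.g. the reset touches a register through the MMIO path, which now
     * takes the iothread mutex internally -> deadlock if the caller still
     * holds it, since qemu_global_mutex is not recursive. */
    qemu_mutex_lock_iothread();
    /* ... put the device back into its power-on state ... */
    qemu_mutex_unlock_iothread();
}

Hence the vl.c hunk below releases the lock around the reset-handler loop and
re-acquires it afterwards.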
---
cpus.c | 19 +++++++------------
cputlb.c | 5 +++++
exec.c | 25 +++++++++++++++++++++++++
softmmu_template.h | 6 ++++++
target-i386/misc_helper.c | 27 ++++++++++++++++++++++++---
translate-all.c | 2 ++
vl.c | 6 ++++++
7 files changed, 75 insertions(+), 15 deletions(-)
diff --git a/cpus.c b/cpus.c
index 91a48f2..f10c94d 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1017,7 +1017,7 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
qemu_tcg_init_cpu_signals();
qemu_thread_get_self(cpu->thread);
- qemu_mutex_lock(&qemu_global_mutex);
+ qemu_mutex_lock_iothread();
CPU_FOREACH(cpu) {
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
@@ -1125,17 +1125,7 @@ static bool qemu_in_vcpu_thread(void)
void qemu_mutex_lock_iothread(void)
{
- if (!tcg_enabled()) {
- qemu_mutex_lock(&qemu_global_mutex);
- } else {
- iothread_requesting_mutex = true;
- if (qemu_mutex_trylock(&qemu_global_mutex)) {
- qemu_cpu_kick_thread(first_cpu);
- qemu_mutex_lock(&qemu_global_mutex);
- }
- iothread_requesting_mutex = false;
- qemu_cond_broadcast(&qemu_io_proceeded_cond);
- }
+ qemu_mutex_lock(&qemu_global_mutex);
}
void qemu_mutex_unlock_iothread(void)
@@ -1356,7 +1346,12 @@ static int tcg_cpu_exec(CPUArchState *env)
cpu->icount_decr.u16.low = decr;
cpu->icount_extra = count;
}
+
+ qemu_mutex_unlock_iothread();
+
ret = cpu_exec(env);
+
+ qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
qemu_time += profile_getclock() - ti;
#endif
diff --git a/cputlb.c b/cputlb.c
index 3b271d4..4a7e634 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -30,6 +30,9 @@
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
+void qemu_mutex_lock_iothread(void);
+void qemu_mutex_unlock_iothread(void);
+
//#define DEBUG_TLB
//#define DEBUG_TLB_CHECK
@@ -125,8 +128,10 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
+ qemu_mutex_lock_iothread();
cpu_physical_memory_reset_dirty(ram_addr, TARGET_PAGE_SIZE,
DIRTY_MEMORY_CODE);
+ qemu_mutex_unlock_iothread();
}
/* update the TLB so that writes in physical page 'phys_addr' are no longer
diff --git a/exec.c b/exec.c
index 081818e..705d451 100644
--- a/exec.c
+++ b/exec.c
@@ -1786,6 +1786,7 @@ static void check_watchpoint(int offset, int len, int flags)
}
wp->hitaddr = vaddr;
if (!cpu->watchpoint_hit) {
+ qemu_mutex_unlock_iothread();
cpu->watchpoint_hit = wp;
tb_check_watchpoint(cpu);
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
@@ -2557,6 +2558,7 @@ static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
mr = address_space_translate(as, addr, &addr1, &l, false);
if (l < 4 || !memory_access_is_direct(mr, false)) {
/* I/O case */
+ qemu_mutex_lock_iothread();
io_mem_read(mr, addr1, &val, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2567,6 +2569,7 @@ static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
val = bswap32(val);
}
#endif
+ qemu_mutex_unlock_iothread();
} else {
/* RAM case */
ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
@@ -2616,6 +2619,7 @@ static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
false);
if (l < 8 || !memory_access_is_direct(mr, false)) {
/* I/O case */
+ qemu_mutex_lock_iothread();
io_mem_read(mr, addr1, &val, 8);
#if defined(TARGET_WORDS_BIGENDIAN)
if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2626,6 +2630,7 @@ static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
val = bswap64(val);
}
#endif
+ qemu_mutex_unlock_iothread();
} else {
/* RAM case */
ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
@@ -2683,6 +2688,7 @@ static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
false);
if (l < 2 || !memory_access_is_direct(mr, false)) {
/* I/O case */
+ qemu_mutex_lock_iothread();
io_mem_read(mr, addr1, &val, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2693,6 +2699,7 @@ static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
val = bswap16(val);
}
#endif
+ qemu_mutex_unlock_iothread();
} else {
/* RAM case */
ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
@@ -2741,7 +2748,9 @@ void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
mr = address_space_translate(as, addr, &addr1, &l,
true);
if (l < 4 || !memory_access_is_direct(mr, true)) {
+ qemu_mutex_lock_iothread();
io_mem_write(mr, addr1, val, 4);
+ qemu_mutex_unlock_iothread();
} else {
addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
ptr = qemu_get_ram_ptr(addr1);
@@ -2749,10 +2758,12 @@ void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
if (unlikely(in_migration)) {
if (cpu_physical_memory_is_clean(addr1)) {
+ qemu_mutex_lock_iothread();
/* invalidate code */
tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
/* set dirty bit */
cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
+ qemu_mutex_unlock_iothread();
}
}
}
@@ -2780,7 +2791,9 @@ static inline void stl_phys_internal(AddressSpace *as,
val = bswap32(val);
}
#endif
+ qemu_mutex_lock_iothread();
io_mem_write(mr, addr1, val, 4);
+ qemu_mutex_unlock_iothread();
} else {
/* RAM case */
addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
@@ -2796,7 +2809,9 @@ static inline void stl_phys_internal(AddressSpace *as,
stl_p(ptr, val);
break;
}
+ qemu_mutex_lock_iothread();
invalidate_and_set_dirty(addr1, 4);
+ qemu_mutex_unlock_iothread();
}
}
@@ -2843,7 +2858,9 @@ static inline void stw_phys_internal(AddressSpace *as,
val = bswap16(val);
}
#endif
+ qemu_mutex_lock_iothread();
io_mem_write(mr, addr1, val, 2);
+ qemu_mutex_unlock_iothread();
} else {
/* RAM case */
addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
@@ -2859,7 +2876,9 @@ static inline void stw_phys_internal(AddressSpace *as,
stw_p(ptr, val);
break;
}
+ qemu_mutex_lock_iothread();
invalidate_and_set_dirty(addr1, 2);
+ qemu_mutex_unlock_iothread();
}
}
@@ -2881,20 +2900,26 @@ void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
/* XXX: optimize */
void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
+ qemu_mutex_lock_iothread();
val = tswap64(val);
address_space_rw(as, addr, (void *) &val, 8, 1);
+ qemu_mutex_unlock_iothread();
}
void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
+ qemu_mutex_lock_iothread();
val = cpu_to_le64(val);
address_space_rw(as, addr, (void *) &val, 8, 1);
+ qemu_mutex_unlock_iothread();
}
void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
+ qemu_mutex_lock_iothread();
val = cpu_to_be64(val);
address_space_rw(as, addr, (void *) &val, 8, 1);
+ qemu_mutex_unlock_iothread();
}
/* virtual memory access for debug (includes writing to ROM) */
diff --git a/softmmu_template.h b/softmmu_template.h
index 6b4e615..e3c6dc8 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -157,8 +157,12 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
cpu_io_recompile(cpu, retaddr);
}
+ qemu_mutex_lock_iothread();
+
cpu->mem_io_vaddr = addr;
io_mem_read(mr, physaddr, &val, 1 << SHIFT);
+
+ qemu_mutex_unlock_iothread();
return val;
}
#endif
@@ -376,9 +380,11 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env,
cpu_io_recompile(cpu, retaddr);
}
+ qemu_mutex_lock_iothread();
cpu->mem_io_vaddr = addr;
cpu->mem_io_pc = retaddr;
io_mem_write(mr, physaddr, val, 1 << SHIFT);
+ qemu_mutex_unlock_iothread();
}
void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
diff --git a/target-i386/misc_helper.c b/target-i386/misc_helper.c
index 4aaf1e4..0a953a9 100644
--- a/target-i386/misc_helper.c
+++ b/target-i386/misc_helper.c
@@ -24,32 +24,53 @@
void helper_outb(uint32_t port, uint32_t data)
{
+ qemu_mutex_lock_iothread();
cpu_outb(port, data & 0xff);
+ qemu_mutex_unlock_iothread();
}
target_ulong helper_inb(uint32_t port)
{
- return cpu_inb(port);
+ target_ulong ret;
+
+ qemu_mutex_lock_iothread();
+ ret = cpu_inb(port);
+ qemu_mutex_unlock_iothread();
+ return ret;
}
void helper_outw(uint32_t port, uint32_t data)
{
+ qemu_mutex_lock_iothread();
cpu_outw(port, data & 0xffff);
+ qemu_mutex_unlock_iothread();
}
target_ulong helper_inw(uint32_t port)
{
- return cpu_inw(port);
+ target_ulong ret;
+
+ qemu_mutex_lock_iothread();
+ ret = cpu_inw(port);
+ qemu_mutex_unlock_iothread();
+ return ret;
}
void helper_outl(uint32_t port, uint32_t data)
{
+ qemu_mutex_lock_iothread();
cpu_outl(port, data);
+ qemu_mutex_unlock_iothread();
}
target_ulong helper_inl(uint32_t port)
{
- return cpu_inl(port);
+ target_ulong ret;
+
+ qemu_mutex_lock_iothread();
+ ret = cpu_inl(port);
+ qemu_mutex_unlock_iothread();
+ return ret;
}
void helper_into(CPUX86State *env, int next_eip_addend)
diff --git a/translate-all.c b/translate-all.c
index 68505c0..a986d61 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1240,6 +1240,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
#endif
#ifdef TARGET_HAS_PRECISE_SMC
if (current_tb_modified) {
+ qemu_mutex_unlock_iothread();
/* we generate a block containing just the instruction
modifying the memory. It will ensure that it cannot modify
itself */
@@ -1337,6 +1338,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
p->first_tb[current_cpu->cpu_index] = NULL;
#ifdef TARGET_HAS_PRECISE_SMC
if (current_tb_modified) {
+ qemu_mutex_unlock_iothread();
/* we generate a block containing just the instruction
modifying the memory. It will ensure that it cannot modify
itself */
diff --git a/vl.c b/vl.c
index 7786b2f..160e4a8 100644
--- a/vl.c
+++ b/vl.c
@@ -1608,10 +1608,16 @@ void qemu_devices_reset(void)
{
QEMUResetEntry *re, *nre;
+ /*
* Some devices' reset handlers need to grab the global mutex, so just
* release it here.
+ */
+ qemu_mutex_unlock_iothread();
/* reset all devices */
QTAILQ_FOREACH_SAFE(re, &reset_handlers, entry, nre) {
re->func(re->opaque);
}
+ qemu_mutex_lock_iothread();
}
void qemu_system_reset(bool report)
--
1.9.0