[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 2/2] kvm: use per-cpu lock to free vcpu thread out o
From: |
Liu Ping Fan |
Subject: |
[Qemu-devel] [PATCH 2/2] kvm: use per-cpu lock to free vcpu thread out of the big lock |
Date: |
Thu, 21 Jun 2012 23:06:58 +0800 |
In order to break the big lock, use a per-cpu lock in kvm_cpu_exec()
to protect against races caused by other cpus' access to env->apic_state
and related fields in env.
Also, we need to protect against run_on_cpu().
The race conditions can look like this:
1. vcpu-1 sends an IPI to vcpu-2
vcpu-3 sends an IPI to vcpu-2
An open window exists for concurrent access to vcpu-2's apic_state & env
2. run_on_cpu() writes env->queued_work_last while flush_queued_work()
reads it
Signed-off-by: Liu Ping Fan <address@hidden>
---
cpus.c | 6 ++++--
hw/apic.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
hw/pc.c | 8 +++++++-
kvm-all.c | 13 +++++++++++--
4 files changed, 76 insertions(+), 9 deletions(-)
diff --git a/cpus.c b/cpus.c
index 554f7bc..ac99afe 100644
--- a/cpus.c
+++ b/cpus.c
@@ -649,6 +649,7 @@ void run_on_cpu(CPUArchState *env, void (*func)(void
*data), void *data)
wi.func = func;
wi.data = data;
+ qemu_mutex_lock(env->cpu_lock);
if (!env->queued_work_first) {
env->queued_work_first = &wi;
} else {
@@ -657,6 +658,7 @@ void run_on_cpu(CPUArchState *env, void (*func)(void
*data), void *data)
env->queued_work_last = &wi;
wi.next = NULL;
wi.done = false;
+ qemu_mutex_unlock(env->cpu_lock);
qemu_cpu_kick(env);
while (!wi.done) {
@@ -718,7 +720,7 @@ static void qemu_tcg_wait_io_event(void)
static void qemu_kvm_wait_io_event(CPUArchState *env)
{
while (cpu_thread_is_idle(env)) {
- qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
+ qemu_cond_wait(env->halt_cond, env->cpu_lock);
}
qemu_kvm_eat_signals(env);
@@ -729,8 +731,8 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
{
CPUArchState *env = arg;
int r;
+ qemu_mutex_lock_cpu(env);
- qemu_mutex_lock(&qemu_global_mutex);
qemu_thread_get_self(env->thread);
env->thread_id = qemu_get_thread_id();
cpu_single_env = env;
diff --git a/hw/apic.c b/hw/apic.c
index 4eeaf88..b999a40 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -22,6 +22,7 @@
#include "host-utils.h"
#include "trace.h"
#include "pc.h"
+#include "qemu-thread.h"
#define MAX_APIC_WORDS 8
@@ -94,6 +95,7 @@ static int get_highest_priority_int(uint32_t *tab)
return -1;
}
+/* Caller must hold the lock */
static void apic_sync_vapic(APICCommonState *s, int sync_type)
{
VAPICState vapic_state;
@@ -141,11 +143,13 @@ static void apic_sync_vapic(APICCommonState *s, int
sync_type)
}
}
+/* Caller must hold lock */
static void apic_vapic_base_update(APICCommonState *s)
{
apic_sync_vapic(s, SYNC_TO_VAPIC);
}
+/* Caller must hold the lock */
static void apic_local_deliver(APICCommonState *s, int vector)
{
uint32_t lvt = s->lvt[vector];
@@ -175,9 +179,11 @@ static void apic_local_deliver(APICCommonState *s, int
vector)
(lvt & APIC_LVT_LEVEL_TRIGGER))
trigger_mode = APIC_TRIGGER_LEVEL;
apic_set_irq(s, lvt & 0xff, trigger_mode);
+ break;
}
}
+/* Caller must hold the lock */
void apic_deliver_pic_intr(DeviceState *d, int level)
{
APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
@@ -200,9 +206,12 @@ void apic_deliver_pic_intr(DeviceState *d, int level)
}
}
+/* Must hold lock */
static void apic_external_nmi(APICCommonState *s)
{
+ qemu_mutex_lock_cpu(s->cpu_env);
apic_local_deliver(s, APIC_LVT_LINT1);
+ qemu_mutex_unlock_cpu(s->cpu_env);
}
#define foreach_apic(apic, deliver_bitmask, code) \
@@ -215,7 +224,9 @@ static void apic_external_nmi(APICCommonState *s)
if (__mask & (1 << __j)) {\
apic = local_apics[__i * 32 + __j];\
if (apic) {\
+ qemu_mutex_lock_cpu(apic->cpu_env);\
code;\
+ qemu_mutex_unlock_cpu(apic->cpu_env);\
}\
}\
}\
@@ -244,7 +255,9 @@ static void apic_bus_deliver(const uint32_t
*deliver_bitmask,
if (d >= 0) {
apic_iter = local_apics[d];
if (apic_iter) {
+ qemu_mutex_lock_cpu(apic_iter->cpu_env);
apic_set_irq(apic_iter, vector_num, trigger_mode);
+ qemu_mutex_unlock_cpu(apic_iter->cpu_env);
}
}
}
@@ -293,6 +306,7 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
uint8_t delivery_mode,
apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
}
+/* Caller must hold lock */
static void apic_set_base(APICCommonState *s, uint64_t val)
{
s->apicbase = (val & 0xfffff000) |
@@ -305,6 +319,7 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
}
}
+/* caller must hold lock */
static void apic_set_tpr(APICCommonState *s, uint8_t val)
{
/* Updates from cr8 are ignored while the VAPIC is active */
@@ -314,12 +329,14 @@ static void apic_set_tpr(APICCommonState *s, uint8_t val)
}
}
+/* caller must hold lock */
static uint8_t apic_get_tpr(APICCommonState *s)
{
apic_sync_vapic(s, SYNC_FROM_VAPIC);
return s->tpr >> 4;
}
+/* Caller must hold the lock */
static int apic_get_ppr(APICCommonState *s)
{
int tpr, isrv, ppr;
@@ -348,6 +365,7 @@ static int apic_get_arb_pri(APICCommonState *s)
* 0 - no interrupt,
* >0 - interrupt number
*/
+/* Caller must hold cpu_lock */
static int apic_irq_pending(APICCommonState *s)
{
int irrv, ppr;
@@ -363,6 +381,7 @@ static int apic_irq_pending(APICCommonState *s)
return irrv;
}
+/* caller must ensure the lock has been taken */
/* signal the CPU if an irq is pending */
static void apic_update_irq(APICCommonState *s)
{
@@ -380,11 +399,13 @@ static void apic_update_irq(APICCommonState *s)
void apic_poll_irq(DeviceState *d)
{
APICCommonState *s = APIC_COMMON(d);
-
+ qemu_mutex_lock_cpu(s->cpu_env);
apic_sync_vapic(s, SYNC_FROM_VAPIC);
apic_update_irq(s);
+ qemu_mutex_unlock_cpu(s->cpu_env);
}
+/* caller must hold the lock */
static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode)
{
apic_report_irq_delivered(!get_bit(s->irr, vector_num));
@@ -407,6 +428,7 @@ static void apic_set_irq(APICCommonState *s, int
vector_num, int trigger_mode)
apic_update_irq(s);
}
+/* caller must hold the lock */
static void apic_eoi(APICCommonState *s)
{
int isrv;
@@ -415,6 +437,7 @@ static void apic_eoi(APICCommonState *s)
return;
reset_bit(s->isr, isrv);
if (!(s->spurious_vec & APIC_SV_DIRECTED_IO) && get_bit(s->tmr, isrv)) {
+ //fix me,need to take extra lock for the bus?
ioapic_eoi_broadcast(isrv);
}
apic_sync_vapic(s, SYNC_FROM_VAPIC | SYNC_TO_VAPIC);
@@ -480,18 +503,23 @@ static void apic_get_delivery_bitmask(uint32_t
*deliver_bitmask,
static void apic_startup(APICCommonState *s, int vector_num)
{
s->sipi_vector = vector_num;
+ qemu_mutex_lock_cpu(s->cpu_env);
cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
+ qemu_mutex_unlock_cpu(s->cpu_env);
}
void apic_sipi(DeviceState *d)
{
APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
-
+ qemu_mutex_lock_cpu(s->cpu_env);
cpu_reset_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
- if (!s->wait_for_sipi)
+ if (!s->wait_for_sipi) {
+ qemu_mutex_unlock_cpu(s->cpu_env);
return;
+ }
cpu_x86_load_seg_cache_sipi(s->cpu_env, s->sipi_vector);
+ qemu_mutex_unlock_cpu(s->cpu_env);
s->wait_for_sipi = 0;
}
@@ -543,6 +571,7 @@ static void apic_deliver(DeviceState *d, uint8_t dest,
uint8_t dest_mode,
apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
}
+/* Caller must take lock */
int apic_get_interrupt(DeviceState *d)
{
APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
@@ -572,6 +601,7 @@ int apic_get_interrupt(DeviceState *d)
return intno;
}
+/* Caller must hold the cpu_lock */
int apic_accept_pic_intr(DeviceState *d)
{
APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
@@ -589,6 +619,7 @@ int apic_accept_pic_intr(DeviceState *d)
return 0;
}
+/* Caller hold lock */
static uint32_t apic_get_current_count(APICCommonState *s)
{
int64_t d;
@@ -619,9 +650,10 @@ static void apic_timer_update(APICCommonState *s, int64_t
current_time)
static void apic_timer(void *opaque)
{
APICCommonState *s = opaque;
-
+ qemu_mutex_lock_cpu(s->cpu_env);
apic_local_deliver(s, APIC_LVT_TIMER);
apic_timer_update(s, s->next_time);
+ qemu_mutex_unlock_cpu(s->cpu_env);
}
static uint32_t apic_mem_readb(void *opaque, target_phys_addr_t addr)
@@ -664,18 +696,22 @@ static uint32_t apic_mem_readl(void *opaque,
target_phys_addr_t addr)
val = 0x11 | ((APIC_LVT_NB - 1) << 16); /* version 0x11 */
break;
case 0x08:
+ qemu_mutex_lock_cpu(s->cpu_env);
apic_sync_vapic(s, SYNC_FROM_VAPIC);
if (apic_report_tpr_access) {
cpu_report_tpr_access(s->cpu_env, TPR_ACCESS_READ);
}
val = s->tpr;
+ qemu_mutex_unlock_cpu(s->cpu_env);
break;
case 0x09:
val = apic_get_arb_pri(s);
break;
case 0x0a:
/* ppr */
+ qemu_mutex_lock_cpu(s->cpu_env);
val = apic_get_ppr(s);
+ qemu_mutex_unlock_cpu(s->cpu_env);
break;
case 0x0b:
val = 0;
@@ -712,7 +748,9 @@ static uint32_t apic_mem_readl(void *opaque,
target_phys_addr_t addr)
val = s->initial_count;
break;
case 0x39:
+ qemu_mutex_lock_cpu(s->cpu_env);
val = apic_get_current_count(s);
+ qemu_mutex_unlock_cpu(s->cpu_env);
break;
case 0x3e:
val = s->divide_conf;
@@ -767,18 +805,22 @@ static void apic_mem_writel(void *opaque,
target_phys_addr_t addr, uint32_t val)
case 0x03:
break;
case 0x08:
+ qemu_mutex_lock_cpu(s->cpu_env);
if (apic_report_tpr_access) {
cpu_report_tpr_access(s->cpu_env, TPR_ACCESS_WRITE);
}
s->tpr = val;
apic_sync_vapic(s, SYNC_TO_VAPIC);
apic_update_irq(s);
+ qemu_mutex_unlock_cpu(s->cpu_env);
break;
case 0x09:
case 0x0a:
break;
case 0x0b: /* EOI */
+ qemu_mutex_lock_cpu(s->cpu_env);
apic_eoi(s);
+ qemu_mutex_unlock_cpu(s->cpu_env);
break;
case 0x0d:
s->log_dest = val >> 24;
@@ -787,8 +829,10 @@ static void apic_mem_writel(void *opaque,
target_phys_addr_t addr, uint32_t val)
s->dest_mode = val >> 28;
break;
case 0x0f:
+ qemu_mutex_lock_cpu(s->cpu_env);
s->spurious_vec = val & 0x1ff;
apic_update_irq(s);
+ qemu_mutex_unlock_cpu(s->cpu_env);
break;
case 0x10 ... 0x17:
case 0x18 ... 0x1f:
@@ -807,24 +851,30 @@ static void apic_mem_writel(void *opaque,
target_phys_addr_t addr, uint32_t val)
case 0x32 ... 0x37:
{
int n = index - 0x32;
+ qemu_mutex_lock_cpu(s->cpu_env);
s->lvt[n] = val;
if (n == APIC_LVT_TIMER)
apic_timer_update(s, qemu_get_clock_ns(vm_clock));
+ qemu_mutex_unlock_cpu(s->cpu_env);
}
break;
case 0x38:
+ qemu_mutex_lock_cpu(s->cpu_env);
s->initial_count = val;
s->initial_count_load_time = qemu_get_clock_ns(vm_clock);
apic_timer_update(s, s->initial_count_load_time);
+ qemu_mutex_unlock_cpu(s->cpu_env);
break;
case 0x39:
break;
case 0x3e:
{
int v;
+ qemu_mutex_lock_cpu(s->cpu_env);
s->divide_conf = val & 0xb;
v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4);
s->count_shift = (v + 1) & 7;
+ qemu_mutex_unlock_cpu(s->cpu_env);
}
break;
default:
diff --git a/hw/pc.c b/hw/pc.c
index 4d34a33..5e7350c 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -147,7 +147,7 @@ void cpu_smm_update(CPUX86State *env)
smm_set(!!(env->hflags & HF_SMM_MASK), smm_arg);
}
-
+/* Called by kvm_cpu_exec(), already with lock protection */
/* IRQ handling */
int cpu_get_pic_interrupt(CPUX86State *env)
{
@@ -173,9 +173,15 @@ static void pic_irq_request(void *opaque, int irq, int
level)
DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
if (env->apic_state) {
while (env) {
+ if (!qemu_cpu_is_self(env)) {
+ qemu_mutex_lock_cpu(env);
+ }
if (apic_accept_pic_intr(env->apic_state)) {
apic_deliver_pic_intr(env->apic_state, level);
}
+ if (!qemu_cpu_is_self(env)) {
+ qemu_mutex_unlock_cpu(env);
+ }
env = env->next_cpu;
}
} else {
diff --git a/kvm-all.c b/kvm-all.c
index 9b73ccf..dc9aa54 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -29,6 +29,7 @@
#include "bswap.h"
#include "memory.h"
#include "exec-memory.h"
+#include "qemu-thread.h"
/* This check must be after config-host.h is included */
#ifdef CONFIG_EVENTFD
@@ -1252,12 +1253,15 @@ int kvm_cpu_exec(CPUArchState *env)
*/
qemu_cpu_kick_self();
}
- qemu_mutex_unlock_iothread();
+ qemu_mutex_unlock(env->cpu_lock);
run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
- qemu_mutex_lock_iothread();
+ qemu_mutex_lock(env->cpu_lock);
kvm_arch_post_run(env, run);
+ qemu_mutex_unlock_cpu(env);
+
+ qemu_mutex_lock_iothread();
kvm_flush_coalesced_mmio_buffer();
@@ -1265,6 +1269,8 @@ int kvm_cpu_exec(CPUArchState *env)
if (run_ret == -EINTR || run_ret == -EAGAIN) {
DPRINTF("io window exit\n");
ret = EXCP_INTERRUPT;
+ qemu_mutex_unlock_iothread();
+ qemu_mutex_lock_cpu(env);
break;
}
fprintf(stderr, "error: kvm run failed %s\n",
@@ -1312,6 +1318,9 @@ int kvm_cpu_exec(CPUArchState *env)
ret = kvm_arch_handle_exit(env, run);
break;
}
+
+ qemu_mutex_unlock_iothread();
+ qemu_mutex_lock_cpu(env);
} while (ret == 0);
if (ret < 0) {
--
1.7.4.4
[Qemu-devel] [PATCH 2/2] kvm: use per-cpu lock to free vcpu thread out of the big lock,
Liu Ping Fan <=
- Re: [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread, Jan Kiszka, 2012/06/21
- Re: [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread, liu ping fan, 2012/06/22
- Re: [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread, Jan Kiszka, 2012/06/22
- Re: [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread, Anthony Liguori, 2012/06/22
- Re: [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread, Jan Kiszka, 2012/06/22
- Re: [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread, Anthony Liguori, 2012/06/22
- Re: [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread, Jan Kiszka, 2012/06/22
- Re: [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread, Anthony Liguori, 2012/06/22