[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH] Convert ram_list to RCU DQ V3
From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH] Convert ram_list to RCU DQ V3
Date: Wed, 04 Sep 2013 15:49:54 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130805 Thunderbird/17.0.8
Il 04/09/2013 15:29, Mike Day ha scritto:
> @@ -717,19 +731,21 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
> qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> total_sent += 8;
> bytes_transferred += total_sent;
> -
> return total_sent;
> }
>
> static int ram_save_complete(QEMUFile *f, void *opaque)
> {
> - qemu_mutex_lock_ramlist();
> +
> + qemu_mutex_lock_iothread();
> migration_bitmap_sync();
> + qemu_mutex_unlock_iothread();
ram_save_complete runs with the iothread lock already taken. Since the lock
is not recursive, this will cause a deadlock.
To test migration, I suggest you use virt-test. Install
autotest-framework if you run Fedora, clone
https://github.com/autotest/virt-test.git and do "./run -t qemu
--qemu-bin=/path/to/qemu-system-x86_64".
> ram_control_before_iterate(f, RAM_CONTROL_FINISH);
>
> /* try transferring iterative blocks of memory */
>
> + rcu_read_lock();
> /* flush all remaining blocks regardless of rate limiting */
> while (true) {
> int bytes_sent;
> @@ -741,11 +757,10 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
> }
> bytes_transferred += bytes_sent;
> }
> -
> + rcu_read_unlock();
> ram_control_after_iterate(f, RAM_CONTROL_FINISH);
> migration_end();
>
> - qemu_mutex_unlock_ramlist();
> qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>
> return 0;
> diff --git a/exec.c b/exec.c
> index 5eebcc1..52b282a 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -46,7 +46,7 @@
> #endif
> #include "exec/cpu-all.h"
> #include "qemu/tls.h"
> -
> +#include "qemu/rcu_queue.h"
> #include "exec/cputlb.h"
> #include "translate-all.h"
>
> @@ -57,7 +57,10 @@
> #if !defined(CONFIG_USER_ONLY)
> static int in_migration;
>
> -RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
> +/* ram_list is enabled for RCU access - writes should be protected
> + * by the iothread lock or an equivalent mutex.
> + */
/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
* are protected by a lock, currently the iothread lock.
*/
> +RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
>
> static MemoryRegion *system_memory;
> static MemoryRegion *system_io;
> @@ -1123,16 +1138,18 @@ static int memory_try_enable_merging(void *addr,
> size_t len)
> return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
> }
>
> +/* Called with the iothread lock being held, so we can
> + * foregoe protecting the ram_list. When that changes,
s/foregoe/forgo/ ...
> + * acquire the iothread mutex before writing the list below.
... but I don't think the comment is accurate. Being a very coarse
lock, the iothread mutex will almost always be taken by the caller of a
function that requires it, for two reasons:
1) a function will almost always have callers that hold the lock and
callers that don't
2) the iothread mutex must always be taken _outside_ other locks to
prevent ABBA deadlock.
So I would just write /* Called with the iothread lock held */ on the
write sides (in fact the current standard is to document stuff that is
called _without_ the lock held! :)). Reads that do not take the
rcu_read_lock() of course need more explanation, which is what you did
for find_ram_offset() and qemu_ram_set_idstr().
> + */
> ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
> MemoryRegion *mr)
> {
> - RAMBlock *block, *new_block;
> + RAMBlock *block, *new_block, *last_block = 0;
>
> size = TARGET_PAGE_ALIGN(size);
> new_block = g_malloc0(sizeof(*new_block));
>
> - /* This assumes the iothread lock is taken here too. */
> - qemu_mutex_lock_ramlist();
> new_block->mr = mr;
> new_block->offset = find_ram_offset(size);
> if (host) {
> @@ -1200,33 +1220,40 @@ ram_addr_t qemu_ram_alloc(ram_addr_t size,
> MemoryRegion *mr)
> return qemu_ram_alloc_from_ptr(size, NULL, mr);
> }
>
> +static void reclaim_ramblock(struct rcu_head *prcu)
> +{
> + RAMBlock *block = container_of(prcu, struct RAMBlock, rcu);
> + g_free(block);
> +}
> +
> +
> void qemu_ram_free_from_ptr(ram_addr_t addr)
> {
> RAMBlock *block;
>
> - /* This assumes the iothread lock is taken here too. */
> - qemu_mutex_lock_ramlist();
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + /* This assumes the iothread lock is taken here too. */
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (addr == block->offset) {
> - QTAILQ_REMOVE(&ram_list.blocks, block, next);
> + QLIST_REMOVE_RCU(block, next);
> ram_list.mru_block = NULL;
> ram_list.version++;
> - g_free(block);
> + call_rcu1(&block->rcu, reclaim_ramblock);
You can use call_rcu too:
call_rcu(block, reclaim_ramblock, rcu);
or possibly even this:
call_rcu(block, g_free, rcu);
This removes the container_of from reclaim_ramblock, which can take a
RAMBlock * directly.
> break;
> }
> }
> - qemu_mutex_unlock_ramlist();
> }
>
> void qemu_ram_free(ram_addr_t addr)
> {
> RAMBlock *block;
>
> - /* This assumes the iothread lock is taken here too. */
> - qemu_mutex_lock_ramlist();
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + /* This assumes the iothread lock is taken here too.
> + * if that changes, accesses to ram_list need to be protected
> + * by a mutex (writes) or an rcu read lock (reads)
> + */
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (addr == block->offset) {
> - QTAILQ_REMOVE(&ram_list.blocks, block, next);
> + QLIST_REMOVE_RCU(block, next);
> ram_list.mru_block = NULL;
> ram_list.version++;
> if (block->flags & RAM_PREALLOC_MASK) {
> @@ -1249,12 +1276,10 @@ void qemu_ram_free(ram_addr_t addr)
> qemu_anon_ram_free(block->host, block->length);
> }
> }
> - g_free(block);
> + call_rcu1(&block->rcu, reclaim_ramblock);
Same here.
> break;
> }
> }
> - qemu_mutex_unlock_ramlist();
> -
> }
>
> #ifndef _WIN32
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index e088089..53aa70d 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -24,6 +24,7 @@
> #include "qemu/thread.h"
> #include "qom/cpu.h"
> #include "qemu/tls.h"
> +#include "qemu/rcu.h"
>
> /* some important defines:
> *
> @@ -448,6 +449,7 @@ extern ram_addr_t ram_size;
> #define RAM_PREALLOC_MASK (1 << 0)
>
> typedef struct RAMBlock {
> + struct rcu_head rcu;
> struct MemoryRegion *mr;
> uint8_t *host;
> ram_addr_t offset;
> @@ -457,7 +459,7 @@ typedef struct RAMBlock {
> /* Reads can take either the iothread or the ramlist lock.
> * Writes must take both locks.
> */
Not true anymore---please remove the ramlist lock altogether.
> - QTAILQ_ENTRY(RAMBlock) next;
> + QLIST_ENTRY(RAMBlock) next;
> #if defined(__linux__) && !defined(TARGET_S390X)
> int fd;
> #endif
> @@ -469,7 +471,7 @@ typedef struct RAMList {
> uint8_t *phys_dirty;
> RAMBlock *mru_block;
> /* Protected by the ramlist lock. */
Not true anymore.
Almost there! I'm confident that v4 will be fine!
Paolo
> - QTAILQ_HEAD(, RAMBlock) blocks;
> + QLIST_HEAD(, RAMBlock) blocks;
> uint32_t version;
> } RAMList;
> extern RAMList ram_list;
> diff --git a/include/qemu/rcu_queue.h b/include/qemu/rcu_queue.h
> index e2b8ba5..d159850 100644
> --- a/include/qemu/rcu_queue.h
> +++ b/include/qemu/rcu_queue.h
> @@ -37,6 +37,14 @@
> extern "C" {
> #endif
>
> +
> +/*
> + * List access methods.
> + */
> +#define QLIST_EMPTY_RCU(head) (atomic_rcu_read(&(head)->lh_first) == NULL)
> +#define QLIST_FIRST_RCU(head) (atomic_rcu_read(&(head)->lh_first))
> +#define QLIST_NEXT_RCU(elm, field) (atomic_rcu_read(&(elm)->field.le_next))
> +
> /*
> * List functions.
> */
>