From: Orit Wasserman
Subject: Re: [Qemu-devel] [PATCH 20/41] migration: run pending/iterate callbacks out of big lock
Date: Tue, 19 Feb 2013 14:34:24 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130110 Thunderbird/17.0.2
On 02/15/2013 07:46 PM, Paolo Bonzini wrote:
> This makes it possible to do blocking writes directly to the socket,
> with no buffer in the middle. For RAM, only the migration_bitmap_sync()
> call needs the iothread lock. For block migration, it is needed by
> the block layer (including bdrv_drain_all and dirty bitmap access),
> but because some code is shared between iterate and complete, all of
> mig_save_device_dirty is run with the lock taken.
>
> In the savevm case, the iterate callback runs within the big lock.
> This is annoying because it complicates the rules. Luckily we do not
> need to do anything about it: the RAM iterate callback does not need
> the iothread lock, and block migration never runs during savevm.
>
> Signed-off-by: Paolo Bonzini <address@hidden>
> ---
> arch_init.c | 4 ++++
> block-migration.c | 37 +++++++++++++++++++++++++++++++++++--
> include/migration/vmstate.h | 11 +++++++++++
> migration.c | 4 ++--
> 4 files changed, 52 insertions(+), 4 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index 8da868b..adca555 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -379,6 +379,8 @@ static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
> return ret;
> }
>
> +/* Needs iothread lock! */
> +
> static void migration_bitmap_sync(void)
> {
> RAMBlock *block;
> @@ -689,7 +691,9 @@ static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
> remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
>
> if (remaining_size < max_size) {
> + qemu_mutex_lock_iothread();
> migration_bitmap_sync();
> + qemu_mutex_unlock_iothread();
> remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
> }
> return remaining_size;
> diff --git a/block-migration.c b/block-migration.c
> index ea99163..143180c 100644
> --- a/block-migration.c
> +++ b/block-migration.c
> @@ -107,6 +107,10 @@ static void blk_mig_unlock(void)
> qemu_mutex_unlock(&block_mig_state.lock);
> }
>
> +/* Must run outside of the iothread lock during the bulk phase,
> + * or the VM will stall.
> + */
> +
> static void blk_send(QEMUFile *f, BlkMigBlock * blk)
> {
> int len;
> @@ -226,6 +230,8 @@ static void blk_mig_read_cb(void *opaque, int ret)
> assert(block_mig_state.submitted >= 0);
> }
>
> +/* Called with no lock taken. */
> +
> static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
> {
> int64_t total_sectors = bmds->total_sectors;
> @@ -235,11 +241,13 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
> int nr_sectors;
>
> if (bmds->shared_base) {
> + qemu_mutex_lock_iothread();
> while (cur_sector < total_sectors &&
> !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
> &nr_sectors)) {
> cur_sector += nr_sectors;
> }
> + qemu_mutex_unlock_iothread();
> }
>
> if (cur_sector >= total_sectors) {
> @@ -272,15 +280,19 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
> block_mig_state.submitted++;
> blk_mig_unlock();
>
> + qemu_mutex_lock_iothread();
> blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
> nr_sectors, blk_mig_read_cb, blk);
>
> bdrv_reset_dirty(bs, cur_sector, nr_sectors);
> - bmds->cur_sector = cur_sector + nr_sectors;
> + qemu_mutex_unlock_iothread();
>
> + bmds->cur_sector = cur_sector + nr_sectors;
> return (bmds->cur_sector >= total_sectors);
> }
>
> +/* Called with iothread lock taken. */
> +
> static void set_dirty_tracking(int enable)
> {
> BlkMigDevState *bmds;
> @@ -336,6 +348,8 @@ static void init_blk_migration(QEMUFile *f)
> bdrv_iterate(init_blk_migration_it, NULL);
> }
>
> +/* Called with no lock taken. */
> +
> static int blk_mig_save_bulked_block(QEMUFile *f)
> {
> int64_t completed_sector_sum = 0;
> @@ -382,6 +396,8 @@ static void blk_mig_reset_dirty_cursor(void)
> }
> }
>
> +/* Called with iothread lock taken. */
> +
> static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
> int is_async)
> {
> @@ -451,7 +467,9 @@ error:
> return ret;
> }
>
> -/* return value:
> +/* Called with iothread lock taken.
> + *
> + * return value:
> * 0: too much data for max_downtime
> * 1: few enough data for max_downtime
> */
> @@ -470,6 +488,8 @@ static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
> return ret;
> }
>
> +/* Called with no locks taken. */
> +
> static int flush_blks(QEMUFile *f)
> {
> BlkMigBlock *blk;
> @@ -509,6 +529,8 @@ static int flush_blks(QEMUFile *f)
> return ret;
> }
>
> +/* Called with iothread lock taken. */
> +
> static int64_t get_remaining_dirty(void)
> {
> BlkMigDevState *bmds;
> @@ -521,6 +543,8 @@ static int64_t get_remaining_dirty(void)
> return dirty << BDRV_SECTOR_BITS;
> }
>
> +/* Called with iothread lock taken. */
> +
> static void blk_mig_cleanup(void)
> {
> BlkMigDevState *bmds;
> @@ -600,7 +624,12 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
> }
> ret = 0;
> } else {
> + /* Always called with iothread lock taken for
> + * simplicity, block_save_complete also calls it.
> + */
> + qemu_mutex_lock_iothread();
> ret = blk_mig_save_dirty_block(f, 1);
> + qemu_mutex_unlock_iothread();
> }
> if (ret < 0) {
> return ret;
> @@ -622,6 +651,8 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
> return qemu_ftell(f) - last_ftell;
> }
>
> +/* Called with iothread lock taken. */
> +
> static int block_save_complete(QEMUFile *f, void *opaque)
> {
> int ret;
> @@ -665,6 +696,7 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
> /* Estimate pending number of bytes to send */
> uint64_t pending;
>
> + qemu_mutex_lock_iothread();
> blk_mig_lock();
> pending = get_remaining_dirty() +
> block_mig_state.submitted * BLOCK_SIZE +
> @@ -675,6 +707,7 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
> pending = BLOCK_SIZE;
> }
> blk_mig_unlock();
> + qemu_mutex_unlock_iothread();
>
> DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
> return pending;
> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> index 6229569..5f803f5 100644
> --- a/include/migration/vmstate.h
> +++ b/include/migration/vmstate.h
> @@ -30,14 +30,25 @@ typedef void SaveStateHandler(QEMUFile *f, void *opaque);
> typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
>
> typedef struct SaveVMHandlers {
> + /* This runs inside the iothread lock. */
> void (*set_params)(const MigrationParams *params, void * opaque);
> SaveStateHandler *save_state;
>
> int (*save_live_setup)(QEMUFile *f, void *opaque);
> void (*cancel)(void *opaque);
> int (*save_live_complete)(QEMUFile *f, void *opaque);
> +
> + /* This runs both outside and inside the iothread lock. */
> bool (*is_active)(void *opaque);
> +
> + /* This runs outside the iothread lock in the migration case, and
> + * within the lock in the savevm case. The callback had better only
> + * use data that is local to the migration thread or protected
> + * by other locks.
> + */
> int (*save_live_iterate)(QEMUFile *f, void *opaque);
> +
> + /* This runs outside the iothread lock! */
> uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size);
>
> LoadStateHandler *load_state;
> diff --git a/migration.c b/migration.c
> index 8abaaea..cb7f7b4 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -658,7 +658,6 @@ static void *buffered_file_thread(void *opaque)
> uint64_t pending_size;
>
> if (s->bytes_xfer < s->xfer_limit) {
> - qemu_mutex_lock_iothread();
> DPRINTF("iterate\n");
> pending_size = qemu_savevm_state_pending(s->file, max_size);
> DPRINTF("pending size %lu max %lu\n", pending_size, max_size);
> @@ -666,6 +665,7 @@ static void *buffered_file_thread(void *opaque)
> qemu_savevm_state_iterate(s->file);
> } else {
> DPRINTF("done iterating\n");
> + qemu_mutex_lock_iothread();
> start_time = qemu_get_clock_ms(rt_clock);
> qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
> old_vm_running = runstate_is_running();
> @@ -673,8 +673,8 @@ static void *buffered_file_thread(void *opaque)
> s->xfer_limit = INT_MAX;
> qemu_savevm_state_complete(s->file);
> last_round = true;
> + qemu_mutex_unlock_iothread();
> }
> - qemu_mutex_unlock_iothread();
> }
> if (current_time >= initial_time + BUFFER_DELAY) {
> uint64_t transferred_bytes = s->bytes_xfer;
>
Looks good, but the locking is getting complicated ..
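
For my own notes, here is the pattern in miniature: a toy pthreads program
(my sketch, not QEMU code; the names save_pending/save_iterate and the page
counts are made up). The worker thread does its slow blocking work with no
lock held, and takes the big lock only around the short sections that touch
shared state, just as the callbacks do with qemu_mutex_lock_iothread() after
this patch:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
static int dirty_pages = 64;   /* shared state, protected by big_lock */

/* "pending": lock only around the sync, the way ram_save_pending()
 * does around migration_bitmap_sync(). */
static int save_pending(void)
{
    int remaining;

    pthread_mutex_lock(&big_lock);
    remaining = dirty_pages;
    pthread_mutex_unlock(&big_lock);
    return remaining;
}

/* "iterate": the slow blocking write runs with no lock held, so the
 * "VM" (everything else contending for big_lock) never stalls behind
 * it; only the shared-state update takes the lock. */
static void save_iterate(void)
{
    usleep(1000);              /* stands in for the blocking socket write */

    pthread_mutex_lock(&big_lock);
    dirty_pages -= 8;          /* stands in for bdrv_reset_dirty() */
    pthread_mutex_unlock(&big_lock);
}

static void *migration_thread(void *opaque)
{
    (void)opaque;

    while (save_pending() > 0) {
        save_iterate();        /* big lock NOT held here */
    }

    /* "complete" still runs entirely under the big lock. */
    pthread_mutex_lock(&big_lock);
    printf("complete, %d dirty pages left\n", dirty_pages);
    pthread_mutex_unlock(&big_lock);
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, migration_thread, NULL);
    pthread_join(&t, NULL);
    return 0;
}

(Compile with "cc -pthread". The real code differs, of course, but the lock
discipline is the same.)
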
Reviewed-by: Orit Wasserman <address@hidden>