From: Denis Plotnikov
Subject: Re: [PATCH v0 3/4] migration: add background snapshot
Date: Wed, 29 Jul 2020 15:27:44 +0300
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Thunderbird/68.10.0
On 24.07.2020 01:15, Peter Xu wrote:
> On Wed, Jul 22, 2020 at 11:11:32AM +0300, Denis Plotnikov wrote:
>> +static void *background_snapshot_thread(void *opaque)
>> +{
>> +    MigrationState *m = opaque;
>> +    QIOChannelBuffer *bioc;
>> +    QEMUFile *fb;
>> +    int res = 0;
>> +
>> +    rcu_register_thread();
>> +
>> +    qemu_file_set_rate_limit(m->to_dst_file, INT64_MAX);
>> +
>> +    qemu_mutex_lock_iothread();
>> +    vm_stop(RUN_STATE_PAUSED);
>> +
>> +    qemu_savevm_state_header(m->to_dst_file);
>> +    qemu_mutex_unlock_iothread();
>> +    qemu_savevm_state_setup(m->to_dst_file);
>
> Is it intended to skip bql for the setup phase?  IIUC the main thread could
> start the vm before we take the lock again below if we released it...
Good point!
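A minimal sketch of the fix, just reordering the existing calls so the BQL
stays held across the setup phase (the unlock would then happen only at
vm_start() time):

    qemu_mutex_lock_iothread();
    vm_stop(RUN_STATE_PAUSED);

    qemu_savevm_state_header(m->to_dst_file);
    /* keep the BQL held so the main thread can't restart the vCPUs here */
    qemu_savevm_state_setup(m->to_dst_file);

    migrate_set_state(&m->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_ACTIVE);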
>> +    qemu_mutex_lock_iothread();
>> +
>> +    migrate_set_state(&m->state, MIGRATION_STATUS_SETUP,
>> +                      MIGRATION_STATUS_ACTIVE);
>> +
>> +    /*
>> +     * We want to save the vm state for the moment when the snapshot saving
>> +     * was called, but we also want to write the RAM content with the vm
>> +     * running. The RAM content should appear first in the vmstate.
>> +     * So we first save the non-ram part of the vmstate to a temporary
>> +     * buffer, then write the ram part of the vmstate to the migration
>> +     * stream with the vCPUs running and, finally, write the non-ram part
>> +     * of the vmstate from the buffer to the migration stream.
>> +     */
>> +    bioc = qio_channel_buffer_new(4096);
>> +    qio_channel_set_name(QIO_CHANNEL(bioc), "vmstate-buffer");
>> +    fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
>> +    object_unref(OBJECT(bioc));
>> +
>> +    if (ram_write_tracking_start()) {
>> +        goto failed_resume;
>> +    }
>> +
>> +    if (global_state_store()) {
>> +        goto failed_resume;
>> +    }
>
> Is this needed?  We should be always in stopped state here, right?
Yes, it seems it isn't needed here.
>> +
>> +    cpu_synchronize_all_states();
>> +
>> +    if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
>> +        goto failed_resume;
>> +    }
>> +
>> +    vm_start();
>> +    qemu_mutex_unlock_iothread();
>> +
>> +    while (!res) {
>> +        res = qemu_savevm_state_iterate(m->to_dst_file, false);
>> +
>> +        if (res < 0 || qemu_file_get_error(m->to_dst_file)) {
>> +            goto failed;
>> +        }
>> +    }
>> +
>> +    /*
>> +     * By this moment we have the RAM content saved into the migration
>> +     * stream. The next step is to flush the non-ram content (vm devices
>> +     * state) right after the ram content. The device state was stored in
>> +     * the temporary buffer prior to the ram saving.
>> +     */
>> +    qemu_put_buffer(m->to_dst_file, bioc->data, bioc->usage);
>> +    qemu_fflush(m->to_dst_file);
>> +
>> +    if (qemu_file_get_error(m->to_dst_file)) {
>> +        goto failed;
>> +    }
>> +
>> +    migrate_set_state(&m->state, MIGRATION_STATUS_ACTIVE,
>> +                      MIGRATION_STATUS_COMPLETED);
>> +    goto exit;
>> +
>> +failed_resume:
>> +    vm_start();
>> +    qemu_mutex_unlock_iothread();
>> +failed:
>> +    migrate_set_state(&m->state, MIGRATION_STATUS_ACTIVE,
>> +                      MIGRATION_STATUS_FAILED);
>> +exit:
>> +    ram_write_tracking_stop();
>> +    qemu_fclose(fb);
>> +    qemu_mutex_lock_iothread();
>> +    qemu_savevm_state_cleanup();
>> +    qemu_mutex_unlock_iothread();
>> +    rcu_unregister_thread();
>> +    return NULL;
>> +}
>> +
>>  void migrate_fd_connect(MigrationState *s, Error *error_in)
>>  {
>>      Error *local_err = NULL;
>> @@ -3599,8 +3694,14 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
>>          migrate_fd_cleanup(s);
>>          return;
>>      }
>> -    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
>> -                       QEMU_THREAD_JOINABLE);
>> +    if (migrate_background_snapshot()) {
>> +        qemu_thread_create(&s->thread, "bg_snapshot",
>
> Maybe the name "live_snapshot" suits more (since the other one is
> "live_migration")?
Looks like it. Another good name is "async_snapshot"; all the related
functions and properties should be renamed accordingly.
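I.e., roughly (just illustrating the rename; live_snapshot_thread here would
be the renamed background_snapshot_thread):

    if (migrate_background_snapshot()) {
        qemu_thread_create(&s->thread, "live_snapshot",
                           live_snapshot_thread, s,
                           QEMU_THREAD_JOINABLE);
    }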
>> +                           background_snapshot_thread, s,
>> +                           QEMU_THREAD_JOINABLE);
>> +    } else {
>> +        qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
>> +                           QEMU_THREAD_JOINABLE);
>> +    }
>>      s->migration_thread_running = true;
>>  }

[...]

>> @@ -1151,9 +1188,11 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
>>      ram_counters.transferred += save_page_header(rs, rs->f, block,
>>                                                   offset | RAM_SAVE_FLAG_PAGE);
>>      if (async) {
>> -        qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
>> -                              migrate_release_ram() &
>> -                              migration_in_postcopy());
>> +        bool may_free = migrate_background_snapshot() ||
>> +                        (migrate_release_ram() &&
>> +                         migration_in_postcopy());
>
> Does background snapshot need to free the memory?  /me confused..
Yes, for the page copies: a page touched by the guest is copied aside by the
write-fault handler before being unprotected, and that copy should be freed
once it has been sent. No, for the rest of the pages.
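So the blanket migrate_background_snapshot() in the condition is probably too
wide. A hedged sketch of the distinction, where ram_page_is_copy() is a
hypothetical helper that tells whether buf points at a fault-handler page
copy rather than at guest RAM:

        /*
         * Free only the local page copies made by the write-fault handler,
         * never guest RAM itself; ram_page_is_copy() is hypothetical.
         */
        bool may_free = (migrate_background_snapshot() &&
                         ram_page_is_copy(block, offset)) ||
                        (migrate_release_ram() &&
                         migration_in_postcopy());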
>> +
>> +        qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, may_free);
>>      } else {
>>          qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
>>      }

[...]

>> +void ram_block_list_create(void)
>> +{
>> +    RAMBlock *block = NULL;
>> +    RamBlockList *block_list = ram_bgs_block_list_get();
>> +
>> +    qemu_mutex_lock_ramlist();
>> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>> +        memory_region_ref(block->mr);
>> +        QLIST_INSERT_HEAD(block_list, block, bgs_next);
>> +    }
>> +    qemu_mutex_unlock_ramlist();
>
> This kind of duplicates with ram_list.blocks itself...
>
>> +}
>> +
>> +static int page_fault_fd;
>> +static int thread_quit_fd;
>> +static QemuThread page_fault_thread;
>> +
>> +static int mem_change_wp(void *addr, uint64_t length, bool protect)
>> +{
>> +    struct uffdio_writeprotect wp = { 0 };
>> +
>> +    assert(page_fault_fd);
>> +
>> +    if (protect) {
>> +        struct uffdio_register reg = { 0 };
>> +
>> +        reg.mode = UFFDIO_REGISTER_MODE_WP;
>> +        reg.range.start = (uint64_t) addr;
>> +        reg.range.len = length;
>> +
>> +        if (ioctl(page_fault_fd, UFFDIO_REGISTER, &reg)) {
>> +            error_report("Can't register memory at %p len: %"PRIu64
>> +                         " for page fault interception", addr, length);
>> +            return -1;
>> +        }
>
> IMHO it's better to move the register out of mem_change_wp().
> mem_change_wp() should be in page granularity, while we should be clear in
> the code that the registration is happening per-ramblock.
So, I will move it.
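Something like this split, as a rough sketch (ram_block_register_wp() is a
hypothetical name; it would be called once per ramblock from the
tracking-start path, while mem_change_wp() keeps only the page-granular WP
toggling):

/* hypothetical per-ramblock registration, done once at tracking start */
static int ram_block_register_wp(RAMBlock *block)
{
    struct uffdio_register reg = { 0 };

    reg.mode = UFFDIO_REGISTER_MODE_WP;
    reg.range.start = (uint64_t) block->host;
    reg.range.len = block->max_length;

    if (ioctl(page_fault_fd, UFFDIO_REGISTER, &reg)) {
        error_report("Can't register memory at %p len: %"PRIu64
                     " for page fault interception",
                     block->host, (uint64_t) block->max_length);
        return -1;
    }
    return 0;
}

/* mem_change_wp() then only toggles WP on an already registered range */
static int mem_change_wp(void *addr, uint64_t length, bool protect)
{
    struct uffdio_writeprotect wp = { 0 };

    wp.range.start = (uint64_t) addr;
    wp.range.len = length;
    wp.mode = protect ? UFFDIO_WRITEPROTECT_MODE_WP : 0;

    return ioctl(page_fault_fd, UFFDIO_WRITEPROTECT, &wp) ? -1 : 0;
}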
Btw, is UFFDIO_UNREGISTER missing in the whole process?
Yeah, it is.
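For reference, a sketch of the counterpart (again a hypothetical name; it
would be called for each ramblock from ram_write_tracking_stop()):

/* hypothetical: undo the UFFDIO_REGISTER when tracking stops */
static void ram_block_unregister_wp(RAMBlock *block)
{
    struct uffdio_range range = { 0 };

    range.start = (uint64_t) block->host;
    range.len = block->max_length;

    if (ioctl(page_fault_fd, UFFDIO_UNREGISTER, &range)) {
        error_report("Can't unregister memory at %p len: %"PRIu64,
                     block->host, (uint64_t) block->max_length);
    }
}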
>> +
>> +        wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
>> +    }

[...]

>> @@ -2338,6 +2881,11 @@ static void ram_list_init_bitmaps(void)
>>              bitmap_set(block->bmap, 0, pages);
>>              block->clear_bmap_shift = shift;
>>              block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
>> +
>> +            if (migrate_background_snapshot()) {
>> +                block->touched_map = bitmap_new(pages);
>> +                block->copied_map = bitmap_new(pages);
>> +            }
>
> We should be able to avoid allocating bmap & clear_bmap for snapshots.
> Or we can also directly reuse the two bitmaps?
Probably, yes
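A sketch of the first option (skip bmap/clear_bmap when snapshotting),
assuming nothing in the snapshot path consults them:

            if (migrate_background_snapshot()) {
                /* the snapshot only needs its own two bitmaps */
                block->touched_map = bitmap_new(pages);
                block->copied_map = bitmap_new(pages);
            } else {
                block->bmap = bitmap_new(pages);
                bitmap_set(block->bmap, 0, pages);
                block->clear_bmap_shift = shift;
                block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
            }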