[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH RFC v3 13/27] COLO RAM: Flush cached RAM into SV
From: |
Dr. David Alan Gilbert |
Subject: |
Re: [Qemu-devel] [PATCH RFC v3 13/27] COLO RAM: Flush cached RAM into SVM's memory |
Date: |
Thu, 12 Mar 2015 11:49:46 +0000 |
User-agent: |
Mutt/1.5.23 (2014-03-12) |
* zhanghailiang (address@hidden) wrote:
> On 2015/3/12 3:08, Dr. David Alan Gilbert wrote:
> >* zhanghailiang (address@hidden) wrote:
> >>We only need to flush RAM that is both dirty on PVM and SVM since
> >>last checkpoint. Besides, we must ensure flush RAM cache before load
> >>device state.
> >>
> >>Signed-off-by: zhanghailiang <address@hidden>
> >>Signed-off-by: Lai Jiangshan <address@hidden>
> >>Signed-off-by: Li Zhijian <address@hidden>
> >>Signed-off-by: Yang Hongyang <address@hidden>
> >>Signed-off-by: Gonglei <address@hidden>
> >
> >This could do with some more comments; colo_flush_ram_cache is quite complex.
> >See below.
> >
> >>---
> >> arch_init.c | 91
> >> +++++++++++++++++++++++++++++++++++++-
> >> include/migration/migration-colo.h | 1 +
> >> migration/colo.c | 1 -
> >> 3 files changed, 91 insertions(+), 2 deletions(-)
> >>
> >>diff --git a/arch_init.c b/arch_init.c
> >>index 4a1d825..f70de23 100644
> >>--- a/arch_init.c
> >>+++ b/arch_init.c
> >>@@ -1100,6 +1100,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> >>version_id)
> >> {
> >> int flags = 0, ret = 0;
> >> static uint64_t seq_iter;
> >>+ bool need_flush = false;
> >>
> >> seq_iter++;
> >>
> >>@@ -1163,6 +1164,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> >>version_id)
> >> break;
> >> }
> >>
> >>+ need_flush = true;
> >> ch = qemu_get_byte(f);
> >> ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
> >> break;
> >>@@ -1174,6 +1176,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> >>version_id)
> >> break;
> >> }
> >>
> >>+ need_flush = true;
> >> qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> >> break;
> >> case RAM_SAVE_FLAG_XBZRLE:
> >>@@ -1190,6 +1193,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> >>version_id)
> >> ret = -EINVAL;
> >> break;
> >> }
> >>+ need_flush = true;
> >> break;
> >> case RAM_SAVE_FLAG_EOS:
> >> /* normal exit */
> >>@@ -1207,7 +1211,10 @@ static int ram_load(QEMUFile *f, void *opaque, int
> >>version_id)
> >> ret = qemu_file_get_error(f);
> >> }
> >> }
> >>-
> >>+ if (!ret && ram_cache_enable && need_flush) {
> >>+ DPRINTF("Flush ram_cache\n");
> >>+ colo_flush_ram_cache();
> >>+ }
> >> DPRINTF("Completed load of VM with exit code %d seq iteration "
> >> "%" PRIu64 "\n", ret, seq_iter);
> >> return ret;
> >>@@ -1220,6 +1227,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> >>version_id)
> >> void create_and_init_ram_cache(void)
> >> {
> >> RAMBlock *block;
> >>+ int64_t ram_cache_pages = last_ram_offset() >> TARGET_PAGE_BITS;
> >>
> >> QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> >> block->host_cache = g_malloc(block->used_length);
> >>@@ -1227,6 +1235,14 @@ void create_and_init_ram_cache(void)
> >> }
> >>
> >> ram_cache_enable = true;
> >>+ /*
> >>+ * Start dirty log for slave VM, we will use this dirty bitmap together
> >>with
> >>+ * VM's cache RAM dirty bitmap to decide which page in cache should be
> >>+ * flushed into VM's RAM.
> >>+ */
> >>+ migration_bitmap = bitmap_new(ram_cache_pages);
> >>+ migration_dirty_pages = 0;
> >>+ memory_global_dirty_log_start();
> >> }
> >>
> >> void release_ram_cache(void)
> >>@@ -1261,6 +1277,79 @@ static void
> >>*memory_region_get_ram_cache_ptr(MemoryRegion *mr, RAMBlock *block)
> >> return block->host_cache + (addr - block->offset);
> >> }
> >>
> >>+static inline
> >>+ram_addr_t host_bitmap_find_and_reset_dirty(MemoryRegion *mr,
> >>+ ram_addr_t start)
> >>+{
> >>+ unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
> >>+ unsigned long nr = base + (start >> TARGET_PAGE_BITS);
> >>+ unsigned long size = base + (int128_get64(mr->size) >>
> >>TARGET_PAGE_BITS);
> >>+
> >>+ unsigned long next;
> >>+
> >>+ next = find_next_bit(ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION],
> >>+ size, nr);
> >>+ if (next < size) {
> >>+ clear_bit(next, ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION]);
> >>+ }
> >>+ return (next - base) << TARGET_PAGE_BITS;
> >>+}
> >>+
> >>+void colo_flush_ram_cache(void)
> >>+{
> >>+ RAMBlock *block = NULL;
> >>+ void *dst_host;
> >>+ void *src_host;
> >>+ ram_addr_t ca = 0, ha = 0;
> >>+ bool got_ca = 0, got_ha = 0;
> >>+ int64_t host_dirty = 0, both_dirty = 0;
> >>+
> >>+ address_space_sync_dirty_bitmap(&address_space_memory);
> >>+
> >>+ block = QTAILQ_FIRST(&ram_list.blocks);
> >>+ while (true) {
> >>+ if (ca < block->used_length && ca <= ha) {
> >>+ ca = migration_bitmap_find_and_reset_dirty(block->mr, ca);
> >>+ if (ca < block->used_length) {
> >>+ got_ca = 1;
> >>+ }
> >>+ }
> >>+ if (ha < block->used_length && ha <= ca) {
> >>+ ha = host_bitmap_find_and_reset_dirty(block->mr, ha);
> >>+ if (ha < block->used_length && ha != ca) {
> >>+ got_ha = 1;
> >>+ }
> >>+ host_dirty += (ha < block->used_length ? 1 : 0);
> >>+ both_dirty += (ha < block->used_length && ha == ca ? 1 : 0);
> >>+ }
> >>+ if (ca >= block->used_length && ha >= block->used_length) {
> >>+ ca = 0;
> >>+ ha = 0;
> >>+ block = QTAILQ_NEXT(block, next);
> >>+ if (!block) {
> >>+ break;
> >>+ }
> >>+ } else {
> >>+ if (got_ha) {
> >>+ got_ha = 0;
> >>+ dst_host = memory_region_get_ram_ptr(block->mr) + ha;
> >>+ src_host = memory_region_get_ram_cache_ptr(block->mr,
> >>block)
> >>+ + ha;
> >>+ memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
> >>+ }
> >>+ if (got_ca) {
> >>+ got_ca = 0;
> >>+ dst_host = memory_region_get_ram_ptr(block->mr) + ca;
> >>+ src_host = memory_region_get_ram_cache_ptr(block->mr,
> >>block)
> >>+ + ca;
> >>+ memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
> >>+ }
> >
> >Both of these cases are copying from the ram_cache to the main RAM; what
> >copies from main RAM into the RAM cache, other than
> >create_and_init_ram_cache?
> >
> >I can see create_and_init_ram_cache creates the initial copy at startup,
> >and I can see the code that feeds the memory from the PVM into the SVM via
> >the RAM cache; but don't you need to take a copy of the SVM memory before
> >you start running each checkpoint, in case the SVM changes a page that
> >the PVM didn't change (SVM dirty, PVM isn't dirty) and then when you load
> >that new checkpoint how do you restore that SVM page to be the same as the
> >PVM (i.e. the same as at the start of that checkpoint)?
> >
>
> Er, one thing is clear: after a round of checkpoint, before PVM and SVM
> continue to
> run, the memory of PVM and SVM should be completely the same, and at the same
> time,
> the content of SVM's RAM cache is the SAME as both of them.
>
> During the time of VM's running, PVM/SVM may dirty some pages, we will
> transfer PVM's
> dirty pages to SVM and store them into SVM's RAM cache at next checkpoint
> time.
> So, the content of SVM's RAM cache will always be the same as PVM's memory
> after checkpoint.
> Yes, we can certainly flush all content of SVM's RAM cache into SVM's MEMORY,
> to ensure
> SVM's memory is the same as PVM's. But isn't that inefficient?
>
> The better way to do it is:
> (1) Log SVM's dirty pages
> (2) Only flush the pages that were dirtied by either PVM or SVM.
>
> >Does that rely on a previous checkpoint receiving the new page from the PVM
> >to update the ram cache?
>
> Yes, we never clean the content of ram cache during VM's colo lifecycle.
OK, yes that's more efficient than the original idea that I'd understood.
Dave
>
> >Dave
> >
> >>+ }
> >>+ }
> >>+
> >>+ assert(migration_dirty_pages == 0);
> >>+}
> >>+
> >> static SaveVMHandlers savevm_ram_handlers = {
> >> .save_live_setup = ram_save_setup,
> >> .save_live_iterate = ram_save_iterate,
> >>diff --git a/include/migration/migration-colo.h
> >>b/include/migration/migration-colo.h
> >>index 7d43aed..2084fe2 100644
> >>--- a/include/migration/migration-colo.h
> >>+++ b/include/migration/migration-colo.h
> >>@@ -36,5 +36,6 @@ void *colo_process_incoming_checkpoints(void *opaque);
> >> bool loadvm_in_colo_state(void);
> >> /* ram cache */
> >> void create_and_init_ram_cache(void);
> >>+void colo_flush_ram_cache(void);
> >> void release_ram_cache(void);
> >> #endif
> >>diff --git a/migration/colo.c b/migration/colo.c
> >>index a0e1b7a..5ff2ee8 100644
> >>--- a/migration/colo.c
> >>+++ b/migration/colo.c
> >>@@ -397,7 +397,6 @@ void *colo_process_incoming_checkpoints(void *opaque)
> >> }
> >> DPRINTF("Finish load all vm state to cache\n");
> >> qemu_mutex_unlock_iothread();
> >>- /* TODO: flush vm state */
> >>
> >> ret = colo_ctl_put(ctl, COLO_CHECKPOINT_LOADED);
> >> if (ret < 0) {
> >>--
> >>1.7.12.4
> >>
> >>
> >--
> >Dr. David Alan Gilbert / address@hidden / Manchester, UK
> >
> >.
> >
>
>
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK