qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH RFC v3 13/27] COLO RAM: Flush cached RAM into SV


From: Dr. David Alan Gilbert
Subject: Re: [Qemu-devel] [PATCH RFC v3 13/27] COLO RAM: Flush cached RAM into SVM's memory
Date: Wed, 11 Mar 2015 19:08:48 +0000
User-agent: Mutt/1.5.23 (2014-03-12)

* zhanghailiang (address@hidden) wrote:
> We only need to flush RAM that is both dirty on PVM and SVM since
> last checkpoint. Besides, we must ensure flush RAM cache before load
> device state.
> 
> Signed-off-by: zhanghailiang <address@hidden>
> Signed-off-by: Lai Jiangshan <address@hidden>
> Signed-off-by: Li Zhijian <address@hidden>
> Signed-off-by: Yang Hongyang <address@hidden>
> Signed-off-by: Gonglei <address@hidden>

This could do with some more comments; colo_flush_ram_cache is quite complex.
See below.

> ---
>  arch_init.c                        | 91 +++++++++++++++++++++++++++++++++++++-
>  include/migration/migration-colo.h |  1 +
>  migration/colo.c                   |  1 -
>  3 files changed, 91 insertions(+), 2 deletions(-)
> 
> diff --git a/arch_init.c b/arch_init.c
> index 4a1d825..f70de23 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -1100,6 +1100,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>  {
>      int flags = 0, ret = 0;
>      static uint64_t seq_iter;
> +    bool need_flush = false;
>  
>      seq_iter++;
>  
> @@ -1163,6 +1164,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>                  break;
>              }
>  
> +            need_flush = true;
>              ch = qemu_get_byte(f);
>              ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
>              break;
> @@ -1174,6 +1176,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>                  break;
>              }
>  
> +            need_flush = true;
>              qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
>              break;
>          case RAM_SAVE_FLAG_XBZRLE:
> @@ -1190,6 +1193,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>                  ret = -EINVAL;
>                  break;
>              }
> +            need_flush = true;
>              break;
>          case RAM_SAVE_FLAG_EOS:
>              /* normal exit */
> @@ -1207,7 +1211,10 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>              ret = qemu_file_get_error(f);
>          }
>      }
> -
> +    if (!ret  && ram_cache_enable && need_flush) {
> +        DPRINTF("Flush ram_cache\n");
> +        colo_flush_ram_cache();
> +    }
>      DPRINTF("Completed load of VM with exit code %d seq iteration "
>              "%" PRIu64 "\n", ret, seq_iter);
>      return ret;
> @@ -1220,6 +1227,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>  void create_and_init_ram_cache(void)
>  {
>      RAMBlock *block;
> +    int64_t ram_cache_pages = last_ram_offset() >> TARGET_PAGE_BITS;
>  
>      QTAILQ_FOREACH(block, &ram_list.blocks, next) {
>          block->host_cache = g_malloc(block->used_length);
> @@ -1227,6 +1235,14 @@ void create_and_init_ram_cache(void)
>      }
>  
>      ram_cache_enable = true;
> +    /*
> +    * Start dirty log for slave VM, we will use this dirty bitmap together with
> +    * VM's cache RAM dirty bitmap to decide which page in cache should be
> +    * flushed into VM's RAM.
> +    */
> +    migration_bitmap = bitmap_new(ram_cache_pages);
> +    migration_dirty_pages = 0;
> +    memory_global_dirty_log_start();
>  }
>  
>  void release_ram_cache(void)
> @@ -1261,6 +1277,79 @@ static void *memory_region_get_ram_cache_ptr(MemoryRegion *mr, RAMBlock *block)
>      return block->host_cache + (addr - block->offset);
>  }
>  
> +static inline
> +ram_addr_t host_bitmap_find_and_reset_dirty(MemoryRegion *mr,
> +                                            ram_addr_t start)
> +{
> +    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
> +    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
> +    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
> +
> +    unsigned long next;
> +
> +    next = find_next_bit(ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION],
> +                         size, nr);
> +    if (next < size) {
> +        clear_bit(next, ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION]);
> +    }
> +    return (next - base) << TARGET_PAGE_BITS;
> +}
> +
> +void colo_flush_ram_cache(void)
> +{
> +    RAMBlock *block = NULL;
> +    void *dst_host;
> +    void *src_host;
> +    ram_addr_t ca  = 0, ha = 0;
> +    bool got_ca = 0, got_ha = 0;
> +    int64_t host_dirty = 0, both_dirty = 0;
> +
> +    address_space_sync_dirty_bitmap(&address_space_memory);
> +
> +    block = QTAILQ_FIRST(&ram_list.blocks);
> +    while (true) {
> +        if (ca < block->used_length && ca <= ha) {
> +            ca = migration_bitmap_find_and_reset_dirty(block->mr, ca);
> +            if (ca < block->used_length) {
> +                got_ca = 1;
> +            }
> +        }
> +        if (ha < block->used_length && ha <= ca) {
> +            ha = host_bitmap_find_and_reset_dirty(block->mr, ha);
> +            if (ha < block->used_length && ha != ca) {
> +                got_ha = 1;
> +            }
> +            host_dirty += (ha < block->used_length ? 1 : 0);
> +            both_dirty += (ha < block->used_length && ha == ca ? 1 : 0);
> +        }
> +        if (ca >= block->used_length && ha >= block->used_length) {
> +            ca = 0;
> +            ha = 0;
> +            block = QTAILQ_NEXT(block, next);
> +            if (!block) {
> +                break;
> +            }
> +        } else {
> +            if (got_ha) {
> +                got_ha = 0;
> +                dst_host = memory_region_get_ram_ptr(block->mr) + ha;
> +                src_host = memory_region_get_ram_cache_ptr(block->mr, block)
> +                           + ha;
> +                memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
> +            }
> +            if (got_ca) {
> +                got_ca = 0;
> +                dst_host = memory_region_get_ram_ptr(block->mr) + ca;
> +                src_host = memory_region_get_ram_cache_ptr(block->mr, block)
> +                           + ca;
> +                memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
> +            }

Both of these cases are copying from the ram_cache to the main RAM; what
copies from main RAM into the RAM cache, other than create_and_init_ram_cache?

I can see create_and_init_ram_cache creates the initial copy at startup,
and I can see the code that feeds the memory from the PVM into the SVM via
the RAM cache; but don't you need to take a copy of the SVM memory before
you start running each checkpoint, in case the SVM changes a page that
the PVM didn't change (SVM dirty, PVM isn't dirty) and then when you load
that new checkpoint how do you restore that SVM page to be the same as the
PVM (i.e. the same as at the start of that checkpoint)?

Does that rely on a previous checkpoint receiving the new page from the PVM
to update the ram cache?

Dave

> +        }
> +    }
> +
> +    assert(migration_dirty_pages == 0);
> +}
> +
>  static SaveVMHandlers savevm_ram_handlers = {
>      .save_live_setup = ram_save_setup,
>      .save_live_iterate = ram_save_iterate,
> diff --git a/include/migration/migration-colo.h b/include/migration/migration-colo.h
> index 7d43aed..2084fe2 100644
> --- a/include/migration/migration-colo.h
> +++ b/include/migration/migration-colo.h
> @@ -36,5 +36,6 @@ void *colo_process_incoming_checkpoints(void *opaque);
>  bool loadvm_in_colo_state(void);
>  /* ram cache */
>  void create_and_init_ram_cache(void);
> +void colo_flush_ram_cache(void);
>  void release_ram_cache(void);
>  #endif
> diff --git a/migration/colo.c b/migration/colo.c
> index a0e1b7a..5ff2ee8 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -397,7 +397,6 @@ void *colo_process_incoming_checkpoints(void *opaque)
>          }
>          DPRINTF("Finish load all vm state to cache\n");
>          qemu_mutex_unlock_iothread();
> -        /* TODO: flush vm state */
>  
>          ret = colo_ctl_put(ctl, COLO_CHECKPOINT_LOADED);
>          if (ret < 0) {
> -- 
> 1.7.12.4
> 
> 
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK



reply via email to

[Prev in Thread] Current Thread [Next in Thread]