qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH RFC 2/3] memory: implement checkpoint handling


From: Dr. David Alan Gilbert
Subject: Re: [Qemu-devel] [PATCH RFC 2/3] memory: implement checkpoint handling
Date: Mon, 16 Nov 2015 16:56:21 +0000
User-agent: Mutt/1.5.24 (2015-08-30)

* Bohdan Trach (address@hidden) wrote:
> From: Bohdan Trach <address@hidden>
> 
> This commit adds functions, which are used to work with checkpoint
> files. A new command-line option `-checkpoint` is added, which is used
> to specify the checkpoint file. Currently, MD5 function from OpenSSL is
> used to checkpoint memory.
> 
> Signed-off-by: Bohdan Trach <address@hidden>
> ---
>  arch_init.c     | 149 
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  configure       |   2 +
>  qemu-options.hx |   9 ++++
>  vl.c            |  12 +++++
>  4 files changed, 172 insertions(+)
> 
> diff --git a/arch_init.c b/arch_init.c
> index b8a4fb1..eda86d4 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -27,6 +27,7 @@
>  #ifndef _WIN32
>  #include <sys/types.h>
>  #include <sys/mman.h>
> +#include <openssl/md5.h>
>  #endif
>  #include "config.h"
>  #include "monitor/monitor.h"
> @@ -140,6 +141,8 @@ static struct defconfig_file {
>  
>  static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
>  
> +int fd_checkpoint = -1;
> +
>  int qemu_read_default_config_files(bool userconfig)
>  {
>      int ret;
> @@ -184,6 +187,30 @@ static void XBZRLE_cache_lock(void)
>          qemu_mutex_lock(&XBZRLE.lock);
>  }
>  
> +#ifdef DEBUG_ARCH_INIT
> +static char* md5s(const uint8_t *digest) {
> +    /* MD5 is 16 bytes, i.e., 32 hexadecimal digits. + 1 for trailing \0. */
> +    static const size_t size = 32 + 1;
> +    static char hex_digits[32+1];
> +
> +    /* snprintf(hex_digits, */
> +    /*          MD5_DIGEST_LENGTH+1, */
> +    /*          "%016lx", */
> +    /*          *((uint64_t*)digest)); */
> +    /* snprintf(hex_digits+MD5_DIGEST_LENGTH, */
> +    /*          MD5_DIGEST_LENGTH+1, */
> +    /*          "%016lx", */
> +    /*          *((uint64_t*)(digest+sizeof(uint64_t)))); */
> +    int digit;
> +    for (digit = 0; digit < 32; digit += 2) {
> +        snprintf(hex_digits+digit, 3, "%02x", digest[digit/2]);
> +    }
> +
> +    hex_digits[size-1] = '\0';
> +    return hex_digits;
> +}
> +#endif
> +
>  static void XBZRLE_cache_unlock(void)
>  {
>      if (migrate_use_xbzrle())
> @@ -603,6 +630,126 @@ static void migration_bitmap_sync(void)
>      }
>  }
>  
> +int uint128_compare(const void*, const void*);
> +int uint128_compare(const void* x, const void* y)
> +{
> +    return memcmp(x, y, MD5_DIGEST_LENGTH);
> +}

Is anything in qemu/int128.h  useful here?
However, as mentioned in my previous follow up,
I think you need something stronger than MD5 to stop collisions;
sha256 seems appropriate and CPUs have acceleration instructions
for it these days.

> +/* indexed by page number */
> +static uint64_t hashes_size = 0;
> +static uint64_t hashes_entries = 0;
> +static uint8_t *hashes = 0;
> +
> +static uint32_t get_page_nr(uint64_t addr) {
> +    assert((addr % TARGET_PAGE_SIZE) == 0);
> +    return (addr / TARGET_PAGE_SIZE);
> +}
> +
> +typedef struct  {
> +    uint8_t hash[MD5_DIGEST_LENGTH];
> +    uint64_t offset;
> +} hash_offset_entry;
> +
> +static uint64_t hash_offset_entries = 0;
> +static uint64_t max_hash_offset_entries;
> +static hash_offset_entry* hash_offset_array = 0;
> +static uint8_t all_zeroes_hash[MD5_DIGEST_LENGTH];
> +
> +int cmp_hash_offset_entry(const void*, const void*);
> +int cmp_hash_offset_entry(const void* a, const void* b) {

You seem to do this trick of declaring and then defining a lot;
if you need it only within a file then make it static and then you
don't need the declaration unless you use it before it's definition.
If you want to use it outside of this file then the declaration should
be in a header.

I guess this stuff should be in migration/ram.c these days?

> +    hash_offset_entry* e = (hash_offset_entry*) a;
> +    hash_offset_entry* f = (hash_offset_entry*) b;
> +
> +    return memcmp(e->hash, f->hash, MD5_DIGEST_LENGTH);
> +}
> +
> +void veecycle_init(void);
> +void veecycle_init(void) {

Nice name; but if you're using a cute name make sure that you put
a big comment to let people know what they're looking at!

> +    RAMBlock *block;
> +    block = QLIST_FIRST_RCU(&ram_list.blocks);
> +    if (block == NULL)
> +        return;
> +    assert(0 == strncmp(block->mr->name, "pc.ram", strlen("pc.ram")));

This also makes it PC specific; what about everything else?

> +    max_hash_offset_entries = hashes_entries = (ram_size / TARGET_PAGE_SIZE);
> +    DPRINTF("pages=%lu\n", hashes_entries);
> +    hashes_size = hashes_entries * MD5_DIGEST_LENGTH;
> +
> +    hashes = g_malloc(hashes_size);
> +    assert(0 != hashes);
> +    bzero(hashes, hashes_size);

Then you can use g_new0 to allocate and zero fill, but becareful, since these
things are probably quite big you might want to use one of the g_try_ 
allocators.


> +    uint8_t all_zeroes[TARGET_PAGE_SIZE];
> +    bzero(all_zeroes, TARGET_PAGE_SIZE);
> +    MD5(all_zeroes, TARGET_PAGE_SIZE, all_zeroes_hash);
> +
> +    hash_offset_array = g_malloc(max_hash_offset_entries * 
> sizeof(hash_offset_entry));
> +    assert(0 != hash_offset_array);
> +    bzero(hash_offset_array, max_hash_offset_entries * 
> sizeof(hash_offset_entry));
> +}
> +
> +void init_checksum_lookup_table(const char *checkpoint_path);
> +void init_checksum_lookup_table(const char *checkpoint_path) {
> +    int rc;
> +    uint8_t* ram;
> +    RAMBlock *block;
> +
> +    DPRINTF("ram_size=%lu\n", ram_size);
> +
> +    struct stat sb;
> +    rc = stat(checkpoint_path, &sb);
> +    if (rc == -1 && errno == ENOENT) return;
> +    assert(0 == rc);
> +
> +    block = QLIST_FIRST_RCU(&ram_list.blocks);
> +    assert(0 == strncmp(block->mr->name, "pc.ram", strlen("pc.ram")));
> +    ram = block->host;
> +    assert(block->used_length == ram_size);
> +
> +    /* Ignore checkpoint file if size is different from VM's current memory 
> size. */
> +    assert(sb.st_size == ram_size);

Why does this matter? Can't you reuse the hash of pages that are at different
locations in the stored file? e.g. a hash from an old/future boot of the same
VM or one where the page got moved but unchanged?

> +    fd_checkpoint = open(checkpoint_path, O_RDWR);
> +    assert(fd_checkpoint != -1);
> +
> +    uint64_t idx;
> +    for (idx=0; idx<ram_size; idx+=TARGET_PAGE_SIZE) {
> +        rc = read(fd_checkpoint, ram+idx, TARGET_PAGE_SIZE);
> +        assert(rc == TARGET_PAGE_SIZE);
> +        assert(hash_offset_entries < max_hash_offset_entries);
> +        MD5((unsigned char*)(ram+idx),
> +            TARGET_PAGE_SIZE,
> +            (unsigned char*)hash_offset_array[hash_offset_entries].hash);
> +
> +        hash_offset_array[hash_offset_entries].offset = idx;
> +
> +        DPRINTF("hash=%s offset=%08lx\n",
> +                md5s(hash_offset_array[hash_offset_entries].hash),
> +                hash_offset_array[hash_offset_entries].offset);
> +
> +        hash_offset_entries++;
> +    };
> +
> +    qsort(hash_offset_array, hash_offset_entries, sizeof(hash_offset_entry),
> +          cmp_hash_offset_entry);
> +}
> +
> +int is_ram_addr(void* host);
> +int is_ram_addr(void* host) {
> +    static RAMBlock *block = NULL;
> +
> +    block = QLIST_FIRST_RCU(&ram_list.blocks);
> +    assert(0 == strncmp(block->mr->name, "pc.ram", strlen("pc.ram")));
> +
> +    return (host >= memory_region_get_ram_ptr(block->mr) &&
> +            host < memory_region_get_ram_ptr(block->mr) + 
> block->used_length);
> +}
> +
> +static int is_outgoing_with_checkpoint(void) {
> +    return (fd_checkpoint != -1);
> +}
> +
>  void qmp_dump_pc_ram(const char *file, Error **errp) {
>  
>      int rc;
> @@ -869,6 +1016,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>      bitmap_sync_count = 0;
>      migration_bitmap_sync_init();
>  
> +    qsort(hashes, hashes_entries, MD5_DIGEST_LENGTH, uint128_compare);
> +
>      if (migrate_use_xbzrle()) {
>          XBZRLE_cache_lock();
>          XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
> diff --git a/configure b/configure
> index 6969f6f..fd6dd23 100755
> --- a/configure
> +++ b/configure
> @@ -337,6 +337,8 @@ vhdx=""
>  quorum=""
>  numa=""
>  
> +LIBS="-lcrypto"
> +
>  # parse CC options first
>  for opt do
>    optarg=`expr "x$opt" : 'x[^=]*=\(.*\)'`
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 319d971..ece4758 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -268,6 +268,15 @@ If @var{slots} and @var{maxmem} are not specified, 
> memory hotplug won't
>  be enabled and the guest startup RAM will never increase.
>  ETEXI
>  
> +DEF("checkpoint", HAS_ARG, QEMU_OPTION_checkpoint,
> +    "-checkpoint file         path to checkpoint file\n", QEMU_ARCH_ALL)
> +STEXI
> address@hidden -checkpoint @var{path}
> address@hidden -checkpoint
> +Checkpoint file to use during incoming migrations.
> +Reduces network traffic and total migration time.
> +ETEXI
> +
>  DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
>      "-mem-path FILE  provide backing storage for guest RAM\n", QEMU_ARCH_ALL)
>  STEXI
> diff --git a/vl.c b/vl.c
> index 74c2681..d423e99 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -134,6 +134,7 @@ int display_opengl;
>  static int display_remote;
>  const char* keyboard_layout = NULL;
>  ram_addr_t ram_size;
> +const char *checkpoint_path = NULL;
>  const char *mem_path = NULL;
>  int mem_prealloc = 0; /* force preallocation of physical target memory */
>  bool enable_mlock = false;
> @@ -2643,6 +2644,9 @@ out:
>      return 0;
>  }
>  
> +void init_checksum_lookup_table(const char *checkpoint_path);
> +void veecycle_init(void);
> +
>  static void set_memory_options(uint64_t *ram_slots, ram_addr_t *maxram_size)
>  {
>      uint64_t sz;
> @@ -3116,6 +3120,9 @@ int main(int argc, char **argv, char **envp)
>                  }
>                  break;
>  #endif
> +            case QEMU_OPTION_checkpoint:
> +                checkpoint_path = optarg;
> +                break;
>              case QEMU_OPTION_mempath:
>                  mem_path = optarg;
>                  break;
> @@ -4331,6 +4338,7 @@ int main(int argc, char **argv, char **envp)
>          }
>      }
>  
> +    veecycle_init();
>      qdev_prop_check_globals();
>      if (vmstate_dump_file) {
>          /* dump and exit */
> @@ -4339,6 +4347,10 @@ int main(int argc, char **argv, char **envp)
>      }
>  
>      if (incoming) {
> +        if (checkpoint_path) {
> +            init_checksum_lookup_table(checkpoint_path);
> +        }
> +
>          Error *local_err = NULL;
>          qemu_start_incoming_migration(incoming, &local_err);
>          if (local_err) {
> -- 
> 2.0.5
> 

Dave

> 
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK



reply via email to

[Prev in Thread] Current Thread [Next in Thread]