qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH v2] XBRLE page delta compression for live migration


From: Shribman, Aidan
Subject: [Qemu-devel] [PATCH v2] XBRLE page delta compression for live migration of large memory apps
Date: Wed, 6 Jul 2011 14:01:58 +0200

Subject: [PATCH v2] XBRLE page delta compression for live migration of large memory apps
From: Aidan Shribman <address@hidden>
 
By using XBRLE (Xor Based Run-Length-Encoding) we can reduce VM downtime and
total live-migration time for VMs running memory write intensive workloads
typical of large enterprise applications.
 
On the sender side pages are cached (in a 2-way cache), and on memory update
in XBRLE representation of the page update is encoded.  On the receiver side
the XBRLE update is applied to the existing (old) page to form the new page.
 
XBRLE does not suite all scenarios but has been proven highly beneficial in
for VMs running payloads such as: SAP ERP systems; VLC transcoding; LMbench
memory write benchmarks.
 
Work is based on research results published VEE 2011: Evaluation of Delta
Compression Techniques for Efficient Live Migration of Large Virtual Machines
by Benoit, Svard, Tordsson and Elmroth.
 
A typical usage scenario:
    {qemu} migrate_set_cachesize 256m
    {qemu} migrate -x -d tcp:destination.host:4444 
    {qemu} info migrate
    ...
    transferred ram-duplicate: A kbytes
    transferred ram-duplicate: B pages
    transferred ram-normal: C kbytes
    transferred ram-normal: D pages
    transferred ram-xbrle: E kbytes
    transferred ram-xbrle: F pages
    overflow ram-xbrle: G pages
    cache-miss ram-xbrle: H pages
 
Testing: live migration with and without xbrle.
 
A simple synthetic memory r/w load generator:
..    unsigned char *buf = (char *) calloc(64, 4096);
..    while (1) {
..        unsigned long i;
..        for (i = 0; i < 64; i++) {
..            buf[i * 4096 + 1234 + i] += (unsigned char) i;
..        }
..    }
 
Signed-off-by: Benoit Hudzia <address@hidden>
Signed-off-by: Petter Svard <address@hidden>
Signed-off-by: Aidan Shribman <address@hidden>
 
---
 
arch_init.c       |  549 +++++++++++++++++++++++++++++++++++++++++++++++++----
block-migration.c |    3 +-
hmp-commands.hx   |   36 +++-
hw/hw.h           |    3 +-
migration-exec.c  |    6 +-
migration-fd.c    |    6 +-
migration-tcp.c   |    6 +-
migration-unix.c  |    6 +-
migration.c       |  109 ++++++++++-
migration.h       |   25 ++-
qmp-commands.hx   |   43 ++++-
savevm.c          |   13 +-
sysemu.h          |   12 +-
13 files changed, 737 insertions(+), 80 deletions(-)
 
diff --git a/arch_init.c b/arch_init.c
old mode 100644
new mode 100755
index 4486925..2fddea9
--- a/arch_init.c
+++ b/arch_init.c
@@ -27,6 +27,7 @@
#include <sys/types.h>
#include <sys/mman.h>
#endif
+#include <assert.h>
#include "config.h"
#include "monitor.h"
#include "sysemu.h"
@@ -41,6 +42,15 @@
#include "gdbstub.h"
#include "hw/smbios.h"
 
+//#define DEBUG_ARCH_INIT
+#ifdef DEBUG_ARCH_INIT
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
#ifdef TARGET_SPARC
int graphic_width = 1024;
int graphic_height = 768;
@@ -88,6 +98,375 @@ const uint32_t arch_type = QEMU_ARCH;
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBRLE    0x40
+
+/***********************************************************/
+/* Page cache for storing previous pages as basis for XBRLE compression */
+#define CACHE_N_WAY 2 /* 2-way assossiative cache */
+
+typedef struct CacheItem {
+    ram_addr_t it_addr;
+    unsigned long it_age;
+    uint8_t *it_data;
+} CacheItem;
+
+typedef struct CacheBucket {
+    CacheItem bkt_item[CACHE_N_WAY];
+} CacheBucket;
+
+static CacheBucket *page_cache;
+static int64_t cache_num_buckets;
+static uint64_t cache_max_item_age;
+static int64_t cache_num_items;
+
+static void cache_init(ssize_t num_buckets);
+static void cache_fini(void);
+static int cache_is_cached(ram_addr_t addr);
+static int cache_get_oldest(CacheBucket *buck);
+static int cache_get_newest(CacheBucket *buck, ram_addr_t addr);
+static void cache_insert(ram_addr_t id, uint8_t *pdata);
+static unsigned long cache_get_cache_pos(ram_addr_t address);
+static CacheItem *cache_item_get(unsigned long pos, int item);
+
+/***********************************************************/
+/* RAM Migration State */
+typedef struct ArchMigrationState {
+    int use_xbrle;
+    int64_t xbrle_cache_size;
+} ArchMigrationState;
+
+static ArchMigrationState arch_mig_state;
+
+void arch_set_params(int blk_enable, int shared_base, int use_xbrle,
+        int64_t xbrle_cache_size, void *opaque)
+{
+    arch_mig_state.use_xbrle = use_xbrle;
+    arch_mig_state.xbrle_cache_size = xbrle_cache_size;
+}
+
+/***********************************************************/
+/* XBRLE (Xor Based Run-Length Encoding) */
+typedef struct XBRLEHeader {
+    uint8_t xh_flags;
+    uint16_t xh_len;
+    uint32_t xh_cksum;
+} XBRLEHeader;
+
+static int rle_encode(uint8_t *src, int slen, uint8_t *dst, int dlen);
+static int rle_decode(uint8_t *src, int slen, uint8_t *dst, int dlen);
+
+/***********************************************************/
+/* accounting */
+typedef struct AccountingInfo{
+    uint64_t dup_pages;
+    uint64_t norm_pages;
+    uint64_t xbrle_bytes;
+    uint64_t xbrle_pages;
+    uint64_t xbrle_overflow;
+    uint64_t xbrle_cache_miss;
+    uint64_t iterations;
+} AccountingInfo;
+
+static AccountingInfo acct_info;
+
+static void acct_clear(void)
+{
+    bzero(&acct_info, sizeof(acct_info));
+}
+
+uint64_t dup_mig_bytes_transferred(void)
+{
+    return acct_info.dup_pages;
+}
+
+uint64_t dup_mig_pages_transferred(void)
+{
+    return acct_info.dup_pages;
+}
+
+uint64_t norm_mig_bytes_transferred(void)
+{
+    return acct_info.norm_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t norm_mig_pages_transferred(void)
+{
+    return acct_info.norm_pages;
+}
+
+uint64_t xbrle_mig_bytes_transferred(void)
+{
+    return acct_info.xbrle_bytes;
+}
+
+uint64_t xbrle_mig_pages_transferred(void)
+{
+    return acct_info.xbrle_pages;
+}
+
+uint64_t xbrle_mig_pages_overflow(void)
+{
+    return acct_info.xbrle_overflow;
+}
+
+uint64_t xbrle_mig_pages_cache_miss(void)
+{
+    return acct_info.xbrle_cache_miss;
+}
+
+/***********************************************************/
+/* XBRLE page cache implementation */
+static CacheItem *cache_item_get(unsigned long pos, int item)
+{
+    assert(page_cache);
+    return &page_cache[pos].bkt_item[item];
+}
+
+static void cache_init(int64_t num_bytes)
+{
+    int i;
+
+    cache_num_items = 0;
+    cache_max_item_age = 0;
+    cache_num_buckets = num_bytes / (TARGET_PAGE_SIZE * CACHE_N_WAY);
+    assert(cache_num_buckets);
+    DPRINTF("Setting cache buckets to %lu\n", cache_num_buckets);
+
+    assert(!page_cache);
+    page_cache = (CacheBucket *)qemu_mallocz((cache_num_buckets) *
+            sizeof(CacheBucket));
+
+    for (i = 0; i < cache_num_buckets; i++) {
+        int j;
+        for (j = 0; j < CACHE_N_WAY; j++) {
+            CacheItem *it = cache_item_get(i, j);
+            it->it_data = NULL;
+            it->it_age = 0;
+            it->it_addr = -1;
+        }
+    }
+}
+
+static void cache_fini(void)
+{
+    int i;
+
+    assert(page_cache);
+
+    for (i = 0; i < cache_num_buckets; i++) {
+        int j;
+        for (j = 0; j < CACHE_N_WAY; j++) {
+            CacheItem *it = cache_item_get(i, j);
+            qemu_free(it->it_data);
+            it->it_data = 0;
+        }
+    }
+
+    qemu_free(page_cache);
+    page_cache = NULL;
+}
+
+static unsigned long cache_get_cache_pos(ram_addr_t address)
+{
+    unsigned long pos;
+
+    assert(cache_num_buckets);
+    pos = (address/TARGET_PAGE_SIZE) & (cache_num_buckets - 1);
+    return pos;
+}
+
+static int cache_get_newest(CacheBucket *buck, ram_addr_t addr)
+{
+    unsigned long big = 0;
+    int big_pos = -1;
+    int j;
+
+    assert(page_cache);
+
+    for (j = 0; j < CACHE_N_WAY; j++) {
+        CacheItem *it = &buck->bkt_item[j];
+
+        if (it->it_addr != addr) {
+            continue;
+        }
+
+        if (!j || it->it_age > big) {
+            big = it->it_age;
+            big_pos = j;
+        }
+    }
+
+    return big_pos;
+}
+
+static int cache_get_oldest(CacheBucket *buck)
+{
+    unsigned long small = 0;
+    int small_pos = -1;
+    int j;
+
+    assert(page_cache);
+
+    for (j = 0; j < CACHE_N_WAY; j++) {
+        CacheItem *it = &buck->bkt_item[j];
+
+        if (!j || it->it_age <  small) {
+            small = it->it_age;
+            small_pos = j;
+        }
+    }
+
+    return small_pos;
+}
+
+static int cache_is_cached(ram_addr_t addr)
+{
+    unsigned long pos = cache_get_cache_pos(addr);
+
+    assert(page_cache);
+    CacheBucket *bucket = &page_cache[pos];
+    return cache_get_newest(bucket, addr);
+}
+
+static void cache_insert(unsigned long addr, uint8_t *pdata)
+{
+    unsigned long pos;
+    int slot = -1;
+    CacheBucket *bucket;
+
+    pos = cache_get_cache_pos(addr);
+    assert(page_cache);
+    bucket = &page_cache[pos];
+    slot = cache_get_oldest(bucket); /* evict LRU */
+
+    /* actual update of entry */
+    CacheItem *it = cache_item_get(pos, slot);
+    if (!it->it_data) {
+        cache_num_items++;
+    }
+    qemu_free(it->it_data);
+    it->it_data = pdata;
+    it->it_age = ++cache_max_item_age;
+    it->it_addr = addr;
+}
+
+/* XBRLE (Xor Based Run-Length Encoding) */
+static int rle_encode(uint8_t *src, int slen, uint8_t *dst, int dlen)
+{
+    int d = 0, ch_run = 0, i;
+    uint8_t prev, ch;
+
+    for (i = 0; i <= slen; i++) {
+        if (i != slen) {
+            ch = src[i];
+        }
+
+        if (!i || (i != slen && ch == prev && ch_run < 255)) {
+            ch_run++;
+        } else {
+            if (d+2 > dlen)
+                return -1;
+            *dst++ = ch_run;
+            *dst++ = prev;
+            d += 2;
+            ch_run = 1;
+        }
+
+        prev = ch;
+    }
+    return d;
+}
+
+static int rle_decode(uint8_t *src, int slen, uint8_t *dst, int dlen)
+{
+    int d = 0, s;
+
+    for (s = 0; s < slen-1; s += 2) {
+        uint8_t ch_run = src[s];
+        uint8_t ch = src[s+1];
+        while (ch_run--) {
+            if (d == dlen) {
+                return -1;
+            }
+            dst[d] = ch;
+            d++;
+        }
+    }
+    return d;
+}
+
+static void xor_encode(uint8_t *dst, uint8_t *src1, uint8_t *src2)
+{
+    int i;
+
+    for (i = 0; i < TARGET_PAGE_SIZE; i++) {
+        dst[i] = src1[i] ^ src2[i];
+    }
+}
+
+static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
+        int cont, int flag)
+{
+        qemu_put_be64(f, offset | cont | flag);
+        if (!cont) {
+                qemu_put_byte(f, strlen(block->idstr));
+                qemu_put_buffer(f, (uint8_t *)block->idstr,
+                                strlen(block->idstr));
+        }
+}
+
+#define ENCODING_FLAG_XBRLE 0x1
+
+static int save_xbrle_page(QEMUFile *f, uint8_t *current_data,
+        ram_addr_t current_addr, RAMBlock *block, ram_addr_t offset, int cont)
+{
+    int cache_location = -1, slot = -1, encoded_len = 0, bytes_sent = 0;
+    XBRLEHeader hdr = {0};
+    CacheItem *it;
+    uint8_t *xor_buf = NULL, *xbrle_buf = NULL;
+
+    /* get location */
+    slot = cache_is_cached(current_addr);
+    if (slot == -1) {
+        acct_info.xbrle_cache_miss++;
+        goto done;
+    }
+    cache_location = cache_get_cache_pos(current_addr);
+
+    /* abort if page changed too much */
+    it = cache_item_get(cache_location, slot);
+
+    /* XOR encoding */
+    xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    xor_encode(xor_buf, it->it_data, current_data);
+
+    /* XBRLE (XOR+RLE) encoding (if we can ensure a 1/3 ratio) */
+    xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    encoded_len = rle_encode(xor_buf, TARGET_PAGE_SIZE, xbrle_buf,
+            TARGET_PAGE_SIZE/3);
+
+    if (encoded_len < 0) {
+        DPRINTF("XBRLE encoding oeverflow - sending uncompressed\n");
+        acct_info.xbrle_overflow++;
+        goto done;
+    }
+
+    hdr.xh_len = encoded_len;
+    hdr.xh_flags |= ENCODING_FLAG_XBRLE;
+
+    /* Send XBRLE compressed page */
+    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBRLE);
+    qemu_put_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
+    qemu_put_buffer(f, xbrle_buf, encoded_len);
+    acct_info.xbrle_pages++;
+    bytes_sent = encoded_len + sizeof(hdr);
+    acct_info.xbrle_bytes += bytes_sent;
+
+done:
+    qemu_free(xor_buf);
+    qemu_free(xbrle_buf);
+    return bytes_sent;
+}
 
static int is_dup_page(uint8_t *page, uint8_t ch)
{
@@ -107,7 +486,7 @@ static int is_dup_page(uint8_t *page, uint8_t ch)
static RAMBlock *last_block;
static ram_addr_t last_offset;
 
-static int ram_save_block(QEMUFile *f)
+static int ram_save_block(QEMUFile *f, int stage)
{
     RAMBlock *block = last_block;
     ram_addr_t offset = last_offset;
@@ -128,28 +507,27 @@ static int ram_save_block(QEMUFile *f)
                                             current_addr + TARGET_PAGE_SIZE,
                                             MIGRATION_DIRTY_FLAG);
 
-            p = block->host + offset;
+            p = qemu_mallocz(TARGET_PAGE_SIZE);
+            memcpy(p, block->host + offset, TARGET_PAGE_SIZE);
 
             if (is_dup_page(p, *p)) {
-                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_COMPRESS);
-                if (!cont) {
-                    qemu_put_byte(f, strlen(block->idstr));
-                    qemu_put_buffer(f, (uint8_t *)block->idstr,
-                                    strlen(block->idstr));
-                }
+                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, *p);
                 bytes_sent = 1;
-            } else {
-                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_PAGE);
-                if (!cont) {
-                    qemu_put_byte(f, strlen(block->idstr));
-                    qemu_put_buffer(f, (uint8_t *)block->idstr,
-                                    strlen(block->idstr));
-                }
+                acct_info.dup_pages++;
+            } else if (stage == 2 && arch_mig_state.use_xbrle) {
+                bytes_sent = save_xbrle_page(f, p, current_addr, block,
+                    offset, cont);
+            }
+            if (!bytes_sent) {
+                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                 bytes_sent = TARGET_PAGE_SIZE;
+                acct_info.norm_pages++;
+            }
+            if (arch_mig_state.use_xbrle) {
+               cache_insert(current_addr, p);
             }
-
             break;
         }
 
@@ -221,6 +599,9 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
 
     if (stage < 0) {
         cpu_physical_memory_set_dirty_tracking(0);
+        if (arch_mig_state.use_xbrle) {
+            cache_fini();
+        }
         return 0;
     }
 
@@ -235,6 +616,11 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         last_block = NULL;
         last_offset = 0;
 
+        if (arch_mig_state.use_xbrle) {
+            cache_init(arch_mig_state.xbrle_cache_size);
+            acct_clear();
+        }
+
         /* Make sure all dirty bits are set */
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             for (addr = block->offset; addr < block->offset + block->length;
@@ -264,8 +650,9 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     while (!qemu_file_rate_limit(f)) {
         int bytes_sent;
 
-        bytes_sent = ram_save_block(f);
+        bytes_sent = ram_save_block(f, stage);
         bytes_transferred += bytes_sent;
+        acct_info.iterations++;
         if (bytes_sent == 0) { /* no more blocks */
             break;
         }
@@ -285,19 +672,71 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         int bytes_sent;
 
         /* flush all remaining blocks regardless of rate limiting */
-        while ((bytes_sent = ram_save_block(f)) != 0) {
+        while ((bytes_sent = ram_save_block(f, stage))) {
             bytes_transferred += bytes_sent;
         }
         cpu_physical_memory_set_dirty_tracking(0);
+        if (arch_mig_state.use_xbrle) {
+            cache_fini();
+        }
     }
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
 
+    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
+        migrate_max_downtime());
+
     return (stage == 2) && (expected_time <= migrate_max_downtime());
}
 
+static int load_xbrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    int ret, rc = -1;
+    uint8_t *prev_page, *xor_buf, *xbrle_buf;
+    XBRLEHeader hdr = {0};
+
+    /* extract RLE header */
+    qemu_get_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
+    if (!(hdr.xh_flags & ENCODING_FLAG_XBRLE)) {
+        fprintf(stderr, "Failed to load XBRLE page - wrong compression!\n");
+        goto done;
+    }
+
+    if (hdr.xh_len > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBRLE page - len overflow!\n");
+        goto done;
+    }
+
+    /* load data and decode */
+    xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    qemu_get_buffer(f, xbrle_buf, hdr.xh_len);
+
+    /* decode RLE */
+    xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    ret = rle_decode(xbrle_buf, hdr.xh_len, xor_buf, TARGET_PAGE_SIZE);
+    if (ret == -1) {
+        fprintf(stderr, "Failed to load XBRLE page - decode error!\n");
+        goto done;
+    }
+
+    if (ret != TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBRLE page - size %d expected %d!\n",
+            ret, TARGET_PAGE_SIZE);
+        goto done;
+    }
+
+    /* decode XOR delta */
+    prev_page = host;
+    xor_encode(prev_page, prev_page, xor_buf);
+    rc = 0;
+done:
+    qemu_free(xor_buf);
+    qemu_free(xbrle_buf);
+    return rc;
+}
+
static inline void *host_from_stream_offset(QEMUFile *f,
                                             ram_addr_t offset,
                                             int flags)
@@ -328,16 +767,38 @@ static inline void *host_from_stream_offset(QEMUFile *f,
     return NULL;
}
 
+static inline void *host_from_stream_offset_versioned(int version_id,
+        QEMUFile *f, ram_addr_t offset, int flags)
+{
+        void *host;
+        if (version_id == 3) {
+                host = qemu_get_ram_ptr(offset);
+        } else {
+                host = host_from_stream_offset(f, offset, flags);
+        }
+        if (!host) {
+            fprintf(stderr, "Failed to convert RAM address to host"
+                    " for offset 0x%lX!\n", offset);
+            abort();
+        }
+        return host;
+}
+
int ram_load(QEMUFile *f, void *opaque, int version_id)
{
     ram_addr_t addr;
-    int flags;
+    int flags, ret = 0;
+    static uint64_t seq_iter;
+
+    seq_iter++;
 
     if (version_id < 3 || version_id > 4) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto done;
     }
 
     do {
+        void *host;
         addr = qemu_get_be64(f);
 
         flags = addr & ~TARGET_PAGE_MASK;
@@ -346,7 +807,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
         if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
             if (version_id == 3) {
                 if (addr != ram_bytes_total()) {
-                    return -EINVAL;
+                    ret = -EINVAL;
+                    goto done;
                 }
             } else {
                 /* Synchronize RAM block list */
@@ -365,8 +827,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
 
                     QLIST_FOREACH(block, &ram_list.blocks, next) {
                         if (!strncmp(id, block->idstr, sizeof(id))) {
-                            if (block->length != length)
-                                return -EINVAL;
+                            if (block->length != length) {
+                                ret = -EINVAL;
+                                goto done;
+                            }
                             break;
                         }
                     }
@@ -374,7 +838,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
                     if (!block) {
                         fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                                 "accept migration\n", id);
-                        return -EINVAL;
+                        ret = -EINVAL;
+                        goto done;
                     }
 
                     total_ram_bytes -= length;
@@ -383,17 +848,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
         }
 
         if (flags & RAM_SAVE_FLAG_COMPRESS) {
-            void *host;
             uint8_t ch;
 
-            if (version_id == 3)
-                host = qemu_get_ram_ptr(addr);
-            else
-                host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                return -EINVAL;
-            }
-
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
             ch = qemu_get_byte(f);
             memset(host, ch, TARGET_PAGE_SIZE);
#ifndef _WIN32
@@ -403,21 +861,28 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
             }
#endif
         } else if (flags & RAM_SAVE_FLAG_PAGE) {
-            void *host;
-
-            if (version_id == 3)
-                host = qemu_get_ram_ptr(addr);
-            else
-                host = host_from_stream_offset(f, addr, flags);
-
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+        } else if (flags & RAM_SAVE_FLAG_XBRLE) {
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
+            if (load_xbrle(f, addr, host) < 0) {
+                ret = -EINVAL;
+                goto done;
+            }
         }
+
         if (qemu_file_has_error(f)) {
-            return -EIO;
+            ret = -EIO;
+            goto done;
         }
     } while (!(flags & RAM_SAVE_FLAG_EOS));
 
-    return 0;
+done:
+    DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n",
+            ret, seq_iter);
+    return ret;
}
 
void qemu_service_io(void)
diff --git a/block-migration.c b/block-migration.c
index 3e66f49..504df70 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -689,7 +689,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
     return 0;
}
 
-static void block_set_params(int blk_enable, int shared_base, void *opaque)
+static void block_set_params(int blk_enable, int shared_base,
+        int use_xbrle, int64_t xbrle_cache_size, void *opaque)
{
     block_mig_state.blk_enable = blk_enable;
     block_mig_state.shared_base = shared_base;
diff --git a/hmp-commands.hx b/hmp-commands.hx
old mode 100644
new mode 100755
index e5585ba..e49d5be
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -717,24 +717,27 @@ ETEXI
 
     {
         .name       = "migrate",
-        .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
-        .params     = "[-d] [-b] [-i] uri",
-        .help       = "migrate to URI (using -d to not wait for completion)"
-                     "\n\t\t\t -b for migration without shared storage with"
-                     " full copy of disk\n\t\t\t -i for migration without "
-                     "shared storage with incremental copy of disk "
-                     "(base image shared between src and destination)",
+        .args_type  = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
+        .params     = "[-d] [-b] [-i] [-x] uri",
+        .help       = "migrate to URI"
+                      "\n\t -d to not wait for completion"
+                      "\n\t -b for migration without shared storage with"
+                      " full copy of disk"
+                      "\n\t -i for migration without"
+                      " shared storage with incremental copy of disk"
+                      " (base image shared between source and destination)"
+                      "\n\t -x to use XBRLE page delta compression",
         .user_print = monitor_user_noop,      
         .mhandler.cmd_new = do_migrate,
     },
 
-
STEXI
address@hidden migrate [-d] [-b] [-i] @var{uri}
+@item migrate [-d] [-b] [-i] [-x] @var{uri}
@findex migrate
Migrate to @var{uri} (using -d to not wait for completion).
         -b for migration with full copy of disk
         -i for migration with incremental copy of disk (base image is shared)
+    -x to use XBRLE page delta compression
ETEXI
 
     {
@@ -753,10 +756,23 @@ Cancel the current VM migration.
ETEXI
 
     {
+        .name       = "migrate_set_cachesize",
+        .args_type  = "value:s",
+        .params     = "value",
+        .help       = "set cache size (in MB) for XBRLE migrations",
+        .mhandler.cmd = do_migrate_set_cachesize,
+    },
+
+STEXI
+@item migrate_set_cachesize @var{value}
+Set cache size (in MB) for xbrle migrations.
+ETEXI
+
+    {
         .name       = "migrate_set_speed",
         .args_type  = "value:o",
         .params     = "value",
-        .help       = "set maximum speed (in bytes) for migrations. "
+        .help       = "set maximum XBRLE cache size (in bytes) for migrations. "
         "Defaults to MB if no size suffix is specified, ie. B/K/M/G/T",
         .user_print = monitor_user_noop,
         .mhandler.cmd_new = do_migrate_set_speed,
diff --git a/hw/hw.h b/hw/hw.h
index 9d2cfc2..aa336ec 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -239,7 +239,8 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
int64_t qemu_ftell(QEMUFile *f);
int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence);
 
-typedef void SaveSetParamsHandler(int blk_enable, int shared, void * opaque);
+typedef void SaveSetParamsHandler(int blk_enable, int shared,
+        int use_xbrle, int64_t xbrle_cache_size, void *opaque);
typedef void SaveStateHandler(QEMUFile *f, void *opaque);
typedef int SaveLiveStateHandler(Monitor *mon, QEMUFile *f, int stage,
                                  void *opaque);
diff --git a/migration-exec.c b/migration-exec.c
index 14718dd..fe8254a 100644
--- a/migration-exec.c
+++ b/migration-exec.c
@@ -67,7 +67,9 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,
                                               int64_t bandwidth_limit,
                                               int detach,
                                               int blk,
-                                             int inc)
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size)
{
     FdMigrationState *s;
     FILE *f;
@@ -99,6 +101,8 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,
 
     s->mig_state.blk = blk;
     s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;
 
     s->state = MIG_STATE_ACTIVE;
     s->mon = NULL;
diff --git a/migration-fd.c b/migration-fd.c
index 6d14505..4a1ddbd 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -56,7 +56,9 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
                                             int64_t bandwidth_limit,
                                             int detach,
                                             int blk,
-                                           int inc)
+                        int inc,
+                        int use_xbrle,
+                        int64_t xbrle_cache_size)
{
     FdMigrationState *s;
 
@@ -82,6 +84,8 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
 
     s->mig_state.blk = blk;
     s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;
 
     s->state = MIG_STATE_ACTIVE;
     s->mon = NULL;
diff --git a/migration-tcp.c b/migration-tcp.c
index b55f419..4ca5bf6 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -81,7 +81,9 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
                                              int64_t bandwidth_limit,
                                              int detach,
                                              int blk,
-                                            int inc)
+                         int inc,
+                         int use_xbrle,
+                         int64_t xbrle_cache_size)
{
     struct sockaddr_in addr;
     FdMigrationState *s;
@@ -101,6 +103,8 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
 
     s->mig_state.blk = blk;
     s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;
 
     s->state = MIG_STATE_ACTIVE;
     s->mon = NULL;
diff --git a/migration-unix.c b/migration-unix.c
index 57232c0..0813902 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -80,7 +80,9 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,
                                               int64_t bandwidth_limit,
                                               int detach,
                                               int blk,
-                                             int inc)
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size)
{
     FdMigrationState *s;
     struct sockaddr_un addr;
@@ -100,6 +102,8 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,
 
     s->mig_state.blk = blk;
     s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;
 
     s->state = MIG_STATE_ACTIVE;
     s->mon = NULL;
diff --git a/migration.c b/migration.c
old mode 100644
new mode 100755
index 9ee8b17..02e58b9
--- a/migration.c
+++ b/migration.c
@@ -34,6 +34,11 @@
/* Migration speed throttling */
static uint32_t max_throttle = (32 << 20);
 
+/* Migration XBRLE cache size */
+#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
+
+static int64_t migrate_cache_size = DEFAULT_MIGRATE_CACHE_SIZE;
+
static MigrationState *current_migration;
 
int qemu_start_incoming_migration(const char *uri)
@@ -80,6 +85,7 @@ int do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data)
     int detach = qdict_get_try_bool(qdict, "detach", 0);
     int blk = qdict_get_try_bool(qdict, "blk", 0);
     int inc = qdict_get_try_bool(qdict, "inc", 0);
+    int use_xbrle = qdict_get_try_bool(qdict, "xbrle", 0);
     const char *uri = qdict_get_str(qdict, "uri");
 
     if (current_migration &&
@@ -90,17 +96,21 @@ int do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data)
 
     if (strstart(uri, "tcp:", &p)) {
         s = tcp_start_outgoing_migration(mon, p, max_throttle, detach,
-                                         blk, inc);
+                                         blk, inc, use_xbrle,
+                                         migrate_cache_size);
#if !defined(WIN32)
     } else if (strstart(uri, "exec:", &p)) {
         s = exec_start_outgoing_migration(mon, p, max_throttle, detach,
-                                          blk, inc);
+                                          blk, inc, use_xbrle,
+                                          migrate_cache_size);
     } else if (strstart(uri, "unix:", &p)) {
         s = unix_start_outgoing_migration(mon, p, max_throttle, detach,
-                                          blk, inc);
+                                          blk, inc, use_xbrle,
+                                          migrate_cache_size);
     } else if (strstart(uri, "fd:", &p)) {
         s = fd_start_outgoing_migration(mon, p, max_throttle, detach,
-                                        blk, inc);
+                                        blk, inc, use_xbrle,
+                                        migrate_cache_size);
#endif
     } else {
         monitor_printf(mon, "unknown migration protocol: %s\n", uri);
@@ -185,6 +195,30 @@ static void migrate_print_status(Monitor *mon, const char *name,
                         qdict_get_int(qdict, "total") >> 10);
}
 
+static void migrate_print_ram_status(Monitor *mon, const char *name,
+                                 const QDict *status_dict)
+{
+    QDict *qdict;
+    uint64_t overflow, cache_miss;
+
+    qdict = qobject_to_qdict(qdict_get(status_dict, name));
+
+    monitor_printf(mon, "transferred %s: %" PRIu64 " kbytes\n", name,
+                        qdict_get_int(qdict, "bytes") >> 10);
+    monitor_printf(mon, "transferred %s: %" PRIu64 " pages\n", name,
+                        qdict_get_int(qdict, "pages"));
+    overflow = qdict_get_int(qdict, "overflow");
+    if (overflow > 0) {
+        monitor_printf(mon, "overflow %s: %" PRIu64 " pages\n", name,
+            overflow);
+    }
+    cache_miss = qdict_get_int(qdict, "cache-miss");
+    if (cache_miss > 0) {
+        monitor_printf(mon, "cache-miss %s: %" PRIu64 " pages\n", name,
+            cache_miss);
+    }
+}
+
void do_info_migrate_print(Monitor *mon, const QObject *data)
{
     QDict *qdict;
@@ -198,6 +232,18 @@ void do_info_migrate_print(Monitor *mon, const QObject *data)
         migrate_print_status(mon, "ram", qdict);
     }
 
+    if (qdict_haskey(qdict, "ram-duplicate")) {
+        migrate_print_ram_status(mon, "ram-duplicate", qdict);
+    }
+
+    if (qdict_haskey(qdict, "ram-normal")) {
+        migrate_print_ram_status(mon, "ram-normal", qdict);
+    }
+
+    if (qdict_haskey(qdict, "ram-xbrle")) {
+        migrate_print_ram_status(mon, "ram-xbrle", qdict);
+    }
+
     if (qdict_haskey(qdict, "disk")) {
         migrate_print_status(mon, "disk", qdict);
     }
@@ -214,6 +260,20 @@ static void migrate_put_status(QDict *qdict, const char *name,
     qdict_put_obj(qdict, name, obj);
}
 
+static void migrate_put_ram_status(QDict *qdict, const char *name,
+                               uint64_t bytes, uint64_t pages,
+                               uint64_t overflow, uint64_t cache_miss)
+{
+    QObject *obj;
+
+    obj = qobject_from_jsonf("{ 'bytes': %" PRId64 ", "
+                               "'pages': %" PRId64 ", "
+                               "'overflow': %" PRId64 ", "
+                               "'cache-miss': %" PRId64 " }",
+                               bytes, pages, overflow, cache_miss);
+    qdict_put_obj(qdict, name, obj);
+}
+
void do_info_migrate(Monitor *mon, QObject **ret_data)
{
     QDict *qdict;
@@ -228,6 +288,20 @@ void do_info_migrate(Monitor *mon, QObject **ret_data)
             migrate_put_status(qdict, "ram", ram_bytes_transferred(),
                                ram_bytes_remaining(), ram_bytes_total());
 
+            if (s->use_xbrle) {
+                migrate_put_ram_status(qdict, "ram-duplicate",
+                                   dup_mig_bytes_transferred(),
+                                   dup_mig_pages_transferred(), 0, 0);
+                migrate_put_ram_status(qdict, "ram-normal",
+                                   norm_mig_bytes_transferred(),
+                                   norm_mig_pages_transferred(), 0, 0);
+                migrate_put_ram_status(qdict, "ram-xbrle",
+                                   xbrle_mig_bytes_transferred(),
+                                   xbrle_mig_pages_transferred(),
+                                   xbrle_mig_pages_overflow(),
+                                   xbrle_mig_pages_cache_miss());
+            }
+
             if (blk_mig_active()) {
                 migrate_put_status(qdict, "disk", blk_mig_bytes_transferred(),
                                    blk_mig_bytes_remaining(),
@@ -341,7 +415,8 @@ void migrate_fd_connect(FdMigrationState *s)
 
     DPRINTF("beginning savevm\n");
     ret = qemu_savevm_state_begin(s->mon, s->file, s->mig_state.blk,
-                                  s->mig_state.shared);
+                                  s->mig_state.shared, s->mig_state.use_xbrle,
+                                  s->mig_state.xbrle_cache_size);
     if (ret < 0) {
         DPRINTF("failed, %d\n", ret);
         migrate_fd_error(s);
@@ -448,3 +523,27 @@ int migrate_fd_close(void *opaque)
     qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
     return s->close(s);
}
+
+void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict)
+{
+    ssize_t bytes;
+    const char *value = qdict_get_str(qdict, "value");
+
+    bytes = strtosz(value, NULL);
+    if (bytes < 0) {
+        monitor_printf(mon, "invalid cache size: %s\n", value);
+        return;
+    }
+
+    /* On 32-bit hosts, QEMU is limited by virtual address space */
+    if (bytes > (2047 << 20) && HOST_LONG_BITS == 32) {
+        monitor_printf(mon, "cache can't exceed 2047 MB RAM limit on host\n");
+        return;
+    }
+    if (bytes != (uint64_t) bytes) {
+        monitor_printf(mon, "cache size too large\n");
+        return;
+    }
+    migrate_cache_size = bytes;
+}
+
diff --git a/migration.h b/migration.h
index d13ed4f..6dc0543 100644
--- a/migration.h
+++ b/migration.h
@@ -32,6 +32,8 @@ struct MigrationState
     void (*release)(MigrationState *s);
     int blk;
     int shared;
+    int use_xbrle;
+    int64_t xbrle_cache_size;
};
 
typedef struct FdMigrationState FdMigrationState;
@@ -76,7 +78,9 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,
                                               int64_t bandwidth_limit,
                                               int detach,
                                               int blk,
-                                             int inc);
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size);
 
int tcp_start_incoming_migration(const char *host_port);
 
@@ -85,7 +89,9 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
                                              int64_t bandwidth_limit,
                                              int detach,
                                              int blk,
-                                            int inc);
+                         int inc,
+                         int use_xbrle,
+                         int64_t xbrle_cache_size);
 
int unix_start_incoming_migration(const char *path);
 
@@ -94,7 +100,9 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,
                                               int64_t bandwidth_limit,
                                               int detach,
                                               int blk,
-                                             int inc);
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size);
 
int fd_start_incoming_migration(const char *path);
 
@@ -103,7 +111,9 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
                                             int64_t bandwidth_limit,
                                             int detach,
                                             int blk,
-                                           int inc);
+                        int inc,
+                        int use_xbrle,
+                        int64_t xbrle_cache_size);
 
void migrate_fd_monitor_suspend(FdMigrationState *s, Monitor *mon);
 
@@ -134,4 +144,11 @@ static inline FdMigrationState *migrate_to_fms(MigrationState *mig_state)
     return container_of(mig_state, FdMigrationState, mig_state);
}
 
+void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict);
+
+void arch_set_params(int blk_enable, int shared_base,
+        int use_xbrle, int64_t xbrle_cache_size, void *opaque);
+
+int xbrle_mig_active(void);
+
#endif
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 793cf1c..8fbe64b 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -431,13 +431,16 @@ EQMP
 
     {
         .name       = "migrate",
-        .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
-        .params     = "[-d] [-b] [-i] uri",
-        .help       = "migrate to URI (using -d to not wait for completion)"
-                     "\n\t\t\t -b for migration without shared storage with"
-                     " full copy of disk\n\t\t\t -i for migration without "
-                     "shared storage with incremental copy of disk "
-                     "(base image shared between src and destination)",
+        .args_type  = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
+        .params     = "[-d] [-b] [-i] [-x] uri",
+        .help       = "migrate to URI"
+                      "\n\t -d to not wait for completion"
+                      "\n\t -b for migration without shared storage with"
+                      " full copy of disk"
+                      "\n\t -i for migration without"
+                      " shared storage with incremental copy of disk"
+                      " (base image shared between source and destination)"
+                      "\n\t -x to use XBRLE page delta compression",
         .user_print = monitor_user_noop,      
         .mhandler.cmd_new = do_migrate,
     },
@@ -453,6 +456,7 @@ Arguments:
- "blk": block migration, full disk copy (json-bool, optional)
- "inc": incremental disk copy (json-bool, optional)
- "uri": Destination URI (json-string)
+- "xbrle": to use XBRLE page delta compression
 
Example:
 
@@ -494,6 +498,31 @@ Example:
EQMP
 
     {
+        .name       = "migrate_set_cachesize",
+        .args_type  = "value:s",
+        .params     = "value",
+        .help       = "set cache size (in MB) for xbrle migrations",
+        .mhandler.cmd = do_migrate_set_cachesize,
+    },
+
+SQMP
+migrate_set_cachesize
+---------------------
+
+Set cache size to be used by XBRLE migration
+
+Arguments:
+
+- "value": cache size in bytes (json-number)
+
+Example:
+
+-> { "execute": "migrate_set_cachesize", "arguments": { "value": 500M } }
+<- { "return": {} }
+
+EQMP
+
+    {
         .name       = "migrate_set_speed",
         .args_type  = "value:f",
         .params     = "value",
diff --git a/savevm.c b/savevm.c
index 4e49765..93b512b 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1141,7 +1141,8 @@ int register_savevm(DeviceState *dev,
                     void *opaque)
{
     return register_savevm_live(dev, idstr, instance_id, version_id,
-                                NULL, NULL, save_state, load_state, opaque);
+                                arch_set_params, NULL, save_state,
+                                load_state, opaque);
}
 
void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
@@ -1428,15 +1429,17 @@ static int vmstate_save(QEMUFile *f, SaveStateEntry *se)
#define QEMU_VM_SUBSECTION           0x05
 
int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
-                            int shared)
+                            int shared, int use_xbrle,
+                            int64_t xbrle_cache_size)
{
     SaveStateEntry *se;
 
     QTAILQ_FOREACH(se, &savevm_handlers, entry) {
         if(se->set_params == NULL) {
             continue;
-       }
-       se->set_params(blk_enable, shared, se->opaque);
+        }
+        se->set_params(blk_enable, shared, use_xbrle, xbrle_cache_size,
+                se->opaque);
     }
 
     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
@@ -1577,7 +1580,7 @@ static int qemu_savevm_state(Monitor *mon, QEMUFile *f)
 
     bdrv_flush_all();
 
-    ret = qemu_savevm_state_begin(mon, f, 0, 0);
+    ret = qemu_savevm_state_begin(mon, f, 0, 0, 0, 0);
     if (ret < 0)
         goto out;
 
diff --git a/sysemu.h b/sysemu.h
index b81a70e..4a854bd 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -44,6 +44,15 @@ uint64_t ram_bytes_remaining(void);
uint64_t ram_bytes_transferred(void);
uint64_t ram_bytes_total(void);
 
+uint64_t dup_mig_bytes_transferred(void);
+uint64_t dup_mig_pages_transferred(void);
+uint64_t norm_mig_bytes_transferred(void);
+uint64_t norm_mig_pages_transferred(void);
+uint64_t xbrle_mig_bytes_transferred(void);
+uint64_t xbrle_mig_pages_transferred(void);
+uint64_t xbrle_mig_pages_overflow(void);
+uint64_t xbrle_mig_pages_cache_miss(void);
+
int64_t cpu_get_ticks(void);
void cpu_enable_ticks(void);
void cpu_disable_ticks(void);
@@ -74,7 +83,8 @@ void qemu_announce_self(void);
void main_loop_wait(int nonblocking);
 
int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
-                            int shared);
+                            int shared, int use_xbrle,
+                            int64_t xbrle_cache_size);
int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f);
int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f);
void qemu_savevm_state_cancel(Monitor *mon, QEMUFile *f);
 
 
 

reply via email to

[Prev in Thread] Current Thread [Next in Thread]