[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v0 4/7] migration: add background snapshot infrastru
From: |
Denis Plotnikov |
Subject: |
[Qemu-devel] [PATCH v0 4/7] migration: add background snapshot infrastructure |
Date: |
Fri, 29 Jun 2018 11:03:17 +0300 |
It allows intercepting the VM's RAM accesses and writing the accessed
pages into the snapshot.
Signed-off-by: Denis Plotnikov <address@hidden>
---
include/exec/ram_addr.h | 7 +
include/exec/ramlist.h | 4 +-
migration/migration.c | 2 +-
migration/ram.c | 333 ++++++++++++++++++++++++++++++++++++++--
migration/ram.h | 11 +-
5 files changed, 338 insertions(+), 19 deletions(-)
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 6cbc02aa0f..5b403d537d 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -36,6 +36,8 @@ struct RAMBlock {
char idstr[256];
/* RCU-enabled, writes protected by the ramlist lock */
QLIST_ENTRY(RAMBlock) next;
+ /* blocks used for background snapshot */
+ QLIST_ENTRY(RAMBlock) bgs_next;
QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
int fd;
size_t page_size;
@@ -49,6 +51,11 @@ struct RAMBlock {
unsigned long *unsentmap;
/* bitmap of already received pages in postcopy */
unsigned long *receivedmap;
+ /* The following 2 are for background snapshot */
+ /* Pages currently being copied */
+ unsigned long *touched_map;
+ /* Pages that have already been copied */
+ unsigned long *copied_map;
};
static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h
index 2e2ac6cb99..e0231d3bec 100644
--- a/include/exec/ramlist.h
+++ b/include/exec/ramlist.h
@@ -44,11 +44,13 @@ typedef struct {
unsigned long *blocks[];
} DirtyMemoryBlocks;
+typedef QLIST_HEAD(, RAMBlock) RamBlockList;
+
typedef struct RAMList {
QemuMutex mutex;
RAMBlock *mru_block;
/* RCU-enabled, writes protected by the ramlist lock. */
- QLIST_HEAD(, RAMBlock) blocks;
+ RamBlockList blocks;
DirtyMemoryBlocks *dirty_memory[DIRTY_MEMORY_NUM];
uint32_t version;
QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
diff --git a/migration/migration.c b/migration/migration.c
index 87096d23ef..131d0904e4 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1716,7 +1716,7 @@ static void migrate_handle_rp_req_pages(MigrationState
*ms, const char* rbname,
return;
}
- if (ram_save_queue_pages(rbname, start, len)) {
+ if (ram_save_queue_pages(NULL, rbname, start, len, NULL)) {
mark_source_rp_bad(ms);
}
}
diff --git a/migration/ram.c b/migration/ram.c
index 021d583b9b..286b79ad51 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -188,10 +188,21 @@ struct RAMSrcPageRequest {
RAMBlock *rb;
hwaddr offset;
hwaddr len;
+ void* page_copy;
QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};
+/* Page buffer used for background snapshot */
+typedef struct RAMPageBuffer {
+ /* Page buffer capacity in host pages */
+ int capacity;
+ /* Current number of pages in the buffer */
+ int used;
+ /* Event to notify that buffer usage is under capacity */
+ QemuEvent used_decreased;
+} RAMPageBuffer;
+
/* State of RAM for migration */
struct RAMState {
/* QEMUFile used for this migration */
@@ -230,6 +241,11 @@ struct RAMState {
/* Queue of outstanding page requests from the destination */
QemuMutex src_page_req_mutex;
QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
+ /* The following 2 are for background snapshot */
+ /* Buffer data to store copies of ram pages while async vm saving */
+ RAMPageBuffer page_buffer;
+ /* Event to notify that a page copying has just finished */
+ QemuEvent page_coping_done;
};
typedef struct RAMState RAMState;
@@ -250,6 +266,8 @@ struct PageSearchStatus {
unsigned long page;
/* Set once we wrap around */
bool complete_round;
+ /* Pointer to the cached page */
+ void* page_copy;
};
typedef struct PageSearchStatus PageSearchStatus;
@@ -958,7 +976,11 @@ static int ram_save_page(RAMState *rs, PageSearchStatus
*pss, bool last_stage)
RAMBlock *block = pss->block;
ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
- p = block->host + offset;
+ if (pss->page_copy) {
+ p = pss->page_copy;
+ } else {
+ p = block->host + offset;
+ }
trace_ram_save_page(block->idstr, (uint64_t)offset, p);
/* In doubt sent page as normal */
@@ -989,9 +1011,12 @@ static int ram_save_page(RAMState *rs, PageSearchStatus
*pss, bool last_stage)
* page would be stale
*/
xbzrle_cache_zero_page(rs, current_addr);
- ram_release_pages(block->idstr, offset, pages);
+ if (pss->page_copy) {
+ qemu_madvise(p, TARGET_PAGE_SIZE, MADV_DONTNEED);
+ }
} else if (!rs->ram_bulk_stage &&
- !migration_in_postcopy() && migrate_use_xbzrle()) {
+ !migration_in_postcopy() && migrate_use_xbzrle() &&
+ !migrate_background_snapshot()) {
pages = save_xbzrle_page(rs, &p, current_addr, block,
offset, last_stage);
if (!last_stage) {
@@ -1008,9 +1033,10 @@ static int ram_save_page(RAMState *rs, PageSearchStatus
*pss, bool last_stage)
ram_counters.transferred +=
save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_PAGE);
if (send_async) {
- qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
- migrate_release_ram() &
- migration_in_postcopy());
+ bool may_free = migrate_background_snapshot() ||
+ (migrate_release_ram() &&
+ migration_in_postcopy());
+ qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE, may_free);
} else {
qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
}
@@ -1251,7 +1277,7 @@ static bool find_dirty_block(RAMState *rs,
PageSearchStatus *pss, bool *again)
* @rs: current RAM state
* @offset: used to return the offset within the RAMBlock
*/
-static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
+static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset, void
**page_copy)
{
RAMBlock *block = NULL;
@@ -1261,10 +1287,14 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t
*offset)
QSIMPLEQ_FIRST(&rs->src_page_requests);
block = entry->rb;
*offset = entry->offset;
+ *page_copy = entry->page_copy;
if (entry->len > TARGET_PAGE_SIZE) {
entry->len -= TARGET_PAGE_SIZE;
entry->offset += TARGET_PAGE_SIZE;
+ if (entry->page_copy) {
+ entry->page_copy += TARGET_PAGE_SIZE/sizeof(void*);
+ }
} else {
memory_region_unref(block->mr);
QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
@@ -1291,9 +1321,10 @@ static bool get_queued_page(RAMState *rs,
PageSearchStatus *pss)
RAMBlock *block;
ram_addr_t offset;
bool dirty;
+ void *page_copy;
do {
- block = unqueue_page(rs, &offset);
+ block = unqueue_page(rs, &offset, &page_copy);
/*
* We're sending this page, and since it's postcopy nothing else
* will dirty it, and we must make sure it doesn't get sent again
@@ -1331,6 +1362,7 @@ static bool get_queued_page(RAMState *rs,
PageSearchStatus *pss)
*/
pss->block = block;
pss->page = offset >> TARGET_PAGE_BITS;
+ pss->page_copy = page_copy;
}
return !!block;
@@ -1368,17 +1400,25 @@ static void migration_page_queue_free(RAMState *rs)
*
* @rbname: Name of the RAMBLock of the request. NULL means the
* same that last one.
+ * @block: RAMBlock to use. block and rbname have mutually exclusive
+ * semantics, with the block taking priority when both are given.
* @start: starting address from the start of the RAMBlock
* @len: length (in bytes) to send
+ * @page_copy: the address the page should be written from
*/
-int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
+int ram_save_queue_pages(RAMBlock *block, const char *rbname,
+ ram_addr_t start, ram_addr_t len, void* page_copy)
{
RAMBlock *ramblock;
RAMState *rs = ram_state;
ram_counters.postcopy_requests++;
+
rcu_read_lock();
- if (!rbname) {
+
+ if (block) {
+ ramblock = block;
+ } else if (!rbname) {
/* Reuse last RAMBlock */
ramblock = rs->last_req_rb;
@@ -1413,6 +1453,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t
start, ram_addr_t len)
new_entry->rb = ramblock;
new_entry->offset = start;
new_entry->len = len;
+ new_entry->page_copy = page_copy;
memory_region_ref(ramblock->mr);
qemu_mutex_lock(&rs->src_page_req_mutex);
@@ -1450,7 +1491,8 @@ static int ram_save_target_page(RAMState *rs,
PageSearchStatus *pss,
* xbzrle can do better than compression.
*/
if (migrate_use_compression() &&
- (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
+ (rs->ram_bulk_stage || !migrate_use_xbzrle()) &&
+ !migrate_background_snapshot()) {
res = ram_save_compressed_page(rs, pss, last_stage);
} else {
res = ram_save_page(rs, pss, last_stage);
@@ -1508,6 +1550,226 @@ static int ram_save_host_page(RAMState *rs,
PageSearchStatus *pss,
return pages;
}
+/*
+ * Whether this migration stream supports postcopy.
+ *
+ * Moved up from the bottom of the file (see the removal hunk further
+ * down) so it is defined before the new background-snapshot helpers.
+ */
+static bool ram_has_postcopy(void *opaque)
+{
+ return migrate_postcopy_ram();
+}
+
+/*
+ * Change the protection of @length bytes of guest RAM starting at @addr.
+ *
+ * @addr is expected to be host-page aligned (mprotect requirement).
+ * NOTE(review): @length is uint64_t but mprotect takes size_t -- on a
+ * 32-bit host a >4G length would be silently truncated; confirm callers
+ * never pass one.
+ *
+ * Returns 0 on success, mprotect's negative result on failure (the
+ * error is reported here, recovery is up to the caller).
+ */
+static int mem_protect(void *addr, uint64_t length, int prot)
+{
+    int ret = mprotect(addr, length, prot);
+
+    if (ret < 0) {
+        /* %lu is wrong for uint64_t on 32-bit hosts: use PRIu64 */
+        error_report("%s: Can't change protection on ram block at %p"
+                     " (len: %" PRIu64 ")",
+                     __func__, addr, length);
+    }
+
+    return ret;
+}
+
+/* Write-protect @length bytes at @addr. Returns 0 on success. */
+static int ram_set_ro(void* addr, uint64_t length)
+{
+ return mem_protect(addr, length, PROT_READ);
+}
+
+/* Restore read-write access to @length bytes at @addr. Returns 0 on success. */
+static int ram_set_rw(void* addr, uint64_t length)
+{
+ return mem_protect(addr, length, PROT_READ | PROT_WRITE);
+}
+
+/* The list of RAM blocks participating in the background snapshot. */
+static RamBlockList ram_blocks;
+
+/* Accessor for the (single, global) background-snapshot block list. */
+RamBlockList *ram_blocks_get(void)
+{
+ return &ram_blocks;
+}
+
+/*
+ * Reference every RAMBlock and link it into @blocks.
+ *
+ * Holds the ramlist mutex while walking ram_list.blocks.  Because of
+ * QLIST_INSERT_HEAD, @blocks ends up in reverse order relative to
+ * ram_list.blocks; no visible caller depends on the ordering.
+ * Pair with ram_blocks_clear() to drop the references.
+ */
+void ram_blocks_fill(RamBlockList *blocks)
+{
+ RAMBlock *block = NULL;
+
+ qemu_mutex_lock_ramlist();
+ QLIST_FOREACH(block, &ram_list.blocks, next) {
+ memory_region_ref(block->mr);
+ QLIST_INSERT_HEAD(blocks, block, bgs_next);
+ }
+ qemu_mutex_unlock_ramlist();
+}
+
+/*
+ * Unlink every block from @blocks and drop the references taken by
+ * ram_blocks_fill().
+ *
+ * Removing the current element while iterating requires the _SAFE
+ * variant: QLIST_REMOVE poisons block->bgs_next, so the plain
+ * QLIST_FOREACH used before would follow a dead link on the next step.
+ */
+void ram_blocks_clear(RamBlockList *blocks)
+{
+    RAMBlock *block = NULL;
+    RAMBlock *next_block = NULL;
+
+    QLIST_FOREACH_SAFE(block, blocks, bgs_next, next_block) {
+        QLIST_REMOVE(block, bgs_next);
+        memory_region_unref(block->mr);
+    }
+}
+
+/*
+ * Write-protect every block on @blocks.
+ *
+ * Stops at the first failure and returns that error.  Blocks already
+ * protected are NOT rolled back here; the caller is expected to
+ * recover with ram_blocks_set_rw().  Returns 0 on success.
+ */
+int ram_blocks_set_ro(RamBlockList *blocks)
+{
+ RAMBlock *block = NULL;
+ int ret = 0;
+
+ QLIST_FOREACH(block, blocks, bgs_next) {
+ ret = ram_set_ro(block->host, block->used_length);
+ if (ret) {
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Restore read-write access on every block of @blocks.
+ *
+ * Stops at the first failure and returns that error; returns 0 when
+ * all blocks were made writable again.
+ */
+int ram_blocks_set_rw(RamBlockList *blocks)
+{
+ RAMBlock *block = NULL;
+ int ret = 0;
+
+ QLIST_FOREACH(block, blocks, bgs_next) {
+ ret = ram_set_rw(block->host, block->used_length);
+ if (ret) {
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Release one slot of the snapshot page buffer.
+ *
+ * The event is reset before the decrement and set after it, so a
+ * waiter in ram_page_buffer_increase_used_wait() that saw the buffer
+ * full cannot miss the wakeup produced by this decrement.
+ */
+static void ram_page_buffer_decrease_used(void)
+{
+ qemu_event_reset(&ram_state->page_buffer.used_decreased);
+ atomic_dec(&ram_state->page_buffer.used);
+ qemu_event_set(&ram_state->page_buffer.used_decreased);
+}
+
+/*
+ * Reserve one slot of the snapshot page buffer, blocking while the
+ * buffer is at capacity.
+ *
+ * Lock-free: a CAS bumps 'used' only if it still holds the value we
+ * read, so concurrent reservers cannot overshoot 'capacity'.  When the
+ * buffer is full we sleep on page_buffer.used_decreased, which
+ * ram_page_buffer_decrease_used() fires after every release.
+ */
+static void ram_page_buffer_increase_used_wait(void)
+{
+    RAMState *rs = ram_state;
+    int *used_ptr = &rs->page_buffer.used;
+
+    while (true) {
+        int used = atomic_read(used_ptr);
+
+        if (used < rs->page_buffer.capacity) {
+            if (atomic_cmpxchg(used_ptr, used, used + 1) == used) {
+                return;
+            }
+            /* lost the race with another reserver -- retry */
+        } else {
+            /* buffer full: wait until some page is released */
+            qemu_event_wait(&rs->page_buffer.used_decreased);
+        }
+    }
+}
+
+/*
+ * Allocate a one-page buffer for a copied RAM page, blocking while the
+ * page buffer is at capacity.  Returns NULL if mmap() fails (the
+ * reserved slot is given back in that case).
+ *
+ * NOTE(review): one mmap() syscall per copied page is costly on the
+ * fault path; a preallocated pool would avoid it.  Also assumes
+ * TARGET_PAGE_SIZE is a host-page multiple -- TODO confirm.
+ */
+static void *ram_page_buffer_get(void)
+{
+ void *page;
+ ram_page_buffer_increase_used_wait();
+ page = mmap(0, TARGET_PAGE_SIZE, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS,
+ -1, 0);
+ if (page == MAP_FAILED) {
+ ram_page_buffer_decrease_used();
+ page = NULL;
+ }
+ return page;
+}
+
+/*
+ * Return a page obtained from ram_page_buffer_get(): release the slot,
+ * then drop the backing memory with MADV_DONTNEED.
+ *
+ * NOTE(review): the mapping created by ram_page_buffer_get() is never
+ * munmap()ed -- MADV_DONTNEED frees the memory but leaks the VMA;
+ * verify this is intentional.
+ */
+static int ram_page_buffer_free(void *buffer)
+{
+ ram_page_buffer_decrease_used();
+ return qemu_madvise(buffer, TARGET_PAGE_SIZE, MADV_DONTNEED);
+}
+
+/*
+ * Copy page @page_nr of @block into a fresh buffer before the guest
+ * modifies it, so the snapshot stream sees the pre-fault contents.
+ *
+ * touched_map claims the page (exactly one thread copies it);
+ * copied_map announces the copy is finished.
+ *
+ * Returns  1 if this caller made the copy (*page_copy set),
+ *          0 if another thread already copied / is copying the page,
+ *         -1 on buffer allocation or mprotect failure.
+ *
+ * NOTE(review): between test_bit_atomic() and qemu_event_reset() the
+ * copier may set copied_map and fire page_coping_done; the reset then
+ * clears the event and this thread blocks until some unrelated copy
+ * completes.  Re-checking the bit after the reset, before waiting,
+ * would close that window -- please confirm.
+ */
+static int ram_try_copy_page(RAMBlock *block, unsigned long page_nr,
+ void** page_copy)
+{
+ void *host_page;
+
+ if (test_and_set_bit_atomic(page_nr, block->touched_map)) {
+ while (!test_bit_atomic(page_nr, block->copied_map)) {
+ /* the page is being copied -- wait for the end of the copying
+ * and check once again */
+ qemu_event_reset(&ram_state->page_coping_done);
+ qemu_event_wait(&ram_state->page_coping_done);
+ }
+ return 0;
+ }
+
+ *page_copy = ram_page_buffer_get();
+ if (!*page_copy) {
+ return -1;
+ }
+
+ host_page = block->host + (page_nr << TARGET_PAGE_BITS);
+ memcpy(*page_copy, host_page, TARGET_PAGE_SIZE);
+
+ /* let the faulting writer proceed before announcing the copy */
+ if (ram_set_rw(host_page, TARGET_PAGE_SIZE)) {
+ ram_page_buffer_free(*page_copy);
+ *page_copy = NULL;
+ return -1;
+ }
+
+ /* publish the copied data before setting the "copied" bit */
+ smp_mb();
+ set_bit_atomic(page_nr, block->copied_map);
+ qemu_event_set(&ram_state->page_coping_done);
+
+ return 1;
+}
+
+/*
+ * Find the snapshot RAMBlock containing host address @address and
+ * return the TARGET_PAGE_MASK-aligned offset of the page inside it
+ * via @page_offset.  Returns NULL if no block covers the address.
+ */
+static RAMBlock *find_ram_block(uint8_t *address, ram_addr_t *page_offset)
+{
+    RAMBlock *block = NULL;
+
+    QLIST_FOREACH(block, ram_blocks_get(), bgs_next) {
+        /* This happens when the block is not mapped. */
+        if (block->host == NULL) {
+            continue;
+        }
+
+        /*
+         * The lower bound must be checked too: for address < block->host
+         * the negative ptrdiff_t would convert to a huge unsigned value
+         * in the comparison against max_length and falsely match.
+         */
+        if (address >= block->host &&
+            (uint64_t)(address - block->host) < block->max_length) {
+            *page_offset = (address - block->host) & TARGET_PAGE_MASK;
+            return block;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Handle a write fault on write-protected snapshot RAM at @address.
+ *
+ * Copies the faulting page (unless already copied by another thread)
+ * and queues the copy for writing into the snapshot stream.
+ *
+ * Returns 0 on success, a negative value on error.  (The original
+ * one-line comment said "0 < - on error"; every error path here in
+ * fact returns -1, i.e. errors are negative.)
+ */
+int ram_process_page_fault(void *address)
+{
+ int ret;
+ void *page_copy = NULL;
+ unsigned long page_nr;
+ ram_addr_t offset;
+
+ RAMBlock *block = find_ram_block(address, &offset);
+
+ if (!block) {
+ return -1;
+ }
+
+ page_nr = offset >> TARGET_PAGE_BITS;
+
+ ret = ram_try_copy_page(block, page_nr, &page_copy);
+
+ if (ret < 0) {
+ return ret;
+ } else if (ret > 0) {
+ /* we made the copy -- hand it to the snapshot writer thread */
+ if (ram_save_queue_pages(block, NULL, offset,
+ TARGET_PAGE_SIZE, page_copy)) {
+ ram_page_buffer_free(page_copy);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
/**
* ram_find_and_save_block: finds a dirty page and sends it to f
*
@@ -1536,6 +1798,7 @@ static int ram_find_and_save_block(RAMState *rs, bool
last_stage)
pss.block = rs->last_seen_block;
pss.page = rs->last_page;
pss.complete_round = false;
+ pss.page_copy = NULL;
if (!pss.block) {
pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
@@ -1548,11 +1811,27 @@ static int ram_find_and_save_block(RAMState *rs, bool
last_stage)
if (!found) {
/* priority queue empty, so just search for something dirty */
found = find_dirty_block(rs, &pss, &again);
+
+ if (found && migrate_background_snapshot()) {
+ // make a copy of the page and pass it to the page search
status
+ int ret;
+ ret = ram_try_copy_page(pss.block, pss.page, &pss.page_copy);
+ if (ret == 0) {
+ found = false;
+ pages = 0;
+ } else if(ret < 0) {
+ return ret;
+ }
+ }
}
if (found) {
pages = ram_save_host_page(rs, &pss, last_stage);
}
+
+ if (pss.page_copy) {
+ ram_page_buffer_decrease_used();
+ }
} while (!pages && again);
rs->last_seen_block = pss.block;
@@ -1600,9 +1879,15 @@ static void xbzrle_load_cleanup(void)
static void ram_state_cleanup(RAMState **rsp)
{
+ if (migrate_background_snapshot()) {
+ qemu_event_destroy(&(*rsp)->page_buffer.used_decreased);
+ qemu_event_destroy(&(*rsp)->page_coping_done);
+ }
+
migration_page_queue_free(*rsp);
qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
+
g_free(*rsp);
*rsp = NULL;
}
@@ -1638,6 +1923,13 @@ static void ram_save_cleanup(void *opaque)
block->bmap = NULL;
g_free(block->unsentmap);
block->unsentmap = NULL;
+
+ if (migrate_background_snapshot()) {
+ g_free(block->touched_map);
+ block->touched_map = NULL;
+ g_free(block->copied_map);
+ block->copied_map = NULL;
+ }
}
xbzrle_cleanup();
@@ -1652,6 +1944,9 @@ static void ram_state_reset(RAMState *rs)
rs->last_page = 0;
rs->last_version = ram_list.version;
rs->ram_bulk_stage = true;
+
+ rs->page_buffer.capacity = 1000; // in number of pages
+ rs->page_buffer.used = 0;
}
#define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -2129,6 +2424,11 @@ static int ram_state_init(RAMState **rsp)
*/
(*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
+ if (migrate_background_snapshot()) {
+ qemu_event_init(&ram_state->page_buffer.used_decreased, false);
+ qemu_event_init(&ram_state->page_coping_done, false);
+ }
+
ram_state_reset(*rsp);
return 0;
@@ -2145,10 +2445,16 @@ static void ram_list_init_bitmaps(void)
pages = block->max_length >> TARGET_PAGE_BITS;
block->bmap = bitmap_new(pages);
bitmap_set(block->bmap, 0, pages);
+
if (migrate_postcopy_ram()) {
block->unsentmap = bitmap_new(pages);
bitmap_set(block->unsentmap, 0, pages);
}
+
+ if (migrate_background_snapshot()) {
+ block->touched_map = bitmap_new(pages);
+ block->copied_map = bitmap_new(pages);
+ }
}
}
}
@@ -2974,11 +3280,6 @@ static int ram_load(QEMUFile *f, void *opaque, int
version_id)
return ret;
}
-static bool ram_has_postcopy(void *opaque)
-{
- return migrate_postcopy_ram();
-}
-
static SaveVMHandlers savevm_ram_handlers = {
.save_setup = ram_save_setup,
.save_live_iterate = ram_save_iterate,
diff --git a/migration/ram.h b/migration/ram.h
index 64d81e9f1d..627c2efb51 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -31,6 +31,7 @@
#include "qemu-common.h"
#include "exec/cpu-common.h"
+#include "exec/ramlist.h"
extern MigrationStats ram_counters;
extern XBZRLECacheStats xbzrle_counters;
@@ -45,7 +46,9 @@ int multifd_load_setup(void);
int multifd_load_cleanup(Error **errp);
uint64_t ram_pagesize_summary(void);
-int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len);
+int ram_save_queue_pages(RAMBlock *block, const char *rbname,
+ ram_addr_t start, ram_addr_t len,
+ void* cached_page);
void acct_update_position(QEMUFile *f, size_t size, bool zero);
void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
unsigned long pages);
@@ -61,5 +64,11 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64_t
size);
int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr);
void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr);
void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr);
+int ram_process_page_fault(void *address);
+RamBlockList *ram_blocks_get(void);
+void ram_blocks_fill(RamBlockList *blocks);
+void ram_blocks_clear(RamBlockList *blocks);
+int ram_blocks_set_ro(RamBlockList *blocks);
+int ram_blocks_set_rw(RamBlockList *blocks);
#endif
--
2.17.0
- [Qemu-devel] [PATCH v0 0/7] Background snapshots, Denis Plotnikov, 2018/06/29
- [Qemu-devel] [PATCH v0 1/7] migration: add background snapshot capability, Denis Plotnikov, 2018/06/29
- [Qemu-devel] [PATCH v0 7/7] migration: add background snapshotting, Denis Plotnikov, 2018/06/29
- [Qemu-devel] [PATCH v0 5/7] kvm: add failed memeory access exit reason, Denis Plotnikov, 2018/06/29
- [Qemu-devel] [PATCH v0 2/7] bitops: add some atomic versions of bitmap operations, Denis Plotnikov, 2018/06/29
- [Qemu-devel] [PATCH v0 3/7] threads: add infrastructure to process sigsegv, Denis Plotnikov, 2018/06/29
- [Qemu-devel] [PATCH v0 4/7] migration: add background snapshot infrastructure,
Denis Plotnikov <=
- [Qemu-devel] [PATCH v0 6/7] kvm: add vCPU failed memeory access processing, Denis Plotnikov, 2018/06/29
- Re: [Qemu-devel] [PATCH v0 0/7] Background snapshots, Dr. David Alan Gilbert, 2018/06/29