[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC PATCH RDMA support v2: 6/6] send memory over RDMA as b
From: |
Michael R. Hines |
Subject: |
[Qemu-devel] [RFC PATCH RDMA support v2: 6/6] send memory over RDMA as blocks are iterated |
Date: |
Mon, 11 Feb 2013 17:49:57 -0500 |
From: "Michael R. Hines" <address@hidden>
Signed-off-by: Michael R. Hines <address@hidden>
---
arch_init.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
savevm.c | 59 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 139 insertions(+), 4 deletions(-)
diff --git a/arch_init.c b/arch_init.c
index dada6de..76092cc 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -42,6 +42,7 @@
#include "migration/migration.h"
#include "exec/gdbstub.h"
#include "hw/smbios.h"
+#include "qemu/rdma.h"
#include "exec/address-spaces.h"
#include "hw/pcspk.h"
#include "migration/page_cache.h"
@@ -170,6 +171,15 @@ static int is_dup_page(uint8_t *page)
VECTYPE val = SPLAT(page);
int i;
+ /*
+ * RFC RDMA: The empirical cost of searching for zero pages here
+ * plus the cost of communicating with the other side
+ * seems to take significantly more time than simply
+ * dumping the page into remote memory.
+ */
+ if (migrate_rdma_enabled())
+ return 0;
+
for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) {
if (!ALL_EQ(val, p[i])) {
return 0;
@@ -282,6 +292,44 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock *block,
ram_addr_t offset,
return size;
}
+static size_t save_rdma_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
+ int cont)
+{
+ size_t bytes_sent = 0;
+ ram_addr_t current_addr;
+
+ acct_info.norm_pages++;
+
+ /*
+ * use RDMA to send page
+ */
+ current_addr = block->offset + offset;
+ if (rdma_write(&rdma_mdata, current_addr,
+ TARGET_PAGE_SIZE)) {
+ fprintf(stderr, "rdma migration: write error!\n");
+ qemu_file_set_error(f, -EIO);
+ return 0;
+ }
+
+ /*
+ * do some polling
+ */
+ while (1) {
+ int ret = rdma_poll(&rdma_mdata);
+ if (ret == RDMA_WRID_NONE) {
+ break;
+ }
+ if (ret < 0) {
+ fprintf(stderr, "rdma migration: polling error!\n");
+ qemu_file_set_error(f, -EIO);
+ return 0;
+ }
+ }
+
+ bytes_sent += TARGET_PAGE_SIZE;
+ return bytes_sent;
+}
+
#define ENCODING_FLAG_XBZRLE 0x1
static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
@@ -474,6 +522,8 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
if (!last_stage) {
p = get_cached_data(XBZRLE.cache, current_addr);
}
+ } else if (migrate_rdma_enabled()) {
+ bytes_sent = save_rdma_page(f, block, offset, cont);
}
/* XBZRLE overflow or normal page */
@@ -601,12 +651,14 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
return 0;
}
+static int tprate = 1000;
+
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
int ret;
int i;
- int64_t t0;
- int total_sent = 0;
+ int64_t t0, tp0;
+ int total_sent = 0, last_total_sent = 0;
qemu_mutex_lock_ramlist();
@@ -625,23 +677,49 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
break;
}
total_sent += bytes_sent;
+ last_total_sent += bytes_sent;
acct_info.iterations++;
/* we want to check in the 1st loop, just in case it was the 1st time
and we had to sync the dirty bitmap.
qemu_get_clock_ns() is a bit expensive, so we only check each some
iterations
*/
+
+ /*
+ * RFC RDMA: Can we have something like this to periodically print
+ * out throughput? This is just a rough-sketch that
+     *                 partially worked for me. I assume there is a better way
+ * that everyone would prefer. Perhaps we could set a QMP
+ * command that toggled a "periodic printing" option that
+ * allowed more details to be printed on stdout.....?
+ */
if ((i & 63) == 0) {
- uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000;
+ uint64_t curr = qemu_get_clock_ns(rt_clock);
+ uint64_t t1 = (curr - t0) / 1000000;
+ double tp;
if (t1 > MAX_WAIT) {
DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
t1, i);
break;
}
+
+ if ((i % tprate) == 0) {
+ uint64_t tp1 = (curr - tp0) / 1000000;
+ tp = ((double) last_total_sent * 8.0 /
+ ((double) tp1 / 1000.0)) / 1000.0 / 1000.0;
+ printf("throughput: %f mbps\n", tp);
+ last_total_sent = 0;
+ tp0 = curr;
+ }
}
i++;
}
+ if (migrate_rdma_enabled() && rdma_write_flush(&rdma_mdata) < 0) {
+ qemu_file_set_error(f, -EIO);
+ return 0;
+ }
+
qemu_mutex_unlock_ramlist();
if (ret < 0) {
diff --git a/savevm.c b/savevm.c
index 304d1ef..4d0bef3 100644
--- a/savevm.c
+++ b/savevm.c
@@ -24,6 +24,7 @@
#include "config-host.h"
#include "qemu-common.h"
+#include "qemu/rdma.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "net/net.h"
@@ -417,7 +418,7 @@ int qemu_file_get_error(QEMUFile *f)
return f->last_error;
}
-static void qemu_file_set_error(QEMUFile *f, int ret)
+void qemu_file_set_error(QEMUFile *f, int ret)
{
if (f->last_error == 0) {
f->last_error = ret;
@@ -1613,6 +1614,7 @@ int qemu_savevm_state_iterate(QEMUFile *f)
{
SaveStateEntry *se;
int ret = 1;
+ static int first_time = 1;
QTAILQ_FOREACH(se, &savevm_handlers, entry) {
if (!se->ops || !se->ops->save_live_iterate) {
@@ -1643,6 +1645,30 @@ int qemu_savevm_state_iterate(QEMUFile *f)
}
}
if (ret != 0) {
+#ifdef RDMA_EXTRA_SYNC
+ /*
+     * We use two "sync" InfiniBand messages that happen during migration.
+ * One at the beginning and one at the end, just to be thorough.
+ * This is the first one.
+ */
+ if (first_time && migrate_rdma_enabled()) {
+ int r;
+ first_time = 0;
+ if (rdma_post_send_sync(&rdma_mdata, RDMA_WRID_SEND_EXTRA_SYNC)) {
+ fprintf(stderr,
+ "rdma migration: error posting extra send sync!\n");
+ return -EIO;
+ }
+
+ r = rdma_wait_for_wrid(&rdma_mdata, RDMA_WRID_SEND_EXTRA_SYNC);
+ if (r < 0) {
+ fprintf(stderr,
+ "rdma migration: qemu_savevm_state_iterate"
+ " sync polling error!\n");
+ return -EIO;
+ }
+ }
+#endif
return ret;
}
ret = qemu_file_get_error(f);
@@ -1703,8 +1729,30 @@ int qemu_savevm_state_complete(QEMUFile *f)
trace_savevm_section_end(se->section_id);
}
+ /*
+     * We use two "sync" InfiniBand messages that happen during migration.
+ * One at the beginning and one at the end, just to be thorough.
+ * This is the second one.
+ */
+ if (migrate_rdma_enabled()) {
+ if (rdma_post_send_sync(&rdma_mdata, RDMA_WRID_SEND_SYNC)) {
+ fprintf(stderr, "rdma migration: error posting send sync!\n");
+ return -EIO;
+ }
+ }
+
qemu_put_byte(f, QEMU_VM_EOF);
+ /* wait for RDMA sync message to complete */
+ if (migrate_rdma_enabled()) {
+ int ret = rdma_wait_for_wrid(&rdma_mdata, RDMA_WRID_SEND_SYNC);
+ if (ret < 0) {
+ fprintf(stderr, "rdma migration: qemu_savevm_state_full"
+ " sync polling error!\n");
+ return -EIO;
+ }
+ }
+
return qemu_file_get_error(f);
}
@@ -2014,6 +2062,15 @@ int qemu_loadvm_state(QEMUFile *f)
cpu_synchronize_all_post_init();
+ /* wait for RDMA sync message */
+ if (migrate_rdma_enabled()) {
+ ret = rdma_wait_for_wrid(&rdma_mdata, RDMA_WRID_RECV_SYNC);
+ if (ret < 0) {
+ fprintf(stderr, "rdma migration: qemu_loadvm_state_no_header"
+ " sync polling error!\n");
+ goto out;
+ }
+ }
ret = 0;
out:
--
1.7.10.4
- Re: [Qemu-devel] [RFC PATCH RDMA support v2: 5/6] connection-setup code between client/server, (continued)
- Re: [Qemu-devel] [RFC PATCH RDMA support v2: 5/6] connection-setup code between client/server, Orit Wasserman, 2013/02/18
- Re: [Qemu-devel] [RFC PATCH RDMA support v2: 5/6] connection-setup code between client/server, Michael R. Hines, 2013/02/19
- Re: [Qemu-devel] [RFC PATCH RDMA support v2: 5/6] connection-setup code between client/server, Orit Wasserman, 2013/02/19
- Re: [Qemu-devel] [RFC PATCH RDMA support v2: 5/6] connection-setup code between client/server, Michael R. Hines, 2013/02/19
- Re: [Qemu-devel] [RFC PATCH RDMA support v2: 5/6] connection-setup code between client/server, Paolo Bonzini, 2013/02/19
- Re: [Qemu-devel] [RFC PATCH RDMA support v2: 5/6] connection-setup code between client/server, Michael S. Tsirkin, 2013/02/21
- Re: [Qemu-devel] [RFC PATCH RDMA support v2: 5/6] connection-setup code between client/server, Paolo Bonzini, 2013/02/18
- Re: [Qemu-devel] [RFC PATCH RDMA support v2: 5/6] connection-setup code between client/server, Michael R. Hines, 2013/02/19
- Re: [Qemu-devel] [RFC PATCH RDMA support v2: 5/6] connection-setup code between client/server, Michael S. Tsirkin, 2013/02/21
- Re: [Qemu-devel] [RFC PATCH RDMA support v2: 5/6] connection-setup code between client/server, Michael S. Tsirkin, 2013/02/21
[Qemu-devel] [RFC PATCH RDMA support v2: 6/6] send memory over RDMA as blocks are iterated,
Michael R. Hines <=
[Qemu-devel] [RFC PATCH RDMA support v2: 1/6] add openfabrics RDMA libraries, configure options to build, Michael R. Hines, 2013/02/11