Signed-off-by: Michael R. Hines <address@hidden>
---
include/migration/migration.h | 10 ++++++++
migration.c | 56 +++++++++++++++++++++++++++++++++++------
2 files changed, 58 insertions(+), 8 deletions(-)
diff --git a/include/migration/migration.h b/include/migration/migration.h
index d121409..796cf3d 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -20,6 +20,7 @@
#include "qemu/notify.h"
#include "qapi/error.h"
#include "migration/vmstate.h"
+#include "migration/rdma.h"
#include "qapi-types.h"
struct MigrationParams {
@@ -55,6 +56,9 @@ struct MigrationState
bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
int64_t xbzrle_cache_size;
bool complete;
+
+ RDMAData rdma;
+ double mbps;
};
void process_incoming_migration(QEMUFile *f);
@@ -75,6 +79,10 @@ void tcp_start_incoming_migration(const char *host_port,
Error **errp);
void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
+void rdma_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
+/* Incoming "rdma:" entry point; unlike the tcp/unix ones it returns int. */
+int rdma_start_incoming_migration(const char *host_port, Error **errp);
+
void unix_start_incoming_migration(const char *path, Error **errp);
void unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp);
@@ -106,6 +114,7 @@ uint64_t dup_mig_bytes_transferred(void);
uint64_t dup_mig_pages_transferred(void);
uint64_t norm_mig_bytes_transferred(void);
uint64_t norm_mig_pages_transferred(void);
+uint64_t delta_norm_mig_bytes_transferred(void);
uint64_t xbzrle_mig_bytes_transferred(void);
uint64_t xbzrle_mig_pages_transferred(void);
uint64_t xbzrle_mig_pages_overflow(void);
@@ -130,6 +139,7 @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t
*new_buf, int slen,
int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
int migrate_use_xbzrle(void);
+int migrate_use_rdma(void);
int64_t migrate_xbzrle_cache_size(void);
int64_t xbzrle_cache_resize(int64_t new_size);
diff --git a/migration.c b/migration.c
index 11725ae..aae2f66 100644
--- a/migration.c
+++ b/migration.c
@@ -25,6 +25,7 @@
#include "qmp-commands.h"
//#define DEBUG_MIGRATION
+//#define DEBUG_MIGRATION_VERBOSE
#ifdef DEBUG_MIGRATION
#define DPRINTF(fmt, ...) \
@@ -34,6 +35,14 @@
do { } while (0)
#endif
+#ifdef DEBUG_MIGRATION_VERBOSE /* second, noisier debug level */
+#define DDPRINTF(fmt, ...) \
+ do { printf("migration: " fmt, ## __VA_ARGS__); } while (0)
+#else /* !DEBUG_MIGRATION_VERBOSE: DDPRINTF is an empty statement */
+#define DDPRINTF(fmt, ...) \
+ do { } while (0)
+#endif /* DEBUG_MIGRATION_VERBOSE */
+
enum {
MIG_STATE_ERROR,
MIG_STATE_SETUP,
@@ -76,6 +85,8 @@ void qemu_start_incoming_migration(const char *uri, Error
**errp)
if (strstart(uri, "tcp:", &p))
tcp_start_incoming_migration(p, errp);
+ else if (strstart(uri, "rdma:", &p))
+ rdma_start_incoming_migration(p, errp);
#if !defined(WIN32)
else if (strstart(uri, "exec:", &p))
exec_start_incoming_migration(p, errp);
@@ -130,6 +141,14 @@ void process_incoming_migration(QEMUFile *f)
* units must be in seconds */
static uint64_t max_downtime = 30000000;
+/*
+ * RFC: this probably ought to be a QMP setting. Once the migration thread
+ * has looped this many times it stops calling qemu_savevm_state_iterate();
+ * -1 disables the cap. It exists because RDMA and TCP workloads are hard to
+ * compare: TCP migrations never complete without some kind of iteration cap.
+ */
+static int max_iterations = 30;
+
uint64_t migrate_max_downtime(void)
{
return max_downtime;
@@ -429,6 +448,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
if (strstart(uri, "tcp:", &p)) {
tcp_start_outgoing_migration(s, p, &local_err);
+ } else if (strstart(uri, "rdma:", &p)) {
+ rdma_start_outgoing_migration(s, p, &local_err);
#if !defined(WIN32)
} else if (strstart(uri, "exec:", &p)) {
exec_start_outgoing_migration(s, p, &local_err);
@@ -502,6 +523,16 @@ int migrate_use_xbzrle(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
}
+/* Returns the rdma.enabled flag of the current migration state. No separate
+ * 'capability' is consulted; an 'rdma:host:port' URI is expected instead. */
+int migrate_use_rdma(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+    return s->rdma.enabled;
+}
+
int64_t migrate_xbzrle_cache_size(void)
{
MigrationState *s;
@@ -550,7 +581,7 @@ static int buffered_put_buffer(void *opaque, const uint8_t
*buf,
MigrationState *s = opaque;
ssize_t error;
- DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos);
+ DDPRINTF("putting %d bytes at %" PRId64 "\n", size, pos);
error = qemu_file_get_error(s->file);
if (error) {
@@ -563,7 +594,7 @@ static int buffered_put_buffer(void *opaque, const uint8_t
*buf,
}
if (size > (s->buffer_capacity - s->buffer_size)) {
- DPRINTF("increasing buffer capacity from %zu by %zu\n",
+ DDPRINTF("increasing buffer capacity from %zu by %d\n",
s->buffer_capacity, size + 1024);
s->buffer_capacity += size + 1024;
@@ -661,7 +692,7 @@ static void *buffered_file_thread(void *opaque)
int64_t sleep_time = 0;
int64_t max_size = 0;
bool last_round = false;
- int ret;
+ int ret, iterations = 0;
qemu_mutex_lock_iothread();
DPRINTF("beginning savevm\n");
@@ -687,11 +718,15 @@ static void *buffered_file_thread(void *opaque)
qemu_mutex_unlock_iothread();
break;
}
+
+ iterations++;
+
if (s->bytes_xfer < s->xfer_limit) {
- DPRINTF("iterate\n");
+ DPRINTF("iterate %d max %d\n", iterations, max_iterations);
pending_size = qemu_savevm_state_pending(s->file, max_size);
DPRINTF("pending size %lu max %lu\n", pending_size, max_size);
- if (pending_size && pending_size >= max_size) {
+ if (pending_size && pending_size >= max_size &&
+ (max_iterations == -1 || iterations < max_iterations)) {
ret = qemu_savevm_state_iterate(s->file);
if (ret < 0) {
qemu_mutex_unlock_iothread();
@@ -730,14 +765,18 @@ static void *buffered_file_thread(void *opaque)
qemu_mutex_unlock_iothread();
current_time = qemu_get_clock_ms(rt_clock);
if (current_time >= initial_time + BUFFER_DELAY) {
- uint64_t transferred_bytes = s->bytes_xfer;
+ uint64_t transferred_bytes = migrate_use_rdma() ?
+ delta_norm_mig_bytes_transferred() : s->bytes_xfer;
uint64_t time_spent = current_time - initial_time - sleep_time;
double bandwidth = transferred_bytes / time_spent;
max_size = bandwidth * migrate_max_downtime() / 1000000;
+ s->mbps = ((double) transferred_bytes * 8.0 /
+ ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
DPRINTF("transferred %" PRIu64 " time_spent %" PRIu64
- " bandwidth %g max_size %" PRId64 "\n",
- transferred_bytes, time_spent, bandwidth, max_size);
+ " bandwidth %g (%0.2f mbps) max_size %" PRId64 "\n",
+ transferred_bytes, time_spent,
+ bandwidth, s->mbps, max_size);
/* if we haven't sent anything, we don't want to recalculate
10000 is a small enough number for our purposes */
if (s->dirty_bytes_rate && transferred_bytes > 10000) {
@@ -774,6 +813,7 @@ static const QEMUFileOps buffered_file_ops = {
.rate_limit = buffered_rate_limit,
.get_rate_limit = buffered_get_rate_limit,
.set_rate_limit = buffered_set_rate_limit,
+ .send_barrier = qemu_rdma_send_barrier,
};
void migrate_fd_connect(MigrationState *s)