qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC PATCH RDMA support v3: 06/10] Introduce 'max_iterations' and Call out to migration-rdma.c when requested


From: Michael R. Hines
Subject: Re: [Qemu-devel] [RFC PATCH RDMA support v3: 06/10] Introduce 'max_iterations' and Call out to migration-rdma.c when requested
Date: Mon, 11 Mar 2013 12:30:41 -0400
User-agent: Mozilla/5.0 (X11; Linux i686; rv:17.0) Gecko/20130106 Thunderbird/17.0.2

Will do - I'll make another patch for these.

I don't have a good answer for the "computed bandwidth" idea,
but at least some initial max_iterations (even if disabled by default)
would go a long way to helping the problem.....

On 03/11/2013 09:49 AM, Paolo Bonzini wrote:
Il 11/03/2013 05:33, address@hidden ha scritto:
From: "Michael R. Hines" <address@hidden>

Very little changes here except for halting the migration after a maximum
number of iterations is reached.

When comparing against TCP, the migration never ends if we don't cap
the migrations somehow..... just an idea for now.
This makes sense, but please: a) make it a separate patch; b) add QMP
commands for it; c) make it disabled by default.

There are two uses of migrate_use_rdma().  One is to disable the search
for zero pages.  Perhaps we can do that automatically based on the
current computed bandwidth?  At some point, it costs less to just send
the data down the wire.

The other is to use the RDMA-specific primitive to send pages.  I hope
that Orit's work will make that unnecessary; in the meanwhile, however,
the latter is okay.

The "verbose logging" should be yet another patch.  Many of the messages
you touched are gone in the most recent version of the code.  I suspect
that, for the others, it's better to use tracepoints (see the
trace-events file) instead.

Paolo

Signed-off-by: Michael R. Hines <address@hidden>
---
  include/migration/migration.h |   10 ++++++++
  migration.c                   |   56 +++++++++++++++++++++++++++++++++++------
  2 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index d121409..796cf3d 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -20,6 +20,7 @@
  #include "qemu/notify.h"
  #include "qapi/error.h"
  #include "migration/vmstate.h"
+#include "migration/rdma.h"
  #include "qapi-types.h"
struct MigrationParams {
@@ -55,6 +56,9 @@ struct MigrationState
      bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
      int64_t xbzrle_cache_size;
      bool complete;
+
+    RDMAData rdma;
+    double mbps;
  };
void process_incoming_migration(QEMUFile *f);
@@ -75,6 +79,10 @@ void tcp_start_incoming_migration(const char *host_port, 
Error **errp);
void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp); +void rdma_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
+
+int rdma_start_incoming_migration(const char * host_port, Error **errp);
+
  void unix_start_incoming_migration(const char *path, Error **errp);
void unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp);
@@ -106,6 +114,7 @@ uint64_t dup_mig_bytes_transferred(void);
  uint64_t dup_mig_pages_transferred(void);
  uint64_t norm_mig_bytes_transferred(void);
  uint64_t norm_mig_pages_transferred(void);
+uint64_t delta_norm_mig_bytes_transferred(void);
  uint64_t xbzrle_mig_bytes_transferred(void);
  uint64_t xbzrle_mig_pages_transferred(void);
  uint64_t xbzrle_mig_pages_overflow(void);
@@ -130,6 +139,7 @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t 
*new_buf, int slen,
  int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
int migrate_use_xbzrle(void);
+int migrate_use_rdma(void);
  int64_t migrate_xbzrle_cache_size(void);
int64_t xbzrle_cache_resize(int64_t new_size);
diff --git a/migration.c b/migration.c
index 11725ae..aae2f66 100644
--- a/migration.c
+++ b/migration.c
@@ -25,6 +25,7 @@
  #include "qmp-commands.h"
//#define DEBUG_MIGRATION
+//#define DEBUG_MIGRATION_VERBOSE
#ifdef DEBUG_MIGRATION
  #define DPRINTF(fmt, ...) \
@@ -34,6 +35,14 @@
      do { } while (0)
  #endif
+#ifdef DEBUG_MIGRATION_VERBOSE
+#define DDPRINTF(fmt, ...) \
+    do { printf("migration: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DDPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
  enum {
      MIG_STATE_ERROR,
      MIG_STATE_SETUP,
@@ -76,6 +85,8 @@ void qemu_start_incoming_migration(const char *uri, Error 
**errp)
if (strstart(uri, "tcp:", &p))
          tcp_start_incoming_migration(p, errp);
+    else if (strstart(uri, "rdma:", &p))
+        rdma_start_incoming_migration(p, errp);
  #if !defined(WIN32)
      else if (strstart(uri, "exec:", &p))
          exec_start_incoming_migration(p, errp);
@@ -130,6 +141,14 @@ void process_incoming_migration(QEMUFile *f)
   * units must be in seconds */
  static uint64_t max_downtime = 30000000;
+/*
+ * RFC: We probably need a QMP setting for this, but the point
+ * of it is that it's hard to compare RDMA workloads
+ * vs. TCP workloads because the TCP migrations never
+ * complete without some kind of iteration cap.
+ */
+static int max_iterations = 30;
+
  uint64_t migrate_max_downtime(void)
  {
      return max_downtime;
@@ -429,6 +448,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
if (strstart(uri, "tcp:", &p)) {
          tcp_start_outgoing_migration(s, p, &local_err);
+    } else if (strstart(uri, "rdma:", &p)) {
+        rdma_start_outgoing_migration(s, p, &local_err);
  #if !defined(WIN32)
      } else if (strstart(uri, "exec:", &p)) {
          exec_start_outgoing_migration(s, p, &local_err);
@@ -502,6 +523,16 @@ int migrate_use_xbzrle(void)
      return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
  }
+/*
+ * Don't think we need a 'capability' here
+ * because 'rdma:host:port' must be specified
+ * on the QMP command line...
+ */
+int migrate_use_rdma(void)
+{
+    return migrate_get_current()->rdma.enabled;
+}
+
  int64_t migrate_xbzrle_cache_size(void)
  {
      MigrationState *s;
@@ -550,7 +581,7 @@ static int buffered_put_buffer(void *opaque, const uint8_t 
*buf,
      MigrationState *s = opaque;
      ssize_t error;
- DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos);
+    DDPRINTF("putting %d bytes at %" PRId64 "\n", size, pos);
error = qemu_file_get_error(s->file);
      if (error) {
@@ -563,7 +594,7 @@ static int buffered_put_buffer(void *opaque, const uint8_t 
*buf,
      }
if (size > (s->buffer_capacity - s->buffer_size)) {
-        DPRINTF("increasing buffer capacity from %zu by %zu\n",
+        DDPRINTF("increasing buffer capacity from %zu by %d\n",
                  s->buffer_capacity, size + 1024);
s->buffer_capacity += size + 1024;
@@ -661,7 +692,7 @@ static void *buffered_file_thread(void *opaque)
      int64_t sleep_time = 0;
      int64_t max_size = 0;
      bool last_round = false;
-    int ret;
+    int ret, iterations = 0;
qemu_mutex_lock_iothread();
      DPRINTF("beginning savevm\n");
@@ -687,11 +718,15 @@ static void *buffered_file_thread(void *opaque)
              qemu_mutex_unlock_iothread();
              break;
          }
+
+        iterations++;
+
          if (s->bytes_xfer < s->xfer_limit) {
-            DPRINTF("iterate\n");
+            DPRINTF("iterate %d max %d\n", iterations, max_iterations);
              pending_size = qemu_savevm_state_pending(s->file, max_size);
              DPRINTF("pending size %lu max %lu\n", pending_size, max_size);
-            if (pending_size && pending_size >= max_size) {
+            if (pending_size && pending_size >= max_size &&
+                    (max_iterations == -1 || iterations < max_iterations)) {
                  ret = qemu_savevm_state_iterate(s->file);
                  if (ret < 0) {
                      qemu_mutex_unlock_iothread();
@@ -730,14 +765,18 @@ static void *buffered_file_thread(void *opaque)
          qemu_mutex_unlock_iothread();
          current_time = qemu_get_clock_ms(rt_clock);
          if (current_time >= initial_time + BUFFER_DELAY) {
-            uint64_t transferred_bytes = s->bytes_xfer;
+            uint64_t transferred_bytes = migrate_use_rdma() ?
+                    delta_norm_mig_bytes_transferred() : s->bytes_xfer;
              uint64_t time_spent = current_time - initial_time - sleep_time;
              double bandwidth = transferred_bytes / time_spent;
              max_size = bandwidth * migrate_max_downtime() / 1000000;
+            s->mbps = ((double) transferred_bytes * 8.0 /
+                    ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
DPRINTF("transferred %" PRIu64 " time_spent %" PRIu64
-                    " bandwidth %g max_size %" PRId64 "\n",
-                    transferred_bytes, time_spent, bandwidth, max_size);
+                    " bandwidth %g (%0.2f mbps) max_size %" PRId64 "\n",
+                    transferred_bytes, time_spent,
+                    bandwidth, s->mbps, max_size);
              /* if we haven't sent anything, we don't want to recalculate
                 10000 is a small enough number for our purposes */
              if (s->dirty_bytes_rate && transferred_bytes > 10000) {
@@ -774,6 +813,7 @@ static const QEMUFileOps buffered_file_ops = {
      .rate_limit =     buffered_rate_limit,
      .get_rate_limit = buffered_get_rate_limit,
      .set_rate_limit = buffered_set_rate_limit,
+    .send_barrier   = qemu_rdma_send_barrier,
  };
void migrate_fd_connect(MigrationState *s)






reply via email to

[Prev in Thread] Current Thread [Next in Thread]