[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH FYI 41/46] migration: convert RDMA to use QIOChannel
From: |
Daniel P. Berrange |
Subject: |
[Qemu-devel] [PATCH FYI 41/46] migration: convert RDMA to use QIOChannel interface |
Date: |
Thu, 3 Sep 2015 16:39:23 +0100 |
This converts the RDMA code to provide a subclass of
QIOChannel that uses RDMA for the data transport.
The RDMA code would be much better off if it could
be split up into a generic RDMA layer, a QIOChannel
impl based on RDMA, and then the RDMA migration
glue. This is left as a future exercise for the brave.
Signed-off-by: Daniel P. Berrange <address@hidden>
---
migration/rdma.c | 254 ++++++++++++++++++++++++++++++++++---------------------
1 file changed, 156 insertions(+), 98 deletions(-)
diff --git a/migration/rdma.c b/migration/rdma.c
index ca6b100..4e41c73 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -374,14 +374,19 @@ typedef struct RDMAContext {
GHashTable *blockmap;
} RDMAContext;
-/*
- * Interface to the rest of the migration call stack.
- */
-typedef struct QEMUFileRDMA {
+#define TYPE_QIO_CHANNEL_RDMA "qio-channel-rdma"
+#define QIO_CHANNEL_RDMA(obj) \
+ OBJECT_CHECK(QIOChannelRDMA, (obj), TYPE_QIO_CHANNEL_RDMA)
+
+typedef struct QIOChannelRDMA QIOChannelRDMA;
+
+
+struct QIOChannelRDMA {
+ QIOChannel parent;
RDMAContext *rdma;
+ QEMUFile *file;
size_t len;
- void *file;
-} QEMUFileRDMA;
+};
/*
* Main structure for IB Send/Recv control messages.
@@ -2519,15 +2524,19 @@ static void *qemu_rdma_data_init(const char *host_port,
Error **errp)
* SEND messages for control only.
* VM's ram is handled with regular RDMA messages.
*/
-static int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf,
- int64_t pos, int size)
-{
- QEMUFileRDMA *r = opaque;
- QEMUFile *f = r->file;
- RDMAContext *rdma = r->rdma;
- size_t remaining = size;
- uint8_t * data = (void *) buf;
+static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
+ const struct iovec *iov,
+ size_t niov,
+ int *fds,
+ size_t nfds,
+ Error **errp)
+{
+ QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+ QEMUFile *f = rioc->file;
+ RDMAContext *rdma = rioc->rdma;
int ret;
+ ssize_t done = 0;
+ size_t i;
CHECK_ERROR_STATE();
@@ -2541,26 +2550,31 @@ static int qemu_rdma_put_buffer(void *opaque, const
uint8_t *buf,
return ret;
}
- while (remaining) {
- RDMAControlHeader head;
+ for (i = 0; i < niov; i++) {
+ size_t remaining = iov[i].iov_len;
+ uint8_t * data = (void *)iov[i].iov_base;
+ while (remaining) {
+ RDMAControlHeader head;
- r->len = MIN(remaining, RDMA_SEND_INCREMENT);
- remaining -= r->len;
+ rioc->len = MIN(remaining, RDMA_SEND_INCREMENT);
+ remaining -= rioc->len;
- head.len = r->len;
- head.type = RDMA_CONTROL_QEMU_FILE;
+ head.len = rioc->len;
+ head.type = RDMA_CONTROL_QEMU_FILE;
- ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL);
+ ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL);
- if (ret < 0) {
- rdma->error_state = ret;
- return ret;
- }
+ if (ret < 0) {
+ rdma->error_state = ret;
+ return ret;
+ }
- data += r->len;
+ data += rioc->len;
+ done += rioc->len;
+ }
}
- return size;
+ return done;
}
static size_t qemu_rdma_fill(RDMAContext *rdma, uint8_t *buf,
@@ -2585,41 +2599,65 @@ static size_t qemu_rdma_fill(RDMAContext *rdma, uint8_t
*buf,
* RDMA links don't use bytestreams, so we have to
* return bytes to QEMUFile opportunistically.
*/
-static int qemu_rdma_get_buffer(void *opaque, uint8_t *buf,
- int64_t pos, int size)
-{
- QEMUFileRDMA *r = opaque;
- RDMAContext *rdma = r->rdma;
+static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
+ const struct iovec *iov,
+ size_t niov,
+ int **fds,
+ size_t *nfds,
+ Error **errp)
+{
+ QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+ RDMAContext *rdma = rioc->rdma;
RDMAControlHeader head;
int ret = 0;
+ ssize_t i;
+ size_t done = 0;
CHECK_ERROR_STATE();
- /*
- * First, we hold on to the last SEND message we
- * were given and dish out the bytes until we run
- * out of bytes.
- */
- r->len = qemu_rdma_fill(r->rdma, buf, size, 0);
- if (r->len) {
- return r->len;
- }
+ for (i = 0; i < niov; i++) {
+ size_t want = iov[i].iov_len;
+ uint8_t *data = (void *)iov[i].iov_base;
- /*
- * Once we run out, we block and wait for another
- * SEND message to arrive.
- */
- ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_QEMU_FILE);
+ /*
+ * First, we hold on to the last SEND message we
+ * were given and dish out the bytes until we run
+ * out of bytes.
+ */
+ ret = qemu_rdma_fill(rioc->rdma, data, want, 0);
+ if (ret > 0) {
+ done += ret;
+ if (ret < want) {
+ break;
+ } else {
+ continue;
+ }
+ }
- if (ret < 0) {
- rdma->error_state = ret;
- return ret;
- }
+ /*
+ * Once we run out, we block and wait for another
+ * SEND message to arrive.
+ */
+ ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_QEMU_FILE);
- /*
- * SEND was received with new bytes, now try again.
- */
- return qemu_rdma_fill(r->rdma, buf, size, 0);
+ if (ret < 0) {
+ rdma->error_state = ret;
+ return ret;
+ }
+
+ /*
+ * SEND was received with new bytes, now try again.
+ */
+ ret = qemu_rdma_fill(rioc->rdma, data, want, 0);
+ if (ret > 0) {
+ done += ret;
+ if (ret < want) {
+ break;
+ }
+ }
+ }
+ rioc->len = done;
+ return rioc->len;
}
/*
@@ -2646,15 +2684,16 @@ static int qemu_rdma_drain_cq(QEMUFile *f, RDMAContext
*rdma)
return 0;
}
-static int qemu_rdma_close(void *opaque)
+static int qio_channel_rdma_close(QIOChannel *ioc,
+ Error **errp)
{
+ QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
trace_qemu_rdma_close();
- QEMUFileRDMA *r = opaque;
- if (r->rdma) {
- qemu_rdma_cleanup(r->rdma);
- g_free(r->rdma);
+ if (rioc->rdma) {
+ qemu_rdma_cleanup(rioc->rdma);
+ g_free(rioc->rdma);
+ rioc->rdma = NULL;
}
- g_free(r);
return 0;
}
@@ -2696,8 +2735,8 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void
*opaque,
ram_addr_t block_offset, ram_addr_t offset,
size_t size, uint64_t *bytes_sent)
{
- QEMUFileRDMA *rfile = opaque;
- RDMAContext *rdma = rfile->rdma;
+ QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+ RDMAContext *rdma = rioc->rdma;
int ret;
CHECK_ERROR_STATE();
@@ -2951,8 +2990,8 @@ static int qemu_rdma_registration_handle(QEMUFile *f,
void *opaque)
};
RDMAControlHeader blocks = { .type = RDMA_CONTROL_RAM_BLOCKS_RESULT,
.repeat = 1 };
- QEMUFileRDMA *rfile = opaque;
- RDMAContext *rdma = rfile->rdma;
+ QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+ RDMAContext *rdma = rioc->rdma;
RDMALocalBlocks *local = &rdma->local_ram_blocks;
RDMAControlHeader head;
RDMARegister *reg, *registers;
@@ -3207,9 +3246,9 @@ out:
* We've already built our local RAMBlock list, but not yet sent the list to
* the source.
*/
-static int rdma_block_notification_handle(QEMUFileRDMA *rfile, const char
*name)
+static int rdma_block_notification_handle(QIOChannelRDMA *rioc, const char
*name)
{
- RDMAContext *rdma = rfile->rdma;
+ RDMAContext *rdma = rioc->rdma;
int curr;
int found = -1;
@@ -3251,8 +3290,8 @@ static int rdma_load_hook(QEMUFile *f, void *opaque,
uint64_t flags, void *data)
static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
uint64_t flags, void *data)
{
- QEMUFileRDMA *rfile = opaque;
- RDMAContext *rdma = rfile->rdma;
+ QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+ RDMAContext *rdma = rioc->rdma;
CHECK_ERROR_STATE();
@@ -3271,8 +3310,8 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void
*opaque,
uint64_t flags, void *data)
{
Error *local_err = NULL, **errp = &local_err;
- QEMUFileRDMA *rfile = opaque;
- RDMAContext *rdma = rfile->rdma;
+ QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+ RDMAContext *rdma = rioc->rdma;
RDMAControlHeader head = { .len = 0, .repeat = 1 };
int ret = 0;
@@ -3368,55 +3407,74 @@ err:
return ret;
}
-static int qemu_rdma_get_fd(void *opaque)
-{
- QEMUFileRDMA *rfile = opaque;
- RDMAContext *rdma = rfile->rdma;
-
- return rdma->comp_channel->fd;
-}
-
-static const QEMUFileOps rdma_read_ops = {
- .get_buffer = qemu_rdma_get_buffer,
- .get_fd = qemu_rdma_get_fd,
- .close = qemu_rdma_close,
-};
-
static const QEMUFileHooks rdma_read_hooks = {
.hook_ram_load = rdma_load_hook,
};
-static const QEMUFileOps rdma_write_ops = {
- .put_buffer = qemu_rdma_put_buffer,
- .close = qemu_rdma_close,
-};
-
static const QEMUFileHooks rdma_write_hooks = {
.before_ram_iterate = qemu_rdma_registration_start,
.after_ram_iterate = qemu_rdma_registration_stop,
.save_page = qemu_rdma_save_page,
};
-static void *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
+
+static void qio_channel_rdma_finalize(Object *obj)
+{
+ QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(obj);
+ if (rioc->rdma) {
+ qemu_rdma_cleanup(rioc->rdma);
+ g_free(rioc->rdma);
+ rioc->rdma = NULL;
+ }
+}
+
+static void qio_channel_rdma_class_init(ObjectClass *klass,
+ void *class_data G_GNUC_UNUSED)
+{
+ QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
+
+ ioc_klass->io_writev = qio_channel_rdma_writev;
+ ioc_klass->io_readv = qio_channel_rdma_readv;
+ //ioc_klass->io_set_blocking = qio_channel_rdma_set_blocking;
+ ioc_klass->io_close = qio_channel_rdma_close;
+ //ioc_klass->io_create_watch = qio_channel_rdma_create_watch;
+}
+
+static const TypeInfo qio_channel_rdma_info = {
+ .parent = TYPE_QIO_CHANNEL,
+ .name = TYPE_QIO_CHANNEL_RDMA,
+ .instance_size = sizeof(QIOChannelRDMA),
+ .instance_finalize = qio_channel_rdma_finalize,
+ .class_init = qio_channel_rdma_class_init,
+};
+
+static void qio_channel_rdma_register_types(void)
+{
+ type_register_static(&qio_channel_rdma_info);
+}
+
+type_init(qio_channel_rdma_register_types);
+
+static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
{
- QEMUFileRDMA *r;
+ QIOChannelRDMA *rioc;
if (qemu_file_mode_is_not_valid(mode)) {
return NULL;
}
- r = g_malloc0(sizeof(QEMUFileRDMA));
- r->rdma = rdma;
+ rioc = QIO_CHANNEL_RDMA(object_new(TYPE_QIO_CHANNEL_RDMA));
+ rioc->rdma = rdma;
if (mode[0] == 'w') {
- r->file = qemu_fopen_ops(r, &rdma_write_ops);
- qemu_file_set_hooks(r->file, &rdma_write_hooks);
+ rioc->file = qemu_fopen_channel_output(QIO_CHANNEL(rioc));
+ qemu_file_set_hooks(rioc->file, &rdma_write_hooks);
} else {
- r->file = qemu_fopen_ops(r, &rdma_read_ops);
- qemu_file_set_hooks(r->file, &rdma_read_hooks);
+ rioc->file = qemu_fopen_channel_input(QIO_CHANNEL(rioc));
+ qemu_file_set_hooks(rioc->file, &rdma_read_hooks);
}
- return r->file;
+ return rioc->file;
}
static void rdma_accept_incoming_migration(void *opaque)
--
2.4.3
- [Qemu-devel] [PATCH FYI 34/46] migration: introduce qemu_fset_blocking function on QEMUFile, (continued)
- [Qemu-devel] [PATCH FYI 34/46] migration: introduce qemu_fset_blocking function on QEMUFile, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 36/46] migration: introduce a new QEMUFile impl based on QIOChannel, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 35/46] migration: force QEMUFile to blocking mode for outgoing migration, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 37/46] migration: convert unix socket protocol to use QIOChannel, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 39/46] migration: convert fd socket protocol to use QIOChannel, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 38/46] migration: convert tcp socket protocol to use QIOChannel, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 42/46] migration: convert savevm to use QIOChannel for writing to files, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 40/46] migration: convert exec socket protocol to use QIOChannel, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 43/46] migration: delete QEMUFile sockets implementation, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 44/46] migration: delete QEMUFile stdio implementation, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 41/46] migration: convert RDMA to use QIOChannel interface,
Daniel P. Berrange <=
- [Qemu-devel] [PATCH FYI 45/46] migration: support TLS encryption with TCP migration backend, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 46/46] migration: remove support for non-iovec based write handlers, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 07/46] io: add helper module for creating watches on FDs, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 23/46] nbd: convert to use the QAPI SocketAddress object, Daniel P. Berrange, 2015/09/03
- [Qemu-devel] [PATCH FYI 21/46] char: don't assume telnet initialization will not block, Daniel P. Berrange, 2015/09/03