qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH RFC 03/19] vfio-user: define VFIO Proxy and communication functions


From: Elena Ufimtseva
Subject: [PATCH RFC 03/19] vfio-user: define VFIO Proxy and communication functions
Date: Sun, 18 Jul 2021 23:27:42 -0700

From: John G Johnson <john.g.johnson@oracle.com>

Add user.c and user.h files for vfio-user with the basic
send and receive functions.

Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 hw/vfio/user.h                | 120 ++++++++++++++
 include/hw/vfio/vfio-common.h |   2 +
 hw/vfio/user.c                | 286 ++++++++++++++++++++++++++++++++++
 MAINTAINERS                   |   4 +
 hw/vfio/meson.build           |   1 +
 5 files changed, 413 insertions(+)
 create mode 100644 hw/vfio/user.h
 create mode 100644 hw/vfio/user.c

diff --git a/hw/vfio/user.h b/hw/vfio/user.h
new file mode 100644
index 0000000000..cdbc074579
--- /dev/null
+++ b/hw/vfio/user.h
@@ -0,0 +1,120 @@
+#ifndef VFIO_USER_H
+#define VFIO_USER_H
+
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Each message has a standard header that describes the command
+ * being sent, which is almost always a VFIO ioctl().
+ *
+ * The header may be followed by command-specific data, such as the
+ * region and offset info for read and write commands.
+ */
+
+/* commands */
+enum vfio_user_command {
+    VFIO_USER_VERSION                   = 1,
+    VFIO_USER_DMA_MAP                   = 2,
+    VFIO_USER_DMA_UNMAP                 = 3,
+    VFIO_USER_DEVICE_GET_INFO           = 4,
+    VFIO_USER_DEVICE_GET_REGION_INFO    = 5,
+    VFIO_USER_DEVICE_GET_REGION_IO_FDS  = 6,
+    VFIO_USER_DEVICE_GET_IRQ_INFO       = 7,
+    VFIO_USER_DEVICE_SET_IRQS           = 8,
+    VFIO_USER_REGION_READ               = 9,
+    VFIO_USER_REGION_WRITE              = 10,
+    VFIO_USER_DMA_READ                  = 11,
+    VFIO_USER_DMA_WRITE                 = 12,
+    VFIO_USER_DEVICE_RESET              = 13,
+    VFIO_USER_DIRTY_PAGES               = 14,
+    VFIO_USER_MAX,
+};
+
+/* flags */
+#define VFIO_USER_REQUEST       0x0
+#define VFIO_USER_REPLY         0x1
+#define VFIO_USER_TYPE          0xF
+
+#define VFIO_USER_NO_REPLY      0x10
+#define VFIO_USER_ERROR         0x20
+
+typedef struct vfio_user_hdr {
+    uint16_t id;
+    uint16_t command;
+    uint32_t size;
+    uint32_t flags;
+    uint32_t error_reply;
+} vfio_user_hdr_t;
+
+/*
+ * VFIO_USER_VERSION
+ */
+#define VFIO_USER_MAJOR_VER     0
+#define VFIO_USER_MINOR_VER     0
+
+struct vfio_user_version {
+    vfio_user_hdr_t hdr;
+    uint16_t major;
+    uint16_t minor;
+    char capabilities[];
+};
+
+#define VFIO_USER_DEF_MAX_FDS   8
+#define VFIO_USER_MAX_MAX_FDS   16
+
+#define VFIO_USER_DEF_MAX_XFER  (1024 * 1024)
+#define VFIO_USER_MAX_MAX_XFER  (64 * 1024 * 1024)
+
+typedef struct VFIOUserFDs {
+    int send_fds;
+    int recv_fds;
+    int *fds;
+} VFIOUserFDs;
+
+typedef struct VFIOUserReply {
+    QTAILQ_ENTRY(VFIOUserReply) next;
+    vfio_user_hdr_t *msg;
+    VFIOUserFDs *fds;
+    int rsize;
+    uint32_t id;
+    QemuCond cv;
+    uint8_t complete;
+} VFIOUserReply;
+
+enum proxy_state {
+    CONNECTED = 1,
+    RECV_ERROR = 2,
+    CLOSING = 3,
+    CLOSED = 4,
+};
+
+typedef struct VFIOProxy {
+    QLIST_ENTRY(VFIOProxy) next;
+    char *sockname;
+    struct QIOChannel *ioc;
+    int (*request)(void *opaque, char *buf, VFIOUserFDs *fds);
+    void *reqarg;
+    int flags;
+    QemuCond close_cv;
+
+    /*
+     * above only changed when iolock is held
+     * below are protected by per-proxy lock
+     */
+    QemuMutex lock;
+    QTAILQ_HEAD(, VFIOUserReply) free;
+    QTAILQ_HEAD(, VFIOUserReply) pending;
+    enum proxy_state state;
+    int close_wait;
+} VFIOProxy;
+
+#define VFIO_PROXY_CLIENT       0x1
+
+void vfio_user_recv(void *opaque);
+void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret);
+#endif /* VFIO_USER_H */
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 8af11b0a76..f43dc6e5d0 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -75,6 +75,7 @@ typedef struct VFIOAddressSpace {
 } VFIOAddressSpace;
 
 struct VFIOGroup;
+typedef struct VFIOProxy VFIOProxy;
 
 typedef struct VFIOContainer {
     VFIOAddressSpace *space;
@@ -143,6 +144,7 @@ typedef struct VFIODevice {
     VFIOMigration *migration;
     Error *migration_blocker;
     OnOffAuto pre_copy_dirty_page_tracking;
+    VFIOProxy *proxy;
 } VFIODevice;
 
 struct VFIODeviceOps {
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
new file mode 100644
index 0000000000..021d5540e0
--- /dev/null
+++ b/hw/vfio/user.c
@@ -0,0 +1,286 @@
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu/main-loop.h"
+#include "hw/hw.h"
+#include "hw/vfio/vfio-common.h"
+#include "hw/vfio/vfio.h"
+#include "qemu/sockets.h"
+#include "io/channel.h"
+#include "io/channel-util.h"
+#include "sysemu/iothread.h"
+#include "user.h"
+
+static uint64_t max_xfer_size = VFIO_USER_DEF_MAX_XFER;
+static IOThread *vfio_user_iothread;
+static void vfio_user_send_locked(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                                  VFIOUserFDs *fds);
+static void vfio_user_send(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                           VFIOUserFDs *fds);
+static void vfio_user_shutdown(VFIOProxy *proxy);
+
+static void vfio_user_shutdown(VFIOProxy *proxy)
+{
+    qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL);
+    qio_channel_set_aio_fd_handler(proxy->ioc,
+                                   iothread_get_aio_context(vfio_user_iothread),
+                                   NULL, NULL, NULL);
+}
+
+void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret)
+{
+    vfio_user_hdr_t *hdr = (vfio_user_hdr_t *)buf;
+
+    /*
+     * convert header to associated reply
+     * positive ret is reply size, negative is error code
+     */
+    hdr->flags = VFIO_USER_REPLY;
+    if (ret > 0) {
+        hdr->size = ret;
+    } else if (ret < 0) {
+        hdr->flags |= VFIO_USER_ERROR;
+        hdr->error_reply = -ret;
+        hdr->size = sizeof(*hdr);
+    }
+    vfio_user_send(proxy, hdr, NULL);
+}
+
+void vfio_user_recv(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOProxy *proxy = vbasedev->proxy;
+    VFIOUserReply *reply = NULL;
+    g_autofree int *fdp = NULL;
+    VFIOUserFDs reqfds = { 0, 0, fdp };
+    vfio_user_hdr_t msg;
+    struct iovec iov = {
+        .iov_base = &msg,
+        .iov_len = sizeof(msg),
+    };
+    int isreply, i, ret;
+    size_t msgleft, numfds = 0;
+    char *data = NULL;
+    g_autofree char *buf = NULL;
+    Error *local_err = NULL;
+
+    qemu_mutex_lock(&proxy->lock);
+    if (proxy->state == CLOSING) {
+        qemu_mutex_unlock(&proxy->lock);
+        return;
+    }
+
+    ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds,
+                                 &local_err);
+    if (ret <= 0) {
+        /* read error or other side closed connection */
+        error_setg_errno(&local_err, errno, "vfio_user_recv read error");
+        goto fatal;
+    }
+
+    if (ret < sizeof(msg)) {
+        error_setg(&local_err, "vfio_user_recv short read of header");
+        goto err;
+    }
+
+    /*
+     * For replies, find the matching pending request
+     */
+    switch (msg.flags & VFIO_USER_TYPE) {
+    case VFIO_USER_REQUEST:
+        isreply = 0;
+        break;
+    case VFIO_USER_REPLY:
+        isreply = 1;
+        break;
+    default:
+        error_setg(&local_err, "vfio_user_recv unknown message type");
+        goto err;
+    }
+
+    if (isreply) {
+        QTAILQ_FOREACH(reply, &proxy->pending, next) {
+            if (msg.id == reply->id) {
+                break;
+            }
+        }
+        if (reply == NULL) {
+            error_setg(&local_err, "vfio_user_recv unexpected reply");
+            goto err;
+        }
+        QTAILQ_REMOVE(&proxy->pending, reply, next);
+
+        /*
+         * Process any received FDs
+         */
+        if (numfds != 0) {
+            if (reply->fds == NULL || reply->fds->recv_fds < numfds) {
+                error_setg(&local_err, "vfio_user_recv unexpected FDs");
+                goto err;
+            }
+            reply->fds->recv_fds = numfds;
+            memcpy(reply->fds->fds, fdp, numfds * sizeof(int));
+        }
+
+    } else {
+        /*
+         * The client doesn't expect any FDs in requests, but
+         * they will be expected on the server
+         */
+        if (numfds != 0 && (proxy->flags & VFIO_PROXY_CLIENT)) {
+            error_setg(&local_err, "vfio_user_recv fd in client reply");
+            goto err;
+        }
+        reqfds.recv_fds = numfds;
+    }
+
+    /*
+     * put the whole message into a single buffer
+     */
+    msgleft = msg.size - sizeof(msg);
+    if (isreply) {
+        if (msg.size > reply->rsize) {
+            error_setg(&local_err,
+                       "vfio_user_recv reply larger than recv buffer");
+            goto fatal;
+        }
+        *reply->msg = msg;
+        data = (char *)reply->msg + sizeof(msg);
+    } else {
+        if (msg.size > max_xfer_size) {
+            error_setg(&local_err, "vfio_user_recv request larger than max");
+            goto fatal;
+        }
+        buf = g_malloc0(msg.size);
+        memcpy(buf, &msg, sizeof(msg));
+        data = buf + sizeof(msg);
+    }
+
+    if (msgleft != 0) {
+        ret = qio_channel_read(proxy->ioc, data, msgleft, &local_err);
+        if (ret < 0) {
+            goto fatal;
+        }
+        if (ret != msgleft) {
+            error_setg(&local_err, "vfio_user_recv short read of msg body");
+            goto err;
+        }
+    }
+
+    /*
+     * Replies signal a waiter, requests get processed by vfio code
+     * that may assume the iothread lock is held.
+     */
+    qemu_mutex_unlock(&proxy->lock);
+    if (isreply) {
+        reply->complete = 1;
+        qemu_cond_signal(&reply->cv);
+    } else {
+        qemu_mutex_lock_iothread();
+        /*
+         * make sure proxy wasn't closed while we waited
+         * checking without holding the proxy lock is safe
+         * since state is only set to CLOSING when iolock is held
+         */
+        if (proxy->state != CLOSING) {
+            ret = proxy->request(proxy->reqarg, buf, &reqfds);
+            if (ret < 0 && !(msg.flags & VFIO_USER_NO_REPLY)) {
+                vfio_user_send_reply(proxy, buf, ret);
+            }
+        }
+        qemu_mutex_unlock_iothread();
+    }
+
+    return;
+ fatal:
+    vfio_user_shutdown(proxy);
+    proxy->state = RECV_ERROR;
+
+ err:
+    qemu_mutex_unlock(&proxy->lock);
+    for (i = 0; i < numfds; i++) {
+        close(fdp[i]);
+    }
+    if (reply != NULL) {
+        /* force an error to keep sending thread from hanging */
+        reply->msg->flags |= VFIO_USER_ERROR;
+        reply->msg->error_reply = EINVAL;
+        reply->complete = 1;
+        qemu_cond_signal(&reply->cv);
+    }
+    error_report_err(local_err);
+}
+
+static void vfio_user_send_locked(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                                  VFIOUserFDs *fds)
+{
+    struct iovec iov = {
+        .iov_base = msg,
+        .iov_len = msg->size,
+    };
+    size_t numfds = 0;
+    int msgleft, ret, *fdp = NULL;
+    char *buf;
+    Error *local_err = NULL;
+
+    if (proxy->state != CONNECTED) {
+        msg->flags |= VFIO_USER_ERROR;
+        msg->error_reply = ECONNRESET;
+        return;
+    }
+
+    if (fds != NULL && fds->send_fds != 0) {
+        numfds = fds->send_fds;
+        fdp = fds->fds;
+    }
+    ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, &local_err);
+    if (ret < 0) {
+        goto err;
+    }
+    if (ret == msg->size) {
+        return;
+    }
+
+    buf = iov.iov_base + ret;
+    msgleft = iov.iov_len - ret;
+    do {
+        ret = qio_channel_write(proxy->ioc, buf, msgleft, &local_err);
+        if (ret < 0) {
+            goto err;
+        }
+        buf += ret, msgleft -= ret;
+    } while (msgleft != 0);
+    return;
+
+ err:
+    error_report_err(local_err);
+}
+
+static void vfio_user_send(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                           VFIOUserFDs *fds)
+{
+    bool iolock = qemu_mutex_iothread_locked();
+
+    if (iolock) {
+        qemu_mutex_unlock_iothread();
+    }
+    qemu_mutex_lock(&proxy->lock);
+    vfio_user_send_locked(proxy, msg, fds);
+    qemu_mutex_unlock(&proxy->lock);
+    if (iolock) {
+        qemu_mutex_lock_iothread();
+    }
+}
diff --git a/MAINTAINERS b/MAINTAINERS
index 12d69f3a45..aa4df6c418 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1883,8 +1883,12 @@ L: qemu-s390x@nongnu.org
 vfio-user
 M: John G Johnson <john.g.johnson@oracle.com>
 M: Thanos Makatos <thanos.makatos@nutanix.com>
+M: Elena Ufimtseva <elena.ufimtseva@oracle.com>
+M: Jagannathan Raman <jag.raman@oracle.com>
 S: Supported
 F: docs/devel/vfio-user.rst
+F: hw/vfio/user.c
+F: hw/vfio/user.h
 
 vhost
 M: Michael S. Tsirkin <mst@redhat.com>
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index da9af297a0..739b30be73 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -8,6 +8,7 @@ vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
   'display.c',
   'pci-quirks.c',
   'pci.c',
+  'user.c',
 ))
 vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c'))
 vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c'))
-- 
2.25.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]