[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC PATCH 2/4] vhost-user: add the vhost-user extensio
From: |
Marc-André Lureau |
Subject: |
Re: [Qemu-devel] [RFC PATCH 2/4] vhost-user: add the vhost-user extension to support the vhost-pci based inter-vm communication |
Date: |
Thu, 10 Nov 2016 11:36:08 +0000 |
Hi
On Thu, Nov 10, 2016 at 6:47 AM Wei Wang <address@hidden> wrote:
> This is the slave part of vhost-user implemented in QEMU, with an extension
> to support vhost-pci.
>
Instead of implementing "another vhost-user slave", it would be worth
investigating using libvhost-user (
https://lists.gnu.org/archive/html/qemu-devel/2016-10/msg03990.html). This
is just a suggestion; it is quite fine for vhost-pci to have its own
smaller/specific vhost-user slave implementation (without virtio ring
handling etc). (libvhost-user is also very young and not yet in qemu, so we
should be able to shape it for vhost-pci needs.)
> Signed-off-by: Wei Wang <address@hidden>
> ---
> hw/virtio/Makefile.objs | 1 +
> hw/virtio/vhost-pci-server.c | 469
> +++++++++++++++++++++++++++++++++++
> hw/virtio/vhost-user.c | 86 +------
> include/hw/virtio/vhost-pci-server.h | 45 ++++
> include/hw/virtio/vhost-user.h | 110 ++++++++
> include/sysemu/sysemu.h | 1 +
> qemu-options.hx | 4 +
> vl.c | 26 ++
> 8 files changed, 657 insertions(+), 85 deletions(-)
> create mode 100644 hw/virtio/vhost-pci-server.c
> create mode 100644 include/hw/virtio/vhost-pci-server.h
> create mode 100644 include/hw/virtio/vhost-user.h
>
> diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
> index 3e2b175..e44feb8 100644
> --- a/hw/virtio/Makefile.objs
> +++ b/hw/virtio/Makefile.objs
> @@ -2,6 +2,7 @@ common-obj-y += virtio-rng.o
> common-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
> common-obj-y += virtio-bus.o
> common-obj-y += virtio-mmio.o
> +common-obj-y += vhost-pci-server.o
>
> obj-y += virtio.o virtio-balloon.o
> obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o
> diff --git a/hw/virtio/vhost-pci-server.c b/hw/virtio/vhost-pci-server.c
> new file mode 100644
> index 0000000..6ce8516
> --- /dev/null
> +++ b/hw/virtio/vhost-pci-server.c
> @@ -0,0 +1,469 @@
> +/*
> + * Vhost-pci server
> + *
> + * Copyright Intel Corp. 2016
> + *
> + * Authors:
> + * Wei Wang <address@hidden>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include <qemu/osdep.h>
> +#include <qemu/thread.h>
> +#include <qemu/main-loop.h>
> +#include <qemu/bitops.h>
> +#include <qemu/bitmap.h>
> +#include <qemu/sockets.h>
> +#include <linux/virtio_net.h>
> +#include "sysemu/char.h"
> +#include "qapi/error.h"
> +#include "hw/virtio/vhost-pci-server.h"
> +#include "qemu/option.h"
> +#include "monitor/qdev.h"
> +#include "hw/virtio/vhost-user.h"
> +#include "hw/qdev.h"
> +
> +#define VHOST_PCI_FEATURE_BITS (1ULL << VIRTIO_F_VERSION_1)
> +
> +#define VHOST_PCI_NET_FEATURE_BITS (1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
> + (1ULL << VIRTIO_NET_F_CTRL_VQ) | \
> + (1ULL << VIRTIO_NET_F_MQ)
> +
> +#define VHOST_USER_SET_PEER_CONNECTION_OFF 0
> +#define VHOST_USER_SET_PEER_CONNECTION_ON 1
> +#define VHOST_USER_SET_PEER_CONNECTION_INIT 2
> +
> +VhostPCIServer *vp_server;
> +
> +QemuOptsList qemu_vhost_pci_server_opts = {
> + .name = "vhost-pci-server",
> + .implied_opt_name = "chardev",
> + .head = QTAILQ_HEAD_INITIALIZER(qemu_vhost_pci_server_opts.head),
> + .desc = {
> + /*
> + * no elements => accept any
> + * sanity checking will happen later
> + * when setting device properties
> + */
> + { /* end of list */ }
> + },
> +};
> +
> +static int vhost_pci_server_write(CharDriverState *chr, VhostUserMsg *msg)
> +{
> + int size = msg->size + VHOST_USER_HDR_SIZE;
> +
> + if (!msg)
> + return 0;
> +
> + msg->flags &= ~VHOST_USER_VERSION_MASK;
> + msg->flags |= VHOST_USER_VERSION;
> +
> + return qemu_chr_fe_write_all_n(chr, msg->conn_id,
> + (const uint8_t *)msg, size) == size ? 0
> : -1;
> +}
> +
> +PeerConnectionTable *vp_server_find_table_ent(const char *dev_id)
> +{
> + int i;
> + PeerConnectionTable *ent;
> + uint64_t max_connections = vp_server->chr->max_connections;
> +
> + for (i = 0; i < max_connections; i++) {
> + ent = &vp_server->peer_table[i];
> + if (!strcmp(dev_id, ent->dev_id))
> + return ent;
> + }
> + return NULL;
> +}
> +
> +static void vhost_pci_init_peer_table(uint64_t id)
> +{
> + PeerConnectionTable *ent = &vp_server->peer_table[id];
> +
> + ent->peer_feature_bits |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
> + QLIST_INIT(&ent->vq_list);
> + ent->vq_num = 0;
> +}
> +
> +static int vhost_pci_get_conn_id(CharDriverState *chr, VhostUserMsg *msg)
> +{
> + unsigned long *conn_bitmap = chr->conn_bitmap;
> + unsigned long *old_conn_bitmap = vp_server->old_conn_bitmap;
> + uint64_t nbits = chr->max_connections;
> + uint64_t id;
> + int r;
> +
> + bitmap_xor(old_conn_bitmap, old_conn_bitmap, conn_bitmap,
> (long)nbits);
> +
> + for (id = find_first_bit(old_conn_bitmap, nbits); id < nbits;
> + id = find_next_bit(old_conn_bitmap, nbits, id + 1)) {
> + vhost_pci_init_peer_table(id);
> + msg->conn_id = id;
> + msg->payload.u64 = id;
> + msg->size = sizeof(msg->payload.u64);
> + msg->flags |= VHOST_USER_REPLY_MASK;
> + r = vhost_pci_server_write(chr, msg);
> + }
> + bitmap_copy(old_conn_bitmap, conn_bitmap, (long)nbits);
> +
> + return r;
> +}
> +
> +static int vhost_pci_get_peer_features(CharDriverState *chr, VhostUserMsg
> *msg)
> +{
> + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id];
> + msg->payload.u64 = ent->peer_feature_bits;
> + msg->size = sizeof(msg->payload.u64);
> + msg->flags |= VHOST_USER_REPLY_MASK;
> + return vhost_pci_server_write(chr, msg);
> +}
> +
> +static int vhost_pci_get_queue_num(CharDriverState *chr, VhostUserMsg
> *msg)
> +{
> + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id];
> + switch (ent->virtio_id) {
> + case VIRTIO_ID_NET:
> + msg->payload.u64 = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX;
> + break;
> + default:
> + printf("%s: device type not supported yet..\n", __func__);
> + }
> + msg->size = sizeof(msg->payload.u64);
> + msg->flags |= VHOST_USER_REPLY_MASK;
> + return vhost_pci_server_write(chr, msg);
> +}
> +
> +static int vhost_pci_get_protocol_features(CharDriverState *chr,
> VhostUserMsg *msg)
> +{
> + msg->payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
> + msg->size = sizeof(msg->payload.u64);
> + msg->flags |= VHOST_USER_REPLY_MASK;
> + return vhost_pci_server_write(chr, msg);
> +}
> +
> +static void vhost_pci_set_protocol_features(VhostUserMsg *msg)
> +{
> + vp_server->protocol_features = msg->payload.u64;
> +}
> +
> +static int vhost_pci_device_create(uint64_t conn_id)
> +{
> + PeerConnectionTable *ent = &vp_server->peer_table[conn_id];
> + Error *local_err = NULL;
> + QemuOpts *opts;
> + DeviceState *dev;
> + char params[50];
> +
> + switch (ent->virtio_id) {
> + case VIRTIO_ID_NET:
> + sprintf(params, "driver=vhost-pci-net-pci,id=vhost-pci-%ld",
> conn_id);
> + sprintf(ent->dev_id, "vhost-pci-%ld", conn_id);
> + break;
> + default:
> + printf("%s: device type not supported yet..\n", __func__);
> + }
> +
> + opts = qemu_opts_parse_noisily(qemu_find_opts("device"), params,
> true);
> + dev = qdev_device_add(opts, &local_err);
> + if (!dev) {
> + qemu_opts_del(opts);
> + return -1;
> + }
> + object_unref(OBJECT(dev));
> + return 0;
> +}
> +
> +static void vhost_pci_set_device_info(VhostUserMsg *msg)
> +{
> + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id];
> + DeviceInfo *info = &msg->payload.dev_info;
> +
> + memcpy(ent->uuid, info->uuid, sizeof(uuid_t));
> + ent->virtio_id = info->virtio_id;
> + switch (ent->virtio_id) {
> + case VIRTIO_ID_NET:
> + ent->peer_feature_bits |= (VHOST_PCI_FEATURE_BITS |
> VHOST_PCI_NET_FEATURE_BITS);
> + break;
> + default:
> + printf("%s: device type not supported yet..\n", __func__);
> + }
> +}
> +
> +static void vhost_pci_set_peer_feature_bits(VhostUserMsg *msg)
> +{
> + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id];
> +
> + ent->peer_feature_bits = msg->payload.u64;
> +}
> +
> +static uint64_t vhost_pci_peer_mem_size_get(VhostUserMemory *peer_mem)
> +{
> + int i;
> + uint64_t total_size;
> + uint32_t nregions = peer_mem->nregions;
> + VhostUserMemoryRegion *regions = peer_mem->regions;
> +
> + for (i = 0; i < nregions; i++) {
> + total_size += regions[i].memory_size;
> + }
> +
> + return total_size;
> +}
> +
> +static int vhost_pci_set_mem_table(uint64_t conn_id, VhostUserMemory
> *peer_mem, int *fds)
> +{
> + int i;
> + void *mr_qva;
> + PeerConnectionTable *ent = &vp_server->peer_table[conn_id];
> + uint32_t nregions = peer_mem->nregions;
> + VhostUserMemoryRegion *peer_mr = peer_mem->regions;
> + MemoryRegion *bar_mr = g_malloc(sizeof(MemoryRegion));
> + MemoryRegion *mr = g_malloc(nregions * sizeof(MemoryRegion));
> + uint64_t bar_size = 2 * vhost_pci_peer_mem_size_get(peer_mem);
> + uint64_t bar_map_offset = 0;
> +
> + bar_size = pow2ceil(bar_size);
> + memory_region_init(bar_mr, NULL, "Peer Memory", bar_size);
> +
> + for (i = 0; i < nregions; i++) {
> + mr_qva = mmap(NULL, peer_mr[i].memory_size +
> peer_mr[i].mmap_offset,
> + PROT_READ | PROT_READ, MAP_SHARED, fds[i], 0);
> + if (mr_qva == MAP_FAILED) {
> + printf("%s called: map failed \n", __func__);
> + return -1;
> + }
> + mr_qva += peer_mr[i].mmap_offset;
> + memory_region_init_ram_ptr(&mr[i], NULL, "Peer Memory",
> peer_mr[i].memory_size, mr_qva);
> + memory_region_add_subregion(bar_mr, bar_map_offset, &mr[i]);
> + bar_map_offset += peer_mr[i].memory_size;
> + }
> + ent->bar_mr = bar_mr;
> + ent->bar_map_offset = bar_map_offset;
> +
> + return 0;
> +}
> +
> +static void vhost_pci_alloc_peer_vring_info(uint64_t conn_id)
> +{
> + PeerConnectionTable *ent = &vp_server->peer_table[conn_id];
> + PeerVirtqInfo *virtq_info = g_malloc0(sizeof(PeerVirtqInfo));
> + QLIST_INSERT_HEAD(&ent->vq_list, virtq_info, node);
> + ent->vq_num++;
> +}
> +
> +static void vhost_pci_set_vring_num(VhostUserMsg *msg)
> +{
> + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id];
> + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list);
> +
> + virtq_info->vring_num = msg->payload.u64;
> +}
> +
> +static void vhost_pci_set_vring_base(VhostUserMsg *msg)
> +{
> + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id];
> + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list);
> +
> + virtq_info->last_avail_idx = msg->payload.u64;
> +}
> +
> +static void vhost_pci_set_vring_addr(VhostUserMsg *msg)
> +{
> + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id];
> + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list);
> + memcpy(&virtq_info->addr, &msg->payload.addr,
> + sizeof(struct vhost_vring_addr));
> +}
> +
> +static void vhost_pci_set_vring_kick(uint64_t conn_id, int fd)
> +{
> + PeerConnectionTable *ent = &vp_server->peer_table[conn_id];
> + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list);
> + if (!virtq_info)
> + virtq_info->kickfd = fd;
> +}
> +
> +static void vhost_pci_set_vring_call(uint64_t conn_id, int fd)
> +{
> + PeerConnectionTable *ent = &vp_server->peer_table[conn_id];
> + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list);
> + if (virtq_info)
> + virtq_info->callfd = fd;
> +}
> +
> +static void vhost_pci_set_peer_connection(VhostUserMsg *msg)
> +{
> + uint64_t cmd = msg->payload.u64;
> + uint64_t conn_id = msg->conn_id;
> +
> + switch (cmd) {
> + case VHOST_USER_SET_PEER_CONNECTION_INIT:
> + vhost_pci_device_create(conn_id);
> + break;
> + default:
> + printf("%s called: cmd %lu not supported yet \n", __func__, cmd);
> + }
> +}
> +
> +static void vhost_pci_server_read(void *opaque, const uint8_t *buf, int
> size)
> +{
> + VhostUserMsg msg;
> + uint8_t *p = (uint8_t *) &msg;
> + CharDriverState *chr = (CharDriverState *)opaque;
> + int fds[8], fd_num;
> +
> + if (size != VHOST_USER_HDR_SIZE) {
> + printf("Wrong message size received %d\n", size);
> + return;
> + }
> + memcpy(p, buf, VHOST_USER_HDR_SIZE);
> +
> + if (msg.size) {
> + p += VHOST_USER_HDR_SIZE;
> + size = qemu_chr_fe_read_all_n(chr, msg.conn_id, p, msg.size);
> + if (size != msg.size) {
> + printf("Wrong message size received %d != %d\n",
> + size, msg.size);
> + return;
> + }
> + }
> +
> + if (msg.request > VHOST_USER_MAX)
> + printf("vhost read incorrect msg \n");
> +
> + switch(msg.request) {
> + case VHOST_USER_GET_CONN_ID:
> + vhost_pci_get_conn_id(chr, &msg);
> + break;
> + case VHOST_USER_GET_FEATURES:
> + vhost_pci_get_peer_features(chr, &msg);
> + break;
> + case VHOST_USER_GET_PROTOCOL_FEATURES:
> + vhost_pci_get_protocol_features(chr, &msg);
> + break;
> + case VHOST_USER_SET_PROTOCOL_FEATURES:
> + vhost_pci_set_protocol_features(&msg);
> + break;
> + case VHOST_USER_SET_DEV_INFO:
> + vhost_pci_set_device_info(&msg);
> + break;
> + case VHOST_USER_GET_QUEUE_NUM:
> + vhost_pci_get_queue_num(chr, &msg);
> + break;
> + case VHOST_USER_SET_OWNER:
> + break;
> + case VHOST_USER_SET_FEATURES:
> + vhost_pci_set_peer_feature_bits(&msg);
> + break;
> + case VHOST_USER_SET_VRING_NUM:
> + vhost_pci_alloc_peer_vring_info(msg.conn_id);
> + vhost_pci_set_vring_num(&msg);
> + break;
> + case VHOST_USER_SET_VRING_BASE:
> + vhost_pci_set_vring_base(&msg);
> + break;
> + case VHOST_USER_SET_VRING_ADDR:
> + vhost_pci_set_vring_addr(&msg);
> + break;
> + case VHOST_USER_SET_VRING_KICK:
> + /* consume the fd */
> + qemu_chr_fe_get_msgfds_n(chr, msg.conn_id, fds, 1);
> + printf("VHOST_USER_SET_VRING_KICK called:..kickfd = %d\n",
> fds[0]);
> + vhost_pci_set_vring_kick(msg.conn_id, fds[0]);
> + /*
> + * This is a non-blocking eventfd.
> + * The receive function forces it to be blocking,
> + * so revert it back to non-blocking.
> + */
> + qemu_set_nonblock(fds[0]);
> + break;
> + case VHOST_USER_SET_VRING_CALL:
> + /* consume the fd */
> + qemu_chr_fe_get_msgfds_n(chr, msg.conn_id, fds, 1);
> + vhost_pci_set_vring_call(msg.conn_id, fds[0]);
> + /*
> + * This is a non-blocking eventfd.
> + * The receive function forces it to be blocking,
> + * so revert it back to non-blocking.
> + */
> + qemu_set_nonblock(fds[0]);
> + break;
> + case VHOST_USER_SET_MEM_TABLE:
> + fd_num = qemu_chr_fe_get_msgfds_n(chr, msg.conn_id,
> + fds, sizeof(fds) / sizeof(int));
> + printf("VHOST_USER_SET_MEM_TABLE: fd = %d \n", fd_num);
> + vhost_pci_set_mem_table(msg.conn_id, &msg.payload.memory, fds);
> + break;
> + case VHOST_USER_SET_PEER_CONNECTION:
> + vhost_pci_set_peer_connection(&msg);
> + break;
> + default:
> + printf("default called..msg->request = %d \n", msg.request);
> + break;
> + }
> +}
> +
> +static int vhost_pci_server_can_read(void *opaque)
> +{
> + return VHOST_USER_HDR_SIZE;
> +}
> +
> +static void vhost_pci_server_event(void *opaque, int event)
> +{
> + switch (event) {
> + case CHR_EVENT_OPENED:
> + printf("vhost_pci_server_event called.. \n");
> + break;
> + case CHR_EVENT_CLOSED:
> + printf("vhost_pci_server_event called: event close..\n");
> + break;
> + }
> +}
> +
> +static CharDriverState *vhost_pci_server_parse_chardev(const char *id)
> +{
> + CharDriverState *chr = qemu_chr_find(id);
> + if (chr == NULL) {
> + printf("chardev \"%s\" not found", id);
> + return NULL;
> + }
> +
> + qemu_chr_fe_claim_no_fail(chr);
> +
> + return chr;
> +}
> +
> +int vhost_pci_server_init(QemuOpts *opts)
> +{
> + CharDriverState *chr;
> + const char *chardev_id = qemu_opt_get(opts, "chardev");
> + uint64_t max_connections;
> +
> + vp_server = (VhostPCIServer *)malloc(sizeof(VhostPCIServer));
> +
> + chr = vhost_pci_server_parse_chardev(chardev_id);
> + if (!chr) {
> + return -1;
> + }
> + max_connections = chr->max_connections;
> +
> + qemu_chr_add_handlers(chr, vhost_pci_server_can_read,
> vhost_pci_server_read, vhost_pci_server_event, chr);
> +
> + vp_server->chr = chr;
> +
> + vp_server->peer_table = (PeerConnectionTable
> *)g_malloc0(max_connections * sizeof(PeerConnectionTable));
> +
> + vp_server->old_conn_bitmap = bitmap_new(max_connections);
> +
> + return 0;
> +}
> +
> +int vhost_pci_server_cleanup(void)
> +{
> + free(vp_server);
> + printf("vhost_pci_server_cleanup called.. \n");
> + return 0;
> +}
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index b57454a..bce5181 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -13,6 +13,7 @@
> #include "hw/virtio/vhost.h"
> #include "hw/virtio/vhost-backend.h"
> #include "hw/virtio/virtio-net.h"
> +#include "hw/virtio/vhost-user.h"
> #include "sysemu/char.h"
> #include "sysemu/kvm.h"
> #include "qemu/error-report.h"
> @@ -24,91 +25,6 @@
> #include <sys/un.h>
> #include <linux/vhost.h>
>
> -#define VHOST_MEMORY_MAX_NREGIONS 8
> -#define VHOST_USER_F_PROTOCOL_FEATURES 30
> -
> -enum VhostUserProtocolFeature {
> - VHOST_USER_PROTOCOL_F_MQ = 0,
> - VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
> - VHOST_USER_PROTOCOL_F_RARP = 2,
> - VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
> -
> - VHOST_USER_PROTOCOL_F_MAX
> -};
> -
> -#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 <<
> VHOST_USER_PROTOCOL_F_MAX) - 1)
> -
> -typedef enum VhostUserRequest {
> - VHOST_USER_NONE = 0,
> - VHOST_USER_GET_FEATURES = 1,
> - VHOST_USER_SET_FEATURES = 2,
> - VHOST_USER_SET_OWNER = 3,
> - VHOST_USER_RESET_OWNER = 4,
> - VHOST_USER_SET_MEM_TABLE = 5,
> - VHOST_USER_SET_LOG_BASE = 6,
> - VHOST_USER_SET_LOG_FD = 7,
> - VHOST_USER_SET_VRING_NUM = 8,
> - VHOST_USER_SET_VRING_ADDR = 9,
> - VHOST_USER_SET_VRING_BASE = 10,
> - VHOST_USER_GET_VRING_BASE = 11,
> - VHOST_USER_SET_VRING_KICK = 12,
> - VHOST_USER_SET_VRING_CALL = 13,
> - VHOST_USER_SET_VRING_ERR = 14,
> - VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> - VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> - VHOST_USER_GET_QUEUE_NUM = 17,
> - VHOST_USER_SET_VRING_ENABLE = 18,
> - VHOST_USER_SEND_RARP = 19,
> - VHOST_USER_MAX
> -} VhostUserRequest;
> -
> -typedef struct VhostUserMemoryRegion {
> - uint64_t guest_phys_addr;
> - uint64_t memory_size;
> - uint64_t userspace_addr;
> - uint64_t mmap_offset;
> -} VhostUserMemoryRegion;
> -
> -typedef struct VhostUserMemory {
> - uint32_t nregions;
> - uint32_t padding;
> - VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> -} VhostUserMemory;
> -
> -typedef struct VhostUserLog {
> - uint64_t mmap_size;
> - uint64_t mmap_offset;
> -} VhostUserLog;
> -
> -typedef struct VhostUserMsg {
> - VhostUserRequest request;
> -
> -#define VHOST_USER_VERSION_MASK (0x3)
> -#define VHOST_USER_REPLY_MASK (0x1<<2)
> -#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
> - uint32_t flags;
> - uint32_t size; /* the following payload size */
> - union {
> -#define VHOST_USER_VRING_IDX_MASK (0xff)
> -#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
> - uint64_t u64;
> - struct vhost_vring_state state;
> - struct vhost_vring_addr addr;
> - VhostUserMemory memory;
> - VhostUserLog log;
> - } payload;
> -} QEMU_PACKED VhostUserMsg;
> -
> -static VhostUserMsg m __attribute__ ((unused));
> -#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
> - + sizeof(m.flags) \
> - + sizeof(m.size))
> -
> -#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
> -
> -/* The version of the protocol we support */
> -#define VHOST_USER_VERSION (0x1)
> -
> static bool ioeventfd_enabled(void)
> {
> return kvm_enabled() && kvm_eventfds_enabled();
> diff --git a/include/hw/virtio/vhost-pci-server.h
> b/include/hw/virtio/vhost-pci-server.h
> new file mode 100644
> index 0000000..c9c4a69
> --- /dev/null
> +++ b/include/hw/virtio/vhost-pci-server.h
> @@ -0,0 +1,45 @@
> +#ifndef QEMU_VHOST_PCI_SERVER_H
> +#define QEMU_VHOST_PCI_SERVER_H
> +
> +#include <uuid/uuid.h>
> +#include <linux/vhost.h>
> +
> +typedef struct PeerVirtqInfo {
> + int kickfd;
> + int callfd;
> + uint32_t vring_num;
> + uint16_t last_avail_idx;
> + struct vhost_vring_addr addr;
> + QLIST_ENTRY(PeerVirtqInfo) node;
> +} PeerVirtqInfo;
> +
> +typedef struct PeerConnectionTable {
> + char dev_id[30];
> + uuid_t uuid;
> + uint16_t virtio_id;
> + uint32_t bar_id;
> + MemoryRegion *bar_mr;
> + uint64_t bar_map_offset;
> + uint64_t peer_feature_bits;
> + void *opaque;
> + uint16_t vq_num;
> + QLIST_HEAD(, PeerVirtqInfo) vq_list;
> +} PeerConnectionTable;
> +
> +typedef struct VhostPCIServer {
> + CharDriverState *chr;
> + uint64_t protocol_features;
> + unsigned long *old_conn_bitmap;
> + /* a table indexed by the peer connection id */
> + PeerConnectionTable *peer_table;
> +} VhostPCIServer;
> +
> +extern VhostPCIServer *vp_server;
> +
> +extern int vhost_pci_server_init(QemuOpts *opts);
> +
> +extern int vhost_pci_server_cleanup(void);
> +
> +extern PeerConnectionTable *vp_server_find_table_ent(const char *dev_id);
> +
> +#endif
> diff --git a/include/hw/virtio/vhost-user.h
> b/include/hw/virtio/vhost-user.h
> new file mode 100644
> index 0000000..794a8d8
> --- /dev/null
> +++ b/include/hw/virtio/vhost-user.h
> @@ -0,0 +1,110 @@
> +#ifndef VHOST_USER_H
> +#define VHOST_USER_H
> +
> +#include <linux/vhost.h>
> +#include <uuid/uuid.h>
> +
> +#define VHOST_MEMORY_MAX_NREGIONS 8
> +#define VHOST_USER_F_PROTOCOL_FEATURES 30
> +
> +enum VhostUserProtocolFeature {
> + VHOST_USER_PROTOCOL_F_MQ = 0,
> + VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
> + VHOST_USER_PROTOCOL_F_RARP = 2,
> + VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
> + VHOST_USER_PROTOCOL_F_VHOST_PCI =4,
> +
> + VHOST_USER_PROTOCOL_F_MAX
> +};
> +
> +#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 <<
> VHOST_USER_PROTOCOL_F_MAX) - 1)
> +
> +#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ)
> | \
> + (1ULL <<
> VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
> + (1ULL <<
> VHOST_USER_PROTOCOL_F_RARP)) | \
> + (1ULL <<
> VHOST_USER_PROTOCOL_F_VHOST_PCI)
> +
> +typedef enum VhostUserRequest {
> + VHOST_USER_NONE = 0,
> + VHOST_USER_GET_FEATURES = 1,
> + VHOST_USER_SET_FEATURES = 2,
> + VHOST_USER_SET_OWNER = 3,
> + VHOST_USER_RESET_OWNER = 4,
> + VHOST_USER_SET_MEM_TABLE = 5,
> + VHOST_USER_SET_LOG_BASE = 6,
> + VHOST_USER_SET_LOG_FD = 7,
> + VHOST_USER_SET_VRING_NUM = 8,
> + VHOST_USER_SET_VRING_ADDR = 9,
> + VHOST_USER_SET_VRING_BASE = 10,
> + VHOST_USER_GET_VRING_BASE = 11,
> + VHOST_USER_SET_VRING_KICK = 12,
> + VHOST_USER_SET_VRING_CALL = 13,
> + VHOST_USER_SET_VRING_ERR = 14,
> + VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> + VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> + VHOST_USER_GET_QUEUE_NUM = 17,
> + VHOST_USER_SET_VRING_ENABLE = 18,
> + VHOST_USER_SEND_RARP = 19,
> + VHOST_USER_GET_CONN_ID = 20,
> + VHOST_USER_SET_DEV_INFO = 21,
> + VHOST_USER_SET_PEER_CONNECTION = 22,
> + VHOST_USER_MAX
> +} VhostUserRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> + uint64_t guest_phys_addr;
> + uint64_t memory_size;
> + uint64_t userspace_addr;
> + uint64_t mmap_offset;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> + uint32_t nregions;
> + uint32_t padding;
> + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserLog {
> + uint64_t mmap_size;
> + uint64_t mmap_offset;
> +} VhostUserLog;
> +
> +typedef struct DeviceInfo {
> + uuid_t uuid;
> + uint16_t virtio_id;
> +} DeviceInfo;
> +
> +typedef struct VhostUserMsg {
> + VhostUserRequest request;
> +
> +#define VHOST_USER_VERSION_MASK (0x3)
> +#define VHOST_USER_REPLY_MASK (0x1<<2)
> +#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
> + uint32_t flags;
> + uint32_t size; /* the following payload size */
> + uint64_t conn_id;
> + union {
> +#define VHOST_USER_VRING_IDX_MASK (0xff)
> +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
> + uint64_t u64;
> + struct vhost_vring_state state;
> + struct vhost_vring_addr addr;
> + VhostUserMemory memory;
> + VhostUserLog log;
> + DeviceInfo dev_info;
> + } payload;
> +} QEMU_PACKED VhostUserMsg;
> +
> +static VhostUserMsg m __attribute__ ((unused));
> +#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
> + + sizeof(m.flags) \
> + + sizeof(m.size)) \
> + + sizeof(m.conn_id)
> +
> +#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION (0x2)
> +
> +#endif
> +
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index ee7c760..7f8b25c 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -244,5 +244,6 @@ extern QemuOptsList qemu_netdev_opts;
> extern QemuOptsList qemu_net_opts;
> extern QemuOptsList qemu_global_opts;
> extern QemuOptsList qemu_mon_opts;
> +extern QemuOptsList qemu_vhost_pci_server_opts;
>
> #endif
> diff --git a/qemu-options.hx b/qemu-options.hx
> index a71aaf8..1fdb820 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -3968,6 +3968,10 @@ contents of @code{iv.b64} to the second secret
>
> ETEXI
>
> +DEF("vhost-pci-server", HAS_ARG, QEMU_OPTION_vhost_pci_server,
> + "-vhost-pci-server socket,chrdev={id}\n"
> + " creates a vhost-pci-server",
> + QEMU_ARCH_I386)
>
> HXCOMM This is the last statement. Insert new options before this line!
> STEXI
> diff --git a/vl.c b/vl.c
> index b3c80d5..c1f038d 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -121,6 +121,7 @@ int main(int argc, char **argv)
> #include "crypto/init.h"
> #include "sysemu/replay.h"
> #include "qapi/qmp/qerror.h"
> +#include "hw/virtio/vhost-pci-server.h"
>
> #define MAX_VIRTIO_CONSOLES 1
> #define MAX_SCLP_CONSOLES 1
> @@ -178,6 +179,7 @@ bool boot_strict;
> uint8_t *boot_splash_filedata;
> size_t boot_splash_filedata_size;
> uint8_t qemu_extra_params_fw[2];
> +bool vhost_pci_server_enabled;
>
> int icount_align_option;
>
> @@ -2980,6 +2982,7 @@ int main(int argc, char **argv, char **envp)
> qemu_add_drive_opts(&qemu_drive_opts);
> qemu_add_opts(&qemu_chardev_opts);
> qemu_add_opts(&qemu_device_opts);
> + qemu_add_opts(&qemu_vhost_pci_server_opts);
> qemu_add_opts(&qemu_netdev_opts);
> qemu_add_opts(&qemu_net_opts);
> qemu_add_opts(&qemu_rtc_opts);
> @@ -3970,6 +3973,13 @@ int main(int argc, char **argv, char **envp)
> exit(1);
> }
> break;
> + case QEMU_OPTION_vhost_pci_server:
> + vhost_pci_server_enabled = true;
> + opts =
> qemu_opts_parse_noisily(qemu_find_opts("vhost-pci-server"), optarg, false);
> + if (!opts) {
> + exit(1);
> + }
> + break;
> default:
> os_parse_cmd_args(popt->index, optarg);
> }
> @@ -4479,6 +4489,16 @@ int main(int argc, char **argv, char **envp)
> exit(1);
> }
>
> + /* check if the vhost-pci-server is enabled */
> + if (vhost_pci_server_enabled) {
> + int ret;
> + ret = vhost_pci_server_init(qemu_opts_find(
> + qemu_find_opts("vhost-pci-server"),
> + NULL));
> + if (ret < 0)
> + exit(1);
> + }
> +
> /* init USB devices */
> if (machine_usb(current_machine)) {
> if (foreach_device_config(DEV_USB, usb_parse) < 0)
> @@ -4607,6 +4627,12 @@ int main(int argc, char **argv, char **envp)
> bdrv_close_all();
> pause_all_vcpus();
> res_free();
> + if (vhost_pci_server_enabled) {
> + int ret;
> + ret = vhost_pci_server_cleanup();
> + if (ret < 0)
> + exit(1);
> + }
> #ifdef CONFIG_TPM
> tpm_cleanup();
> #endif
> --
> 2.7.4
>
> --
Marc-André Lureau