Re: [RFC] vhost: Introduce packed vq and add buffer elements
From: Eugenio Perez Martin
Subject: Re: [RFC] vhost: Introduce packed vq and add buffer elements
Date: Wed, 19 Jun 2024 12:19:29 +0200
On Tue, Jun 18, 2024 at 8:19 PM Sahil Siddiq <icegambit91@gmail.com> wrote:
>
> This is the first patch in a series to add support for packed
> virtqueues in vhost_shadow_virtqueue. This patch implements the
> insertion of available buffers in the descriptor area. It takes
> into account descriptor chains, but does not consider indirect
> descriptors.
>
> VhostShadowVirtqueue has also been modified so it acts as a layer
> of abstraction for split and packed virtqueues.
>
> Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
> ---
> Hi,
>
> I am currently working on adding support for packed virtqueues in
> vhost_shadow_virtqueue [1]. This patch only implements the insertion of
> available buffers in the descriptor area. It does not take into
> account indirect descriptors, event_idx or notifications.
>
> I don't think these changes are testable yet but I thought I would
> still post this patch for feedback. The following email annotates these
> changes with a few comments and questions that I have.
>
> Thanks,
> Sahil
>
Hi Sahil,
Just some nitpicks here and there,
> [1] https://wiki.qemu.org/Internships/ProjectIdeas/PackedShadowVirtqueue
>
> hw/virtio/vhost-shadow-virtqueue.c | 124 ++++++++++++++++++++++++++++-
> hw/virtio/vhost-shadow-virtqueue.h | 66 ++++++++++-----
> 2 files changed, 167 insertions(+), 23 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index fc5f408f77..e3b276a9e9 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -217,6 +217,122 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
> return true;
> }
>
> +/**
> + * Write descriptors to SVQ packed vring
> + *
> + * @svq: The shadow virtqueue
> + * @sg: Cache for hwaddr
> + * @out_sg: The iovec from the guest that is read-only for device
> + * @out_num: iovec length
> + * @in_sg: The iovec from the guest that is write-only for device
> + * @in_num: iovec length
> + * @head_flags: flags for first descriptor in list
> + *
> + * Return true if success, false otherwise and print error.
> + */
> +static bool vhost_svq_vring_write_descs_packed(VhostShadowVirtqueue *svq, hwaddr *sg,
> + const struct iovec *out_sg, size_t out_num,
> + const struct iovec *in_sg, size_t in_num,
> + uint16_t *head_flags)
> +{
> + uint16_t id, curr, head, i;
> + unsigned n;
> + struct vring_packed_desc *descs = svq->vring_packed.vring.desc;
> + bool ok;
> +
> + head = svq->vring_packed.next_avail_idx;
> + i = head;
> + id = svq->free_head;
> + curr = id;
> +
> + size_t num = out_num + in_num;
> +
> + if (num == 0) {
> + return true;
> + }
num == 0 is impossible now, the caller checks for that.
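So this whole block can go away and the function can start directly at
the translation, something like:

    size_t num = out_num + in_num;

    ok = vhost_svq_translate_addr(svq, sg, out_sg, out_num);
    if (unlikely(!ok)) {
        return false;
    }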
> +
> + ok = vhost_svq_translate_addr(svq, sg, out_sg, out_num);
> + if (unlikely(!ok)) {
> + return false;
> + }
> +
> + ok = vhost_svq_translate_addr(svq, sg + out_num, in_sg, in_num);
> + if (unlikely(!ok)) {
> + return false;
> + }
> +
> + for (n = 0; n < num; n++) {
> + uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags |
> + (n < out_num ? 0 : VRING_DESC_F_WRITE) |
> + (n + 1 == num ? 0 : VRING_DESC_F_NEXT));
> + if (i == head) {
> + *head_flags = flags;
> + } else {
> + descs[i].flags = flags;
> + }
> +
> + descs[i].addr = cpu_to_le64(sg[n]);
> + descs[i].id = cpu_to_le16(id);
> + if (n < out_num) {
> + descs[i].len = cpu_to_le32(out_sg[n].iov_len);
> + } else {
> + descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
> + }
> +
> + curr = cpu_to_le16(svq->desc_next[curr]);
> +
> + if (++i >= svq->vring_packed.vring.num) {
> + i = 0;
> + svq->vring_packed.avail_used_flags ^=
> + 1 << VRING_PACKED_DESC_F_AVAIL |
> + 1 << VRING_PACKED_DESC_F_USED;
> + }
> + }
> +
> + if (i <= head) {
> + svq->vring_packed.avail_wrap_counter ^= 1;
> + }
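Not a change request, just spelling out the mechanism for other readers:
a packed descriptor is made available by writing its AVAIL flag equal to
the driver's wrap counter and its USED flag to the inverse, so the XOR
above flips both bits each time the ring wraps. A rough sketch of the
initial state this code relies on (illustrative, not part of the patch):

    /* Driver-side state at ring start: wrap counter begins at 1 */
    svq->vring_packed.avail_wrap_counter = true;
    svq->vring_packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
    /*
     * After one wrap the XOR leaves AVAIL=0, USED=1; the device still
     * reads that pattern as "available" on the next lap because its own
     * wrap counter has flipped too.
     */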
> +
> + svq->vring_packed.next_avail_idx = i;
> + svq->free_head = curr;
> + return true;
> +}
> +
> +static bool vhost_svq_add_packed(VhostShadowVirtqueue *svq,
> + const struct iovec *out_sg, size_t out_num,
> + const struct iovec *in_sg, size_t in_num,
> + unsigned *head)
> +{
> + bool ok;
> + uint16_t head_flags = 0;
> + g_autofree hwaddr *sgs = g_new(hwaddr, out_num + in_num);
> +
> + *head = svq->vring_packed.next_avail_idx;
> +
> + /* We need some descriptors here */
> + if (unlikely(!out_num && !in_num)) {
> + qemu_log_mask(LOG_GUEST_ERROR,
> + "Guest provided element with no descriptors");
> + return false;
> + }
> +
> + ok = vhost_svq_vring_write_descs_packed(svq, sgs, out_sg, out_num,
> + in_sg, in_num, &head_flags);
> + if (unlikely(!ok)) {
> + return false;
> + }
> +
Ok, now I see why you switched the sgs length from MAX to sum. But if
we're here, why not just embed all of vhost_svq_vring_write_descs_packed
in this function? Splitting out vhost_svq_vring_write_descs makes sense
because we repeat the operation there, but I think the split adds
nothing here. What do you think?
> + /*
> + * A driver MUST NOT make the first descriptor in the list
> + * available before all subsequent descriptors comprising
> + * the list are made available.
> + */
> + smp_wmb();
> + svq->vring_packed.vring.desc[*head].flags = head_flags;
> +
> + return true;
> +}
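The barrier looks correct to me. For completeness, the device side pairs
it with a read of the flags before the descriptor body; the usual
availability test, which a later patch will need anyway for the used
ring, looks roughly like this (sketch, not in this patch):

    static bool vhost_svq_packed_desc_is_avail(uint16_t flags, bool wrap)
    {
        bool avail = flags & (1 << VRING_PACKED_DESC_F_AVAIL);
        bool used = flags & (1 << VRING_PACKED_DESC_F_USED);

        /* Available iff AVAIL matches the wrap counter and USED does not */
        return avail == wrap && used != wrap;
    }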
> +
> static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> {
> bool needs_kick;
> @@ -258,7 +374,13 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
> return -ENOSPC;
> }
>
> - ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
> + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) {
> + ok = vhost_svq_add_packed(svq, out_sg, out_num,
> + in_sg, in_num, &qemu_head);
> + } else {
> + ok = vhost_svq_add_split(svq, out_sg, out_num,
> + in_sg, in_num, &qemu_head);
> + }
> if (unlikely(!ok)) {
> return -EINVAL;
> }
> diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
> index 19c842a15b..ee1a87f523 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.h
> +++ b/hw/virtio/vhost-shadow-virtqueue.h
> @@ -46,10 +46,53 @@ typedef struct VhostShadowVirtqueueOps {
> VirtQueueAvailCallback avail_handler;
> } VhostShadowVirtqueueOps;
>
> +struct vring_packed {
> + /* Actual memory layout for this queue. */
> + struct {
> + unsigned int num;
> + struct vring_packed_desc *desc;
> + struct vring_packed_desc_event *driver;
> + struct vring_packed_desc_event *device;
> + } vring;
> +
> + /* Avail used flags. */
> + uint16_t avail_used_flags;
> +
> + /* Index of the next avail descriptor. */
> + uint16_t next_avail_idx;
> +
> + /* Driver ring wrap counter */
> + bool avail_wrap_counter;
> +};
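Struct looks good. One thing for when you wire up the start path: the
descriptor ring needs num * sizeof(struct vring_packed_desc) bytes (16
per entry), and the driver and device event suppression areas are 4
bytes each. Carving them out of one contiguous mapping could look
roughly like this (sketch only, assuming the standard layout of the
descriptor table followed by the two event structs):

    struct vring_packed_desc *desc = svq->vring_packed.vring.desc;
    size_t num = svq->vring_packed.vring.num;

    /* Driver event area sits right after the descriptor ring */
    svq->vring_packed.vring.driver =
        (struct vring_packed_desc_event *)(desc + num);
    /* Device event area follows the driver one */
    svq->vring_packed.vring.device = svq->vring_packed.vring.driver + 1;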
> +
> /* Shadow virtqueue to relay notifications */
> typedef struct VhostShadowVirtqueue {
> + /* Virtio queue shadowing */
> + VirtQueue *vq;
> +
> + /* Virtio device */
> + VirtIODevice *vdev;
> +
> + /* SVQ vring descriptors state */
> + SVQDescState *desc_state;
> +
> + /*
> + * Backup next field for each descriptor so we can recover securely, not
> + * needing to trust the device access.
> + */
> + uint16_t *desc_next;
> +
> + /* Next free descriptor */
> + uint16_t free_head;
> +
> + /* Size of SVQ vring free descriptors */
> + uint16_t num_free;
> +
Why the reorder of all of the previous members?
Apart from the nitpicks I don't see anything wrong with this part of
the project. Looking forward to the next!
Thanks!
> /* Shadow vring */
> - struct vring vring;
> + union {
> + struct vring vring;
> + struct vring_packed vring_packed;
> + };
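The union itself is fine by me. Note that any common code still reading
svq->vring.num while the packed layout is active is only safe because
num sits at the same offset in both structs; if you want to rely on
that, maybe make it explicit with a build-time check, e.g.:

    QEMU_BUILD_BUG_ON(offsetof(struct vring_packed, vring.num) !=
                      offsetof(struct vring, num));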
>
> /* Shadow kick notifier, sent to vhost */
> EventNotifier hdev_kick;
> @@ -69,27 +112,12 @@ typedef struct VhostShadowVirtqueue {
> /* Guest's call notifier, where the SVQ calls guest. */
> EventNotifier svq_call;
>
> - /* Virtio queue shadowing */
> - VirtQueue *vq;
> -
> - /* Virtio device */
> - VirtIODevice *vdev;
> -
> /* IOVA mapping */
> VhostIOVATree *iova_tree;
>
> - /* SVQ vring descriptors state */
> - SVQDescState *desc_state;
> -
> /* Next VirtQueue element that guest made available */
> VirtQueueElement *next_guest_avail_elem;
>
> - /*
> - * Backup next field for each descriptor so we can recover securely, not
> - * needing to trust the device access.
> - */
> - uint16_t *desc_next;
> -
> /* Caller callbacks */
> const VhostShadowVirtqueueOps *ops;
>
> @@ -99,17 +127,11 @@ typedef struct VhostShadowVirtqueue {
> /* Next head to expose to the device */
> uint16_t shadow_avail_idx;
>
> - /* Next free descriptor */
> - uint16_t free_head;
> -
> /* Last seen used idx */
> uint16_t shadow_used_idx;
>
> /* Next head to consume from the device */
> uint16_t last_used_idx;
> -
> - /* Size of SVQ vring free descriptors */
> - uint16_t num_free;
> } VhostShadowVirtqueue;
>
> bool vhost_svq_valid_features(uint64_t features, Error **errp);
> --
> 2.45.2
>