qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 2/2] vdpa: send CVQ state load commands in parallel


From: Hawkins Jiawei
Subject: Re: [PATCH 2/2] vdpa: send CVQ state load commands in parallel
Date: Thu, 20 Apr 2023 19:38:31 +0800

On Thu, 20 Apr 2023 at 01:43, Eugenio Perez Martin <eperezma@redhat.com> wrote:
>
> On Wed, Apr 19, 2023 at 1:50 PM Hawkins Jiawei <yin31149@gmail.com> wrote:
> >
> > This patch introduces the vhost_vdpa_net_cvq_add() and
> > refactors the vhost_vdpa_net_load*(), so that QEMU can
> > send CVQ state load commands in parallel.
> >
> > To be more specific, this patch introduces vhost_vdpa_net_cvq_add()
> > to add SVQ control commands to SVQ and kick the device,
> > but does not poll the device used buffers. QEMU will not
> > poll and check the device used buffers in vhost_vdpa_net_load()
> > until all CVQ state load commands have been sent to the device.
> >
> > What's more, in order to avoid buffer overwriting caused by
> > using `svq->cvq_cmd_out_buffer` and `svq->status` as the
> > buffer for all CVQ state load commands when sending
> > CVQ state load commands in parallel, this patch introduces
> > `out_cursor` and `in_cursor` in vhost_vdpa_net_load(),
> > pointing to the available buffer for in descriptor and
> > out descriptor, so that different CVQ state load commands can
> > use their unique buffer.
> >
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1578
> > Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
> > ---
> >  net/vhost-vdpa.c | 137 +++++++++++++++++++++++++++++++++++------------
> >  1 file changed, 102 insertions(+), 35 deletions(-)
> >
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index 10804c7200..d1f7113992 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -590,6 +590,44 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
> >      vhost_vdpa_net_client_stop(nc);
> >  }
> >
> > +/**
> > + * vhost_vdpa_net_cvq_add() adds SVQ control commands to SVQ,
> > + * kicks the device but does not poll the device used buffers.
> > + *
> > + * Return the length of the device used buffers.
> > + */
> > +static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s,
> > +                                void **out_cursor, size_t out_len,
> > +                                void **in_cursor, size_t in_len)
> > +{
> > +    /* Buffers for the device */
> > +    const struct iovec out = {
> > +        .iov_base = *out_cursor,
> > +        .iov_len = out_len,
> > +    };
> > +    const struct iovec in = {
> > +        .iov_base = *in_cursor,
> > +        .iov_len = in_len,
> > +    };
> > +    VhostShadowVirtqueue *svq = 
> > g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
> > +    int r;
> > +
> > +    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
> > +    if (unlikely(r != 0)) {
> > +        if (unlikely(r == -ENOSPC)) {
> > +            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device 
> > queue\n",
> > +                          __func__);
> > +        }
> > +        return r;
> > +    }
> > +
> > +    /* Update the cursor */
> > +    *out_cursor += out_len;
> > +    *in_cursor += in_len;
> > +
> > +    return in_len;
> > +}
> > +
> >  /**
> >   * vhost_vdpa_net_cvq_add_and_wait() adds SVQ control commands to SVQ,
> >   * kicks the device and polls the device used buffers.
> > @@ -628,69 +666,64 @@ static ssize_t 
> > vhost_vdpa_net_cvq_add_and_wait(VhostVDPAState *s,
> >      return vhost_svq_poll(svq);
> >  }
> >
> > -static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
> > -                                       uint8_t cmd, const void *data,
> > -                                       size_t data_size)
> > +static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s,
> > +                                void **out_cursor, uint8_t class, uint8_t 
> > cmd,
> > +                                const void *data, size_t data_size,
> > +                                void **in_cursor)
> >  {
> >      const struct virtio_net_ctrl_hdr ctrl = {
> >          .class = class,
> >          .cmd = cmd,
> >      };
> >
> > -    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
> > +    assert(sizeof(ctrl) < vhost_vdpa_net_cvq_cmd_page_len() -
> > +                          (*out_cursor - s->cvq_cmd_out_buffer));
> > +    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl) -
> > +                       (*out_cursor - s->cvq_cmd_out_buffer));
> >
> > -    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
> > -    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);
> > +    memcpy(*out_cursor, &ctrl, sizeof(ctrl));
> > +    memcpy(*out_cursor + sizeof(ctrl), data, data_size);
> >
> > -    return vhost_vdpa_net_cvq_add_and_wait(s, sizeof(ctrl) + data_size,
> > -                                  sizeof(virtio_net_ctrl_ack));
> > +    return vhost_vdpa_net_cvq_add(s, out_cursor, sizeof(ctrl) + data_size,
> > +                                  in_cursor, sizeof(virtio_net_ctrl_ack));
> >  }
> >
> > -static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
> > +static ssize_t vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet 
> > *n,
> > +                                   void **out_cursor, void **in_cursor)
> >  {
> >      uint64_t features = n->parent_obj.guest_features;
> >      if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
> > -        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, 
> > VIRTIO_NET_CTRL_MAC,
> > -                                                  
> > VIRTIO_NET_CTRL_MAC_ADDR_SET,
> > -                                                  n->mac, sizeof(n->mac));
> > -        if (unlikely(dev_written < 0)) {
> > -            return dev_written;
> > -        }
> > -
> > -        return *s->status != VIRTIO_NET_OK;
> > +        return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MAC,
> > +                                       VIRTIO_NET_CTRL_MAC_ADDR_SET,
> > +                                       n->mac, sizeof(n->mac), in_cursor);
> >      }
> >
> >      return 0;
> >  }
> >
> > -static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
> > -                                  const VirtIONet *n)
> > +static ssize_t vhost_vdpa_net_load_mq(VhostVDPAState *s, const VirtIONet 
> > *n,
> > +                                  void **out_cursor, void **in_cursor)
> >  {
> >      struct virtio_net_ctrl_mq mq;
> >      uint64_t features = n->parent_obj.guest_features;
> > -    ssize_t dev_written;
> >
> >      if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
> >          return 0;
> >      }
> >
> > -    mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
>
> So where is mq filled now?

This is a typo; that line should not have been deleted.
I will correct it in the next version of the patch.

>
> > -    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
> > -                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, 
> > &mq,
> > -                                          sizeof(mq));
> > -    if (unlikely(dev_written < 0)) {
> > -        return dev_written;
> > -    }
> > -
> > -    return *s->status != VIRTIO_NET_OK;
> > +    return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MQ,
> > +                                   VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
> > +                                   &mq, sizeof(mq), in_cursor);
> >  }
> >
> >  static int vhost_vdpa_net_load(NetClientState *nc)
> >  {
> >      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +    VhostShadowVirtqueue *svq;
> > +    void *out_cursor, *in_cursor;
> >      struct vhost_vdpa *v = &s->vhost_vdpa;
> >      const VirtIONet *n;
> > -    int r;
> > +    ssize_t need_poll_len = 0, r, dev_written;
> >
> >      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> >
> > @@ -699,16 +732,50 @@ static int vhost_vdpa_net_load(NetClientState *nc)
> >      }
> >
> >      n = VIRTIO_NET(v->dev->vdev);
> > -    r = vhost_vdpa_net_load_mac(s, n);
> > +
>
> Extra newline.

Thanks for the comment, I will remove it in the second patch.

>
> > +    need_poll_len = 0;
>
> Maybe we can call it "cmds_in_flight" or something similar? It is not a 
> length.

Do you mean we should maintain the number of successful commands here?

My initial thought was to maintain the total length of the device's used
buffers, so that we know when to stop polling for used buffers from the
device.

So far, each SVQ command uses a one-byte used buffer on the device side,
so maintaining the total length of the device's used buffers and
maintaining the number of successful commands are effectively equivalent.

Yet I prefer to maintain the total length of the device's used buffers,
because that avoids hard-coding the assumption that each SVQ command uses
a one-byte used buffer, and it makes better use of the vhost_svq_poll()
return value during polling, which makes the code more reasonable.

What do you think?

>
> > +    out_cursor = s->cvq_cmd_out_buffer;
> > +    in_cursor = s->status;
> > +
> > +    r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor);
> >      if (unlikely(r < 0)) {
> > -        return r;
> > +        goto load_err;
> > +    }
> > +    need_poll_len += r;
> > +
> > +    r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor);
> > +    if (unlikely(r < 0)) {
> > +        goto load_err;
> > +    }
> > +    need_poll_len += r;
> > +
> > +load_err:
> > +    /* Poll for all SVQ control commands used buffer from device */
> > +    svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
> > +    while (need_poll_len > 0) {
> > +        /*
> > +         * We can poll here since we've had BQL from the time we sent the
> > +         * descriptor. Also, we need to take the answer before SVQ pulls
> > +         * by itself, when BQL is released
> > +         */
> > +        dev_written = vhost_svq_poll(svq);
>
> If vhost_svq_poll returns 0 we must return an error from this code.
> Otherwise this will be an infinite loop.

Yes, you are right: we should return an error here. Which error code do
you think we should return? It seems that vhost_svq_poll() returns 0 when
QEMU has waited too long or there is no available used buffer from the
device, so would it be okay to return -EAGAIN (resource temporarily
unavailable)?

>
> > +        need_poll_len -= dev_written;
>
> On the other hand, vhost_svq_poll returns 1 because that is the length
> written in the "in" buffer, but it is not obvious here. It is more
> clear if we just do need_poll_len-- here.

It should be the same as above: if we keep `need_poll_len` as the total
length of the device's used buffers, it looks more reasonable here.

Maybe `used_buffer_len` would be a clearer name?

>
> >      }
> > -    r = vhost_vdpa_net_load_mq(s, n);
> > -    if (unlikely(r)) {
> > +
> > +    /* If code comes from `load_err` label, then we should return directly 
> > */
> > +    if (unlikely(r < 0)) {
>
> If this function returns a failure we can avoid the sending of the
> queued commands, as the caller must cancel the start of the device
> anyway. We can return directly from the failure in
> vhost_vdpa_net_load_* and avoid the label.

Thanks for the explanation, I will refactor the patch as you suggested.

>
> >          return r;
> >      }
> >
> > -    return 0;
> > +    /* Check the used buffer from device */
> > +    for (virtio_net_ctrl_ack * status = s->status; (void *)status < 
> > in_cursor;
>
> in_cursor can be a virtio_net_ctrl_ack so we don't need the casting here.
Thanks for the suggestion, I will refactor the patch as you suggested.

>
> > +         ++status) {
> > +        if (*status != VIRTIO_NET_OK) {
> > +            ++r;
> > +        }
> > +    }
> > +
> > +    return r;
>
> Although the caller is fine with >=0, I think we should keep the 0 ==
> success. The number of commands delivered does not make a lot of sense
> for the callers, just if the call succeeded or not.

Thanks for the explanation, I will refactor the patch as you suggested.

>
> Thanks!
>
> >  }
> >
> >  static NetClientInfo net_vhost_vdpa_cvq_info = {
> > --
> > 2.25.1
> >
>



reply via email to

[Prev in Thread] Current Thread [Next in Thread]