[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] Re: [PATCH v4 2/4] virtio-pci: Use ioeventfd for virtqueue
From: |
Michael S. Tsirkin |
Subject: |
[Qemu-devel] Re: [PATCH v4 2/4] virtio-pci: Use ioeventfd for virtqueue notify |
Date: |
Sun, 12 Dec 2010 13:22:33 +0200 |
User-agent: |
Mutt/1.5.21 (2010-09-15) |
On Wed, Nov 17, 2010 at 04:19:27PM +0000, Stefan Hajnoczi wrote:
> Virtqueue notify is currently handled synchronously in userspace virtio. This
> prevents the vcpu from executing guest code while hardware emulation code
> handles the notify.
>
> On systems that support KVM, the ioeventfd mechanism can be used to make
> virtqueue notify a lightweight exit by deferring hardware emulation to the
> iothread and allowing the VM to continue execution. This model is similar to
> how vhost receives virtqueue notifies.
>
> The result of this change is improved performance for userspace virtio
> devices.
> Virtio-blk throughput increases especially for multithreaded scenarios and
> virtio-net transmit throughput increases substantially.
>
> Some virtio devices are known to have guest drivers which expect a notify to
> be
> processed synchronously and spin waiting for completion. Only enable
> ioeventfd
> for virtio-blk and virtio-net for now.
>
> Care must be taken not to interfere with vhost-net, which uses host
> notifiers. If the set_host_notifier() API is used by a device
> virtio-pci will disable virtio-ioeventfd and let the device deal with
> host notifiers as it wishes.
>
> After migration and on VM change state (running/paused) virtio-ioeventfd
> will enable/disable itself.
>
> * VIRTIO_CONFIG_S_DRIVER_OK -> enable virtio-ioeventfd
> * !VIRTIO_CONFIG_S_DRIVER_OK -> disable virtio-ioeventfd
> * virtio_pci_set_host_notifier() -> disable virtio-ioeventfd
> * vm_change_state(running=0) -> disable virtio-ioeventfd
> * vm_change_state(running=1) -> enable virtio-ioeventfd
>
> Signed-off-by: Stefan Hajnoczi <address@hidden>
So this is pretty clean. Two things that worry me a bit:
- With vhost-net, it seems that we might now run in userspace just before start
or just after stop. This means that e.g. if there's a ring processing
bug in userspace we'll get hit by it, which I'd like to avoid.
- There seems to be a bit of duplication where we call start/stop
in a similar way in lots of places. These are really
all the places where we call set_status. Might be nice to
find a way to reduce that duplication.
I'm trying to think of ways to improve this a bit,
if I don't find any way to improve it I guess
I'll just apply the series as is.
> ---
> hw/virtio-pci.c | 188
> ++++++++++++++++++++++++++++++++++++++++++++++++-------
> hw/virtio.c | 14 +++-
> hw/virtio.h | 1 +
> 3 files changed, 177 insertions(+), 26 deletions(-)
>
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index 549118d..0217fda 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -83,6 +83,11 @@
> /* Flags track per-device state like workarounds for quirks in older guests.
> */
> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0)
>
> +/* Performance improves when virtqueue kick processing is decoupled from the
> + * vcpu thread using ioeventfd for some devices. */
> +#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1
> +#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 <<
> VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
> +
> /* QEMU doesn't strictly need write barriers since everything runs in
> * lock-step. We'll leave the calls to wmb() in though to make it obvious
> for
> * KVM or if kqemu gets SMP support.
> @@ -107,6 +112,8 @@ typedef struct {
> /* Max. number of ports we can have for a the virtio-serial device */
> uint32_t max_virtserial_ports;
> virtio_net_conf net;
> + bool ioeventfd_started;
> + VMChangeStateEntry *vm_change_state_entry;
> } VirtIOPCIProxy;
>
> /* virtio device */
> @@ -179,12 +186,129 @@ static int virtio_pci_load_queue(void * opaque, int n,
> QEMUFile *f)
> return 0;
> }
>
> +static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy,
> + int n, bool assign)
> +{
> + VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
> + EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
> + int r;
> + if (assign) {
> + r = event_notifier_init(notifier, 1);
> + if (r < 0) {
> + return r;
> + }
> + r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
> + proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
> + n, assign);
> + if (r < 0) {
> + event_notifier_cleanup(notifier);
> + }
> + } else {
> + r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
> + proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
> + n, assign);
> + if (r < 0) {
> + return r;
> + }
> + event_notifier_cleanup(notifier);
> + }
> + return r;
> +}
> +
> +static void virtio_pci_host_notifier_read(void *opaque)
> +{
> + VirtQueue *vq = opaque;
> + EventNotifier *n = virtio_queue_get_host_notifier(vq);
> + if (event_notifier_test_and_clear(n)) {
> + virtio_queue_notify_vq(vq);
> + }
> +}
> +
> +static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy,
> + int n, bool assign)
> +{
> + VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
> + EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
> + if (assign) {
> + qemu_set_fd_handler(event_notifier_get_fd(notifier),
> + virtio_pci_host_notifier_read, NULL, vq);
> + } else {
> + qemu_set_fd_handler(event_notifier_get_fd(notifier),
> + NULL, NULL, NULL);
> + }
> +}
> +
> +static int virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
> +{
> + int n, r;
> +
> + if (!(proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) ||
> + proxy->ioeventfd_started) {
> + return 0;
> + }
> +
> + for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
> + if (!virtio_queue_get_num(proxy->vdev, n)) {
> + continue;
> + }
> +
> + r = virtio_pci_set_host_notifier_internal(proxy, n, true);
> + if (r < 0) {
> + goto assign_error;
> + }
> +
> + virtio_pci_set_host_notifier_fd_handler(proxy, n, true);
> + }
> + proxy->ioeventfd_started = true;
> + return 0;
> +
> +assign_error:
> + while (--n >= 0) {
> + if (!virtio_queue_get_num(proxy->vdev, n)) {
> + continue;
> + }
> +
> + virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
> + virtio_pci_set_host_notifier_internal(proxy, n, false);
> + virtio_queue_notify(proxy->vdev, n);
> + }
> + proxy->ioeventfd_started = false;
> + proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
> + return r;
> +}
> +
> +static int virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
> +{
> + int n;
> +
> + if (!proxy->ioeventfd_started) {
> + return 0;
> + }
> +
> + for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
> + if (!virtio_queue_get_num(proxy->vdev, n)) {
> + continue;
> + }
> +
> + virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
> + virtio_pci_set_host_notifier_internal(proxy, n, false);
> +
> + /* Now notify the vq to handle the race condition where the guest
> + * kicked and we deassigned before we got around to handling the
> kick.
> + */
Can't we just call event_notifier_test_and_clear to figure out whether
this happened?
This seems cleaner than calling the notifier unconditionally.
> + virtio_queue_notify(proxy->vdev, n);
> + }
> + proxy->ioeventfd_started = false;
> + return 0;
> +}
> +
> static void virtio_pci_reset(DeviceState *d)
> {
> VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);
> + virtio_pci_stop_ioeventfd(proxy);
> virtio_reset(proxy->vdev);
> msix_reset(&proxy->pci_dev);
> - proxy->flags = 0;
> + proxy->flags &= ~VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
> }
>
> static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
> @@ -209,6 +333,7 @@ static void virtio_ioport_write(void *opaque, uint32_t
> addr, uint32_t val)
> case VIRTIO_PCI_QUEUE_PFN:
> pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
> if (pa == 0) {
> + virtio_pci_stop_ioeventfd(proxy);
> virtio_reset(proxy->vdev);
> msix_unuse_all_vectors(&proxy->pci_dev);
> }
> @@ -223,6 +348,12 @@ static void virtio_ioport_write(void *opaque, uint32_t
> addr, uint32_t val)
> virtio_queue_notify(vdev, val);
> break;
> case VIRTIO_PCI_STATUS:
> + if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
> + virtio_pci_start_ioeventfd(proxy);
> + } else {
> + virtio_pci_stop_ioeventfd(proxy);
> + }
> +
> virtio_set_status(vdev, val & 0xFF);
> if (vdev->status == 0) {
> virtio_reset(proxy->vdev);
> @@ -403,6 +534,7 @@ static void virtio_write_config(PCIDevice *pci_dev,
> uint32_t address,
> if (PCI_COMMAND == address) {
> if (!(val & PCI_COMMAND_MASTER)) {
> if (!(proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG)) {
> + virtio_pci_stop_ioeventfd(proxy);
> virtio_set_status(proxy->vdev,
> proxy->vdev->status &
> ~VIRTIO_CONFIG_S_DRIVER_OK);
> }
> @@ -480,30 +612,27 @@ assign_error:
> static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)
> {
> VirtIOPCIProxy *proxy = opaque;
> - VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
> - EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
> - int r;
> - if (assign) {
> - r = event_notifier_init(notifier, 1);
> - if (r < 0) {
> - return r;
> - }
> - r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
> - proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
> - n, assign);
> - if (r < 0) {
> - event_notifier_cleanup(notifier);
> - }
> +
> + /* Stop using ioeventfd for virtqueue kick if the device starts using
> host
> + * notifiers. This makes it easy to avoid stepping on each others' toes.
> + */
> + if (proxy->ioeventfd_started) {
> + virtio_pci_stop_ioeventfd(proxy);
> + proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
> + }
> +
> + return virtio_pci_set_host_notifier_internal(proxy, n, assign);
> +}
> +
> +static void virtio_pci_vm_change_state_handler(void *opaque, int running,
> int reason)
> +{
> + VirtIOPCIProxy *proxy = opaque;
> +
> + if (running && (proxy->vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
> + virtio_pci_start_ioeventfd(proxy);
> } else {
> - r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
> - proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
> - n, assign);
> - if (r < 0) {
> - return r;
> - }
> - event_notifier_cleanup(notifier);
> + virtio_pci_stop_ioeventfd(proxy);
> }
> - return r;
> }
>
> static const VirtIOBindings virtio_pci_bindings = {
> @@ -563,6 +692,10 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy,
> VirtIODevice *vdev,
> proxy->host_features |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY;
> proxy->host_features |= 0x1 << VIRTIO_F_BAD_FEATURE;
> proxy->host_features = vdev->get_features(vdev, proxy->host_features);
> +
> + proxy->vm_change_state_entry = qemu_add_vm_change_state_handler(
> + virtio_pci_vm_change_state_handler,
> + proxy);
> }
>
> static int virtio_blk_init_pci(PCIDevice *pci_dev)
> @@ -590,6 +723,9 @@ static int virtio_blk_init_pci(PCIDevice *pci_dev)
>
> static int virtio_exit_pci(PCIDevice *pci_dev)
> {
> + VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
> +
> + qemu_del_vm_change_state_handler(proxy->vm_change_state_entry);
> return msix_uninit(pci_dev);
> }
>
> @@ -597,6 +733,7 @@ static int virtio_blk_exit_pci(PCIDevice *pci_dev)
> {
> VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
>
> + virtio_pci_stop_ioeventfd(proxy);
> virtio_blk_exit(proxy->vdev);
> blockdev_mark_auto_del(proxy->block.bs);
> return virtio_exit_pci(pci_dev);
> @@ -658,6 +795,7 @@ static int virtio_net_exit_pci(PCIDevice *pci_dev)
> {
> VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
>
> + virtio_pci_stop_ioeventfd(proxy);
> virtio_net_exit(proxy->vdev);
> return virtio_exit_pci(pci_dev);
> }
> @@ -702,6 +840,8 @@ static PCIDeviceInfo virtio_info[] = {
> .qdev.props = (Property[]) {
> DEFINE_PROP_HEX32("class", VirtIOPCIProxy, class_code, 0),
> DEFINE_BLOCK_PROPERTIES(VirtIOPCIProxy, block),
> + DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
> + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
> DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
> DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
> DEFINE_PROP_END_OF_LIST(),
> @@ -714,6 +854,8 @@ static PCIDeviceInfo virtio_info[] = {
> .exit = virtio_net_exit_pci,
> .romfile = "pxe-virtio.bin",
> .qdev.props = (Property[]) {
> + DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
> + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
> DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
> DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
> DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
> diff --git a/hw/virtio.c b/hw/virtio.c
> index a2a657e..1f83b2c 100644
> --- a/hw/virtio.c
> +++ b/hw/virtio.c
> @@ -574,11 +574,19 @@ int virtio_queue_get_num(VirtIODevice *vdev, int n)
> return vdev->vq[n].vring.num;
> }
>
> +void virtio_queue_notify_vq(VirtQueue *vq)
> +{
> + if (vq->vring.desc) {
> + VirtIODevice *vdev = vq->vdev;
> + trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> + vq->handle_output(vdev, vq);
> + }
> +}
> +
> void virtio_queue_notify(VirtIODevice *vdev, int n)
> {
> - if (n < VIRTIO_PCI_QUEUE_MAX && vdev->vq[n].vring.desc) {
> - trace_virtio_queue_notify(vdev, n, &vdev->vq[n]);
> - vdev->vq[n].handle_output(vdev, &vdev->vq[n]);
> + if (n < VIRTIO_PCI_QUEUE_MAX) {
> + virtio_queue_notify_vq(&vdev->vq[n]);
> }
> }
>
> diff --git a/hw/virtio.h b/hw/virtio.h
> index 02fa312..5ae521c 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -219,5 +219,6 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev,
> int n, uint16_t idx);
> VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
> EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
> EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
> +void virtio_queue_notify_vq(VirtQueue *vq);
> void virtio_irq(VirtQueue *vq);
> #endif
> --
> 1.7.2.3
- [Qemu-devel] Re: [PATCH v4 2/4] virtio-pci: Use ioeventfd for virtqueue notify,
Michael S. Tsirkin <=