qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2][for 1.2?] msix: Drop tracking of used vector


From: Michael S. Tsirkin
Subject: Re: [Qemu-devel] [PATCH v2][for 1.2?] msix: Drop tracking of used vectors
Date: Fri, 24 Aug 2012 11:20:48 +0300

On Fri, Aug 24, 2012 at 08:19:50AM +0200, Jan Kiszka wrote:
> From: Jan Kiszka <address@hidden>
> 
> This optimization was once used in qemu-kvm to keep KVM route usage low.
> But now we solved that problem via lazy updates. It also tried to handle
> the case of vectors shared between different sources of the same device.
> However, this never really worked and will have to be addressed
> differently anyway. So drop this obsolete interface.
> 
> We still need interfaces to clear pending vectors though. Provide
> msix_clear_vector and msix_clear_all_vectors for this.
> 
> This also unbreaks MSI-X after reset for ivshmem and megasas as device
> models can't easily mark their vectors used afterward (megasas didn't
> even try).
> 
> Signed-off-by: Jan Kiszka <address@hidden>
> ---
> 
> This patch has been posted some moons again, and we had a discussion
> about the impact on the existing users. At that time, MSI-X refactoring
> for KVM support was not yet merged. Now it is, and it should be better
> much clearer that vector usage tracking has no business with that
> feature.
> 
>  hw/ivshmem.c    |   20 -------------------
>  hw/megasas.c    |    4 ---
>  hw/msix.c       |   57 ++++++++++++------------------------------------------
>  hw/msix.h       |    5 +--
>  hw/pci.h        |    2 -
>  hw/virtio-pci.c |   20 +++++++-----------
>  6 files changed, 23 insertions(+), 85 deletions(-)
> 
> diff --git a/hw/ivshmem.c b/hw/ivshmem.c
> index 47f2a16..5ffff48 100644
> --- a/hw/ivshmem.c
> +++ b/hw/ivshmem.c
> @@ -523,28 +523,11 @@ static void ivshmem_read(void *opaque, const uint8_t * 
> buf, int flags)
>      return;
>  }
>  
> -/* Select the MSI-X vectors used by device.
> - * ivshmem maps events to vectors statically, so
> - * we just enable all vectors on init and after reset. */
> -static void ivshmem_use_msix(IVShmemState * s)
> -{
> -    int i;
> -
> -    if (!msix_present(&s->dev)) {
> -        return;
> -    }
> -
> -    for (i = 0; i < s->vectors; i++) {
> -        msix_vector_use(&s->dev, i);
> -    }
> -}
> -
>  static void ivshmem_reset(DeviceState *d)
>  {
>      IVShmemState *s = DO_UPCAST(IVShmemState, dev.qdev, d);
>  
>      s->intrstatus = 0;
> -    ivshmem_use_msix(s);
>      return;
>  }
>  
> @@ -586,8 +569,6 @@ static void ivshmem_setup_msi(IVShmemState * s)
>  
>      /* allocate QEMU char devices for receiving interrupts */
>      s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry));
> -
> -    ivshmem_use_msix(s);
>  }
>  
>  static void ivshmem_save(QEMUFile* f, void *opaque)
> @@ -629,7 +610,6 @@ static int ivshmem_load(QEMUFile* f, void *opaque, int 
> version_id)
>  
>      if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
>          msix_load(&proxy->dev, f);
> -     ivshmem_use_msix(proxy);
>      } else {
>          proxy->intrstatus = qemu_get_be32(f);
>          proxy->intrmask = qemu_get_be32(f);
> diff --git a/hw/megasas.c b/hw/megasas.c
> index c35a15d..4766117 100644
> --- a/hw/megasas.c
> +++ b/hw/megasas.c
> @@ -2121,10 +2121,6 @@ static int megasas_scsi_init(PCIDevice *dev)
>      pci_register_bar(&s->dev, 2, PCI_BASE_ADDRESS_SPACE_IO, &s->port_io);
>      pci_register_bar(&s->dev, 3, bar_type, &s->queue_io);
>  
> -    if (megasas_use_msix(s)) {
> -        msix_vector_use(&s->dev, 0);
> -    }
> -
>      if (!s->sas_addr) {
>          s->sas_addr = ((NAA_LOCALLY_ASSIGNED_ID << 24) |
>                         IEEE_COMPANY_LOCALLY_ASSIGNED) << 36;
> diff --git a/hw/msix.c b/hw/msix.c
> index aea340b..163ce4c 100644
> --- a/hw/msix.c
> +++ b/hw/msix.c
> @@ -273,7 +273,6 @@ int msix_init(struct PCIDevice *dev, unsigned short 
> nentries,
>  
>      dev->msix_table = g_malloc0(table_size);
>      dev->msix_pba = g_malloc0(pba_size);
> -    dev->msix_entry_used = g_malloc0(nentries * sizeof 
> *dev->msix_entry_used);
>  
>      msix_mask_all(dev, nentries);
>  
> @@ -326,16 +325,6 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned 
> short nentries,
>      return 0;
>  }
>  
> -static void msix_free_irq_entries(PCIDevice *dev)
> -{
> -    int vector;
> -
> -    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
> -        dev->msix_entry_used[vector] = 0;
> -        msix_clr_pending(dev, vector);
> -    }
> -}
> -
>  /* Clean up resources for the device. */
>  void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion 
> *pba_bar)
>  {
> @@ -344,7 +333,6 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, 
> MemoryRegion *pba_bar)
>      }
>      pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
>      dev->msix_cap = 0;
> -    msix_free_irq_entries(dev);
>      dev->msix_entries_nr = 0;
>      memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
>      memory_region_destroy(&dev->msix_pba_mmio);
> @@ -354,8 +342,6 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, 
> MemoryRegion *pba_bar)
>      memory_region_destroy(&dev->msix_table_mmio);
>      g_free(dev->msix_table);
>      dev->msix_table = NULL;
> -    g_free(dev->msix_entry_used);
> -    dev->msix_entry_used = NULL;
>      dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
>      return;
>  }
> @@ -390,7 +376,6 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
>          return;
>      }
>  
> -    msix_free_irq_entries(dev);
>      qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
>      qemu_get_buffer(f, dev->msix_pba, (n + 7) / 8);
>      msix_update_function_masked(dev);
> @@ -419,8 +404,9 @@ void msix_notify(PCIDevice *dev, unsigned vector)
>  {
>      MSIMessage msg;
>  
> -    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
> +    if (vector >= dev->msix_entries_nr) {
>          return;
> +    }
>      if (msix_is_masked(dev, vector)) {
>          msix_set_pending(dev, vector);
>          return;
> @@ -436,7 +422,7 @@ void msix_reset(PCIDevice *dev)
>      if (!msix_present(dev)) {
>          return;
>      }
> -    msix_free_irq_entries(dev);
> +    msix_clear_all_vectors(dev);
>      dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
>           ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
>      memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
> @@ -444,41 +430,24 @@ void msix_reset(PCIDevice *dev)
>      msix_mask_all(dev, dev->msix_entries_nr);
>  }
>  
> -/* PCI spec suggests that devices make it possible for software to configure
> - * less vectors than supported by the device, but does not specify a standard
> - * mechanism for devices to do so.
> - *
> - * We support this by asking devices to declare vectors software is going to
> - * actually use, and checking this on the notification path. Devices that
> - * don't want to follow the spec suggestion can declare all vectors as used. 
> */
> -
> -/* Mark vector as used. */
> -int msix_vector_use(PCIDevice *dev, unsigned vector)
> +/* Clear pending vector. */
> +void msix_clear_vector(PCIDevice *dev, unsigned vector)
>  {
> -    if (vector >= dev->msix_entries_nr)
> -        return -EINVAL;
> -    dev->msix_entry_used[vector]++;
> -    return 0;
> -}
> -

I keep thinking we should instead call vector notifier
here, so that virtio can detect and handle cases where
kvm_virtio_pci_vq_vector_use fails.
ATM it just asserts.

Maybe I am wrong.

Let's delay this decision until 1.3.

> -/* Mark vector as unused. */
> -void msix_vector_unuse(PCIDevice *dev, unsigned vector)
> -{
> -    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
> -        return;
> -    }
> -    if (--dev->msix_entry_used[vector]) {
> -        return;
> +    if (msix_present(dev) && vector < dev->msix_entries_nr) {
> +        msix_clr_pending(dev, vector);
>      }
> -    msix_clr_pending(dev, vector);
>  }
>  
> -void msix_unuse_all_vectors(PCIDevice *dev)
> +void msix_clear_all_vectors(PCIDevice *dev)
>  {
> +    unsigned int vector;
> +
>      if (!msix_present(dev)) {
>          return;
>      }
> -    msix_free_irq_entries(dev);
> +    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
> +        msix_clr_pending(dev, vector);
> +    }
>  }
>  
>  unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
> diff --git a/hw/msix.h b/hw/msix.h
> index 15211cb..5c2512a 100644
> --- a/hw/msix.h
> +++ b/hw/msix.h
> @@ -26,9 +26,8 @@ void msix_load(PCIDevice *dev, QEMUFile *f);
>  int msix_enabled(PCIDevice *dev);
>  int msix_present(PCIDevice *dev);
>  
> -int msix_vector_use(PCIDevice *dev, unsigned vector);
> -void msix_vector_unuse(PCIDevice *dev, unsigned vector);
> -void msix_unuse_all_vectors(PCIDevice *dev);
> +void msix_clear_vector(PCIDevice *dev, unsigned vector);
> +void msix_clear_all_vectors(PCIDevice *dev);
>  
>  void msix_notify(PCIDevice *dev, unsigned vector);
>  
> diff --git a/hw/pci.h b/hw/pci.h
> index 4b6ab3d..1b07450 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -240,8 +240,6 @@ struct PCIDevice {
>      /* Memory Regions for MSIX table and pending bit entries. */
>      MemoryRegion msix_table_mmio;
>      MemoryRegion msix_pba_mmio;
> -    /* Reference-count for entries actually in use by driver. */
> -    unsigned *msix_entry_used;
>      /* MSIX function mask set or MSIX disabled */
>      bool msix_function_masked;
>      /* Version id needed for VMState */
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index 2a3d86f..f8ce01d 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -137,9 +137,6 @@ static int virtio_pci_load_config(void * opaque, QEMUFile 
> *f)
>      } else {
>          proxy->vdev->config_vector = VIRTIO_NO_VECTOR;
>      }
> -    if (proxy->vdev->config_vector != VIRTIO_NO_VECTOR) {
> -        return msix_vector_use(&proxy->pci_dev, proxy->vdev->config_vector);
> -    }
>      return 0;
>  }
>  
> @@ -153,9 +150,6 @@ static int virtio_pci_load_queue(void * opaque, int n, 
> QEMUFile *f)
>          vector = VIRTIO_NO_VECTOR;
>      }
>      virtio_queue_set_vector(proxy->vdev, n, vector);
> -    if (vector != VIRTIO_NO_VECTOR) {
> -        return msix_vector_use(&proxy->pci_dev, vector);
> -    }
>      return 0;
>  }
>  
> @@ -268,7 +262,7 @@ static void virtio_ioport_write(void *opaque, uint32_t 
> addr, uint32_t val)
>          if (pa == 0) {
>              virtio_pci_stop_ioeventfd(proxy);
>              virtio_reset(proxy->vdev);
> -            msix_unuse_all_vectors(&proxy->pci_dev);
> +            msix_clear_all_vectors(&proxy->pci_dev);
>          }
>          else
>              virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
> @@ -295,7 +289,7 @@ static void virtio_ioport_write(void *opaque, uint32_t 
> addr, uint32_t val)
>  
>          if (vdev->status == 0) {
>              virtio_reset(proxy->vdev);
> -            msix_unuse_all_vectors(&proxy->pci_dev);
> +            msix_clear_all_vectors(&proxy->pci_dev);
>          }
>  
>          /* Linux before 2.6.34 sets the device as OK without enabling
> @@ -307,18 +301,20 @@ static void virtio_ioport_write(void *opaque, uint32_t 
> addr, uint32_t val)
>          }
>          break;
>      case VIRTIO_MSI_CONFIG_VECTOR:
> -        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
> +        msix_clear_vector(&proxy->pci_dev, vdev->config_vector);
>          /* Make it possible for guest to discover an error took place. */
> -        if (msix_vector_use(&proxy->pci_dev, val) < 0)
> +        if (val >= vdev->nvectors) {
>              val = VIRTIO_NO_VECTOR;
> +        }
>          vdev->config_vector = val;
>          break;
>      case VIRTIO_MSI_QUEUE_VECTOR:
> -        msix_vector_unuse(&proxy->pci_dev,
> +        msix_clear_vector(&proxy->pci_dev,
>                            virtio_queue_vector(vdev, vdev->queue_sel));
>          /* Make it possible for guest to discover an error took place. */
> -        if (msix_vector_use(&proxy->pci_dev, val) < 0)
> +        if (val >= vdev->nvectors) {
>              val = VIRTIO_NO_VECTOR;
> +        }
>          virtio_queue_set_vector(vdev, vdev->queue_sel, val);
>          break;
>      default:
> -- 
> 1.7.3.4



reply via email to

[Prev in Thread] Current Thread [Next in Thread]