qemu-arm
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v13 03/10] virtio-iommu: Implement attach/detach command


From: Peter Xu
Subject: Re: [PATCH v13 03/10] virtio-iommu: Implement attach/detach command
Date: Mon, 3 Feb 2020 08:49:15 -0500

On Sat, Jan 25, 2020 at 06:19:48PM +0100, Eric Auger wrote:
> This patch implements the endpoint attach/detach to/from
> a domain.
> 
> Domain and endpoint internal datatypes are introduced.
> Both are stored in RB trees. The domain owns a list of
> endpoints attached to it. Also helpers to get/put
> end points and domains are introduced.
> 
> As for the IOMMU memory regions, a callback is called on
> PCI bus enumeration that initializes for a given device
> on the bus hierarchy an IOMMU memory region. The PCI bus
> hierarchy is stored locally in IOMMUPciBus and IOMMUDevice
> objects.
> 
> At the time of the enumeration, the bus number may not be
> computed yet.
> 
> So operations that will need to retrieve the IOMMUdevice
> and its IOMMU memory region from the bus number and devfn,
> once the bus number is garanteed to be frozen, use an array
> of IOMMUPciBus, lazily populated.
> 
> Signed-off-by: Eric Auger <address@hidden>
> 
> ---
> 
> v12 -> v13:
> - squashed v12 4, 5, 6 into this patch
> - rename virtio_iommu_get_sid into virtio_iommu_get_bdf
> 
> v11 -> v12:
> - check the device is protected by the iommu on attach
> - on detach, check the domain id the device is attached to matches
>   the one used in the detach command
> - fix mapping ref counter and destroy the domain when no end-points
>   are attached anymore
> ---
>  hw/virtio/trace-events           |   6 +
>  hw/virtio/virtio-iommu.c         | 315 ++++++++++++++++++++++++++++++-
>  include/hw/virtio/virtio-iommu.h |   3 +
>  3 files changed, 322 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index f7141aa2f6..15595f8cd7 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -64,3 +64,9 @@ virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) 
> "domain=%d endpoint=%d"
>  virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d 
> endpoint=%d"
>  virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end, 
> uint64_t phys_start, uint32_t flags) "domain=%d virt_start=0x%"PRIx64" 
> virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d"
>  virtio_iommu_unmap(uint32_t domain_id, uint64_t virt_start, uint64_t 
> virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64
> +virtio_iommu_translate(const char *name, uint32_t rid, uint64_t iova, int 
> flag) "mr=%s rid=%d addr=0x%"PRIx64" flag=%d"
> +virtio_iommu_init_iommu_mr(char *iommu_mr) "init %s"
> +virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d"
> +virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d"
> +virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d"
> +virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> index 9d1b997df7..e5cc94138b 100644
> --- a/hw/virtio/virtio-iommu.c
> +++ b/hw/virtio/virtio-iommu.c
> @@ -23,6 +23,8 @@
>  #include "hw/qdev-properties.h"
>  #include "hw/virtio/virtio.h"
>  #include "sysemu/kvm.h"
> +#include "qapi/error.h"
> +#include "qemu/error-report.h"
>  #include "trace.h"
>  
>  #include "standard-headers/linux/virtio_ids.h"
> @@ -30,19 +32,234 @@
>  #include "hw/virtio/virtio-bus.h"
>  #include "hw/virtio/virtio-access.h"
>  #include "hw/virtio/virtio-iommu.h"
> +#include "hw/pci/pci_bus.h"
> +#include "hw/pci/pci.h"
>  
>  /* Max size */
>  #define VIOMMU_DEFAULT_QUEUE_SIZE 256
>  
> +typedef struct VirtIOIOMMUDomain {
> +    uint32_t id;
> +    GTree *mappings;
> +    QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
> +} VirtIOIOMMUDomain;
> +
> +typedef struct VirtIOIOMMUEndpoint {
> +    uint32_t id;
> +    VirtIOIOMMUDomain *domain;
> +    QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
> +} VirtIOIOMMUEndpoint;
> +
> +typedef struct VirtIOIOMMUInterval {
> +    uint64_t low;
> +    uint64_t high;
> +} VirtIOIOMMUInterval;
> +
> +static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
> +{
> +    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
> +}
> +
> +/**
> + * The bus number is used for lookup when SID based operations occur.
> + * In that case we lazily populate the IOMMUPciBus array from the bus hash
> + * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus
> + * numbers may not be always initialized yet.
> + */
> +static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
> +{
> +    IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num];
> +
> +    if (!iommu_pci_bus) {
> +        GHashTableIter iter;
> +
> +        g_hash_table_iter_init(&iter, s->as_by_busptr);
> +        while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) 
> {
> +            if (pci_bus_num(iommu_pci_bus->bus) == bus_num) {
> +                s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus;
> +                return iommu_pci_bus;
> +            }
> +        }
> +        return NULL;
> +    }
> +    return iommu_pci_bus;
> +}
> +
> +static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
> +{
> +    uint8_t bus_n, devfn;
> +    IOMMUPciBus *iommu_pci_bus;
> +    IOMMUDevice *dev;
> +
> +    bus_n = PCI_BUS_NUM(sid);
> +    iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
> +    if (iommu_pci_bus) {
> +        devfn = sid & PCI_DEVFN_MAX;
> +        dev = iommu_pci_bus->pbdev[devfn];
> +        if (dev) {
> +            return &dev->iommu_mr;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer 
> user_data)
> +{
> +    VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a;
> +    VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b;
> +
> +    if (inta->high < intb->low) {
> +        return -1;
> +    } else if (intb->high < inta->low) {
> +        return 1;
> +    } else {
> +        return 0;
> +    }
> +}
> +
> +static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
> +{
> +    QLIST_REMOVE(ep, next);
> +    g_tree_unref(ep->domain->mappings);

Here domain->mapping is unreferenced for each endpoint, while at [1]
below you only reference the domain->mappings if it's the first
endpoint.  Is that problematic?

> +    ep->domain = NULL;
> +}
> +
> +static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
> +                                                      uint32_t ep_id)
> +{
> +    VirtIOIOMMUEndpoint *ep;
> +
> +    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
> +    if (ep) {
> +        return ep;
> +    }
> +    if (!virtio_iommu_mr(s, ep_id)) {
> +        return NULL;
> +    }
> +    ep = g_malloc0(sizeof(*ep));
> +    ep->id = ep_id;
> +    trace_virtio_iommu_get_endpoint(ep_id);
> +    g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
> +    return ep;
> +}
> +
> +static void virtio_iommu_put_endpoint(gpointer data)
> +{
> +    VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data;
> +
> +    assert(!ep->domain);
> +
> +    trace_virtio_iommu_put_endpoint(ep->id);
> +    g_free(ep);
> +}
> +
> +static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
> +                                                  uint32_t domain_id)
> +{
> +    VirtIOIOMMUDomain *domain;
> +
> +    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
> +    if (domain) {
> +        return domain;
> +    }
> +    domain = g_malloc0(sizeof(*domain));
> +    domain->id = domain_id;
> +    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
> +                                   NULL, (GDestroyNotify)g_free,
> +                                   (GDestroyNotify)g_free);
> +    g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
> +    QLIST_INIT(&domain->endpoint_list);
> +    trace_virtio_iommu_get_domain(domain_id);
> +    return domain;
> +}
> +
> +static void virtio_iommu_put_domain(gpointer data)
> +{
> +    VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data;
> +    VirtIOIOMMUEndpoint *iter, *tmp;
> +
> +    QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) {
> +        virtio_iommu_detach_endpoint_from_domain(iter);
> +    }

Do you need to destroy the domain->mappings here?  Is it leaked?

> +    trace_virtio_iommu_put_domain(domain->id);
> +    g_free(domain);
> +}
> +
> +static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
> +                                              int devfn)
> +{
> +    VirtIOIOMMU *s = opaque;
> +    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
> +    static uint32_t mr_index;
> +    IOMMUDevice *sdev;
> +
> +    if (!sbus) {
> +        sbus = g_malloc0(sizeof(IOMMUPciBus) +
> +                         sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
> +        sbus->bus = bus;
> +        g_hash_table_insert(s->as_by_busptr, bus, sbus);
> +    }
> +
> +    sdev = sbus->pbdev[devfn];
> +    if (!sdev) {
> +        char *name = g_strdup_printf("%s-%d-%d",
> +                                     TYPE_VIRTIO_IOMMU_MEMORY_REGION,
> +                                     mr_index++, devfn);
> +        sdev = sbus->pbdev[devfn] = g_malloc0(sizeof(IOMMUDevice));
> +
> +        sdev->viommu = s;
> +        sdev->bus = bus;
> +        sdev->devfn = devfn;
> +
> +        trace_virtio_iommu_init_iommu_mr(name);
> +
> +        memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
> +                                 TYPE_VIRTIO_IOMMU_MEMORY_REGION,
> +                                 OBJECT(s), name,
> +                                 UINT64_MAX);
> +        address_space_init(&sdev->as,
> +                           MEMORY_REGION(&sdev->iommu_mr), 
> TYPE_VIRTIO_IOMMU);
> +        g_free(name);
> +    }
> +    return &sdev->as;
> +}
> +
>  static int virtio_iommu_attach(VirtIOIOMMU *s,
>                                 struct virtio_iommu_req_attach *req)
>  {
>      uint32_t domain_id = le32_to_cpu(req->domain);
>      uint32_t ep_id = le32_to_cpu(req->endpoint);
> +    VirtIOIOMMUDomain *domain;
> +    VirtIOIOMMUEndpoint *ep;
>  
>      trace_virtio_iommu_attach(domain_id, ep_id);
>  
> -    return VIRTIO_IOMMU_S_UNSUPP;
> +    ep = virtio_iommu_get_endpoint(s, ep_id);
> +    if (!ep) {
> +        return VIRTIO_IOMMU_S_NOENT;
> +    }
> +
> +    if (ep->domain) {
> +        VirtIOIOMMUDomain *previous_domain = ep->domain;
> +        /*
> +         * the device is already attached to a domain,
> +         * detach it first
> +         */
> +        virtio_iommu_detach_endpoint_from_domain(ep);
> +        if (QLIST_EMPTY(&previous_domain->endpoint_list)) {
> +            g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id));
> +        }
> +    }
> +
> +    domain = virtio_iommu_get_domain(s, domain_id);
> +    if (!QLIST_EMPTY(&domain->endpoint_list)) {
> +        g_tree_ref(domain->mappings);

[1]

> +    }
> +    QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);
> +
> +    ep->domain = domain;
> +
> +    return VIRTIO_IOMMU_S_OK;
>  }
>  
>  static int virtio_iommu_detach(VirtIOIOMMU *s,
> @@ -50,10 +267,34 @@ static int virtio_iommu_detach(VirtIOIOMMU *s,
>  {
>      uint32_t domain_id = le32_to_cpu(req->domain);
>      uint32_t ep_id = le32_to_cpu(req->endpoint);
> +    VirtIOIOMMUDomain *domain;
> +    VirtIOIOMMUEndpoint *ep;
>  
>      trace_virtio_iommu_detach(domain_id, ep_id);
>  
> -    return VIRTIO_IOMMU_S_UNSUPP;
> +    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
> +    if (!ep) {
> +        return VIRTIO_IOMMU_S_NOENT;
> +    }
> +
> +    domain = ep->domain;
> +
> +    if (!domain || domain->id != domain_id) {
> +        return VIRTIO_IOMMU_S_INVAL;
> +    }
> +
> +    virtio_iommu_detach_endpoint_from_domain(ep);
> +
> +    /*
> +     * when the last EP is detached, simply remove the domain for
> +     * the domain list and destroy it. Note its mappings were already
> +     * freed by the ref count mechanism. Next operation involving
> +     * the same domain id will re-create one domain object.
> +     */
> +    if (QLIST_EMPTY(&domain->endpoint_list)) {
> +        g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
> +    }
> +    return VIRTIO_IOMMU_S_OK;
>  }
>  
>  static int virtio_iommu_map(VirtIOIOMMU *s,
> @@ -172,6 +413,27 @@ out:
>      }
>  }
>  
> +static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr 
> addr,
> +                                            IOMMUAccessFlags flag,
> +                                            int iommu_idx)
> +{
> +    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
> +    uint32_t sid;
> +
> +    IOMMUTLBEntry entry = {
> +        .target_as = &address_space_memory,
> +        .iova = addr,
> +        .translated_addr = addr,
> +        .addr_mask = ~(hwaddr)0,
> +        .perm = IOMMU_NONE,
> +    };
> +
> +    sid = virtio_iommu_get_bdf(sdev);
> +
> +    trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
> +    return entry;
> +}
> +
>  static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
>  {
>      VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
> @@ -218,6 +480,13 @@ static const VMStateDescription 
> vmstate_virtio_iommu_device = {
>      .unmigratable = 1,
>  };
>  
> +static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
> +{
> +    guint ua = GPOINTER_TO_UINT(a);
> +    guint ub = GPOINTER_TO_UINT(b);
> +    return (ua > ub) - (ua < ub);
> +}
> +
>  static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -226,6 +495,8 @@ static void virtio_iommu_device_realize(DeviceState *dev, 
> Error **errp)
>      virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
>                  sizeof(struct virtio_iommu_config));
>  
> +    memset(s->iommu_pcibus_by_bus_num, 0, 
> sizeof(s->iommu_pcibus_by_bus_num));
> +
>      s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
>                               virtio_iommu_handle_command);
>      s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
> @@ -244,18 +515,43 @@ static void virtio_iommu_device_realize(DeviceState 
> *dev, Error **errp)
>      virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
>  
>      qemu_mutex_init(&s->mutex);
> +
> +    s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
> +
> +    if (s->primary_bus) {
> +        pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s);
> +    } else {
> +        error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
> +    }
>  }
>  
>  static void virtio_iommu_device_unrealize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
> +
> +    g_tree_destroy(s->domains);
> +    g_tree_destroy(s->endpoints);
>  
>      virtio_cleanup(vdev);
>  }
>  
>  static void virtio_iommu_device_reset(VirtIODevice *vdev)
>  {
> +    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
> +
>      trace_virtio_iommu_device_reset();
> +
> +    if (s->domains) {
> +        g_tree_destroy(s->domains);
> +    }
> +    if (s->endpoints) {
> +        g_tree_destroy(s->endpoints);
> +    }

Is it a must to free domians first then the endpoints here?

I see that virtio_iommu_put_domain() will unlink the domains and
endpoints, then in virtio_iommu_put_endpoint() you assert that
ep->domain is NULL.  It's fine but I'm a bit curious on why not call
virtio_iommu_detach_endpoint_from_domain() too when put the endpoint
then iiuc we don't even need this ordering constraint.  Though in
virtio_iommu_detach_endpoint_from_domain() we should also need:

  if (!ep->domain)
    return;

Otherwise it looks good to me.  Thanks,

> +    s->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
> +                                 NULL, NULL, virtio_iommu_put_domain);
> +    s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp,
> +                                   NULL, NULL, virtio_iommu_put_endpoint);
>  }
>  
>  static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
> @@ -301,6 +597,14 @@ static void virtio_iommu_class_init(ObjectClass *klass, 
> void *data)
>      vdc->vmsd = &vmstate_virtio_iommu_device;
>  }
>  
> +static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
> +                                                  void *data)
> +{
> +    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
> +
> +    imrc->translate = virtio_iommu_translate;
> +}
> +
>  static const TypeInfo virtio_iommu_info = {
>      .name = TYPE_VIRTIO_IOMMU,
>      .parent = TYPE_VIRTIO_DEVICE,
> @@ -309,9 +613,16 @@ static const TypeInfo virtio_iommu_info = {
>      .class_init = virtio_iommu_class_init,
>  };
>  
> +static const TypeInfo virtio_iommu_memory_region_info = {
> +    .parent = TYPE_IOMMU_MEMORY_REGION,
> +    .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
> +    .class_init = virtio_iommu_memory_region_class_init,
> +};
> +
>  static void virtio_register_types(void)
>  {
>      type_register_static(&virtio_iommu_info);
> +    type_register_static(&virtio_iommu_memory_region_info);
>  }
>  
>  type_init(virtio_register_types)
> diff --git a/include/hw/virtio/virtio-iommu.h 
> b/include/hw/virtio/virtio-iommu.h
> index 041f2c9390..2a2c2ecf83 100644
> --- a/include/hw/virtio/virtio-iommu.h
> +++ b/include/hw/virtio/virtio-iommu.h
> @@ -28,6 +28,8 @@
>  #define VIRTIO_IOMMU(obj) \
>          OBJECT_CHECK(VirtIOIOMMU, (obj), TYPE_VIRTIO_IOMMU)
>  
> +#define TYPE_VIRTIO_IOMMU_MEMORY_REGION "virtio-iommu-memory-region"
> +
>  typedef struct IOMMUDevice {
>      void         *viommu;
>      PCIBus       *bus;
> @@ -48,6 +50,7 @@ typedef struct VirtIOIOMMU {
>      struct virtio_iommu_config config;
>      uint64_t features;
>      GHashTable *as_by_busptr;
> +    IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX];
>      PCIBus *primary_bus;
>      GTree *domains;
>      QemuMutex mutex;
> -- 
> 2.20.1
> 

-- 
Peter Xu




reply via email to

[Prev in Thread] Current Thread [Next in Thread]