[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v13 03/10] virtio-iommu: Implement attach/detach command
From: |
Auger Eric |
Subject: |
Re: [PATCH v13 03/10] virtio-iommu: Implement attach/detach command |
Date: |
Mon, 3 Feb 2020 15:59:00 +0100 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.4.0 |
Hi Peter,
On 2/3/20 2:49 PM, Peter Xu wrote:
> On Sat, Jan 25, 2020 at 06:19:48PM +0100, Eric Auger wrote:
>> This patch implements the endpoint attach/detach to/from
>> a domain.
>>
>> Domain and endpoint internal datatypes are introduced.
>> Both are stored in RB trees. The domain owns a list of
>> endpoints attached to it. Also helpers to get/put
>> end points and domains are introduced.
>>
>> As for the IOMMU memory regions, a callback is called on
>> PCI bus enumeration that initializes for a given device
>> on the bus hierarchy an IOMMU memory region. The PCI bus
>> hierarchy is stored locally in IOMMUPciBus and IOMMUDevice
>> objects.
>>
>> At the time of the enumeration, the bus number may not be
>> computed yet.
>>
>> So operations that will need to retrieve the IOMMUdevice
>> and its IOMMU memory region from the bus number and devfn,
>> once the bus number is garanteed to be frozen, use an array
>> of IOMMUPciBus, lazily populated.
>>
>> Signed-off-by: Eric Auger <address@hidden>
>>
>> ---
>>
>> v12 -> v13:
>> - squashed v12 4, 5, 6 into this patch
>> - rename virtio_iommu_get_sid into virtio_iommu_get_bdf
>>
>> v11 -> v12:
>> - check the device is protected by the iommu on attach
>> - on detach, check the domain id the device is attached to matches
>> the one used in the detach command
>> - fix mapping ref counter and destroy the domain when no end-points
>> are attached anymore
>> ---
>> hw/virtio/trace-events | 6 +
>> hw/virtio/virtio-iommu.c | 315 ++++++++++++++++++++++++++++++-
>> include/hw/virtio/virtio-iommu.h | 3 +
>> 3 files changed, 322 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
>> index f7141aa2f6..15595f8cd7 100644
>> --- a/hw/virtio/trace-events
>> +++ b/hw/virtio/trace-events
>> @@ -64,3 +64,9 @@ virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id)
>> "domain=%d endpoint=%d"
>> virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d
>> endpoint=%d"
>> virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t
>> virt_end, uint64_t phys_start, uint32_t flags) "domain=%d
>> virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d"
>> virtio_iommu_unmap(uint32_t domain_id, uint64_t virt_start, uint64_t
>> virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64
>> +virtio_iommu_translate(const char *name, uint32_t rid, uint64_t iova, int
>> flag) "mr=%s rid=%d addr=0x%"PRIx64" flag=%d"
>> +virtio_iommu_init_iommu_mr(char *iommu_mr) "init %s"
>> +virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d"
>> +virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d"
>> +virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d"
>> +virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>> index 9d1b997df7..e5cc94138b 100644
>> --- a/hw/virtio/virtio-iommu.c
>> +++ b/hw/virtio/virtio-iommu.c
>> @@ -23,6 +23,8 @@
>> #include "hw/qdev-properties.h"
>> #include "hw/virtio/virtio.h"
>> #include "sysemu/kvm.h"
>> +#include "qapi/error.h"
>> +#include "qemu/error-report.h"
>> #include "trace.h"
>>
>> #include "standard-headers/linux/virtio_ids.h"
>> @@ -30,19 +32,234 @@
>> #include "hw/virtio/virtio-bus.h"
>> #include "hw/virtio/virtio-access.h"
>> #include "hw/virtio/virtio-iommu.h"
>> +#include "hw/pci/pci_bus.h"
>> +#include "hw/pci/pci.h"
>>
>> /* Max size */
>> #define VIOMMU_DEFAULT_QUEUE_SIZE 256
>>
>> +typedef struct VirtIOIOMMUDomain {
>> + uint32_t id;
>> + GTree *mappings;
>> + QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
>> +} VirtIOIOMMUDomain;
>> +
>> +typedef struct VirtIOIOMMUEndpoint {
>> + uint32_t id;
>> + VirtIOIOMMUDomain *domain;
>> + QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
>> +} VirtIOIOMMUEndpoint;
>> +
>> +typedef struct VirtIOIOMMUInterval {
>> + uint64_t low;
>> + uint64_t high;
>> +} VirtIOIOMMUInterval;
>> +
>> +static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
>> +{
>> + return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
>> +}
>> +
>> +/**
>> + * The bus number is used for lookup when SID based operations occur.
>> + * In that case we lazily populate the IOMMUPciBus array from the bus hash
>> + * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the
>> bus
>> + * numbers may not be always initialized yet.
>> + */
>> +static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
>> +{
>> + IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num];
>> +
>> + if (!iommu_pci_bus) {
>> + GHashTableIter iter;
>> +
>> + g_hash_table_iter_init(&iter, s->as_by_busptr);
>> + while (g_hash_table_iter_next(&iter, NULL, (void
>> **)&iommu_pci_bus)) {
>> + if (pci_bus_num(iommu_pci_bus->bus) == bus_num) {
>> + s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus;
>> + return iommu_pci_bus;
>> + }
>> + }
>> + return NULL;
>> + }
>> + return iommu_pci_bus;
>> +}
>> +
>> +static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
>> +{
>> + uint8_t bus_n, devfn;
>> + IOMMUPciBus *iommu_pci_bus;
>> + IOMMUDevice *dev;
>> +
>> + bus_n = PCI_BUS_NUM(sid);
>> + iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
>> + if (iommu_pci_bus) {
>> + devfn = sid & PCI_DEVFN_MAX;
>> + dev = iommu_pci_bus->pbdev[devfn];
>> + if (dev) {
>> + return &dev->iommu_mr;
>> + }
>> + }
>> + return NULL;
>> +}
>> +
>> +static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer
>> user_data)
>> +{
>> + VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a;
>> + VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b;
>> +
>> + if (inta->high < intb->low) {
>> + return -1;
>> + } else if (intb->high < inta->low) {
>> + return 1;
>> + } else {
>> + return 0;
>> + }
>> +}
>> +
>> +static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint
>> *ep)
>> +{
>> + QLIST_REMOVE(ep, next);
>> + g_tree_unref(ep->domain->mappings);
>
> Here domain->mapping is unreferenced for each endpoint, while at [1]
> below you only reference the domain->mappings if it's the first
> endpoint. Is that problematic?
in [1] I take a ref to the domain->mappings if it is *not* the 1st
endpoint. This aims at deleting the mappings gtree when the last EP is
detached from the domain.
This fixes the issue reported by Jean in:
https://patchwork.kernel.org/patch/11258267/#23046313
>
>> + ep->domain = NULL;
>> +}
>> +
>> +static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>> + uint32_t ep_id)
>> +{
>> + VirtIOIOMMUEndpoint *ep;
>> +
>> + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
>> + if (ep) {
>> + return ep;
>> + }
>> + if (!virtio_iommu_mr(s, ep_id)) {
>> + return NULL;
>> + }
>> + ep = g_malloc0(sizeof(*ep));
>> + ep->id = ep_id;
>> + trace_virtio_iommu_get_endpoint(ep_id);
>> + g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
>> + return ep;
>> +}
>> +
>> +static void virtio_iommu_put_endpoint(gpointer data)
>> +{
>> + VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data;
>> +
>> + assert(!ep->domain);
>> +
>> + trace_virtio_iommu_put_endpoint(ep->id);
>> + g_free(ep);
>> +}
>> +
>> +static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
>> + uint32_t domain_id)
>> +{
>> + VirtIOIOMMUDomain *domain;
>> +
>> + domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
>> + if (domain) {
>> + return domain;
>> + }
>> + domain = g_malloc0(sizeof(*domain));
>> + domain->id = domain_id;
>> + domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
>> + NULL, (GDestroyNotify)g_free,
>> + (GDestroyNotify)g_free);
>> + g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
>> + QLIST_INIT(&domain->endpoint_list);
>> + trace_virtio_iommu_get_domain(domain_id);
>> + return domain;
>> +}
>> +
>> +static void virtio_iommu_put_domain(gpointer data)
>> +{
>> + VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data;
>> + VirtIOIOMMUEndpoint *iter, *tmp;
>> +
>> + QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) {
>> + virtio_iommu_detach_endpoint_from_domain(iter);
>> + }
>
> Do you need to destroy the domain->mappings here? Is it leaked?
AFIU as we detach all EPs in the loop above, the whole "mappings" gtree
is destroyed so there is no leak.
>
>> + trace_virtio_iommu_put_domain(domain->id);
>> + g_free(domain);
>> +}
>> +
>> +static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
>> + int devfn)
>> +{
>> + VirtIOIOMMU *s = opaque;
>> + IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
>> + static uint32_t mr_index;
>> + IOMMUDevice *sdev;
>> +
>> + if (!sbus) {
>> + sbus = g_malloc0(sizeof(IOMMUPciBus) +
>> + sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
>> + sbus->bus = bus;
>> + g_hash_table_insert(s->as_by_busptr, bus, sbus);
>> + }
>> +
>> + sdev = sbus->pbdev[devfn];
>> + if (!sdev) {
>> + char *name = g_strdup_printf("%s-%d-%d",
>> + TYPE_VIRTIO_IOMMU_MEMORY_REGION,
>> + mr_index++, devfn);
>> + sdev = sbus->pbdev[devfn] = g_malloc0(sizeof(IOMMUDevice));
>> +
>> + sdev->viommu = s;
>> + sdev->bus = bus;
>> + sdev->devfn = devfn;
>> +
>> + trace_virtio_iommu_init_iommu_mr(name);
>> +
>> + memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
>> + TYPE_VIRTIO_IOMMU_MEMORY_REGION,
>> + OBJECT(s), name,
>> + UINT64_MAX);
>> + address_space_init(&sdev->as,
>> + MEMORY_REGION(&sdev->iommu_mr),
>> TYPE_VIRTIO_IOMMU);
>> + g_free(name);
>> + }
>> + return &sdev->as;
>> +}
>> +
>> static int virtio_iommu_attach(VirtIOIOMMU *s,
>> struct virtio_iommu_req_attach *req)
>> {
>> uint32_t domain_id = le32_to_cpu(req->domain);
>> uint32_t ep_id = le32_to_cpu(req->endpoint);
>> + VirtIOIOMMUDomain *domain;
>> + VirtIOIOMMUEndpoint *ep;
>>
>> trace_virtio_iommu_attach(domain_id, ep_id);
>>
>> - return VIRTIO_IOMMU_S_UNSUPP;
>> + ep = virtio_iommu_get_endpoint(s, ep_id);
>> + if (!ep) {
>> + return VIRTIO_IOMMU_S_NOENT;
>> + }
>> +
>> + if (ep->domain) {
>> + VirtIOIOMMUDomain *previous_domain = ep->domain;
>> + /*
>> + * the device is already attached to a domain,
>> + * detach it first
>> + */
>> + virtio_iommu_detach_endpoint_from_domain(ep);
>> + if (QLIST_EMPTY(&previous_domain->endpoint_list)) {
>> + g_tree_remove(s->domains,
>> GUINT_TO_POINTER(previous_domain->id));
>> + }
>> + }
>> +
>> + domain = virtio_iommu_get_domain(s, domain_id);
>> + if (!QLIST_EMPTY(&domain->endpoint_list)) {
>> + g_tree_ref(domain->mappings);
>
> [1]
!QLIST_EMPTY
>
>> + }
>> + QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);
>> +
>> + ep->domain = domain;
>> +
>> + return VIRTIO_IOMMU_S_OK;
>> }
>>
>> static int virtio_iommu_detach(VirtIOIOMMU *s,
>> @@ -50,10 +267,34 @@ static int virtio_iommu_detach(VirtIOIOMMU *s,
>> {
>> uint32_t domain_id = le32_to_cpu(req->domain);
>> uint32_t ep_id = le32_to_cpu(req->endpoint);
>> + VirtIOIOMMUDomain *domain;
>> + VirtIOIOMMUEndpoint *ep;
>>
>> trace_virtio_iommu_detach(domain_id, ep_id);
>>
>> - return VIRTIO_IOMMU_S_UNSUPP;
>> + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
>> + if (!ep) {
>> + return VIRTIO_IOMMU_S_NOENT;
>> + }
>> +
>> + domain = ep->domain;
>> +
>> + if (!domain || domain->id != domain_id) {
>> + return VIRTIO_IOMMU_S_INVAL;
>> + }
>> +
>> + virtio_iommu_detach_endpoint_from_domain(ep);
>> +
>> + /*
>> + * when the last EP is detached, simply remove the domain for
>> + * the domain list and destroy it. Note its mappings were already
>> + * freed by the ref count mechanism. Next operation involving
>> + * the same domain id will re-create one domain object.
>> + */
>> + if (QLIST_EMPTY(&domain->endpoint_list)) {
>> + g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
>> + }
>> + return VIRTIO_IOMMU_S_OK;
>> }
>>
>> static int virtio_iommu_map(VirtIOIOMMU *s,
>> @@ -172,6 +413,27 @@ out:
>> }
>> }
>>
>> +static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr
>> addr,
>> + IOMMUAccessFlags flag,
>> + int iommu_idx)
>> +{
>> + IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
>> + uint32_t sid;
>> +
>> + IOMMUTLBEntry entry = {
>> + .target_as = &address_space_memory,
>> + .iova = addr,
>> + .translated_addr = addr,
>> + .addr_mask = ~(hwaddr)0,
>> + .perm = IOMMU_NONE,
>> + };
>> +
>> + sid = virtio_iommu_get_bdf(sdev);
>> +
>> + trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
>> + return entry;
>> +}
>> +
>> static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t
>> *config_data)
>> {
>> VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
>> @@ -218,6 +480,13 @@ static const VMStateDescription
>> vmstate_virtio_iommu_device = {
>> .unmigratable = 1,
>> };
>>
>> +static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
>> +{
>> + guint ua = GPOINTER_TO_UINT(a);
>> + guint ub = GPOINTER_TO_UINT(b);
>> + return (ua > ub) - (ua < ub);
>> +}
>> +
>> static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
>> {
>> VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> @@ -226,6 +495,8 @@ static void virtio_iommu_device_realize(DeviceState
>> *dev, Error **errp)
>> virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
>> sizeof(struct virtio_iommu_config));
>>
>> + memset(s->iommu_pcibus_by_bus_num, 0,
>> sizeof(s->iommu_pcibus_by_bus_num));
>> +
>> s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
>> virtio_iommu_handle_command);
>> s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
>> @@ -244,18 +515,43 @@ static void virtio_iommu_device_realize(DeviceState
>> *dev, Error **errp)
>> virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
>>
>> qemu_mutex_init(&s->mutex);
>> +
>> + s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
>> +
>> + if (s->primary_bus) {
>> + pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s);
>> + } else {
>> + error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
>> + }
>> }
>>
>> static void virtio_iommu_device_unrealize(DeviceState *dev, Error **errp)
>> {
>> VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> + VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
>> +
>> + g_tree_destroy(s->domains);
>> + g_tree_destroy(s->endpoints);
>>
>> virtio_cleanup(vdev);
>> }
>>
>> static void virtio_iommu_device_reset(VirtIODevice *vdev)
>> {
>> + VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
>> +
>> trace_virtio_iommu_device_reset();
>> +
>> + if (s->domains) {
>> + g_tree_destroy(s->domains);
>> + }
>> + if (s->endpoints) {
>> + g_tree_destroy(s->endpoints);
>> + }
>
> Is it a must to free domians first then the endpoints here?
>
> I see that virtio_iommu_put_domain() will unlink the domains and
> endpoints, then in virtio_iommu_put_endpoint() you assert that
> ep->domain is NULL. It's fine but I'm a bit curious on why not call
> virtio_iommu_detach_endpoint_from_domain() too when put the endpoint
> then iiuc we don't even need this ordering constraint. Though in
> virtio_iommu_detach_endpoint_from_domain() we should also need:
Yes today an EP is meant to be deleted if it is detached from any domain.
I may add virtio_iommu_detach_endpoint_from_domain in put_domain though.
>
> if (!ep->domain)
> return;
>
> Otherwise it looks good to me. Thanks,
Thanks
Eric
>
>> + s->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
>> + NULL, NULL, virtio_iommu_put_domain);
>> + s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp,
>> + NULL, NULL, virtio_iommu_put_endpoint);
>> }
>>
>> static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
>> @@ -301,6 +597,14 @@ static void virtio_iommu_class_init(ObjectClass *klass,
>> void *data)
>> vdc->vmsd = &vmstate_virtio_iommu_device;
>> }
>>
>> +static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
>> + void *data)
>> +{
>> + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
>> +
>> + imrc->translate = virtio_iommu_translate;
>> +}
>> +
>> static const TypeInfo virtio_iommu_info = {
>> .name = TYPE_VIRTIO_IOMMU,
>> .parent = TYPE_VIRTIO_DEVICE,
>> @@ -309,9 +613,16 @@ static const TypeInfo virtio_iommu_info = {
>> .class_init = virtio_iommu_class_init,
>> };
>>
>> +static const TypeInfo virtio_iommu_memory_region_info = {
>> + .parent = TYPE_IOMMU_MEMORY_REGION,
>> + .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
>> + .class_init = virtio_iommu_memory_region_class_init,
>> +};
>> +
>> static void virtio_register_types(void)
>> {
>> type_register_static(&virtio_iommu_info);
>> + type_register_static(&virtio_iommu_memory_region_info);
>> }
>>
>> type_init(virtio_register_types)
>> diff --git a/include/hw/virtio/virtio-iommu.h
>> b/include/hw/virtio/virtio-iommu.h
>> index 041f2c9390..2a2c2ecf83 100644
>> --- a/include/hw/virtio/virtio-iommu.h
>> +++ b/include/hw/virtio/virtio-iommu.h
>> @@ -28,6 +28,8 @@
>> #define VIRTIO_IOMMU(obj) \
>> OBJECT_CHECK(VirtIOIOMMU, (obj), TYPE_VIRTIO_IOMMU)
>>
>> +#define TYPE_VIRTIO_IOMMU_MEMORY_REGION "virtio-iommu-memory-region"
>> +
>> typedef struct IOMMUDevice {
>> void *viommu;
>> PCIBus *bus;
>> @@ -48,6 +50,7 @@ typedef struct VirtIOIOMMU {
>> struct virtio_iommu_config config;
>> uint64_t features;
>> GHashTable *as_by_busptr;
>> + IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX];
>> PCIBus *primary_bus;
>> GTree *domains;
>> QemuMutex mutex;
>> --
>> 2.20.1
>>
>