From: Jason Wang
Subject: Re: [PATCH] vhost: use large iotlb entry if no IOMMU translation is needed
Date: Tue, 3 Aug 2021 12:43:58 +0800
User-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Thunderbird/78.12.0


On 8/3/2021 12:29 PM, Chao Gao wrote:
Ping. Could someone help to review this patch?

Thanks
Chao

On Wed, Jul 21, 2021 at 03:54:02PM +0800, Chao Gao wrote:
If the guest enables IOMMU_PLATFORM for virtio-net, a severe network
performance drop is observed even if there is no IOMMU.


We see such reports internally and we're testing a patch series to disable vhost IOTLB in this case.

Will post a patch soon.



And disabling
vhost can mitigate the perf issue. Finally, we found the culprit to be
frequent iotlb misses: kernel vhost-net has 2048 iotlb entries and each
entry is 4K (QEMU uses 4K for i386 if there is no IOMMU), so vhost-net
can cache translations for up to 8M (i.e. 4K*2048) of IOVAs. If the
guest uses more than 8M of memory for DMA, iotlb misses occur.

If there is no IOMMU, or the IOMMU is disabled or working in pass-through
mode, we can optimistically use large, unaligned iotlb entries instead
of 4K-aligned entries to reduce iotlb pressure.


Instead of introducing a new general facility like the unaligned IOTLB entry, I wonder if we could simply optimize vtd_iommu_translate() to use e.g. a 1G entry instead?

    } else {
        /* DMAR disabled, passthrough, use 4k-page */
        iotlb.iova = addr & VTD_PAGE_MASK_4K;
        iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
        iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
        iotlb.perm = IOMMU_RW;
        success = true;
    }
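
Something along these lines (just an untested sketch; I'm open-coding the 1G mask since I don't think we have a VTD_PAGE_MASK_1G constant today):

    } else {
        /*
         * DMAR disabled or passthrough: hand out one huge (1G) entry so
         * callers like vhost cache far fewer IOTLB entries for the same
         * amount of guest memory.
         */
        hwaddr mask_1g = ~(((hwaddr)1 << 30) - 1); /* open-coded 1G mask */

        iotlb.iova = addr & mask_1g;
        iotlb.translated_addr = addr & mask_1g;
        iotlb.addr_mask = ~mask_1g;
        iotlb.perm = IOMMU_RW;
        success = true;
    }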


Actually, vhost-net
in the kernel supports unaligned iotlb entries. The alignment requirement is
imposed by address_space_get_iotlb_entry() and flatview_do_translate().


For the passthrough case, is there any way to detect it and then disable the device IOTLB in that case?
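
One (untested) idea: every virtio device already carries its own DMA address space, so vhost could simply check whether that address space is plain system memory and skip the device IOTLB entirely in that case (the helper name below is made up):

    /*
     * Sketch only, not in this patch: if the device's DMA address space
     * is just system memory, no vIOMMU sits in the DMA path, so the
     * vhost backend doesn't need a device IOTLB at all.
     */
    static bool vhost_dev_should_use_iotlb(VirtIODevice *vdev)
    {
        return vdev->dma_as != &address_space_memory;
    }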

Thanks



Introduce IOMMUTLBEntryUnaligned, which has a @len field to specify the
iotlb size, to abstract a generic iotlb entry: both aligned (the original
IOMMUTLBEntry) and unaligned entries. flatview_do_translate() now
returns a magic value in @page_mask_out if no IOMMU translation is
needed. Then, address_space_get_iotlb_entry() can accordingly return
either a page-aligned iotlb entry or the whole memory region section
where the iova resides as one large iotlb entry.

Signed-off-by: Chao Gao <chao.gao@intel.com>
---
hw/virtio/vhost.c     |  6 +++---
include/exec/memory.h | 16 ++++++++++++++--
softmmu/physmem.c     | 37 +++++++++++++++++++++++++++++--------
3 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index e8f85a5d2d..6745caa129 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1010,7 +1010,7 @@ static int vhost_memory_region_lookup(struct vhost_dev *hdev,

int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
{
-    IOMMUTLBEntry iotlb;
+    IOMMUTLBEntryUnaligned iotlb;
     uint64_t uaddr, len;
     int ret = -EFAULT;

@@ -1031,8 +1031,8 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
             goto out;
         }

-        len = MIN(iotlb.addr_mask + 1, len);
-        iova = iova & ~iotlb.addr_mask;
+        len = MIN(iotlb.len, len);
+        iova = iotlb.iova;

         ret = vhost_backend_update_device_iotlb(dev, iova, uaddr,
                                                 len, iotlb.perm);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index c3d417d317..3f04e8fe88 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -94,6 +94,7 @@ struct MemoryRegionSection {
};

typedef struct IOMMUTLBEntry IOMMUTLBEntry;
+typedef struct IOMMUTLBEntryUnaligned IOMMUTLBEntryUnaligned;

/* See address_space_translate: bit 0 is read, bit 1 is write.  */
typedef enum {
@@ -113,6 +114,15 @@ struct IOMMUTLBEntry {
     IOMMUAccessFlags perm;
};

+/* IOMMUTLBEntryUnaligned may not be page-aligned */
+struct IOMMUTLBEntryUnaligned {
+    AddressSpace    *target_as;
+    hwaddr           iova;
+    hwaddr           translated_addr;
+    hwaddr           len;
+    IOMMUAccessFlags perm;
+};
+
/*
  * Bitmap for different IOMMUNotifier capabilities. Each notifier can
  * register with one or multiple IOMMU Notifier capability bit(s).
@@ -2653,8 +2663,10 @@ void address_space_cache_destroy(MemoryRegionCache *cache);
/* address_space_get_iotlb_entry: translate an address into an IOTLB
  * entry. Should be called from an RCU critical section.
  */
-IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
-                                            bool is_write, MemTxAttrs attrs);
+IOMMUTLBEntryUnaligned address_space_get_iotlb_entry(AddressSpace *as,
+                                                     hwaddr addr,
+                                                     bool is_write,
+                                                     MemTxAttrs attrs);

/* address_space_translate: translate an address range into an address space
  * into a MemoryRegion and an address range into that section.  Should be
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 3c1912a1a0..469963f754 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -143,6 +143,8 @@ typedef struct subpage_t {

#define PHYS_SECTION_UNASSIGNED 0

+#define PAGE_MASK_NOT_BEHIND_IOMMU ((hwaddr)-1)
+
static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_log_global_after_sync(MemoryListener *listener);
@@ -470,7 +472,9 @@ unassigned:
  * @page_mask_out: page mask for the translated address. This
  *            should only be meaningful for IOMMU translated
  *            addresses, since there may be huge pages that this bit
- *            would tell. It can be @NULL if we don't care about it.
+ *            would tell. If the returned memory region section isn't
+ *            behind an IOMMU, PAGE_MASK_NOT_BEHIND_IOMMU is returned.
+ *            It can be @NULL if we don't care about it.
  * @is_write: whether the translation operation is for write
  * @is_mmio: whether this can be MMIO, set true if it can
  * @target_as: the address space targeted by the IOMMU
@@ -508,16 +512,18 @@ static MemoryRegionSection flatview_do_translate(FlatView *fv,
                                              target_as, attrs);
     }
     if (page_mask_out) {
-        /* Not behind an IOMMU, use default page size. */
-        *page_mask_out = ~TARGET_PAGE_MASK;
+        /* return a magic value if not behind an IOMMU */
+        *page_mask_out = PAGE_MASK_NOT_BEHIND_IOMMU;
     }

     return *section;
}

/* Called from RCU critical section */
-IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
-                                            bool is_write, MemTxAttrs attrs)
+IOMMUTLBEntryUnaligned address_space_get_iotlb_entry(AddressSpace *as,
+                                                     hwaddr addr,
+                                                     bool is_write,
+                                                     MemTxAttrs attrs)
{
     MemoryRegionSection section;
     hwaddr xlat, page_mask;
@@ -535,21 +541,36 @@ IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
         goto iotlb_fail;
     }

+    /*
+     * If the section isn't behind an IOMMU, return the whole section as an
+     * IOMMU TLB entry.
+     */
+    if (page_mask == PAGE_MASK_NOT_BEHIND_IOMMU) {
+        return (IOMMUTLBEntryUnaligned) {
+            .target_as = as,
+            .iova = section.offset_within_address_space,
+            .translated_addr = section.offset_within_address_space,
+            .len = int128_get64(section.size),
+            /* IOTLBs are for DMAs, and DMA only allows on RAMs. */
+            .perm = IOMMU_RW,
+        };
+    }
+
     /* Convert memory region offset into address space offset */
     xlat += section.offset_within_address_space -
         section.offset_within_region;

-    return (IOMMUTLBEntry) {
+    return (IOMMUTLBEntryUnaligned) {
         .target_as = as,
         .iova = addr & ~page_mask,
         .translated_addr = xlat & ~page_mask,
-        .addr_mask = page_mask,
+        .len = page_mask + 1,
         /* IOTLBs are for DMAs, and DMA only allows on RAMs. */
         .perm = IOMMU_RW,
     };

iotlb_fail:
-    return (IOMMUTLBEntry) {0};
+    return (IOMMUTLBEntryUnaligned) {0};
}

/* Called from RCU critical section */
--
2.25.1




