qemu-devel
Re: [PATCH] vhost: use large iotlb entry if no IOMMU translation is needed


From: Chao Gao
Subject: Re: [PATCH] vhost: use large iotlb entry if no IOMMU translation is needed
Date: Tue, 3 Aug 2021 12:29:29 +0800
User-agent: Mutt/1.9.4 (2018-02-28)

Ping. Could someone help review this patch?

Thanks
Chao

On Wed, Jul 21, 2021 at 03:54:02PM +0800, Chao Gao wrote:
>If the guest enables IOMMU_PLATFORM for virtio-net, a severe network
>performance drop is observed even if there is no IOMMU, and disabling
>vhost mitigates the issue. The culprit turns out to be frequent iotlb
>misses: kernel vhost-net has 2048 iotlb entries and each entry is 4K
>(qemu uses a 4K granule for i386 if there is no IOMMU), so vhost-net
>can cache translations for at most 8M (i.e. 4K*2048) of IOVA space.
>If the guest uses more than 8M of memory for DMA, iotlb misses are
>unavoidable.
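>
>To put numbers on it, a back-of-the-envelope sketch in standalone C
>(2048 is vhost-net's default iotlb size and 4K the granule qemu uses
>for i386 without an IOMMU, as noted above):
>
>    #include <stdio.h>
>
>    int main(void)
>    {
>        unsigned long entries = 2048;   /* vhost-net iotlb entries */
>        unsigned long granule = 4096;   /* bytes covered per 4K entry */
>
>        /* 2048 * 4K = 8M: DMA footprints beyond this start missing. */
>        printf("coverage: %lu MiB\n", entries * granule >> 20);
>        return 0;
>    }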
>
>If there is no IOMMU, or the IOMMU is disabled or works in pass-thru
>mode, we can optimistically use large, unaligned iotlb entries instead
>of 4K-aligned ones to reduce iotlb pressure. Kernel vhost-net already
>supports unaligned iotlb entries; the alignment requirement is imposed
>on the qemu side, by address_space_get_iotlb_entry() and
>flatview_do_translate().
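>
>To see where the alignment creeps in, here is a minimal standalone
>sketch of the mask arithmetic behind the two entry shapes (the
>typedef and values are illustrative, not qemu's actual code):
>
>    #include <stdio.h>
>    #include <stdint.h>
>
>    typedef uint64_t hwaddr;
>
>    int main(void)
>    {
>        hwaddr addr = 0x12345;       /* hypothetical iova */
>        hwaddr page_mask = 0xfff;    /* ~TARGET_PAGE_MASK for 4K pages */
>
>        /* Aligned entry: start and size snap to the page granule. */
>        printf("aligned:   iova=0x%jx len=0x%jx\n",
>               (uintmax_t)(addr & ~page_mask),
>               (uintmax_t)(page_mask + 1));
>
>        /* Unaligned entry: the whole section the iova falls in. */
>        hwaddr section_start = 0, section_len = 1ULL << 30;  /* 1G */
>        printf("unaligned: iova=0x%jx len=0x%jx\n",
>               (uintmax_t)section_start, (uintmax_t)section_len);
>        return 0;
>    }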
>
>Introduce IOMMUTLBEntryUnaligned, which has a @len field to specify
>the iotlb size and thus generalizes the iotlb entry to cover both the
>aligned case (the original IOMMUTLBEntry) and the unaligned case.
>flatview_do_translate() now returns a magic value in @page_mask_out
>if no IOMMU translation is needed. address_space_get_iotlb_entry()
>can then return either a page-aligned iotlb entry or, as one large
>iotlb entry, the whole memory region section in which the iova
>resides.
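>
>As a worked example of the payoff (hypothetical numbers): for a 1G
>RAM section, the old path would need 1G/4K = 262144 distinct 4K
>entries, far beyond vhost-net's 2048-entry cache, whereas the new
>path covers the whole section with a single entry.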
>
>Signed-off-by: Chao Gao <chao.gao@intel.com>
>---
> hw/virtio/vhost.c     |  6 +++---
> include/exec/memory.h | 16 ++++++++++++++--
> softmmu/physmem.c     | 37 +++++++++++++++++++++++++++++--------
> 3 files changed, 46 insertions(+), 13 deletions(-)
>
>diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>index e8f85a5d2d..6745caa129 100644
>--- a/hw/virtio/vhost.c
>+++ b/hw/virtio/vhost.c
>@@ -1010,7 +1010,7 @@ static int vhost_memory_region_lookup(struct vhost_dev *hdev,
> 
> int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
> {
>-    IOMMUTLBEntry iotlb;
>+    IOMMUTLBEntryUnaligned iotlb;
>     uint64_t uaddr, len;
>     int ret = -EFAULT;
> 
>@@ -1031,8 +1031,8 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
>             goto out;
>         }
> 
>-        len = MIN(iotlb.addr_mask + 1, len);
>-        iova = iova & ~iotlb.addr_mask;
>+        len = MIN(iotlb.len, len);
>+        iova = iotlb.iova;
> 
>         ret = vhost_backend_update_device_iotlb(dev, iova, uaddr,
>                                                 len, iotlb.perm);
>diff --git a/include/exec/memory.h b/include/exec/memory.h
>index c3d417d317..3f04e8fe88 100644
>--- a/include/exec/memory.h
>+++ b/include/exec/memory.h
>@@ -94,6 +94,7 @@ struct MemoryRegionSection {
> };
> 
> typedef struct IOMMUTLBEntry IOMMUTLBEntry;
>+typedef struct IOMMUTLBEntryUnaligned IOMMUTLBEntryUnaligned;
> 
> /* See address_space_translate: bit 0 is read, bit 1 is write.  */
> typedef enum {
>@@ -113,6 +114,15 @@ struct IOMMUTLBEntry {
>     IOMMUAccessFlags perm;
> };
> 
>+/* An IOMMUTLBEntryUnaligned may not be page-aligned */
>+struct IOMMUTLBEntryUnaligned {
>+    AddressSpace    *target_as;
>+    hwaddr           iova;
>+    hwaddr           translated_addr;
>+    hwaddr           len;
>+    IOMMUAccessFlags perm;
>+};
>+
> /*
>  * Bitmap for different IOMMUNotifier capabilities. Each notifier can
>  * register with one or multiple IOMMU Notifier capability bit(s).
>@@ -2653,8 +2663,10 @@ void address_space_cache_destroy(MemoryRegionCache *cache);
> /* address_space_get_iotlb_entry: translate an address into an IOTLB
>  * entry. Should be called from an RCU critical section.
>  */
>-IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
>-                                            bool is_write, MemTxAttrs attrs);
>+IOMMUTLBEntryUnaligned address_space_get_iotlb_entry(AddressSpace *as,
>+                                                     hwaddr addr,
>+                                                     bool is_write,
>+                                                     MemTxAttrs attrs);
> 
> /* address_space_translate: translate an address range into an address space
>  * into a MemoryRegion and an address range into that section.  Should be
>diff --git a/softmmu/physmem.c b/softmmu/physmem.c
>index 3c1912a1a0..469963f754 100644
>--- a/softmmu/physmem.c
>+++ b/softmmu/physmem.c
>@@ -143,6 +143,8 @@ typedef struct subpage_t {
> 
> #define PHYS_SECTION_UNASSIGNED 0
> 
>+#define PAGE_MASK_NOT_BEHIND_IOMMU ((hwaddr)-1)
>+
> static void io_mem_init(void);
> static void memory_map_init(void);
> static void tcg_log_global_after_sync(MemoryListener *listener);
>@@ -470,7 +472,9 @@ unassigned:
>  * @page_mask_out: page mask for the translated address. This
>  *            should only be meaningful for IOMMU translated
>  *            addresses, since there may be huge pages that this bit
>- *            would tell. It can be @NULL if we don't care about it.
>+ *            would tell. If the returned memory region section isn't
>+ *            behind an IOMMU, PAGE_MASK_NOT_BEHIND_IOMMU is returned.
>+ *            It can be @NULL if we don't care about it.
>  * @is_write: whether the translation operation is for write
>  * @is_mmio: whether this can be MMIO, set true if it can
>  * @target_as: the address space targeted by the IOMMU
>@@ -508,16 +512,18 @@ static MemoryRegionSection flatview_do_translate(FlatView *fv,
>                                              target_as, attrs);
>     }
>     if (page_mask_out) {
>-        /* Not behind an IOMMU, use default page size. */
>-        *page_mask_out = ~TARGET_PAGE_MASK;
>+        /* Return a magic value if not behind an IOMMU. */
>+        *page_mask_out = PAGE_MASK_NOT_BEHIND_IOMMU;
>     }
> 
>     return *section;
> }
> 
> /* Called from RCU critical section */
>-IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
>-                                            bool is_write, MemTxAttrs attrs)
>+IOMMUTLBEntryUnaligned address_space_get_iotlb_entry(AddressSpace *as,
>+                                                     hwaddr addr,
>+                                                     bool is_write,
>+                                                     MemTxAttrs attrs)
> {
>     MemoryRegionSection section;
>     hwaddr xlat, page_mask;
>@@ -535,21 +541,36 @@ IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
>         goto iotlb_fail;
>     }
> 
>+    /*
>+     * If the section isn't behind an IOMMU, return the whole section as an
>+     * IOMMU TLB entry.
>+     */
>+    if (page_mask == PAGE_MASK_NOT_BEHIND_IOMMU) {
>+        return (IOMMUTLBEntryUnaligned) {
>+            .target_as = as,
>+            .iova = section.offset_within_address_space,
>+            .translated_addr = section.offset_within_address_space,
>+            /* section.size is an Int128; narrow it for the hwaddr field */
>+            .len = int128_get64(section.size),
>+            /* IOTLBs are for DMAs, and DMA only allows on RAMs. */
>+            .perm = IOMMU_RW,
>+        };
>+    }
>+
>     /* Convert memory region offset into address space offset */
>     xlat += section.offset_within_address_space -
>         section.offset_within_region;
> 
>-    return (IOMMUTLBEntry) {
>+    return (IOMMUTLBEntryUnaligned) {
>         .target_as = as,
>         .iova = addr & ~page_mask,
>         .translated_addr = xlat & ~page_mask,
>-        .addr_mask = page_mask,
>+        .len = page_mask + 1,
>         /* IOTLBs are for DMAs, and DMA only allows on RAMs. */
>         .perm = IOMMU_RW,
>     };
> 
> iotlb_fail:
>-    return (IOMMUTLBEntry) {0};
>+    return (IOMMUTLBEntryUnaligned) {0};
> }
> 
> /* Called from RCU critical section */
>-- 
>2.25.1
>


