A recent OVMF update has resulted in MMIO regions being placed at
the upper end of the physical address space. As a result, when a
Host device is passed through to the Guest via VFIO, the following
mapping failures occur when VFIO tries to map the MMIO regions of
the device:
VFIO_MAP_DMA failed: Invalid argument
vfio_dma_map(0x557b2f2736d0, 0x380000000000, 0x1000000, 0x7f98ac400000) = -22
(Invalid argument)
The above failures are mainly seen on some Intel platforms where
the physical address width is larger than the Host's IOMMU
address width. In these cases, VFIO fails to map the MMIO regions
because the IOVAs would be larger than the IOMMU aperture regions.
Therefore, one way to solve this problem would be to ensure that
cpu->phys_bits <= <IOMMU address width>
This can be done by reading the IOMMU capability value from sysfs,
extracting the address width from it, and using that width to cap
the phys_bits value, as shown in this patch.
Previous attempt at solving this issue in OVMF:
https://edk2.groups.io/g/devel/topic/102359124
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
---
target/i386/host-cpu.c | 61 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 60 insertions(+), 1 deletion(-)
diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c
index 92ecb7254b..8326ec95bc 100644
--- a/target/i386/host-cpu.c
+++ b/target/i386/host-cpu.c
@@ -12,6 +12,8 @@
#include "host-cpu.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
+#include "qemu/config-file.h"
+#include "qemu/option.h"
#include "sysemu/sysemu.h"
/* Note: Only safe for use on x86(-64) hosts */
@@ -51,11 +53,58 @@ static void host_cpu_enable_cpu_pm(X86CPU *cpu)
env->features[FEAT_1_ECX] |= CPUID_EXT_MONITOR;
}
+/*
+ * qemu_opts_foreach() callback: for each "vfio-pci" device with a "host"
+ * address, read the Intel IOMMU capability value from sysfs and store in
+ * *opaque (uint32_t, 0 = none yet) the smallest width from bits 21:16.
+ */
+static int intel_iommu_check(void *opaque, QemuOpts *opts, Error **errp)
+{
+    g_autofree char *iommu_path = NULL, *caps = NULL;
+    const char *driver = qemu_opt_get(opts, "driver");
+    const char *device = qemu_opt_get(opts, "host");
+    uint32_t *iommu_phys_bits = opaque;
+    uint32_t width;
+    uint64_t iommu_caps;
+    char *end = NULL;
+
+    /* Only VFIO passthrough devices need phys_bits to be limited. */
+    if (g_strcmp0(driver, "vfio-pci") || !device) {
+        return 0;
+    }
+
+    /* If the sysfs attribute is absent the read fails; skip the device. */
+    iommu_path = g_strdup_printf("/sys/bus/pci/devices/%s"
+                                 "/iommu/intel-iommu/cap", device);
+    if (!g_file_get_contents(iommu_path, &caps, NULL, NULL)) {
+        return 0;
+    }
+
+    /* Parse as 64-bit hex; "%lx" is only right where long is 64-bit. */
+    iommu_caps = g_ascii_strtoull(caps, &end, 16);
+    width = ((iommu_caps >> 16) & 0x3f) + 1;
+    if (end != caps && (!*iommu_phys_bits || width < *iommu_phys_bits)) {
+        *iommu_phys_bits = width;
+    }
+
+    return 0;
+}
+
+/* Run intel_iommu_check over the "device" options; 0 if nothing is set. */
+static uint32_t host_iommu_phys_bits(void)
+{
+    uint32_t bits = 0;
+
+    qemu_opts_foreach(qemu_find_opts("device"), intel_iommu_check, &bits, NULL);
+    return bits;
+}
+
static uint32_t host_cpu_adjust_phys_bits(X86CPU *cpu)
{
uint32_t host_phys_bits = host_cpu_phys_bits();
+ uint32_t iommu_phys_bits = host_iommu_phys_bits();
uint32_t phys_bits = cpu->phys_bits;
- static bool warned;
+ static bool warned, warned2;
/*
* Print a warning if the user set it to a value that's not the
@@ -78,6 +127,16 @@ static uint32_t host_cpu_adjust_phys_bits(X86CPU *cpu)
}
}
+ if (iommu_phys_bits && phys_bits > iommu_phys_bits) {
+ phys_bits = iommu_phys_bits;
+ if (!warned2) {
+ warn_report("Using physical bits (%u)"
+ " to prevent VFIO mapping failures",
+ iommu_phys_bits);
+ warned2 = true;
+ }
+ }
+
return phys_bits;
}