qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 01/14] Generic DMA memory access interface


From: David Gibson
Subject: [Qemu-devel] [PATCH 01/14] Generic DMA memory access interface
Date: Fri, 3 Jun 2011 01:12:29 +1000

This introduces replacements for memory access functions like
cpu_physical_memory_read(). The new interface can handle address
translation and access checking through an IOMMU.

David Gibson: I have made several bugfixes and cleanups to Eduard's
original patch.

 * dma_memory_rw() was incorrectly using (uninitialized) plen instead
   of len in the fallback to no-IOMMU case.

 * the dma_memory_map() tracking was storing the guest physical
   address of each mapping, but not the qemu user virtual address.
   However in unmap() it was then attempting to lookup by virtual
   using a completely bogus cast.

 * The dma_memory_rw() function is moved from dma_rw.h to dma_rw.c, it
   was a bit too much code for an inline.

 * IOMMU support is now available on all target platforms, not just
   i386, but is configurable (--enable-iommu/--disable-iommu).  Stubs
   are used so that individual drivers can use the new dma interface
   and it will turn into old-style cpu physical accesses at no cost on
   IOMMU-less builds.

Signed-off-by: Eduard - Gabriel Munteanu <address@hidden>
Signed-off-by: David Gibson <address@hidden>
---
 Makefile.target |    1 +
 configure       |   12 +++
 hw/dma_rw.c     |  219 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/dma_rw.h     |  149 +++++++++++++++++++++++++++++++++++++
 hw/qdev.h       |    5 +
 5 files changed, 386 insertions(+), 0 deletions(-)
 create mode 100644 hw/dma_rw.c
 create mode 100644 hw/dma_rw.h

diff --git a/Makefile.target b/Makefile.target
index 4f97b26..76fd734 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -198,6 +198,7 @@ obj-$(CONFIG_REALLY_VIRTFS) += 9pfs/virtio-9p.o
 obj-y += rwhandler.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_NO_KVM) += kvm-stub.o
+obj-$(CONFIG_IOMMU) += dma_rw.o
 LIBS+=-lz
 
 QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
diff --git a/configure b/configure
index a318d37..56f9616 100755
--- a/configure
+++ b/configure
@@ -131,6 +131,7 @@ xen_ctrl_version=""
 linux_aio=""
 attr=""
 vhost_net=""
+iommu="no"
 xfs=""
 
 gprof="no"
@@ -724,6 +725,10 @@ for opt do
   ;;
   --enable-vhost-net) vhost_net="yes"
   ;;
+  --enable-iommu) iommu="yes"
+  ;;
+  --disable-iommu) iommu="no"
+  ;;
   --disable-opengl) opengl="no"
   ;;
   --enable-opengl) opengl="yes"
@@ -1006,6 +1011,8 @@ echo "  --enable-docs            enable documentation 
build"
 echo "  --disable-docs           disable documentation build"
 echo "  --disable-vhost-net      disable vhost-net acceleration support"
 echo "  --enable-vhost-net       enable vhost-net acceleration support"
+echo "  --disable-iommu          disable IOMMU emulation support"
+echo "  --enable-iommu           enable IOMMU emulation support"
 echo "  --enable-trace-backend=B Set trace backend"
 echo "                           Available backends:" 
$("$source_path"/scripts/tracetool --list-backends)
 echo "  --with-trace-file=NAME   Full PATH,NAME of file to store traces"
@@ -2702,6 +2709,7 @@ echo "madvise           $madvise"
 echo "posix_madvise     $posix_madvise"
 echo "uuid support      $uuid"
 echo "vhost-net support $vhost_net"
+echo "IOMMU support     $iommu"
 echo "Trace backend     $trace_backend"
 echo "Trace output file $trace_file-<pid>"
 echo "spice support     $spice"
@@ -3515,6 +3523,10 @@ if test "$target_softmmu" = "yes" -a \( \
   echo "CONFIG_NEED_MMU=y" >> $config_target_mak
 fi
 
+if test "$iommu" = "yes" ; then
+  echo "CONFIG_IOMMU=y" >> $config_host_mak
+fi
+
 if test "$gprof" = "yes" ; then
   echo "TARGET_GPROF=yes" >> $config_target_mak
   if test "$target_linux_user" = "yes" ; then
diff --git a/hw/dma_rw.c b/hw/dma_rw.c
new file mode 100644
index 0000000..6586425
--- /dev/null
+++ b/hw/dma_rw.c
@@ -0,0 +1,219 @@
+/*
+ * Generic DMA memory access interface.
+ *
+ * Copyright (c) 2011 Eduard - Gabriel Munteanu
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "dma_rw.h"
+#include "range.h"
+
+/* #define DEBUG_DMA */
+
+int dma_memory_check(DeviceState *dev, dma_addr_t addr, dma_addr_t len,
+                     int check_read, int check_write)
+{
+    target_phys_addr_t paddr, plen;
+    DMAMmu *iommu = IOMMU(dev);
+
+#ifdef DEBUG_DMA
+    fprintf(stderr, "dma_memory_check [%c%c] iommu=%p addr=0x%llx 
len=0x%llx\n",
+            check_read ? 'R' : '-', check_write ? 'W' : '-',
+            (unsigned long long)addr, (unsigned long long)len);
+#endif
+    if (!iommu) {
+        return 0;
+    }
+
+#ifdef DEBUG_DMA
+    fprintf(stderr, "dma_memory_check(): translate=%p\n", iommu->translate);
+#endif
+
+    while (len) {
+        if (check_read) {
+            if (iommu->translate(dev, addr, &paddr, &plen, 0) != 0) {
+                return -1;
+            }
+        }
+        if (check_write) {
+            if (iommu->translate(dev, addr, &paddr, &plen, 1) != 0) {
+                return -1;
+            }
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        len -= plen;
+        addr += plen;
+    }
+
+    return 0;
+}
+
+int dma_memory_rw(DeviceState *dev, dma_addr_t addr,
+                   void *buf, dma_addr_t len, int is_write)
+{
+    target_phys_addr_t paddr, plen;
+    int err;
+    DMAMmu *iommu = IOMMU(dev);
+
+#ifdef DEBUG_DMA
+    fprintf(stderr, "dma_memory_rw [%c] iommu=%p addr=0x%llx len=0x%llx\n",
+            is_write ? 'W' : 'R', iommu,
+            (unsigned long long)addr, (unsigned long long)len);
+#endif
+    /*
+     * Fast-path non-iommu.
+     * More importantly, makes it obvious what this function does.
+     */
+    if (!iommu) {
+        cpu_physical_memory_rw(addr, buf, len, is_write);
+        return 0;
+    }
+
+#ifdef DEBUG_DMA
+    fprintf(stderr, "dma_memory_rw(): translate=%p\n",
+            iommu->translate);
+#endif
+
+    while (len) {
+        err = iommu->translate(dev, addr, &paddr, &plen, is_write);
+        if (err) {
+            return -1;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        cpu_physical_memory_rw(paddr, buf, plen, is_write);
+
+        len -= plen;
+        addr += plen;
+        buf += plen;
+    }
+
+    return 0;
+}
+
+typedef struct DMAMemoryMap DMAMemoryMap;
+struct DMAMemoryMap {
+    dma_addr_t              addr;
+    size_t                  len;
+    void *                  buf;
+    DMAInvalidateMapFunc    *invalidate;
+    void                    *invalidate_opaque;
+
+    QLIST_ENTRY(DMAMemoryMap) list;
+};
+
+void dma_mmu_init(DMAMmu *iommu, DMATranslateFunc fn)
+{
+    iommu->translate = fn;
+    QLIST_INIT(&iommu->memory_maps);
+}
+
+void dma_invalidate_memory_range(DeviceState *dev,
+                                 dma_addr_t addr, dma_addr_t len)
+{
+    DMAMemoryMap *map;
+
+    QLIST_FOREACH(map, &dev->iommu->memory_maps, list) {
+        if (ranges_overlap(addr, len, map->addr, map->len)) {
+            map->invalidate(map->invalidate_opaque);
+            QLIST_REMOVE(map, list);
+            free(map);
+        }
+    }
+}
+
+void *dma_memory_map(DeviceState *dev, DMAInvalidateMapFunc *cb, void *opaque,
+                     dma_addr_t addr, dma_addr_t *len, int is_write)
+{
+    int err;
+    target_phys_addr_t paddr, plen;
+    void *buf;
+    DMAMmu *iommu = IOMMU(dev);
+
+    if (!iommu) {
+        void *p;
+
+        p = cpu_physical_memory_map((target_phys_addr_t)addr, &plen, is_write);
+        *len = plen;
+        return p;
+    }
+
+    plen = *len;
+    err = iommu->translate(dev, addr, &paddr, &plen, is_write);
+    if (err) {
+        return NULL;
+    }
+
+    /*
+     * If this is true, the virtual region is contiguous,
+     * but the translated physical region isn't. We just
+     * clamp *len, much like cpu_physical_memory_map() does.
+     */
+    if (plen < *len) {
+        *len = plen;
+    }
+
+    buf = cpu_physical_memory_map(paddr, &plen, is_write);
+    *len = plen;
+
+    /* We treat maps as remote TLBs to cope with stuff like AIO. */
+    if (cb) {
+        DMAMemoryMap *map;
+
+        map = qemu_malloc(sizeof(DMAMemoryMap));
+        map->addr = addr;
+        map->len = *len;
+        map->buf = buf;
+        map->invalidate = cb;
+        map->invalidate_opaque = opaque;
+        
+        QLIST_INSERT_HEAD(&dev->iommu->memory_maps, map, list);
+    }
+
+    return buf;
+}
+
+void dma_memory_unmap(DeviceState *dev, void *buffer, dma_addr_t len,
+                      int is_write, dma_addr_t access_len)
+{
+    DMAMmu *iommu = IOMMU(dev);
+
+    cpu_physical_memory_unmap(buffer, len, is_write, access_len);
+    if (iommu) {
+        DMAMemoryMap *map;
+
+        QLIST_FOREACH(map, &iommu->memory_maps, list) {
+            if ((map->buf == buffer) && (map->len == len)) {
+                QLIST_REMOVE(map, list);
+                free(map);
+            }
+        }
+    }
+}
+
diff --git a/hw/dma_rw.h b/hw/dma_rw.h
new file mode 100644
index 0000000..b57463d
--- /dev/null
+++ b/hw/dma_rw.h
@@ -0,0 +1,149 @@
+#ifndef DMA_RW_H
+#define DMA_RW_H
+
+#include "qemu-common.h"
+#include "qdev.h"
+
+#ifndef CONFIG_IOMMU
+typedef target_phys_addr_t dma_addr_t;
+#else
+typedef uint64_t dma_addr_t;
+#endif
+
+typedef int DMATranslateFunc(DeviceState *dev,
+                             dma_addr_t addr,
+                             target_phys_addr_t *paddr,
+                             target_phys_addr_t *len,
+                             int is_write);
+
+typedef void DMAInvalidateMapFunc(void *);
+
+struct DMAMmu {
+    DMATranslateFunc *translate;
+    QLIST_HEAD(memory_maps, DMAMemoryMap) memory_maps;
+};
+
+#ifndef CONFIG_IOMMU
+
+#define IOMMU(_dev) (NULL)
+
+static inline int dma_memory_check(DeviceState *dev,
+                                   dma_addr_t addr, dma_addr_t len,
+                                   int check_read, int check_write)
+{
+    return 0;
+}
+
+static inline int dma_memory_rw(DeviceState *dev, dma_addr_t addr,
+                                void *buf, dma_addr_t len, int is_write)
+{
+    cpu_physical_memory_rw(addr, buf, len, is_write);
+    return 0;
+}
+
+static inline void *dma_memory_map(DeviceState *dev,
+                                   DMAInvalidateMapFunc *cb, void *opaque,
+                                   dma_addr_t addr, dma_addr_t *len,
+                                   int is_write)
+{
+    return cpu_physical_memory_map(addr, len, is_write);
+}
+
+static inline void dma_memory_unmap(DeviceState *dev,
+                                    void *buffer, dma_addr_t len,
+                                    int is_write, dma_addr_t access_len)
+{
+    return cpu_physical_memory_unmap(buffer, len, is_write, access_len);
+}
+
+#else /* CONFIG_IOMMU */
+
+#define IOMMU(_dev) ((_dev)->iommu)
+
+int dma_memory_check(DeviceState *dev, dma_addr_t addr, dma_addr_t len,
+                     int check_read, int check_write);
+int dma_memory_rw(DeviceState *dev, dma_addr_t addr,
+                  void *buf, dma_addr_t len, int is_write);
+
+void *dma_memory_map(DeviceState *dev,
+                     DMAInvalidateMapFunc *cb, void *opaque,
+                     dma_addr_t addr, dma_addr_t *len,
+                     int is_write);
+void dma_memory_unmap(DeviceState *dev,
+                      void *buffer, dma_addr_t len,
+                      int is_write, dma_addr_t access_len);
+
+void dma_mmu_init(DMAMmu *iommu, DMATranslateFunc);
+void dma_invalidate_memory_range(DeviceState *dev,
+                                 dma_addr_t addr, dma_addr_t len);
+
+#endif /* CONFIG_IOMMU */
+
+/* warning: like the corresponding ldX_phys / stX_phys functions, these
+ * DMA accessors can only handle aligned accesses */
+
+#define DEFINE_DMA_LD(suffix, size)                                       \
+static inline uint##size##_t                                              \
+dma_ld##suffix(DeviceState *dev, dma_addr_t addr)                         \
+{                                                                         \
+    int err;                                                              \
+    target_phys_addr_t paddr, plen;                                       \
+    DMAMmu *iommu = IOMMU(dev);                                           \
+                                                                          \
+    if (!iommu) {                                                          \
+        return ld##suffix##_phys(addr);                                   \
+    }                                                                     \
+                                                                          \
+    err = iommu->translate(dev, addr, &paddr, &plen, 0);                  \
+    if (err || (plen < size / 8))                                         \
+        return 0;                                                         \
+                                                                          \
+    return ld##suffix##_phys(paddr);                                      \
+}
+
+#define DEFINE_DMA_ST(suffix, size)                                       \
+static inline void                                                        \
+dma_st##suffix(DeviceState *dev, dma_addr_t addr, uint##size##_t val)     \
+{                                                                         \
+    int err;                                                              \
+    target_phys_addr_t paddr, plen;                                       \
+    DMAMmu *iommu = IOMMU(dev);                                           \
+                                                                          \
+    if (!iommu) {                                                          \
+        st##suffix##_phys(addr, val);                                     \
+        return;                                                           \
+    }                                                                     \
+    err = iommu->translate(dev, addr, &paddr, &plen, 1);                  \
+    if (err || (plen < size / 8))                                         \
+        return;                                                           \
+                                                                          \
+    st##suffix##_phys(paddr, val);                                        \
+}
+
+DEFINE_DMA_LD(ub, 8)
+DEFINE_DMA_LD(uw, 16)
+DEFINE_DMA_LD(l, 32)
+DEFINE_DMA_LD(q, 64)
+
+DEFINE_DMA_ST(b, 8)
+DEFINE_DMA_ST(w, 16)
+DEFINE_DMA_ST(l, 32)
+DEFINE_DMA_ST(q, 64)
+
+static inline int dma_memory_read(DeviceState *dev,
+                                  dma_addr_t addr,
+                                  void *buf,
+                                  dma_addr_t len)
+{
+    return dma_memory_rw(dev, addr, buf, len, 0);
+}
+
+static inline int dma_memory_write(DeviceState *dev,
+                                   dma_addr_t addr,
+                                   const void *buf,
+                                   dma_addr_t len)
+{
+    return dma_memory_rw(dev, addr, (void *) buf, len, 1);
+}
+
+#endif /* DMA_RW_H */
diff --git a/hw/qdev.h b/hw/qdev.h
index 8a13ec9..3c74d51 100644
--- a/hw/qdev.h
+++ b/hw/qdev.h
@@ -18,6 +18,8 @@ typedef struct BusState BusState;
 
 typedef struct BusInfo BusInfo;
 
+typedef struct DMAMmu DMAMmu;
+
 enum DevState {
     DEV_STATE_CREATED = 1,
     DEV_STATE_INITIALIZED,
@@ -45,6 +47,9 @@ struct DeviceState {
     QLIST_ENTRY(DeviceState) sibling;
     int instance_id_alias;
     int alias_required_for_version;
+#ifdef CONFIG_IOMMU
+    DMAMmu *iommu;
+#endif
 };
 
 typedef void (*bus_dev_printfn)(Monitor *mon, DeviceState *dev, int indent);
-- 
1.7.4.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]