From: Joao Martins <joao.m.martins@oracle.com>
Add device dirty page tracking start/stop functionality. This uses the
device DMA logging uAPI to start and stop dirty page tracking by device.
Device dirty page tracking is used only if all devices within a
container support device dirty page tracking.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
---
include/hw/vfio/vfio-common.h | 2 +
hw/vfio/common.c | 211 +++++++++++++++++++++++++++++++++-
2 files changed, 211 insertions(+), 2 deletions(-)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 6f36876ce0..1f21e1fa43 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -149,6 +149,8 @@ typedef struct VFIODevice {
VFIOMigration *migration;
Error *migration_blocker;
OnOffAuto pre_copy_dirty_page_tracking;
+ bool dirty_pages_supported;
+ bool dirty_tracking;
} VFIODevice;
struct VFIODeviceOps {
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6041da6c7e..740153e7d7 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -473,6 +473,22 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
return true;
}
+/*
+ * Tell whether every VFIO device reachable through @container's groups has
+ * dirty_pages_supported set.
+ *
+ * Returns false at the first device without the flag, true otherwise (a
+ * container with no devices therefore yields true).
+ */
+static bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container)
+{
+    VFIODevice *dev;
+    VFIOGroup *grp;
+
+    QLIST_FOREACH(grp, &container->group_list, container_next) {
+        QLIST_FOREACH(dev, &grp->device_list, next) {
+            if (dev->dirty_pages_supported) {
+                continue;
+            }
+
+            /* One device without support is enough to rule it out */
+            return false;
+        }
+    }
+
+    return true;
+}
+
/*
* Check if all VFIO devices are running and migration is active, which is
* essentially equivalent to the migration being in pre-copy phase.
@@ -1404,13 +1420,192 @@ static int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
return ret;
}
+/*
+ * Issue the DMA logging request described by @feature to every device in
+ * @container and keep each device's dirty_tracking flag in sync.
+ *
+ * The request is a "start" when the feature bits selected by
+ * VFIO_DEVICE_FEATURE_MASK equal VFIO_DEVICE_FEATURE_DMA_LOGGING_START;
+ * anything else is handled as a "stop". Devices whose dirty_tracking flag
+ * already matches the requested state are skipped.
+ *
+ * Error policy:
+ *  - start: fail fast. The first failing device ends the walk; devices
+ *    started earlier keep dirty_tracking set, so a later stop request
+ *    still reaches them.
+ *  - stop: best effort. A failing device is reported and left flagged as
+ *    tracking, but the remaining devices are still stopped, so one bad
+ *    device cannot leave the others logging indefinitely.
+ *
+ * Returns 0 on success, or the negative errno of the first failed ioctl.
+ */
+static int vfio_devices_dma_logging_set(VFIOContainer *container,
+                                        struct vfio_device_feature *feature)
+{
+    bool status = (feature->flags & VFIO_DEVICE_FEATURE_MASK) ==
+                  VFIO_DEVICE_FEATURE_DMA_LOGGING_START;
+    VFIODevice *vbasedev;
+    VFIOGroup *group;
+    int ret = 0;
+
+    QLIST_FOREACH(group, &container->group_list, container_next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            int err;
+
+            if (vbasedev->dirty_tracking == status) {
+                continue;
+            }
+
+            if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+                /* Latch errno before any other call can overwrite it */
+                err = -errno;
+                error_report("%s: Failed to set DMA logging %s, err %d (%s)",
+                             vbasedev->name, status ? "start" : "stop", err,
+                             strerror(-err));
+                if (status) {
+                    return err;
+                }
+
+                /* Stop path: remember the first error and keep going */
+                if (!ret) {
+                    ret = err;
+                }
+                continue;
+            }
+
+            vbasedev->dirty_tracking = status;
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Build a VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP
+ * request and hand it to vfio_devices_dma_logging_set() for @container.
+ *
+ * The request lives in a zero-initialized stack array of uint64_t sized to
+ * hold a struct vfio_device_feature; argsz is set to the size of that array.
+ *
+ * Returns the value returned by vfio_devices_dma_logging_set().
+ */
+static int vfio_devices_dma_logging_stop(VFIOContainer *container)
+{
+    uint64_t raw[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
+                              sizeof(uint64_t))] = {};
+    struct vfio_device_feature *req = (struct vfio_device_feature *)raw;
+
+    req->argsz = sizeof(raw);
+    req->flags = VFIO_DEVICE_FEATURE_SET |
+                 VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP;
+
+    return vfio_devices_dma_logging_set(container, req);
+}
+
+/*
+ * Iterator callback: write @map into the range entry that the cursor behind
+ * @data currently points at, then advance the cursor by one entry.
+ *
+ * @data is a pointer to a
+ * 'struct vfio_device_feature_dma_logging_range *' cursor. No bounds check
+ * is done here, so the array behind the cursor must be large enough.
+ *
+ * Always returns false.
+ * NOTE(review): presumably false means "keep traversing" (GLib traversal
+ * convention) - confirm against the iterator's caller.
+ */
+static gboolean vfio_device_dma_logging_range_add(DMAMap *map, gpointer data)
+{
+    struct vfio_device_feature_dma_logging_range **cursor = data;
+    struct vfio_device_feature_dma_logging_range *slot = *cursor;
+
+    /* IOVATree is inclusive, DMA logging uAPI isn't, so add 1 to length */
+    slot->length = map->size + 1;
+    slot->iova = map->iova;
+
+    /* Leave the cursor on the next entry to fill */
+    *cursor = slot + 1;
+
+    return false;
+}
+
+/*
+ * Iterator callback: copy the iova and size of @map into the DMAMap that
+ * @data points to. Only those two fields are written.
+ *
+ * Always returns true.
+ * NOTE(review): presumably true stops the traversal after the first node,
+ * which is what makes this capture the first mapping - confirm against the
+ * iterator's caller.
+ */
+static gboolean vfio_iova_tree_get_first(DMAMap *map, gpointer data)
+{
+    DMAMap *dst = data;
+
+    dst->size = map->size;
+    dst->iova = map->iova;
+
+    return true;
+}
+
+/*
+ * Iterator callback: copy the iova and size of @map into the DMAMap that
+ * @data points to, overwriting whatever an earlier call stored there. Only
+ * those two fields are written.
+ *
+ * Always returns false.
+ * NOTE(review): presumably false lets the traversal run to the end, so the
+ * destination ends up holding the last mapping visited - confirm against
+ * the iterator's caller.
+ */
+static gboolean vfio_iova_tree_get_last(DMAMap *map, gpointer data)
+{
+    DMAMap *dst = data;
+
+    dst->size = map->size;
+    dst->iova = map->iova;
+
+    return false;
+}
+
+static struct vfio_device_feature *
+vfio_device_feature_dma_logging_start_create(VFIOContainer *container)
+{
+ struct vfio_device_feature *feature;
+ size_t feature_size;
+ struct vfio_device_feature_dma_logging_control *control;
+ struct vfio_device_feature_dma_logging_range *ranges;
+ unsigned int max_ranges;
+ unsigned int cur_ranges;
+
+ feature_size = sizeof(struct vfio_device_feature) +
+ sizeof(struct vfio_device_feature_dma_logging_control);
+ feature = g_malloc0(feature_size);
+ feature->argsz = feature_size;
+ feature->flags = VFIO_DEVICE_FEATURE_SET;
+ feature->flags |= VFIO_DEVICE_FEATURE_DMA_LOGGING_START;
+
+ control = (struct vfio_device_feature_dma_logging_control *)feature->data;
+ control->page_size = qemu_real_host_page_size();
+
+ QEMU_LOCK_GUARD(&container->mappings_mutex);
+
+ /*
+ * DMA logging uAPI guarantees to support at least num_ranges that fits into
+ * a single host kernel page. To be on the safe side, use this as a limit
+ * from which to merge to a single range.
+ */
+ max_ranges = qemu_real_host_page_size() / sizeof(*ranges);
+ cur_ranges = iova_tree_nnodes(container->mappings);
+ control->num_ranges = (cur_ranges <= max_ranges) ? cur_ranges : 1;