qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC 2/2] vhost-user-fs: Implement stateful migration


From: Anton Kuchin
Subject: Re: [RFC 2/2] vhost-user-fs: Implement stateful migration
Date: Fri, 17 Mar 2023 19:19:46 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Thunderbird/102.7.1

On 13/03/2023 19:48, Hanna Czenczek wrote:
A virtio-fs device's VM state consists of:
- the virtio device (vring) state (VMSTATE_VIRTIO_DEVICE)
- the back-end's (virtiofsd's) internal state

We get/set the latter via the new vhost-user operations FS_SET_STATE_FD,
FS_GET_STATE, and FS_SET_STATE.

Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
  hw/virtio/vhost-user-fs.c | 171 +++++++++++++++++++++++++++++++++++++-
  1 file changed, 170 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index 83fc20e49e..df1fb02acc 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -20,8 +20,10 @@
  #include "hw/virtio/virtio-bus.h"
  #include "hw/virtio/virtio-access.h"
  #include "qemu/error-report.h"
+#include "qemu/memfd.h"
  #include "hw/virtio/vhost.h"
  #include "hw/virtio/vhost-user-fs.h"
+#include "migration/qemu-file-types.h"
  #include "monitor/monitor.h"
  #include "sysemu/sysemu.h"
@@ -298,9 +300,176 @@ static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
      return &fs->vhost_dev;
  }
+/**
+ * Fetch the internal state from the back-end (virtiofsd) and save it
+ * to `f`.
+ */
+static int vuf_save_state(QEMUFile *f, void *pv, size_t size,
+                          const VMStateField *field, JSONWriter *vmdesc)
+{
+    VirtIODevice *vdev = pv;
+    VHostUserFS *fs = VHOST_USER_FS(vdev);
+    int memfd = -1;
+    /* Size of the shared memory through which to transfer the state */
+    const size_t chunk_size = 4 * 1024 * 1024;
+    size_t state_offset;
+    ssize_t remaining;
+    void *shm_buf;
+    Error *local_err = NULL;
+    int ret, ret2;
+
+    /* Set up shared memory through which to receive the state from virtiofsd 
*/
+    shm_buf = qemu_memfd_alloc("vhost-fs-state", chunk_size,
+                               F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW,
+                               &memfd, &local_err);
+    if (!shm_buf) {
+        error_report_err(local_err);
+        ret = -ENOMEM;
+        goto early_fail;
+    }
+
+    /* Share the SHM area with virtiofsd */
+    ret = vhost_fs_set_state_fd(&fs->vhost_dev, memfd, chunk_size);
+    if (ret < 0) {
+        goto early_fail;

Don't we need some log message here too?

+    }
+
+    /* Receive the virtiofsd state in chunks, and write them to `f` */
+    state_offset = 0;
+    do {
+        size_t this_chunk_size;
+
+        remaining = vhost_fs_get_state(&fs->vhost_dev, state_offset,
+                                       chunk_size);
+        if (remaining < 0) {
+            ret = remaining;
+            goto fail;
+        }
+
+        /* Prefix the whole state by its total length */
+        if (state_offset == 0) {
+            qemu_put_be64(f, remaining);
+        }
+
+        this_chunk_size = MIN(remaining, chunk_size);
+        qemu_put_buffer(f, shm_buf, this_chunk_size);
+        state_offset += this_chunk_size;
+    } while (remaining >= chunk_size);
+
+    ret = 0;
+fail:
+    /* Have virtiofsd close the shared memory */
+    ret2 = vhost_fs_set_state_fd(&fs->vhost_dev, -1, 0);
+    if (ret2 < 0) {
+        error_report("Failed to remove state FD from the vhost-user-fs back "
+                     "end: %s", strerror(-ret));
+        if (ret == 0) {
+            ret = ret2;
+        }
+    }
+
+early_fail:
+    if (shm_buf) {
+        qemu_memfd_free(shm_buf, chunk_size, memfd);
+    }
+
+    return ret;
+}
+
+/**
+ * Load the back-end's (virtiofsd's) internal state from `f` and send
+ * it over to that back-end.
+ */
+static int vuf_load_state(QEMUFile *f, void *pv, size_t size,
+                          const VMStateField *field)
+{
+    VirtIODevice *vdev = pv;
+    VHostUserFS *fs = VHOST_USER_FS(vdev);
+    int memfd = -1;
+    /* Size of the shared memory through which to transfer the state */
+    const size_t chunk_size = 4 * 1024 * 1024;
+    size_t state_offset;
+    uint64_t remaining;
+    void *shm_buf;
+    Error *local_err = NULL;
+    int ret, ret2;
+
+    /* The state is prefixed by its total length, read that first */
+    remaining = qemu_get_be64(f);
+
+    /* Set up shared memory through which to send the state to virtiofsd */
+    shm_buf = qemu_memfd_alloc("vhost-fs-state", chunk_size,
+                               F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW,
+                               &memfd, &local_err);
+    if (!shm_buf) {
+        error_report_err(local_err);
+        ret = -ENOMEM;
+        goto early_fail;
+    }
+
+    /* Share the SHM area with virtiofsd */
+    ret = vhost_fs_set_state_fd(&fs->vhost_dev, memfd, chunk_size);
+    if (ret < 0) {
+        goto early_fail;
+    }
+
+    /*
+     * Read the virtiofsd state in chunks from `f`, and send them over
+     * to virtiofsd
+     */
+    state_offset = 0;
+    do {
+        size_t this_chunk_size = MIN(remaining, chunk_size);
+
+        if (qemu_get_buffer(f, shm_buf, this_chunk_size) < this_chunk_size) {
+            ret = -EINVAL;
+            goto fail;
+        }
+
+        ret = vhost_fs_set_state(&fs->vhost_dev, state_offset, 
this_chunk_size);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        state_offset += this_chunk_size;
+        remaining -= this_chunk_size;
+    } while (remaining > 0);
+
+    ret = 0;
+fail:
+    ret2 = vhost_fs_set_state_fd(&fs->vhost_dev, -1, 0);
+    if (ret2 < 0) {
+        error_report("Failed to remove state FD from the vhost-user-fs back "
+                     "end -- perhaps it failed to deserialize/apply the state: 
"
+                     "%s", strerror(-ret2));
+        if (ret == 0) {
+            ret = ret2;
+        }
+    }
+
+early_fail:
+    if (shm_buf) {
+        qemu_memfd_free(shm_buf, chunk_size, memfd);
+    }
+
+    return ret;
+}
+
  static const VMStateDescription vuf_vmstate = {
      .name = "vhost-user-fs",
-    .unmigratable = 1,
+    .version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_VIRTIO_DEVICE,
+        {
+            .name = "back-end",
+            .info = &(const VMStateInfo) {
+                .name = "virtio-fs back-end state",
+                .get = vuf_load_state,
+                .put = vuf_save_state,
+            },
+        },

I've been working on a stateless migration patch [1], and it was discussed there that we need to keep some kind of blocker by default if orchestrators rely on the unmigratable
field in the virtio-fs vmstate to block the migration.
For this purpose I've implemented a flag that selects "none" or "external" and is checked
in pre_save, so it could be extended with an "internal" option.
We didn't come to a conclusion on whether we also need to check incoming migration; the discussion
stalled for a while, but I'm returning to it now.

I would appreciate it if you could find time to look at the discussion and consider the idea proposed there: storing the internal state as a subsection of the vmstate, so that it is
optional rather than mandatory.

[1] https://patchew.org/QEMU/20230217170038.1273710-1-antonkuchin@yandex-team.ru/

+        VMSTATE_END_OF_LIST()
+    },
  };
static Property vuf_properties[] = {



reply via email to

[Prev in Thread] Current Thread [Next in Thread]