qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 1/2] hw/9pfs: Add new virtfs option cache=writet


From: Aneesh Kumar K.V
Subject: Re: [Qemu-devel] [PATCH 1/2] hw/9pfs: Add new virtfs option cache=writethrough to skip host page cache
Date: Wed, 12 Oct 2011 20:45:31 +0530
User-agent: Notmuch/0.9_rc1-42-g76fdca8 (http://notmuchmail.org) Emacs/23.3.1 (x86_64-pc-linux-gnu)

On Wed, 12 Oct 2011 15:32:36 +0100, Stefan Hajnoczi <address@hidden> wrote:
> On Wed, Oct 12, 2011 at 2:19 PM, Aneesh Kumar K.V
> <address@hidden> wrote:
> > On Wed, 12 Oct 2011 10:55:21 +0100, Stefan Hajnoczi <address@hidden> wrote:
> >> On Mon, Oct 10, 2011 at 10:06 AM, Aneesh Kumar K.V
> >> <address@hidden> wrote:
> >> > diff --git a/hw/9pfs/virtio-9p-handle.c b/hw/9pfs/virtio-9p-handle.c
> >> > index 5c8b5ed..441a37f 100644
> >> > --- a/hw/9pfs/virtio-9p-handle.c
> >> > +++ b/hw/9pfs/virtio-9p-handle.c
> >> > @@ -202,6 +202,15 @@ static ssize_t handle_pwritev(FsContext *ctx, int 
> >> > fd, const struct iovec *iov,
> >> >         return writev(fd, iov, iovcnt);
> >>
> >> The sync_file_range(2) call below is dead-code since we'll return
> >> immediately after writev(2) completes.  The writev(2) return value
> >> needs to be saved temporarily and then returned after
> >> sync_file_range(2).
> >
> > Missed that. Will fix in the next update
> >
> >>
> >> >     }
> >> >  #endif
> >> > +    if (ctx->cache_flags & V9FS_WRITETHROUGH_CACHE) {
> >>
> >> -drive cache=writethrough means something different from 9pfs
> >> "writethrough".  This is confusing so I wonder if there is a better
> >> name like immediate write-out.
> >>
> >
> > cache=immediate-write-out ?
> >
> >> > +        /*
> >> > +         * Initiate a writeback. This is not a data integrity sync.
> >> > +         * We want to ensure that we don't leave dirty pages in the 
> >> > cache
> >> > +         * after write when cache=writethrough is sepcified.
> >> > +         */
> >> > +        sync_file_range(fd, offset, 0,
> >> > +                        SYNC_FILE_RANGE_WAIT_BEFORE | 
> >> > SYNC_FILE_RANGE_WRITE);
> >> > +    }
> >>
> >> I'm not sure whether SYNC_FILE_RANGE_WAIT_BEFORE is necessary.  As a
> >> best-effort mechanism just SYNC_FILE_RANGE_WRITE does the job although
> >> a client that rapidly rewrites may be able to leave dirty pages in the
> >> host page cache
> >
> > Shouldn't we prevent this ? That is the reason for me to use that
> > WAIT_BEFORE ?
> 
> The flag will cause sync_file_range(2) to wait on in-flight I/O.  The
> guest will notice slow I/O.  You should at least specify a range
> instead of nbytes=0 in the arguments.

version 3 i have 

commit db3cb2ac561d837176f9e0d02075a898c57ad309
Author: Aneesh Kumar K.V <address@hidden>
Date:   Wed Oct 12 19:11:23 2011 +0530

    hw/9pfs: Add new virtfs option writeout=immediate skip host page cache
    
    writeout=immediate implies the after pwritev we do a sync_file_range.
    
    Signed-off-by: Aneesh Kumar K.V <address@hidden>

diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
index 8de8abf..af3ecbe 100644
--- a/fsdev/file-op-9p.h
+++ b/fsdev/file-op-9p.h
@@ -53,12 +53,16 @@ struct xattr_operations;
 /* FsContext flag values */
 #define PATHNAME_FSCONTEXT 0x1
 
+/* export flags */
+#define V9FS_IMMEDIATE_WRITEOUT 0x1
+
 typedef struct FsContext
 {
     int flags;
     char *fs_root;
     SecModel fs_sm;
     uid_t uid;
+    int export_flags;
     struct xattr_operations **xops;
     /* fs driver specific data */
     void *private;
diff --git a/fsdev/qemu-fsdev.c b/fsdev/qemu-fsdev.c
index 768819f..946bad7 100644
--- a/fsdev/qemu-fsdev.c
+++ b/fsdev/qemu-fsdev.c
@@ -34,6 +34,8 @@ int qemu_fsdev_add(QemuOpts *opts)
     const char *fstype = qemu_opt_get(opts, "fstype");
     const char *path = qemu_opt_get(opts, "path");
     const char *sec_model = qemu_opt_get(opts, "security_model");
+    const char *writeout = qemu_opt_get(opts, "writeout");
+
 
     if (!fsdev_id) {
         fprintf(stderr, "fsdev: No id specified\n");
@@ -72,10 +74,14 @@ int qemu_fsdev_add(QemuOpts *opts)
     fsle->fse.path = g_strdup(path);
     fsle->fse.security_model = g_strdup(sec_model);
     fsle->fse.ops = FsTypes[i].ops;
-
+    fsle->fse.export_flags = 0;
+    if (writeout) {
+        if (!strcmp(writeout, "immediate")) {
+            fsle->fse.export_flags = V9FS_IMMEDIATE_WRITEOUT;
+        }
+    }
     QTAILQ_INSERT_TAIL(&fstype_entries, fsle, next);
     return 0;
-
 }
 
 FsTypeEntry *get_fsdev_fsentry(char *id)
diff --git a/fsdev/qemu-fsdev.h b/fsdev/qemu-fsdev.h
index e04931a..3b2feb4 100644
--- a/fsdev/qemu-fsdev.h
+++ b/fsdev/qemu-fsdev.h
@@ -41,6 +41,7 @@ typedef struct FsTypeEntry {
     char *fsdev_id;
     char *path;
     char *security_model;
+    int export_flags;
     FileOperations *ops;
 } FsTypeEntry;
 
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index e5b68da..403eed0 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -115,6 +115,7 @@ VirtIODevice *virtio_9p_init(DeviceState *dev, V9fsConf 
*conf)
         exit(1);
     }
 
+    s->ctx.export_flags = fse->export_flags;
     s->ctx.fs_root = g_strdup(fse->path);
     len = strlen(conf->tag);
     if (len > MAX_TAG_LEN) {
diff --git a/hw/9pfs/virtio-9p-handle.c b/hw/9pfs/virtio-9p-handle.c
index 5c8b5ed..91d757a 100644
--- a/hw/9pfs/virtio-9p-handle.c
+++ b/hw/9pfs/virtio-9p-handle.c
@@ -192,16 +192,27 @@ static ssize_t handle_preadv(FsContext *ctx, int fd, 
const struct iovec *iov,
 static ssize_t handle_pwritev(FsContext *ctx, int fd, const struct iovec *iov,
                               int iovcnt, off_t offset)
 {
+    ssize_t ret;
 #ifdef CONFIG_PREADV
-    return pwritev(fd, iov, iovcnt, offset);
+    ret = pwritev(fd, iov, iovcnt, offset);
 #else
     int err = lseek(fd, offset, SEEK_SET);
     if (err == -1) {
         return err;
     } else {
-        return writev(fd, iov, iovcnt);
+        ret = writev(fd, iov, iovcnt);
     }
 #endif
+    if (ret > 0 && ctx->export_flags & V9FS_IMMEDIATE_WRITEOUT) {
+        /*
+         * Initiate a writeback. This is not a data integrity sync.
+         * We want to ensure that we don't leave dirty pages in the cache
+         * after write when writeout=immediate is sepcified.
+         */
+        sync_file_range(fd, offset, ret,
+                        SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE);
+    }
+    return ret;
 }
 
 static int handle_chmod(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp)
diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c
index 9559ff6..7f4681a 100644
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -203,16 +203,28 @@ static ssize_t local_preadv(FsContext *ctx, int fd, const 
struct iovec *iov,
 static ssize_t local_pwritev(FsContext *ctx, int fd, const struct iovec *iov,
                              int iovcnt, off_t offset)
 {
+    ssize_t ret
+;
 #ifdef CONFIG_PREADV
-    return pwritev(fd, iov, iovcnt, offset);
+    ret = pwritev(fd, iov, iovcnt, offset);
 #else
     int err = lseek(fd, offset, SEEK_SET);
     if (err == -1) {
         return err;
     } else {
-        return writev(fd, iov, iovcnt);
+        ret = writev(fd, iov, iovcnt);
     }
 #endif
+    if (ret > 0 && ctx->export_flags & V9FS_IMMEDIATE_WRITEOUT) {
+        /*
+         * Initiate a writeback. This is not a data integrity sync.
+         * We want to ensure that we don't leave dirty pages in the cache
+         * after write when writeout=immediate is sepcified.
+         */
+        sync_file_range(fd, offset, ret,
+                        SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE);
+    }
+    return ret;
 }
 
 static int local_chmod(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp)
diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index c01c31a..3958788 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -80,6 +80,20 @@ void cred_init(FsCred *credp)
     credp->fc_rdev = -1;
 }
 
+static int get_dotl_openflags(V9fsState *s, int oflags)
+{
+    int flags;
+    /*
+     * Filter the client open flags
+     */
+    flags = oflags & ~(O_NOCTTY | O_ASYNC | O_CREAT);
+    /*
+     * Ignore direct disk access hint until the server supports it.
+     */
+    flags &= ~O_DIRECT;
+    return flags;
+}
+
 void v9fs_string_init(V9fsString *str)
 {
     str->data = NULL;
@@ -1598,10 +1612,7 @@ static void v9fs_open(void *opaque)
         err = offset;
     } else {
         if (s->proto_version == V9FS_PROTO_2000L) {
-            flags = mode;
-            flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
-            /* Ignore direct disk access hint until the server supports it. */
-            flags &= ~O_DIRECT;
+            flags = get_dotl_openflags(s, mode);
         } else {
             flags = omode_to_uflags(mode);
         }
@@ -1650,8 +1661,7 @@ static void v9fs_lcreate(void *opaque)
         goto out_nofid;
     }
 
-    /* Ignore direct disk access hint until the server supports it. */
-    flags &= ~O_DIRECT;
+    flags = get_dotl_openflags(pdu->s, flags);
     err = v9fs_co_open2(pdu, fidp, &name, gid,
                         flags | O_CREAT, mode, &stbuf);
     if (err < 0) {
diff --git a/qemu-config.c b/qemu-config.c
index 7a7854f..4559236 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -177,6 +177,9 @@ QemuOptsList qemu_fsdev_opts = {
         }, {
             .name = "security_model",
             .type = QEMU_OPT_STRING,
+        }, {
+            .name = "writeout",
+            .type = QEMU_OPT_STRING,
         },
         { /*End of list */ }
     },
@@ -199,6 +202,9 @@ QemuOptsList qemu_virtfs_opts = {
         }, {
             .name = "security_model",
             .type = QEMU_OPT_STRING,
+        }, {
+            .name = "writeout",
+            .type = QEMU_OPT_STRING,
         },
 
         { /*End of list */ }
diff --git a/qemu-options.hx b/qemu-options.hx
index dfbabd0..20fd7b5 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -525,7 +525,8 @@ ETEXI
 DEFHEADING(File system options:)
 
 DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev,
-    "-fsdev local,id=id,path=path,security_model=[mapped|passthrough|none]\n",
+    "-fsdev local,id=id,path=path,security_model=[mapped|passthrough|none]\n"
+    "       [,writeout=immediate]\n",
     QEMU_ARCH_ALL)
 
 STEXI
@@ -541,7 +542,7 @@ The specific Fstype will determine the applicable options.
 
 Options to each backend are described below.
 
address@hidden -fsdev local ,address@hidden ,address@hidden ,address@hidden
address@hidden -fsdev local ,address@hidden ,address@hidden 
,address@hidden,address@hidden
 
 Create a file-system-"device" for local-filesystem.
 
@@ -552,13 +553,17 @@ Create a file-system-"device" for local-filesystem.
 @option{security_model} specifies the security model to be followed.
 @option{security_model} is required.
 
address@hidden specifies whether to skip the host page cache.
address@hidden is an optional argument.
+
 @end table
 ETEXI
 
 DEFHEADING(Virtual File system pass-through options:)
 
 DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs,
-    "-virtfs 
local,path=path,mount_tag=tag,security_model=[mapped|passthrough|none]\n",
+    "-virtfs 
local,path=path,mount_tag=tag,security_model=[mapped|passthrough|none]\n"
+    "        [,writeout=immediate]\n",
     QEMU_ARCH_ALL)
 
 STEXI
@@ -574,7 +579,7 @@ The specific Fstype will determine the applicable options.
 
 Options to each backend are described below.
 
address@hidden -virtfs local ,address@hidden ,address@hidden ,address@hidden
address@hidden -virtfs local ,address@hidden ,address@hidden 
,address@hidden,address@hidden
 
 Create a Virtual file-system-pass through for local-filesystem.
 
@@ -585,10 +590,12 @@ Create a Virtual file-system-pass through for 
local-filesystem.
 @option{security_model} specifies the security model to be followed.
 @option{security_model} is required.
 
-
 @option{mount_tag} specifies the tag with which the exported file is mounted.
 @option{mount_tag} is required.
 
address@hidden specifies whether to skip the host page cache.
address@hidden is an optional argument.
+
 @end table
 ETEXI
 
diff --git a/vl.c b/vl.c
index dbf7778..08e1a95 100644
--- a/vl.c
+++ b/vl.c
@@ -2785,6 +2785,7 @@ int main(int argc, char **argv, char **envp)
             case QEMU_OPTION_virtfs: {
                 QemuOpts *fsdev;
                 QemuOpts *device;
+                const char *writeout;
 
                 olist = qemu_find_opts("virtfs");
                 if (!olist) {
@@ -2814,6 +2815,11 @@ int main(int argc, char **argv, char **envp)
                             qemu_opt_get(opts, "mount_tag"));
                     exit(1);
                 }
+
+                writeout = qemu_opt_get(opts, "writeout");
+                if (writeout) {
+                    qemu_opt_set(fsdev, "writeout", writeout);
+                }
                 qemu_opt_set(fsdev, "fstype", qemu_opt_get(opts, "fstype"));
                 qemu_opt_set(fsdev, "path", qemu_opt_get(opts, "path"));
                 qemu_opt_set(fsdev, "security_model",




reply via email to

[Prev in Thread] Current Thread [Next in Thread]