
Re: [Qemu-devel] [PATCH] Align file accesses with cache=off (O_DIRECT)


From: Kevin Wolf
Subject: Re: [Qemu-devel] [PATCH] Align file accesses with cache=off (O_DIRECT)
Date: Wed, 30 Apr 2008 11:21:56 +0200
User-agent: Thunderbird 2.0.0.12 (X11/20071114)

Laurent Vivier wrote:
>> Hm, yes. We could call raw_pread in raw_aio_read when O_DIRECT is used
>> and the request is not properly aligned. Is this what you meant?
>
> No, it was just a (stupid) comment. I think we must not convert
> asynchronous I/O to synchronous I/O.

Why not? If I'm not mistaken (but if you think it's wrong, I probably am), the only possible drawback should be performance. And we're not talking about normal guest I/O here; those accesses are aligned anyway.

>> I think we agree that item 3 is the main reason why one would use
>> O_DIRECT with qemu. In terms of reliability, it is important that the
>> data really is written to the disk when the guest OS thinks so. But
>> when, for example, qemu crashes, I don't think it's too important
>> whether 40% or 50% of a snapshot has already been written - it's
>> unusable anyway. A sync afterwards could be enough there.
>
> I don't speak about "qemu crashes" but about "host crashes".

Well, I've never seen a host crash where qemu survived. ;-)

What I wanted to say is: if the snapshot is incomplete and not usable anyway, why bother whether a few bytes more or less have been written?

> I'm not in the spirit of "my patch is better than yours" (and I don't
> think so); but could you try to test my patch? If I remember correctly,
> it manages all cases, and this could help you find a solution (or
> perhaps you can add the block-qcow2.c part of my patch to yours).

I really didn't want to say that your code is bad or that it wouldn't work or anything like that. I just tried it, and it works fine as well.

But the approaches are quite different. Your patch makes every user of the block driver functions aware that O_DIRECT might be in effect. My patch tries to do things in one common place, even though possibly at the cost of performance (however, I'm not sure anymore about the bad performance now that I use your fcntl method).

So what I like about my patch is that it is one change in one place which should make everything work. Your patch could still be of use to speed things up, e.g. by making qcow2 aware that there is something like O_DIRECT and having it align its buffers in the first place (I would have to do a real benchmark with both patches).
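
As an aside, here is a purely illustrative sketch (it appears in neither
patch, and the helper name alloc_sector_buf is made up) of what "aligning
its buffers in the first place" could look like, using posix_memalign:

#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#define SECTOR_SIZE 512

/* Allocate a zeroed buffer usable for O_DIRECT I/O: posix_memalign
 * guarantees the address is a multiple of SECTOR_SIZE, which is the
 * minimum buffer alignment O_DIRECT requires. */
static uint8_t *alloc_sector_buf(size_t nb_sectors)
{
    void *buf;

    if (posix_memalign(&buf, SECTOR_SIZE, nb_sectors * SECTOR_SIZE) != 0)
        return NULL;

    memset(buf, 0, nb_sectors * SECTOR_SIZE);
    return buf;
}

A buffer allocated like this passes the (uintptr_t) buf % 512 check in
the patch below, so requests on it never take the synchronous fallback.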

I'll attach the current version of my patch, which emulates AIO through synchronous requests for unaligned buffers. In comparison, the bootup of my test VM was slightly faster with your patch, but loading/saving snapshots was much faster with mine. Perhaps I'll try to combine them to get the best of both.

Kevin
Index: block-raw-posix.c
===================================================================
--- block-raw-posix.c.orig
+++ block-raw-posix.c
@@ -77,6 +77,7 @@
 typedef struct BDRVRawState {
     int fd;
     int type;
+    int flags;
     unsigned int lseek_err_cnt;
 #if defined(__linux__)
     /* linux floppy specific */
@@ -111,6 +112,7 @@ static int raw_open(BlockDriverState *bs
         open_flags |= O_DIRECT;
 #endif
 
+    s->flags = open_flags;
     s->type = FTYPE_FILE;
 
     fd = open(filename, open_flags, 0644);
@@ -141,7 +143,14 @@ static int raw_open(BlockDriverState *bs
 #endif
 */
 
-static int raw_pread(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be 512-byte aligned in that case.
+ *
+ * This function may be called without such alignment if the caller
+ * ensures that O_DIRECT is not in effect.
+ */
+static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
                      uint8_t *buf, int count)
 {
     BDRVRawState *s = bs->opaque;
@@ -194,7 +203,14 @@ label__raw_read__success:
     return ret;
 }
 
-static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be 512-byte aligned in that case.
+ *
+ * This function may be called without such alignment if the caller
+ * ensures that O_DIRECT is not in effect.
+ */
+static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
                       const uint8_t *buf, int count)
 {
     BDRVRawState *s = bs->opaque;
@@ -230,6 +246,67 @@ label__raw_write__success:
     return ret;
 }
 
+
+#ifdef O_DIRECT
+/*
+ * offset and count are in bytes and possibly unaligned. For files opened
+ * with O_DIRECT, the flag is temporarily cleared for requests that are
+ * not properly aligned before raw_pread_aligned does the actual read.
+ */
+static int raw_pread(BlockDriverState *bs, int64_t offset,
+                     uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (unlikely((s->flags & O_DIRECT) &&
+            (offset % 512 || count % 512 || (uintptr_t) buf % 512))) {
+
+        int ret;
+
+        /* Temporarily disable O_DIRECT for the unaligned access */
+        fcntl(s->fd, F_SETFL, s->flags & ~O_DIRECT);
+        ret = raw_pread_aligned(bs, offset, buf, count);
+        fcntl(s->fd, F_SETFL, s->flags);
+
+        return ret;
+
+    } else {
+        return raw_pread_aligned(bs, offset, buf, count);
+    }
+}
+
+/*
+ * offset and count are in bytes and possibly unaligned. For files opened
+ * with O_DIRECT, the flag is temporarily cleared for requests that are
+ * not properly aligned before raw_pwrite_aligned does the actual write.
+ */
+static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+                      const uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (unlikely((s->flags & O_DIRECT) &&
+            (offset % 512 || count % 512 || (uintptr_t) buf % 512))) {
+
+        int ret;
+
+        /* Temporarily disable O_DIRECT for the unaligned access */
+        fcntl(s->fd, F_SETFL, s->flags & ~O_DIRECT);
+        ret = raw_pwrite_aligned(bs, offset, buf, count);
+        fcntl(s->fd, F_SETFL, s->flags);
+
+        return ret;
+    } else {
+        return raw_pwrite_aligned(bs, offset, buf, count);
+    }
+}
+
+#else
+#define raw_pread raw_pread_aligned
+#define raw_pwrite raw_pwrite_aligned
+#endif
+
+
 /***********************************************************/
 /* Unix AIO using POSIX AIO */
 
@@ -402,10 +479,26 @@ static BlockDriverAIOCB *raw_aio_read(Bl
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     RawAIOCB *acb;
+    BDRVRawState *s = bs->opaque;
+
+    /*
+     * If O_DIRECT is used and the buffer is not properly aligned,
+     * fall back to synchronous I/O.
+     */
+    if (unlikely((s->flags & O_DIRECT) && ((uintptr_t) buf % 512))) {
+        int ret;
+
+        acb = qemu_aio_get(bs, cb, opaque);
+        ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
+        acb->common.cb(acb->common.opaque, ret);
+        qemu_aio_release(acb);
+        return &acb->common;
+    }
 
     acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
     if (!acb)
         return NULL;
+
     if (aio_read(&acb->aiocb) < 0) {
         qemu_aio_release(acb);
         return NULL;
@@ -418,6 +511,21 @@ static BlockDriverAIOCB *raw_aio_write(B
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     RawAIOCB *acb;
+    BDRVRawState *s = bs->opaque;
+
+    /*
+     * If O_DIRECT is used and the buffer is not properly aligned,
+     * fall back to synchronous I/O.
+     */
+    if (unlikely((s->flags & O_DIRECT) && ((uintptr_t) buf % 512))) {
+        int ret;
+
+        acb = qemu_aio_get(bs, cb, opaque);
+        ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
+        acb->common.cb(acb->common.opaque, ret);
+        qemu_aio_release(acb);
+        return &acb->common;
+    }
 
     acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
     if (!acb)
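
For readers who want to see the fcntl trick in isolation: the following
standalone program is only an illustration and is not part of the patch
(the file name and sizes are made up). It temporarily clears O_DIRECT
around one unaligned read and then restores the original flags:

#define _GNU_SOURCE             /* for O_DIRECT on Linux */
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    char buf[100];              /* deliberately unaligned size */
    ssize_t ret;
    int flags;

    int fd = open("testfile", O_RDWR | O_CREAT | O_DIRECT, 0644);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    flags = fcntl(fd, F_GETFL);

    /* Clear O_DIRECT so this unaligned read goes through the page
     * cache instead of failing with EINVAL... */
    fcntl(fd, F_SETFL, flags & ~O_DIRECT);
    ret = pread(fd, buf, sizeof(buf), 3);   /* unaligned offset, too */

    /* ...and restore the original flags afterwards. */
    fcntl(fd, F_SETFL, flags);

    printf("pread returned %zd\n", ret);
    close(fd);
    return 0;
}

Note that F_SETFL changes the flags on the open file description, which
is shared by all users of the fd, so this is only safe while no other
request is in flight on the same fd - one more reason to treat it as a
fallback path rather than the common case.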
