qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RESEND][PATCH 0/3] Fix guest time drift under heavy lo


From: Anthony Liguori
Subject: Re: [Qemu-devel] [RESEND][PATCH 0/3] Fix guest time drift under heavy load.
Date: Tue, 11 Nov 2008 14:17:49 -0600
User-agent: Thunderbird 2.0.0.17 (X11/20080925)

Gleb Natapov wrote:
On Mon, Nov 10, 2008 at 09:46:12AM -0600, Anthony Liguori wrote:
-usbdevice tablet has nothing to do with it. Qemu misses interrupt even
without this option and with SDL screen it misses them in bunches when
SDL redraws a screen. In case of vnc qemu misses interrupt because of
fsync() call in raw_flush(), or so my instrumentation shows.

Can you give this patch a spin?

This introduces a bdrv_aio_flush() which will wait for all existing AIO operations to complete before indicating completion. It also fixes up IDE. Fixing up SCSI will be a little more tricky but not much. Since we now use O_DSYNC, it's unnecessary to do an fsync (or an fdatasync).

Assuming you're using IDE, this should eliminate any delays from fsync. SDL delays are unavoidable because it's going to come down to SDL doing sychronous updates to the X server. The proper long term solution here would be to put SDL in it's own thread but I'm not too worried about this case. It's a good argument for timer fixups but I think it's important to identify all the reasons we're losing ticks and if we can do something about it, we should.

Regards,

Anthony Liguori

--
                        Gleb.

diff --git a/block-qcow.c b/block-qcow.c
index 1fecf30..ed7a62d 100644
--- a/block-qcow.c
+++ b/block-qcow.c
@@ -712,6 +712,13 @@ static BlockDriverAIOCB *qcow_aio_write(BlockDriverState 
*bs,
     return &acb->common;
 }
 
+static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVQcowState *s = bs->opaque;
+    return bdrv_aio_flush(s->hd, cb, opaque);
+}
+
 static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     QCowAIOCB *acb = (QCowAIOCB *)blockacb;
@@ -868,12 +875,6 @@ static int qcow_write_compressed(BlockDriverState *bs, 
int64_t sector_num,
     return 0;
 }
 
-static void qcow_flush(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    bdrv_flush(s->hd);
-}
-
 static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     BDRVQcowState *s = bs->opaque;
@@ -890,13 +891,14 @@ BlockDriver bdrv_qcow = {
     NULL,
     qcow_close,
     qcow_create,
-    qcow_flush,
+    NULL,
     qcow_is_allocated,
     qcow_set_key,
     qcow_make_empty,
 
     .bdrv_aio_read = qcow_aio_read,
     .bdrv_aio_write = qcow_aio_write,
+    .bdrv_aio_flush = qcow_aio_flush,
     .bdrv_aio_cancel = qcow_aio_cancel,
     .aiocb_size = sizeof(QCowAIOCB),
     .bdrv_write_compressed = qcow_write_compressed,
diff --git a/block-qcow2.c b/block-qcow2.c
index dc73769..69523a5 100644
--- a/block-qcow2.c
+++ b/block-qcow2.c
@@ -1401,6 +1401,13 @@ static BlockDriverAIOCB *qcow_aio_write(BlockDriverState 
*bs,
     return &acb->common;
 }
 
+static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVQcowState *s = bs->opaque;
+    return bdrv_aio_flush(s->hd, cb, opaque);
+}
+
 static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     QCowAIOCB *acb = (QCowAIOCB *)blockacb;
@@ -1632,12 +1639,6 @@ static int qcow_write_compressed(BlockDriverState *bs, 
int64_t sector_num,
     return 0;
 }
 
-static void qcow_flush(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    bdrv_flush(s->hd);
-}
-
 static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     BDRVQcowState *s = bs->opaque;
@@ -2637,13 +2638,14 @@ BlockDriver bdrv_qcow2 = {
     NULL,
     qcow_close,
     qcow_create,
-    qcow_flush,
+    NULL,
     qcow_is_allocated,
     qcow_set_key,
     qcow_make_empty,
 
     .bdrv_aio_read = qcow_aio_read,
     .bdrv_aio_write = qcow_aio_write,
+    .bdrv_aio_flush = qcow_aio_flush,
     .bdrv_aio_cancel = qcow_aio_cancel,
     .aiocb_size = sizeof(QCowAIOCB),
     .bdrv_write_compressed = qcow_write_compressed,
diff --git a/block-raw-posix.c b/block-raw-posix.c
index c06e38d..8cc1dba 100644
--- a/block-raw-posix.c
+++ b/block-raw-posix.c
@@ -449,6 +449,7 @@ typedef struct RawAIOCB {
     int fd;
     struct aiocb aiocb;
     struct RawAIOCB *next;
+    uint64_t flush_generation;
     int ret;
 } RawAIOCB;
 
@@ -456,24 +457,46 @@ typedef struct PosixAioState
 {
     int rfd, wfd;
     RawAIOCB *first_aio;
+    RawAIOCB *first_aio_flush;
+    uint64_t flush_generation;
 } PosixAioState;
 
 static int raw_fd_pool_get(BDRVRawState *s)
 {
     int i;
 
-    for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
-        /* already in use */
-        if (s->fd_pool[i] != -1)
-            continue;
+    if (s->fd_inuse == 0) {
+        s->fd_inuse++;
+        return s->fd;
+    }
 
-        /* try to dup file descriptor */
-        s->fd_pool[i] = dup(s->fd);
-        if (s->fd_pool[i] != -1)
+    /* try to find an unused fd that has
+     * already been opened */
+    for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
+        if (s->fd_pool_inuse[i] == 0 &&
+            s->fd_pool[i] != -1) {
+            s->fd_pool_inuse[i]++;
             return s->fd_pool[i];
+        }
+    }
+
+    /* try to find an unused slot that we can
+     * dup the main fd from */
+    for (i = 0; i < RAW_FD_POOL_SIZE; I++) {
+        if (s->fd_pool_inuse[i] == 0 &&
+            s->fd_pool[i] == -1) {
+            s->fd_pool[i] = dup(s->fd);
+            if (s->fd_pool[i] != -1) {
+                s->fd_pool_inuse[i]++;
+                return s->fd_pool[i];
+            }
+        }
     }
 
-    /* we couldn't dup the file descriptor so just use the main one */
+    /* Could not find an fd in the pool,
+     * use the main fd */
+    s->fd_inuse++;
+
     return s->fd;
 }
 
@@ -482,10 +505,15 @@ static void raw_fd_pool_put(RawAIOCB *acb)
     BDRVRawState *s = acb->common.bs->opaque;
     int i;
 
+    if (s->fd == acb->fd) {
+        s->fd_inuse--;
+        return;
+    }
+
     for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
-        if (s->fd_pool[i] == acb->fd) {
-            close(s->fd_pool[i]);
-            s->fd_pool[i] = -1;
+        if (s->fd_pool_inuse[i] > 0 &&
+            s->fd_pool[i] == acb->fd) {
+            s->fd_pool_inuse[i]--;
         }
     }
 }
@@ -496,6 +524,7 @@ static void posix_aio_read(void *opaque)
     RawAIOCB *acb, **pacb;
     int ret;
     ssize_t len;
+    uint64_t min_generation = ~0ULL;
 
     do {
         char byte;
@@ -538,11 +567,29 @@ static void posix_aio_read(void *opaque)
                 qemu_aio_release(acb);
                 break;
             } else {
+                min_generation = MIN(min_generation,
+                                     acb->flush_generation);
                 pacb = &acb->next;
             }
         }
     }
  the_end: ;
+    pacb = &s->first_aio_flush;
+
+    while (*pacb) {
+        acb = *pacb;
+        if (acb->flush_generation < min_generation)
+            break;
+        else
+            pacb = &acb->next;
+    }
+
+    while (*pacb) {
+        acb = *pacb;
+        *pacb = acb->next;
+        acb->common.cb(acb->common.opaque, 0);
+        qemu_aio_release(acb);
+    }
 }
 
 static int posix_aio_flush(void *opaque)
@@ -595,6 +642,8 @@ static int posix_aio_init(void)
 
     qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
 
+    s->flush_generation = 0;
+
 #if defined(__linux__)
     {
         struct aioinit ai;
@@ -641,6 +690,7 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
     else
         acb->aiocb.aio_nbytes = nb_sectors * 512;
     acb->aiocb.aio_offset = sector_num * 512;
+    acb->flush_generation = posix_aio_state->flush_generation;
     acb->next = posix_aio_state->first_aio;
     posix_aio_state->first_aio = acb;
     return acb;
@@ -715,6 +765,25 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState 
*bs,
     return &acb->common;
 }
 
+static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVRawState *s = bs->opaque;
+    RawAIOCB *acb;
+
+    acb = qemu_aio_get(bs, cb, opaque);
+    if (!acb)
+        return NULL;
+
+    posix_aio_state->flush_generation++;
+    acb->flush_generation = posix_aio_state->flush_generation;
+
+    acb->next = posix_aio_state->first_aio_flush;
+    posix_aio_state->first_aio_flush = acb;
+
+    return &acb->common;
+}
+
 static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     int ret;
@@ -889,6 +958,7 @@ BlockDriver bdrv_raw = {
 #ifdef CONFIG_AIO
     .bdrv_aio_read = raw_aio_read,
     .bdrv_aio_write = raw_aio_write,
+    .bdrv_aio_flush = raw_aio_flush,
     .bdrv_aio_cancel = raw_aio_cancel,
     .aiocb_size = sizeof(RawAIOCB),
 #endif
diff --git a/block.c b/block.c
index 0f1734e..110bbe3 100644
--- a/block.c
+++ b/block.c
@@ -48,6 +48,8 @@ static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState 
*bs,
 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
         int64_t sector_num, const uint8_t *buf, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
+static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque);
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                         uint8_t *buf, int nb_sectors);
@@ -129,6 +131,7 @@ static void bdrv_register(BlockDriver *bdrv)
         /* add AIO emulation layer */
         bdrv->bdrv_aio_read = bdrv_aio_read_em;
         bdrv->bdrv_aio_write = bdrv_aio_write_em;
+        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
         bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
         bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
     } else if (!bdrv->bdrv_read && !bdrv->bdrv_pread) {
@@ -1168,6 +1171,19 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, 
int64_t sector_num,
     return ret;
 }
 
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+                                 BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (!drv)
+        return NULL;
+    if (bs->read_only)
+        return NULL;
+
+    return drv->bdrv_aio_flush(bs, cb, opaque);
+}
+
 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
 {
     BlockDriver *drv = acb->bs->drv;
@@ -1218,6 +1234,20 @@ static BlockDriverAIOCB 
*bdrv_aio_write_em(BlockDriverState *bs,
     return &acb->common;
 }
 
+static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverAIOCBSync *acb;
+
+    acb = qemu_aio_get(bs, cb, opaque);
+    if (!acb->bh)
+        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
+    bdrv_flush(bs);
+    acb->ret = 0;
+    qemu_bh_schedule(acb->bh);
+    return &acb->common;
+}
+
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
 {
     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
diff --git a/block.h b/block.h
index 12053d9..3139750 100644
--- a/block.h
+++ b/block.h
@@ -89,6 +89,8 @@ BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t 
sector_num,
 BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
                                  const uint8_t *buf, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+                                 BlockDriverCompletionFunc *cb, void *opaque);
 void bdrv_aio_cancel(BlockDriverAIOCB *acb);
 
 int qemu_key_check(BlockDriverState *bs, const char *name);
diff --git a/block_int.h b/block_int.h
index e83fd2c..a342e18 100644
--- a/block_int.h
+++ b/block_int.h
@@ -54,6 +54,8 @@ struct BlockDriver {
     BlockDriverAIOCB *(*bdrv_aio_write)(BlockDriverState *bs,
         int64_t sector_num, const uint8_t *buf, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
+    BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque);
     void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb);
     int aiocb_size;
 
diff --git a/hw/ide.c b/hw/ide.c
index 0b672c8..7ed922c 100644
--- a/hw/ide.c
+++ b/hw/ide.c
@@ -2009,6 +2009,13 @@ static void ide_clear_hob(IDEState *ide_if)
     ide_if[1].select &= ~(1 << 7);
 }
 
+static void ide_flush_cb(void *opaque, int ret)
+{
+    IDEState *s = opaque;
+    s->status = READY_STAT | SEEK_STAT;
+    ide_set_irq(s);
+}
+
 static void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     IDEState *ide_if = opaque;
@@ -2278,9 +2285,7 @@ static void ide_ioport_write(void *opaque, uint32_t addr, 
uint32_t val)
         case WIN_FLUSH_CACHE:
         case WIN_FLUSH_CACHE_EXT:
             if (s->bs)
-                bdrv_flush(s->bs);
-           s->status = READY_STAT | SEEK_STAT;
-            ide_set_irq(s);
+                bdrv_aio_flush(s->bs, ide_flush_cb, s);
             break;
         case WIN_STANDBY:
         case WIN_STANDBY2:

reply via email to

[Prev in Thread] Current Thread [Next in Thread]