From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH v1 00/17] dataplane: optimization and multi virtqueue support
Date: Mon, 11 Aug 2014 21:37:01 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.6.0

On 10/08/2014 05:46, Ming Lei wrote:
> Hi Kevin, Paolo, Stefan and all,
> 
> 
> On Wed, 6 Aug 2014 10:48:55 +0200
> Kevin Wolf <address@hidden> wrote:
> 
>> On 06.08.2014 at 07:33, Ming Lei wrote:
> 
>>
>> Anyhow, the coroutine version of your benchmark is buggy: it leaks all
>> coroutines instead of exiting them, so it can't make any use of the
>> coroutine pool. On my laptop, I get this (where "fixed coro" is a version
>> that simply removes the yield at the end):
>>
>>                 | bypass        | fixed coro    | buggy coro
>> ----------------+---------------+---------------+--------------
>> time            | 1.09s         | 1.10s         | 1.62s
>> L1-dcache-loads | 921,836,360   | 932,781,747   | 1,298,067,438
>> insns per cycle | 2.39          | 2.39          | 1.90
>>
>> This begs the question whether you see a similar effect on a real QEMU,
>> or whether the coroutine pool is simply still not big enough there. With
>> correct use of coroutines, the difference seems to be barely measurable
>> even without any I/O involved.
> 
> Now I have fixed the coroutine leak bug. The previous crypt benchmark had a
> rather high per-iteration load, which kept the rate very low (~40K
> operations/sec), so I wrote a new and simpler one that can generate hundreds
> of thousands of operations per second, a rate that should match some fast
> storage devices, and it does show that the effect of coroutines is not small.
> 
> In the extreme case where only a getppid() syscall is run in each iteration,
> with coroutines only 3M operations/sec can be reached, while without
> coroutines the number reaches 16M/sec: more than a 4x difference!

I should be on vacation, but I'm following a couple of threads on the mailing
list, and I'm a bit tired of hearing the same argument again and again...

Asynchronous I/O behaves so differently from any synchronous workload that it
is hard to be sure that such microbenchmarks are meaningful.
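
For context, here is a rough sketch of the getppid()-per-iteration
microbenchmark being debated, in the spirit of the lifecycle tests in
tests/test-coroutine.c.  The include paths and the
qemu_coroutine_create()/qemu_coroutine_enter() signatures are assumptions
based on the API of this period and may differ in other QEMU versions; it is
illustrative only, not the benchmark Ming Lei actually ran.

/* Illustrative sketch: one short-lived coroutine per iteration versus a
 * direct call.  Header path and signatures assumed for this QEMU version. */
#include "qemu-common.h"
#include "block/coroutine.h"
#include <unistd.h>

static void coroutine_fn co_entry(void *opaque)
{
    getppid();
    /* Return instead of yielding: a terminated coroutine can go back to the
     * coroutine pool, whereas a final yield leaks it (the bug in the earlier
     * benchmark). */
}

static void bench(long iterations, bool use_coroutine)
{
    for (long i = 0; i < iterations; i++) {
        if (use_coroutine) {
            Coroutine *co = qemu_coroutine_create(co_entry);
            qemu_coroutine_enter(co, NULL);
        } else {
            getppid();
        }
    }
}

Timing the two variants of this loop measures the per-operation coroutine
overhead the numbers above are about; it says nothing about how that overhead
amortizes once real, asynchronous I/O is in the path.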

The patch below is basically the minimal change needed to bypass coroutines.
Of course the block.c part is not acceptable as is (the change to
refresh_total_sectors is broken, the others are just ugly), but it is a start.
Please run it with your fio workloads, or write an aio-based version of a
qemu-img/qemu-io *I/O* benchmark.
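
As a starting point for such an aio-based benchmark, a minimal standalone read
loop using Linux libaio directly might look like the sketch below (illustrative
assumptions: O_DIRECT, 4 KB requests, a single request in flight, 100,000
iterations over the first ~100 MB of a file or device given on the command
line; build with "gcc -O2 bench.c -laio").  It is a sketch, not something taken
from this thread.

#define _GNU_SOURCE            /* for O_DIRECT */
#include <libaio.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <file-or-device>\n", argv[0]);
        return 1;
    }

    int fd = open(argv[1], O_RDONLY | O_DIRECT);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    io_context_t ctx;
    memset(&ctx, 0, sizeof(ctx));
    if (io_setup(1, &ctx) < 0) {               /* queue depth 1 */
        fprintf(stderr, "io_setup failed\n");
        return 1;
    }

    void *buf;
    if (posix_memalign(&buf, 4096, 4096)) {    /* O_DIRECT wants aligned buffers */
        return 1;
    }

    for (long i = 0; i < 100000; i++) {
        struct iocb cb, *cbs[1] = { &cb };
        struct io_event ev;

        /* One 4 KB read per iteration, cycling over the first ~100 MB. */
        io_prep_pread(&cb, fd, buf, 4096, (i % 25600) * 4096);
        if (io_submit(ctx, 1, cbs) != 1) {
            fprintf(stderr, "io_submit failed\n");
            return 1;
        }
        if (io_getevents(ctx, 1, 1, &ev, NULL) != 1) {
            fprintf(stderr, "io_getevents failed\n");
            return 1;
        }
    }

    io_destroy(ctx);
    close(fd);
    return 0;
}

Run under time(1) or perf stat; raising the queue depth and issuing several
requests before reaping them would get much closer to what the guest-visible
fio workloads do than a syscall-only microbenchmark.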

Paolo

diff --git a/block.c b/block.c
index 3e252a2..0b6e9cf 100644
--- a/block.c
+++ b/block.c
@@ -704,7 +704,7 @@ static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
         return 0;
 
     /* query actual device if possible, otherwise just trust the hint */
-    if (drv->bdrv_getlength) {
+    if (!hint && drv->bdrv_getlength) {
         int64_t length = drv->bdrv_getlength(bs);
         if (length < 0) {
             return length;
@@ -2651,9 +2651,6 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
     if (!bdrv_is_inserted(bs))
         return -ENOMEDIUM;
 
-    if (bs->growable)
-        return 0;
-
     len = bdrv_getlength(bs);
 
     if (offset < 0)
@@ -3107,7 +3104,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
     if (!drv) {
         return -ENOMEDIUM;
     }
-    if (bdrv_check_byte_request(bs, offset, bytes)) {
+    if (!bs->growable && bdrv_check_byte_request(bs, offset, bytes)) {
         return -EIO;
     }
 
@@ -3347,7 +3344,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
     if (bs->read_only) {
         return -EACCES;
     }
-    if (bdrv_check_byte_request(bs, offset, bytes)) {
+    if (!bs->growable && bdrv_check_byte_request(bs, offset, bytes)) {
         return -EIO;
     }
 
@@ -4356,6 +4353,20 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
 {
     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
 
+    if (bs->drv && bs->drv->bdrv_aio_readv &&
+        bs->drv->bdrv_aio_readv != bdrv_aio_readv_em &&
+        nb_sectors >= 0 && nb_sectors <= (UINT_MAX >> BDRV_SECTOR_BITS) &&
+        !bdrv_check_byte_request(bs, sector_num << BDRV_SECTOR_BITS,
+                                 nb_sectors << BDRV_SECTOR_BITS) &&
+        !bs->copy_on_read && !bs->io_limits_enabled &&
+        bs->request_alignment <= BDRV_SECTOR_SIZE) {
+        BlockDriverAIOCB *acb =
+            bs->drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
+                                    cb, opaque);
+        assert(acb);
+        return acb;
+    }
+
     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
                                  cb, opaque, false);
 }
@@ -4366,6 +4377,24 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
 {
     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
 
+    if (bs->drv && bs->drv->bdrv_aio_writev &&
+        bs->drv->bdrv_aio_writev != bdrv_aio_writev_em &&
+        nb_sectors >= 0 && nb_sectors <= (UINT_MAX >> BDRV_SECTOR_BITS) &&
+        !bdrv_check_byte_request(bs, sector_num << BDRV_SECTOR_BITS,
+                                 nb_sectors << BDRV_SECTOR_BITS) &&
+        !bs->read_only && !bs->io_limits_enabled &&
+        bs->request_alignment <= BDRV_SECTOR_SIZE &&
+        bs->enable_write_cache &&
+        QLIST_EMPTY(&bs->before_write_notifiers.notifiers) &&
+        bs->wr_highest_sector >= sector_num + nb_sectors - 1 &&
+        QLIST_EMPTY(&bs->dirty_bitmaps)) {
+        BlockDriverAIOCB *acb =
+            bs->drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
+                                     cb, opaque);
+        assert(acb);
+        return acb;
+    }
+
     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
                                  cb, opaque, true);
 }
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 492f58d..b86f26b 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -48,6 +48,22 @@ static int raw_reopen_prepare(BDRVReopenState *reopen_state,
     return 0;
 }
 
+static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs, int64_t sector_num,
+                                     QEMUIOVector *qiov, int nb_sectors,
+                                    BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+    return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
+}
+
+static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs, int64_t sector_num,
+                                      QEMUIOVector *qiov, int nb_sectors,
+                                     BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
+}
+
 static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *qiov)
 {
@@ -181,6 +197,8 @@ static BlockDriver bdrv_raw = {
     .bdrv_open            = &raw_open,
     .bdrv_close           = &raw_close,
     .bdrv_create          = &raw_create,
+    .bdrv_aio_readv       = &raw_aio_readv,
+    .bdrv_aio_writev      = &raw_aio_writev,
     .bdrv_co_readv        = &raw_co_readv,
     .bdrv_co_writev       = &raw_co_writev,
     .bdrv_co_write_zeroes = &raw_co_write_zeroes,


