qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 4/4] qcow2: Use block-queue


From: Kevin Wolf
Subject: [Qemu-devel] [PATCH 4/4] qcow2: Use block-queue
Date: Mon, 13 Dec 2010 17:29:07 +0100

This changes qcow2 to use block-queue for metadata I/O, so that the number of
bdrv_flush calls during heavy cluster allocation is greatly reduced.

Most of this is a mechanical conversion of bdrv_pwrite_sync to a sequence of
blkqueue_pwrite and blkqueue_barrier (and of bdrv_pread to blkqueue_pread).
Also, most functions now take a QcowRequest structure instead of a
BlockDriverState. The QcowRequest contains the current block-queue context and
refers to the BlockDriverState of the image.

The more interesting parts include the bdrv_(aio_)flush implementation, which
has to deal with errors.

Signed-off-by: Kevin Wolf <address@hidden>
---
 block/qcow2-cluster.c  |  139 ++++++++++++++++++-------------
 block/qcow2-refcount.c |  217 ++++++++++++++++++++++++++++--------------------
 block/qcow2-snapshot.c |  106 ++++++++++++++++++------
 block/qcow2.c          |  144 +++++++++++++++++++++++++++++---
 block/qcow2.h          |   33 +++++---
 5 files changed, 440 insertions(+), 199 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index b040208..1dccb79 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -28,8 +28,9 @@
 #include "block_int.h"
 #include "block/qcow2.h"
 
-int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
+int qcow2_grow_l1_table(QcowRequest *req, int min_size, bool exact_size)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int new_l1_size, new_l1_size2, ret, i;
     uint64_t *new_l1_table;
@@ -62,17 +63,19 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
 
     /* write new table (align to cluster) */
     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
-    new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
+    new_l1_table_offset = qcow2_alloc_clusters(req, new_l1_size2);
     if (new_l1_table_offset < 0) {
         qemu_free(new_l1_table);
         return new_l1_table_offset;
     }
-    bdrv_flush(bs->file);
+    blkqueue_barrier(&req->bq_context);
 
     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
     for(i = 0; i < s->l1_size; i++)
         new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
-    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
+    ret = blkqueue_pwrite(&req->bq_context, new_l1_table_offset, new_l1_table,
+        new_l1_size2);
+    blkqueue_barrier(&req->bq_context);
     if (ret < 0)
         goto fail;
     for(i = 0; i < s->l1_size; i++)
@@ -82,24 +85,27 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
     cpu_to_be32w((uint32_t*)data, new_l1_size);
     cpu_to_be64w((uint64_t*)(data + 4), new_l1_table_offset);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
+    ret = blkqueue_pwrite(&req->bq_context, offsetof(QCowHeader, l1_size),
+        data, sizeof(data));
+    blkqueue_barrier(&req->bq_context);
     if (ret < 0) {
         goto fail;
     }
     qemu_free(s->l1_table);
-    qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
+    qcow2_free_clusters(req, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
     s->l1_table_offset = new_l1_table_offset;
     s->l1_table = new_l1_table;
     s->l1_size = new_l1_size;
     return 0;
  fail:
     qemu_free(new_l1_table);
-    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2);
+    qcow2_free_clusters(req, new_l1_table_offset, new_l1_size2);
     return ret;
 }
 
-void qcow2_l2_cache_reset(BlockDriverState *bs)
+void qcow2_l2_cache_reset(QcowRequest *req)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
 
     memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
@@ -107,8 +113,9 @@ void qcow2_l2_cache_reset(BlockDriverState *bs)
     memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
 }
 
-static inline int l2_cache_new_entry(BlockDriverState *bs)
+static inline int l2_cache_new_entry(QcowRequest *req)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     uint32_t min_count;
     int min_index, i;
@@ -165,9 +172,10 @@ static uint64_t *seek_l2_table(BDRVQcowState *s, uint64_t l2_offset)
  * the image file failed.
  */
 
-static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
+static int l2_load(QcowRequest *req, uint64_t l2_offset,
     uint64_t **l2_table)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int min_index;
     int ret;
@@ -181,14 +189,14 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
 
     /* not found: load a new entry in the least used one */
 
-    min_index = l2_cache_new_entry(bs);
+    min_index = l2_cache_new_entry(req);
     *l2_table = s->l2_cache + (min_index << s->l2_bits);
 
     BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
-    ret = bdrv_pread(bs->file, l2_offset, *l2_table,
+    ret = blkqueue_pread(&req->bq_context, l2_offset, *l2_table,
         s->l2_size * sizeof(uint64_t));
     if (ret < 0) {
-        qcow2_l2_cache_reset(bs);
+        qcow2_l2_cache_reset(req);
         return ret;
     }
 
@@ -203,8 +211,9 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
  * and we really don't want bdrv_pread to perform a read-modify-write)
  */
 #define L1_ENTRIES_PER_SECTOR (512 / 8)
-static int write_l1_entry(BlockDriverState *bs, int l1_index)
+static int write_l1_entry(QcowRequest *req, int l1_index)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     uint64_t buf[L1_ENTRIES_PER_SECTOR];
     int l1_start_index;
@@ -216,8 +225,9 @@ static int write_l1_entry(BlockDriverState *bs, int l1_index)
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
-    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
-        buf, sizeof(buf));
+    ret = blkqueue_pwrite(&req->bq_context,
+        s->l1_table_offset + 8 * l1_start_index, buf, sizeof(buf));
+    blkqueue_barrier(&req->bq_context);
     if (ret < 0) {
         return ret;
     }
@@ -235,8 +245,9 @@ static int write_l1_entry(BlockDriverState *bs, int l1_index)
  *
  */
 
-static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
+static int l2_allocate(QcowRequest *req, int l1_index, uint64_t **table)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int min_index;
     uint64_t old_l2_offset;
@@ -248,15 +259,15 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
 
     /* allocate a new l2 entry */
 
-    l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
+    l2_offset = qcow2_alloc_clusters(req, s->l2_size * sizeof(uint64_t));
     if (l2_offset < 0) {
         return l2_offset;
     }
-    bdrv_flush(bs->file);
+    blkqueue_barrier(&req->bq_context);
 
     /* allocate a new entry in the l2 cache */
 
-    min_index = l2_cache_new_entry(bs);
+    min_index = l2_cache_new_entry(req);
     l2_table = s->l2_cache + (min_index << s->l2_bits);
 
     if (old_l2_offset == 0) {
@@ -265,7 +276,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     } else {
         /* if there was an old l2 table, read it from the disk */
         BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
-        ret = bdrv_pread(bs->file, old_l2_offset, l2_table,
+        ret = blkqueue_pread(&req->bq_context, old_l2_offset, l2_table,
             s->l2_size * sizeof(uint64_t));
         if (ret < 0) {
             goto fail;
@@ -273,15 +284,16 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     }
     /* write the l2 table to the file */
     BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
-    ret = bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
+    ret = blkqueue_pwrite(&req->bq_context, l2_offset, l2_table,
         s->l2_size * sizeof(uint64_t));
+    blkqueue_barrier(&req->bq_context);
     if (ret < 0) {
         goto fail;
     }
 
     /* update the L1 entry */
     s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
-    ret = write_l1_entry(bs, l1_index);
+    ret = write_l1_entry(req, l1_index);
     if (ret < 0) {
         goto fail;
     }
@@ -296,7 +308,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
 
 fail:
     s->l1_table[l1_index] = old_l2_offset;
-    qcow2_l2_cache_reset(bs);
+    qcow2_l2_cache_reset(req);
     return ret;
 }
 
@@ -352,9 +364,10 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
 }
 
 
-static int qcow_read(BlockDriverState *bs, int64_t sector_num,
+static int qcow_read(QcowRequest *req, int64_t sector_num,
                      uint8_t *buf, int nb_sectors)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int ret, index_in_cluster, n, n1;
     uint64_t cluster_offset;
@@ -364,7 +377,7 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
     while (nb_sectors > 0) {
         n = nb_sectors;
 
-        ret = qcow2_get_cluster_offset(bs, sector_num << 9, &n,
+        ret = qcow2_get_cluster_offset(req, sector_num << 9, &n,
             &cluster_offset);
         if (ret < 0) {
             return ret;
@@ -389,13 +402,14 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
                 memset(buf, 0, 512 * n);
             }
         } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
-            if (qcow2_decompress_cluster(bs, cluster_offset) < 0)
+            if (qcow2_decompress_cluster(req, cluster_offset) < 0)
                 return -1;
             memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
         } else {
             BLKDBG_EVENT(bs->file, BLKDBG_READ);
-            ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512);
-            if (ret != n * 512)
+            ret = blkqueue_pread(&req->bq_context,
+                cluster_offset + index_in_cluster * 512, buf, n * 512);
+            if (ret < 0)
                 return -1;
             if (s->crypt_method) {
                 qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0,
@@ -409,9 +423,10 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
     return 0;
 }
 
-static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
+static int copy_sectors(QcowRequest *req, uint64_t start_sect,
                         uint64_t cluster_offset, int n_start, int n_end)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int n, ret;
 
@@ -419,7 +434,7 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
     if (n <= 0)
         return 0;
     BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
-    ret = qcow_read(bs, start_sect + n_start, s->cluster_data, n);
+    ret = qcow_read(req, start_sect + n_start, s->cluster_data, n);
     if (ret < 0)
         return ret;
     if (s->crypt_method) {
@@ -453,9 +468,10 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
  *
  */
 
-int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
+int qcow2_get_cluster_offset(QcowRequest *req, uint64_t offset,
     int *num, uint64_t *cluster_offset)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     unsigned int l1_index, l2_index;
     uint64_t l2_offset, *l2_table;
@@ -501,7 +517,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     /* load the l2 table in memory */
 
     l2_offset &= ~QCOW_OFLAG_COPIED;
-    ret = l2_load(bs, l2_offset, &l2_table);
+    ret = l2_load(req, l2_offset, &l2_table);
     if (ret < 0) {
         return ret;
     }
@@ -543,11 +559,12 @@ out:
  *
  * Returns 0 on success, -errno in failure case
  */
-static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
+static int get_cluster_table(QcowRequest *req, uint64_t offset,
                              uint64_t **new_l2_table,
                              uint64_t *new_l2_offset,
                              int *new_l2_index)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     unsigned int l1_index, l2_index;
     uint64_t l2_offset;
@@ -558,7 +575,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
 
     l1_index = offset >> (s->l2_bits + s->cluster_bits);
     if (l1_index >= s->l1_size) {
-        ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
+        ret = qcow2_grow_l1_table(req, l1_index + 1, false);
         if (ret < 0) {
             return ret;
         }
@@ -570,14 +587,14 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
     if (l2_offset & QCOW_OFLAG_COPIED) {
         /* load the l2 table in memory */
         l2_offset &= ~QCOW_OFLAG_COPIED;
-        ret = l2_load(bs, l2_offset, &l2_table);
+        ret = l2_load(req, l2_offset, &l2_table);
         if (ret < 0) {
             return ret;
         }
     } else {
         if (l2_offset)
-            qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
-        ret = l2_allocate(bs, l1_index, &l2_table);
+            qcow2_free_clusters(req, l2_offset, s->l2_size * sizeof(uint64_t));
+        ret = l2_allocate(req, l1_index, &l2_table);
         if (ret < 0) {
             return ret;
         }
@@ -608,17 +625,18 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
  *
  */
 
-uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
+uint64_t qcow2_alloc_compressed_cluster_offset(QcowRequest *req,
                                                uint64_t offset,
                                                int compressed_size)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int l2_index, ret;
     uint64_t l2_offset, *l2_table;
     int64_t cluster_offset;
     int nb_csectors;
 
-    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
+    ret = get_cluster_table(req, offset, &l2_table, &l2_offset, &l2_index);
     if (ret < 0) {
         return 0;
     }
@@ -628,9 +646,9 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
         return cluster_offset & ~QCOW_OFLAG_COPIED;
 
     if (cluster_offset)
-        qcow2_free_any_clusters(bs, cluster_offset, 1);
+        qcow2_free_any_clusters(req, cluster_offset, 1);
 
-    cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
+    cluster_offset = qcow2_alloc_bytes(req, compressed_size);
     if (cluster_offset < 0) {
         return 0;
     }
@@ -647,11 +665,12 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 
     BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
     l2_table[l2_index] = cpu_to_be64(cluster_offset);
-    if (bdrv_pwrite_sync(bs->file,
+    if (blkqueue_pwrite(&req->bq_context,
                     l2_offset + l2_index * sizeof(uint64_t),
                     l2_table + l2_index,
                     sizeof(uint64_t)) < 0)
         return 0;
+    blkqueue_barrier(&req->bq_context);
 
     return cluster_offset;
 }
@@ -661,9 +680,10 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
  * read-modify-write in bdrv_pwrite
  */
 #define L2_ENTRIES_PER_SECTOR (512 / 8)
-static int write_l2_entries(BlockDriverState *bs, uint64_t *l2_table,
+static int write_l2_entries(QcowRequest *req, uint64_t *l2_table,
     uint64_t l2_offset, int l2_index, int num)
 {
+    BlockDriverState *bs = req->bs;
     int l2_start_index = l2_index & ~(L1_ENTRIES_PER_SECTOR - 1);
     int start_offset = (8 * l2_index) & ~511;
     int end_offset = (8 * (l2_index + num) + 511) & ~511;
@@ -671,7 +691,7 @@ static int write_l2_entries(BlockDriverState *bs, uint64_t *l2_table,
     int ret;
 
     BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
-    ret = bdrv_pwrite(bs->file, l2_offset + start_offset,
+    ret = blkqueue_pwrite(&req->bq_context, l2_offset + start_offset,
         &l2_table[l2_start_index], len);
     if (ret < 0) {
         return ret;
@@ -680,8 +700,9 @@ static int write_l2_entries(BlockDriverState *bs, uint64_t *l2_table,
     return 0;
 }
 
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
+int qcow2_alloc_cluster_link_l2(QcowRequest *req, QCowL2Meta *m)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int i, j = 0, l2_index, ret;
     uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
@@ -695,21 +716,21 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     /* copy content of unmodified sectors */
     start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
     if (m->n_start) {
-        ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
+        ret = copy_sectors(req, start_sect, cluster_offset, 0, m->n_start);
         if (ret < 0)
             goto err;
     }
 
     if (m->nb_available & (s->cluster_sectors - 1)) {
         uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1);
-        ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9),
+        ret = copy_sectors(req, start_sect + end, cluster_offset + (end << 9),
                 m->nb_available - end, s->cluster_sectors);
         if (ret < 0)
             goto err;
     }
 
     /* update L2 table */
-    ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index);
+    ret = get_cluster_table(req, m->offset, &l2_table, &l2_offset, &l2_index);
     if (ret < 0) {
         goto err;
     }
@@ -733,11 +754,11 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
      * need to be sure that the refcounts have been increased and COW was
      * handled.
      */
-    bdrv_flush(bs->file);
+    blkqueue_barrier(&req->bq_context);
 
-    ret = write_l2_entries(bs, l2_table, l2_offset, l2_index, m->nb_clusters);
+    ret = write_l2_entries(req, l2_table, l2_offset, l2_index, m->nb_clusters);
     if (ret < 0) {
-        qcow2_l2_cache_reset(bs);
+        qcow2_l2_cache_reset(req);
         goto err;
     }
 
@@ -746,9 +767,9 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
      * Also flush bs->file to get the right order for L2 and refcount update.
      */
     if (j != 0) {
-        bdrv_flush(bs->file);
+        blkqueue_barrier(&req->bq_context);
         for (i = 0; i < j; i++) {
-            qcow2_free_any_clusters(bs,
+            qcow2_free_any_clusters(req,
                 be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1);
         }
     }
@@ -778,9 +799,10 @@ err:
  *
  * Return 0 on success and -errno in error cases
  */
-int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
+int qcow2_alloc_cluster_offset(QcowRequest *req, uint64_t offset,
     int n_start, int n_end, int *num, QCowL2Meta *m)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int l2_index, ret;
     uint64_t l2_offset, *l2_table;
@@ -788,7 +810,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
     unsigned int nb_clusters, i = 0;
     QCowL2Meta *old_alloc;
 
-    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
+    ret = get_cluster_table(req, offset, &l2_table, &l2_offset, &l2_index);
     if (ret < 0) {
         return ret;
     }
@@ -881,7 +903,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
 
     /* allocate a new cluster */
 
-    cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
+    cluster_offset = qcow2_alloc_clusters(req, nb_clusters * s->cluster_size);
     if (cluster_offset < 0) {
         QLIST_REMOVE(m, next_in_flight);
         return cluster_offset;
@@ -928,8 +950,9 @@ static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
     return 0;
 }
 
-int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
+int qcow2_decompress_cluster(QcowRequest *req, uint64_t cluster_offset)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int ret, csize, nb_csectors, sector_offset;
     uint64_t coffset;
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index a10453c..39ead62 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -26,16 +26,17 @@
 #include "block_int.h"
 #include "block/qcow2.h"
 
-static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
+static int64_t alloc_clusters_noref(QcowRequest *req, int64_t size);
+static int QEMU_WARN_UNUSED_RESULT update_refcount(QcowRequest *req,
                             int64_t offset, int64_t length,
                             int addend);
 
 
 static int cache_refcount_updates = 0;
 
-static int write_refcount_block(BlockDriverState *bs)
+static int write_refcount_block(QcowRequest *req)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     size_t size = s->cluster_size;
 
@@ -44,8 +45,8 @@ static int write_refcount_block(BlockDriverState *bs)
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE);
-    if (bdrv_pwrite_sync(bs->file, s->refcount_block_cache_offset,
-            s->refcount_block_cache, size) < 0)
+    if (blkqueue_pwrite(&req->bq_context, s->refcount_block_cache_offset,
+        s->refcount_block_cache, size) < 0)
     {
         return -EIO;
     }
@@ -66,8 +67,7 @@ int qcow2_refcount_init(BlockDriverState *bs)
     s->refcount_table = qemu_malloc(refcount_table_size2);
     if (s->refcount_table_size > 0) {
         BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
-        ret = bdrv_pread(bs->file, s->refcount_table_offset,
-                         s->refcount_table, refcount_table_size2);
+        ret = bdrv_pread(bs->file, s->refcount_table_offset, s->refcount_table, refcount_table_size2);
         if (ret != refcount_table_size2)
             goto fail;
         for(i = 0; i < s->refcount_table_size; i++)
@@ -86,22 +86,23 @@ void qcow2_refcount_close(BlockDriverState *bs)
 }
 
 
-static int load_refcount_block(BlockDriverState *bs,
+static int load_refcount_block(QcowRequest *req,
                                int64_t refcount_block_offset)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int ret;
 
     if (cache_refcount_updates) {
-        ret = write_refcount_block(bs);
+        ret = write_refcount_block(req);
         if (ret < 0) {
             return ret;
         }
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
-    ret = bdrv_pread(bs->file, refcount_block_offset, s->refcount_block_cache,
-                     s->cluster_size);
+    ret = blkqueue_pread(&req->bq_context, refcount_block_offset,
+        s->refcount_block_cache, s->cluster_size);
     if (ret < 0) {
         s->refcount_block_cache_offset = 0;
         return ret;
@@ -116,8 +117,9 @@ static int load_refcount_block(BlockDriverState *bs,
  * return value is the refcount of the cluster, negative values are -errno
  * and indicate an error.
  */
-static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
+static int get_refcount(QcowRequest *req, int64_t cluster_index)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int refcount_table_index, block_index;
     int64_t refcount_block_offset;
@@ -131,7 +133,7 @@ static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
         return 0;
     if (refcount_block_offset != s->refcount_block_cache_offset) {
         /* better than nothing: return allocated if read error */
-        ret = load_refcount_block(bs, refcount_block_offset);
+        ret = load_refcount_block(req, refcount_block_offset);
         if (ret < 0) {
             return ret;
         }
@@ -176,8 +178,9 @@ static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
  *
  * Returns the offset of the refcount block on success or -errno in error case
  */
-static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
+static int64_t alloc_refcount_block(QcowRequest *req, int64_t cluster_index)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     unsigned int refcount_table_index;
     int ret;
@@ -195,7 +198,7 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
         /* If it's already there, we're done */
         if (refcount_block_offset) {
             if (refcount_block_offset != s->refcount_block_cache_offset) {
-                ret = load_refcount_block(bs, refcount_block_offset);
+                ret = load_refcount_block(req, refcount_block_offset);
                 if (ret < 0) {
                     return ret;
                 }
@@ -227,14 +230,14 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
      */
 
     if (cache_refcount_updates) {
-        ret = write_refcount_block(bs);
+        ret = write_refcount_block(req);
         if (ret < 0) {
             return ret;
         }
     }
 
     /* Allocate the refcount block itself and mark it as used */
-    int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
+    int64_t new_block = alloc_clusters_noref(req, s->cluster_size);
     if (new_block < 0) {
         return new_block;
     }
@@ -257,12 +260,12 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
     } else {
         /* Described somewhere else. This can recurse at most twice before we
          * arrive at a block that describes itself. */
-        ret = update_refcount(bs, new_block, s->cluster_size, 1);
+        ret = update_refcount(req, new_block, s->cluster_size, 1);
         if (ret < 0) {
             goto fail_block;
         }
 
-        bdrv_flush(bs->file);
+        blkqueue_barrier(&req->bq_context);
 
         /* Initialize the new refcount block only after updating its refcount,
          * update_refcount uses the refcount cache itself */
@@ -272,8 +275,9 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
 
     /* Now the new refcount block needs to be written to disk */
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
-    ret = bdrv_pwrite_sync(bs->file, new_block, s->refcount_block_cache,
+    ret = blkqueue_pwrite(&req->bq_context, new_block, s->refcount_block_cache,
         s->cluster_size);
+    blkqueue_barrier(&req->bq_context);
     if (ret < 0) {
         goto fail_block;
     }
@@ -282,9 +286,10 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
     if (refcount_table_index < s->refcount_table_size) {
         uint64_t data64 = cpu_to_be64(new_block);
         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
-        ret = bdrv_pwrite_sync(bs->file,
+        ret = blkqueue_pwrite(&req->bq_context,
             s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
             &data64, sizeof(data64));
+        blkqueue_barrier(&req->bq_context);
         if (ret < 0) {
             goto fail_block;
         }
@@ -362,8 +367,9 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
 
     /* Write refcount blocks to disk */
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
-    ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
+    ret = blkqueue_pwrite(&req->bq_context, meta_offset, new_blocks,
         blocks_clusters * s->cluster_size);
+    blkqueue_barrier(&req->bq_context);
     qemu_free(new_blocks);
     if (ret < 0) {
         goto fail_table;
@@ -375,8 +381,9 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
-    ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
+    ret = blkqueue_pwrite(&req->bq_context, table_offset, new_table,
         table_size * sizeof(uint64_t));
+    blkqueue_barrier(&req->bq_context);
     if (ret < 0) {
         goto fail_table;
     }
@@ -390,8 +397,9 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
     cpu_to_be64w((uint64_t*)data, table_offset);
     cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
-        data, sizeof(data));
+    ret = blkqueue_pwrite(&req->bq_context,
+        offsetof(QCowHeader, refcount_table_offset), data, sizeof(data));
+    blkqueue_barrier(&req->bq_context);
     if (ret < 0) {
         goto fail_table;
     }
@@ -407,10 +415,11 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
 
     /* Free old table. Remember, we must not change free_cluster_index */
     uint64_t old_free_cluster_index = s->free_cluster_index;
-    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
+    qcow2_free_clusters(req, old_table_offset,
+        old_table_size * sizeof(uint64_t));
     s->free_cluster_index = old_free_cluster_index;
 
-    ret = load_refcount_block(bs, new_block);
+    ret = load_refcount_block(req, new_block);
     if (ret < 0) {
         goto fail_block;
     }
@@ -425,9 +434,10 @@ fail_block:
 }
 
 #define REFCOUNTS_PER_SECTOR (512 >> REFCOUNT_SHIFT)
-static int write_refcount_block_entries(BlockDriverState *bs,
+static int write_refcount_block_entries(QcowRequest *req,
     int64_t refcount_block_offset, int first_index, int last_index)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     size_t size;
     int ret;
@@ -447,7 +457,7 @@ static int write_refcount_block_entries(BlockDriverState *bs,
     size = (last_index - first_index) << REFCOUNT_SHIFT;
 
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
-    ret = bdrv_pwrite(bs->file,
+    ret = blkqueue_pwrite(&req->bq_context,
         refcount_block_offset + (first_index << REFCOUNT_SHIFT),
         &s->refcount_block_cache[first_index], size);
     if (ret < 0) {
@@ -458,9 +468,10 @@ static int write_refcount_block_entries(BlockDriverState *bs,
 }
 
 /* XXX: cache several refcount block clusters ? */
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
+static int QEMU_WARN_UNUSED_RESULT update_refcount(QcowRequest *req,
     int64_t offset, int64_t length, int addend)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int64_t start, last, cluster_offset;
     int64_t refcount_block_offset = 0;
@@ -492,7 +503,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
         table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
         if ((old_table_index >= 0) && (table_index != old_table_index)) {
 
-            ret = write_refcount_block_entries(bs, refcount_block_offset,
+            ret = write_refcount_block_entries(req, refcount_block_offset,
                 first_index, last_index);
             if (ret < 0) {
                 return ret;
@@ -503,7 +514,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
         }
 
         /* Load the refcount block and allocate it if needed */
-        new_block = alloc_refcount_block(bs, cluster_index);
+        new_block = alloc_refcount_block(req, cluster_index);
         if (new_block < 0) {
             ret = new_block;
             goto fail;
@@ -538,7 +549,7 @@ fail:
     /* Write last changed block to disk */
     if (refcount_block_offset != 0) {
         int wret;
-        wret = write_refcount_block_entries(bs, refcount_block_offset,
+        wret = write_refcount_block_entries(req, refcount_block_offset,
             first_index, last_index);
         if (wret < 0) {
             return ret < 0 ? ret : wret;
@@ -551,7 +562,7 @@ fail:
      */
     if (ret < 0) {
         int dummy;
-        dummy = update_refcount(bs, offset, cluster_offset - offset, -addend);
+        dummy = update_refcount(req, offset, cluster_offset - offset, -addend);
         (void)dummy;
     }
 
@@ -565,21 +576,22 @@ fail:
  * If the return value is non-negative, it is the new refcount of the cluster.
  * If it is negative, it is -errno and indicates an error.
  */
-static int update_cluster_refcount(BlockDriverState *bs,
+static int update_cluster_refcount(QcowRequest *req,
                                    int64_t cluster_index,
                                    int addend)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int ret;
 
-    ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);
+    ret = update_refcount(req, cluster_index << s->cluster_bits, 1, addend);
     if (ret < 0) {
         return ret;
     }
 
-    bdrv_flush(bs->file);
+    blkqueue_barrier(&req->bq_context);
 
-    return get_refcount(bs, cluster_index);
+    return get_refcount(req, cluster_index);
 }
 
 
@@ -590,8 +602,9 @@ static int update_cluster_refcount(BlockDriverState *bs,
 
 
 /* return < 0 if error */
-static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
+static int64_t alloc_clusters_noref(QcowRequest *req, int64_t size)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int i, nb_clusters, refcount;
 
@@ -599,7 +612,7 @@ static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
 retry:
     for(i = 0; i < nb_clusters; i++) {
         int64_t next_cluster_index = s->free_cluster_index++;
-        refcount = get_refcount(bs, next_cluster_index);
+        refcount = get_refcount(req, next_cluster_index);
 
         if (refcount < 0) {
             return refcount;
@@ -615,18 +628,19 @@ retry:
     return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
 }
 
-int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
+int64_t qcow2_alloc_clusters(QcowRequest *req, int64_t size)
 {
+    BlockDriverState *bs = req->bs;
     int64_t offset;
     int ret;
 
     BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
-    offset = alloc_clusters_noref(bs, size);
+    offset = alloc_clusters_noref(req, size);
     if (offset < 0) {
         return offset;
     }
 
-    ret = update_refcount(bs, offset, size, 1);
+    ret = update_refcount(req, offset, size, 1);
     if (ret < 0) {
         return ret;
     }
@@ -636,8 +650,9 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
 
 /* only used to allocate compressed sectors. We try to allocate
    contiguous sectors. size must be <= cluster_size */
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
+int64_t qcow2_alloc_bytes(QcowRequest *req, int size)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int64_t offset, cluster_offset;
     int free_in_cluster;
@@ -645,7 +660,7 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
     BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
     assert(size > 0 && size <= s->cluster_size);
     if (s->free_byte_offset == 0) {
-        s->free_byte_offset = qcow2_alloc_clusters(bs, s->cluster_size);
+        s->free_byte_offset = qcow2_alloc_clusters(req, s->cluster_size);
         if (s->free_byte_offset < 0) {
             return s->free_byte_offset;
         }
@@ -661,9 +676,9 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
         if (free_in_cluster == 0)
             s->free_byte_offset = 0;
         if ((offset & (s->cluster_size - 1)) != 0)
-            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
+            update_cluster_refcount(req, offset >> s->cluster_bits, 1);
     } else {
-        offset = qcow2_alloc_clusters(bs, s->cluster_size);
+        offset = qcow2_alloc_clusters(req, s->cluster_size);
         if (offset < 0) {
             return offset;
         }
@@ -671,7 +686,7 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
         if ((cluster_offset + s->cluster_size) == offset) {
             /* we are lucky: contiguous data */
             offset = s->free_byte_offset;
-            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
+            update_cluster_refcount(req, offset >> s->cluster_bits, 1);
             s->free_byte_offset += size;
         } else {
             s->free_byte_offset = offset;
@@ -679,21 +694,28 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
         }
     }
 
-    bdrv_flush(bs->file);
+    blkqueue_barrier(&req->bq_context);
     return offset;
 }
 
-void qcow2_free_clusters(BlockDriverState *bs,
+void qcow2_free_clusters(QcowRequest *req,
                           int64_t offset, int64_t size)
 {
+    BlockDriverState *bs = req->bs;
+    BDRVQcowState *s = bs->opaque;
     int ret;
 
     BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
-    ret = update_refcount(bs, offset, size, -1);
+    ret = update_refcount(req, offset, size, -1);
     if (ret < 0) {
         fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
         /* TODO Remember the clusters to free them later and avoid leaking */
     }
+
+    /* TODO The cluster may be reused as a data cluster, and data doesn't go
+     * through block-queue at the moment. As soon as it does, this flush can be
+     * dropped. */
+    blkqueue_flush(s->bq);
 }
 
 /*
@@ -703,9 +725,10 @@ void qcow2_free_clusters(BlockDriverState *bs,
  *
  */
 
-void qcow2_free_any_clusters(BlockDriverState *bs,
+void qcow2_free_any_clusters(QcowRequest *req,
     uint64_t cluster_offset, int nb_clusters)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
 
     /* free the cluster */
@@ -714,13 +737,13 @@ void qcow2_free_any_clusters(BlockDriverState *bs,
         int nb_csectors;
         nb_csectors = ((cluster_offset >> s->csize_shift) &
                        s->csize_mask) + 1;
-        qcow2_free_clusters(bs,
+        qcow2_free_clusters(req,
             (cluster_offset & s->cluster_offset_mask) & ~511,
             nb_csectors * 512);
         return;
     }
 
-    qcow2_free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits);
+    qcow2_free_clusters(req, cluster_offset, nb_clusters << s->cluster_bits);
 
     return;
 }
@@ -751,15 +774,16 @@ void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset,
 }
 
 /* update the refcounts of snapshots and the copied flag */
-int qcow2_update_snapshot_refcount(BlockDriverState *bs,
+int qcow2_update_snapshot_refcount(QcowRequest *req,
     int64_t l1_table_offset, int l1_size, int addend)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
     int64_t old_offset, old_l2_offset;
     int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
 
-    qcow2_l2_cache_reset(bs);
+    qcow2_l2_cache_reset(req);
     cache_refcount_updates = 1;
 
     l2_table = NULL;
@@ -772,8 +796,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
             l1_table = NULL;
         }
         l1_allocated = 1;
-        if (bdrv_pread(bs->file, l1_table_offset,
-                       l1_table, l1_size2) != l1_size2)
+        if (blkqueue_pread(&req->bq_context, l1_table_offset, l1_table, l1_size2) < 0)
             goto fail;
         for(i = 0;i < l1_size; i++)
             be64_to_cpus(&l1_table[i]);
@@ -792,7 +815,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
             old_l2_offset = l2_offset;
             l2_offset &= ~QCOW_OFLAG_COPIED;
             l2_modified = 0;
-            if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
+            if (blkqueue_pread(&req->bq_context, l2_offset, l2_table, l2_size) < 0)
                 goto fail;
             for(j = 0; j < s->l2_size; j++) {
                 offset = be64_to_cpu(l2_table[j]);
@@ -804,7 +827,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                                        s->csize_mask) + 1;
                         if (addend != 0) {
                             int ret;
-                            ret = update_refcount(bs,
+                            ret = update_refcount(req,
                                 (offset & s->cluster_offset_mask) & ~511,
                                 nb_csectors * 512, addend);
                             if (ret < 0) {
@@ -813,15 +836,15 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 
                             /* TODO Flushing once for the whole function should
                              * be enough */
-                            bdrv_flush(bs->file);
+                            blkqueue_barrier(&req->bq_context);
                         }
                         /* compressed clusters are never modified */
                         refcount = 2;
                     } else {
                         if (addend != 0) {
-                            refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
+                            refcount = update_cluster_refcount(req, offset >> s->cluster_bits, addend);
                         } else {
-                            refcount = get_refcount(bs, offset >> s->cluster_bits);
+                            refcount = get_refcount(req, offset >> s->cluster_bits);
                         }
 
                         if (refcount < 0) {
@@ -839,15 +862,16 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                 }
             }
             if (l2_modified) {
-                if (bdrv_pwrite_sync(bs->file,
+                if (blkqueue_pwrite(&req->bq_context,
                                 l2_offset, l2_table, l2_size) < 0)
                     goto fail;
+                blkqueue_barrier(&req->bq_context);
             }
 
             if (addend != 0) {
-                refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
+                refcount = update_cluster_refcount(req, l2_offset >> s->cluster_bits, addend);
             } else {
-                refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
+                refcount = get_refcount(req, l2_offset >> s->cluster_bits);
             }
             if (refcount < 0) {
                 goto fail;
@@ -863,9 +887,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
     if (l1_modified) {
         for(i = 0; i < l1_size; i++)
             cpu_to_be64s(&l1_table[i]);
-        if (bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table,
+        if (blkqueue_pwrite(&req->bq_context, l1_table_offset, l1_table,
                         l1_size2) < 0)
             goto fail;
+        blkqueue_barrier(&req->bq_context);
         for(i = 0; i < l1_size; i++)
             be64_to_cpus(&l1_table[i]);
     }
@@ -873,14 +898,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
         qemu_free(l1_table);
     qemu_free(l2_table);
     cache_refcount_updates = 0;
-    write_refcount_block(bs);
+    write_refcount_block(req);
     return 0;
  fail:
     if (l1_allocated)
         qemu_free(l1_table);
     qemu_free(l2_table);
     cache_refcount_updates = 0;
-    write_refcount_block(bs);
+    write_refcount_block(req);
     return -EIO;
 }
 
@@ -899,12 +924,13 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
  *
  * Modifies the number of errors in res.
  */
-static void inc_refcounts(BlockDriverState *bs,
+static void inc_refcounts(QcowRequest *req,
                           BdrvCheckResult *res,
                           uint16_t *refcount_table,
                           int refcount_table_size,
                           int64_t offset, int64_t size)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     int64_t start, last, cluster_offset;
     int k;
@@ -944,10 +970,11 @@ static void inc_refcounts(BlockDriverState *bs,
  * Returns the number of errors found by the checks or -errno if an internal
  * error occurred.
  */
-static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
+static int check_refcounts_l2(QcowRequest *req, BdrvCheckResult *res,
     uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
     int check_copied)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     uint64_t *l2_table, offset;
     int i, l2_size, nb_csectors, refcount;
@@ -956,8 +983,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
     l2_size = s->l2_size * sizeof(uint64_t);
     l2_table = qemu_malloc(l2_size);
 
-    if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
+    if (blkqueue_pread(&req->bq_context, l2_offset, l2_table, l2_size) < 0) {
         goto fail;
+    }
 
     /* Do the actual checks */
     for(i = 0; i < s->l2_size; i++) {
@@ -977,14 +1005,14 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
                 nb_csectors = ((offset >> s->csize_shift) &
                                s->csize_mask) + 1;
                 offset &= s->cluster_offset_mask;
-                inc_refcounts(bs, res, refcount_table, refcount_table_size,
+                inc_refcounts(req, res, refcount_table, refcount_table_size,
                     offset & ~511, nb_csectors * 512);
             } else {
                 /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
                 if (check_copied) {
                     uint64_t entry = offset;
                     offset &= ~QCOW_OFLAG_COPIED;
-                    refcount = get_refcount(bs, offset >> s->cluster_bits);
+                    refcount = get_refcount(req, offset >> s->cluster_bits);
                     if (refcount < 0) {
                         fprintf(stderr, "Can't get refcount for offset %"
                             PRIx64 ": %s\n", entry, strerror(-refcount));
@@ -999,7 +1027,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
 
                 /* Mark cluster as used */
                 offset &= ~QCOW_OFLAG_COPIED;
-                inc_refcounts(bs, res, refcount_table,refcount_table_size,
+                inc_refcounts(req, res, refcount_table,refcount_table_size,
                     offset, s->cluster_size);
 
                 /* Correct offsets are cluster aligned */
@@ -1029,13 +1057,14 @@ fail:
  * Returns the number of errors found by the checks or -errno if an internal
  * error occurred.
  */
-static int check_refcounts_l1(BlockDriverState *bs,
+static int check_refcounts_l1(QcowRequest *req,
                               BdrvCheckResult *res,
                               uint16_t *refcount_table,
                               int refcount_table_size,
                               int64_t l1_table_offset, int l1_size,
                               int check_copied)
 {
+    BlockDriverState *bs = req->bs;
     BDRVQcowState *s = bs->opaque;
     uint64_t *l1_table, l2_offset, l1_size2;
     int i, refcount, ret;
@@ -1043,7 +1072,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
     l1_size2 = l1_size * sizeof(uint64_t);
 
     /* Mark L1 table as used */
-    inc_refcounts(bs, res, refcount_table, refcount_table_size,
+    inc_refcounts(req, res, refcount_table, refcount_table_size,
         l1_table_offset, l1_size2);
 
     /* Read L1 table entries from disk */
@@ -1051,9 +1080,12 @@ static int check_refcounts_l1(BlockDriverState *bs,
         l1_table = NULL;
     } else {
         l1_table = qemu_malloc(l1_size2);
-        if (bdrv_pread(bs->file, l1_table_offset,
-                       l1_table, l1_size2) != l1_size2)
+        ret = blkqueue_pread(&req->bq_context, l1_table_offset, l1_table,
+            l1_size2);
+        if (ret < 0) {
             goto fail;
+        }
+
         for(i = 0;i < l1_size; i++)
             be64_to_cpus(&l1_table[i]);
     }
@@ -1064,7 +1096,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
         if (l2_offset) {
             /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
             if (check_copied) {
-                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
+                refcount = get_refcount(req, (l2_offset & ~QCOW_OFLAG_COPIED)
                     >> s->cluster_bits);
                 if (refcount < 0) {
                     fprintf(stderr, "Can't get refcount for l2_offset %"
@@ -1080,7 +1112,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
 
             /* Mark L2 table as used */
             l2_offset &= ~QCOW_OFLAG_COPIED;
-            inc_refcounts(bs, res, refcount_table, refcount_table_size,
+            inc_refcounts(req, res, refcount_table, refcount_table_size,
                 l2_offset, s->cluster_size);
 
             /* L2 tables are cluster aligned */
@@ -1091,7 +1123,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
             }
 
             /* Process and check L2 entries */
-            ret = check_refcounts_l2(bs, res, refcount_table,
+            ret = check_refcounts_l2(req, res, refcount_table,
                 refcount_table_size, l2_offset, check_copied);
             if (ret < 0) {
                 goto fail;
@@ -1123,16 +1155,23 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res)
     uint16_t *refcount_table;
     int ret;
 
+    QcowRequest req1 = {
+        .bs = bs,
+    };
+    QcowRequest *req = &req1;
+
+    blkqueue_init_context(&req->bq_context, s->bq);
+
     size = bdrv_getlength(bs->file);
     nb_clusters = size_to_clusters(s, size);
     refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
 
     /* header */
-    inc_refcounts(bs, res, refcount_table, nb_clusters,
+    inc_refcounts(req, res, refcount_table, nb_clusters,
         0, s->cluster_size);
 
     /* current L1 table */
-    ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
+    ret = check_refcounts_l1(req, res, refcount_table, nb_clusters,
                        s->l1_table_offset, s->l1_size, 1);
     if (ret < 0) {
         return ret;
@@ -1141,17 +1180,17 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res)
     /* snapshots */
     for(i = 0; i < s->nb_snapshots; i++) {
         sn = s->snapshots + i;
-        ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
+        ret = check_refcounts_l1(req, res, refcount_table, nb_clusters,
             sn->l1_table_offset, sn->l1_size, 0);
         if (ret < 0) {
             return ret;
         }
     }
-    inc_refcounts(bs, res, refcount_table, nb_clusters,
+    inc_refcounts(req, res, refcount_table, nb_clusters,
         s->snapshots_offset, s->snapshots_size);
 
     /* refcount data */
-    inc_refcounts(bs, res, refcount_table, nb_clusters,
+    inc_refcounts(req, res, refcount_table, nb_clusters,
         s->refcount_table_offset,
         s->refcount_table_size * sizeof(uint64_t));
 
@@ -1175,7 +1214,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res)
         }
 
         if (offset != 0) {
-            inc_refcounts(bs, res, refcount_table, nb_clusters,
+            inc_refcounts(req, res, refcount_table, nb_clusters,
                 offset, s->cluster_size);
             if (refcount_table[cluster] != 1) {
                 fprintf(stderr, "ERROR refcount block %d refcount=%d\n",
@@ -1187,7 +1226,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res)
 
     /* compare ref counts */
     for(i = 0; i < nb_clusters; i++) {
-        refcount1 = get_refcount(bs, i);
+        refcount1 = get_refcount(req, i);
         if (refcount1 < 0) {
             fprintf(stderr, "Can't get refcount for cluster %d: %s\n",
                 i, strerror(-refcount1));
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index aacf357..dd42220 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -68,6 +68,13 @@ int qcow2_read_snapshots(BlockDriverState *bs)
     int i, id_str_size, name_size;
     int64_t offset;
     uint32_t extra_data_size;
+    int ret;
+
+    QcowRequest req = {
+        .bs = bs,
+    };
+
+    blkqueue_init_context(&req.bq_context, s->bq);
 
     if (!s->nb_snapshots) {
         s->snapshots = NULL;
@@ -79,8 +86,10 @@ int qcow2_read_snapshots(BlockDriverState *bs)
     s->snapshots = qemu_mallocz(s->nb_snapshots * sizeof(QCowSnapshot));
     for(i = 0; i < s->nb_snapshots; i++) {
         offset = align_offset(offset, 8);
-        if (bdrv_pread(bs->file, offset, &h, sizeof(h)) != sizeof(h))
+        ret = blkqueue_pread(&req.bq_context, offset, &h, sizeof(h));
+        if (ret < 0) {
             goto fail;
+        }
         offset += sizeof(h);
         sn = s->snapshots + i;
         sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
@@ -97,14 +106,18 @@ int qcow2_read_snapshots(BlockDriverState *bs)
         offset += extra_data_size;
 
         sn->id_str = qemu_malloc(id_str_size + 1);
-        if (bdrv_pread(bs->file, offset, sn->id_str, id_str_size) != id_str_size)
+        ret = blkqueue_pread(&req.bq_context, offset, sn->id_str, id_str_size);
+        if (ret < 0) {
             goto fail;
+        }
         offset += id_str_size;
         sn->id_str[id_str_size] = '\0';
 
         sn->name = qemu_malloc(name_size + 1);
-        if (bdrv_pread(bs->file, offset, sn->name, name_size) != name_size)
+        ret = blkqueue_pread(&req.bq_context, offset, sn->name, name_size);
+        if (ret < 0) {
             goto fail;
+        }
         offset += name_size;
         sn->name[name_size] = '\0';
     }
@@ -126,6 +139,12 @@ static int qcow_write_snapshots(BlockDriverState *bs)
     uint32_t data32;
     int64_t offset, snapshots_offset;
 
+    QcowRequest req = {
+        .bs = bs,
+    };
+
+    blkqueue_init_context(&req.bq_context, s->bq);
+
     /* compute the size of the snapshots */
     offset = 0;
     for(i = 0; i < s->nb_snapshots; i++) {
@@ -137,7 +156,7 @@ static int qcow_write_snapshots(BlockDriverState *bs)
     }
     snapshots_size = offset;
 
-    snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
+    snapshots_offset = qcow2_alloc_clusters(&req, snapshots_size);
     bdrv_flush(bs->file);
     offset = snapshots_offset;
     if (offset < 0) {
@@ -159,29 +178,29 @@ static int qcow_write_snapshots(BlockDriverState *bs)
         h.id_str_size = cpu_to_be16(id_str_size);
         h.name_size = cpu_to_be16(name_size);
         offset = align_offset(offset, 8);
-        if (bdrv_pwrite_sync(bs->file, offset, &h, sizeof(h)) < 0)
+        if (blkqueue_pwrite(&req.bq_context, offset, &h, sizeof(h)) < 0)
             goto fail;
         offset += sizeof(h);
-        if (bdrv_pwrite_sync(bs->file, offset, sn->id_str, id_str_size) < 0)
+        if (blkqueue_pwrite(&req.bq_context, offset, sn->id_str, id_str_size) < 0)
             goto fail;
         offset += id_str_size;
-        if (bdrv_pwrite_sync(bs->file, offset, sn->name, name_size) < 0)
+        if (blkqueue_pwrite(&req.bq_context, offset, sn->name, name_size) < 0)
             goto fail;
         offset += name_size;
     }
 
     /* update the various header fields */
     data64 = cpu_to_be64(snapshots_offset);
-    if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, snapshots_offset),
+    if (blkqueue_pwrite(&req.bq_context, offsetof(QCowHeader, snapshots_offset),
                     &data64, sizeof(data64)) < 0)
         goto fail;
     data32 = cpu_to_be32(s->nb_snapshots);
-    if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
+    if (blkqueue_pwrite(&req.bq_context, offsetof(QCowHeader, nb_snapshots),
                     &data32, sizeof(data32)) < 0)
         goto fail;
 
     /* free the old snapshot table */
-    qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size);
+    qcow2_free_clusters(&req, s->snapshots_offset, s->snapshots_size);
     s->snapshots_offset = snapshots_offset;
     s->snapshots_size = snapshots_size;
     return 0;
@@ -241,6 +260,12 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     uint64_t *l1_table = NULL;
     int64_t l1_table_offset;
 
+    QcowRequest req = {
+        .bs = bs,
+    };
+
+    blkqueue_init_context(&req.bq_context, s->bq);
+
     memset(sn, 0, sizeof(*sn));
 
     if (sn_info->id_str[0] == '\0') {
@@ -263,12 +288,13 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     sn->date_nsec = sn_info->date_nsec;
     sn->vm_clock_nsec = sn_info->vm_clock_nsec;
 
-    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
+    ret = qcow2_update_snapshot_refcount(&req, s->l1_table_offset,
+        s->l1_size, 1);
     if (ret < 0)
         goto fail;
 
     /* create the L1 table of the snapshot */
-    l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
+    l1_table_offset = qcow2_alloc_clusters(&req, s->l1_size * sizeof(uint64_t));
     if (l1_table_offset < 0) {
         goto fail;
     }
@@ -286,7 +312,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     for(i = 0; i < s->l1_size; i++) {
         l1_table[i] = cpu_to_be64(s->l1_table[i]);
     }
-    if (bdrv_pwrite_sync(bs->file, sn->l1_table_offset,
+    if (blkqueue_pwrite(&req.bq_context, sn->l1_table_offset,
                     l1_table, s->l1_size * sizeof(uint64_t)) < 0)
         goto fail;
     qemu_free(l1_table);
@@ -318,32 +344,45 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
     BDRVQcowState *s = bs->opaque;
     QCowSnapshot *sn;
     int i, snapshot_index, l1_size2;
+    int ret;
+
+    QcowRequest req = {
+        .bs = bs,
+    };
+
+    blkqueue_init_context(&req.bq_context, s->bq);
 
     snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
     if (snapshot_index < 0)
         return -ENOENT;
     sn = &s->snapshots[snapshot_index];
 
-    if (qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, -1) < 0)
+    if (qcow2_update_snapshot_refcount(&req, s->l1_table_offset, s->l1_size, -1) < 0)
         goto fail;
 
-    if (qcow2_grow_l1_table(bs, sn->l1_size, true) < 0)
+    if (qcow2_grow_l1_table(&req, sn->l1_size, true) < 0)
         goto fail;
 
     s->l1_size = sn->l1_size;
     l1_size2 = s->l1_size * sizeof(uint64_t);
     /* copy the snapshot l1 table to the current l1 table */
-    if (bdrv_pread(bs->file, sn->l1_table_offset,
-                   s->l1_table, l1_size2) != l1_size2)
+    ret = blkqueue_pread(&req.bq_context, sn->l1_table_offset,
+                   s->l1_table, l1_size2);
+    if (ret < 0) {
         goto fail;
-    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset,
-                    s->l1_table, l1_size2) < 0)
+    }
+
+    ret = blkqueue_pwrite(&req.bq_context, s->l1_table_offset,
+                    s->l1_table, l1_size2);
+    if (ret < 0) {
         goto fail;
+    }
+
     for(i = 0;i < s->l1_size; i++) {
         be64_to_cpus(&s->l1_table[i]);
     }
 
-    if (qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1) < 0)
+    if (qcow2_update_snapshot_refcount(&req, s->l1_table_offset, s->l1_size, 1) < 0)
         goto fail;
 
 #ifdef DEBUG_ALLOC
@@ -360,19 +399,28 @@ int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
     QCowSnapshot *sn;
     int snapshot_index, ret;
 
+    QcowRequest req = {
+        .bs = bs,
+    };
+
+    blkqueue_init_context(&req.bq_context, s->bq);
+
     snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
     if (snapshot_index < 0)
         return -ENOENT;
     sn = &s->snapshots[snapshot_index];
 
-    ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset, sn->l1_size, -1);
+    ret = qcow2_update_snapshot_refcount(&req, sn->l1_table_offset,
+        sn->l1_size, -1);
     if (ret < 0)
         return ret;
     /* must update the copied flag on the current cluster offsets */
-    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
+    ret = qcow2_update_snapshot_refcount(&req, s->l1_table_offset,
+        s->l1_size, 0);
     if (ret < 0)
         return ret;
-    qcow2_free_clusters(bs, sn->l1_table_offset, sn->l1_size * sizeof(uint64_t));
+    qcow2_free_clusters(&req, sn->l1_table_offset,
+        sn->l1_size * sizeof(uint64_t));
 
     qemu_free(sn->id_str);
     qemu_free(sn->name);
@@ -423,6 +471,13 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
     int i, snapshot_index, l1_size2;
     BDRVQcowState *s = bs->opaque;
     QCowSnapshot *sn;
+    int ret;
+
+    QcowRequest req = {
+        .bs = bs,
+    };
+
+    blkqueue_init_context(&req.bq_context, s->bq);
 
     snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
     if (snapshot_index < 0) {
@@ -439,8 +494,9 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
     s->l1_table_offset = sn->l1_table_offset;
     s->l1_table = qemu_mallocz(align_offset(l1_size2, 512));
 
-    if (bdrv_pread(bs->file, sn->l1_table_offset,
-                   s->l1_table, l1_size2) != l1_size2) {
+    ret = blkqueue_pread(&req.bq_context, sn->l1_table_offset,
+                   s->l1_table, l1_size2);
+    if (ret < 0) {
         return -1;
     }
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 537c479..e445913 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -136,6 +136,20 @@ static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset,
     return 0;
 }
 
+static bool qcow_blkqueue_error_cb(void *opaque, int ret)
+{
+    BlockDriverState *bs = opaque;
+    BlockErrorAction action = bdrv_get_on_error(bs, 0);
+
+    if ((action == BLOCK_ERR_STOP_ENOSPC && ret == -ENOSPC)
+        || action == BLOCK_ERR_STOP_ANY)
+    {
+        bdrv_mon_event(bs, BDRV_ACTION_STOP, 0);
+        return vm_stop(0);
+    }
+
+    return false;
+}
 
 static int qcow_open(BlockDriverState *bs, int flags)
 {
@@ -234,6 +248,11 @@ static int qcow_open(BlockDriverState *bs, int flags)
             goto fail;
         bs->backing_file[len] = '\0';
     }
+
+    /* Block queue */
+    s->bq = blkqueue_create(bs->file, qcow_blkqueue_error_cb, bs);
+
+    /* Snapshots */
     if (qcow2_read_snapshots(bs) < 0)
         goto fail;
 
@@ -242,7 +261,11 @@ static int qcow_open(BlockDriverState *bs, int flags)
 #endif
     return 0;
 
- fail:
+fail:
+    if (s->bq) {
+        blkqueue_destroy(s->bq);
+    }
+
     qcow2_free_snapshots(bs);
     qcow2_refcount_close(bs);
     qemu_free(s->l1_table);
@@ -297,13 +320,20 @@ static int qcow_set_key(BlockDriverState *bs, const char *key)
 static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors, int *pnum)
 {
+    BDRVQcowState *s = bs->opaque;
     uint64_t cluster_offset;
     int ret;
+    QcowRequest req = {
+        .bs = bs,
+    };
+
+    blkqueue_init_context(&req.bq_context, s->bq);
 
     *pnum = nb_sectors;
     /* FIXME We can get errors here, but the bdrv_is_allocated interface can't
      * pass them on today */
-    ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
+    ret = qcow2_get_cluster_offset(&req, sector_num << 9, pnum,
+        &cluster_offset);
     if (ret < 0) {
         *pnum = 0;
     }
@@ -341,6 +371,7 @@ typedef struct QCowAIOCB {
     QEMUIOVector hd_qiov;
     QEMUBH *bh;
     QCowL2Meta l2meta;
+    QcowRequest req;
     QLIST_ENTRY(QCowAIOCB) next_depend;
 } QCowAIOCB;
 
@@ -425,7 +456,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
             QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
     }
 
-    ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9,
+    ret = qcow2_get_cluster_offset(&acb->req, acb->sector_num << 9,
         &acb->cur_nr_sectors, &acb->cluster_offset);
     if (ret < 0) {
         goto done;
@@ -464,7 +495,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
         }
     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
         /* add AIO support for compressed blocks ? */
-        if (qcow2_decompress_cluster(bs, acb->cluster_offset) < 0)
+        if (qcow2_decompress_cluster(&acb->req, acb->cluster_offset) < 0)
             goto done;
 
         qemu_iovec_from_buffer(&acb->hd_qiov,
@@ -519,6 +550,7 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque, int is_write)
 {
+    BDRVQcowState *s = bs->opaque;
     QCowAIOCB *acb;
 
     acb = qemu_aio_get(&qcow_aio_pool, bs, cb, opaque);
@@ -536,6 +568,10 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
     acb->cluster_offset = 0;
     acb->l2meta.nb_clusters = 0;
     QLIST_INIT(&acb->l2meta.dependent_requests);
+
+    acb->req.bs = bs;
+    blkqueue_init_context(&acb->req.bq_context, s->bq);
+
     return acb;
 }
 
@@ -585,7 +621,7 @@ static void qcow_aio_write_cb(void *opaque, int ret)
     acb->hd_aiocb = NULL;
 
     if (ret >= 0) {
-        ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta);
+        ret = qcow2_alloc_cluster_link_l2(&acb->req, &acb->l2meta);
     }
 
     run_dependent_requests(&acb->l2meta);
@@ -609,7 +645,7 @@ static void qcow_aio_write_cb(void *opaque, int ret)
         n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors)
         n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
 
-    ret = qcow2_alloc_cluster_offset(bs, acb->sector_num << 9,
+    ret = qcow2_alloc_cluster_offset(&acb->req, acb->sector_num << 9,
         index_in_cluster, n_end, &acb->cur_nr_sectors, &acb->l2meta);
     if (ret < 0) {
         goto done;
@@ -689,6 +725,9 @@ static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
 static void qcow_close(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
+
+    blkqueue_destroy(s->bq);
+
     qemu_free(s->l1_table);
     qemu_free(s->l2_cache);
     qemu_free(s->cluster_cache);
@@ -797,11 +836,17 @@ static int qcow2_change_backing_file(BlockDriverState *bs,
 
 static int preallocate(BlockDriverState *bs)
 {
+    BDRVQcowState *s = bs->opaque;
     uint64_t nb_sectors;
     uint64_t offset;
     int num;
     int ret;
     QCowL2Meta meta;
+    QcowRequest req = {
+        .bs = bs,
+    };
+
+    blkqueue_init_context(&req.bq_context, s->bq);
 
     nb_sectors = bdrv_getlength(bs) >> 9;
     offset = 0;
@@ -810,14 +855,15 @@ static int preallocate(BlockDriverState *bs)
 
     while (nb_sectors) {
         num = MIN(nb_sectors, INT_MAX >> 9);
-        ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, &meta);
+        ret = qcow2_alloc_cluster_offset(&req, offset, 0, num, &num, &meta);
         if (ret < 0) {
             return ret;
         }
 
-        ret = qcow2_alloc_cluster_link_l2(bs, &meta);
+        ret = qcow2_alloc_cluster_link_l2(&req, &meta);
         if (ret < 0) {
-            qcow2_free_any_clusters(bs, meta.cluster_offset, meta.nb_clusters);
+            qcow2_free_any_clusters(&req, meta.cluster_offset,
+                meta.nb_clusters);
             return ret;
         }
 
@@ -931,13 +977,20 @@ static int qcow_create2(const char *filename, int64_t total_size,
      * table)
      */
     BlockDriver* drv = bdrv_find_format("qcow2");
+    QcowRequest req;
+    BDRVQcowState *s;
+
     assert(drv != NULL);
     ret = bdrv_open(bs, filename, BDRV_O_RDWR | BDRV_O_NO_FLUSH, drv);
     if (ret < 0) {
         goto out;
     }
 
-    ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
+    s = bs->opaque;
+    req.bs = bs;
+    blkqueue_init_context(&req.bq_context, s->bq);
+
+    ret = qcow2_alloc_clusters(&req, 2 * cluster_size);
     if (ret < 0) {
         goto out;
 
@@ -1045,6 +1098,11 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
 {
     BDRVQcowState *s = bs->opaque;
     int ret, new_l1_size;
+    QcowRequest req = {
+        .bs = bs,
+    };
+
+    blkqueue_init_context(&req.bq_context, s->bq);
 
     if (offset & 511) {
         return -EINVAL;
@@ -1061,19 +1119,21 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
     }
 
     new_l1_size = size_to_l1(s, offset);
-    ret = qcow2_grow_l1_table(bs, new_l1_size, true);
+    ret = qcow2_grow_l1_table(&req, new_l1_size, true);
     if (ret < 0) {
         return ret;
     }
 
     /* write updated header.size */
     offset = cpu_to_be64(offset);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
+    ret = blkqueue_pwrite(&req.bq_context, offsetof(QCowHeader, size),
                            &offset, sizeof(uint64_t));
     if (ret < 0) {
         return ret;
     }
 
+    blkqueue_barrier(&req.bq_context);
+
     s->l1_vm_state_index = new_l1_size;
     return 0;
 }
@@ -1088,6 +1148,11 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
     int ret, out_len;
     uint8_t *out_buf;
     uint64_t cluster_offset;
+    QcowRequest req = {
+        .bs = bs,
+    };
+
+    blkqueue_init_context(&req.bq_context, s->bq);
 
     if (nb_sectors == 0) {
         /* align end of file to a sector boundary to ease reading with
@@ -1132,7 +1197,7 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
         /* could not compress: write normal cluster */
         bdrv_write(bs, sector_num, buf, s->cluster_sectors);
     } else {
-        cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
+        cluster_offset = qcow2_alloc_compressed_cluster_offset(&req,
             sector_num << 9, out_len);
         if (!cluster_offset)
             return -1;
@@ -1150,13 +1215,64 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
 
 static int qcow_flush(BlockDriverState *bs)
 {
+    BDRVQcowState *s = bs->opaque;
+    int ret;
+
+    ret = blkqueue_flush(s->bq);
+    if (ret < 0) {
+        /*
+         * If the queue is empty, we couldn't handle the write error by
+         * stopping the guest. In this case we don't know which metadata writes
+         * have succeeded. Reopen the qcow2 layer to make sure that all caches
+         * are invalidated.
+         */
+        if (blkqueue_is_empty(s->bq)) {
+            qcow_close(bs);
+            qcow_open(bs, 0);
+        }
+
+        return ret;
+    }
+
     return bdrv_flush(bs->file);
 }
 
+typedef struct QcowFlushAIOCB {
+    BlockDriverState *bs;
+    BlockDriverCompletionFunc *cb;
+    void *opaque;
+} QcowFlushAIOCB;
+
+static void qcow_aio_flush_cb(void *opaque, int ret)
+{
+    QcowFlushAIOCB *acb = opaque;
+    BlockDriverState *bs = acb->bs;
+    BDRVQcowState *s = bs->opaque;
+
+    if (ret < 0 && blkqueue_is_empty(s->bq)) {
+        qcow_close(bs);
+        qcow_open(bs, 0);
+    }
+
+    acb->cb(acb->opaque, ret);
+    qemu_free(acb);
+}
+
 static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
          BlockDriverCompletionFunc *cb, void *opaque)
 {
-    return bdrv_aio_flush(bs->file, cb, opaque);
+    BDRVQcowState *s = bs->opaque;
+    BlockQueueContext context;
+    QcowFlushAIOCB *acb;
+
+    blkqueue_init_context(&context, s->bq);
+
+    acb = qemu_malloc(sizeof(*acb));
+    acb->bs = bs;
+    acb->cb = cb;
+    acb->opaque = opaque;
+
+    return blkqueue_aio_flush(&context, qcow_aio_flush_cb, acb);
 }
 
 static int64_t qcow_vm_state_offset(BDRVQcowState *s)
diff --git a/block/qcow2.h b/block/qcow2.h
index 5217bea..589767c 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -26,6 +26,7 @@
 #define BLOCK_QCOW2_H
 
 #include "aes.h"
+#include "block-queue.h"
 
 //#define DEBUG_ALLOC
 //#define DEBUG_ALLOC2
@@ -107,6 +108,8 @@ typedef struct BDRVQcowState {
     int64_t free_cluster_index;
     int64_t free_byte_offset;
 
+    BlockQueue *bq;
+
     uint32_t crypt_method; /* current crypt method, 0 if no key yet */
     uint32_t crypt_method_header;
     AES_KEY aes_encrypt_key;
@@ -144,6 +147,11 @@ typedef struct QCowL2Meta
     QLIST_ENTRY(QCowL2Meta) next_in_flight;
 } QCowL2Meta;
 
+typedef struct QcowRequest {
+    BlockDriverState*   bs;
+    BlockQueueContext   bq_context;
+} QcowRequest;
+
 static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
 {
     return (size + (s->cluster_size - 1)) >> s->cluster_bits;
@@ -172,38 +180,37 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
 int qcow2_refcount_init(BlockDriverState *bs);
 void qcow2_refcount_close(BlockDriverState *bs);
 
-int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
-void qcow2_free_clusters(BlockDriverState *bs,
-    int64_t offset, int64_t size);
-void qcow2_free_any_clusters(BlockDriverState *bs,
+int64_t qcow2_alloc_clusters(QcowRequest *req, int64_t size);
+int64_t qcow2_alloc_bytes(QcowRequest *req, int size);
+void qcow2_free_clusters(QcowRequest *req, int64_t offset, int64_t size);
+void qcow2_free_any_clusters(QcowRequest *req,
     uint64_t cluster_offset, int nb_clusters);
 
 void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset,
     int64_t size);
-int qcow2_update_snapshot_refcount(BlockDriverState *bs,
+int qcow2_update_snapshot_refcount(QcowRequest *req,
     int64_t l1_table_offset, int l1_size, int addend);
 
 int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res);
 
 /* qcow2-cluster.c functions */
-int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size);
-void qcow2_l2_cache_reset(BlockDriverState *bs);
-int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
+int qcow2_grow_l1_table(QcowRequest *req, int min_size, bool exact_size);
+void qcow2_l2_cache_reset(QcowRequest *req);
+int qcow2_decompress_cluster(QcowRequest *req, uint64_t cluster_offset);
 void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
                      uint8_t *out_buf, const uint8_t *in_buf,
                      int nb_sectors, int enc,
                      const AES_KEY *key);
 
-int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
+int qcow2_get_cluster_offset(QcowRequest *req, uint64_t offset,
     int *num, uint64_t *cluster_offset);
-int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
+int qcow2_alloc_cluster_offset(QcowRequest *req, uint64_t offset,
     int n_start, int n_end, int *num, QCowL2Meta *m);
-uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
+uint64_t qcow2_alloc_compressed_cluster_offset(QcowRequest *req,
                                          uint64_t offset,
                                          int compressed_size);
 
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
+int qcow2_alloc_cluster_link_l2(QcowRequest *req, QCowL2Meta *m);
 
 /* qcow2-snapshot.c functions */
 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
-- 
1.7.2.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]