[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v3 6/6] block/qcow2: use seqcache for compressed writes
From: |
Vladimir Sementsov-Ogievskiy |
Subject: |
[PATCH v3 6/6] block/qcow2: use seqcache for compressed writes |
Date: |
Fri, 5 Mar 2021 20:35:07 +0300 |
Compressed writes are unaligned to 512, which works very slow in
O_DIRECT mode. Let's use the cache.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
block/coroutines.h | 3 +
block/qcow2.h | 4 ++
block/qcow2-refcount.c | 10 +++
block/qcow2.c | 158 ++++++++++++++++++++++++++++++++++++++---
4 files changed, 164 insertions(+), 11 deletions(-)
diff --git a/block/coroutines.h b/block/coroutines.h
index 4cfb4946e6..cb8e05830e 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -66,4 +66,7 @@ int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
QEMUIOVector *qiov, int64_t pos);
+int coroutine_fn qcow2_co_flush_compressed_cache(BlockDriverState *bs);
+int generated_co_wrapper qcow2_flush_compressed_cache(BlockDriverState *bs);
+
#endif /* BLOCK_COROUTINES_INT_H */
diff --git a/block/qcow2.h b/block/qcow2.h
index e18d8dfbae..8b8fed0950 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -28,6 +28,7 @@
#include "crypto/block.h"
#include "qemu/coroutine.h"
#include "qemu/units.h"
+#include "qemu/seqcache.h"
#include "block/block_int.h"
//#define DEBUG_ALLOC
@@ -422,6 +423,9 @@ typedef struct BDRVQcow2State {
Qcow2CompressionType compression_type;
GHashTable *inflight_writes_counters;
+
+ SeqCache *compressed_cache;
+ int compressed_flush_ret;
} BDRVQcow2State;
typedef struct Qcow2COWRegion {
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 464d133368..218917308e 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -29,6 +29,7 @@
#include "qemu/bswap.h"
#include "qemu/cutils.h"
#include "trace.h"
+#include "block/coroutines.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
uint64_t max);
@@ -1040,6 +1041,10 @@ static int QEMU_WARN_UNUSED_RESULT
update_refcount(BlockDriverState *bs,
qcow2_cache_discard(s->l2_table_cache, table);
}
+ if (s->compressed_cache) {
+ seqcache_discard_cluster(s->compressed_cache, cluster_offset);
+ }
+
if (s->discard_passthrough[type]) {
update_refcount_discard(bs, cluster_offset, s->cluster_size);
}
@@ -1349,6 +1354,11 @@ int coroutine_fn qcow2_write_caches(BlockDriverState *bs)
BDRVQcow2State *s = bs->opaque;
int ret;
+ ret = qcow2_flush_compressed_cache(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
ret = qcow2_cache_write(bs, s->l2_table_cache);
if (ret < 0) {
return ret;
diff --git a/block/qcow2.c b/block/qcow2.c
index 6ee94421e0..5f3713cd6f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -42,6 +42,7 @@
#include "qapi/qapi-visit-block-core.h"
#include "crypto.h"
#include "block/aio_task.h"
+#include "block/coroutines.h"
/*
Differences with QCOW:
@@ -1834,6 +1835,10 @@ static int coroutine_fn qcow2_do_open(BlockDriverState
*bs, QDict *options,
s->inflight_writes_counters =
g_hash_table_new_full(g_int64_hash, g_int64_equal, g_free, g_free);
+ if (!has_data_file(bs) && (bs->file->bs->open_flags & BDRV_O_NOCACHE)) {
+ s->compressed_cache = seqcache_new(s->cluster_size);
+ }
+
return ret;
fail:
@@ -2653,6 +2658,91 @@ fail_nometa:
return ret;
}
+/*
+ * Flush one cluster of compressed cache if exists. If @unfinished is true and
+ * there is no finished cluster to flush, flush the unfinished one. Otherwise
+ * flush only finished clusters.
+ *
+ * Return 0 if nothing to flush, 1 if one cluster successfully flushed and <0
on
+ * error.
+ */
+static int coroutine_fn
+qcow2_co_compressed_flush_one(BlockDriverState *bs, bool unfinished)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int ret;
+ int64_t align = 1;
+ int64_t offset, bytes;
+ uint8_t *buf;
+
+ if (!s->compressed_cache) {
+ return 0;
+ }
+
+ if (!seqcache_get_next_flush(s->compressed_cache, &offset, &bytes, &buf,
+ &unfinished))
+ {
+ return 0;
+ }
+
+ qcow2_inflight_writes_inc(bs, offset, bytes);
+
+ /*
+ * Don't try to align-up unfinished cached cluster, parallel write to it is
+ * possible! For finished host clusters data in the tail of the cluster
will
+ * be never used. So, take some good alignment for speed.
+ *
+ * Note also, that seqcache guarantees that allocated size of @buf is
enough
+ * to fill the cluster up to the end, so we are safe to align up with
+ * align <= cluster_size.
+ */
+ if (!unfinished) {
+ align = MIN(s->cluster_size,
+ MAX(s->data_file->bs->bl.request_alignment,
+ bdrv_opt_mem_align(bs)));
+ }
+
+ BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
+ ret = bdrv_co_pwrite(s->data_file, offset,
+ QEMU_ALIGN_UP(offset + bytes, align) - offset,
+ buf, 0);
+ if (ret < 0 && s->compressed_flush_ret == 0) {
+ /*
+ * The data that was "written" earlier is lost. We don't want to
+ * care with storing it somehow to retry flushing later. Also, how much
+ * data will we have to store, if not drop it after failed flush?
+ * After this point qcow2_co_flush_compressed_cache() (and
+ * qcow2_flush_compressed_cache()) never return success.
+ */
+ s->compressed_flush_ret = ret;
+ }
+
+ seqcache_discard_cluster(s->compressed_cache, offset);
+
+ qcow2_inflight_writes_dec(bs, offset, bytes);
+
+ return ret < 0 ? ret : 1;
+}
+
+int coroutine_fn qcow2_co_flush_compressed_cache(BlockDriverState *bs)
+{
+ BDRVQcow2State *s = bs->opaque;
+
+ if (s->compressed_cache) {
+ uint64_t nb_clusters = seqcache_nb_clusters(s->compressed_cache);
+
+ /*
+ * If parallel writes are possible we don't want to loop forever. So
+ * flush only clusters available at start of flush operation.
+ */
+ while (nb_clusters--) {
+ qcow2_co_compressed_flush_one(bs, true);
+ }
+ }
+
+ return s->compressed_flush_ret;
+}
+
static int qcow2_inactivate(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
@@ -2667,6 +2757,13 @@ static int qcow2_inactivate(BlockDriverState *bs)
bdrv_get_device_or_node_name(bs));
}
+ ret = qcow2_flush_compressed_cache(bs);
+ if (ret) {
+ result = ret;
+ error_report("Failed to flush the compressed write cache: %s",
+ strerror(-ret));
+ }
+
ret = qcow2_cache_flush(bs, s->l2_table_cache);
if (ret) {
result = ret;
@@ -2699,6 +2796,12 @@ static void qcow2_close(BlockDriverState *bs)
qcow2_inactivate(bs);
}
+ /*
+ * Cache should be flushed in qcow2_inactivate() and should be empty in
+ * inactive mode. So we are safe to free it.
+ */
+ seqcache_free(s->compressed_cache);
+
cache_clean_timer_del(bs);
qcow2_cache_destroy(s->l2_table_cache);
qcow2_cache_destroy(s->refcount_block_cache);
@@ -4558,18 +4661,42 @@ qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
goto fail;
}
- qcow2_inflight_writes_inc(bs, cluster_offset, out_len);
+ if (s->compressed_cache) {
+ /*
+ * It's important to do seqcache_write() in the same critical section
+ * (by s->lock) as qcow2_alloc_compressed_cluster_offset(), so that the
+ * cache is filled sequentially.
+ */
+ seqcache_write(s->compressed_cache, cluster_offset, out_len, out_buf);
- qemu_co_mutex_unlock(&s->lock);
+ qemu_co_mutex_unlock(&s->lock);
- BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
- ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0);
+ ret = qcow2_co_compressed_flush_one(bs, false);
+ if (ret < 0) {
+ /*
+ * if ret < 0 it probably not this request which failed, but some
+ * previous one that was cached. Moreover, when we write to the
+ * cache we probably may always report success, because
+ * seqcache_write() never fails. Still, it's better to inform
+ * compressed backup now then wait until final flush.
+ */
+ goto fail;
+ }
+ } else {
+ qcow2_inflight_writes_inc(bs, cluster_offset, out_len);
- qcow2_inflight_writes_dec(bs, cluster_offset, out_len);
+ qemu_co_mutex_unlock(&s->lock);
- if (ret < 0) {
- goto fail;
+ BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
+ ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf,
0);
+
+ qcow2_inflight_writes_dec(bs, cluster_offset, out_len);
+
+ if (ret < 0) {
+ goto fail;
+ }
}
+
success:
ret = 0;
fail:
@@ -4681,10 +4808,19 @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
out_buf = qemu_blockalign(bs, s->cluster_size);
- BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
- ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0);
- if (ret < 0) {
- goto fail;
+ /*
+ * seqcache_read may return less bytes than csize, as csize may exceed
+ * actual compressed data size. So we are OK if seqcache_read returns
+ * something > 0.
+ */
+ if (!s->compressed_cache ||
+ seqcache_read(s->compressed_cache, coffset, csize, buf) <= 0)
+ {
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
+ ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0);
+ if (ret < 0) {
+ goto fail;
+ }
}
if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) {
--
2.29.2
- [PATCH v3 0/6] qcow2: compressed write cache, Vladimir Sementsov-Ogievskiy, 2021/03/05
- [PATCH v3 1/6] block-jobs: flush target at the end of .run(), Vladimir Sementsov-Ogievskiy, 2021/03/05
- [PATCH v3 6/6] block/qcow2: use seqcache for compressed writes,
Vladimir Sementsov-Ogievskiy <=
- Re: [PATCH v3 6/6] block/qcow2: use seqcache for compressed writes, Max Reitz, 2021/03/12
- Re: [PATCH v3 6/6] block/qcow2: use seqcache for compressed writes, Vladimir Sementsov-Ogievskiy, 2021/03/12
- Re: [PATCH v3 6/6] block/qcow2: use seqcache for compressed writes, Max Reitz, 2021/03/15
- Re: [PATCH v3 6/6] block/qcow2: use seqcache for compressed writes, Vladimir Sementsov-Ogievskiy, 2021/03/15
- Re: [PATCH v3 6/6] block/qcow2: use seqcache for compressed writes, Max Reitz, 2021/03/16
- Re: [PATCH v3 6/6] block/qcow2: use seqcache for compressed writes, Vladimir Sementsov-Ogievskiy, 2021/03/16
- Re: [PATCH v3 6/6] block/qcow2: use seqcache for compressed writes, Max Reitz, 2021/03/17
- Re: [PATCH v3 6/6] block/qcow2: use seqcache for compressed writes, Vladimir Sementsov-Ogievskiy, 2021/03/12
- Re: [PATCH v3 6/6] block/qcow2: use seqcache for compressed writes, Vladimir Sementsov-Ogievskiy, 2021/03/29
[PATCH v3 2/6] iotests: add qcow2-discard-during-rewrite, Vladimir Sementsov-Ogievskiy, 2021/03/05