qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v5 4/6] qcow2: introduce inflight-write-counters


From: Vladimir Sementsov-Ogievskiy
Subject: [PATCH v5 4/6] qcow2: introduce inflight-write-counters
Date: Fri, 26 Mar 2021 23:00:43 +0300

We have a bug in qcow2: assume we've started data write into host
cluster A. s->lock is unlocked. During the write the refcount of
cluster A may become zero, cluster may be reallocated for other needs,
and our in-flight write become a use-after-free. More details will be
in the further commit which actually fixes the bug.

For now, let's prepare infrastructure for the following fix. We are
going to track these in-flight data writes. So, we create a hash map

  cluster_index -> Qcow2InFlightRefcount

For now, add only basic structure and simple counting logic. No guest
write is actually counted, we only add infrastructure.
Qcow2InFlightRefcount will be expanded in the following commit, that's
why we need a structure.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/qcow2.h          | 16 ++++++++
 block/qcow2-refcount.c | 86 ++++++++++++++++++++++++++++++++++++++++++
 block/qcow2.c          |  5 +++
 3 files changed, 107 insertions(+)

diff --git a/block/qcow2.h b/block/qcow2.h
index 0fe5f74ed3..b25ef06111 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -420,6 +420,17 @@ typedef struct BDRVQcow2State {
      * is to convert the image with the desired compression type set.
      */
     Qcow2CompressionType compression_type;
+
+    /*
+     * inflight_writes_counters:
+     *   Map cluster index (int64_t) -> Qcow2InFlightWriteCounter
+     *
+     * The map contains entries only for clusters that have in-flight data
+     * (not-metadata) writes. So Qcow2InFlightWriteCounter::inflight_writes_cnt
+     * is always (except for when being removed in update_inflight_write_cnt())
+     * >= 1 for stored elements.
+     */
+    GHashTable *inflight_writes_counters;
 } BDRVQcow2State;
 
 typedef struct Qcow2COWRegion {
@@ -896,6 +907,11 @@ int qcow2_shrink_reftable(BlockDriverState *bs);
 int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
 int qcow2_detect_metadata_preallocation(BlockDriverState *bs);
 
+void qcow2_inflight_writes_inc(BlockDriverState *bs, int64_t offset,
+                               int64_t length);
+void qcow2_inflight_writes_dec(BlockDriverState *bs, int64_t offset,
+                               int64_t length);
+
 /* qcow2-cluster.c functions */
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                         bool exact_size);
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 1369724b41..eedc83ea4a 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -799,6 +799,92 @@ found:
     }
 }
 
+typedef struct Qcow2InFlightWriteCounter {
+    /*
+     * Number of in-flight writes to the cluster, always > 0, as when it 
becomes
+     * 0 the entry is removed from s->inflight_writes_counters.
+     */
+    uint64_t inflight_writes_cnt;
+} Qcow2InFlightWriteCounter;
+
+/* Find Qcow2InFlightWriteCounter corresponding to @cluster_index */
+static Qcow2InFlightWriteCounter *find_infl_wr(BDRVQcow2State *s,
+                                               int64_t cluster_index)
+{
+    Qcow2InFlightWriteCounter *infl;
+
+    if (!s->inflight_writes_counters) {
+        return NULL;
+    }
+
+    infl = g_hash_table_lookup(s->inflight_writes_counters, &cluster_index);
+
+    if (infl) {
+        assert(infl->inflight_writes_cnt > 0);
+    }
+
+    return infl;
+}
+
+/*
+ * The function is intended to be called with decrease=false before writing
+ * guest data and with decrease=true after write finish.
+ */
+static void coroutine_fn
+update_inflight_write_cnt(BlockDriverState *bs, int64_t offset, int64_t length,
+                          bool decrease)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int64_t start, last, cluster_index;
+
+    start = start_of_cluster(s, offset) >> s->cluster_bits;
+    last = start_of_cluster(s, offset + length - 1) >> s->cluster_bits;
+    for (cluster_index = start; cluster_index <= last; cluster_index++) {
+        Qcow2InFlightWriteCounter *infl = find_infl_wr(s, cluster_index);
+
+        if (!decrease) {
+            if (!infl) {
+                infl = g_new0(Qcow2InFlightWriteCounter, 1);
+                g_hash_table_insert(s->inflight_writes_counters,
+                                    g_memdup(&cluster_index,
+                                             sizeof(cluster_index)), infl);
+            }
+            infl->inflight_writes_cnt++;
+            continue;
+        }
+
+        /* decrease */
+        assert(infl);
+        assert(infl->inflight_writes_cnt >= 1);
+
+        infl->inflight_writes_cnt--;
+
+        if (infl->inflight_writes_cnt == 0) {
+            g_hash_table_remove(s->inflight_writes_counters, &cluster_index);
+        }
+    }
+}
+
+/*
+ * Works both with locked and unlocked s->lock. It just doesn't touch s->lock 
in
+ * contrast to qcow2_inflight_writes_dec()
+ */
+void qcow2_inflight_writes_inc(BlockDriverState *bs, int64_t offset,
+                               int64_t length)
+{
+    update_inflight_write_cnt(bs, offset, length, false);
+}
+
+/*
+ * Called with s->lock not locked by caller. Will take s->lock only if need to
+ * actually discard some clusters.
+ */
+void qcow2_inflight_writes_dec(BlockDriverState *bs, int64_t offset,
+                               int64_t length)
+{
+    update_inflight_write_cnt(bs, offset, length, true);
+}
+
 /* XXX: cache several refcount block clusters ? */
 /* @addend is the absolute value of the addend; if @decrease is set, @addend
  * will be subtracted from the current refcount, otherwise it will be added */
diff --git a/block/qcow2.c b/block/qcow2.c
index 0db1227ac9..0a5bd4ea4e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1833,6 +1833,8 @@ static int coroutine_fn qcow2_do_open(BlockDriverState 
*bs, QDict *options,
 #endif
 
     qemu_co_queue_init(&s->thread_task_queue);
+    s->inflight_writes_counters =
+        g_hash_table_new_full(g_int64_hash, g_int64_equal, g_free, g_free);
 
     return ret;
 
@@ -2709,6 +2711,9 @@ static void qcow2_close(BlockDriverState *bs)
     g_free(s->image_backing_file);
     g_free(s->image_backing_format);
 
+    assert(g_hash_table_size(s->inflight_writes_counters) == 0);
+    g_hash_table_unref(s->inflight_writes_counters);
+
     if (has_data_file(bs)) {
         bdrv_unref_child(bs, s->data_file);
         s->data_file = NULL;
-- 
2.29.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]