qemu-block
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 10/12] block.c: add subtree_drains where needed


From: Paolo Bonzini
Subject: Re: [PATCH 10/12] block.c: add subtree_drains where needed
Date: Wed, 19 Jan 2022 10:47:15 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.4.0

Subject doesn't say what needs them: "block: drain block devices across graph modifications"

On 1/18/22 17:27, Emanuele Giuseppe Esposito wrote:
Protect bdrv_replace_child_noperm, as it modifies the
graph by adding/removing elements to .children and .parents
list of a bs. Use the newly introduced
bdrv_subtree_drained_{begin/end}_unlocked drains to achieve
that and be free from the aiocontext lock.

One important criterion to keep in mind is that if the caller of
bdrv_replace_child_noperm creates a transaction, we need to make sure that the
whole transaction is under the same drain block.

Okay, this is the important part and it should be mentioned in patch 8 as well. It should also be in a comment above bdrv_replace_child_noperm().

This is imperative, as having
multiple drains also in the .abort() class of functions causes discrepancies
in the drained counters (as nodes are put back into the original positions),
making it really hard to return all to zero and leaving the code very buggy.
See https://patchew.org/QEMU/20211213104014.69858-1-eesposit@redhat.com/
for more explanations.

Unfortunately we still need to have bdrv_subtree_drained_begin/end
in bdrv_detach_child() releasing and then holding the AioContext
lock, since it later invokes bdrv_try_set_aio_context() that is
not safe yet. Once all is cleaned up, we can also remove the
acquire/release locks in job_unref, artificially added because of this.

About this:

+         * TODO: this is called by job_unref with lock held, because
+         * afterwards it calls bdrv_try_set_aio_context.
+         * Once all of this is fixed, take care of removing
+         * the aiocontext lock and make this function _unlocked.

It may be clear to you, but it's quite cryptic:

- which lock is held by job_unref()? Also, would it make more sense to talk about block_job_free() rather than job_unref()? I can't quite follow where the AioContext lock is taken.

- what is "all of this", and what do you mean by "not safe yet"? Do both refer to bdrv_try_set_aio_context() needing the AioContext lock?

- what is "this function" (that should become _unlocked)?

I think you could also split the patch in multiple parts for different call chains. In particular bdrv_set_backing_hd can be merged with the patch to bdrv_reopen_parse_file_or_backing, since both of them deal with bdrv_set_file_or_backing_noperm.

Paolo

Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
  block.c | 50 ++++++++++++++++++++++++++++++++++++++++++++------
  1 file changed, 44 insertions(+), 6 deletions(-)

diff --git a/block.c b/block.c
index fcc44a49a0..6196c95aae 100644
--- a/block.c
+++ b/block.c
@@ -3114,8 +3114,22 @@ static void bdrv_detach_child(BdrvChild **childp)
      BlockDriverState *old_bs = (*childp)->bs;
assert(qemu_in_main_thread());
+    if (old_bs) {
+        /*
+         * TODO: this is called by job_unref with lock held, because
+         * afterwards it calls bdrv_try_set_aio_context.
+         * Once all of this is fixed, take care of removing
+         * the aiocontext lock and make this function _unlocked.
+         */
+        bdrv_subtree_drained_begin(old_bs);
+    }
+
      bdrv_replace_child_noperm(childp, NULL, true);
+ if (old_bs) {
+        bdrv_subtree_drained_end(old_bs);
+    }
+
      if (old_bs) {
          /*
           * Update permissions for old node. We're just taking a parent away, 
so
@@ -3154,6 +3168,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState 
*child_bs,
      Transaction *tran = tran_new();
assert(qemu_in_main_thread());
+    bdrv_subtree_drained_begin_unlocked(child_bs);
ret = bdrv_attach_child_common(child_bs, child_name, child_class,
                                     child_role, perm, shared_perm, opaque,
@@ -3168,6 +3183,7 @@ out:
      tran_finalize(tran, ret);
      /* child is unset on failure by bdrv_attach_child_common_abort() */
      assert((ret < 0) == !child);
+    bdrv_subtree_drained_end_unlocked(child_bs);
bdrv_unref(child_bs);
      return child;
@@ -3197,6 +3213,9 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
      assert(qemu_in_main_thread());

+    bdrv_subtree_drained_begin_unlocked(parent_bs);
+    bdrv_subtree_drained_begin_unlocked(child_bs);
+
      ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, 
child_class,
                                     child_role, &child, tran, errp);
      if (ret < 0) {
@@ -3211,6 +3230,9 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
  out:
      tran_finalize(tran, ret);
      /* child is unset on failure by bdrv_attach_child_common_abort() */
+    bdrv_subtree_drained_end_unlocked(child_bs);
+    bdrv_subtree_drained_end_unlocked(parent_bs);
+
      assert((ret < 0) == !child);
bdrv_unref(child_bs);
@@ -3456,6 +3478,11 @@ int bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd,
      assert(qemu_in_main_thread());

+    bdrv_subtree_drained_begin_unlocked(bs);
+    if (backing_hd) {
+        bdrv_subtree_drained_begin_unlocked(backing_hd);
+    }
+
      ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
      if (ret < 0) {
          goto out;
@@ -3464,6 +3491,10 @@ int bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd,
      ret = bdrv_refresh_perms(bs, errp);
  out:
      tran_finalize(tran, ret);
+    if (backing_hd) {
+        bdrv_subtree_drained_end_unlocked(backing_hd);
+    }
+    bdrv_subtree_drained_end_unlocked(bs);
return ret;
  }
@@ -5266,7 +5297,8 @@ static int bdrv_replace_node_common(BlockDriverState 
*from,
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
      assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
-    bdrv_drained_begin(from);
+    bdrv_subtree_drained_begin_unlocked(from);
+    bdrv_subtree_drained_begin_unlocked(to);
/*
       * Do the replacement without permission update.
@@ -5298,7 +5330,8 @@ static int bdrv_replace_node_common(BlockDriverState 
*from,
  out:
      tran_finalize(tran, ret);
- bdrv_drained_end(from);
+    bdrv_subtree_drained_end_unlocked(to);
+    bdrv_subtree_drained_end_unlocked(from);
      bdrv_unref(from);
return ret;
@@ -5345,6 +5378,9 @@ int bdrv_append(BlockDriverState *bs_new, 
BlockDriverState *bs_top,
      assert(!bs_new->backing);

+    bdrv_subtree_drained_begin_unlocked(bs_new);
+    bdrv_subtree_drained_begin_unlocked(bs_top);
+
      ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
                                     &child_of_bds, bdrv_backing_role(bs_new),
                                     &bs_new->backing, tran, errp);
@@ -5360,6 +5396,8 @@ int bdrv_append(BlockDriverState *bs_new, 
BlockDriverState *bs_top,
      ret = bdrv_refresh_perms(bs_new, errp);
  out:
      tran_finalize(tran, ret);
+    bdrv_subtree_drained_end_unlocked(bs_top);
+    bdrv_subtree_drained_end_unlocked(bs_new);
      bdrv_refresh_limits(bs_top, NULL, NULL);

@@ -5379,8 +5417,8 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
      assert(qemu_in_main_thread());
bdrv_ref(old_bs);
-    bdrv_drained_begin(old_bs);
-    bdrv_drained_begin(new_bs);
+    bdrv_subtree_drained_begin_unlocked(old_bs);
+    bdrv_subtree_drained_begin_unlocked(new_bs);
bdrv_replace_child_tran(&child, new_bs, tran, true);
      /* @new_bs must have been non-NULL, so @child must not have been freed */
@@ -5394,8 +5432,8 @@ int bdrv_replace_child_bs(BdrvChild *child, 
BlockDriverState *new_bs,
      tran_finalize(tran, ret);

-    bdrv_drained_end(old_bs);
-    bdrv_drained_end(new_bs);
+    bdrv_subtree_drained_end_unlocked(new_bs);
+    bdrv_subtree_drained_end_unlocked(old_bs);
      bdrv_unref(old_bs);
return ret;




reply via email to

[Prev in Thread] Current Thread [Next in Thread]