qemu-devel

Re: [RFC PATCH 5/6] job: use global job_mutex to protect struct Job


From: Emanuele Giuseppe Esposito
Subject: Re: [RFC PATCH 5/6] job: use global job_mutex to protect struct Job
Date: Mon, 12 Jul 2021 10:43:42 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.10.1



On 08/07/2021 14:56, Stefan Hajnoczi wrote:
On Wed, Jul 07, 2021 at 06:58:12PM +0200, Emanuele Giuseppe Esposito wrote:
This lock is going to replace most of the AioContext locks
in the job and blockjob, so that a Job can run in an arbitrary
AioContext.

Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
  include/block/blockjob_int.h |   1 +
  include/qemu/job.h           |   2 +
  block/backup.c               |   4 +
  block/mirror.c               |  11 +-
  blockdev.c                   |  62 ++++----
  blockjob.c                   |  67 +++++++--
  job-qmp.c                    |  55 +++----
  job.c                        | 284 +++++++++++++++++++++++++++--------
  qemu-img.c                   |  15 +-
  9 files changed, 350 insertions(+), 151 deletions(-)

diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 6633d83da2..8b91126506 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -53,6 +53,7 @@ struct BlockJobDriver {
       */
      void (*attached_aio_context)(BlockJob *job, AioContext *new_context);
+    /* Called with job mutex *not* held. */
      void (*set_speed)(BlockJob *job, int64_t speed);
  };
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 4421d08d93..359f4e6b3a 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -49,6 +49,8 @@ typedef struct Job {
      /**
       * The type of this job.
       * Set it in job_create and just read.
+     * All calls to driver functions must be made without holding job_mutex,
+     * to avoid deadlocks.
       */
      const JobDriver *driver;
diff --git a/block/backup.c b/block/backup.c
index bd3614ce70..80ce956299 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -315,6 +315,10 @@ static void coroutine_fn backup_pause(Job *job)
      }
  }
+/*
+ * Called with job mutex *not* held (we don't want to call block_copy_kick
+ * with the lock held!)
+ */
  static void coroutine_fn backup_set_speed(BlockJob *job, int64_t speed)
  {
      BackupBlockJob *s = container_of(job, BackupBlockJob, common);
diff --git a/block/mirror.c b/block/mirror.c
index 49aaaafffa..deefaa6a39 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1150,9 +1150,11 @@ static void mirror_complete(Job *job, Error **errp)
      s->should_complete = true;
      /* If the job is paused, it will be re-entered when it is resumed */
+    job_lock();
      if (!job_is_paused(job)) {
-        job_enter(job);
+        job_enter_locked(job);
      }
+    job_unlock();
  }
static void coroutine_fn mirror_pause(Job *job)
@@ -1171,10 +1173,13 @@ static bool mirror_drained_poll(BlockJob *job)
       * from one of our own drain sections, to avoid a deadlock waiting for
       * ourselves.
       */
-    if (!job_is_paused(&s->common.job) && !job_is_cancelled(&s->common.job) &&
-        !s->in_drain) {
+    job_lock();
+    if (!job_is_paused(&s->common.job) &&
+        !job_is_cancelled_locked(&s->common.job) && !s->in_drain) {
+        job_unlock();
          return true;
      }
+    job_unlock();
      return !!s->in_flight;
  }
diff --git a/blockdev.c b/blockdev.c
index 8e2c15370e..9255aea6a2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -150,9 +150,11 @@ void blockdev_mark_auto_del(BlockBackend *blk)
              AioContext *aio_context = job_get_aiocontext(&job->job);
              aio_context_acquire(aio_context);
+            job_lock();
              job_cancel(&job->job, false);
              aio_context_release(aio_context);
+            job_unlock();

This looks strange. The way it's written suggests there is a reason why
job_unlock() has to be called after aio_context_release(). Can
job_unlock() be called immediately after job_cancel()?

Yes, another mistake I shouldn't have made.
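
Something like this, then (just a sketch of the reordered locking in blockdev_mark_auto_del(), with the surrounding code trimmed):

            aio_context_acquire(aio_context);
            job_lock();
            job_cancel(&job->job, false);
            job_unlock();                      /* drop job_mutex right after the cancel */
            aio_context_release(aio_context);  /* then release the AioContext */

i.e. job_mutex is released immediately after job_cancel(), before the AioContext is released.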

          }
      }
@@ -3309,48 +3311,44 @@ out:
      aio_context_release(aio_context);
  }
-/* Get a block job using its ID and acquire its AioContext */
-static BlockJob *find_block_job(const char *id, AioContext **aio_context,
-                                Error **errp)
+/* Get a block job using its ID and acquire its job_lock */

"its" suggests job_lock is per-Job. I suggest saying something like
"Returns with job_lock held on success" instead.

Agreed. I changed the same comment for find_job() in job-qmp.c as well.
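
So the comment becomes something along these lines (just a sketch of the reworded comment, to be applied to both helpers; the failure path isn't shown in the quoted hunk):

/* Get a block job using its ID. Returns with job_lock held on success. */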

Emanuele



