[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-block] [PATCH 4/4] blockjob: reimplement block_job_sleep_ns to
From: |
Jeff Cody |
Subject: |
Re: [Qemu-block] [PATCH 4/4] blockjob: reimplement block_job_sleep_ns to allow cancellation |
Date: |
Wed, 29 Nov 2017 08:56:21 -0500 |
User-agent: |
Mutt/1.5.24 (2015-08-30) |
On Wed, Nov 29, 2017 at 11:25:13AM +0100, Paolo Bonzini wrote:
> This reverts the effects of commit 4afeffc857 ("blockjob: do not allow
> coroutine double entry or entry-after-completion", 2017-11-21)
>
> This fixed the symptom of a bug rather than the root cause. Canceling the
> wait on a sleeping blockjob coroutine is generally fine, we just need to
> make it work correctly across AioContexts. To do so, use a QEMUTimer
> that calls block_job_enter. Use a mutex to ensure that block_job_enter
> synchronizes correctly with block_job_sleep_ns.
>
> Signed-off-by: Paolo Bonzini <address@hidden>
> ---
> blockjob.c | 57
> +++++++++++++++++++++++++++++++++++---------
> include/block/blockjob.h | 5 +++-
> include/block/blockjob_int.h | 4 ++--
> 3 files changed, 52 insertions(+), 14 deletions(-)
>
> diff --git a/blockjob.c b/blockjob.c
> index 4d22b7d2fb..3fdaabbc1f 100644
> --- a/blockjob.c
> +++ b/blockjob.c
> @@ -37,6 +37,26 @@
> #include "qemu/timer.h"
> #include "qapi-event.h"
>
> +/* Right now, this mutex is only needed to synchronize accesses to job->busy,
> + * especially concurrent calls to block_job_enter.
> + */
See my comment on Kevin's comment
> +static QemuMutex block_job_mutex;
> +
> +static void block_job_lock(void)
> +{
> + qemu_mutex_lock(&block_job_mutex);
> +}
> +
> +static void block_job_unlock(void)
> +{
> + qemu_mutex_unlock(&block_job_mutex);
> +}
> +
> +static void __attribute__((__constructor__)) block_job_init(void)
> +{
> + qemu_mutex_init(&block_job_mutex);
> +}
> +
> static void block_job_event_cancelled(BlockJob *job);
> static void block_job_event_completed(BlockJob *job, const char *msg);
>
> @@ -161,6 +181,7 @@ void block_job_unref(BlockJob *job)
> blk_unref(job->blk);
> error_free(job->blocker);
> g_free(job->id);
> + assert(!timer_pending(&job->sleep_timer));
> g_free(job);
> }
> }
> @@ -287,6 +308,13 @@ static void coroutine_fn block_job_co_entry(void *opaque)
> job->driver->start(job);
> }
>
> +static void block_job_sleep_timer_cb(void *opaque)
> +{
> + BlockJob *job = opaque;
> +
> + block_job_enter(job);
> +}
> +
> void block_job_start(BlockJob *job)
> {
> assert(job && !block_job_started(job) && job->paused &&
> @@ -556,7 +584,7 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
> info->type = g_strdup(BlockJobType_str(job->driver->job_type));
> info->device = g_strdup(job->id);
> info->len = job->len;
> - info->busy = job->busy;
> + info->busy = atomic_read(&job->busy);
> info->paused = job->pause_count > 0;
> info->offset = job->offset;
> info->speed = job->speed;
> @@ -664,6 +692,9 @@ void *block_job_create(const char *job_id, const
> BlockJobDriver *driver,
> job->paused = true;
> job->pause_count = 1;
> job->refcnt = 1;
> + aio_timer_init(qemu_get_aio_context(), &job->sleep_timer,
> + QEMU_CLOCK_REALTIME, SCALE_NS,
> + block_job_sleep_timer_cb, job);
>
> error_setg(&job->blocker, "block device is in use by block job: %s",
> BlockJobType_str(driver->job_type));
> @@ -729,9 +760,14 @@ static bool block_job_should_pause(BlockJob *job)
> return job->pause_count > 0;
> }
>
> -static void block_job_do_yield(BlockJob *job)
> +static void block_job_do_yield(BlockJob *job, uint64_t ns)
Maybe worth a comment that using '-1' on a uint64_t is by design, and not a
bug, so this doesn't get 'fixed' in the future?
> {
> + block_job_lock();
> + if (ns != -1) {
> + timer_mod(&job->sleep_timer, ns);
> + }
> job->busy = false;
> + block_job_unlock();
> qemu_coroutine_yield();
>
> /* Set by block_job_enter before re-entering the coroutine. */
> @@ -755,7 +791,7 @@ void coroutine_fn block_job_pause_point(BlockJob *job)
>
> if (block_job_should_pause(job) && !block_job_is_cancelled(job)) {
> job->paused = true;
> - block_job_do_yield(job);
> + block_job_do_yield(job, -1);
> job->paused = false;
> }
>
> @@ -785,11 +821,16 @@ void block_job_enter(BlockJob *job)
> return;
> }
>
> + block_job_lock();
> if (job->busy) {
> + block_job_unlock();
> return;
> }
>
> + assert(!job->deferred_to_main_loop);
> + timer_del(&job->sleep_timer);
> job->busy = true;
> + block_job_unlock();
> aio_co_wake(job->co);
> }
>
> @@ -807,14 +848,8 @@ void block_job_sleep_ns(BlockJob *job, int64_t ns)
> return;
> }
>
> - /* We need to leave job->busy set here, because when we have
> - * put a coroutine to 'sleep', we have scheduled it to run in
> - * the future. We cannot enter that same coroutine again before
> - * it wakes and runs, otherwise we risk double-entry or entry after
> - * completion. */
> if (!block_job_should_pause(job)) {
> - co_aio_sleep_ns(blk_get_aio_context(job->blk),
> - QEMU_CLOCK_REALTIME, ns);
> + block_job_do_yield(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns);
> }
>
> block_job_pause_point(job);
> @@ -830,7 +865,7 @@ void block_job_yield(BlockJob *job)
> }
>
> if (!block_job_should_pause(job)) {
> - block_job_do_yield(job);
> + block_job_do_yield(job, -1);
> }
>
> block_job_pause_point(job);
> diff --git a/include/block/blockjob.h b/include/block/blockjob.h
> index 67c0968fa5..956f0d6819 100644
> --- a/include/block/blockjob.h
> +++ b/include/block/blockjob.h
> @@ -77,7 +77,7 @@ typedef struct BlockJob {
> /**
> * Set to false by the job while the coroutine has yielded and may be
> * re-entered by block_job_enter(). There may still be I/O or event loop
> - * activity pending.
> + * activity pending. Accessed under block_job_mutex (in blockjob.c).
> */
> bool busy;
>
> @@ -135,6 +135,9 @@ typedef struct BlockJob {
> */
> int ret;
>
> + /** Timer that is used by @block_job_sleep_ns. */
Maybe also add similar statement as above for busy:
/* Accessed under block_job_mutex (in blockjob.c) */
> + QEMUTimer sleep_timer;
> +
> /** Non-NULL if this job is part of a transaction */
> BlockJobTxn *txn;
> QLIST_ENTRY(BlockJob) txn_list;
> diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
> index f7ab183a39..c9b23b0cc9 100644
> --- a/include/block/blockjob_int.h
> +++ b/include/block/blockjob_int.h
> @@ -142,8 +142,8 @@ void *block_job_create(const char *job_id, const
> BlockJobDriver *driver,
> * @ns: How many nanoseconds to stop for.
> *
> * Put the job to sleep (assuming that it wasn't canceled) for @ns
> - * %QEMU_CLOCK_REALTIME nanoseconds. Canceling the job will not interrupt
> - * the wait, so the cancel will not process until the coroutine wakes up.
> + * %QEMU_CLOCK_REALTIME nanoseconds. Canceling the job will immediately
> + * interrupt the wait.
> */
> void block_job_sleep_ns(BlockJob *job, int64_t ns);
>
> --
> 2.14.3
>
My only concerns were regarding comments, and Kevin can fix them when
applying if he wants. So:
Reviewed-by: Jeff Cody <address@hidden>
- [Qemu-block] [PATCH 2/4] blockjob: remove clock argument from block_job_sleep_ns, (continued)
- [Qemu-block] [PATCH 2/4] blockjob: remove clock argument from block_job_sleep_ns, Paolo Bonzini, 2017/11/29
- [Qemu-block] [PATCH 3/4] blockjob: introduce block_job_do_yield, Paolo Bonzini, 2017/11/29
- [Qemu-block] [PATCH 4/4] blockjob: reimplement block_job_sleep_ns to allow cancellation, Paolo Bonzini, 2017/11/29
- Re: [Qemu-block] [PATCH 4/4] blockjob: reimplement block_job_sleep_ns to allow cancellation, Stefan Hajnoczi, 2017/11/29
- Re: [Qemu-block] [Qemu-devel] [PATCH 4/4] blockjob: reimplement block_job_sleep_ns to allow cancellation, Eric Blake, 2017/11/29
- Re: [Qemu-block] [PATCH 0/4] blockjob: reinstate busy=false/busy=true in block_job_sleep_ns, Jeff Cody, 2017/11/29
- Re: [Qemu-block] [PATCH 0/4] blockjob: reinstate busy=false/busy=true in block_job_sleep_ns, Fam Zheng, 2017/11/29