[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC 1/4] block: fix I/O throttling accounting blind sp
From: |
Zhi Yong Wu |
Subject: |
Re: [Qemu-devel] [RFC 1/4] block: fix I/O throttling accounting blind spot |
Date: |
Wed, 27 Mar 2013 16:50:55 +0800 |
On Thu, Mar 21, 2013 at 10:49 PM, Stefan Hajnoczi <address@hidden> wrote:
> I/O throttling relies on bdrv_acct_done() which is called when a request
> completes. This leaves a blind spot since we only charge for completed
> requests, not submitted requests.
>
> For example, if there is 1 operation remaining in this time slice the
> guest could submit 3 operations and they will all be submitted
> successfully since they don't actually get accounted for until they
> complete.
>
> Originally we probably thought this is okay since the requests will be
> accounted when the time slice is extended. In practice it causes
> fluctuations since the guest can exceed its I/O limit and it will be
> punished for this later on.
>
> Account for I/O upon submission so that I/O limits are enforced
> properly.
>
> Signed-off-by: Stefan Hajnoczi <address@hidden>
> ---
> block.c | 19 ++++++++-----------
> include/block/block_int.h | 2 +-
> 2 files changed, 9 insertions(+), 12 deletions(-)
>
> diff --git a/block.c b/block.c
> index 0a062c9..31fb0b0 100644
> --- a/block.c
> +++ b/block.c
> @@ -141,7 +141,6 @@ void bdrv_io_limits_disable(BlockDriverState *bs)
> bs->slice_start = 0;
> bs->slice_end = 0;
> bs->slice_time = 0;
> - memset(&bs->io_base, 0, sizeof(bs->io_base));
If we try some concussive operations, enable I/O throttling at first,
then disable it, and then enable it, how about? I guess that
bs->slice_submitted will maybe be not correct.
> }
>
> static void bdrv_block_timer(void *opaque)
> @@ -1329,8 +1328,8 @@ static void bdrv_move_feature_fields(BlockDriverState
> *bs_dest,
> bs_dest->slice_time = bs_src->slice_time;
> bs_dest->slice_start = bs_src->slice_start;
> bs_dest->slice_end = bs_src->slice_end;
> + bs_dest->slice_submitted = bs_src->slice_submitted;
> bs_dest->io_limits = bs_src->io_limits;
> - bs_dest->io_base = bs_src->io_base;
> bs_dest->throttled_reqs = bs_src->throttled_reqs;
> bs_dest->block_timer = bs_src->block_timer;
> bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
> @@ -3665,9 +3664,9 @@ static bool bdrv_exceed_bps_limits(BlockDriverState
> *bs, int nb_sectors,
> slice_time = bs->slice_end - bs->slice_start;
> slice_time /= (NANOSECONDS_PER_SECOND);
> bytes_limit = bps_limit * slice_time;
> - bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
> + bytes_base = bs->slice_submitted.bytes[is_write];
> if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
> - bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
> + bytes_base += bs->slice_submitted.bytes[!is_write];
> }
>
> /* bytes_base: the bytes of data which have been read/written; and
> @@ -3683,6 +3682,7 @@ static bool bdrv_exceed_bps_limits(BlockDriverState
> *bs, int nb_sectors,
> *wait = 0;
> }
>
> + bs->slice_submitted.bytes[is_write] += bytes_res;
> return false;
> }
>
> @@ -3725,9 +3725,9 @@ static bool bdrv_exceed_iops_limits(BlockDriverState
> *bs, bool is_write,
> slice_time = bs->slice_end - bs->slice_start;
> slice_time /= (NANOSECONDS_PER_SECOND);
> ios_limit = iops_limit * slice_time;
> - ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
> + ios_base = bs->slice_submitted.ios[is_write];
> if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
> - ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
> + ios_base += bs->slice_submitted.ios[!is_write];
> }
>
> if (ios_base + 1 <= ios_limit) {
> @@ -3735,6 +3735,7 @@ static bool bdrv_exceed_iops_limits(BlockDriverState
> *bs, bool is_write,
> *wait = 0;
> }
>
> + bs->slice_submitted.ios[is_write]++;
> return false;
> }
>
> @@ -3772,11 +3773,7 @@ static bool bdrv_exceed_io_limits(BlockDriverState
> *bs, int nb_sectors,
> bs->slice_start = now;
> bs->slice_end = now + bs->slice_time;
>
> - bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
> - bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
> -
> - bs->io_base.ios[is_write] = bs->nr_ops[is_write];
> - bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
> + memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
> }
>
> elapsed_time = now - bs->slice_start;
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index ce0aa26..b461764 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -251,7 +251,7 @@ struct BlockDriverState {
> int64_t slice_start;
> int64_t slice_end;
> BlockIOLimit io_limits;
> - BlockIOBaseValue io_base;
> + BlockIOBaseValue slice_submitted;
> CoQueue throttled_reqs;
> QEMUTimer *block_timer;
> bool io_limits_enabled;
> --
> 1.8.1.4
>
>
--
Regards,
Zhi Yong Wu