[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 3/5] block: introduce preallocate filter
From: |
Stefan Hajnoczi |
Subject: |
Re: [PATCH 3/5] block: introduce preallocate filter |
Date: |
Wed, 8 Jul 2020 13:07:26 +0100 |
On Sat, Jun 20, 2020 at 05:36:47PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> It may be used for file-systems with slow allocation.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
> qapi/block-core.json | 3 +-
> block/preallocate.c | 255 +++++++++++++++++++++++++++++++++++++++++++
> block/Makefile.objs | 1 +
> 3 files changed, 258 insertions(+), 1 deletion(-)
> create mode 100644 block/preallocate.c
Please add documentation to docs/system/qemu-block-drivers.rst.inc
describing the purpose of this block driver and how to use it.
Since this filter grows the file I guess it's intended to be below an
image format?
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 0e1c6a59f2..a0bda399d6 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -2805,7 +2805,7 @@
> 'cloop', 'compress', 'copy-on-read', 'dmg', 'file', 'ftp',
> 'ftps',
> 'gluster', 'host_cdrom', 'host_device', 'http', 'https', 'iscsi',
> 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
> - 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
> + 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
> { 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
> 'sheepdog',
> 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs'
> ] }
> @@ -3995,6 +3995,7 @@
> 'null-co': 'BlockdevOptionsNull',
> 'nvme': 'BlockdevOptionsNVMe',
> 'parallels': 'BlockdevOptionsGenericFormat',
> + 'preallocate':'BlockdevOptionsGenericFormat',
> 'qcow2': 'BlockdevOptionsQcow2',
> 'qcow': 'BlockdevOptionsQcow',
> 'qed': 'BlockdevOptionsGenericCOWFormat',
> diff --git a/block/preallocate.c b/block/preallocate.c
> new file mode 100644
> index 0000000000..c272a6e41d
> --- /dev/null
> +++ b/block/preallocate.c
> @@ -0,0 +1,255 @@
> +/*
> + * preallocate filter driver
> + *
> + * The driver performs preallocate operation: it is injected above
> + * some node, and before each write over EOF it does additional preallocating
> + * write-zeroes request.
> + *
> + * Copyright (c) 2020 Virtuozzo International GmbH.
> + *
> + * Author:
> + * Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +
> +#include "qemu/module.h"
> +#include "qemu/units.h"
> +#include "block/block_int.h"
> +
> +
> +typedef struct BDRVPreallocateState {
> + int64_t prealloc_size;
> + int64_t prealloc_align;
> +
> + /*
> + * Track real data end, to crop preallocation on close data_end may be
> + * negative, which means that actual status is unknown (nothing cropped
> in
> + * this case)
> + */
> + int64_t data_end;
> +} BDRVPreallocateState;
> +
> +
> +static int preallocate_open(BlockDriverState *bs, QDict *options, int flags,
> + Error **errp)
> +{
> + BDRVPreallocateState *s = bs->opaque;
> +
> + /*
> + * Parameters are hardcoded now. May need to add corresponding options in
> + * future.
> + */
The code for .bdrv_open() options is quick to write. If you add the
options right away then it will be much easier for users who need to
tweak them in the future.
> + s->prealloc_align = 1 * MiB;
> + s->prealloc_size = 128 * MiB;
> +
> + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
> + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
> + false, errp);
> + if (!bs->file) {
> + return -EINVAL;
> + }
> +
> + s->data_end = bdrv_getlength(bs->file->bs);
> + if (s->data_end < 0) {
> + return s->data_end;
> + }
> +
> + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
> + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
> +
> + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
> + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
> + bs->file->bs->supported_zero_flags);
> +
> + return 0;
> +}
> +
> +static void preallocate_close(BlockDriverState *bs)
> +{
> + BDRVPreallocateState *s = bs->opaque;
> +
> + if (s->data_end >= 0 && bdrv_getlength(bs->file->bs) > s->data_end) {
> + bdrv_truncate(bs->file, s->data_end, true, PREALLOC_MODE_OFF, 0,
> NULL);
> + }
> +}
> +
> +static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c,
> + BdrvChildRole role,
> + BlockReopenQueue *reopen_queue,
> + uint64_t perm, uint64_t shared,
> + uint64_t *nperm, uint64_t *nshared)
> +{
> + bdrv_default_perms(bs, c, role, reopen_queue, perm, shared, nperm,
> nshared);
> +
> + /* Force RESIZE permission, to be able to crop file on close() */
> + *nperm |= BLK_PERM_RESIZE;
> +}
> +
> +static coroutine_fn int preallocate_co_preadv_part(
> + BlockDriverState *bs, uint64_t offset, uint64_t bytes,
> + QEMUIOVector *qiov, size_t qiov_offset, int flags)
> +{
> + return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
> + flags);
> +}
> +
> +static int coroutine_fn preallocate_co_pdiscard(BlockDriverState *bs,
> + int64_t offset, int bytes)
> +{
> + return bdrv_co_pdiscard(bs->file, offset, bytes);
> +}
> +
> +static bool coroutine_fn do_preallocate(BlockDriverState *bs, int64_t offset,
> + int64_t bytes, bool write_zero)
> +{
> + BDRVPreallocateState *s = bs->opaque;
> + int64_t len, start, end;
> + BdrvTrackedRequest *lock;
> + int ret;
> +
> + if (s->data_end >= 0) {
> + s->data_end = MAX(s->data_end,
> + QEMU_ALIGN_UP(offset + bytes, BDRV_SECTOR_SIZE));
> + }
> +
> + len = bdrv_getlength(bs->file->bs);
> + if (len < 0) {
> + return false;
> + }
> +
> + if (s->data_end < 0) {
> + s->data_end = MAX(len,
> + QEMU_ALIGN_UP(offset + bytes, BDRV_SECTOR_SIZE));
> + }
> +
> + if (offset + bytes <= len) {
> + return false;
> + }
> +
> + lock = bdrv_co_range_try_lock(bs->file->bs, len, INT64_MAX - len);
> + if (!lock) {
> + /* There are already preallocating requests in-fligth */
s/fligth/flight/
> + return false;
> + }
> +
> + /* Length should not have changed */
> + assert(len == bdrv_getlength(bs->file->bs));
> +
> + start = write_zero ? MIN(offset, len) : len;
> + end = QEMU_ALIGN_UP(offset + bytes + s->prealloc_size,
> s->prealloc_align);
> +
> + ret = bdrv_co_pwrite_zeroes_locked(bs->file, start, end - start,
> + BDRV_REQ_NO_FALLBACK, lock);
> +
> + bdrv_co_range_unlock(lock);
Hmm...if this piece of code is the only user of bdrv_co_range_try_lock()
then a BDRV_REQ_NO_WAIT flag might be a simpler API.
I thought the lock request would be used to perform multiple operations,
but if it's just for a single operation then I think it's less code and
easier to understand without the lock request.
> +
> + return !ret;
> +}
> +
> +static int coroutine_fn preallocate_co_pwrite_zeroes(BlockDriverState *bs,
> + int64_t offset, int bytes, BdrvRequestFlags flags)
> +{
> + if (do_preallocate(bs, offset, bytes, true)) {
> + return 0;
> + }
> +
> + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
> +}
> +
> +static coroutine_fn int preallocate_co_pwritev_part(BlockDriverState *bs,
> + uint64_t offset,
> + uint64_t bytes,
> + QEMUIOVector *qiov,
> + size_t qiov_offset,
> + int flags)
> +{
> + do_preallocate(bs, offset, bytes, false);
> +
> + return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
> + flags);
> +}
> +
> +static int coroutine_fn
> +preallocate_co_truncate(BlockDriverState *bs, int64_t offset,
> + bool exact, PreallocMode prealloc,
> + BdrvRequestFlags flags, Error **errp)
> +{
> + BDRVPreallocateState *s = bs->opaque;
> + int ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags,
> errp);
> +
> + /* s->data_end may become negative here, which means unknown data end */
> + s->data_end = bdrv_getlength(bs->file->bs);
> +
> + return ret;
> +}
> +
> +static int coroutine_fn preallocate_co_flush(BlockDriverState *bs)
> +{
> + if (!bs->file) {
> + return 0;
> + }
When does this happen? It's surprising to see the !bs->file check here
but not in other functions.
> +
> + return bdrv_co_flush(bs->file->bs);
> +}
> +
> +static int64_t preallocate_getlength(BlockDriverState *bs)
> +{
> + /*
> + * We probably can return s->data_end here, but seems safer to return
> real
> + * file length, not trying to hide the preallocation.
> + *
> + * Still, don't miss the chance to restore s->data_end if it is broken.
> + */
> + BDRVPreallocateState *s = bs->opaque;
> + int64_t ret = bdrv_getlength(bs->file->bs);
> +
> + if (s->data_end < 0) {
> + s->data_end = ret;
> + }
> +
> + return ret;
> +}
> +
> +BlockDriver bdrv_preallocate_filter = {
> + .format_name = "preallocate",
> + .instance_size = sizeof(BDRVPreallocateState),
> +
> + .bdrv_getlength = preallocate_getlength,
> + .bdrv_open = preallocate_open,
> + .bdrv_close = preallocate_close,
> +
> + .bdrv_co_preadv_part = preallocate_co_preadv_part,
> + .bdrv_co_pwritev_part = preallocate_co_pwritev_part,
> + .bdrv_co_pwrite_zeroes = preallocate_co_pwrite_zeroes,
> + .bdrv_co_pdiscard = preallocate_co_pdiscard,
> + .bdrv_co_flush = preallocate_co_flush,
> + .bdrv_co_truncate = preallocate_co_truncate,
> +
> + .bdrv_co_block_status = bdrv_co_block_status_from_file,
> +
> + .bdrv_child_perm = preallocate_child_perm,
> +
> + .has_variable_length = true,
> + .is_filter = true,
> +};
> +
> +static void bdrv_preallocate_init(void)
> +{
> + bdrv_register(&bdrv_preallocate_filter);
> +}
> +
> +block_init(bdrv_preallocate_init);
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 3635b6b4c1..f46a353a35 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -45,6 +45,7 @@ block-obj-y += crypto.o
> block-obj-y += aio_task.o
> block-obj-y += backup-top.o
> block-obj-y += filter-compress.o
> +block-obj-y += preallocate.o
> common-obj-y += monitor/
>
> block-obj-y += stream.o
> --
> 2.18.0
>
signature.asc
Description: PGP signature
- Re: [PATCH 3/5] block: introduce preallocate filter,
Stefan Hajnoczi <=