qemu-block
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC v3 1/5] block: add block layer APIs resembling Linux ZonedBlock


From: Stefan Hajnoczi
Subject: Re: [RFC v3 1/5] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls.
Date: Mon, 27 Jun 2022 15:15:36 +0100

On Mon, Jun 27, 2022 at 08:19:13AM +0800, Sam Li wrote:
> diff --git a/block/block-backend.c b/block/block-backend.c
> index e0e1aff4b1..786f964d02 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1810,6 +1810,62 @@ int blk_flush(BlockBackend *blk)
>      return ret;
>  }
>  
> +/*
> + * Return zone_report from BlockDriver. Offset can be any number within
> + * the zone size. No alignment for offset and len.

What is the purpose of len? Is it the maximum number of zones to return
in nr_zones[]?

How does the caller know how many zones were returned?

> + */
> +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
> +                       int64_t len, int64_t *nr_zones,
> +                       BlockZoneDescriptor *zones)
> +{
> +    int ret;
> +    BlockDriverState *bs;
> +    IO_CODE();
> +
> +    blk_inc_in_flight(blk); /* increase before waiting */
> +    blk_wait_while_drained(blk);
> +    bs = blk_bs(blk);
> +
> +    ret = blk_check_byte_request(blk, offset, len);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    bdrv_inc_in_flight(bs);

The bdrv_inc/dec_in_flight() call should be inside
bdrv_co_zone_report(). See bdrv_co_ioctl() for an example.

> +    ret = bdrv_co_zone_report(blk->root->bs, offset, len,
> +                              nr_zones, zones);
> +    bdrv_dec_in_flight(bs);
> +    blk_dec_in_flight(blk);
> +    return ret;
> +}
> +
> +/*
> + * Return zone_mgmt from BlockDriver.

Maybe this should be:

  Send a zone management command.

> @@ -216,6 +217,11 @@ typedef struct RawPosixAIOData {
>              PreallocMode prealloc;
>              Error **errp;
>          } truncate;
> +        struct {
> +            int64_t *nr_zones;
> +            BlockZoneDescriptor *zones;
> +        } zone_report;
> +        zone_op op;

It's cleaner to put op inside a struct zone_mgmt so its purpose is
self-explanatory:

  struct {
      zone_op op;
  } zone_mgmt;

> +static int handle_aiocb_zone_report(void *opaque) {
> +    RawPosixAIOData *aiocb = opaque;
> +    int fd = aiocb->aio_fildes;
> +    int64_t *nr_zones = aiocb->zone_report.nr_zones;
> +    BlockZoneDescriptor *zones = aiocb->zone_report.zones;
> +    int64_t offset = aiocb->aio_offset;
> +    int64_t len = aiocb->aio_nbytes;
> +
> +    struct blk_zone *blkz;
> +    int64_t rep_size, nrz;
> +    int ret, n = 0, i = 0;
> +
> +    nrz = *nr_zones;
> +    if (len == -1) {
> +        return -errno;

Where is errno set? Should this be an errno constant instead like
-EINVAL?

> +    }
> +    rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
> blk_zone);
> +    g_autofree struct blk_zone_report *rep = g_new(struct blk_zone_report, 
> nrz);

g_new() looks incorrect. There should be 1 struct blk_zone_report
followed by nrz struct blk_zone structs. Please use g_malloc(rep_size)
instead.

> +    offset = offset / 512; /* get the unit of the start sector: sector size 
> is 512 bytes. */
> +    printf("start to report zone with offset: 0x%lx\n", offset);
> +
> +    blkz = (struct blk_zone *)(rep + 1);
> +    while (n < nrz) {
> +        memset(rep, 0, rep_size);
> +        rep->sector = offset;
> +        rep->nr_zones = nrz;

What prevents zones[] overflowing? nrz isn't being reduced in the loop,
so maybe the rep->nr_zones return value will eventually exceed the
number of elements still available in zones[n..]?

> +static int handle_aiocb_zone_mgmt(void *opaque) {
> +    RawPosixAIOData *aiocb = opaque;
> +    int fd = aiocb->aio_fildes;
> +    int64_t offset = aiocb->aio_offset;
> +    int64_t len = aiocb->aio_nbytes;
> +    zone_op op = aiocb->op;
> +
> +    struct blk_zone_range range;
> +    const char *ioctl_name;
> +    unsigned long ioctl_op;
> +    int64_t zone_size;
> +    int64_t zone_size_mask;
> +    int ret;
> +
> +    ret = ioctl(fd, BLKGETZONESZ, &zone_size);

Can this value be stored in bs (maybe in BlockLimits) to avoid calling
ioctl(BLKGETZONESZ) each time?

> +    if (ret) {
> +        return -1;

The return value should be a negative errno. -1 is -EPERM but it's
probably not that error code you wanted. This should be:

  return -errno;

> +    }
> +
> +    zone_size_mask = zone_size - 1;
> +    if (offset & zone_size_mask) {
> +        error_report("offset is not the start of a zone");
> +        return -1;

return -EINVAL;

> +    }
> +
> +    if (len & zone_size_mask) {
> +        error_report("len is not aligned to zones");
> +        return -1;

return -EINVAL;

> +static int coroutine_fn raw_co_zone_report(BlockDriverState *bs, int64_t 
> offset,
> +        int64_t len, int64_t *nr_zones,
> +        BlockZoneDescriptor *zones) {
> +    BDRVRawState *s = bs->opaque;
> +    RawPosixAIOData acb;
> +
> +    acb = (RawPosixAIOData) {
> +        .bs         = bs,
> +        .aio_fildes = s->fd,
> +        .aio_type   = QEMU_AIO_IOCTL,
> +        .aio_offset = offset,
> +        .aio_nbytes = len,
> +        .zone_report    = {
> +                .nr_zones       = nr_zones,
> +                .zones          = zones,
> +                },

Indentation is off here. Please use 4-space indentation.

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]