[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-block] [Qemu-devel] [PATCH v3 44/44] nbd: Implement NBD_OPT_BL
From: |
Alex Bligh |
Subject: |
Re: [Qemu-block] [Qemu-devel] [PATCH v3 44/44] nbd: Implement NBD_OPT_BLOCK_SIZE on client |
Date: |
Mon, 25 Apr 2016 13:19:41 +0100 |
On 23 Apr 2016, at 00:40, Eric Blake <address@hidden> wrote:
> The upstream NBD Protocol has defined a new extension to allow
> the server to advertise block sizes to the client, as well as
> a way for the client to inform the server that it intends to
> obey block sizes.
>
> Pass any received sizes on to the block layer.
>
> Use the minimum block size as the sector size we pass to the
> kernel - which also has the nice effect of cooperating with
> (non-qemu) servers that don't do read-modify-write when exposing
> a block device with 4k sectors; it can also allow us to visit a
> file larger than 2T on a 32-bit kernel.
>
> Signed-off-by: Eric Blake <address@hidden>
> ---
> include/block/nbd.h | 3 +++
> block/nbd-client.c | 3 +++
> block/nbd.c | 17 +++++++++---
> nbd/client.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++-----
> 4 files changed, 87 insertions(+), 10 deletions(-)
>
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index a5c68df..27a6854 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -133,6 +133,9 @@ enum {
> struct NbdExportInfo {
> uint64_t size;
> uint16_t flags;
> + uint32_t min_block;
> + uint32_t opt_block;
> + uint32_t max_block;
> };
> typedef struct NbdExportInfo NbdExportInfo;
>
> diff --git a/block/nbd-client.c b/block/nbd-client.c
> index 2b6ac27..602a8ab 100644
> --- a/block/nbd-client.c
> +++ b/block/nbd-client.c
> @@ -443,6 +443,9 @@ int nbd_client_init(BlockDriverState *bs,
> logout("Failed to negotiate with the NBD server\n");
> return ret;
> }
> + if (client->info.min_block > bs->request_alignment) {
> + bs->request_alignment = client->info.min_block;
> + }
>
> qemu_co_mutex_init(&client->send_mutex);
> qemu_co_mutex_init(&client->free_sema);
> diff --git a/block/nbd.c b/block/nbd.c
> index 5172039..bb7df55 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -407,9 +407,20 @@ static int nbd_co_flush(BlockDriverState *bs)
>
> static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
> {
> - bs->bl.max_discard = UINT32_MAX >> BDRV_SECTOR_BITS;
> - bs->bl.max_write_zeroes = UINT32_MAX >> BDRV_SECTOR_BITS;
> - bs->bl.max_transfer_length = UINT32_MAX >> BDRV_SECTOR_BITS;
> + NbdClientSession *s = nbd_get_client_session(bs);
> + int max = UINT32_MAX >> BDRV_SECTOR_BITS;
> +
> + if (s->info.max_block) {
> + max = s->info.max_block >> BDRV_SECTOR_BITS;
> + }
> + bs->bl.max_discard = max;
> + bs->bl.max_write_zeroes = max;
> + bs->bl.max_transfer_length = max;
> +
> + if (s->info.opt_block &&
> + s->info.opt_block >> BDRV_SECTOR_BITS > bs->bl.opt_transfer_length) {
> + bs->bl.opt_transfer_length = s->info.opt_block >> BDRV_SECTOR_BITS;
> + }
> }
>
> static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
> diff --git a/nbd/client.c b/nbd/client.c
> index dac4f29..24f6b0b 100644
> --- a/nbd/client.c
> +++ b/nbd/client.c
> @@ -232,6 +232,11 @@ static int nbd_handle_reply_err(QIOChannel *ioc,
> nbd_opt_reply *reply,
> reply->option);
> break;
>
> + case NBD_REP_ERR_BLOCK_SIZE_REQD:
> + error_setg(errp, "Server wants OPT_BLOCK_SIZE before option %"
> PRIx32,
> + reply->option);
> + break;
> +
> default:
> error_setg(errp, "Unknown error code when asking for option %" PRIx32,
> reply->option);
> @@ -333,6 +338,21 @@ static int nbd_opt_go(QIOChannel *ioc, const char
> *wantname,
> * flags still 0 is a witness of a broken server. */
> info->flags = 0;
>
> + /* Some servers use NBD_OPT_GO to advertise non-default block
> + * sizes, and require that we first use NBD_OPT_BLOCK_SIZE to
> + * agree to that. */
> + TRACE("Attempting NBD_OPT_BLOCK_SIZE");
> + if (nbd_send_option_request(ioc, NBD_OPT_BLOCK_SIZE, 0, NULL, errp) < 0)
> {
> + return -1;
> + }
> + if (nbd_receive_option_reply(ioc, NBD_OPT_BLOCK_SIZE, &reply, errp) < 0)
> {
> + return -1;
> + }
> + error = nbd_handle_reply_err(ioc, &reply, errp);
> + if (error < 0) {
> + return error;
> + }
> +
> TRACE("Attempting NBD_OPT_GO for export '%s'", wantname);
> if (nbd_send_option_request(ioc, NBD_OPT_GO, -1, wantname, errp) < 0) {
> return -1;
> @@ -402,6 +422,45 @@ static int nbd_opt_go(QIOChannel *ioc, const char
> *wantname,
> info->size, info->flags);
> break;
>
> + case NBD_INFO_BLOCK_SIZE:
> + if (len != sizeof(info->min_block) * 3) {
> + error_setg(errp, "remaining export info len %" PRIu32
> + " is unexpected size", len);
> + return -1;
> + }
> + if (read_sync(ioc, &info->min_block, sizeof(info->min_block)) !=
> + sizeof(info->min_block)) {
> + error_setg(errp, "failed to read info minimum block size");
> + return -1;
> + }
> + be32_to_cpus(&info->min_block);
> + if (!is_power_of_2(info->min_block)) {
> + error_setg(errp, "server minimum block size %" PRId32
> + "is not a power of two", info->min_block);
> + return -1;
> + }
> + if (read_sync(ioc, &info->opt_block, sizeof(info->opt_block)) !=
> + sizeof(info->opt_block)) {
> + error_setg(errp, "failed to read info preferred block size");
> + return -1;
> + }
> + be32_to_cpus(&info->opt_block);
> + if (!is_power_of_2(info->opt_block) ||
> + info->opt_block < info->min_block) {
> + error_setg(errp, "server preferred block size %" PRId32
> + "is not valid", info->opt_block);
> + return -1;
> + }
> + if (read_sync(ioc, &info->max_block, sizeof(info->max_block)) !=
> + sizeof(info->max_block)) {
> + error_setg(errp, "failed to read info maximum block size");
> + return -1;
> + }
> + be32_to_cpus(&info->max_block);
> + TRACE("Block sizes are 0x%" PRIx32 ", 0x%" PRIx32 ", 0x%" PRIx32,
> + info->min_block, info->opt_block, info->max_block);
> + break;
> +
You should probably check min_block <= opt_block <= max_block here
Also should there be a check that BDRV_SECTOR_SIZE >= min_block?
> default:
> TRACE("ignoring unknown export info %" PRIu16, type);
> if (drop_sync(ioc, len) != len) {
> @@ -710,8 +769,9 @@ fail:
> #ifdef __linux__
> int nbd_init(int fd, QIOChannelSocket *sioc, NbdExportInfo *info)
> {
> - unsigned long sectors = info->size / BDRV_SECTOR_SIZE;
> - if (info->size / BDRV_SECTOR_SIZE != sectors) {
> + unsigned long sector_size = MAX(BDRV_SECTOR_SIZE, info->min_block);
> + unsigned long sectors = info->size / sector_size;
> + if (info->size / sector_size != sectors) {
> LOG("Export size %" PRId64 " too large for 32-bit kernel",
> info->size);
> return -E2BIG;
> }
> @@ -724,18 +784,18 @@ int nbd_init(int fd, QIOChannelSocket *sioc,
> NbdExportInfo *info)
> return -serrno;
> }
>
> - TRACE("Setting block size to %lu", (unsigned long)BDRV_SECTOR_SIZE);
> + TRACE("Setting block size to %lu", sector_size);
>
> - if (ioctl(fd, NBD_SET_BLKSIZE, (unsigned long)BDRV_SECTOR_SIZE) < 0) {
> + if (ioctl(fd, NBD_SET_BLKSIZE, sector_size) < 0) {
> int serrno = errno;
> LOG("Failed setting NBD block size");
> return -serrno;
> }
>
> TRACE("Setting size to %lu block(s)", sectors);
> - if (size % BDRV_SECTOR_SIZE) {
> - TRACE("Ignoring trailing %d bytes of export",
> - (int) (size % BDRV_SECTOR_SIZE));
> + if (info->size % sector_size) {
> + TRACE("Ignoring trailing %" PRId64 " bytes of export",
> + info->size % sector_size);
> }
>
> if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
> --
> 2.5.5
>
>
--
Alex Bligh
- Re: [Qemu-block] [Qemu-devel] [PATCH v3 30/44] nbd: Treat flags vs. command type as separate fields, (continued)
- [Qemu-block] [PATCH v3 29/44] nbd: Avoid magic number for NBD max name size, Eric Blake, 2016/04/22
- [Qemu-block] [PATCH v3 38/44] block: Add blk_get_opt_transfer_length(), Eric Blake, 2016/04/22
- [Qemu-block] [PATCH v3 41/44] nbd: Implement NBD_CMD_WRITE_ZEROES on server, Eric Blake, 2016/04/22
- [Qemu-block] [PATCH v3 43/44] nbd: Implement NBD_OPT_BLOCK_SIZE on server, Eric Blake, 2016/04/22
- [Qemu-block] [PATCH v3 44/44] nbd: Implement NBD_OPT_BLOCK_SIZE on client, Eric Blake, 2016/04/22
- Re: [Qemu-block] [Qemu-devel] [PATCH v3 44/44] nbd: Implement NBD_OPT_BLOCK_SIZE on client,
Alex Bligh <=
- [Qemu-block] [PATCH v3 34/44] nbd: Less allocation during NBD_OPT_LIST, Eric Blake, 2016/04/22
- [Qemu-block] [PATCH v3 32/44] nbd: Share common option-sending code in client, Eric Blake, 2016/04/22
- [Qemu-block] [PATCH v3 36/44] nbd: Improve handling of shutdown requests, Eric Blake, 2016/04/22
[Qemu-block] [PATCH v3 39/44] nbd: Implement NBD_OPT_GO on server, Eric Blake, 2016/04/22