[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCHv2] iscsi: retry read, write, flush and unmap on
From: |
Paolo Bonzini |
Subject: |
Re: [Qemu-devel] [PATCHv2] iscsi: retry read, write, flush and unmap on unit attention check conditions |
Date: |
Mon, 25 Feb 2013 12:12:06 +0100 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130110 Thunderbird/17.0.2 |
Il 21/02/2013 16:15, Peter Lieven ha scritto:
> the storage might return a check condition status for various reasons.
> (e.g. bus reset, capacity change, thin-provisioning info etc.)
>
> currently all these informative status responses lead to an I/O error
> which is populated to the guest. this patch introduces a retry mechanism
> to avoid this.
>
> v2:
> - fix duplication in iscsi_aio_discard()
> - allow any type of unit attention check condition in
> iscsi_readcapacity_sync().
Moved the iscsi_readcapacity_sync() hunk to the iscsi_truncate() patch
and applied to scsi-next branch, thanks.
Paolo
>
> Signed-off-by: Peter Lieven <address@hidden>
> ---
> block/iscsi.c | 303
> +++++++++++++++++++++++++++++++++++++++------------------
> 1 file changed, 209 insertions(+), 94 deletions(-)
>
> diff --git a/block/iscsi.c b/block/iscsi.c
> index 1ea290e..62ebd79 100644
> --- a/block/iscsi.c
> +++ b/block/iscsi.c
> @@ -60,8 +60,11 @@ typedef struct IscsiAIOCB {
> uint8_t *buf;
> int status;
> int canceled;
> + int retries;
> size_t read_size;
> size_t read_offset;
> + int64_t sector_num;
> + int nb_sectors;
> #ifdef __linux__
> sg_io_hdr_t *ioh;
> #endif
> @@ -69,6 +72,7 @@ typedef struct IscsiAIOCB {
>
> #define NOP_INTERVAL 5000
> #define MAX_NOP_FAILURES 3
> +#define ISCSI_CMD_RETRIES 5
>
> static void
> iscsi_bh_cb(void *p)
> @@ -191,6 +195,8 @@ iscsi_process_write(void *arg)
> iscsi_set_events(iscsilun);
> }
>
> +static int
> +iscsi_aio_writev_acb(IscsiAIOCB *acb);
>
> static void
> iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status,
> @@ -208,7 +214,19 @@ iscsi_aio_write16_cb(struct iscsi_context *iscsi,
> int status,
> }
>
> acb->status = 0;
> - if (status < 0) {
> + if (status != 0) {
> + if (status == SCSI_STATUS_CHECK_CONDITION
> + && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
> + && acb->retries-- > 0) {
> + if (acb->task != NULL) {
> + scsi_free_scsi_task(acb->task);
> + acb->task = NULL;
> + }
> + if (iscsi_aio_writev_acb(acb) == 0) {
> + iscsi_set_events(acb->iscsilun);
> + return;
> + }
> + }
> error_report("Failed to write16 data to iSCSI lun. %s",
> iscsi_get_error(iscsi));
> acb->status = -EIO;
> @@ -222,15 +240,10 @@ static int64_t sector_qemu2lun(int64_t sector,
> IscsiLun *iscsilun)
> return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
> }
>
> -static BlockDriverAIOCB *
> -iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
> - QEMUIOVector *qiov, int nb_sectors,
> - BlockDriverCompletionFunc *cb,
> - void *opaque)
> +static int
> +iscsi_aio_writev_acb(IscsiAIOCB *acb)
> {
> - IscsiLun *iscsilun = bs->opaque;
> - struct iscsi_context *iscsi = iscsilun->iscsi;
> - IscsiAIOCB *acb;
> + struct iscsi_context *iscsi = acb->iscsilun->iscsi;
> size_t size;
> uint32_t num_sectors;
> uint64_t lba;
> @@ -238,20 +251,14 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t
> sector_num,
> struct iscsi_data data;
> #endif
> int ret;
> -
> - acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
> - trace_iscsi_aio_writev(iscsi, sector_num, nb_sectors, opaque, acb);
> -
> - acb->iscsilun = iscsilun;
> - acb->qiov = qiov;
> -
> +
> acb->canceled = 0;
> acb->bh = NULL;
> acb->status = -EINPROGRESS;
> acb->buf = NULL;
>
> /* this will allow us to get rid of 'buf' completely */
> - size = nb_sectors * BDRV_SECTOR_SIZE;
> + size = acb->nb_sectors * BDRV_SECTOR_SIZE;
>
> #if !defined(LIBISCSI_FEATURE_IOVECTOR)
> data.size = MIN(size, acb->qiov->size);
> @@ -270,48 +277,76 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t
> sector_num,
> if (acb->task == NULL) {
> error_report("iSCSI: Failed to allocate task for scsi WRITE16 "
> "command. %s", iscsi_get_error(iscsi));
> - qemu_aio_release(acb);
> - return NULL;
> + return -1;
> }
> memset(acb->task, 0, sizeof(struct scsi_task));
>
> acb->task->xfer_dir = SCSI_XFER_WRITE;
> acb->task->cdb_size = 16;
> acb->task->cdb[0] = 0x8a;
> - lba = sector_qemu2lun(sector_num, iscsilun);
> + lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
> *(uint32_t *)&acb->task->cdb[2] = htonl(lba >> 32);
> *(uint32_t *)&acb->task->cdb[6] = htonl(lba & 0xffffffff);
> - num_sectors = size / iscsilun->block_size;
> + num_sectors = size / acb->iscsilun->block_size;
> *(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors);
> acb->task->expxferlen = size;
>
> #if defined(LIBISCSI_FEATURE_IOVECTOR)
> - ret = iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
> + ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
> iscsi_aio_write16_cb,
> NULL,
> acb);
> #else
> - ret = iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
> + ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
> iscsi_aio_write16_cb,
> &data,
> acb);
> #endif
> if (ret != 0) {
> - scsi_free_scsi_task(acb->task);
> g_free(acb->buf);
> - qemu_aio_release(acb);
> - return NULL;
> + return -1;
> }
>
> #if defined(LIBISCSI_FEATURE_IOVECTOR)
> scsi_task_set_iov_out(acb->task, (struct scsi_iovec*)
> acb->qiov->iov, acb->qiov->niov);
> #endif
>
> - iscsi_set_events(iscsilun);
> + return 0;
> +}
> +
> +static BlockDriverAIOCB *
> +iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
> + QEMUIOVector *qiov, int nb_sectors,
> + BlockDriverCompletionFunc *cb,
> + void *opaque)
> +{
> + IscsiLun *iscsilun = bs->opaque;
> + IscsiAIOCB *acb;
> +
> + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
> + trace_iscsi_aio_writev(iscsilun->iscsi, sector_num, nb_sectors,
> opaque, acb);
> +
> + acb->iscsilun = iscsilun;
> + acb->qiov = qiov;
> + acb->nb_sectors = nb_sectors;
> + acb->sector_num = sector_num;
> + acb->retries = ISCSI_CMD_RETRIES;
> +
> + if (iscsi_aio_writev_acb(acb) != 0) {
> + if (acb->task) {
> + scsi_free_scsi_task(acb->task);
> + }
> + qemu_aio_release(acb);
> + return NULL;
> + }
>
> + iscsi_set_events(iscsilun);
> return &acb->common;
> }
>
> +static int
> +iscsi_aio_readv_acb(IscsiAIOCB *acb);
> +
> static void
> iscsi_aio_read16_cb(struct iscsi_context *iscsi, int status,
> void *command_data, void *opaque)
> @@ -326,6 +361,18 @@ iscsi_aio_read16_cb(struct iscsi_context *iscsi,
> int status,
>
> acb->status = 0;
> if (status != 0) {
> + if (status == SCSI_STATUS_CHECK_CONDITION
> + && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
> + && acb->retries-- > 0) {
> + if (acb->task != NULL) {
> + scsi_free_scsi_task(acb->task);
> + acb->task = NULL;
> + }
> + if (iscsi_aio_readv_acb(acb) == 0) {
> + iscsi_set_events(acb->iscsilun);
> + return;
> + }
> + }
> error_report("Failed to read16 data from iSCSI lun. %s",
> iscsi_get_error(iscsi));
> acb->status = -EIO;
> @@ -334,35 +381,20 @@ iscsi_aio_read16_cb(struct iscsi_context *iscsi,
> int status,
> iscsi_schedule_bh(acb);
> }
>
> -static BlockDriverAIOCB *
> -iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
> - QEMUIOVector *qiov, int nb_sectors,
> - BlockDriverCompletionFunc *cb,
> - void *opaque)
> +static int
> +iscsi_aio_readv_acb(IscsiAIOCB *acb)
> {
> - IscsiLun *iscsilun = bs->opaque;
> - struct iscsi_context *iscsi = iscsilun->iscsi;
> - IscsiAIOCB *acb;
> - size_t qemu_read_size;
> + struct iscsi_context *iscsi = acb->iscsilun->iscsi;
> + uint64_t lba;
> + uint32_t num_sectors;
> + int ret;
> #if !defined(LIBISCSI_FEATURE_IOVECTOR)
> int i;
> #endif
> - int ret;
> - uint64_t lba;
> - uint32_t num_sectors;
> -
> - qemu_read_size = BDRV_SECTOR_SIZE * (size_t)nb_sectors;
> -
> - acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
> - trace_iscsi_aio_readv(iscsi, sector_num, nb_sectors, opaque, acb);
> -
> - acb->iscsilun = iscsilun;
> - acb->qiov = qiov;
>
> acb->canceled = 0;
> acb->bh = NULL;
> acb->status = -EINPROGRESS;
> - acb->read_size = qemu_read_size;
> acb->buf = NULL;
>
> /* If LUN blocksize is bigger than BDRV_BLOCK_SIZE a read from QEMU
> @@ -370,30 +402,29 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t
> sector_num,
> * data.
> */
> acb->read_offset = 0;
> - if (iscsilun->block_size > BDRV_SECTOR_SIZE) {
> - uint64_t bdrv_offset = BDRV_SECTOR_SIZE * sector_num;
> + if (acb->iscsilun->block_size > BDRV_SECTOR_SIZE) {
> + uint64_t bdrv_offset = BDRV_SECTOR_SIZE * acb->sector_num;
>
> - acb->read_offset = bdrv_offset % iscsilun->block_size;
> + acb->read_offset = bdrv_offset % acb->iscsilun->block_size;
> }
>
> - num_sectors = (qemu_read_size + iscsilun->block_size
> + num_sectors = (acb->read_size + acb->iscsilun->block_size
> + acb->read_offset - 1)
> - / iscsilun->block_size;
> + / acb->iscsilun->block_size;
>
> acb->task = malloc(sizeof(struct scsi_task));
> if (acb->task == NULL) {
> error_report("iSCSI: Failed to allocate task for scsi READ16 "
> "command. %s", iscsi_get_error(iscsi));
> - qemu_aio_release(acb);
> - return NULL;
> + return -1;
> }
> memset(acb->task, 0, sizeof(struct scsi_task));
>
> acb->task->xfer_dir = SCSI_XFER_READ;
> - lba = sector_qemu2lun(sector_num, iscsilun);
> - acb->task->expxferlen = qemu_read_size;
> + lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
> + acb->task->expxferlen = acb->read_size;
>
> - switch (iscsilun->type) {
> + switch (acb->iscsilun->type) {
> case TYPE_DISK:
> acb->task->cdb_size = 16;
> acb->task->cdb[0] = 0x88;
> @@ -409,14 +440,12 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t
> sector_num,
> break;
> }
>
> - ret = iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
> + ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
> iscsi_aio_read16_cb,
> NULL,
> acb);
> if (ret != 0) {
> - scsi_free_scsi_task(acb->task);
> - qemu_aio_release(acb);
> - return NULL;
> + return -1;
> }
>
> #if defined(LIBISCSI_FEATURE_IOVECTOR)
> @@ -428,12 +457,42 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t
> sector_num,
> acb->qiov->iov[i].iov_base);
> }
> #endif
> + return 0;
> +}
>
> - iscsi_set_events(iscsilun);
> +static BlockDriverAIOCB *
> +iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
> + QEMUIOVector *qiov, int nb_sectors,
> + BlockDriverCompletionFunc *cb,
> + void *opaque)
> +{
> + IscsiLun *iscsilun = bs->opaque;
> + IscsiAIOCB *acb;
> +
> + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
> + trace_iscsi_aio_readv(iscsilun->iscsi, sector_num, nb_sectors,
> opaque, acb);
> +
> + acb->nb_sectors = nb_sectors;
> + acb->sector_num = sector_num;
> + acb->iscsilun = iscsilun;
> + acb->qiov = qiov;
> + acb->read_size = BDRV_SECTOR_SIZE * (size_t)acb->nb_sectors;
> + acb->retries = ISCSI_CMD_RETRIES;
> +
> + if (iscsi_aio_readv_acb(acb) != 0) {
> + if (acb->task) {
> + scsi_free_scsi_task(acb->task);
> + }
> + qemu_aio_release(acb);
> + return NULL;
> + }
>
> + iscsi_set_events(iscsilun);
> return &acb->common;
> }
>
> +static int
> +iscsi_aio_flush_acb(IscsiAIOCB *acb);
>
> static void
> iscsi_synccache10_cb(struct iscsi_context *iscsi, int status,
> @@ -446,7 +505,19 @@ iscsi_synccache10_cb(struct iscsi_context *iscsi,
> int status,
> }
>
> acb->status = 0;
> - if (status < 0) {
> + if (status != 0) {
> + if (status == SCSI_STATUS_CHECK_CONDITION
> + && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
> + && acb->retries-- > 0) {
> + if (acb->task != NULL) {
> + scsi_free_scsi_task(acb->task);
> + acb->task = NULL;
> + }
> + if (iscsi_aio_flush_acb(acb) == 0) {
> + iscsi_set_events(acb->iscsilun);
> + return;
> + }
> + }
> error_report("Failed to sync10 data on iSCSI lun. %s",
> iscsi_get_error(iscsi));
> acb->status = -EIO;
> @@ -455,29 +526,43 @@ iscsi_synccache10_cb(struct iscsi_context *iscsi,
> int status,
> iscsi_schedule_bh(acb);
> }
>
> -static BlockDriverAIOCB *
> -iscsi_aio_flush(BlockDriverState *bs,
> - BlockDriverCompletionFunc *cb, void *opaque)
> +static int
> +iscsi_aio_flush_acb(IscsiAIOCB *acb)
> {
> - IscsiLun *iscsilun = bs->opaque;
> - struct iscsi_context *iscsi = iscsilun->iscsi;
> - IscsiAIOCB *acb;
> -
> - acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
> + struct iscsi_context *iscsi = acb->iscsilun->iscsi;
>
> - acb->iscsilun = iscsilun;
> acb->canceled = 0;
> acb->bh = NULL;
> acb->status = -EINPROGRESS;
> acb->buf = NULL;
>
> - acb->task = iscsi_synchronizecache10_task(iscsi, iscsilun->lun,
> + acb->task = iscsi_synchronizecache10_task(iscsi, acb->iscsilun->lun,
> 0, 0, 0, 0,
> iscsi_synccache10_cb,
> acb);
> if (acb->task == NULL) {
> error_report("iSCSI: Failed to send synchronizecache10 command.
> %s",
> iscsi_get_error(iscsi));
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static BlockDriverAIOCB *
> +iscsi_aio_flush(BlockDriverState *bs,
> + BlockDriverCompletionFunc *cb, void *opaque)
> +{
> + IscsiLun *iscsilun = bs->opaque;
> +
> + IscsiAIOCB *acb;
> +
> + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
> +
> + acb->iscsilun = iscsilun;
> + acb->retries = ISCSI_CMD_RETRIES;
> +
> + if (iscsi_aio_flush_acb(acb) != 0) {
> qemu_aio_release(acb);
> return NULL;
> }
> @@ -487,6 +572,8 @@ iscsi_aio_flush(BlockDriverState *bs,
> return &acb->common;
> }
>
> +static int iscsi_aio_discard_acb(IscsiAIOCB *acb);
> +
> static void
> iscsi_unmap_cb(struct iscsi_context *iscsi, int status,
> void *command_data, void *opaque)
> @@ -498,7 +585,19 @@ iscsi_unmap_cb(struct iscsi_context *iscsi, int
> status,
> }
>
> acb->status = 0;
> - if (status < 0) {
> + if (status != 0) {
> + if (status == SCSI_STATUS_CHECK_CONDITION
> + && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
> + && acb->retries-- > 0) {
> + if (acb->task != NULL) {
> + scsi_free_scsi_task(acb->task);
> + acb->task = NULL;
> + }
> + if (iscsi_aio_discard_acb(acb) == 0) {
> + iscsi_set_events(acb->iscsilun);
> + return;
> + }
> + }
> error_report("Failed to unmap data on iSCSI lun. %s",
> iscsi_get_error(iscsi));
> acb->status = -EIO;
> @@ -507,38 +606,54 @@ iscsi_unmap_cb(struct iscsi_context *iscsi, int
> status,
> iscsi_schedule_bh(acb);
> }
>
> -static BlockDriverAIOCB *
> -iscsi_aio_discard(BlockDriverState *bs,
> - int64_t sector_num, int nb_sectors,
> - BlockDriverCompletionFunc *cb, void *opaque)
> -{
> - IscsiLun *iscsilun = bs->opaque;
> - struct iscsi_context *iscsi = iscsilun->iscsi;
> - IscsiAIOCB *acb;
> +static int iscsi_aio_discard_acb(IscsiAIOCB *acb) {
> + struct iscsi_context *iscsi = acb->iscsilun->iscsi;
> struct unmap_list list[1];
> -
> - acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
> -
> - acb->iscsilun = iscsilun;
> +
> acb->canceled = 0;
> acb->bh = NULL;
> acb->status = -EINPROGRESS;
> acb->buf = NULL;
>
> - list[0].lba = sector_qemu2lun(sector_num, iscsilun);
> - list[0].num = nb_sectors * BDRV_SECTOR_SIZE / iscsilun->block_size;
> + list[0].lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
> + list[0].num = acb->nb_sectors * BDRV_SECTOR_SIZE /
> acb->iscsilun->block_size;
>
> - acb->task = iscsi_unmap_task(iscsi, iscsilun->lun,
> + acb->task = iscsi_unmap_task(iscsi, acb->iscsilun->lun,
> 0, 0, &list[0], 1,
> iscsi_unmap_cb,
> acb);
> if (acb->task == NULL) {
> error_report("iSCSI: Failed to send unmap command. %s",
> iscsi_get_error(iscsi));
> - qemu_aio_release(acb);
> - return NULL;
> + return -1;
> }
>
> + return 0;
> +}
> +
> +static BlockDriverAIOCB *
> +iscsi_aio_discard(BlockDriverState *bs,
> + int64_t sector_num, int nb_sectors,
> + BlockDriverCompletionFunc *cb, void *opaque)
> +{
> + IscsiLun *iscsilun = bs->opaque;
> + IscsiAIOCB *acb;
> +
> + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
> +
> + acb->iscsilun = iscsilun;
> + acb->nb_sectors = nb_sectors;
> + acb->sector_num = sector_num;
> + acb->retries = ISCSI_CMD_RETRIES;
> +
> + if (iscsi_aio_discard_acb(acb) != 0) {
> + if (acb->task) {
> + scsi_free_scsi_task(acb->task);
> + }
> + qemu_aio_release(acb);
> + return NULL;
> + }
> +
> iscsi_set_events(iscsilun);
>
> return &acb->common;
> @@ -828,16 +943,16 @@ static int iscsi_readcapacity_sync(IscsiLun
> *iscsilun) {
> struct scsi_readcapacity10 *rc10 = NULL;
> struct scsi_readcapacity16 *rc16 = NULL;
> int ret = 0;
> + int retries = ISCSI_CMD_RETRIES;
>
> try_again:
> switch (iscsilun->type) {
> case TYPE_DISK:
> task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
> if (task == NULL || task->status != SCSI_STATUS_GOOD) {
> - /* Capacity data has changed */
> if (task->status == SCSI_STATUS_CHECK_CONDITION
> && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
> - && task->sense.ascq == 0x2a09) {
> + && retries-- > 0) {
> scsi_free_scsi_task(task);
> goto try_again;
> }