[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 08/23] hw/nvme: reimplement flush to allow cancellation
From: Klaus Jensen
Subject: [PULL 08/23] hw/nvme: reimplement flush to allow cancellation
Date: Tue, 29 Jun 2021 20:47:28 +0200
From: Klaus Jensen <k.jensen@samsung.com>
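Prior to this patch, a broadcast flush would result in submitting
multiple "fire and forget" aios (no reference saved to the aiocbs
returned from the blk_aio_flush calls), so those in-flight flushes
could not be cancelled.

Fix this by issuing the flushes one after another, keeping a reference
to the aiocb of the flush that is currently in flight so that it can be
cancelled.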
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
---
hw/nvme/nvme.h | 2 +
hw/nvme/ctrl.c | 206 ++++++++++++++++++++++++++-----------------
hw/nvme/trace-events | 6 +-
3 files changed, 130 insertions(+), 84 deletions(-)
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 371ac9bfd8fc..7f3d0a181d1d 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -28,6 +28,8 @@
#define NVME_MAX_NAMESPACES 256
#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
+QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
+
typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 762bb82e3cac..26c65a12e80c 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1788,22 +1788,19 @@ static inline bool nvme_is_write(NvmeRequest *req)
rw->opcode == NVME_CMD_WRITE_ZEROES;
}
+static AioContext *nvme_get_aio_context(BlockAIOCB *acb)
+{
+ return qemu_get_aio_context();
+}
+
static void nvme_misc_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
- NvmeNamespace *ns = req->ns;
- BlockBackend *blk = ns->blkconf.blk;
- BlockAcctCookie *acct = &req->acct;
- BlockAcctStats *stats = blk_get_stats(blk);
-
- trace_pci_nvme_misc_cb(nvme_cid(req), blk_name(blk));
+ trace_pci_nvme_misc_cb(nvme_cid(req));
if (ret) {
- block_acct_failed(stats, acct);
nvme_aio_err(req, ret);
- } else {
- block_acct_done(stats, acct);
}
nvme_enqueue_req_completion(nvme_cq(req), req);
@@ -1919,41 +1916,6 @@ static void nvme_aio_format_cb(void *opaque, int ret)
nvme_enqueue_req_completion(nvme_cq(req), req);
}
-struct nvme_aio_flush_ctx {
- NvmeRequest *req;
- NvmeNamespace *ns;
- BlockAcctCookie acct;
-};
-
-static void nvme_aio_flush_cb(void *opaque, int ret)
-{
- struct nvme_aio_flush_ctx *ctx = opaque;
- NvmeRequest *req = ctx->req;
- uintptr_t *num_flushes = (uintptr_t *)&req->opaque;
-
- BlockBackend *blk = ctx->ns->blkconf.blk;
- BlockAcctCookie *acct = &ctx->acct;
- BlockAcctStats *stats = blk_get_stats(blk);
-
- trace_pci_nvme_aio_flush_cb(nvme_cid(req), blk_name(blk));
-
- if (!ret) {
- block_acct_done(stats, acct);
- } else {
- block_acct_failed(stats, acct);
- nvme_aio_err(req, ret);
- }
-
- (*num_flushes)--;
- g_free(ctx);
-
- if (*num_flushes) {
- return;
- }
-
- nvme_enqueue_req_completion(nvme_cq(req), req);
-}
-
static void nvme_verify_cb(void *opaque, int ret)
{
NvmeBounceContext *ctx = opaque;
@@ -2868,56 +2830,138 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest
*req)
return NVME_NO_COMPLETE;
}
+typedef struct NvmeFlushAIOCB {
+ BlockAIOCB common;
+ BlockAIOCB *aiocb;
+ NvmeRequest *req;
+ QEMUBH *bh;
+ int ret;
+
+ NvmeNamespace *ns;
+ uint32_t nsid;
+ bool broadcast;
+} NvmeFlushAIOCB;
+
+static void nvme_flush_cancel(BlockAIOCB *acb)
+{
+ NvmeFlushAIOCB *iocb = container_of(acb, NvmeFlushAIOCB, common);
+
+ iocb->ret = -ECANCELED;
+
+ if (iocb->aiocb) {
+ blk_aio_cancel_async(iocb->aiocb);
+ }
+}
+
+static const AIOCBInfo nvme_flush_aiocb_info = {
+ .aiocb_size = sizeof(NvmeFlushAIOCB),
+ .cancel_async = nvme_flush_cancel,
+ .get_aio_context = nvme_get_aio_context,
+};
+
+static void nvme_flush_ns_cb(void *opaque, int ret)
+{
+ NvmeFlushAIOCB *iocb = opaque;
+ NvmeNamespace *ns = iocb->ns;
+
+ if (ret < 0) {
+ iocb->ret = ret;
+ goto out;
+ } else if (iocb->ret < 0) {
+ goto out;
+ }
+
+ if (ns) {
+ trace_pci_nvme_flush_ns(iocb->nsid);
+
+ iocb->ns = NULL;
+ iocb->aiocb = blk_aio_flush(ns->blkconf.blk, nvme_flush_ns_cb, iocb);
+ return;
+ }
+
+out:
+ iocb->aiocb = NULL;
+ qemu_bh_schedule(iocb->bh);
+}
+
+static void nvme_flush_bh(void *opaque)
+{
+ NvmeFlushAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeCtrl *n = nvme_ctrl(req);
+ int i;
+
+ if (iocb->ret < 0) {
+ goto done;
+ }
+
+ if (iocb->broadcast) {
+ for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) {
+ iocb->ns = nvme_ns(n, i);
+ if (iocb->ns) {
+ iocb->nsid = i;
+ break;
+ }
+ }
+ }
+
+ if (!iocb->ns) {
+ goto done;
+ }
+
+ nvme_flush_ns_cb(iocb, 0);
+ return;
+
+done:
+ qemu_bh_delete(iocb->bh);
+ iocb->bh = NULL;
+
+ iocb->common.cb(iocb->common.opaque, iocb->ret);
+
+ qemu_aio_unref(iocb);
+
+ return;
+}
+
static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
{
+ NvmeFlushAIOCB *iocb;
uint32_t nsid = le32_to_cpu(req->cmd.nsid);
- uintptr_t *num_flushes = (uintptr_t *)&req->opaque;
uint16_t status;
- struct nvme_aio_flush_ctx *ctx;
- NvmeNamespace *ns;
- trace_pci_nvme_flush(nvme_cid(req), nsid);
+ iocb = qemu_aio_get(&nvme_flush_aiocb_info, NULL, nvme_misc_cb, req);
- if (nsid != NVME_NSID_BROADCAST) {
- req->ns = nvme_ns(n, nsid);
- if (unlikely(!req->ns)) {
- return NVME_INVALID_FIELD | NVME_DNR;
+ iocb->req = req;
+ iocb->bh = qemu_bh_new(nvme_flush_bh, iocb);
+ iocb->ret = 0;
+ iocb->ns = NULL;
+ iocb->nsid = 0;
+ iocb->broadcast = (nsid == NVME_NSID_BROADCAST);
+
+ if (!iocb->broadcast) {
+ if (!nvme_nsid_valid(n, nsid)) {
+ status = NVME_INVALID_NSID | NVME_DNR;
+ goto out;
}
- block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0,
- BLOCK_ACCT_FLUSH);
- req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_misc_cb, req);
- return NVME_NO_COMPLETE;
- }
-
- /* 1-initialize; see comment in nvme_dsm */
- *num_flushes = 1;
-
- for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- ns = nvme_ns(n, i);
- if (!ns) {
- continue;
+ iocb->ns = nvme_ns(n, nsid);
+ if (!iocb->ns) {
+ status = NVME_INVALID_FIELD | NVME_DNR;
+ goto out;
}
- ctx = g_new(struct nvme_aio_flush_ctx, 1);
- ctx->req = req;
- ctx->ns = ns;
-
- (*num_flushes)++;
-
- block_acct_start(blk_get_stats(ns->blkconf.blk), &ctx->acct, 0,
- BLOCK_ACCT_FLUSH);
- blk_aio_flush(ns->blkconf.blk, nvme_aio_flush_cb, ctx);
+ iocb->nsid = nsid;
}
- /* account for the 1-initialization */
- (*num_flushes)--;
+ req->aiocb = &iocb->common;
+ qemu_bh_schedule(iocb->bh);
- if (*num_flushes) {
- status = NVME_NO_COMPLETE;
- } else {
- status = req->status;
- }
+ return NVME_NO_COMPLETE;
+
+out:
+ qemu_bh_delete(iocb->bh);
+ iocb->bh = NULL;
+ qemu_aio_unref(iocb);
return status;
}
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index ea33d0ccc383..ce6b6ffe9604 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -7,16 +7,16 @@ pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr
0x%"PRIx64" len %"PRIu64""
pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len
%"PRIu64""
pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t
prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2
0x%"PRIx64" num_prps %d"
pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64""
-pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode,
const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8"
opname '%s'"
+pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode,
const char *opname) "cid %"PRIu16" nsid 0x%"PRIx32" sqid %"PRIu16" opc
0x%"PRIx8" opname '%s'"
pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char
*opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
-pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
+pci_nvme_flush_ns(uint32_t nsid) "nsid 0x%"PRIx32""
pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset,
uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset
%"PRIu8" pi %"PRIu8" pil %"PRIu8""
pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset,
uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset
%"PRIu8" pi %"PRIu8" pil %"PRIu8""
pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count,
uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba
0x%"PRIx64""
pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb,
uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb
%"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
-pci_nvme_misc_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_misc_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo
0x%"PRIx8""
pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16"
blk '%s'"
--
2.32.0
- [PULL 00/23] hw/nvme patches, Klaus Jensen, 2021/06/29
- [PULL 01/23] hw/nvme: fix style, Klaus Jensen, 2021/06/29
- [PULL 02/23] hw/nvme: add identify namespace flbas/mc enums, Klaus Jensen, 2021/06/29
- [PULL 03/23] hw/nvme: fix lbaf formats initialization, Klaus Jensen, 2021/06/29
- [PULL 05/23] hw/nvme: fix csi field for cns 0x00 and 0x11, Klaus Jensen, 2021/06/29
- [PULL 04/23] hw/nvme: add param to control auto zone transitioning to zone state closed, Klaus Jensen, 2021/06/29
- [PULL 07/23] hw/nvme: default for namespace EUI-64, Klaus Jensen, 2021/06/29
- [PULL 09/23] hw/nvme: add nvme_block_status_all helper, Klaus Jensen, 2021/06/29
- [PULL 08/23] hw/nvme: reimplement flush to allow cancellation, Klaus Jensen <=
- [PULL 11/23] hw/nvme: save reftag when generating pi, Klaus Jensen, 2021/06/29
- [PULL 06/23] hw/nvme: namespace parameter for EUI-64, Klaus Jensen, 2021/06/29
- [PULL 10/23] hw/nvme: reimplement dsm to allow cancellation, Klaus Jensen, 2021/06/29
- [PULL 12/23] hw/nvme: remove assert from nvme_get_zone_by_slba, Klaus Jensen, 2021/06/29
- [PULL 13/23] hw/nvme: use prinfo directly in nvme_check_prinfo and nvme_dif_check, Klaus Jensen, 2021/06/29
- [PULL 14/23] hw/nvme: add dw0/1 to the req completion trace event, Klaus Jensen, 2021/06/29
- [PULL 15/23] hw/nvme: reimplement the copy command to allow aio cancellation, Klaus Jensen, 2021/06/29
- [PULL 16/23] hw/nvme: reimplement zone reset to allow cancellation, Klaus Jensen, 2021/06/29
- [PULL 17/23] hw/nvme: reimplement format nvm to allow cancellation, Klaus Jensen, 2021/06/29
- [PULL 18/23] Partially revert "hw/block/nvme: drain namespaces on sq deletion", Klaus Jensen, 2021/06/29