[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v4 4/5] hw/nvme: fix mmio read
From: Klaus Jensen
Subject: Re: [PATCH v4 4/5] hw/nvme: fix mmio read
Date: Mon, 19 Jul 2021 22:26:31 +0200
On Jul 19 22:07, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
>
> The new PMR test unearthed a long-standing issue with MMIO reads on
> big-endian hosts.
>
> Fix this by unconditionally storing all controller registers in little
> endian.
>
> Cc: Gollu Appalanaidu <anaidu.gollu@samsung.com>
> Reported-by: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
> hw/nvme/ctrl.c | 290 +++++++++++++++++++++++++++----------------------
> 1 file changed, 162 insertions(+), 128 deletions(-)
>
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 0449cc4dee9b..76721e31c6b1 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -439,10 +439,12 @@ static uint8_t nvme_sq_empty(NvmeSQueue *sq)
>
> static void nvme_irq_check(NvmeCtrl *n)
> {
> + uint32_t intms = ldl_le_p(&n->bar.intms);
> +
> if (msix_enabled(&(n->parent_obj))) {
> return;
> }
> - if (~n->bar.intms & n->irq_status) {
> + if (~intms & n->irq_status) {
> pci_irq_assert(&n->parent_obj);
> } else {
> pci_irq_deassert(&n->parent_obj);
> @@ -1289,7 +1291,7 @@ static void nvme_post_cqes(void *opaque)
> if (ret) {
> trace_pci_nvme_err_addr_write(addr);
> trace_pci_nvme_err_cfs();
> - n->bar.csts = NVME_CSTS_FAILED;
> + stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
> break;
> }
> QTAILQ_REMOVE(&cq->req_list, req, entry);
> @@ -4022,7 +4024,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest
> *req)
> trace_pci_nvme_err_invalid_create_sq_sqid(sqid);
> return NVME_INVALID_QID | NVME_DNR;
> }
> - if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
> + if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
> trace_pci_nvme_err_invalid_create_sq_size(qsize);
> return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
> }
> @@ -4208,7 +4210,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t
> csi, uint32_t buf_len,
> return NVME_INVALID_FIELD | NVME_DNR;
> }
>
> - switch (NVME_CC_CSS(n->bar.cc)) {
> + switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) {
> case NVME_CC_CSS_NVM:
> src_iocs = nvme_cse_iocs_nvm;
> /* fall through */
> @@ -4370,7 +4372,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest
> *req)
> trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
> return NVME_INVALID_QID | NVME_DNR;
> }
> - if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
> + if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
> trace_pci_nvme_err_invalid_create_cq_size(qsize);
> return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
> }
> @@ -5163,17 +5165,19 @@ static void nvme_update_dmrsl(NvmeCtrl *n)
>
> static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns)
> {
> + uint32_t cc = ldl_le_p(&n->bar.cc);
> +
> ns->iocs = nvme_cse_iocs_none;
> switch (ns->csi) {
> case NVME_CSI_NVM:
> - if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) {
> + if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) {
> ns->iocs = nvme_cse_iocs_nvm;
> }
> break;
> case NVME_CSI_ZONED:
> - if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) {
> + if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) {
> ns->iocs = nvme_cse_iocs_zoned;
> - } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) {
> + } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) {
> ns->iocs = nvme_cse_iocs_nvm;
> }
> break;
> @@ -5510,7 +5514,7 @@ static void nvme_process_sq(void *opaque)
> if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
> trace_pci_nvme_err_addr_read(addr);
> trace_pci_nvme_err_cfs();
> - n->bar.csts = NVME_CSTS_FAILED;
> + stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
> break;
> }
> nvme_inc_sq_head(sq);
> @@ -5565,8 +5569,6 @@ static void nvme_ctrl_reset(NvmeCtrl *n)
> n->aer_queued = 0;
> n->outstanding_aers = 0;
> n->qs_created = false;
> -
> - n->bar.cc = 0;
> }
>
> static void nvme_ctrl_shutdown(NvmeCtrl *n)
> @@ -5605,7 +5607,12 @@ static void nvme_select_iocs(NvmeCtrl *n)
>
> static int nvme_start_ctrl(NvmeCtrl *n)
> {
> - uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
> + uint64_t cap = ldq_le_p(&n->bar.cap);
> + uint32_t cc = ldl_le_p(&n->bar.cc);
> + uint32_t aqa = ldl_le_p(&n->bar.aqa);
> + uint64_t asq = ldq_le_p(&n->bar.asq);
> + uint64_t acq = ldq_le_p(&n->bar.acq);
> + uint32_t page_bits = NVME_CC_MPS(cc) + 12;
> uint32_t page_size = 1 << page_bits;
>
> if (unlikely(n->cq[0])) {
> @@ -5616,73 +5623,72 @@ static int nvme_start_ctrl(NvmeCtrl *n)
> trace_pci_nvme_err_startfail_sq();
> return -1;
> }
> - if (unlikely(!n->bar.asq)) {
> + if (unlikely(!asq)) {
> trace_pci_nvme_err_startfail_nbarasq();
> return -1;
> }
> - if (unlikely(!n->bar.acq)) {
> + if (unlikely(!acq)) {
> trace_pci_nvme_err_startfail_nbaracq();
> return -1;
> }
> - if (unlikely(n->bar.asq & (page_size - 1))) {
> - trace_pci_nvme_err_startfail_asq_misaligned(n->bar.asq);
> + if (unlikely(asq & (page_size - 1))) {
> + trace_pci_nvme_err_startfail_asq_misaligned(asq);
> return -1;
> }
> - if (unlikely(n->bar.acq & (page_size - 1))) {
> - trace_pci_nvme_err_startfail_acq_misaligned(n->bar.acq);
> + if (unlikely(acq & (page_size - 1))) {
> + trace_pci_nvme_err_startfail_acq_misaligned(acq);
> return -1;
> }
> - if (unlikely(!(NVME_CAP_CSS(n->bar.cap) & (1 <<
> NVME_CC_CSS(n->bar.cc))))) {
> - trace_pci_nvme_err_startfail_css(NVME_CC_CSS(n->bar.cc));
> + if (unlikely(!(NVME_CAP_CSS(cap) & (1 << NVME_CC_CSS(cc))))) {
> + trace_pci_nvme_err_startfail_css(NVME_CC_CSS(cc));
> return -1;
> }
> - if (unlikely(NVME_CC_MPS(n->bar.cc) <
> - NVME_CAP_MPSMIN(n->bar.cap))) {
> + if (unlikely(NVME_CC_MPS(cc) < NVME_CAP_MPSMIN(cap))) {
> trace_pci_nvme_err_startfail_page_too_small(
> - NVME_CC_MPS(n->bar.cc),
> - NVME_CAP_MPSMIN(n->bar.cap));
> + NVME_CC_MPS(cc),
> + NVME_CAP_MPSMIN(cap));
> return -1;
> }
> - if (unlikely(NVME_CC_MPS(n->bar.cc) >
> - NVME_CAP_MPSMAX(n->bar.cap))) {
> + if (unlikely(NVME_CC_MPS(cc) >
> + NVME_CAP_MPSMAX(cap))) {
> trace_pci_nvme_err_startfail_page_too_large(
> - NVME_CC_MPS(n->bar.cc),
> - NVME_CAP_MPSMAX(n->bar.cap));
> + NVME_CC_MPS(cc),
> + NVME_CAP_MPSMAX(cap));
> return -1;
> }
> - if (unlikely(NVME_CC_IOCQES(n->bar.cc) <
> + if (unlikely(NVME_CC_IOCQES(cc) <
> NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
> trace_pci_nvme_err_startfail_cqent_too_small(
> - NVME_CC_IOCQES(n->bar.cc),
> - NVME_CTRL_CQES_MIN(n->bar.cap));
> + NVME_CC_IOCQES(cc),
> + NVME_CTRL_CQES_MIN(cap));
> return -1;
> }
> - if (unlikely(NVME_CC_IOCQES(n->bar.cc) >
> + if (unlikely(NVME_CC_IOCQES(cc) >
> NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
> trace_pci_nvme_err_startfail_cqent_too_large(
> - NVME_CC_IOCQES(n->bar.cc),
> - NVME_CTRL_CQES_MAX(n->bar.cap));
> + NVME_CC_IOCQES(cc),
> + NVME_CTRL_CQES_MAX(cap));
> return -1;
> }
> - if (unlikely(NVME_CC_IOSQES(n->bar.cc) <
> + if (unlikely(NVME_CC_IOSQES(cc) <
> NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
> trace_pci_nvme_err_startfail_sqent_too_small(
> - NVME_CC_IOSQES(n->bar.cc),
> - NVME_CTRL_SQES_MIN(n->bar.cap));
> + NVME_CC_IOSQES(cc),
> + NVME_CTRL_SQES_MIN(cap));
> return -1;
> }
> - if (unlikely(NVME_CC_IOSQES(n->bar.cc) >
> + if (unlikely(NVME_CC_IOSQES(cc) >
> NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
> trace_pci_nvme_err_startfail_sqent_too_large(
> - NVME_CC_IOSQES(n->bar.cc),
> - NVME_CTRL_SQES_MAX(n->bar.cap));
> + NVME_CC_IOSQES(cc),
> + NVME_CTRL_SQES_MAX(cap));
> return -1;
> }
> - if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) {
> + if (unlikely(!NVME_AQA_ASQS(aqa))) {
> trace_pci_nvme_err_startfail_asqent_sz_zero();
> return -1;
> }
> - if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) {
> + if (unlikely(!NVME_AQA_ACQS(aqa))) {
> trace_pci_nvme_err_startfail_acqent_sz_zero();
> return -1;
> }
> @@ -5690,12 +5696,10 @@ static int nvme_start_ctrl(NvmeCtrl *n)
> n->page_bits = page_bits;
> n->page_size = page_size;
> n->max_prp_ents = n->page_size / sizeof(uint64_t);
> - n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc);
> - n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc);
> - nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0,
> - NVME_AQA_ACQS(n->bar.aqa) + 1, 1);
> - nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
> - NVME_AQA_ASQS(n->bar.aqa) + 1);
> + n->cqe_size = 1 << NVME_CC_IOCQES(cc);
> + n->sqe_size = 1 << NVME_CC_IOSQES(cc);
> + nvme_init_cq(&n->admin_cq, n, acq, 0, 0, NVME_AQA_ACQS(aqa) + 1, 1);
> + nvme_init_sq(&n->admin_sq, n, asq, 0, 0, NVME_AQA_ASQS(aqa) + 1);
>
> nvme_set_timestamp(n, 0ULL);
>
> @@ -5708,22 +5712,33 @@ static int nvme_start_ctrl(NvmeCtrl *n)
>
> static void nvme_cmb_enable_regs(NvmeCtrl *n)
> {
> - NVME_CMBLOC_SET_CDPCILS(n->bar.cmbloc, 1);
> - NVME_CMBLOC_SET_CDPMLS(n->bar.cmbloc, 1);
> - NVME_CMBLOC_SET_BIR(n->bar.cmbloc, NVME_CMB_BIR);
> + uint32_t cmbloc = ldl_le_p(&n->bar.cmbloc);
> + uint32_t cmbsz = ldl_le_p(&n->bar.cmbsz);
>
> - NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
> - NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
> - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 1);
> - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
> - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
> - NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
> - NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
> + NVME_CMBLOC_SET_CDPCILS(cmbloc, 1);
> + NVME_CMBLOC_SET_CDPMLS(cmbloc, 1);
> + NVME_CMBLOC_SET_BIR(cmbloc, NVME_CMB_BIR);
> + stl_le_p(&n->bar.cmbloc, cmbloc);
> +
> + NVME_CMBSZ_SET_SQS(cmbsz, 1);
> + NVME_CMBSZ_SET_CQS(cmbsz, 0);
> + NVME_CMBSZ_SET_LISTS(cmbsz, 1);
> + NVME_CMBSZ_SET_RDS(cmbsz, 1);
> + NVME_CMBSZ_SET_WDS(cmbsz, 1);
> + NVME_CMBSZ_SET_SZU(cmbsz, 2); /* MBs */
> + NVME_CMBSZ_SET_SZ(cmbsz, n->params.cmb_size_mb);
> + stl_le_p(&n->bar.cmbsz, cmbsz);
> }
>
> static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
> unsigned size)
> {
> + uint64_t cap = ldq_le_p(&n->bar.cap);
> + uint32_t cc = ldl_le_p(&n->bar.cc);
> + uint32_t intms = ldl_le_p(&n->bar.intms);
> + uint32_t csts = ldl_le_p(&n->bar.csts);
> + uint32_t pmrsts = ldl_le_p(&n->bar.pmrsts);
> +
> if (unlikely(offset & (sizeof(uint32_t) - 1))) {
> NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32,
> "MMIO write not 32-bit aligned,"
> @@ -5747,9 +5762,10 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset,
> uint64_t data,
> " when MSI-X is enabled");
> /* should be ignored, fall through for now */
> }
> - n->bar.intms |= data & 0xffffffff;
> + intms |= data;
> + stl_le_p(&n->bar.intms, intms);
> n->bar.intmc = n->bar.intms;
> - trace_pci_nvme_mmio_intm_set(data & 0xffffffff, n->bar.intmc);
> + trace_pci_nvme_mmio_intm_set(data & 0xffffffff, intms);
> nvme_irq_check(n);
> break;
> case NVME_REG_INTMC:
> @@ -5759,44 +5775,55 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr
> offset, uint64_t data,
> " when MSI-X is enabled");
> /* should be ignored, fall through for now */
> }
> - n->bar.intms &= ~(data & 0xffffffff);
> + intms &= ~data;
> + stl_le_p(&n->bar.intms, intms);
> n->bar.intmc = n->bar.intms;
> - trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, n->bar.intmc);
> + trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, intms);
> nvme_irq_check(n);
> break;
> case NVME_REG_CC:
> trace_pci_nvme_mmio_cfg(data & 0xffffffff);
> +
> /* Windows first sends data, then sends enable bit */
> - if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) &&
> - !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc))
> + if (!NVME_CC_EN(data) && !NVME_CC_EN(cc) &&
> + !NVME_CC_SHN(data) && !NVME_CC_SHN(cc))
> {
> - n->bar.cc = data;
> + cc = data;
> }
>
> - if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) {
> - n->bar.cc = data;
> + if (NVME_CC_EN(data) && !NVME_CC_EN(cc)) {
> + cc = data;
> +
> + /* flush CC since nvme_start_ctrl() needs the value */
> + stl_le_p(&n->bar.cc, cc);
> if (unlikely(nvme_start_ctrl(n))) {
> trace_pci_nvme_err_startfail();
> - n->bar.csts = NVME_CSTS_FAILED;
> + csts = NVME_CSTS_FAILED;
> } else {
> trace_pci_nvme_mmio_start_success();
> - n->bar.csts = NVME_CSTS_READY;
> + csts = NVME_CSTS_READY;
> }
> - } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) {
> + } else if (!NVME_CC_EN(data) && NVME_CC_EN(cc)) {
> trace_pci_nvme_mmio_stopped();
> nvme_ctrl_reset(n);
> - n->bar.csts &= ~NVME_CSTS_READY;
> + cc = 0;
> + csts &= ~NVME_CSTS_READY;
> }
> - if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) {
> +
> + if (NVME_CC_SHN(data) && !(NVME_CC_SHN(cc))) {
> trace_pci_nvme_mmio_shutdown_set();
> nvme_ctrl_shutdown(n);
> - n->bar.cc = data;
> - n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
> - } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) {
> + cc = data;
> + csts |= NVME_CSTS_SHST_COMPLETE;
> + } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(cc)) {
> trace_pci_nvme_mmio_shutdown_cleared();
> - n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
> - n->bar.cc = data;
> + csts &= ~NVME_CSTS_SHST_COMPLETE;
> + cc = data;
> }
> +
> + stl_le_p(&n->bar.cc, cc);
> + stl_le_p(&n->bar.csts, csts);
> +
> break;
> case NVME_REG_CSTS:
> if (data & (1 << 4)) {
> @@ -5818,26 +5845,24 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr
> offset, uint64_t data,
> }
> break;
> case NVME_REG_AQA:
> - n->bar.aqa = data & 0xffffffff;
> + stl_le_p(&n->bar.aqa, data);
> trace_pci_nvme_mmio_aqattr(data & 0xffffffff);
> break;
> case NVME_REG_ASQ:
> - n->bar.asq = size == 8 ? data :
> - (n->bar.asq & ~0xffffffffULL) | (data & 0xffffffff);
> + stn_le_p(&n->bar.asq, data, size);
Argh. I hurried a change here and it bit me.
I'll let my CI run to the end and I'll send a v5...
signature.asc
Description: PGP signature
- [PATCH v4 0/5] hw/nvme: fix mmio read, Klaus Jensen, 2021/07/19
- [PATCH v4 1/5] hw/nvme: split pmrmsc register into upper and lower, Klaus Jensen, 2021/07/19
- [PATCH v4 2/5] hw/nvme: use symbolic names for registers, Klaus Jensen, 2021/07/19
- [PATCH v4 4/5] hw/nvme: fix mmio read, Klaus Jensen, 2021/07/19
- Re: [PATCH v4 4/5] hw/nvme: fix mmio read, Klaus Jensen <=
- [PATCH v4 3/5] hw/nvme: fix out-of-bounds reads, Klaus Jensen, 2021/07/19
- [PATCH v4 5/5] tests/qtest/nvme-test: add mmio read test, Klaus Jensen, 2021/07/19