qemu-block
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v4 4/5] hw/nvme: fix mmio read


From: Klaus Jensen
Subject: Re: [PATCH v4 4/5] hw/nvme: fix mmio read
Date: Mon, 19 Jul 2021 22:26:31 +0200

On Jul 19 22:07, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> The new PMR test unearthed a long-standing issue with MMIO reads on
> big-endian hosts.
> 
> Fix this by unconditionally storing all controller registers in little
> endian.
> 
> Cc: Gollu Appalanaidu <anaidu.gollu@samsung.com>
> Reported-by: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  hw/nvme/ctrl.c | 290 +++++++++++++++++++++++++++----------------------
>  1 file changed, 162 insertions(+), 128 deletions(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 0449cc4dee9b..76721e31c6b1 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -439,10 +439,12 @@ static uint8_t nvme_sq_empty(NvmeSQueue *sq)
>  
>  static void nvme_irq_check(NvmeCtrl *n)
>  {
> +    uint32_t intms = ldl_le_p(&n->bar.intms);
> +
>      if (msix_enabled(&(n->parent_obj))) {
>          return;
>      }
> -    if (~n->bar.intms & n->irq_status) {
> +    if (~intms & n->irq_status) {
>          pci_irq_assert(&n->parent_obj);
>      } else {
>          pci_irq_deassert(&n->parent_obj);
> @@ -1289,7 +1291,7 @@ static void nvme_post_cqes(void *opaque)
>          if (ret) {
>              trace_pci_nvme_err_addr_write(addr);
>              trace_pci_nvme_err_cfs();
> -            n->bar.csts = NVME_CSTS_FAILED;
> +            stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
>              break;
>          }
>          QTAILQ_REMOVE(&cq->req_list, req, entry);
> @@ -4022,7 +4024,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req)
>          trace_pci_nvme_err_invalid_create_sq_sqid(sqid);
>          return NVME_INVALID_QID | NVME_DNR;
>      }
> -    if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
> +    if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
>          trace_pci_nvme_err_invalid_create_sq_size(qsize);
>          return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
>      }
> @@ -4208,7 +4210,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
>          return NVME_INVALID_FIELD | NVME_DNR;
>      }
>  
> -    switch (NVME_CC_CSS(n->bar.cc)) {
> +    switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) {
>      case NVME_CC_CSS_NVM:
>          src_iocs = nvme_cse_iocs_nvm;
>          /* fall through */
> @@ -4370,7 +4372,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
>          trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
>          return NVME_INVALID_QID | NVME_DNR;
>      }
> -    if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
> +    if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
>          trace_pci_nvme_err_invalid_create_cq_size(qsize);
>          return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
>      }
> @@ -5163,17 +5165,19 @@ static void nvme_update_dmrsl(NvmeCtrl *n)
>  
>  static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns)
>  {
> +    uint32_t cc = ldl_le_p(&n->bar.cc);
> +
>      ns->iocs = nvme_cse_iocs_none;
>      switch (ns->csi) {
>      case NVME_CSI_NVM:
> -        if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) {
> +        if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) {
>              ns->iocs = nvme_cse_iocs_nvm;
>          }
>          break;
>      case NVME_CSI_ZONED:
> -        if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) {
> +        if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) {
>              ns->iocs = nvme_cse_iocs_zoned;
> -        } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) {
> +        } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) {
>              ns->iocs = nvme_cse_iocs_nvm;
>          }
>          break;
> @@ -5510,7 +5514,7 @@ static void nvme_process_sq(void *opaque)
>          if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
>              trace_pci_nvme_err_addr_read(addr);
>              trace_pci_nvme_err_cfs();
> -            n->bar.csts = NVME_CSTS_FAILED;
> +            stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
>              break;
>          }
>          nvme_inc_sq_head(sq);
> @@ -5565,8 +5569,6 @@ static void nvme_ctrl_reset(NvmeCtrl *n)
>      n->aer_queued = 0;
>      n->outstanding_aers = 0;
>      n->qs_created = false;
> -
> -    n->bar.cc = 0;
>  }
>  
>  static void nvme_ctrl_shutdown(NvmeCtrl *n)
> @@ -5605,7 +5607,12 @@ static void nvme_select_iocs(NvmeCtrl *n)
>  
>  static int nvme_start_ctrl(NvmeCtrl *n)
>  {
> -    uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
> +    uint64_t cap = ldq_le_p(&n->bar.cap);
> +    uint32_t cc = ldl_le_p(&n->bar.cc);
> +    uint32_t aqa = ldl_le_p(&n->bar.aqa);
> +    uint64_t asq = ldq_le_p(&n->bar.asq);
> +    uint64_t acq = ldq_le_p(&n->bar.acq);
> +    uint32_t page_bits = NVME_CC_MPS(cc) + 12;
>      uint32_t page_size = 1 << page_bits;
>  
>      if (unlikely(n->cq[0])) {
> @@ -5616,73 +5623,72 @@ static int nvme_start_ctrl(NvmeCtrl *n)
>          trace_pci_nvme_err_startfail_sq();
>          return -1;
>      }
> -    if (unlikely(!n->bar.asq)) {
> +    if (unlikely(!asq)) {
>          trace_pci_nvme_err_startfail_nbarasq();
>          return -1;
>      }
> -    if (unlikely(!n->bar.acq)) {
> +    if (unlikely(!acq)) {
>          trace_pci_nvme_err_startfail_nbaracq();
>          return -1;
>      }
> -    if (unlikely(n->bar.asq & (page_size - 1))) {
> -        trace_pci_nvme_err_startfail_asq_misaligned(n->bar.asq);
> +    if (unlikely(asq & (page_size - 1))) {
> +        trace_pci_nvme_err_startfail_asq_misaligned(asq);
>          return -1;
>      }
> -    if (unlikely(n->bar.acq & (page_size - 1))) {
> -        trace_pci_nvme_err_startfail_acq_misaligned(n->bar.acq);
> +    if (unlikely(acq & (page_size - 1))) {
> +        trace_pci_nvme_err_startfail_acq_misaligned(acq);
>          return -1;
>      }
> -    if (unlikely(!(NVME_CAP_CSS(n->bar.cap) & (1 << NVME_CC_CSS(n->bar.cc))))) {
> -        trace_pci_nvme_err_startfail_css(NVME_CC_CSS(n->bar.cc));
> +    if (unlikely(!(NVME_CAP_CSS(cap) & (1 << NVME_CC_CSS(cc))))) {
> +        trace_pci_nvme_err_startfail_css(NVME_CC_CSS(cc));
>          return -1;
>      }
> -    if (unlikely(NVME_CC_MPS(n->bar.cc) <
> -                 NVME_CAP_MPSMIN(n->bar.cap))) {
> +    if (unlikely(NVME_CC_MPS(cc) < NVME_CAP_MPSMIN(cap))) {
>          trace_pci_nvme_err_startfail_page_too_small(
> -                    NVME_CC_MPS(n->bar.cc),
> -                    NVME_CAP_MPSMIN(n->bar.cap));
> +                    NVME_CC_MPS(cc),
> +                    NVME_CAP_MPSMIN(cap));
>          return -1;
>      }
> -    if (unlikely(NVME_CC_MPS(n->bar.cc) >
> -                 NVME_CAP_MPSMAX(n->bar.cap))) {
> +    if (unlikely(NVME_CC_MPS(cc) >
> +                 NVME_CAP_MPSMAX(cap))) {
>          trace_pci_nvme_err_startfail_page_too_large(
> -                    NVME_CC_MPS(n->bar.cc),
> -                    NVME_CAP_MPSMAX(n->bar.cap));
> +                    NVME_CC_MPS(cc),
> +                    NVME_CAP_MPSMAX(cap));
>          return -1;
>      }
> -    if (unlikely(NVME_CC_IOCQES(n->bar.cc) <
> +    if (unlikely(NVME_CC_IOCQES(cc) <
>                   NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
>          trace_pci_nvme_err_startfail_cqent_too_small(
> -                    NVME_CC_IOCQES(n->bar.cc),
> -                    NVME_CTRL_CQES_MIN(n->bar.cap));
> +                    NVME_CC_IOCQES(cc),
> +                    NVME_CTRL_CQES_MIN(cap));
>          return -1;
>      }
> -    if (unlikely(NVME_CC_IOCQES(n->bar.cc) >
> +    if (unlikely(NVME_CC_IOCQES(cc) >
>                   NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
>          trace_pci_nvme_err_startfail_cqent_too_large(
> -                    NVME_CC_IOCQES(n->bar.cc),
> -                    NVME_CTRL_CQES_MAX(n->bar.cap));
> +                    NVME_CC_IOCQES(cc),
> +                    NVME_CTRL_CQES_MAX(cap));
>          return -1;
>      }
> -    if (unlikely(NVME_CC_IOSQES(n->bar.cc) <
> +    if (unlikely(NVME_CC_IOSQES(cc) <
>                   NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
>          trace_pci_nvme_err_startfail_sqent_too_small(
> -                    NVME_CC_IOSQES(n->bar.cc),
> -                    NVME_CTRL_SQES_MIN(n->bar.cap));
> +                    NVME_CC_IOSQES(cc),
> +                    NVME_CTRL_SQES_MIN(cap));
>          return -1;
>      }
> -    if (unlikely(NVME_CC_IOSQES(n->bar.cc) >
> +    if (unlikely(NVME_CC_IOSQES(cc) >
>                   NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
>          trace_pci_nvme_err_startfail_sqent_too_large(
> -                    NVME_CC_IOSQES(n->bar.cc),
> -                    NVME_CTRL_SQES_MAX(n->bar.cap));
> +                    NVME_CC_IOSQES(cc),
> +                    NVME_CTRL_SQES_MAX(cap));
>          return -1;
>      }
> -    if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) {
> +    if (unlikely(!NVME_AQA_ASQS(aqa))) {
>          trace_pci_nvme_err_startfail_asqent_sz_zero();
>          return -1;
>      }
> -    if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) {
> +    if (unlikely(!NVME_AQA_ACQS(aqa))) {
>          trace_pci_nvme_err_startfail_acqent_sz_zero();
>          return -1;
>      }
> @@ -5690,12 +5696,10 @@ static int nvme_start_ctrl(NvmeCtrl *n)
>      n->page_bits = page_bits;
>      n->page_size = page_size;
>      n->max_prp_ents = n->page_size / sizeof(uint64_t);
> -    n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc);
> -    n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc);
> -    nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0,
> -                 NVME_AQA_ACQS(n->bar.aqa) + 1, 1);
> -    nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
> -                 NVME_AQA_ASQS(n->bar.aqa) + 1);
> +    n->cqe_size = 1 << NVME_CC_IOCQES(cc);
> +    n->sqe_size = 1 << NVME_CC_IOSQES(cc);
> +    nvme_init_cq(&n->admin_cq, n, acq, 0, 0, NVME_AQA_ACQS(aqa) + 1, 1);
> +    nvme_init_sq(&n->admin_sq, n, asq, 0, 0, NVME_AQA_ASQS(aqa) + 1);
>  
>      nvme_set_timestamp(n, 0ULL);
>  
> @@ -5708,22 +5712,33 @@ static int nvme_start_ctrl(NvmeCtrl *n)
>  
>  static void nvme_cmb_enable_regs(NvmeCtrl *n)
>  {
> -    NVME_CMBLOC_SET_CDPCILS(n->bar.cmbloc, 1);
> -    NVME_CMBLOC_SET_CDPMLS(n->bar.cmbloc, 1);
> -    NVME_CMBLOC_SET_BIR(n->bar.cmbloc, NVME_CMB_BIR);
> +    uint32_t cmbloc = ldl_le_p(&n->bar.cmbloc);
> +    uint32_t cmbsz = ldl_le_p(&n->bar.cmbsz);
>  
> -    NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
> -    NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
> -    NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 1);
> -    NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
> -    NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
> -    NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
> -    NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
> +    NVME_CMBLOC_SET_CDPCILS(cmbloc, 1);
> +    NVME_CMBLOC_SET_CDPMLS(cmbloc, 1);
> +    NVME_CMBLOC_SET_BIR(cmbloc, NVME_CMB_BIR);
> +    stl_le_p(&n->bar.cmbloc, cmbloc);
> +
> +    NVME_CMBSZ_SET_SQS(cmbsz, 1);
> +    NVME_CMBSZ_SET_CQS(cmbsz, 0);
> +    NVME_CMBSZ_SET_LISTS(cmbsz, 1);
> +    NVME_CMBSZ_SET_RDS(cmbsz, 1);
> +    NVME_CMBSZ_SET_WDS(cmbsz, 1);
> +    NVME_CMBSZ_SET_SZU(cmbsz, 2); /* MBs */
> +    NVME_CMBSZ_SET_SZ(cmbsz, n->params.cmb_size_mb);
> +    stl_le_p(&n->bar.cmbsz, cmbsz);
>  }
>  
>  static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
>                             unsigned size)
>  {
> +    uint64_t cap = ldq_le_p(&n->bar.cap);
> +    uint32_t cc = ldl_le_p(&n->bar.cc);
> +    uint32_t intms = ldl_le_p(&n->bar.intms);
> +    uint32_t csts = ldl_le_p(&n->bar.csts);
> +    uint32_t pmrsts = ldl_le_p(&n->bar.pmrsts);
> +
>      if (unlikely(offset & (sizeof(uint32_t) - 1))) {
>          NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32,
>                         "MMIO write not 32-bit aligned,"
> @@ -5747,9 +5762,10 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
>                             " when MSI-X is enabled");
>              /* should be ignored, fall through for now */
>          }
> -        n->bar.intms |= data & 0xffffffff;
> +        intms |= data;
> +        stl_le_p(&n->bar.intms, intms);
>          n->bar.intmc = n->bar.intms;
> -        trace_pci_nvme_mmio_intm_set(data & 0xffffffff, n->bar.intmc);
> +        trace_pci_nvme_mmio_intm_set(data & 0xffffffff, intms);
>          nvme_irq_check(n);
>          break;
>      case NVME_REG_INTMC:
> @@ -5759,44 +5775,55 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
>                             " when MSI-X is enabled");
>              /* should be ignored, fall through for now */
>          }
> -        n->bar.intms &= ~(data & 0xffffffff);
> +        intms &= ~data;
> +        stl_le_p(&n->bar.intms, intms);
>          n->bar.intmc = n->bar.intms;
> -        trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, n->bar.intmc);
> +        trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, intms);
>          nvme_irq_check(n);
>          break;
>      case NVME_REG_CC:
>          trace_pci_nvme_mmio_cfg(data & 0xffffffff);
> +
>          /* Windows first sends data, then sends enable bit */
> -        if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) &&
> -            !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc))
> +        if (!NVME_CC_EN(data) && !NVME_CC_EN(cc) &&
> +            !NVME_CC_SHN(data) && !NVME_CC_SHN(cc))
>          {
> -            n->bar.cc = data;
> +            cc = data;
>          }
>  
> -        if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) {
> -            n->bar.cc = data;
> +        if (NVME_CC_EN(data) && !NVME_CC_EN(cc)) {
> +            cc = data;
> +
> +            /* flush CC since nvme_start_ctrl() needs the value */
> +            stl_le_p(&n->bar.cc, cc);
>              if (unlikely(nvme_start_ctrl(n))) {
>                  trace_pci_nvme_err_startfail();
> -                n->bar.csts = NVME_CSTS_FAILED;
> +                csts = NVME_CSTS_FAILED;
>              } else {
>                  trace_pci_nvme_mmio_start_success();
> -                n->bar.csts = NVME_CSTS_READY;
> +                csts = NVME_CSTS_READY;
>              }
> -        } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) {
> +        } else if (!NVME_CC_EN(data) && NVME_CC_EN(cc)) {
>              trace_pci_nvme_mmio_stopped();
>              nvme_ctrl_reset(n);
> -            n->bar.csts &= ~NVME_CSTS_READY;
> +            cc = 0;
> +            csts &= ~NVME_CSTS_READY;
>          }
> -        if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) {
> +
> +        if (NVME_CC_SHN(data) && !(NVME_CC_SHN(cc))) {
>              trace_pci_nvme_mmio_shutdown_set();
>              nvme_ctrl_shutdown(n);
> -            n->bar.cc = data;
> -            n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
> -        } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) {
> +            cc = data;
> +            csts |= NVME_CSTS_SHST_COMPLETE;
> +        } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(cc)) {
>              trace_pci_nvme_mmio_shutdown_cleared();
> -            n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
> -            n->bar.cc = data;
> +            csts &= ~NVME_CSTS_SHST_COMPLETE;
> +            cc = data;
>          }
> +
> +        stl_le_p(&n->bar.cc, cc);
> +        stl_le_p(&n->bar.csts, csts);
> +
>          break;
>      case NVME_REG_CSTS:
>          if (data & (1 << 4)) {
> @@ -5818,26 +5845,24 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
>          }
>          break;
>      case NVME_REG_AQA:
> -        n->bar.aqa = data & 0xffffffff;
> +        stl_le_p(&n->bar.aqa, data);
>          trace_pci_nvme_mmio_aqattr(data & 0xffffffff);
>          break;
>      case NVME_REG_ASQ:
> -        n->bar.asq = size == 8 ? data :
> -            (n->bar.asq & ~0xffffffffULL) | (data & 0xffffffff);
> +        stn_le_p(&n->bar.asq, data, size);

Argh. I hurried a change here and it bit me.

I'll let my CI run to the end and I'll send a v5...

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]