qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2 3/4] hw/nvme: add support for ratified TP4084


From: Klaus Jensen
Subject: Re: [PATCH v2 3/4] hw/nvme: add support for ratified TP4084
Date: Tue, 28 Jun 2022 07:57:03 +0200

On Jun 27 13:47, Niklas Cassel wrote:
> TP4084 adds a new mode, CC.CRIME, that can be used to mark a namespace
> as ready independently from the controller.
> 
> When CC.CRIME is 0 (default), things behave as before, all namespaces
> are ready when CSTS.RDY gets set to 1.
> 
> When CC.CRIME is 1, the controller will become ready when CSTS.RDY gets
> set to 1, but commands accessing a namespace are allowed to return
> "Namespace Not Ready" or "Admin Command Media Not Ready".
> After CRTO.CRWMT amount of time, if the namespace has not yet been
> marked ready, the status codes also need to have the DNR bit set.
> 
> Add a new "ready_delay" namespace device parameter, in order to emulate
> different ready latencies for namespaces.
> 
> Once a namespace is ready, it will set the NRDY bit in the I/O Command
> Set Independent Identify Namespace Data Structure, and then send out a
> Namespace Attribute Changed event.
> 
> This new "ready_delay" is supported on controllers not part of a NVMe
> subsystem. The reasons are many. One problem is that multiple controllers
> can have different CC.CRIME modes running. Another problem is the extra
> locking needed. The third problem is when to actually clear NRDY. If we
> assume that a namespace clears NRDY when it no longer has any controller
> online for that namespace. The problem then is that Linux will reset the
> controllers one by one during probe time. The reset goes so fast so that
> there is no time when all controllers are in reset at the same time, so
> NRDY will never get cleared. (The controllers are enabled by SeaBIOS by
> default.) We could introduce a reset_time param, but this would only
> increase the chances that all controllers are in reset at the same time.
> 
> Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
> ---
>  hw/nvme/ctrl.c       | 123 +++++++++++++++++++++++++++++++++++++++++--
>  hw/nvme/ns.c         |  18 +++++++
>  hw/nvme/nvme.h       |   6 +++
>  hw/nvme/trace-events |   1 +
>  include/block/nvme.h |  60 ++++++++++++++++++++-
>  5 files changed, 204 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 8ca824ea14..5404f67480 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -88,6 +88,12 @@
>   *   completion when there are no outstanding AERs. When the maximum number 
> of
>   *   enqueued events are reached, subsequent events will be dropped.
>   *
> + * - `crwmt`
> + *   This is the Controller Ready With Media Timeout (CRWMT) field that is
> + *   defined in the CRTO register. This specifies the worst-case time that 
> host
> + *   software should wait for the controller and all attached namespaces to
> + *   become ready. The value is in units of 500 milliseconds.
> + *
>   * - `mdts`
>   *   Indicates the maximum data transfer size for a command that transfers 
> data
>   *   between host-accessible memory and the controller. The value is 
> specified
> @@ -157,6 +163,11 @@
>   *   namespace will be available in the subsystem but not attached to any
>   *   controllers.
>   *
> + * - `ready_delay`
> + *   This parameter specifies the amount of time that the namespace should 
> wait
> + *   before being marked ready. Only applicable if CC.CRIME is set by the 
> user.
> + *   The value is in units of 500 milliseconds (to be consistent with 
> `crwmt`).
> + *
>   * Setting `zoned` to true selects Zoned Command Set at the namespace.
>   * In this case, the following namespace properties are available to 
> configure
>   * zoned operation:
> @@ -216,6 +227,8 @@
>  #define NVME_VF_RES_GRANULARITY 1
>  #define NVME_VF_OFFSET 0x1
>  #define NVME_VF_STRIDE 1
> +#define NVME_DEFAULT_CRIMT 0xa
> +#define NVME_DEFAULT_CRWMT 0xf
>  
>  #define NVME_GUEST_ERR(trace, fmt, ...) \
>      do { \
> @@ -4188,6 +4201,10 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest 
> *req)
>          return NVME_INVALID_OPCODE | NVME_DNR;
>      }
>  
> +    if (!(ns->id_indep_ns.nstat & NVME_NSTAT_NRDY)) {
> +        return NVME_NS_NOT_READY;
> +    }
> +

I'd add a ns->ready bool instead. See below for my previously posted
patch.

>      if (ns->status) {
>          return ns->status;
>      }
> @@ -4791,6 +4808,27 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, 
> NvmeRequest *req, bool active)
>      return NVME_INVALID_CMD_SET | NVME_DNR;
>  }
>  
> +static uint16_t nvme_identify_cs_indep_ns(NvmeCtrl *n, NvmeRequest *req)
> +{
> +    NvmeNamespace *ns;
> +    NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
> +    uint32_t nsid = le32_to_cpu(c->nsid);
> +
> +    trace_pci_nvme_identify_cs_indep_ns(nsid);
> +
> +    if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
> +        return NVME_INVALID_NSID | NVME_DNR;
> +    }
> +
> +    ns = nvme_ns(n, nsid);
> +    if (unlikely(!ns)) {
> +            return nvme_rpt_empty_id_struct(n, req);
> +    }
> +
> +    return nvme_c2h(n, (uint8_t *)&ns->id_indep_ns, sizeof(NvmeIdNsCsIndep),
> +                    req);
> +}
> +

I posted a patch for this some time ago

https://lore.kernel.org/qemu-devel/20220531060342.2556973-1-its@irrelevant.dk/

The structure is so simple that we can just "build" it here instead of
adding yet another (mostly empty) 4k member to the NvmeNamespace struct.

>  static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
>                                          bool attached)
>  {
> @@ -5081,6 +5119,8 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest 
> *req)
>          return nvme_identify_ns(n, req, true);
>      case NVME_ID_CNS_NS_PRESENT:
>          return nvme_identify_ns(n, req, false);
> +    case NVME_ID_CNS_CS_INDEPENDENT_NS:
> +        return nvme_identify_cs_indep_ns(n, req);
>      case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST:
>          return nvme_identify_ctrl_list(n, req, true);
>      case NVME_ID_CNS_CTRL_LIST:
> @@ -5556,6 +5596,44 @@ static void nvme_select_iocs_ns(NvmeCtrl *n, 
> NvmeNamespace *ns)
>      }
>  }
>  
> +void nvme_ns_ready_cb(void *opaque)
> +{
> +    NvmeNamespace *ns = opaque;
> +    DeviceState *dev = &ns->parent_obj;
> +    BusState *s = qdev_get_parent_bus(dev);
> +    NvmeCtrl *n = NVME(s->parent);
> +
> +    ns->id_indep_ns.nstat |= NVME_NSTAT_NRDY;
> +
> +    if (!test_and_set_bit(ns->params.nsid, n->changed_nsids)) {
> +        nvme_enqueue_event(n, NVME_AER_TYPE_NOTICE,
> +                           NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED,
> +                           NVME_LOG_CHANGED_NSLIST);
> +    }
> +}

Move to hw/nvme/ns.c.

> +
> +static void nvme_set_ready_or_start_timer(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +    int64_t expire_time;
> +
> +    if (!NVME_CC_CRIME(ldl_le_p(&n->bar.cc)) || ns->params.ready_delay == 0) 
> {
> +        ns->id_indep_ns.nstat |= NVME_NSTAT_NRDY;
> +        return;
> +    }
> +
> +    expire_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> +    expire_time += ns->params.ready_delay * 500;
> +    timer_mod(ns->ready_delay_timer, expire_time);
> +}

This can be made independent of NvmeCtrl by passing the CRIME value (and
then moved to hw/nvme/ns.c).

> +
> +static inline void nvme_ns_clear_ready_and_stop_timer(NvmeNamespace *ns)
> +{
> +    if (!ns->subsys) {
> +        timer_del(ns->ready_delay_timer);
> +        ns->id_indep_ns.nstat &= ~NVME_NSTAT_NRDY;
> +    }
> +}
> +

Move to hw/nvme/ns.c.

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]