qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2 08/18] hw/block/nvme: Make Zoned NS Command Set definitions


From: Alistair Francis
Subject: Re: [PATCH v2 08/18] hw/block/nvme: Make Zoned NS Command Set definitions
Date: Tue, 30 Jun 2020 15:11:59 -0700

On Wed, Jun 17, 2020 at 2:51 PM Dmitry Fomichev <dmitry.fomichev@wdc.com> wrote:
>
> Define values and structures that are needed to support Zoned
> Namespace Command Set (NVMe TP 4053) in PCI NVMe controller emulator.
>
> All new protocol definitions are located in include/block/nvme.h
> and everything added that is specific to this implementation is kept
> in hw/block/nvme.h.
>
> In order to improve scalability, all open, closed and full zones
> are organized in separate linked lists. Consequently, almost all
> zone operations don't require scanning of the entire zone array
> (which potentially can be quite large) - it is only necessary to
> enumerate one or more zone lists. Zone lists are designed to be
> position-independent as they can be persisted to the backing file
> as a part of zone metadata. NvmeZoneList struct defined in this patch
> serves as a head of every zone list.
>
> NvmeZone structure encapsulates NvmeZoneDescriptor defined in Zoned
> Command Set specification and adds a few more fields that are
> internal to this implementation.
>
> Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
> Signed-off-by: Ajay Joshi <ajay.joshi@wdc.com>
> Signed-off-by: Matias Bjorling <matias.bjorling@wdc.com>
> Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
> Signed-off-by: Alexey Bogoslavsky <alexey.bogoslavsky@wdc.com>
> Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
> ---
>  hw/block/nvme.h      | 130 +++++++++++++++++++++++++++++++++++++++++++
>  include/block/nvme.h | 119 ++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 248 insertions(+), 1 deletion(-)
>
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 0d29f75475..2c932b5e29 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -3,12 +3,22 @@
>
>  #include "block/nvme.h"
>
> +#define NVME_DEFAULT_ZONE_SIZE   128 /* MiB */
> +#define NVME_DEFAULT_MAX_ZA_SIZE 128 /* KiB */
> +
>  typedef struct NvmeParams {
>      char     *serial;
>      uint32_t num_queues; /* deprecated since 5.1 */
>      uint32_t max_ioqpairs;
>      uint16_t msix_qsize;
>      uint32_t cmb_size_mb;
> +
> +    bool        zoned;
> +    bool        cross_zone_read;
> +    uint8_t     fill_pattern;
> +    uint32_t    zamds_bs;
> +    uint64_t    zone_size;
> +    uint64_t    zone_capacity;
>  } NvmeParams;
>
>  typedef struct NvmeAsyncEvent {
> @@ -17,6 +27,8 @@ typedef struct NvmeAsyncEvent {
>
>  enum NvmeRequestFlags {
>      NVME_REQ_FLG_HAS_SG   = 1 << 0,
> +    NVME_REQ_FLG_FILL     = 1 << 1,
> +    NVME_REQ_FLG_APPEND   = 1 << 2,
>  };
>
>  typedef struct NvmeRequest {
> @@ -24,6 +36,7 @@ typedef struct NvmeRequest {
>      BlockAIOCB              *aiocb;
>      uint16_t                status;
>      uint16_t                flags;
> +    uint64_t                fill_ofs;
>      NvmeCqe                 cqe;
>      BlockAcctCookie         acct;
>      QEMUSGList              qsg;
> @@ -61,11 +74,35 @@ typedef struct NvmeCQueue {
>      QTAILQ_HEAD(, NvmeRequest) req_list;
>  } NvmeCQueue;
>
> +typedef struct NvmeZone {
> +    NvmeZoneDescr   d;
> +    uint64_t        tstamp;
> +    uint32_t        next;
> +    uint32_t        prev;
> +    uint8_t         rsvd80[8];
> +} NvmeZone;
> +
> +#define NVME_ZONE_LIST_NIL    UINT_MAX
> +
> +typedef struct NvmeZoneList {
> +    uint32_t        head;
> +    uint32_t        tail;
> +    uint32_t        size;
> +    uint8_t         rsvd12[4];
> +} NvmeZoneList;
> +
>  typedef struct NvmeNamespace {
>      NvmeIdNs        id_ns;
>      uint32_t        nsid;
>      uint8_t         csi;
>      QemuUUID        uuid;
> +
> +    NvmeIdNsZoned   *id_ns_zoned;
> +    NvmeZone        *zone_array;
> +    NvmeZoneList    *exp_open_zones;
> +    NvmeZoneList    *imp_open_zones;
> +    NvmeZoneList    *closed_zones;
> +    NvmeZoneList    *full_zones;
>  } NvmeNamespace;
>
>  static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)
> @@ -100,6 +137,7 @@ typedef struct NvmeCtrl {
>      uint32_t    num_namespaces;
>      uint32_t    max_q_ents;
>      uint64_t    ns_size;
> +
>      uint8_t     *cmbuf;
>      uint32_t    irq_status;
>      uint64_t    host_timestamp;                 /* Timestamp sent by the 
> host */
> @@ -107,6 +145,12 @@ typedef struct NvmeCtrl {
>
>      HostMemoryBackend *pmrdev;
>
> +    int             zone_file_fd;
> +    uint32_t        num_zones;
> +    uint64_t        zone_size_bs;
> +    uint64_t        zone_array_size;
> +    uint8_t         zamds;
> +
>      NvmeNamespace   *namespaces;
>      NvmeSQueue      **sq;
>      NvmeCQueue      **cq;
> @@ -121,6 +165,86 @@ static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, 
> NvmeNamespace *ns)
>      return n->ns_size >> nvme_ns_lbads(ns);
>  }
>
> +static inline uint8_t nvme_get_zone_state(NvmeZone *zone)
> +{
> +    return zone->d.zs >> 4;
> +}
> +
> +static inline void nvme_set_zone_state(NvmeZone *zone, enum NvmeZoneState 
> state)
> +{
> +    zone->d.zs = state << 4;
> +}
> +
> +static inline uint64_t nvme_zone_rd_boundary(NvmeCtrl *n, NvmeZone *zone)
> +{
> +    return zone->d.zslba + n->params.zone_size;
> +}
> +
> +static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
> +{
> +    return zone->d.zslba + zone->d.zcap;
> +}
> +
> +static inline bool nvme_wp_is_valid(NvmeZone *zone)
> +{
> +    uint8_t st = nvme_get_zone_state(zone);
> +
> +    return st != NVME_ZONE_STATE_FULL &&
> +           st != NVME_ZONE_STATE_READ_ONLY &&
> +           st != NVME_ZONE_STATE_OFFLINE;
> +}
> +
> +/*
> + * Initialize a zone list head.
> + */
> +static inline void nvme_init_zone_list(NvmeZoneList *zl)
> +{
> +    zl->head = NVME_ZONE_LIST_NIL;
> +    zl->tail = NVME_ZONE_LIST_NIL;
> +    zl->size = 0;
> +}
> +
> +/*
> + * Initialize the number of entries contained in a zone list.
> + */

The comment above should say "Retrieve" (or something similar) instead of "Initialize".

> +static inline uint32_t nvme_zone_list_size(NvmeZoneList *zl)
> +{
> +    return zl->size;
> +}
> +
> +/*
> + * Check if the zone is not currently included into any zone list.
> + */
> +static inline bool nvme_zone_not_in_list(NvmeZone *zone)
> +{
> +    return (bool)(zone->prev == 0 && zone->next == 0);

You don't need the cast to bool.

Besides that it looks good. I didn't check every value against the spec though.

Acked-by: Alistair Francis <alistair.francis@wdc.com>

Alistair

> +}
> +
> +/*
> + * Return the zone at the head of zone list or NULL if the list is empty.
> + */
> +static inline NvmeZone *nvme_peek_zone_head(NvmeNamespace *ns, NvmeZoneList 
> *zl)
> +{
> +    if (zl->head == NVME_ZONE_LIST_NIL) {
> +        return NULL;
> +    }
> +    return &ns->zone_array[zl->head];
> +}
> +
> +/*
> + * Return the next zone in the list.
> + */
> +static inline NvmeZone *nvme_next_zone_in_list(NvmeNamespace *ns, NvmeZone 
> *z,
> +    NvmeZoneList *zl)
> +{
> +    assert(!nvme_zone_not_in_list(z));
> +
> +    if (z->next == NVME_ZONE_LIST_NIL) {
> +        return NULL;
> +    }
> +    return &ns->zone_array[z->next];
> +}
> +
>  static inline int nvme_ilog2(uint64_t i)
>  {
>      int log = -1;
> @@ -132,4 +256,10 @@ static inline int nvme_ilog2(uint64_t i)
>      return log;
>  }
>
> +static inline void _hw_nvme_check_size(void)
> +{
> +    QEMU_BUILD_BUG_ON(sizeof(NvmeZoneList) != 16);
> +    QEMU_BUILD_BUG_ON(sizeof(NvmeZone) != 88);
> +}
> +
>  #endif /* HW_NVME_H */
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 5a1e5e137c..596c39162b 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -446,6 +446,9 @@ enum NvmeIoCommands {
>      NVME_CMD_COMPARE            = 0x05,
>      NVME_CMD_WRITE_ZEROS        = 0x08,
>      NVME_CMD_DSM                = 0x09,
> +    NVME_CMD_ZONE_MGMT_SEND     = 0x79,
> +    NVME_CMD_ZONE_MGMT_RECV     = 0x7a,
> +    NVME_CMD_ZONE_APND          = 0x7d,
>  };
>
>  typedef struct NvmeDeleteQ {
> @@ -539,6 +542,7 @@ enum NvmeNidLength {
>
>  enum NvmeCsi {
>      NVME_CSI_NVM                = 0x00,
> +    NVME_CSI_ZONED              = 0x02,
>  };
>
>  #define NVME_SET_CSI(vec, csi) (vec |= (uint8_t)(1 << (csi)))
> @@ -661,6 +665,7 @@ enum NvmeStatusCodes {
>      NVME_INVALID_NSID           = 0x000b,
>      NVME_CMD_SEQ_ERROR          = 0x000c,
>      NVME_CMD_SET_CMB_REJECTED   = 0x002b,
> +    NVME_INVALID_CMD_SET        = 0x002c,
>      NVME_LBA_RANGE              = 0x0080,
>      NVME_CAP_EXCEEDED           = 0x0081,
>      NVME_NS_NOT_READY           = 0x0082,
> @@ -684,6 +689,14 @@ enum NvmeStatusCodes {
>      NVME_CONFLICTING_ATTRS      = 0x0180,
>      NVME_INVALID_PROT_INFO      = 0x0181,
>      NVME_WRITE_TO_RO            = 0x0182,
> +    NVME_ZONE_BOUNDARY_ERROR    = 0x01b8,
> +    NVME_ZONE_FULL              = 0x01b9,
> +    NVME_ZONE_READ_ONLY         = 0x01ba,
> +    NVME_ZONE_OFFLINE           = 0x01bb,
> +    NVME_ZONE_INVALID_WRITE     = 0x01bc,
> +    NVME_ZONE_TOO_MANY_ACTIVE   = 0x01bd,
> +    NVME_ZONE_TOO_MANY_OPEN     = 0x01be,
> +    NVME_ZONE_INVAL_TRANSITION  = 0x01bf,
>      NVME_WRITE_FAULT            = 0x0280,
>      NVME_UNRECOVERED_READ       = 0x0281,
>      NVME_E2E_GUARD_ERROR        = 0x0282,
> @@ -807,7 +820,17 @@ typedef struct NvmeIdCtrl {
>      uint8_t     ieee[3];
>      uint8_t     cmic;
>      uint8_t     mdts;
> -    uint8_t     rsvd255[178];
> +    uint16_t    cntlid;
> +    uint32_t    ver;
> +    uint32_t    rtd3r;
> +    uint32_t    rtd3e;
> +    uint32_t    oaes;
> +    uint32_t    ctratt;
> +    uint8_t     rsvd100[28];
> +    uint16_t    crdt1;
> +    uint16_t    crdt2;
> +    uint16_t    crdt3;
> +    uint8_t     rsvd134[122];
>      uint16_t    oacs;
>      uint8_t     acl;
>      uint8_t     aerl;
> @@ -832,6 +855,11 @@ typedef struct NvmeIdCtrl {
>      uint8_t     vs[1024];
>  } NvmeIdCtrl;
>
> +typedef struct NvmeIdCtrlZoned {
> +    uint8_t     zamds;
> +    uint8_t     rsvd1[4095];
> +} NvmeIdCtrlZoned;
> +
>  enum NvmeIdCtrlOacs {
>      NVME_OACS_SECURITY  = 1 << 0,
>      NVME_OACS_FORMAT    = 1 << 1,
> @@ -908,6 +936,12 @@ typedef struct NvmeLBAF {
>      uint8_t     rp;
>  } NvmeLBAF;
>
> +typedef struct NvmeLBAFE {
> +    uint64_t    zsze;
> +    uint8_t     zdes;
> +    uint8_t     rsvd9[7];
> +} NvmeLBAFE;
> +
>  typedef struct NvmeIdNs {
>      uint64_t    nsze;
>      uint64_t    ncap;
> @@ -930,6 +964,19 @@ typedef struct NvmeIdNs {
>      uint8_t     vs[3712];
>  } NvmeIdNs;
>
> +typedef struct NvmeIdNsZoned {
> +    uint16_t    zoc;
> +    uint16_t    ozcs;
> +    uint32_t    mar;
> +    uint32_t    mor;
> +    uint32_t    rrl;
> +    uint32_t    frl;
> +    uint8_t     rsvd20[2796];
> +    NvmeLBAFE   lbafe[16];
> +    uint8_t     rsvd3072[768];
> +    uint8_t     vs[256];
> +} NvmeIdNsZoned;
> +
>
>  /*Deallocate Logical Block Features*/
>  #define NVME_ID_NS_DLFEAT_GUARD_CRC(dlfeat)       ((dlfeat) & 0x10)
> @@ -962,6 +1009,71 @@ enum NvmeIdNsDps {
>      DPS_FIRST_EIGHT = 8,
>  };
>
> +enum NvmeZoneAttr {
> +    NVME_ZA_FINISHED_BY_CTLR         = 1 << 0,
> +    NVME_ZA_FINISH_RECOMMENDED       = 1 << 1,
> +    NVME_ZA_RESET_RECOMMENDED        = 1 << 2,
> +    NVME_ZA_ZD_EXT_VALID             = 1 << 7,
> +};
> +
> +typedef struct NvmeZoneReportHeader {
> +    uint64_t    nr_zones;
> +    uint8_t     rsvd[56];
> +} NvmeZoneReportHeader;
> +
> +enum NvmeZoneReceiveAction {
> +    NVME_ZONE_REPORT                 = 0,
> +    NVME_ZONE_REPORT_EXTENDED        = 1,
> +};
> +
> +enum NvmeZoneReportType {
> +    NVME_ZONE_REPORT_ALL             = 0,
> +    NVME_ZONE_REPORT_EMPTY           = 1,
> +    NVME_ZONE_REPORT_IMPLICITLY_OPEN = 2,
> +    NVME_ZONE_REPORT_EXPLICITLY_OPEN = 3,
> +    NVME_ZONE_REPORT_CLOSED          = 4,
> +    NVME_ZONE_REPORT_FULL            = 5,
> +    NVME_ZONE_REPORT_READ_ONLY       = 6,
> +    NVME_ZONE_REPORT_OFFLINE         = 7,
> +};
> +
> +typedef struct NvmeZoneDescr {
> +    uint8_t     zt;
> +    uint8_t     zs;
> +    uint8_t     za;
> +    uint8_t     rsvd3[5];
> +    uint64_t    zcap;
> +    uint64_t    zslba;
> +    uint64_t    wp;
> +    uint8_t     rsvd32[32];
> +} NvmeZoneDescr;
> +
> +enum NvmeZoneState {
> +    NVME_ZONE_STATE_RESERVED         = 0x00,
> +    NVME_ZONE_STATE_EMPTY            = 0x01,
> +    NVME_ZONE_STATE_IMPLICITLY_OPEN  = 0x02,
> +    NVME_ZONE_STATE_EXPLICITLY_OPEN  = 0x03,
> +    NVME_ZONE_STATE_CLOSED           = 0x04,
> +    NVME_ZONE_STATE_READ_ONLY        = 0x0D,
> +    NVME_ZONE_STATE_FULL             = 0x0E,
> +    NVME_ZONE_STATE_OFFLINE          = 0x0F,
> +};
> +
> +enum NvmeZoneType {
> +    NVME_ZONE_TYPE_RESERVED          = 0x00,
> +    NVME_ZONE_TYPE_SEQ_WRITE         = 0x02,
> +};
> +
> +enum NvmeZoneSendAction {
> +    NVME_ZONE_ACTION_RSD             = 0x00,
> +    NVME_ZONE_ACTION_CLOSE           = 0x01,
> +    NVME_ZONE_ACTION_FINISH          = 0x02,
> +    NVME_ZONE_ACTION_OPEN            = 0x03,
> +    NVME_ZONE_ACTION_RESET           = 0x04,
> +    NVME_ZONE_ACTION_OFFLINE         = 0x05,
> +    NVME_ZONE_ACTION_SET_ZD_EXT      = 0x10,
> +};
> +
>  static inline void _nvme_check_size(void)
>  {
>      QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16);
> @@ -978,8 +1090,13 @@ static inline void _nvme_check_size(void)
>      QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512);
>      QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512);
>      QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096);
> +    QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrlZoned) != 4096);
>      QEMU_BUILD_BUG_ON(sizeof(NvmeNsIdDesc) != 4);
> +    QEMU_BUILD_BUG_ON(sizeof(NvmeLBAF) != 4);
> +    QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) != 16);
>      QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096);
> +    QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsZoned) != 4096);
>      QEMU_BUILD_BUG_ON(sizeof(NvmeEffectsLog) != 4096);
> +    QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescr) != 64);
>  }
>  #endif
> --
> 2.21.0
>
>



reply via email to

[Prev in Thread] Current Thread [Next in Thread]