[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC,v1] Namespace Management Support
From: |
Matt Fitzpatrick |
Subject: |
[Qemu-devel] [RFC,v1] Namespace Management Support |
Date: |
Tue, 2 Jul 2019 10:39:36 -0700 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.6.1 |
Adding namespace management support to the nvme device. Namespace
creation requires contiguous block space for a simple method of allocation.
I wrote this a few years ago based on Keith's fork and nvmeqemu fork and
have recently re-synced with the latest trunk. Some data structures in
nvme.h are a bit more filled out that strictly necessary as this is also
the base for sr-iov and IOD patched to be submitted later.
Signed-off-by: fitzpat <address@hidden>
---
hw/block/nvme.c | 506 +++++++++++++++++++++++++++++++++++++------
hw/block/nvme.h | 57 ++++-
include/block/nvme.h | 128 ++++++++++-
3 files changed, 610 insertions(+), 81 deletions(-)
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 107a719b95..11d7da26f3 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -42,6 +44,9 @@
#include "trace.h"
#include "nvme.h"
+#define NVME_CTRL_LIST_MAX_ENTRIES 2047
+#define NVME_MAX_NUM_NAMESPACES 256
+
#define NVME_GUEST_ERR(trace, fmt, ...) \
do { \
(trace_##trace)(__VA_ARGS__); \
@@ -50,6 +55,8 @@
} while (0)
static void nvme_process_sq(void *opaque);
+static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
+ unsigned size);
static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
{
@@ -377,7 +384,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace
*ns, NvmeCmd *cmd,
uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
uint64_t data_size = (uint64_t)nlb << data_shift;
- uint64_t data_offset = slba << data_shift;
+ uint64_t data_offset = (slba << data_shift) + ns->start_byte_index;
int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE :
BLOCK_ACCT_READ;
@@ -425,6 +432,11 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd
*cmd, NvmeRequest *req)
}
ns = &n->namespaces[nsid - 1];
+
+ if (unlikely(!ns->ctrl)) {
+ return NVME_INVALID_NSID | NVME_DNR;
+ }
+
switch (cmd->opcode) {
case NVME_CMD_FLUSH:
return nvme_flush(n, ns, cmd, req);
@@ -676,6 +688,49 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n,
NvmeIdentify *c)
prp1, prp2);
}
+/**
+ * Identify Allocated Namespace List
+ * @param n
+ * @param c
+ * @return
+ */
+static uint16_t nvme_identify_ns_allocated(NvmeCtrl *n, NvmeIdentify *c)
+{
+ static const int data_len = 4 * KiB;
+ uint32_t min_nsid = le32_to_cpu(c->nsid);
+ uint64_t prp1 = le64_to_cpu(c->prp1);
+ uint64_t prp2 = le64_to_cpu(c->prp2);
+ uint32_t *list;
+ uint16_t ret;
+ int i, j = 0;
+
+ trace_nvme_identify_nslist(min_nsid);
+
+ list = g_malloc0(data_len);
+ for (i = 0; i < NVME_MAX_NUM_NAMESPACES; i++) {
+ if (i < min_nsid) {
+ continue;
+ }
+ if (n->namespaces[i].created) {
+ list[j++] = cpu_to_le32(i + 1);
+ if (j == data_len / sizeof(uint32_t)) {
+ break;
+ }
+ }
+ }
+ ret = nvme_dma_read_prp(n, (uint8_t *)list, data_len, prp1, prp2);
+ g_free(list);
+ return ret;
+}
+
+/**
+ * Identify Active Namespace List
+ * Active is defined as created and attached.
+ *
+ * @param n
+ * @param c
+ * @return
+ */
static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
{
static const int data_len = 4 * KiB;
@@ -689,13 +744,15 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n,
NvmeIdentify *c)
trace_nvme_identify_nslist(min_nsid);
list = g_malloc0(data_len);
- for (i = 0; i < n->num_namespaces; i++) {
+ for (i = 0; i < NVME_MAX_NUM_NAMESPACES; i++) {
if (i < min_nsid) {
continue;
}
- list[j++] = cpu_to_le32(i + 1);
- if (j == data_len / sizeof(uint32_t)) {
- break;
+ if (n->namespaces[i].created && n->namespaces[i].ctrl) {
+ list[j++] = cpu_to_le32(i + 1);
+ if (j == data_len / sizeof(uint32_t)) {
+ break;
+ }
}
}
ret = nvme_dma_read_prp(n, (uint8_t *)list, data_len, prp1, prp2);
@@ -708,18 +765,271 @@ static uint16_t nvme_identify(NvmeCtrl *n,
NvmeCmd *cmd)
NvmeIdentify *c = (NvmeIdentify *)cmd;
switch (le32_to_cpu(c->cns)) {
- case 0x00:
+ case NVME_ADM_CNS_ID_NS:
return nvme_identify_ns(n, c);
- case 0x01:
+ case NVME_ADM_CNS_ID_CTRL:
return nvme_identify_ctrl(n, c);
- case 0x02:
+ case NVME_ADM_CNS_ID_NS_LIST:
return nvme_identify_nslist(n, c);
+ case NVME_ADM_CNS_ID_NS_LIST_ALLOC:
+ return nvme_identify_ns_allocated(n, c);
+ case NVME_ADM_CNS_ID_NS_ALLOC:
+ return nvme_identify_ns(n, c);
default:
trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
return NVME_INVALID_FIELD | NVME_DNR;
}
}
+static uint16_t nvme_namespace_controller_attach(NvmeCtrl *n, NvmeCmd *cmd)
+{
+ int i;
+ uint64_t prp1 = le64_to_cpu(cmd->prp1);
+ uint64_t prp2 = le64_to_cpu(cmd->prp2);
+ NvmeNamespace *ns = &n->namespaces[cmd->nsid - 1];
+
+ uint16_t ctrl_list[2048];
+ uint16_t ctrl_list_size;
+
+ if (nvme_dma_write_prp(n, (uint8_t *)ctrl_list, sizeof(ctrl_list),
prp1, prp2)) {
+ return NVME_INVALID_FIELD;
+ }
+
+ ctrl_list_size = ctrl_list[0];
+
+ if (!ctrl_list_size || ctrl_list_size > NVME_CTRL_LIST_MAX_ENTRIES) {
+ return NVME_CTRL_LIST_INVALID;
+ }
+
+ if (ns->ctrl == n) {
+ return NVME_NS_ALREADY_ATTACHED;
+ }
+ if (!ns->created) {
+ return NVME_INVALID_NSID;
+ }
+
+ /* TODO: Update NvmeNamespace to link multiple controllers */
+ for ( i = 1; i <= ctrl_list_size; i++) {
+ if (n->id_ctrl.cntlid == ctrl_list[i]) {
+ ns->ctrl = n;
+ return NVME_SUCCESS;
+ }
+ }
+ return NVME_CTRL_LIST_INVALID;
+}
+
+static uint16_t nvme_namespace_controller_detach(NvmeCtrl *n, NvmeCmd *cmd)
+{
+ int i;
+ uint64_t prp1 = le64_to_cpu(cmd->prp1);
+ uint64_t prp2 = le64_to_cpu(cmd->prp2);
+ NvmeNamespace *ns = &n->namespaces[cmd->nsid - 1];
+
+ uint16_t ctrl_list[2048];
+ uint16_t ctrl_list_size;
+
+ if (nvme_dma_write_prp(n, (uint8_t *)ctrl_list, sizeof(ctrl_list),
prp1, prp2)) {
+ return NVME_INVALID_FIELD;
+ }
+
+ ctrl_list_size = ctrl_list[0];
+
+ if (!ctrl_list_size || ctrl_list_size > NVME_CTRL_LIST_MAX_ENTRIES) {
+ return NVME_CTRL_LIST_INVALID;
+ }
+ /* TODO: semaphore to lock NS on detach for scenario with detach
during IO */
+ if (!ns->ctrl || (ns->ctrl != n) ) {
+ return NVME_NS_NOT_ATTACHED;
+ }
+ if (!ns->created) {
+ return NVME_INVALID_NSID;
+ }
+
+ /* TODO: Update NvmeNamespace to link multiple controllers */
+ for ( i = 1; i <= ctrl_list_size; i++) {
+ if (n->id_ctrl.cntlid == ctrl_list[i]) {
+ ns->ctrl = NULL;
+ return NVME_SUCCESS;
+ }
+ }
+ return NVME_CTRL_LIST_INVALID;
+}
+
+static uint16_t nvme_namespace_attachment(NvmeCtrl *n, NvmeCmd *cmd)
+{
+ uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+
+ if ( (!cmd->nsid || cmd->nsid > NVME_MAX_NUM_NAMESPACES)
+ && (cmd->nsid != 0xFFFFFFFF)) {
+ return NVME_INVALID_FIELD;
+ }
+
+ switch (dw10) {
+ case NVME_NS_CONTROLLER_ATTACH:
+ return nvme_namespace_controller_attach(n, cmd);
+ case NVME_NS_CONTROLLER_DETACH:
+ return nvme_namespace_controller_detach(n, cmd);
+ default:
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+}
+
+static int nvme_set_start_index(NvmeCtrl *n, uint64_t *ns_start_index,
uint64_t requested_ns_size)
+{
+ int i;
+ int lba_index;
+ uint64_t start_index = 0;
+ uint64_t end_index, ns_bytes;
+ bool adjusted;
+
+ if (requested_ns_size > n->nvm_capacity) {
+ return -1;
+ }
+ do {
+ adjusted = false;
+ end_index = start_index + requested_ns_size;
+ if (end_index > n->nvm_capacity) {
+ return -1;
+ }
+
+ for (i = 0; i < NVME_MAX_NUM_NAMESPACES; i++) {
+ NvmeNamespace *ns = &n->namespaces[i];
+ NvmeIdNs *id_ns = &ns->id_ns;
+ if (ns->created) {
+
+ lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
+ ns_bytes = id_ns->nsze * ((1 <<
id_ns->lbaf[lba_index].ds));
+
+ if ((start_index >= ns->start_byte_index &&
+ start_index < (ns->start_byte_index + ns_bytes)) ||
+ (end_index >= ns->start_byte_index &&
+ end_index < (ns->start_byte_index + ns_bytes))) {
+ start_index = ns->start_byte_index + ns_bytes;
+ adjusted = true;
+ }
+ }
+ }
+ } while (adjusted);
+
+ *ns_start_index = start_index;
+ return 0;
+}
+
+/**
+ * Attempts to create a namespace in a free contiguous space within the
block layer
+ *
+ * @param n
+ * @param cmd
+ * @param req
+ * @return NVME_SUCCESS is successfuly created
+ */
+static uint16_t nvme_namespace_create(NvmeCtrl *n, NvmeCmd *cmd,
NvmeRequest *req)
+{
+ int i;
+ uint64_t prp1 = le64_to_cpu(cmd->prp1);
+ uint64_t prp2 = le64_to_cpu(cmd->prp2);
+ NvmeIdNs id_ns_host;
+
+
+ if (nvme_dma_write_prp(n, (uint8_t*)&id_ns_host,
sizeof(id_ns_host), prp1, prp2)) {
+ return NVME_INVALID_FIELD;
+ }
+
+ for (i = 0; i < NVME_MAX_NUM_NAMESPACES; i++) {
+ uint64_t ns_size;
+ int lba_index;
+ NvmeNamespace *ns = &n->namespaces[i];
+ NvmeIdNs *id_ns = &ns->id_ns;
+
+ if (id_ns_host.flbas || id_ns_host.mc || id_ns_host.dps) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ if (!ns->created) { /* take the first available NS */
+
+ id_ns->flbas = id_ns_host.flbas;
+ id_ns->mc = id_ns_host.mc;
+ id_ns->dps = id_ns_host.dps;
+
+ id_ns->nuse = id_ns_host.nsze;
+ id_ns->ncap = id_ns_host.ncap;
+ id_ns->nsze = id_ns_host.nsze;
+
+ lba_index = NVME_ID_NS_FLBAS_INDEX(id_ns->flbas);
+ id_ns->lbaf[lba_index].ds = BDRV_SECTOR_BITS;
+ ns_size = id_ns->nsze * (1 << id_ns->lbaf[lba_index].ds);
+ id_ns->nvmcap = ns_size;
+
+ ns->id = i + 1;
+ id_ns->nguid = ns->id;
+
+ if (nvme_set_start_index(n, &ns->start_byte_index, ns_size)) {
+ return NVME_NS_INSUFF_CAP;
+ }
+ ns->created = true;
+ n->id_ctrl.unvmcap -= id_ns->nvmcap;
+
+ ns->ctrl = NULL; /* not attached */
+
+ n->num_namespaces++;
+ n->id_ctrl.nn++;
+
+ req->cqe.result = ns->id;
+ return NVME_SUCCESS;
+ }
+ }
+
+ return NVME_NS_INSUFF_CAP;
+}
+
+static uint16_t nvme_namespace_delete(NvmeCtrl *n, NvmeCmd *cmd,
NvmeRequest *req)
+{
+ NvmeNamespace *ns = &n->namespaces[cmd->nsid - 1];
+ if (ns->created) {
+ ns->created = false;
+ ns->ctrl = NULL;
+ n->num_namespaces--;
+ n->id_ctrl.nn--;
+ n->id_ctrl.unvmcap += ns->id_ns.nvmcap;
+ return NVME_SUCCESS;
+ }
+ return NVME_INVALID_NSID;
+}
+
+static uint16_t nvme_namespace_management(NvmeCtrl *n, NvmeCmd *cmd,
NvmeRequest *req)
+{
+ uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+
+ if ( (cmd->nsid > NVME_MAX_NUM_NAMESPACES)
+ && (cmd->nsid != 0xFFFFFFFF)) {
+ return NVME_INVALID_FIELD;
+ }
+
+ switch (dw10) {
+ case NVME_NS_CREATE:
+ return nvme_namespace_create(n, cmd, req);
+ case NVME_NS_DELETE:
+ if ( cmd->nsid == 0xFFFFFFFF ) {
+ uint32_t i;
+ uint16_t ret = NVME_SUCCESS;
+
+ for (i = 1; i < NVME_MAX_NUM_NAMESPACES; i++) {
+ cmd->nsid = i;
+ if ( &n->namespaces[cmd->nsid - 1].created) {
+ ret = nvme_namespace_delete(n, cmd, req);
+ }
+ if (ret != NVME_SUCCESS) {
+ return ret;
+ }
+ }
+ return ret;
+ }
+ return nvme_namespace_delete(n, cmd, req);
+ default:
+ return NVME_INVALID_FIELD;
+ }
+}
+
static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts)
{
trace_nvme_setfeat_timestamp(ts);
@@ -860,6 +1170,10 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n,
NvmeCmd *cmd, NvmeRequest *req)
return nvme_set_feature(n, cmd, req);
case NVME_ADM_CMD_GET_FEATURES:
return nvme_get_feature(n, cmd, req);
+ case NVME_ADM_CMD_NS_MANAGEMENT:
+ return nvme_namespace_management(n, cmd, req);
+ case NVME_ADM_CMD_NS_ATTACH:
+ return nvme_namespace_attachment(n, cmd);
default:
trace_nvme_err_invalid_admin_opc(cmd->opcode);
return NVME_INVALID_OPCODE | NVME_DNR;
@@ -915,6 +1229,7 @@ static void nvme_clear_ctrl(NvmeCtrl *n)
}
blk_flush(n->conf.blk);
+
n->bar.cc = 0;
}
@@ -1302,61 +1617,10 @@ static const MemoryRegionOps nvme_cmb_ops = {
},
};
-static void nvme_realize(PCIDevice *pci_dev, Error **errp)
+static void nvme_init_ctrl(NvmeCtrl *n)
{
- NvmeCtrl *n = NVME(pci_dev);
NvmeIdCtrl *id = &n->id_ctrl;
-
- int i;
- int64_t bs_size;
- uint8_t *pci_conf;
-
- if (!n->num_queues) {
- error_setg(errp, "num_queues can't be zero");
- return;
- }
-
- if (!n->conf.blk) {
- error_setg(errp, "drive property not set");
- return;
- }
-
- bs_size = blk_getlength(n->conf.blk);
- if (bs_size < 0) {
- error_setg(errp, "could not get backing file size");
- return;
- }
-
- if (!n->serial) {
- error_setg(errp, "serial property not set");
- return;
- }
- blkconf_blocksizes(&n->conf);
- if (!blkconf_apply_backend_options(&n->conf,
blk_is_read_only(n->conf.blk),
- false, errp)) {
- return;
- }
-
- pci_conf = pci_dev->config;
- pci_conf[PCI_INTERRUPT_PIN] = 1;
- pci_config_set_prog_interface(pci_dev->config, 0x2);
- pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS);
- pcie_endpoint_cap_init(pci_dev, 0x80);
-
- n->num_namespaces = 1;
- n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4);
- n->ns_size = bs_size / (uint64_t)n->num_namespaces;
-
- n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
- n->sq = g_new0(NvmeSQueue *, n->num_queues);
- n->cq = g_new0(NvmeCQueue *, n->num_queues);
-
- memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
- "nvme", n->reg_size);
- pci_register_bar(pci_dev, 0,
- PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
- &n->iomem);
- msix_init_exclusive_bar(pci_dev, n->num_queues, 4, NULL);
+ uint8_t *pci_conf = n->parent_obj.config;
id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
id->ssvid = cpu_to_le16(pci_get_word(pci_conf +
PCI_SUBSYSTEM_VENDOR_ID));
@@ -1367,16 +1631,25 @@ static void nvme_realize(PCIDevice *pci_dev,
Error **errp)
id->ieee[0] = 0x00;
id->ieee[1] = 0x02;
id->ieee[2] = 0xb3;
- id->oacs = cpu_to_le16(0);
+ id->oacs = cpu_to_le16(0x8); // Namespace Management Supported
+
id->frmw = 7 << 1;
id->lpa = 1 << 0;
id->sqes = (0x6 << 4) | 0x6;
id->cqes = (0x4 << 4) | 0x4;
- id->nn = cpu_to_le32(n->num_namespaces);
+ id->mnan = 0;
+ id->nn = NVME_MAX_NUM_NAMESPACES;
id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP);
id->psd[0].mp = cpu_to_le16(0x9c4);
id->psd[0].enlat = cpu_to_le32(0x10);
id->psd[0].exlat = cpu_to_le32(0x4);
+ id->tnvmcap = n->nvm_capacity;
+ id->unvmcap = 0;
+ id->hmpre = n->hmpre;
+ id->hmmin = n->hmmin;
+
+ snprintf ((char*)id->subnqn, sizeof(id->subnqn), "QEMU NVMe
Subsystem 1.2 Compatible");
+
if (blk_enable_write_cache(n->conf.blk)) {
id->vwc = 1;
}
@@ -1387,10 +1660,34 @@ static void nvme_realize(PCIDevice *pci_dev,
Error **errp)
NVME_CAP_SET_AMS(n->bar.cap, 1);
NVME_CAP_SET_TO(n->bar.cap, 0xf);
NVME_CAP_SET_CSS(n->bar.cap, 1);
+ NVME_CAP_SET_MPSMIN(n->bar.cap, 0);
NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
n->bar.vs = 0x00010200;
n->bar.intmc = n->bar.intms = 0;
+}
+
+static void nvme_init_pci(NvmeCtrl *n) {
+ uint8_t *pci_conf = n->parent_obj.config;
+
+ pci_conf[PCI_INTERRUPT_PIN] = 1;
+ pci_config_set_prog_interface(pci_conf, 0x2);
+ pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
+
+
+ pci_config_set_device_id(pci_conf, 0x5845);
+ pcie_endpoint_cap_init(&n->parent_obj, 0x80);
+
+ memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
+ n->reg_size);
+
+
+ pci_register_bar(&n->parent_obj, 0,
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_TYPE_64,
+ &n->iomem);
+
+
+ msix_init_exclusive_bar(&n->parent_obj, n->num_queues, 4, NULL);
if (n->cmb_size_mb) {
@@ -1406,20 +1703,31 @@ static void nvme_realize(PCIDevice *pci_dev,
Error **errp)
NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->cmb_size_mb);
n->cmbloc = n->bar.cmbloc;
- n->cmbsz = n->bar.cmbsz;
+ n->cmbsz = n->bar.cmbsz;
n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
"nvme-cmb",
NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
- pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
- PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
- PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
-
+ pci_register_bar(&n->parent_obj, NVME_CMBLOC_BIR(n->bar.cmbloc),
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_TYPE_64 |
+ PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
}
+}
+
+/**
+ * Divides up the total block space between all requested namespaces.
+ * @param n
+ */
+static void nvme_init_namespaces(NvmeCtrl *n)
+{
+ uint8_t i;
for (i = 0; i < n->num_namespaces; i++) {
+ uint64_t blks;
+ int lba_index;
NvmeNamespace *ns = &n->namespaces[i];
NvmeIdNs *id_ns = &ns->id_ns;
+
id_ns->nsfeat = 0;
id_ns->nlbaf = 0;
id_ns->flbas = 0;
@@ -1427,12 +1735,65 @@ static void nvme_realize(PCIDevice *pci_dev,
Error **errp)
id_ns->dpc = 0;
id_ns->dps = 0;
id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
- id_ns->ncap = id_ns->nuse = id_ns->nsze =
- cpu_to_le64(n->ns_size >>
- id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
+ id_ns->nsze = n->nvm_capacity / (uint64_t)n->num_namespaces;
+
+ lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
+ blks = id_ns->nsze / (1 << id_ns->lbaf[lba_index].ds);
+ id_ns->nuse = id_ns->ncap = id_ns->nsze = cpu_to_le64(blks);
+ id_ns->nvmcap = id_ns->nsze * (1 << id_ns->lbaf[lba_index].ds);
+
+ ns->id = i + 1;
+ ns->start_byte_index = (i * id_ns->nsze) >> BDRV_SECTOR_BITS;
+ ns->created = true;
+ ns->ctrl = n; /* attached */
+
}
}
+static void nvme_realize(PCIDevice *pci_dev, Error **errp)
+{
+ NvmeCtrl *n = NVME(pci_dev);
+
+ int64_t bs_size;
+ Error *local_err = NULL;
+
+ if (!n->conf.blk) {
+ error_setg(errp, "drive property not set");
+ return;
+ }
+
+ bs_size = blk_getlength(n->conf.blk);
+ if (bs_size < 0) {
+ error_setg(errp, "could not get backing file size");
+ return;
+ }
+
+ if (!n->serial) {
+ error_setg(errp, "serial property not set");
+ return;
+ }
+ blkconf_blocksizes(&n->conf);
+ blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
+ false, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return;
+ }
+
+ n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4);
+ n->nvm_capacity = bs_size;
+ n->sq = g_new0(NvmeSQueue *, n->num_queues);
+ n->cq = g_new0(NvmeCQueue *, n->num_queues);
+ n->namespaces = g_new0(NvmeNamespace, NVME_MAX_NUM_NAMESPACES);
+
+ nvme_init_pci(n);
+ nvme_init_ctrl(n);
+ nvme_init_namespaces(n);
+
+}
+
static void nvme_exit(PCIDevice *pci_dev)
{
NvmeCtrl *n = NVME(pci_dev);
@@ -1451,6 +1812,7 @@ static void nvme_exit(PCIDevice *pci_dev)
static Property nvme_props[] = {
DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
DEFINE_PROP_STRING("serial", NvmeCtrl, serial),
+ DEFINE_PROP_UINT32("namespaces", NvmeCtrl, num_namespaces, 1),
DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0),
DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64),
DEFINE_PROP_END_OF_LIST(),
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 557194ee19..c182dcb10a 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -9,6 +9,7 @@ typedef struct NvmeAsyncEvent {
typedef struct NvmeRequest {
struct NvmeSQueue *sq;
+ struct NvmeNamespace *ns;
BlockAIOCB *aiocb;
uint16_t status;
bool has_sg;
@@ -50,7 +51,16 @@ typedef struct NvmeCQueue {
} NvmeCQueue;
typedef struct NvmeNamespace {
+ struct NvmeCtrl *ctrl;
+ bool created;
NvmeIdNs id_ns;
+ NvmeRangeType lba_range[64];
+ unsigned long *util;
+ unsigned long *uncorrectable;
+ uint32_t id;
+ uint64_t start_byte_index;
+ uint64_t meta_start_offset;
+ BlockConf conf;
} NvmeNamespace;
#define TYPE_NVME "nvme"
@@ -64,23 +74,66 @@ typedef struct NvmeCtrl {
NvmeBar bar;
BlockConf conf;
- uint32_t page_size;
+ time_t start_time;
+ uint16_t temperature;
+ uint16_t page_size;
uint16_t page_bits;
uint16_t max_prp_ents;
uint16_t cqe_size;
uint16_t sqe_size;
+ uint16_t oacs;
+ uint16_t oncs;
uint32_t reg_size;
uint32_t num_namespaces;
uint32_t num_queues;
uint32_t max_q_ents;
- uint64_t ns_size;
+ uint64_t nvm_capacity;
+ uint8_t db_stride;
+ uint8_t aerl;
+ uint8_t acl;
+ uint8_t elpe;
+ uint8_t elp_index;
+ uint8_t error_count;
+ uint8_t mdts;
+ uint8_t cqr;
+ uint8_t max_sqes;
+ uint8_t max_cqes;
+ uint8_t meta;
+ uint8_t vwc;
+ uint8_t mc;
+ uint8_t dpc;
+ uint8_t dps;
+ uint8_t nlbaf;
+ uint8_t extended;
+ uint8_t lba_index;
+ uint8_t mpsmin;
+ uint8_t mpsmax;
+ uint8_t intc;
+ uint8_t intc_thresh;
+ uint8_t intc_time;
+ uint8_t outstanding_aers;
+ uint8_t temp_warn_issued;
+ uint8_t num_errors;
+ uint8_t cqes_pending;
+ uint16_t vid;
+ uint16_t did;
uint32_t cmb_size_mb;
uint32_t cmbsz;
uint32_t cmbloc;
+ uint32_t sriov_total_vfs;
uint8_t *cmbuf;
uint64_t irq_status;
uint64_t host_timestamp; /* Timestamp sent by
the host */
uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */
+ uint8_t ehm;
+ uint8_t hsize;
+ uint32_t hmdlal;
+ uint32_t hmdlua;
+ uint32_t hmdlec;
+ uint8_t *hmbuf;
+ uint32_t hmmin;
+ uint32_t hmpre;
+
char *serial;
NvmeNamespace *namespaces;
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 3ec8efcc43..8c1e8c6cdc 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -17,6 +17,16 @@ typedef struct NvmeBar {
uint32_t cmbsz;
} NvmeBar;
+enum NvmeNsSelect {
+ NVME_NS_CONTROLLER_ATTACH = 0,
+ NVME_NS_CONTROLLER_DETACH = 1,
+};
+
+enum NvmeNsManagement {
+ NVME_NS_CREATE = 0,
+ NVME_NS_DELETE = 1,
+};
+
enum NvmeCapShift {
CAP_MQES_SHIFT = 0,
CAP_CQR_SHIFT = 16,
@@ -233,13 +243,31 @@ enum NvmeAdminCommands {
NVME_ADM_CMD_SET_FEATURES = 0x09,
NVME_ADM_CMD_GET_FEATURES = 0x0a,
NVME_ADM_CMD_ASYNC_EV_REQ = 0x0c,
+ NVME_ADM_CMD_NS_MANAGEMENT = 0x0d,
NVME_ADM_CMD_ACTIVATE_FW = 0x10,
NVME_ADM_CMD_DOWNLOAD_FW = 0x11,
+ NVME_ADM_CMD_NS_ATTACH = 0x15,
+ NVME_ADM_VIRT_MANAGEMENT = 0x1C,
NVME_ADM_CMD_FORMAT_NVM = 0x80,
NVME_ADM_CMD_SECURITY_SEND = 0x81,
NVME_ADM_CMD_SECURITY_RECV = 0x82,
};
+
+enum NvmeAdminCns {
+ NVME_ADM_CNS_ID_NS = 0x00,
+ NVME_ADM_CNS_ID_CTRL = 0x01,
+ NVME_ADM_CNS_ID_NS_LIST = 0x02,
+ NVME_ADM_CNS_NS_DESC_LIST = 0x03,
+ NVME_ADM_CNS_NVM_SET_LIST = 0x04,
+ NVME_ADM_CNS_ID_NS_LIST_ALLOC = 0x10,
+ NVME_ADM_CNS_ID_NS_ALLOC = 0x11,
+ NVME_ADM_CNS_CTRL_LIST_NS_ATT = 0x12,
+ NVME_ADM_CNS_CTRL_LIST = 0x13,
+ NVME_ADM_CNS_PRIM_CTRL_CAP = 0x14,
+ NVME_ADM_CNS_SEC_CTRL_LIST = 0x15,
+};
+
enum NvmeIoCommands {
NVME_CMD_FLUSH = 0x00,
NVME_CMD_WRITE = 0x01,
@@ -427,6 +455,17 @@ enum NvmeStatusCodes {
NVME_CMD_ABORT_MISSING_FUSE = 0x000a,
NVME_INVALID_NSID = 0x000b,
NVME_CMD_SEQ_ERROR = 0x000c,
+ NVME_NS_INSUFF_CAP = 0x0015,
+ NVME_NS_ID_UNAVAILABLE = 0x0016,
+ NVME_NS_ALREADY_ATTACHED = 0x0018,
+ NVME_NS_PRIVATE = 0x0019,
+ NVME_NS_NOT_ATTACHED = 0x001A,
+ NVME_THIN_PROV_NOT_SUP = 0x001B,
+ NVME_CTRL_LIST_INVALID = 0x001C,
+ NVME_INVALID_CTRL_ID = 0x001F,
+ NVME_INVALID_SEC_CTRL_ST = 0x0020,
+ NVME_INVALID_NUM_CTRL_RES = 0x0021,
+ NVME_INVALID_RES_ID = 0x0022,
NVME_LBA_RANGE = 0x0080,
NVME_CAP_EXCEEDED = 0x0081,
NVME_NS_NOT_READY = 0x0082,
@@ -543,7 +582,20 @@ typedef struct NvmeIdCtrl {
uint8_t ieee[3];
uint8_t cmic;
uint8_t mdts;
- uint8_t rsvd255[178];
+ uint16_t cntlid;
+ uint32_t ver;
+ uint8_t rsvd_95[8];
+ uint32_t oaes;
+ uint32_t ctratt;
+ uint16_t rrls;
+ uint8_t rsvd110[9];
+ uint8_t cntrltype;
+ uint64_t fguid;
+ uint64_t fguid_u;
+ uint16_t crdt1;
+ uint16_t crdt2;
+ uint16_t crdt3;
+ uint8_t rsvd255[122];
uint16_t oacs;
uint8_t acl;
uint8_t aerl;
@@ -551,10 +603,39 @@ typedef struct NvmeIdCtrl {
uint8_t lpa;
uint8_t elpe;
uint8_t npss;
- uint8_t rsvd511[248];
+ uint8_t avscc;
+ uint8_t apsta;
+ uint16_t wctemp;
+ uint16_t cctemp;
+ uint16_t mtfa;
+ uint32_t hmpre;
+ uint32_t hmmin;
+ uint64_t tnvmcap;
+ uint64_t tnvmcap_u;
+ uint64_t unvmcap;
+ uint64_t unvmcap_u;
+ uint32_t rpmbs;
+ uint16_t edstt;
+ uint8_t dsto;
+ uint8_t fwug;
+ uint16_t kas;
+ uint16_t hctma;
+ uint16_t mntmt;
+ uint16_t mxtmt;
+ uint32_t sanicap;
+ uint32_t hmminds;
+ uint16_t hmmaxd;
+ uint16_t nsetidmax;
+ uint16_t endgidmax;
+ uint8_t anatt;
+ uint8_t anacap;
+ uint32_t anagrpmax;
+ uint32_t nanagrpid;
+ uint32_t pels;
+ uint8_t rsvd511[156];
uint8_t sqes;
uint8_t cqes;
- uint16_t rsvd515;
+ uint16_t maxcmd;
uint32_t nn;
uint16_t oncs;
uint16_t fuses;
@@ -562,8 +643,15 @@ typedef struct NvmeIdCtrl {
uint8_t vwc;
uint16_t awun;
uint16_t awupf;
- uint8_t rsvd703[174];
- uint8_t rsvd2047[1344];
+ uint8_t nvscc;
+ uint8_t nwpc;
+ uint16_t acwu;
+ uint8_t rsvd535[2];
+ uint32_t sgls;
+ uint32_t mnan;
+ uint8_t rsvd767[224];
+ uint8_t subnqn[256];
+ uint8_t rsvd2047[1024];
NvmePSD psd[32];
uint8_t vs[1024];
} NvmeIdCtrl;
@@ -653,9 +741,35 @@ typedef struct NvmeIdNs {
uint8_t mc;
uint8_t dpc;
uint8_t dps;
- uint8_t res30[98];
+ uint8_t nmic;
+ uint8_t rescap;
+ uint8_t fpi;
+ uint8_t dlfeat;
+ uint16_t nawun;
+ uint16_t nawupf;
+ uint16_t nacwu;
+ uint16_t nabsn;
+ uint16_t nabo;
+ uint16_t nabspf;
+ uint16_t noiob;
+ uint64_t nvmcap;
+ uint64_t nvmcap_u;
+ uint16_t npwg;
+ uint16_t npwa;
+ uint16_t npdg;
+ uint16_t npda;
+ uint16_t nows;
+ uint8_t rsvd91[18];
+ uint32_t anagrpid;
+ uint8_t rsvd98[3];
+ uint8_t nsattr;
+ uint16_t nvmsetid;
+ uint16_t endgid;
+ uint64_t nguid;
+ uint64_t nguid_u;
+ uint64_t eui64;
NvmeLBAF lbaf[16];
- uint8_t res192[192];
+ uint8_t rsvd383[192];
uint8_t vs[3712];
} NvmeIdNs;
--
2.17.1
- [Qemu-devel] [RFC,v1] Namespace Management Support,
Matt Fitzpatrick <=