[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 07/15] hw/nvme: Add support for Secondary Controller List
From: Łukasz Gieryk
Subject: [PATCH v2 07/15] hw/nvme: Add support for Secondary Controller List
Date: Tue, 16 Nov 2021 16:34:38 +0100
From: Lukasz Maniak <lukasz.maniak@linux.intel.com>
Introduce handling for Secondary Controller List (Identify command with
CNS value of 15h).
Secondary controller IDs are unique within the subsystem, so the
subsystem reserves sriov_max_vfs of them when the primary controller is
initialized.
ID reservation requires an intermediate controller slot state, so a
reserved slot holds the placeholder address 0xFFFF rather than a pointer
to a controller.
A secondary controller is in the reserved state when it has no virtual
function assigned, but its primary controller is realized.
Secondary controller reservations are released to NULL when their
primary controller is unregistered.
Signed-off-by: Lukasz Maniak <lukasz.maniak@linux.intel.com>
---
hw/nvme/ctrl.c | 56 +++++++++++++++++++++++++++++++++
hw/nvme/ns.c | 2 +-
hw/nvme/nvme.h | 18 +++++++++++
hw/nvme/subsys.c | 74 ++++++++++++++++++++++++++++++++++++++------
hw/nvme/trace-events | 1 +
include/block/nvme.h | 20 ++++++++++++
6 files changed, 161 insertions(+), 10 deletions(-)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 44734a74f9..961161ba8e 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -161,6 +161,7 @@
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/units.h"
+#include "qemu/range.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "sysemu/sysemu.h"
@@ -4545,6 +4546,14 @@ static uint16_t nvme_identify_pri_ctrl_cap(NvmeCtrl *n,
NvmeRequest *req)
return nvme_c2h(n, (uint8_t *)&n->pri_ctrl_cap, sizeof(NvmePriCtrlCap),
req);
}
+static uint16_t nvme_identify_sec_ctrl_list(NvmeCtrl *n, NvmeRequest *req)
+{
+ trace_pci_nvme_identify_sec_ctrl_list(le16_to_cpu(n->pri_ctrl_cap.cntlid),
+ n->sec_ctrl_list.numcntl);
+
+ return nvme_c2h(n, (uint8_t *)&n->sec_ctrl_list, sizeof(NvmeSecCtrlList),
req);
+}
+
static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
bool active)
{
@@ -4765,6 +4774,8 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest
*req)
return nvme_identify_ctrl_list(n, req, false);
case NVME_ID_CNS_PRIMARY_CTRL_CAP:
return nvme_identify_pri_ctrl_cap(n, req);
+ case NVME_ID_CNS_SECONDARY_CTRL_LIST:
+ return nvme_identify_sec_ctrl_list(n, req);
case NVME_ID_CNS_CS_NS:
return nvme_identify_ns_csi(n, req, true);
case NVME_ID_CNS_CS_NS_PRESENT:
@@ -6316,6 +6327,9 @@ static void nvme_check_constraints(NvmeCtrl *n, Error
**errp)
static void nvme_init_state(NvmeCtrl *n)
{
NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
+ NvmeSecCtrlList *list = &n->sec_ctrl_list;
+ NvmeSecCtrlEntry *sctrl;
+ int i;
/* add one to max_ioqpairs to account for the admin queue pair */
n->reg_size = pow2ceil(sizeof(NvmeBar) +
@@ -6327,6 +6341,12 @@ static void nvme_init_state(NvmeCtrl *n)
n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
+ list->numcntl = cpu_to_le16(n->params.sriov_max_vfs);
+ for (i = 0; i < n->params.sriov_max_vfs; i++) {
+ sctrl = &list->sec[i];
+ sctrl->pcid = cpu_to_le16(n->cntlid);
+ }
+
cap->cntlid = cpu_to_le16(n->cntlid);
}
@@ -6755,6 +6775,41 @@ static void nvme_set_smart_warning(Object *obj, Visitor
*v, const char *name,
}
}
+static void nvme_sriov_pre_write_ctrl(PCIDevice *dev, uint32_t address,
+ uint32_t val, int len)
+{
+ NvmeCtrl *n = NVME(dev);
+ NvmeSecCtrlEntry *sctrl;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+ uint32_t off = address - sriov_cap;
+ int i, num_vfs;
+
+ if (!sriov_cap) {
+ return;
+ }
+
+ if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
+ num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
+
+ for (i = 0; i < num_vfs; i++) {
+ sctrl = &n->sec_ctrl_list.sec[i];
+
+ if (val & PCI_SRIOV_CTRL_VFE) {
+ sctrl->vfn = cpu_to_le16(i + 1);
+ } else {
+ sctrl->vfn = 0;
+ }
+ }
+ }
+}
+
+static void nvme_pci_write_config(PCIDevice *dev, uint32_t address,
+ uint32_t val, int len)
+{
+ nvme_sriov_pre_write_ctrl(dev, address, val, len);
+ pci_default_write_config(dev, address, val, len);
+}
+
static const VMStateDescription nvme_vmstate = {
.name = "nvme",
.unmigratable = 1,
@@ -6766,6 +6821,7 @@ static void nvme_class_init(ObjectClass *oc, void *data)
PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
pc->realize = nvme_realize;
+ pc->config_write = nvme_pci_write_config;
pc->exit = nvme_exit;
pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
pc->revision = 2;
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index b7cf1494e7..c70aed8c66 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -517,7 +517,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
NvmeCtrl *ctrl = subsys->ctrls[i];
- if (ctrl) {
+ if (ctrl && ctrl != SUBSYS_SLOT_RSVD) {
nvme_attach_ns(ctrl, ns);
}
}
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 81deb45dfb..2157a7b95f 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -43,6 +43,7 @@ typedef struct NvmeBus {
#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
+#define SUBSYS_SLOT_RSVD (void *)0xFFFF
typedef struct NvmeSubsystem {
DeviceState parent_obj;
@@ -67,6 +68,10 @@ static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem
*subsys,
return NULL;
}
+ if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
+ return NULL;
+ }
+
return subsys->ctrls[cntlid];
}
@@ -463,6 +468,7 @@ typedef struct NvmeCtrl {
} features;
NvmePriCtrlCap pri_ctrl_cap;
+ NvmeSecCtrlList sec_ctrl_list;
} NvmeCtrl;
static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
@@ -497,6 +503,18 @@ static inline uint16_t nvme_cid(NvmeRequest *req)
return le16_to_cpu(req->cqe.cid);
}
+static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
+{
+ PCIDevice *pci_dev = &n->parent_obj;
+ NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));
+
+ if (pci_is_vf(pci_dev)) {
+ return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
+ }
+
+ return NULL;
+}
+
void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
NvmeTxDirection dir, NvmeRequest *req);
diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index 495dcff5eb..43c295056f 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -11,20 +11,71 @@
#include "nvme.h"
-int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
+static int nvme_subsys_reserve_cntlids(NvmeCtrl *n, int start, int num)
{
NvmeSubsystem *subsys = n->subsys;
- int cntlid;
+ NvmeSecCtrlList *list = &n->sec_ctrl_list;
+ NvmeSecCtrlEntry *sctrl;
+ int i, cnt = 0;
+
+ for (i = start; i < ARRAY_SIZE(subsys->ctrls) && cnt < num; i++) {
+ if (!subsys->ctrls[i]) {
+ sctrl = &list->sec[cnt];
+ sctrl->scid = cpu_to_le16(i);
+ subsys->ctrls[i] = SUBSYS_SLOT_RSVD;
+ cnt++;
+ }
+ }
+
+ return cnt;
+}
- for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
- if (!subsys->ctrls[cntlid]) {
- break;
+static void nvme_subsys_unreserve_cntlids(NvmeCtrl *n)
+{
+ NvmeSubsystem *subsys = n->subsys;
+ NvmeSecCtrlList *list = &n->sec_ctrl_list;
+ NvmeSecCtrlEntry *sctrl;
+ int i, cntlid;
+
+ for (i = 0; i < n->params.sriov_max_vfs; i++) {
+ sctrl = &list->sec[i];
+ cntlid = le16_to_cpu(sctrl->scid);
+
+ if (cntlid) {
+ assert(subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD);
+ subsys->ctrls[cntlid] = NULL;
+ sctrl->scid = 0;
}
}
+}
- if (cntlid == ARRAY_SIZE(subsys->ctrls)) {
- error_setg(errp, "no more free controller id");
- return -1;
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
+{
+ NvmeSubsystem *subsys = n->subsys;
+ NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
+ int cntlid, num_rsvd, num_vfs = n->params.sriov_max_vfs;
+
+ if (pci_is_vf(&n->parent_obj)) {
+ cntlid = le16_to_cpu(sctrl->scid);
+ } else {
+ for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
+ if (!subsys->ctrls[cntlid]) {
+ break;
+ }
+ }
+
+ if (cntlid == ARRAY_SIZE(subsys->ctrls)) {
+ error_setg(errp, "no more free controller id");
+ return -1;
+ }
+
+ num_rsvd = nvme_subsys_reserve_cntlids(n, cntlid + 1, num_vfs);
+ if (num_rsvd != num_vfs) {
+ nvme_subsys_unreserve_cntlids(n);
+ error_setg(errp,
+ "no more free controller ids for secondary
controllers");
+ return -1;
+ }
}
subsys->ctrls[cntlid] = n;
@@ -34,7 +85,12 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n)
{
- subsys->ctrls[n->cntlid] = NULL;
+ if (pci_is_vf(&n->parent_obj)) {
+ subsys->ctrls[n->cntlid] = SUBSYS_SLOT_RSVD;
+ } else {
+ subsys->ctrls[n->cntlid] = NULL;
+ nvme_subsys_unreserve_cntlids(n);
+ }
}
static void nvme_subsys_setup(NvmeSubsystem *subsys)
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index 1014ebceb6..dd2aac3418 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -53,6 +53,7 @@ pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller,
csi=0x%"PRIx8""
pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
pci_nvme_identify_ctrl_list(uint8_t cns, uint16_t cntid) "cns 0x%"PRIx8" cntid
%"PRIu16""
pci_nvme_identify_pri_ctrl_cap(uint16_t cntlid) "identify primary controller
capabilities cntlid=%"PRIu16""
+pci_nvme_identify_sec_ctrl_list(uint16_t cntlid, uint8_t numcntl) "identify
secondary controller list cntlid=%"PRIu16" numcntl=%"PRIu8""
pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32",
csi=0x%"PRIx8""
pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16",
csi=0x%"PRIx8""
diff --git a/include/block/nvme.h b/include/block/nvme.h
index f69bd1d14f..96595ea8f1 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1018,6 +1018,7 @@ enum NvmeIdCns {
NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12,
NVME_ID_CNS_CTRL_LIST = 0x13,
NVME_ID_CNS_PRIMARY_CTRL_CAP = 0x14,
+ NVME_ID_CNS_SECONDARY_CTRL_LIST = 0x15,
NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a,
NVME_ID_CNS_CS_NS_PRESENT = 0x1b,
NVME_ID_CNS_IO_COMMAND_SET = 0x1c,
@@ -1487,6 +1488,23 @@ typedef struct QEMU_PACKED NvmePriCtrlCap {
uint8_t rsvd80[4016];
} NvmePriCtrlCap;
+typedef struct QEMU_PACKED NvmeSecCtrlEntry {
+ uint16_t scid;
+ uint16_t pcid;
+ uint8_t scs;
+ uint8_t rsvd5[3];
+ uint16_t vfn;
+ uint16_t nvq;
+ uint16_t nvi;
+ uint8_t rsvd14[18];
+} NvmeSecCtrlEntry;
+
+typedef struct QEMU_PACKED NvmeSecCtrlList {
+ uint8_t numcntl;
+ uint8_t rsvd1[31];
+ NvmeSecCtrlEntry sec[127];
+} NvmeSecCtrlList;
+
static inline void _nvme_check_size(void)
{
QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096);
@@ -1520,5 +1538,7 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescr) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeDifTuple) != 8);
QEMU_BUILD_BUG_ON(sizeof(NvmePriCtrlCap) != 4096);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlEntry) != 32);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlList) != 4096);
}
#endif
--
2.25.1
- [PATCH v2 00/15] hw/nvme: SR-IOV with Virtualization Enhancements, Łukasz Gieryk, 2021/11/16
- [PATCH v2 02/15] pcie: Add some SR/IOV API documentation in docs/pcie_sriov.txt, Łukasz Gieryk, 2021/11/16
- [PATCH v2 01/15] pcie: Add support for Single Root I/O Virtualization (SR/IOV), Łukasz Gieryk, 2021/11/16
- [PATCH v2 04/15] pcie: Add 1.2 version token for the Power Management Capability, Łukasz Gieryk, 2021/11/16
- [PATCH v2 05/15] hw/nvme: Add support for SR-IOV, Łukasz Gieryk, 2021/11/16
- [PATCH v2 06/15] hw/nvme: Add support for Primary Controller Capabilities, Łukasz Gieryk, 2021/11/16
- [PATCH v2 10/15] hw/nvme: Remove reg_size variable and update BAR0 size calculation, Łukasz Gieryk, 2021/11/16
- [PATCH v2 08/15] hw/nvme: Implement the Function Level Reset, Łukasz Gieryk, 2021/11/16
- [PATCH v2 07/15] hw/nvme: Add support for Secondary Controller List,
Łukasz Gieryk <=
- [PATCH v2 09/15] hw/nvme: Make max_ioqpairs and msix_qsize configurable in runtime, Łukasz Gieryk, 2021/11/16
- [PATCH v2 03/15] pcie: Add helpers to the SR/IOV API, Łukasz Gieryk, 2021/11/16
- [PATCH v2 11/15] hw/nvme: Calculate BAR attributes in a function, Łukasz Gieryk, 2021/11/16
- [PATCH v2 12/15] hw/nvme: Initialize capability structures for primary/secondary controllers, Łukasz Gieryk, 2021/11/16
- [PATCH v2 14/15] docs: Add documentation for SR-IOV and Virtualization Enhancements, Łukasz Gieryk, 2021/11/16
- [PATCH v2 15/15] hw/nvme: Update the initalization place for the AER queue, Łukasz Gieryk, 2021/11/16
- [PATCH v2 13/15] hw/nvme: Add support for the Virtualization Management command, Łukasz Gieryk, 2021/11/16