[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-block] [PATCH 2/8] nvme: bump supported spec to 1.3
From: |
Klaus Birkelund Jensen |
Subject: |
[Qemu-block] [PATCH 2/8] nvme: bump supported spec to 1.3 |
Date: |
Fri, 17 May 2019 10:42:28 +0200 |
Bump the supported NVMe version to 1.3. To do so, this patch adds a
number of missing 'Mandatory' features from the spec:
* Support for returning a Namespace Identification Descriptor List in
the Identify command (CNS 03h).
* Support for the Asynchronous Event Request command.
* Support for the Get Log Page command and the mandatory Error
Information, SMART / Health Information and Firmware Slot
Information log pages.
* Support for the Abort command.
As a side-effect, this bump also fixes support for multiple namespaces.
The implementation of AER, Get Log Page and Abort commands has been
imported and slightly modified from Keith's qemu-nvme tree[1]. Thanks!
[1]: http://git.infradead.org/users/kbusch/qemu-nvme.git
Signed-off-by: Klaus Birkelund Jensen <address@hidden>
---
hw/block/nvme.c | 792 ++++++++++++++++++++++++++++++++++++------
hw/block/nvme.h | 31 +-
hw/block/trace-events | 16 +-
include/block/nvme.h | 58 +++-
4 files changed, 783 insertions(+), 114 deletions(-)
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index b689c0776e72..65dfc04f71e5 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -9,17 +9,35 @@
*/
/**
- * Reference Specs: http://www.nvmexpress.org, 1.2, 1.1, 1.0e
+ * Reference Specs: http://www.nvmexpress.org, 1.3d, 1.2, 1.1, 1.0e
*
* http://www.nvmexpress.org/resources/
*/
/**
* Usage: add options:
- * -drive file=<file>,if=none,id=<drive_id>
- * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
- * cmb_size_mb=<cmb_size_mb[optional]>, \
- * num_queues=<N[optional]>
+ * -drive file=<file>,if=none,id=<drive_id>
+ * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>
+ *
+ * The "file" option must point to a path to a real file that you will use as
+ * the backing storage for your NVMe device. It must be a non-zero length, as
+ * this will be the disk image that your nvme controller will use to carve up
+ * namespaces for storage.
+ *
+ * Note the "drive" option's "id" name must match the "device nvme" drive's
+ * name to link the block device used for backing storage to the nvme
+ * interface.
+ *
+ * Advanced optional options:
+ *
+ * num_ns=<int> : Namespaces to make out of the backing storage,
+ * Default:1
+ * num_queues=<int> : Number of possible IO Queues, Default:64
+ * cmb_size_mb=<int> : Size of CMB in MBs, Default:0
+ *
+ * Parameters will be verified against conflicting capabilities and attributes
+ * and fail to load if there is a conflict or a configuration the emulated
+ * device is unable to handle.
*
* Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
* offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
@@ -38,6 +56,12 @@
#include "trace.h"
#include "nvme.h"
+#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
+#define NVME_TEMPERATURE 0x143
+#define NVME_ELPE 3
+#define NVME_AERL 3
+#define NVME_OP_ABORTED 0xff
+
#define NVME_GUEST_ERR(trace, fmt, ...) \
do { \
(trace_##trace)(__VA_ARGS__); \
@@ -57,6 +81,16 @@ static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void
*buf, int size)
}
}
+/*
+ * Copy size bytes from buf to the guest address addr.
+ *
+ * When a controller memory buffer is configured and addr lies inside it, the
+ * data is copied directly into n->cmbuf; every other address is written
+ * with pci_dma_write().
+ */
+static void nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size)
+{
+    bool in_cmb = n->cmbsz && addr >= n->ctrl_mem.addr &&
+        addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size));
+
+    if (!in_cmb) {
+        pci_dma_write(&n->parent_obj, addr, buf, size);
+        return;
+    }
+
+    memcpy((void *)&n->cmbuf[addr - n->ctrl_mem.addr], buf, size);
+}
+
+
static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
{
return sqid < n->params.num_queues && n->sq[sqid] != NULL ? 0 : -1;
@@ -244,6 +278,24 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t
*ptr, uint32_t len,
return status;
}
+/*
+ * Complete req on cq: fill in status, submission queue id and submission
+ * queue head of the request's completion entry, write the entry to the
+ * current tail slot of cq, advance the tail and put req back on its
+ * submission queue's req_list.
+ */
+static void nvme_post_cqe(NvmeCQueue *cq, NvmeRequest *req)
+{
+    NvmeCtrl *ctrl = cq->ctrl;
+    NvmeSQueue *owner = req->sq;
+    hwaddr slot = cq->dma_addr + cq->tail * ctrl->cqe_size;
+
+    req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
+    req->cqe.sq_id = cpu_to_le16(owner->sqid);
+    req->cqe.sq_head = cpu_to_le16(owner->head);
+
+    nvme_addr_write(ctrl, slot, (void *) &req->cqe, sizeof(req->cqe));
+    nvme_inc_cq_tail(cq);
+
+    QTAILQ_INSERT_TAIL(&owner->req_list, req, entry);
+}
+
+
static void nvme_post_cqes(void *opaque)
{
NvmeCQueue *cq = opaque;
@@ -251,24 +303,14 @@ static void nvme_post_cqes(void *opaque)
NvmeRequest *req, *next;
QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
- NvmeSQueue *sq;
- hwaddr addr;
-
if (nvme_cq_full(cq)) {
break;
}
QTAILQ_REMOVE(&cq->req_list, req, entry);
- sq = req->sq;
- req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
- req->cqe.sq_id = cpu_to_le16(sq->sqid);
- req->cqe.sq_head = cpu_to_le16(sq->head);
- addr = cq->dma_addr + cq->tail * n->cqe_size;
- nvme_inc_cq_tail(cq);
- pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
- sizeof(req->cqe));
- QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
+ nvme_post_cqe(cq, req);
}
+
if (cq->tail != cq->head) {
nvme_irq_assert(n, cq);
}
@@ -277,11 +319,88 @@ static void nvme_post_cqes(void *opaque)
static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
{
assert(cq->cqid == req->sq->cqid);
+
+ trace_nvme_enqueue_req_completion(req->cqe.cid, cq->cqid);
QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
}
+/*
+ * Queue an asynchronous event and arm the AER timer so that it is matched
+ * against an outstanding Asynchronous Event Request.
+ *
+ * At most one event per event type is kept queued; a second event of a type
+ * that is already queued is dropped. This keeps the queue bounded when the
+ * host never issues AERs.
+ */
+static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
+                               uint8_t event_info, uint8_t log_page)
+{
+    const int type_bit = 1 << event_type;
+    NvmeAsyncEvent *ev;
+
+    trace_nvme_enqueue_event(event_type, event_info, log_page);
+
+    if (n->aer_mask_queued & type_bit) {
+        /* an event of this type is already waiting */
+        return;
+    }
+
+    ev = g_new(NvmeAsyncEvent, 1);
+    ev->result = (NvmeAerResult) {
+        .event_type = event_type,
+        .event_info = event_info,
+        .log_page   = log_page,
+    };
+
+    n->aer_mask_queued |= type_bit;
+    QSIMPLEQ_INSERT_TAIL(&n->aer_queue, ev, entry);
+
+    timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+}
+
+
+/*
+ * AER timer callback (opaque is the NvmeCtrl).
+ *
+ * Walks the queue of pending asynchronous events and, for every event whose
+ * type is not masked, completes one outstanding Asynchronous Event Request
+ * on the admin completion queue with the event as its result. Processing
+ * stops as soon as no outstanding AER is left. Events of a masked type stay
+ * queued until nvme_clear_events() unmasks the type.
+ */
+static void nvme_process_aers(void *opaque)
+{
+    NvmeCtrl *n = opaque;
+    NvmeAsyncEvent *event, *next;
+
+    trace_nvme_process_aers();
+
+    QSIMPLEQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
+        NvmeRequest *req;
+        NvmeAerResult *result;
+
+        /* can't post cqe if there is nothing to complete */
+        if (!n->outstanding_aers) {
+            trace_nvme_no_outstanding_aers();
+            break;
+        }
+
+        /* ignore if masked (cqe posted, but event not cleared) */
+        if (n->aer_mask & (1 << event->result.event_type)) {
+            trace_nvme_aer_masked(event->result.event_type, n->aer_mask);
+            continue;
+        }
+
+        /*
+         * Unlink exactly this element. Masked events skipped above are
+         * still in front of it, so removing the queue head here would
+         * unlink (and leak) the wrong event while the one freed below
+         * stayed linked on the queue.
+         */
+        QSIMPLEQ_REMOVE(&n->aer_queue, event, NvmeAsyncEvent, entry);
+
+        n->aer_mask |= 1 << event->result.event_type;
+        n->aer_mask_queued &= ~(1 << event->result.event_type);
+        n->outstanding_aers--;
+
+        req = n->aer_reqs[n->outstanding_aers];
+
+        result = (NvmeAerResult *) &req->cqe.result;
+        result->event_type = event->result.event_type;
+        result->event_info = event->result.event_info;
+        result->log_page = event->result.log_page;
+        g_free(event);
+
+        req->status = NVME_SUCCESS;
+
+        trace_nvme_aer_post_cqe(result->event_type, result->event_info,
+                                result->log_page);
+
+        nvme_enqueue_req_completion(&n->admin_cq, req);
+    }
+}
+
+
static void nvme_rw_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
@@ -318,7 +437,7 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace
*ns, NvmeCmd *cmd,
{
NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
- const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
+ const uint8_t data_shift = ns->id_ns.lbaf[lba_index].lbads;
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
uint64_t offset = slba << data_shift;
@@ -347,9 +466,9 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns,
NvmeCmd *cmd,
uint64_t prp2 = le64_to_cpu(rw->prp2);
uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
- uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
+ uint8_t data_shift = ns->id_ns.lbaf[lba_index].lbads;
uint64_t data_size = (uint64_t)nlb << data_shift;
- uint64_t data_offset = slba << data_shift;
+ uint64_t data_offset = ns->blk_offset + (slba << data_shift);
int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
@@ -391,8 +510,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd,
NvmeRequest *req)
NvmeNamespace *ns;
uint32_t nsid = le32_to_cpu(cmd->nsid);
- if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
- trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
+ if (unlikely(nsid == 0 || nsid > n->params.num_ns)) {
+ trace_nvme_err_invalid_ns(nsid, n->params.num_ns);
return NVME_INVALID_NSID | NVME_DNR;
}
@@ -420,6 +539,7 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
if (sq->sqid) {
g_free(sq);
}
+ n->qs_created--;
}
static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -486,6 +606,7 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n,
uint64_t dma_addr,
cq = n->cq[cqid];
QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
n->sq[sqid] = sq;
+ n->qs_created++;
}
static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -535,6 +656,7 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
if (cq->cqid) {
g_free(cq);
}
+ n->qs_created--;
}
static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -575,6 +697,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n,
uint64_t dma_addr,
msix_vector_use(&n->parent_obj, cq->vector);
n->cq[cqid] = cq;
cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
+ n->qs_created++;
}
static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -637,8 +760,8 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify
*c)
trace_nvme_identify_ns(nsid);
- if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
- trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
+ if (unlikely(nsid == 0 || nsid > n->params.num_ns)) {
+ trace_nvme_err_invalid_ns(nsid, n->params.num_ns);
return NVME_INVALID_NSID | NVME_DNR;
}
@@ -648,7 +771,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify
*c)
prp1, prp2);
}
-static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
+static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c)
{
static const int data_len = 4 * KiB;
uint32_t min_nsid = le32_to_cpu(c->nsid);
@@ -658,10 +781,10 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n,
NvmeIdentify *c)
uint16_t ret;
int i, j = 0;
- trace_nvme_identify_nslist(min_nsid);
+ trace_nvme_identify_ns_list(min_nsid);
list = g_malloc0(data_len);
- for (i = 0; i < n->num_namespaces; i++) {
+ for (i = 0; i < n->params.num_ns; i++) {
if (i < min_nsid) {
continue;
}
@@ -675,6 +798,25 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n,
NvmeIdentify *c)
return ret;
}
+/*
+ * Identify, CNS 03h: return the Namespace Identification Descriptor list of
+ * the namespace given in the command.
+ *
+ * The namespace id is validated the same way nvme_identify_ns() does it.
+ * A full, zero-filled 4 KiB buffer is transferred so that the single
+ * descriptor (type 0x3, length 0x10, identifier bytes left zero) is
+ * followed by zeroes that terminate the list instead of whatever the host
+ * buffer contained before.
+ *
+ * Returns NVME_INVALID_NSID | NVME_DNR for an out-of-range namespace id,
+ * otherwise the status of the PRP transfer.
+ */
+static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c)
+{
+    static const int data_len = 4 * KiB;
+    uint32_t nsid = le32_to_cpu(c->nsid);
+    uint64_t prp1 = le64_to_cpu(c->prp1);
+    uint64_t prp2 = le64_to_cpu(c->prp2);
+    NvmeIdentifyNamespaceDescriptor *list;
+    uint16_t ret;
+
+    trace_nvme_identify_ns_descriptor_list(nsid);
+
+    if (unlikely(nsid == 0 || nsid > n->params.num_ns)) {
+        trace_nvme_err_invalid_ns(nsid, n->params.num_ns);
+        return NVME_INVALID_NSID | NVME_DNR;
+    }
+
+    list = g_malloc0(data_len);
+    list->nidt = 0x3;
+    list->nidl = 0x10;
+
+    ret = nvme_dma_read_prp(n, (uint8_t *) list, data_len, prp1, prp2);
+    g_free(list);
+    return ret;
+}
static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
{
@@ -686,7 +828,9 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
case 0x01:
return nvme_identify_ctrl(n, c);
case 0x02:
- return nvme_identify_nslist(n, c);
+ return nvme_identify_ns_list(n, c);
+ case 0x03:
+ return nvme_identify_ns_descriptor_list(n, cmd);
default:
trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
return NVME_INVALID_FIELD | NVME_DNR;
@@ -696,18 +840,49 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+ uint32_t dw11 = le32_to_cpu(cmd->cdw11);
uint32_t result;
+ trace_nvme_getfeat(dw10);
+
switch (dw10) {
+ case NVME_ARBITRATION:
+ result = cpu_to_le32(n->features.arbitration);
+ break;
+ case NVME_POWER_MANAGEMENT:
+ result = cpu_to_le32(n->features.power_mgmt);
+ break;
+ case NVME_TEMPERATURE_THRESHOLD:
+ result = cpu_to_le32(n->features.temp_thresh);
+ break;
+ case NVME_ERROR_RECOVERY:
+ result = cpu_to_le32(n->features.err_rec);
+ break;
case NVME_VOLATILE_WRITE_CACHE:
result = blk_enable_write_cache(n->conf.blk);
trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
break;
case NVME_NUMBER_OF_QUEUES:
result = cpu_to_le32((n->params.num_queues - 2) |
- ((n->params.num_queues - 2) << 16));
+ ((n->params.num_queues - 2) << 16));
trace_nvme_getfeat_numq(result);
break;
+ case NVME_INTERRUPT_COALESCING:
+ result = cpu_to_le32(n->features.int_coalescing);
+ break;
+ case NVME_INTERRUPT_VECTOR_CONF:
+ if ((dw11 & 0xffff) > n->params.num_queues) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ result = cpu_to_le32(n->features.int_vector_config[dw11 & 0xffff]);
+ break;
+ case NVME_WRITE_ATOMICITY:
+ result = cpu_to_le32(n->features.write_atomicity);
+ break;
+ case NVME_ASYNCHRONOUS_EVENT_CONF:
+ result = cpu_to_le32(n->features.async_config);
+ break;
default:
trace_nvme_err_invalid_getfeat(dw10);
return NVME_INVALID_FIELD | NVME_DNR;
@@ -722,22 +897,244 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd
*cmd, NvmeRequest *req)
uint32_t dw10 = le32_to_cpu(cmd->cdw10);
uint32_t dw11 = le32_to_cpu(cmd->cdw11);
+ trace_nvme_setfeat(dw10, dw11);
+
switch (dw10) {
+ case NVME_TEMPERATURE_THRESHOLD:
+ n->features.temp_thresh = dw11;
+ if (n->features.temp_thresh <= n->temperature) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_SMART,
+ NVME_AER_INFO_SMART_TEMP_THRESH, NVME_LOG_SMART_INFO);
+ }
+ break;
case NVME_VOLATILE_WRITE_CACHE:
blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
break;
case NVME_NUMBER_OF_QUEUES:
+ if (n->qs_created > 2) {
+ return NVME_CMD_SEQ_ERROR | NVME_DNR;
+ }
+
+ if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
((dw11 >> 16) & 0xFFFF) + 1,
n->params.num_queues - 1,
n->params.num_queues - 1);
req->cqe.result = cpu_to_le32((n->params.num_queues - 2) |
- ((n->params.num_queues - 2) << 16));
+ ((n->params.num_queues - 2) << 16));
break;
+ case NVME_ASYNCHRONOUS_EVENT_CONF:
+ n->features.async_config = dw11;
+ break;
+ case NVME_ARBITRATION:
+ case NVME_POWER_MANAGEMENT:
+ case NVME_ERROR_RECOVERY:
+ case NVME_INTERRUPT_COALESCING:
+ case NVME_INTERRUPT_VECTOR_CONF:
+ case NVME_WRITE_ATOMICITY:
+ return NVME_FEAT_NOT_CHANGABLE | NVME_DNR;
default:
trace_nvme_err_invalid_setfeat(dw10);
return NVME_INVALID_FIELD | NVME_DNR;
}
+
+ return NVME_SUCCESS;
+}
+
+/*
+ * Unmask event_type in the AER mask and, if events are still queued, arm
+ * the AER timer so they get another chance to be posted.
+ */
+static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
+{
+    n->aer_mask &= ~(1 << event_type);
+
+    if (QSIMPLEQ_EMPTY(&n->aer_queue)) {
+        return;
+    }
+
+    timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+}
+
+
+/*
+ * Get Log Page: Error Information.
+ *
+ * Transfers up to buf_len bytes of the n->elpes array (NVME_ELPE + 1
+ * entries), starting at byte offset off, to the PRPs of the command.
+ * Unless rae is set, the error event type is unmasked first.
+ *
+ * Returns NVME_INVALID_FIELD | NVME_DNR when off lies beyond the log,
+ * otherwise the status of the PRP transfer.
+ */
+static uint16_t nvme_error_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
+                                    uint32_t buf_len, uint64_t off,
+                                    NvmeRequest *req)
+{
+    const size_t log_size = sizeof(*n->elpes) * (NVME_ELPE + 1);
+    uint64_t prp1 = le64_to_cpu(cmd->prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->prp2);
+    uint32_t xfer_len;
+
+    if (off > log_size) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    xfer_len = MIN(log_size - off, buf_len);
+
+    if (!rae) {
+        nvme_clear_events(n, NVME_AER_TYPE_ERROR);
+    }
+
+    return nvme_dma_read_prp(n, (uint8_t *) n->elpes + off, xfer_len, prp1,
+                             prp2);
+}
+
+
+/*
+ * Get Log Page: SMART / Health Information.
+ *
+ * Builds the log on the stack (temperature, the temperature critical
+ * warning bit when the threshold is at or below the current temperature,
+ * and power-on hours derived from the realtime clock) and transfers up to
+ * buf_len bytes of it, starting at byte offset off.
+ * Unless rae is set, the SMART event type is unmasked.
+ *
+ * Only namespace ids 0 and 0xffffffff are accepted.
+ *
+ * Returns NVME_INVALID_FIELD | NVME_DNR for any other namespace id or an
+ * offset beyond the log, otherwise the status of the PRP transfer.
+ */
+static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
+                                uint32_t buf_len, uint64_t off,
+                                NvmeRequest *req)
+{
+    uint64_t prp1 = le64_to_cpu(cmd->prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->prp2);
+    uint32_t nsid = le32_to_cpu(cmd->nsid);
+
+    uint32_t trans_len;
+    uint64_t elapsed_ms;
+    NvmeSmartLog smart;
+
+    if (nsid != 0 && nsid != 0xffffffff) {
+        trace_nvme_err(req->cqe.cid, "smart log not supported for namespace",
+                       NVME_INVALID_FIELD | NVME_DNR);
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    if (off > sizeof(smart)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    trans_len = MIN(sizeof(smart) - off, buf_len);
+
+    memset(&smart, 0x0, sizeof(smart));
+    smart.number_of_error_log_entries[0] = cpu_to_le64(0);
+    smart.temperature[0] = n->temperature & 0xff;
+    smart.temperature[1] = (n->temperature >> 8) & 0xff;
+
+    if (n->features.temp_thresh <= n->temperature) {
+        smart.critical_warning |= NVME_SMART_TEMPERATURE;
+    }
+
+    /*
+     * Keep the millisecond clock in a 64-bit type; time_t may be 32 bits
+     * wide and would truncate the value.
+     */
+    elapsed_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - n->starttime_ms;
+    smart.power_on_hours[0] = cpu_to_le64(elapsed_ms / (1000 * 60 * 60));
+
+    if (!rae) {
+        nvme_clear_events(n, NVME_AER_TYPE_SMART);
+    }
+
+    return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
+                             prp2);
+}
+
+
+/*
+ * Get Log Page: Firmware Slot Information.
+ *
+ * Transfers up to buf_len bytes of an all-zero firmware slot log, starting
+ * at byte offset off.
+ *
+ * Returns NVME_INVALID_FIELD | NVME_DNR when off lies beyond the log,
+ * otherwise the status of the PRP transfer.
+ */
+static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
+                                 uint64_t off, NvmeRequest *req)
+{
+    NvmeFwSlotInfoLog zero_log;
+    uint64_t prp1 = le64_to_cpu(cmd->prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->prp2);
+    uint32_t xfer_len;
+
+    if (off > sizeof(zero_log)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    memset(&zero_log, 0, sizeof(NvmeFwSlotInfoLog));
+    xfer_len = MIN(sizeof(zero_log) - off, buf_len);
+
+    return nvme_dma_read_prp(n, (uint8_t *) &zero_log + off, xfer_len, prp1,
+                             prp2);
+}
+
+
+/*
+ * Get Log Page admin command.
+ *
+ * Decodes the log identifier (dw10 bits 7:0), the RAE bit (dw10 bit 15),
+ * the dword count (lower half in dw10 bits 31:16, upper half in dw11 bits
+ * 15:0, zero-based) and the 64-bit byte offset (dw12/dw13), then dispatches
+ * to the handler of the requested log page.
+ *
+ * The byte length is computed in 64 bits and saturated to UINT32_MAX: the
+ * handlers take a 32-bit length and clamp it to the size of their log, so
+ * a very large dword count must not wrap around to a small length.
+ *
+ * Returns NVME_INVALID_FIELD | NVME_DNR for an offset that is not dword
+ * aligned and NVME_INVALID_LOG_ID | NVME_DNR for an unsupported log page.
+ */
+static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+    uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+    uint32_t dw11 = le32_to_cpu(cmd->cdw11);
+    uint32_t dw12 = le32_to_cpu(cmd->cdw12);
+    uint32_t dw13 = le32_to_cpu(cmd->cdw13);
+    uint16_t lid = dw10 & 0xff;
+    uint8_t rae = (dw10 >> 15) & 0x1;
+    uint32_t numdl, numdu, len;
+    uint64_t off, lpol, lpou, numd;
+
+    numdl = (dw10 >> 16);
+    numdu = (dw11 & 0xffff);
+    lpol = dw12;
+    lpou = dw13;
+
+    numd = ((uint64_t) numdu << 16) | numdl;
+    len = MIN((numd + 1) << 2, (uint64_t) UINT32_MAX);
+    off = (lpou << 32ULL) | lpol;
+
+    if (off & 0x3) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    trace_nvme_get_log(req->cqe.cid, lid);
+
+    switch (lid) {
+    case NVME_LOG_ERROR_INFO:
+        return nvme_error_log_info(n, cmd, rae, len, off, req);
+    case NVME_LOG_SMART_INFO:
+        return nvme_smart_info(n, cmd, rae, len, off, req);
+    case NVME_LOG_FW_SLOT_INFO:
+        return nvme_fw_log_info(n, cmd, len, off, req);
+    default:
+        trace_nvme_err_invalid_log_page(req->cqe.cid, lid);
+        return NVME_INVALID_LOG_ID | NVME_DNR;
+    }
+}
+
+
+/*
+ * Asynchronous Event Request admin command.
+ *
+ * Parks req in n->aer_reqs until an event is available and arms the AER
+ * timer. Up to NVME_AERL + 1 requests may be outstanding at once.
+ *
+ * Returns NVME_NO_COMPLETE when the request was parked, or
+ * NVME_AER_LIMIT_EXCEEDED when the limit is already reached.
+ */
+static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+    trace_nvme_aer(req->cqe.cid);
+
+    if (n->outstanding_aers <= NVME_AERL) {
+        n->aer_reqs[n->outstanding_aers] = req;
+        timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+        n->outstanding_aers++;
+
+        return NVME_NO_COMPLETE;
+    }
+
+    trace_nvme_aer_aerl_exceeded();
+    return NVME_AER_LIMIT_EXCEEDED;
+}
+
+
+/*
+ * Abort admin command.
+ *
+ * dw10 carries the submission queue id (bits 15:0) and the command id
+ * (bits 31:16) of the command to abort. Only commands that are still
+ * queued between head and tail of that submission queue are considered.
+ * A matching entry is completed with NVME_CMD_ABORT_REQ and its opcode in
+ * the queue is overwritten with NVME_OP_ABORTED.
+ *
+ * The result of the abort command itself is 0 when a command was aborted
+ * and 1 otherwise. If the submission queue has no free request object to
+ * carry the aborted completion, the command is left alone and reported as
+ * not aborted.
+ *
+ * Returns NVME_INVALID_FIELD | NVME_DNR for a nonexistent submission
+ * queue, NVME_SUCCESS otherwise.
+ */
+static uint16_t nvme_abort(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+    NvmeSQueue *sq;
+    NvmeRequest *new;
+    uint32_t index = 0;
+    /* command dwords are little-endian in guest memory */
+    uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+    uint16_t sqid = dw10 & 0xffff;
+    uint16_t cid = (dw10 >> 16) & 0xffff;
+
+    req->cqe.result = cpu_to_le32(1);
+    if (nvme_check_sqid(n, sqid)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    sq = n->sq[sqid];
+
+    /* only consider queued (and not executing) commands for abort */
+    while ((sq->head + index) % sq->size != sq->tail) {
+        NvmeCmd abort_cmd;
+        hwaddr addr;
+
+        addr = sq->dma_addr + ((sq->head + index) % sq->size) * n->sqe_size;
+
+        nvme_addr_read(n, addr, (void *) &abort_cmd, sizeof(abort_cmd));
+        if (le16_to_cpu(abort_cmd.cid) == cid) {
+            new = QTAILQ_FIRST(&sq->req_list);
+            if (!new) {
+                /* no free request left to post the aborted completion */
+                break;
+            }
+
+            req->cqe.result = cpu_to_le32(0);
+            QTAILQ_REMOVE(&sq->req_list, new, entry);
+            QTAILQ_INSERT_TAIL(&sq->out_req_list, new, entry);
+
+            memset(&new->cqe, 0, sizeof(new->cqe));
+            new->cqe.cid = cpu_to_le16(cid);
+            new->status = NVME_CMD_ABORT_REQ;
+
+            abort_cmd.opcode = NVME_OP_ABORTED;
+            nvme_addr_write(n, addr, (void *) &abort_cmd, sizeof(abort_cmd));
+
+            nvme_enqueue_req_completion(n->cq[sq->cqid], new);
+
+            return NVME_SUCCESS;
+        }
+
+        ++index;
+    }
+
return NVME_SUCCESS;
}
@@ -758,6 +1155,12 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd,
NvmeRequest *req)
return nvme_set_feature(n, cmd, req);
case NVME_ADM_CMD_GET_FEATURES:
return nvme_get_feature(n, cmd, req);
+ case NVME_ADM_CMD_GET_LOG_PAGE:
+ return nvme_get_log(n, cmd, req);
+ case NVME_ADM_CMD_ASYNC_EV_REQ:
+ return nvme_aer(n, cmd, req);
+ case NVME_ADM_CMD_ABORT:
+ return nvme_abort(n, cmd, req);
default:
trace_nvme_err_invalid_admin_opc(cmd->opcode);
return NVME_INVALID_OPCODE | NVME_DNR;
@@ -780,6 +1183,10 @@ static void nvme_process_sq(void *opaque)
nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd));
nvme_inc_sq_head(sq);
+ if (cmd.opcode == NVME_OP_ABORTED) {
+ continue;
+ }
+
req = QTAILQ_FIRST(&sq->req_list);
QTAILQ_REMOVE(&sq->req_list, req, entry);
QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
@@ -797,6 +1204,7 @@ static void nvme_process_sq(void *opaque)
static void nvme_clear_ctrl(NvmeCtrl *n)
{
+ NvmeAsyncEvent *event;
int i;
blk_drain(n->conf.blk);
@@ -812,8 +1220,19 @@ static void nvme_clear_ctrl(NvmeCtrl *n)
}
}
+ if (n->aer_timer) {
+ timer_del(n->aer_timer);
+ timer_free(n->aer_timer);
+ n->aer_timer = NULL;
+ }
+ while ((event = QSIMPLEQ_FIRST(&n->aer_queue)) != NULL) {
+ QSIMPLEQ_REMOVE_HEAD(&n->aer_queue, entry);
+ g_free(event);
+ }
+
blk_flush(n->conf.blk);
n->bar.cc = 0;
+ n->outstanding_aers = 0;
}
static int nvme_start_ctrl(NvmeCtrl *n)
@@ -906,6 +1325,9 @@ static int nvme_start_ctrl(NvmeCtrl *n)
nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
NVME_AQA_ASQS(n->bar.aqa) + 1);
+ n->aer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_aers, n);
+ QSIMPLEQ_INIT(&n->aer_queue);
+
return 0;
}
@@ -1098,6 +1520,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr,
int val)
"completion queue doorbell write"
" for nonexistent queue,"
" sqid=%"PRIu32", ignoring", qid);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
+ NVME_LOG_ERROR_INFO);
+ }
+
return;
}
@@ -1108,6 +1537,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr,
int val)
" beyond queue size, sqid=%"PRIu32","
" new_head=%"PRIu16", ignoring",
qid, new_head);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO);
+ }
+
return;
}
@@ -1136,6 +1571,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr,
int val)
"submission queue doorbell write"
" for nonexistent queue,"
" sqid=%"PRIu32", ignoring", qid);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
+ NVME_LOG_ERROR_INFO);
+ }
+
return;
}
@@ -1146,6 +1588,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr,
int val)
" beyond queue size, sqid=%"PRIu32","
" new_tail=%"PRIu16", ignoring",
qid, new_tail);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO);
+ }
+
return;
}
@@ -1198,134 +1646,271 @@ static const MemoryRegionOps nvme_cmb_ops = {
},
};
-static void nvme_realize(PCIDevice *pci_dev, Error **errp)
+static int nvme_check_constraints(NvmeCtrl *n, Error **errp)
{
- NvmeCtrl *n = NVME(pci_dev);
- NvmeIdCtrl *id = &n->id_ctrl;
-
- int i;
- int64_t bs_size;
- uint8_t *pci_conf;
-
- if (!n->params.num_queues) {
- error_setg(errp, "num_queues can't be zero");
- return;
- }
+ NvmeParams *params = &n->params;
if (!n->conf.blk) {
- error_setg(errp, "drive property not set");
- return;
+ error_setg(errp, "nvme: block backend not configured");
+ return 1;
}
- bs_size = blk_getlength(n->conf.blk);
- if (bs_size < 0) {
- error_setg(errp, "could not get backing file size");
- return;
+ if (!params->serial) {
+ error_setg(errp, "nvme: serial not configured");
+ return 1;
}
- if (!n->params.serial) {
- error_setg(errp, "serial property not set");
- return;
+ if ((params->num_queues < 1 || params->num_queues > NVME_MAX_QS)) {
+ error_setg(errp, "nvme: invalid queue configuration");
+ return 1;
}
+
+    /* the backing storage size is divided by num_ns, so zero is invalid */
+    if (params->num_ns < 1) {
+        error_setg(errp, "nvme: invalid namespace configuration");
+        return 1;
+    }
+
+ return 0;
+}
+
+static int nvme_init_blk(NvmeCtrl *n, Error **errp)
+{
blkconf_blocksizes(&n->conf);
if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
- false, errp)) {
- return;
+ false, errp)) {
+ return 1;
}
- pci_conf = pci_dev->config;
- pci_conf[PCI_INTERRUPT_PIN] = 1;
- pci_config_set_prog_interface(pci_dev->config, 0x2);
- pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS);
- pcie_endpoint_cap_init(pci_dev, 0x80);
+ return 0;
+}
+
+static int nvme_init_state(NvmeCtrl *n, Error **errp)
+{
+ int64_t bs_size;
+ Error *local_err = NULL;
+
+ if (!n->params.serial) {
+ error_setg(errp, "serial property not set");
+ return 1;
+ }
+
+ if (nvme_check_constraints(n, &local_err)) {
+ error_propagate_prepend(errp, local_err,
+ "nvme_check_constraints: ");
+ return 1;
+ }
+
+    /* num_ns is used as a divisor below */
+    if (n->params.num_ns == 0) {
+        error_setg(errp, "num_ns must be at least 1");
+        return 1;
+    }
- n->num_namespaces = 1;
n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4);
- n->ns_size = bs_size / (uint64_t)n->num_namespaces;
- n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
+ bs_size = blk_getlength(n->conf.blk);
+ if (bs_size < 0) {
+ error_setg(errp, "could not get backing file size");
+ return 1;
+ }
+
+ n->ns_size = bs_size / (uint64_t) n->params.num_ns;
+
+ n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
n->sq = g_new0(NvmeSQueue *, n->params.num_queues);
n->cq = g_new0(NvmeCQueue *, n->params.num_queues);
+ n->elpes = g_new0(NvmeErrorLog, NVME_ELPE + 1);
+ n->aer_reqs = g_new0(NvmeRequest *, NVME_AERL + 1);
+ n->features.int_vector_config = g_malloc0_n(n->params.num_queues,
+ sizeof(*n->features.int_vector_config));
- memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
- "nvme", n->reg_size);
+ return 0;
+}
+
+/*
+ * Set up the controller memory buffer: program the CMBLOC/CMBSZ registers
+ * (BAR 2, offset 0, size taken from the cmb_size_mb parameter), allocate
+ * the zeroed backing buffer and register it as a 64-bit prefetchable
+ * memory BAR.
+ */
+static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
+{
+    uint64_t cmb_bytes;
+
+    NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
+    NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
+
+    NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
+    NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
+    NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
+    NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
+    NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
+    NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2);
+    NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
+
+    n->cmbloc = n->bar.cmbloc;
+    n->cmbsz = n->bar.cmbsz;
+
+    cmb_bytes = NVME_CMBSZ_GETSIZE(n->bar.cmbsz);
+    n->cmbuf = g_malloc0(cmb_bytes);
+    memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
+                          "nvme-cmb", cmb_bytes);
+    pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
+                     PCI_BASE_ADDRESS_SPACE_MEMORY |
+                     PCI_BASE_ADDRESS_MEM_TYPE_64 |
+                     PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
+}
+
+
+static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev)
+{
+ uint8_t *pci_conf = pci_dev->config;
+
+ pci_conf[PCI_INTERRUPT_PIN] = 1;
+ pci_config_set_prog_interface(pci_conf, 0x2);
+ pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
+ pci_config_set_device_id(pci_conf, 0x5845);
+ pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
+ pcie_endpoint_cap_init(pci_dev, 0x80);
+
+ memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
+ n->reg_size);
pci_register_bar(pci_dev, 0,
PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
&n->iomem);
msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL);
+ if (n->params.cmb_size_mb) {
+ nvme_init_cmb(n, pci_dev);
+ }
+}
+
+static void nvme_init_ctrl(NvmeCtrl *n)
+{
+ NvmeIdCtrl *id = &n->id_ctrl;
+ NvmeParams *params = &n->params;
+ uint8_t *pci_conf = n->parent_obj.config;
+
id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
- strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
+ strpadcpy((char *)id->sn, sizeof(id->sn), params->serial, ' ');
id->rab = 6;
id->ieee[0] = 0x00;
id->ieee[1] = 0x02;
id->ieee[2] = 0xb3;
+ id->cmic = 0;
+ id->ver = cpu_to_le32(0x00010300);
id->oacs = cpu_to_le16(0);
- id->frmw = 7 << 1;
- id->lpa = 1 << 0;
+ id->acl = 3;
+ id->aerl = NVME_AERL;
+ id->frmw = 7 << 1 | 1;
+ id->lpa = 1 << 2;
+ id->elpe = NVME_ELPE;
+ id->npss = 0;
id->sqes = (0x6 << 4) | 0x6;
id->cqes = (0x4 << 4) | 0x4;
- id->nn = cpu_to_le32(n->num_namespaces);
+ id->nn = cpu_to_le32(params->num_ns);
id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS);
+ id->fuses = cpu_to_le16(0);
+ id->fna = 0;
+ if (blk_enable_write_cache(n->conf.blk)) {
+ id->vwc = 1;
+ }
+ id->awun = cpu_to_le16(0);
+ id->awupf = cpu_to_le16(0);
+ id->sgls = cpu_to_le32(0);
+
+ strcpy((char *) id->subnqn, "nqn.2014-08.org.nvmexpress:uuid:");
+ qemu_uuid_unparse(&qemu_uuid,
+ (char *) id->subnqn + strlen((char *) id->subnqn));
+
id->psd[0].mp = cpu_to_le16(0x9c4);
id->psd[0].enlat = cpu_to_le32(0x10);
id->psd[0].exlat = cpu_to_le32(0x4);
- if (blk_enable_write_cache(n->conf.blk)) {
- id->vwc = 1;
+
+ n->temperature = NVME_TEMPERATURE;
+ n->features.temp_thresh = 0x14d;
+
+ for (int i = 0; i < n->params.num_queues; i++) {
+ n->features.int_vector_config[i] = i;
}
n->bar.cap = 0;
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
NVME_CAP_SET_CQR(n->bar.cap, 1);
- NVME_CAP_SET_AMS(n->bar.cap, 1);
NVME_CAP_SET_TO(n->bar.cap, 0xf);
NVME_CAP_SET_CSS(n->bar.cap, 1);
NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
- n->bar.vs = 0x00010200;
+ n->bar.vs = 0x00010300;
n->bar.intmc = n->bar.intms = 0;
+}
- if (n->params.cmb_size_mb) {
+static uint64_t nvme_ns_calc_blks(NvmeCtrl *n, NvmeNamespace *ns)
+{
+ return n->ns_size / nvme_ns_lbads_bytes(ns);
+}
- NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
- NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
+static void nvme_ns_init_identify(NvmeCtrl *n, NvmeIdNs *id_ns)
+{
+ id_ns->nlbaf = 0;
+ id_ns->flbas = 0;
+ id_ns->mc = 0;
+ id_ns->dpc = 0;
+ id_ns->dps = 0;
+ id_ns->lbaf[0].lbads = BDRV_SECTOR_BITS;
+}
- NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
- NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
- NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
- NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
- NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
- NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
- NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
+static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+ NvmeIdNs *id_ns = &ns->id_ns;
- n->cmbloc = n->bar.cmbloc;
- n->cmbsz = n->bar.cmbsz;
+ nvme_ns_init_identify(n, id_ns);
- n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
- memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
- "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
- pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
- PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
- PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
+ ns->ns_blks = nvme_ns_calc_blks(n, ns);
+ id_ns->nuse = id_ns->ncap = id_ns->nsze = cpu_to_le64(ns->ns_blks);
+ return 0;
+}
+
+/*
+ * Split the backing storage evenly into params.num_ns namespaces and
+ * initialise each of them. Namespace i (zero-based) gets id i + 1 and
+ * starts at byte offset i * ns_size of the block backend.
+ *
+ * Returns 0 on success. On failure returns 1 with errp set and no
+ * namespace array left allocated.
+ */
+static int nvme_init_namespaces(NvmeCtrl *n, Error **errp)
+{
+    int64_t bs_size;
+    Error *local_err = NULL;
+    NvmeNamespace *ns;
+
+    /* num_ns is the divisor of the per-namespace size below */
+    if (n->params.num_ns == 0) {
+        error_setg(errp, "num_ns must be at least 1");
+        return 1;
+    }
+
+    bs_size = blk_getlength(n->conf.blk);
+    if (bs_size < 0) {
+        error_setg_errno(errp, -bs_size, "blk_getlength");
+        return 1;
+    }
+
+    n->ns_size = bs_size / (uint64_t) n->params.num_ns;
+
+    /* allocate only once nothing before the loop can fail any more */
+    n->namespaces = g_new0(NvmeNamespace, n->params.num_ns);
+
+    for (uint32_t i = 0; i < n->params.num_ns; i++) {
+        ns = &n->namespaces[i];
+        ns->id = i + 1;
+        ns->blk_offset = i * n->ns_size;
+
+        if (nvme_init_namespace(n, ns, &local_err)) {
+            error_propagate_prepend(errp, local_err,
+                                    "nvme_init_namespace: ");
+            g_free(n->namespaces);
+            n->namespaces = NULL;
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+
+static void nvme_realize(PCIDevice *pci_dev, Error **errp)
+{
+ NvmeCtrl *n = NVME(pci_dev);
+ Error *local_err = NULL;
+
+ if (nvme_check_constraints(n, &local_err)) {
+ error_propagate_prepend(errp, local_err, "nvme_check_constraints: ");
+ return;
+ }
+
+ if (nvme_init_blk(n, &local_err)) {
+ error_propagate_prepend(errp, local_err, "nvme_init_blk: ");
+ return;
+ }
+
+ if (nvme_init_state(n, &local_err)) {
+ error_propagate_prepend(errp, local_err, "nvme_init_state: ");
+ return;
}
- for (i = 0; i < n->num_namespaces; i++) {
- NvmeNamespace *ns = &n->namespaces[i];
- NvmeIdNs *id_ns = &ns->id_ns;
- id_ns->nsfeat = 0;
- id_ns->nlbaf = 0;
- id_ns->flbas = 0;
- id_ns->mc = 0;
- id_ns->dpc = 0;
- id_ns->dps = 0;
- id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
- id_ns->ncap = id_ns->nuse = id_ns->nsze =
- cpu_to_le64(n->ns_size >>
- id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
+ nvme_init_pci(n, pci_dev);
+ nvme_init_ctrl(n);
+
+ if (nvme_init_namespaces(n, &local_err)) {
+ error_propagate_prepend(errp, local_err,
+ "nvme_init_namespaces: ");
+ return;
}
}
@@ -1337,6 +1922,9 @@ static void nvme_exit(PCIDevice *pci_dev)
g_free(n->namespaces);
g_free(n->cq);
g_free(n->sq);
+ g_free(n->elpes);
+ g_free(n->aer_reqs);
+ g_free(n->features.int_vector_config);
if (n->params.cmb_size_mb) {
g_free(n->cmbuf);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 8866373058f6..8925a05445da 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -6,11 +6,13 @@
#define DEFINE_NVME_PROPERTIES(_state, _props) \
DEFINE_PROP_STRING("serial", _state, _props.serial), \
DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \
- DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64)
+ DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64), \
+ DEFINE_PROP_UINT32("num_ns", _state, _props.num_ns, 1)
typedef struct NvmeParams {
char *serial;
uint32_t num_queues;
+ uint32_t num_ns;
uint32_t cmb_size_mb;
} NvmeParams;
@@ -63,6 +65,9 @@ typedef struct NvmeCQueue {
typedef struct NvmeNamespace {
NvmeIdNs id_ns;
+ uint32_t id;
+ uint64_t ns_blks;
+ uint64_t blk_offset;
} NvmeNamespace;
#define TYPE_NVME "nvme"
@@ -77,26 +82,48 @@ typedef struct NvmeCtrl {
BlockConf conf;
NvmeParams params;
+ uint64_t starttime_ms;
+ uint16_t temperature;
uint32_t page_size;
uint16_t page_bits;
uint16_t max_prp_ents;
uint16_t cqe_size;
uint16_t sqe_size;
uint32_t reg_size;
- uint32_t num_namespaces;
uint32_t max_q_ents;
uint64_t ns_size;
+ uint8_t outstanding_aers;
uint32_t cmbsz;
uint32_t cmbloc;
uint8_t *cmbuf;
uint64_t irq_status;
+ uint32_t qs_created;
+ QSIMPLEQ_HEAD(, NvmeAsyncEvent) aer_queue;
+ QEMUTimer *aer_timer;
+ uint8_t aer_mask;
+ uint8_t aer_mask_queued;
+
+ NvmeErrorLog *elpes;
+ NvmeRequest **aer_reqs;
NvmeNamespace *namespaces;
NvmeSQueue **sq;
NvmeCQueue **cq;
NvmeSQueue admin_sq;
NvmeCQueue admin_cq;
+ NvmeFeatureVal features;
NvmeIdCtrl id_ctrl;
} NvmeCtrl;
+/*
+ * Return the lbads field of the LBA format entry currently selected for
+ * @ns, i.e. id_ns.lbaf[] indexed by NVME_ID_NS_FLBAS_INDEX(id_ns.flbas).
+ */
+static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
+{
+    return ns->id_ns.lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].lbads;
+}
+
+/*
+ * Return 1 << lbads for the LBA format currently selected for @ns
+ * (going by the function name, the LBA data size in bytes).
+ *
+ * The shift is carried out in size_t, the return type: nvme_ns_lbads()
+ * yields an 8-bit value, and shifting a plain int 1 by 31 or more bits
+ * is undefined behaviour.
+ */
+static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
+{
+    return (size_t)1 << nvme_ns_lbads(ns);
+}
+
#endif /* HW_NVME_H */
diff --git a/hw/block/trace-events b/hw/block/trace-events
index b92039a5739f..abec518167d0 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -42,10 +42,22 @@ nvme_del_sq(uint16_t qid) "deleting submission queue
sqid=%"PRIu16""
nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
nvme_identify_ctrl(void) "identify controller"
nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
-nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
+nvme_identify_ns_list(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
+nvme_identify_ns_descriptor_list(uint16_t ns) "identify namespace descriptor
list, nsid=%"PRIu16""
+nvme_getfeat(uint32_t fid) "fid 0x%"PRIx32""
+nvme_setfeat(uint32_t fid, uint32_t val) "fid 0x%"PRIx32" val 0x%"PRIx32""
nvme_getfeat_vwcache(const char* result) "get feature volatile write cache,
result=%s"
nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested
cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
+nvme_get_log(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16""
+nvme_process_aers(void) "processing aers"
+nvme_aer(uint16_t cid) "cid %"PRIu16""
+nvme_aer_aerl_exceeded(void) "aerl exceeded"
+nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8""
+nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type
0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid) "cid %"PRIu16" cqid
%"PRIu16""
+nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type
0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt
mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt
mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
@@ -60,6 +72,7 @@ nvme_mmio_shutdown_set(void) "shutdown bit set"
nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
# nvme traces for error conditions
+nvme_err(uint16_t cid, const char *s, uint16_t status) "cid %"PRIu16" \"%s\"
status 0x%"PRIx16""
nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not
page aligned: 0x%"PRIx64""
nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned:
0x%"PRIx64""
@@ -85,6 +98,7 @@ nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed
creating completion q
nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features,
dw10=0x%"PRIx32""
+nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid
0x%"PRIx16""
nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are
non-admin completion queues"
nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are
non-admin submission queues"
nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin
submission queue address is null"
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 849a6f3fa346..5a169e7ed7ac 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -386,8 +386,8 @@ enum NvmeAsyncEventRequest {
NVME_AER_TYPE_SMART = 1,
NVME_AER_TYPE_IO_SPECIFIC = 6,
NVME_AER_TYPE_VENDOR_SPECIFIC = 7,
- NVME_AER_INFO_ERR_INVALID_SQ = 0,
- NVME_AER_INFO_ERR_INVALID_DB = 1,
+ NVME_AER_INFO_ERR_INVALID_DB_REGISTER = 0,
+ NVME_AER_INFO_ERR_INVALID_DB_VALUE = 1,
NVME_AER_INFO_ERR_DIAG_FAIL = 2,
NVME_AER_INFO_ERR_PERS_INTERNAL_ERR = 3,
NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR = 4,
@@ -445,7 +445,8 @@ enum NvmeStatusCodes {
NVME_FW_REQ_RESET = 0x010b,
NVME_INVALID_QUEUE_DEL = 0x010c,
NVME_FID_NOT_SAVEABLE = 0x010d,
- NVME_FID_NOT_NSID_SPEC = 0x010f,
+ NVME_FEAT_NOT_CHANGABLE = 0x010e,
+ NVME_FEAT_NOT_NSID_SPEC = 0x010f,
NVME_FW_REQ_SUSYSTEM_RESET = 0x0110,
NVME_CONFLICTING_ATTRS = 0x0180,
NVME_INVALID_PROT_INFO = 0x0181,
@@ -462,6 +463,13 @@ enum NvmeStatusCodes {
NVME_NO_COMPLETE = 0xffff,
};
+typedef struct NvmeIdentifyNamespaceDescriptor { /* 4 fixed bytes followed by a variable-length identifier */
+ uint8_t nidt; /* presumably the Namespace Identifier Type (NVMe 1.3, Identify CNS 03h) - confirm */
+ uint8_t nidl; /* presumably the length of nid[] in bytes - confirm against users */
+ uint8_t rsvd[2];
+ uint8_t nid[]; /* flexible array member: identifier bytes follow the fixed part */
+} NvmeIdentifyNamespaceDescriptor;
+
typedef struct NvmeFwSlotInfoLog {
uint8_t afi;
uint8_t reserved1[7];
@@ -543,7 +551,15 @@ typedef struct NvmeIdCtrl {
uint8_t ieee[3];
uint8_t cmic;
uint8_t mdts;
- uint8_t rsvd255[178];
+ uint16_t cntlid;
+ uint32_t ver;
+ uint16_t rtd3r;
+ uint32_t rtd3e;
+ uint32_t oaes;
+ uint32_t ctratt;
+ uint8_t rsvd111[12];
+ uint8_t fguid[16];
+ uint8_t rsvd255[128];
uint16_t oacs;
uint8_t acl;
uint8_t aerl;
@@ -551,10 +567,28 @@ typedef struct NvmeIdCtrl {
uint8_t lpa;
uint8_t elpe;
uint8_t npss;
- uint8_t rsvd511[248];
+ uint8_t avscc;
+ uint8_t apsta;
+ uint16_t wctemp;
+ uint16_t cctemp;
+ uint16_t mtfa;
+ uint32_t hmpre;
+ uint32_t hmmin;
+ uint8_t tnvmcap[16];
+ uint8_t unvmcap[16];
+ uint32_t rpmbs;
+ uint16_t edstt;
+ uint8_t dsto;
+ uint8_t fwug;
+ uint16_t kas;
+ uint16_t hctma;
+ uint16_t mntmt;
+ uint16_t mxtmt;
+ uint32_t sanicap;
+ uint8_t rsvd511[180];
uint8_t sqes;
uint8_t cqes;
- uint16_t rsvd515;
+ uint16_t maxcmd;
uint32_t nn;
uint16_t oncs;
uint16_t fuses;
@@ -562,8 +596,14 @@ typedef struct NvmeIdCtrl {
uint8_t vwc;
uint16_t awun;
uint16_t awupf;
- uint8_t rsvd703[174];
- uint8_t rsvd2047[1344];
+ uint8_t nvscc;
+ uint8_t rsvd531;
+ uint16_t acwu;
+ uint16_t rsvd535;
+ uint32_t sgls;
+ uint8_t rsvd767[228];
+ uint8_t subnqn[256];
+ uint8_t rsvd2047[1024];
NvmePSD psd[32];
uint8_t vs[1024];
} NvmeIdCtrl;
@@ -637,7 +677,7 @@ typedef struct NvmeRangeType {
typedef struct NvmeLBAF {
uint16_t ms;
- uint8_t ds;
+ uint8_t lbads;
uint8_t rp;
} NvmeLBAF;
--
2.21.0
- [Qemu-block] [PATCH 0/8] nvme: v1.3, sgls, metadata and new 'ocssd' device, Klaus Birkelund Jensen, 2019/05/17
- [Qemu-block] [PATCH 1/8] nvme: move device parameters to separate struct, Klaus Birkelund Jensen, 2019/05/17
- [Qemu-block] [PATCH 5/8] nvme: add support for metadata, Klaus Birkelund Jensen, 2019/05/17
- [Qemu-block] [PATCH 7/8] nvme: keep a copy of the NVMe command in request, Klaus Birkelund Jensen, 2019/05/17
- [Qemu-block] [PATCH 4/8] nvme: allow multiple i/o's per request, Klaus Birkelund Jensen, 2019/05/17
- [Qemu-block] [PATCH 3/8] nvme: simplify PRP mappings, Klaus Birkelund Jensen, 2019/05/17
- [Qemu-block] [PATCH 6/8] nvme: add support for scatter gather lists, Klaus Birkelund Jensen, 2019/05/17
- [Qemu-block] [PATCH 2/8] nvme: bump supported spec to 1.3,
Klaus Birkelund Jensen <=
- [Qemu-block] [PATCH 8/8] nvme: add an OpenChannel 2.0 NVMe device (ocssd), Klaus Birkelund Jensen, 2019/05/17
- Re: [Qemu-block] [PATCH 0/8] nvme: v1.3, sgls, metadata and new 'ocssd' device, Kevin Wolf, 2019/05/20