[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-block] [PATCH v4 3/9] block: Add VFIO based NVMe driver
From: Stefan Hajnoczi
Subject: Re: [Qemu-block] [PATCH v4 3/9] block: Add VFIO based NVMe driver
Date: Wed, 10 Jan 2018 18:33:05 +0000
User-agent: Mutt/1.9.1 (2017-09-22)
On Wed, Jan 10, 2018 at 05:18:40PM +0800, Fam Zheng wrote:
There are several memory and lock leaks in this patch. Please work with
Paolo to get the __attribute__((cleanup(...))) patch series merged so
this class of bugs can be eliminated:
https://lists.gnu.org/archive/html/qemu-devel/2017-12/msg01648.html
> +typedef struct {
> + CoQueue free_req_queue;
> + QemuMutex lock;
> +
> + /* Fields protected by BQL */
> + int index;
> + uint8_t *prp_list_pages;
> +
> + /* Fields protected by @lock */
Does this lock serve any purpose? I didn't see a place where these
fields are accessed from multiple threads. Perhaps you're trying to
prepare for multiqueue, but then other things like the
BDRVNVMeState->inflight counter aren't protected so it doesn't make
sense.
> +static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q)
> +{
> + int i;
> + NVMeRequest *req = NULL;
> +
> + qemu_mutex_lock(&q->lock);
> + while (q->inflight + q->need_kick > NVME_QUEUE_SIZE - 2) {
> + /* We have to leave one slot empty as that is the full queue case
> (head
> + * == tail + 1). */
> + if (qemu_in_coroutine()) {
> + trace_nvme_free_req_queue_wait(q);
> + qemu_mutex_unlock(&q->lock);
> + qemu_co_queue_wait(&q->free_req_queue, NULL);
> + qemu_mutex_lock(&q->lock);
> + } else {
> + return NULL;
q->lock is still held at this return statement, so the early "return NULL"
leaks the mutex.
> +static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
> +{
> + BDRVNVMeState *s = bs->opaque;
> + NvmeIdCtrl *idctrl;
> + NvmeIdNs *idns;
> + uint8_t *resp;
> + int r;
> + uint64_t iova;
> + NvmeCmd cmd = {
> + .opcode = NVME_ADM_CMD_IDENTIFY,
> + .cdw10 = cpu_to_le32(0x1),
> + };
> +
> + resp = qemu_try_blockalign0(bs, sizeof(NvmeIdCtrl));
> + if (!resp) {
> + error_setg(errp, "Cannot allocate buffer for identify response");
> + goto out;
> + }
> + idctrl = (NvmeIdCtrl *)resp;
> + idns = (NvmeIdNs *)resp;
> + r = qemu_vfio_dma_map(s->vfio, resp, sizeof(NvmeIdCtrl), true, &iova);
> + if (r) {
> + error_setg(errp, "Cannot map buffer for DMA");
> + goto out;
> + }
> + cmd.prp1 = cpu_to_le64(iova);
> +
> + if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
> + error_setg(errp, "Failed to identify controller");
> + goto out;
> + }
> +
> + if (le32_to_cpu(idctrl->nn) < namespace) {
> + error_setg(errp, "Invalid namespace");
> + goto out;
> + }
> + s->write_cache = le32_to_cpu(idctrl->vwc) & 0x1;
> + s->max_transfer = (idctrl->mdts ? 1 << idctrl->mdts : 0) * s->page_size;
> + /* For now the page list buffer per command is one page, to hold at most
> + * s->page_size / sizeof(uint64_t) entries. */
> + s->max_transfer = MIN_NON_ZERO(s->max_transfer,
> + s->page_size / sizeof(uint64_t) * s->page_size);
> +
> + memset(resp, 0, 4096);
> +
> + cmd.cdw10 = 0;
> + cmd.nsid = namespace;
Missing cpu_to_le32().
> +static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
> + Error **errp)
> +{
> + BDRVNVMeState *s = bs->opaque;
> + int ret;
> + uint64_t cap;
> + uint64_t timeout_ms;
> + uint64_t deadline, now;
> + Error *local_err = NULL;
> +
> + qemu_co_mutex_init(&s->dma_map_lock);
> + qemu_co_queue_init(&s->dma_flush_queue);
> + s->nsid = namespace;
> + s->aio_context = qemu_get_current_aio_context();
Why not bdrv_get_aio_context(bs)?
> + ret = event_notifier_init(&s->irq_notifier, 0);
> + if (ret) {
> + error_setg(errp, "Failed to init event notifier");
> + return ret;
dma_map_lock should be destroyed.
> + }
> +
> + s->vfio = qemu_vfio_open_pci(device, errp);
> + if (!s->vfio) {
> + ret = -EINVAL;
> + goto fail;
> + }
> +
> + s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, errp);
> + if (!s->regs) {
> + ret = -EINVAL;
> + goto fail;
> + }
> +
> + /* Perform initialize sequence as described in NVMe spec "7.6.1
> + * Initialization". */
> +
> + cap = le64_to_cpu(s->regs->cap);
> + if (!(cap & (1ULL << 37))) {
> + error_setg(errp, "Device doesn't support NVMe command set");
> + ret = -EINVAL;
> + goto fail;
> + }
> +
> + s->page_size = MAX(4096, 1 << (12 + ((cap >> 48) & 0xF)));
> + s->doorbell_scale = (4 << (((cap >> 32) & 0xF))) / sizeof(uint32_t);
> + bs->bl.opt_mem_alignment = s->page_size;
> + timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 30000);
> +
> + /* Reset device to get a clean state. */
> + s->regs->cc = cpu_to_le32(le32_to_cpu(s->regs->cc) & 0xFE);
> + /* Wait for CSTS.RDY = 0. */
> + deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms *
> 1000000ULL;
> + while (le32_to_cpu(s->regs->csts) & 0x1) {
> + if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
> + error_setg(errp, "Timeout while waiting for device to reset (%ld
> ms)",
> + timeout_ms);
> + ret = -ETIMEDOUT;
> + goto fail;
> + }
> + }
> +
> + /* Set up admin queue. */
> + s->queues = g_new(NVMeQueuePair *, 1);
> + s->nr_queues = 1;
> + s->queues[0] = nvme_create_queue_pair(bs, 0, NVME_QUEUE_SIZE, errp);
> + if (!s->queues[0]) {
> + ret = -EINVAL;
> + goto fail;
> + }
> + QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
> + s->regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
> + s->regs->asq = cpu_to_le64(s->queues[0]->sq.iova);
> + s->regs->acq = cpu_to_le64(s->queues[0]->cq.iova);
> +
> + /* After setting up all control registers we can enable device now. */
> + s->regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
> + (ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
> + 0x1);
> + /* Wait for CSTS.RDY = 1. */
> + now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> + deadline = now + timeout_ms * 1000000;
> + while (!(le32_to_cpu(s->regs->csts) & 0x1)) {
> + if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
> + error_setg(errp, "Timeout while waiting for device to start (%ld
> ms)",
> + timeout_ms);
> + ret = -ETIMEDOUT;
> + goto fail_queue;
> + }
> + }
> +
> + ret = qemu_vfio_pci_init_irq(s->vfio, &s->irq_notifier,
> + VFIO_PCI_MSIX_IRQ_INDEX, errp);
> + if (ret) {
> + goto fail_queue;
> + }
> + aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier,
> + false, nvme_handle_event, nvme_poll_cb);
> +
> + nvme_identify(bs, namespace, errp);
> + if (local_err) {
> + error_propagate(errp, local_err);
> + ret = -EIO;
> + goto fail_handler;
> + }
> +
> + /* Set up command queues. */
> + if (!nvme_add_io_queue(bs, errp)) {
> + ret = -EIO;
> + goto fail_handler;
> + }
> + return 0;
> +
> +fail_handler:
> + aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier,
> + false, NULL, NULL);
> +fail_queue:
> + nvme_free_queue_pair(bs, s->queues[0]);
> +fail:
s->queues is not freed.
> + qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs);
> + qemu_vfio_close(s->vfio);
> + event_notifier_cleanup(&s->irq_notifier);
dma_map_lock should be destroyed.
> +static int nvme_file_open(BlockDriverState *bs, QDict *options, int flags,
> + Error **errp)
> +{
> + const char *device;
> + QemuOpts *opts;
> + int namespace;
> +
> + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
> + qemu_opts_absorb_qdict(opts, options, &error_abort);
> + device = qemu_opt_get(opts, NVME_BLOCK_OPT_DEVICE);
> + if (!device) {
> + error_setg(errp, "'" NVME_BLOCK_OPT_DEVICE "' option is required");
> + return -EINVAL;
opts is leaked.
signature.asc
Description: PGP signature
- [Qemu-block] [PATCH v4 2/9] util: Introduce vfio helpers, (continued)
- [Qemu-block] [PATCH v4 2/9] util: Introduce vfio helpers, Fam Zheng, 2018/01/10
- [Qemu-block] [PATCH v4 3/9] block: Add VFIO based NVMe driver, Fam Zheng, 2018/01/10
- [Qemu-block] [PATCH v4 4/9] block: Introduce buf register API, Fam Zheng, 2018/01/10
- [Qemu-block] [PATCH v4 5/9] block/nvme: Implement .bdrv_(un)register_buf, Fam Zheng, 2018/01/10
- [Qemu-block] [PATCH v4 6/9] qemu-img: Map bench buffer, Fam Zheng, 2018/01/10
- [Qemu-block] [PATCH v4 8/9] docs: Add section for NVMe VFIO driver, Fam Zheng, 2018/01/10