[Qemu-devel] [PATCH 6/9] virtio net: introduce dataplane for virtio net


From: Liu Ping Fan
Subject: [Qemu-devel] [PATCH 6/9] virtio net: introduce dataplane for virtio net
Date: Thu, 21 Feb 2013 20:54:50 +0800

From: Liu Ping Fan <address@hidden>

This follows the virtio-blk dataplane model: the data-path handling is
pushed out of the big QEMU lock. It is an attempt to do this processing
in userspace, where vhost-net does it in the kernel.

Signed-off-by: Liu Ping Fan <address@hidden>
---
 hw/dataplane/virtio-net.c |  422 +++++++++++++++++++++++++++++++++++++++++++++
 hw/dataplane/virtio-net.h |   26 +++
 hw/virtio-net.c           |   56 +-----
 hw/virtio-net.h           |   61 +++++++
 4 files changed, 517 insertions(+), 48 deletions(-)
 create mode 100644 hw/dataplane/virtio-net.c
 create mode 100644 hw/dataplane/virtio-net.h

diff --git a/hw/dataplane/virtio-net.c b/hw/dataplane/virtio-net.c
new file mode 100644
index 0000000..9a1795d
--- /dev/null
+++ b/hw/dataplane/virtio-net.c
@@ -0,0 +1,422 @@
+/* Copyright IBM, Corp. 2013
+ *
+ * Based on vhost-net and virtio-blk dataplane code
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include "hw/virtio.h"
+#include "qemu/iov.h"
+#include "vring.h"
+#include <linux/virtio_ring.h>
+#include "net/net.h"
+#include "net/checksum.h"
+#include "net/tap.h"
+#include "virtio-net.h"
+#include "qemu/error-report.h"
+
+typedef struct VirtIONetDataPlane {
+    int async_tx_head;
+    Vring *rx_vring;
+    Vring *tx_vring;
+    EventHandler *rx_handler;
+    EventHandler *tx_handler;
+    bool stop;
+} VirtIONetDataPlane;
+
+WorkThread virt_net_thread;
+
+#define VRING_MAX 128
+
+static int virtnet_tx(VirtIONet *n, VirtQueue *vq);
+
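+/* Completion callback for qemu_sendv_packet_async(): push the element that
+ * stalled on the backend onto the used ring, then resume transmission. */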
+static void virtnet_tx_complete(struct NetClientState *nc, ssize_t sz)
+{
+    int ret;
+    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+
+    vring_push(n->dp->tx_vring, n->dp->async_tx_head, 0);
+    ret = virtnet_tx(n, n->tx_vq);
+    if (ret != -EBUSY) {
+        vring_enable_notification(&n->vdev, n->dp->tx_vring);
+    }
+}
+
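+/* Drain the tx vring: pop descriptor chains, strip the part of the guest
+ * header that the host does not expect, and hand the packets to the net
+ * backend.  Returns -EBUSY when the backend queues a packet asynchronously,
+ * otherwise the number of packets sent (bounded by tx_burst). */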
+static int virtnet_tx(VirtIONet *n, VirtQueue *vq)
+{
+    struct iovec out_iov[VRING_MAX], sg[VRING_MAX];
+    struct iovec *snd, *end = &out_iov[VRING_MAX];
+    int head;
+    unsigned int out_num, in_num, sg_num;
+    int ret;
+    int num_packets = 0;
+
+    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        return num_packets;
+    }
+
+    assert(n->vdev.vm_running);
+
+    if (n->async_tx.elem.out_num) {
+        return num_packets;
+    }
+
+    while (true) {
+        head = vring_pop(&n->vdev, n->dp->tx_vring, out_iov, end, &out_num,
+                            &in_num);
+        if (head < 0) {
+            break;
+        }
+        snd = out_iov;
+        assert(n->host_hdr_len <= n->guest_hdr_len);
+        if (n->host_hdr_len != n->guest_hdr_len) {
+            sg_num = iov_copy(sg, ARRAY_SIZE(sg),
+                                       out_iov, out_num,
+                                       0, n->host_hdr_len);
+            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
+                             out_iov, out_num,
+                             n->guest_hdr_len, -1);
+            out_num = sg_num;
+            snd = sg;
+        }
+
+        ret = qemu_sendv_packet_async(&n->nic->nc, snd, out_num,
+                    virtnet_tx_complete);
+        if (ret == 0) {
+            n->dp->async_tx_head = head;
+            return -EBUSY;
+        }
+        vring_push(n->dp->tx_vring, head, 0);
+        if (num_packets++ > n->tx_burst) {
+            break;
+        }
+    }
+
+    return num_packets;
+}
+
+static void virtnet_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
+{
+    int ret;
+    VirtIONet *n = (VirtIONet *)vdev;
+
+    /* This happens when device was stopped but VCPU wasn't. */
+    if (!n->vdev.vm_running) {
+        return;
+    }
+    vring_disable_notification(vdev, n->dp->tx_vring);
+    ret = virtnet_tx(n, vq);
+    if (ret != -EBUSY) {
+        vring_enable_notification(vdev, n->dp->tx_vring);
+    }
+}
+
+
+static int virtio_net_can_receive(NetClientState *nc)
+{
+    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+    if (!n->vdev.vm_running) {
+        return 0;
+    }
+    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Peek at the available rx buffers without consuming them and sum up
+ * their lengths */
+static int rx_mergeable_buf_sz(VirtIONet *n)
+{
+    uint16_t start, idx, head;
+    int total = 0;
+    Vring *vring = n->dp->rx_vring;
+    struct vring_desc *dsc;
+    struct vring_desc *base;
+
+    for (start = vring->last_avail_idx; start != vring->vr.avail->idx;
+            start++) {
+        head = start % vring->vr.num;
+        idx = vring->vr.avail->ring[head];
+        if (vring->vr.desc[idx].flags & VRING_DESC_F_INDIRECT) {
+            base = hostmem_lookup(&vring->hostmem, vring->vr.desc[idx].addr,
+                    vring->vr.desc[idx].len, 0);
+        } else {
+            base = vring->vr.desc;
+        }
+        dsc = base;
+        do {
+            total += dsc->len;
+            if (!(dsc->flags & VRING_DESC_F_NEXT)) {
+                break;
+            }
+            dsc = &base[dsc->next];
+        } while (true);
+    }
+    return total;
+}
+
+static bool virtnet_has_buffers(VirtIONet *n, int bufsize)
+{
+    if (!vring_more_avail(n->dp->rx_vring)) {
+        return false;
+    }
+    if (n->mergeable_rx_bufs) {
+        if (rx_mergeable_buf_sz(n) < bufsize) {
+            return false;
+        }
+    }
+    return true;
+}
+
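+/* Receive path, run outside the big lock: copy the packet into guest rx
+ * buffers (spanning several buffers when mergeable rx buffers are
+ * negotiated), then flush the used ring and notify the guest if needed. */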
+static ssize_t virtnet_rx(NetClientState *nc, const uint8_t *buf, size_t size)
+{
+    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+    struct iovec in_vec[VIRTQUEUE_MAX_SIZE], head_iov[2], *end;
+    unsigned int in_num, out_num, vnet_hdr_sz;
+    int head;
+    size_t len, total, offset = 0;
+    uint16_t numbuff = 0;
+
+    total = offset = 0;
+
+    end = &in_vec[VIRTQUEUE_MAX_SIZE];
+    if (!virtio_net_receive_filter(n, buf, size)) {
+        return size;
+    }
+
+    /* enough buff ? */
+    if (!virtnet_has_buffers(n, size)) {
+        vring_enable_notification(&n->vdev, n->dp->rx_vring);
+        return 0;
+    }
+
+    while (size > offset) {
+        head = vring_pop(&n->vdev, n->dp->rx_vring, in_vec, end, &out_num,
+                &in_num);
+        if (head < 0) {
+            return 0;
+        }
+        len = 0;
+        if (numbuff == 0) {
+            virtio_net_receive_header(n, in_vec, in_num, buf, size);
+
+            if (n->mergeable_rx_bufs) {
+                vnet_hdr_sz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+            } else {
+                vnet_hdr_sz = sizeof(struct virtio_net_hdr);
+            }
+            iov_copy(head_iov, 2, in_vec, in_num, 0, vnet_hdr_sz);
+            offset += n->host_hdr_len;
+            total += vnet_hdr_sz;
+            len += vnet_hdr_sz;
+        }
+        len += iov_from_buf(in_vec, in_num, vnet_hdr_sz, buf+offset,
+                        size-offset);
+        offset += len;
+        total += len;
+        numbuff++;
+        /* Guest won't see used->idx until we are ready */
+        vring_fill(n->dp->rx_vring, head, len);
+    }
+
+    if (n->mergeable_rx_bufs) {
+        iov_from_buf(head_iov, 2,
+            offsetof(struct virtio_net_hdr_mrg_rxbuf, num_buffers), &numbuff,
+                sizeof(numbuff));
+    }
+    vring_flush(n->dp->rx_vring);
+
+    if (vring_should_notify(&n->vdev, n->dp->rx_vring)) {
+        virtio_irq(n->rx_vq);
+    }
+
+    return size;
+}
+
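+/* The guest kicked the tx queue: ack the host notifier and run the tx path */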
+static void tx_cb(EventHandler *handler, uint32_t events)
+{
+    VirtIONet *n = handler->opaque;
+
+    event_notifier_test_and_clear(handler->notifier);
+    virtnet_handle_tx(&n->vdev, n->tx_vq);
+}
+
+/* The rx vring has buffers again: flush packets queued by the backend */
+static void rx_cb(EventHandler *handler, uint32_t events)
+{
+    VirtIONet *n = handler->opaque;
+
+    event_notifier_test_and_clear(handler->notifier);
+    qemu_flush_queued_packets(&n->nic->nc);
+}
+
+static NetClientInfo net_dp_info = {
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
+    .size = sizeof(NICState),
+    .can_receive = virtio_net_can_receive,
+    .receive = virtnet_rx,
+    .cleanup = virtio_net_cleanup,
+    .link_status_changed = virtio_net_set_link_status,
+};
+
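+/* Allocate the dataplane state and switch the NIC over to the dataplane
+ * receive callbacks */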
+void virtnet_dataplane_create(VirtIONet *n)
+{
+    EventHandler *tx_handler, *rx_handler;
+
+    n->dp = g_malloc(sizeof(VirtIONetDataPlane));
+    n->dp->stop = false;
+    n->dp->rx_vring = g_malloc(sizeof(Vring));
+    n->dp->tx_vring = g_malloc(sizeof(Vring));
+    rx_handler = n->dp->rx_handler = g_malloc(sizeof(EventHandler));
+    tx_handler = n->dp->tx_handler = g_malloc(sizeof(EventHandler));
+    tx_handler->opaque = n;
+    rx_handler->opaque = n;
+
+    /* safely redirect receive handler */
+    n->nic->nc.info = &net_dp_info;
+}
+
+static int virtnet_dataplane_disable_notifiers(VirtIONet *n)
+{
+    int i, r;
+    VirtIODevice *vdev = &n->vdev;
+
+    for (i = 0; i < 2; ++i) {
+        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+        if (r < 0) {
+            fprintf(stderr, "virtnet dataplane %d notifier unbinding failed: "
+                    "%d\n", i, -r);
+        }
+    }
+    return r;
+}
+
+static int virtnet_dataplane_enable_notifiers(VirtIONet *n)
+{
+    int i, r;
+    VirtIODevice *vdev = &n->vdev;
+
+    if (!vdev->binding->set_host_notifier) {
+        fprintf(stderr, "binding does not support host notifiers\n");
+        r = -ENOSYS;
+        goto fail;
+    }
+    for (i = 0; i < 2; ++i) {
+        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true);
+        if (r < 0) {
+            fprintf(stderr, "virtnet dataplane %d notifier binding failed: "
+                    "%d\n", i, -r);
+            goto fail_vq;
+        }
+    }
+
+    return 0;
+fail_vq:
+    while (--i >= 0) {
+        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+        if (r < 0) {
+            fprintf(stderr, "virtnet dataplane %d notifier cleanup error: "
+                    "%d\n", i, -r);
+            fflush(stderr);
+        }
+        assert(r >= 0);
+    }
+fail:
+    return r;
+}
+
+
+static void thread_cb(EventHandler *handler, uint32_t events)
+{
+    EventNotifier *e = handler->notifier;
+    event_notifier_test_and_clear(e);
+}
+
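+/* Dataplane worker thread: tell the creator we are running, then poll the
+ * registered notifiers until asked to exit */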
+static void *working_thread(void *data)
+{
+    WorkThread *t = (WorkThread *)data;
+
+    qemu_mutex_lock(&t->lock);
+    qemu_cond_signal(&t->cond_start);
+    qemu_mutex_unlock(&t->lock);
+    while (t->state == THREAD_START) {
+        event_poll(&t->polltbl);
+    }
+    return NULL;
+}
+
+static void init_work_thread(void)
+{
+    EventHandler *thread_handler = g_malloc(sizeof(EventHandler));
+    WorkThread *t = &virt_net_thread;
+
+    qemu_mutex_init(&t->lock);
+    qemu_cond_init(&t->cond_start);
+    event_poll_init(&t->polltbl, 4);
+    event_notifier_init(&t->e, 0);
+    event_poll_add(&t->polltbl, thread_handler, &t->e, thread_cb);
+    qemu_mutex_lock(&t->lock);
+    t->state = THREAD_START;
+    qemu_thread_create(&t->thread, working_thread, t, QEMU_THREAD_JOINABLE);
+    qemu_cond_wait(&t->cond_start, &t->lock);
+    qemu_mutex_unlock(&t->lock);
+}
+
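+/* Hand the rx/tx queues over to the dataplane: bind the host notifiers, map
+ * both vrings, start the worker thread and register the queue handlers with
+ * its poll table */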
+void virtnet_dataplane_start(VirtIONet *n)
+{
+    bool rslt;
+    EventNotifier *tx_e = virtio_queue_get_host_notifier(n->tx_vq);
+    EventNotifier *rx_e = virtio_queue_get_host_notifier(n->rx_vq);
+    WorkThread *t = &virt_net_thread;
+
+    virtnet_dataplane_enable_notifiers(n);
+    rslt = vring_setup(n->dp->rx_vring, &n->vdev, 0);
+    if (!rslt) {
+        error_report("failed to set up rx vring");
+        exit(1);
+    }
+    vring_restore(n->dp->rx_vring,
+                    virtio_queue_get_last_avail_idx(&n->vdev, 0));
+    rslt = vring_setup(n->dp->tx_vring, &n->vdev, 1);
+    if (!rslt) {
+        error_report("failed to set up tx vring");
+        exit(1);
+    }
+    vring_restore(n->dp->tx_vring,
+            virtio_queue_get_last_avail_idx(&n->vdev, 1));
+    init_work_thread();
+
+    event_poll_add(&t->polltbl, n->dp->rx_handler, rx_e, rx_cb);
+    event_poll_add(&t->polltbl, n->dp->tx_handler, tx_e, tx_cb);
+}
+
+void virtnet_dataplane_stop(VirtIONet *n)
+{
+    EventNotifier *rx_e = virtio_queue_get_host_notifier(n->rx_vq);
+    EventNotifier *tx_e = virtio_queue_get_host_notifier(n->tx_vq);
+    WorkThread *t = &virt_net_thread;
+
+    event_poll_del_fd(&t->polltbl, event_notifier_get_fd(rx_e));
+    event_poll_del_fd(&t->polltbl, event_notifier_get_fd(tx_e));
+
+    t->state = THREAD_EXIT;
+    event_notifier_set(&t->e);
+    qemu_thread_join(&t->thread);
+    virtio_queue_set_last_avail_idx(&n->vdev, 0,
+            n->dp->rx_vring->last_avail_idx);
+    virtio_queue_set_last_avail_idx(&n->vdev, 1,
+            n->dp->tx_vring->last_avail_idx);
+    vring_teardown(n->dp->rx_vring);
+    vring_teardown(n->dp->tx_vring);
+    virtnet_dataplane_disable_notifiers(n);
+}
+
+void virtnet_dataplane_destroy(VirtIONet *n)
+{
+    virtnet_dataplane_stop(n);
+    g_free(n->dp->rx_vring);
+    g_free(n->dp->tx_vring);
+    g_free(n->dp->rx_handler);
+    g_free(n->dp->tx_handler);
+    g_free(n->dp);
+}
diff --git a/hw/dataplane/virtio-net.h b/hw/dataplane/virtio-net.h
new file mode 100644
index 0000000..e50b2de
--- /dev/null
+++ b/hw/dataplane/virtio-net.h
@@ -0,0 +1,26 @@
+/* Copyright IBM, Corp. 2013
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#ifndef VIRT_NET_DATAPLANE_H
+#define VIRT_NET_DATAPLANE_H
+
+#include "event-poll.h"
+#include "qemu/thread.h"
+#include "hw/virtio-net.h"
+
+typedef enum {
+    THREAD_START,
+    THREAD_EXIT,
+} WorkState;
+
+typedef struct WorkThread {
+    EventPoll polltbl;
+    QemuThread thread;
+    EventNotifier e;
+
+    WorkState state;
+    QemuMutex lock;
+    QemuCond cond_start;
+} WorkThread;
+
+extern WorkThread virt_net_thread;
+#endif
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 5d03b31..6bf4a40 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -26,47 +26,6 @@
 #define MAC_TABLE_ENTRIES    64
 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
 
-typedef struct VirtIONet
-{
-    VirtIODevice vdev;
-    uint8_t mac[ETH_ALEN];
-    uint16_t status;
-    VirtQueue *rx_vq;
-    VirtQueue *tx_vq;
-    VirtQueue *ctrl_vq;
-    NICState *nic;
-    QEMUTimer *tx_timer;
-    QEMUBH *tx_bh;
-    uint32_t tx_timeout;
-    int32_t tx_burst;
-    int tx_waiting;
-    uint32_t has_vnet_hdr;
-    size_t host_hdr_len;
-    size_t guest_hdr_len;
-    uint8_t has_ufo;
-    struct {
-        VirtQueueElement elem;
-        ssize_t len;
-    } async_tx;
-    int mergeable_rx_bufs;
-    uint8_t promisc;
-    uint8_t allmulti;
-    uint8_t alluni;
-    uint8_t nomulti;
-    uint8_t nouni;
-    uint8_t nobcast;
-    uint8_t vhost_started;
-    struct {
-        int in_use;
-        int first_multi;
-        uint8_t multi_overflow;
-        uint8_t uni_overflow;
-        uint8_t *macs;
-    } mac_table;
-    uint32_t *vlans;
-    DeviceState *qdev;
-} VirtIONet;
-
 /* TODO
  * - we could suppress RX interrupt if we were so inclined.
  */
@@ -165,7 +124,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
     }
 }
 
-static void virtio_net_set_link_status(NetClientState *nc)
+void virtio_net_set_link_status(NetClientState *nc)
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
     uint16_t old_status = n->status;
@@ -528,8 +487,8 @@ static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
     }
 }
 
-static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
-                           const void *buf, size_t size)
+void virtio_net_receive_header(VirtIONet *n, const struct iovec *iov,
+                           int iov_cnt, const void *buf, size_t size)
 {
     if (n->has_vnet_hdr) {
         /* FIXME this cast is evil */
@@ -546,7 +505,7 @@ static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
     }
 }
 
-static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
+int virtio_net_receive_filter(VirtIONet *n, const uint8_t *buf, int size)
 {
     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
     static const uint8_t vlan[] = {0x81, 0x00};
@@ -612,8 +571,9 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
     if (!virtio_net_has_buffers(n, size + n->guest_hdr_len - n->host_hdr_len))
         return 0;
 
-    if (!receive_filter(n, buf, size))
+    if (!virtio_net_receive_filter(n, buf, size)) {
         return size;
+    }
 
     offset = i = 0;
 
@@ -649,7 +609,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
                                     sizeof(mhdr.num_buffers));
             }
 
-            receive_header(n, sg, elem.in_num, buf, size);
+            virtio_net_receive_header(n, sg, elem.in_num, buf, size);
             offset = n->host_hdr_len;
             total += n->guest_hdr_len;
             guest_offset = n->guest_hdr_len;
@@ -994,7 +954,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
     return 0;
 }
 
-static void virtio_net_cleanup(NetClientState *nc)
+void virtio_net_cleanup(NetClientState *nc)
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 
diff --git a/hw/virtio-net.h b/hw/virtio-net.h
index d46fb98..ed91a02 100644
--- a/hw/virtio-net.h
+++ b/hw/virtio-net.h
@@ -159,4 +159,65 @@ struct virtio_net_ctrl_mac {
         DEFINE_PROP_BIT("ctrl_rx", _state, _field, VIRTIO_NET_F_CTRL_RX, true), \
         DEFINE_PROP_BIT("ctrl_vlan", _state, _field, VIRTIO_NET_F_CTRL_VLAN, true), \
         DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true)
+
+
+#ifdef CONFIG_VIRTIO_NET_DATA_PLANE
+struct VirtIONetDataPlane;
+#endif
+
+typedef struct VirtIONet {
+    VirtIODevice vdev;
+    uint8_t mac[ETH_ALEN];
+    uint16_t status;
+    VirtQueue *rx_vq;
+    VirtQueue *tx_vq;
+    VirtQueue *ctrl_vq;
+    NICState *nic;
+    QEMUTimer *tx_timer;
+    QEMUBH *tx_bh;
+    uint32_t tx_timeout;
+    int32_t tx_burst;
+    int tx_waiting;
+    uint32_t has_vnet_hdr;
+    size_t host_hdr_len;
+    size_t guest_hdr_len;
+    uint8_t has_ufo;
+    struct {
+        VirtQueueElement elem;
+        ssize_t len;
+    } async_tx;
+    int mergeable_rx_bufs;
+    uint8_t promisc;
+    uint8_t allmulti;
+    uint8_t alluni;
+    uint8_t nomulti;
+    uint8_t nouni;
+    uint8_t nobcast;
+    uint8_t vhost_started;
+    struct {
+        int in_use;
+        int first_multi;
+        uint8_t multi_overflow;
+        uint8_t uni_overflow;
+        uint8_t *macs;
+    } mac_table;
+    uint32_t *vlans;
+
+#ifdef CONFIG_VIRTIO_NET_DATA_PLANE
+    struct VirtIONetDataPlane *dp;
+    bool dp_start;
+#endif
+    DeviceState *qdev;
+} VirtIONet;
+
+int virtio_net_receive_filter(VirtIONet *n, const uint8_t *buf, int size);
+void virtio_net_receive_header(VirtIONet *n, const struct iovec *iov,
+                    int iov_cnt, const void *buf, size_t size);
+void virtio_net_set_link_status(NetClientState *nc);
+void virtio_net_cleanup(NetClientState *nc);
+
+#ifdef CONFIG_VIRTIO_NET_DATA_PLANE
+void virtnet_dataplane_create(VirtIONet *n);
+#endif
+
 #endif
-- 
1.7.4.4



