qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 01/12] tap: multiqueue support


From: Jason Wang
Subject: [Qemu-devel] [PATCH 01/12] tap: multiqueue support
Date: Fri, 28 Dec 2012 18:31:53 +0800

Recently, linux support multiqueue tap which could let userspace call TUNSETIFF
for a signle device many times to create multiple file descriptors as
independent queues. User could also enable/disabe a specific queue through
TUNSETQUEUE.

The patch adds the generic infrastructure to create multiqueue taps. To achieve
this a new parameter "queues" were introduced to specify how many queues were
expected to be created for tap. The "fd" parameter were also changed to support
a list of file descriptors which could be used by management (such as libvirt)
to pass pre-created file descriptors (queues) to qemu.

Each TAPState were still associated to a tap fd, which mean multiple TAPStates
were created when user needs multiqueue taps.

Only linux part were implemented now, since it's the only OS that support
multiqueue tap.

Signed-off-by: Jason Wang <address@hidden>
---
 net/tap-aix.c     |   18 ++++-
 net/tap-bsd.c     |   18 ++++-
 net/tap-haiku.c   |   18 ++++-
 net/tap-linux.c   |   70 +++++++++++++++-
 net/tap-linux.h   |    4 +
 net/tap-solaris.c |   18 ++++-
 net/tap-win32.c   |   10 ++
 net/tap.c         |  248 +++++++++++++++++++++++++++++++++++++----------------
 net/tap.h         |    8 ++-
 qapi-schema.json  |    5 +-
 10 files changed, 335 insertions(+), 82 deletions(-)

diff --git a/net/tap-aix.c b/net/tap-aix.c
index f27c177..f931ef3 100644
--- a/net/tap-aix.c
+++ b/net/tap-aix.c
@@ -25,7 +25,8 @@
 #include "net/tap.h"
 #include <stdio.h>
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int 
vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     fprintf(stderr, "no tap on AIX\n");
     return -1;
@@ -59,3 +60,18 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_attach(int fd)
+{
+    return -1;
+}
+
+int tap_fd_detach(int fd)
+{
+    return -1;
+}
+
+int tap_fd_ifname(int fd, char *ifname)
+{
+    return -1;
+}
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index a3b717d..07c287d 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -33,7 +33,8 @@
 #include <net/if_tap.h>
 #endif
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int 
vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     int fd;
 #ifdef TAPGIFNAME
@@ -145,3 +146,18 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_attach(int fd)
+{
+    return -1;
+}
+
+int tap_fd_detach(int fd)
+{
+    return -1;
+}
+
+int tap_fd_ifname(int fd, char *ifname)
+{
+    return -1;
+}
diff --git a/net/tap-haiku.c b/net/tap-haiku.c
index 34739d1..62ab423 100644
--- a/net/tap-haiku.c
+++ b/net/tap-haiku.c
@@ -25,7 +25,8 @@
 #include "net/tap.h"
 #include <stdio.h>
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int 
vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     fprintf(stderr, "no tap on Haiku\n");
     return -1;
@@ -59,3 +60,18 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_attach(int fd)
+{
+    return -1;
+}
+
+int tap_fd_detach(int fd)
+{
+    return -1;
+}
+
+int tap_fd_ifname(int fd, char *ifname)
+{
+    return -1;
+}
diff --git a/net/tap-linux.c b/net/tap-linux.c
index c6521be..0854ef5 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -35,7 +35,8 @@
 
 #define PATH_NET_TUN "/dev/net/tun"
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int 
vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     struct ifreq ifr;
     int fd, ret;
@@ -67,6 +68,20 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, 
int vnet_hdr_required
         }
     }
 
+    if (mq_required) {
+        unsigned int features;
+
+        if ((ioctl(fd, TUNGETFEATURES, &features) != 0) ||
+            !(features & IFF_MULTI_QUEUE)) {
+            error_report("multiqueue required, but no kernel "
+                         "support for IFF_MULTI_QUEUE available");
+            close(fd);
+            return -1;
+        } else {
+            ifr.ifr_flags |= IFF_MULTI_QUEUE;
+        }
+    }
+
     if (ifname[0] != '\0')
         pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname);
     else
@@ -200,3 +215,56 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
         }
     }
 }
+
+/* Attach a file descriptor to a TUN/TAP device. This descriptor should be
+ * detached before.
+ */
+int tap_fd_attach(int fd)
+{
+    struct ifreq ifr;
+    int ret;
+
+    memset(&ifr, 0, sizeof(ifr));
+
+    ifr.ifr_flags = IFF_ATTACH_QUEUE;
+    ret = ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+
+    if (ret != 0) {
+        error_report("could not attach fd to tap");
+    }
+
+    return ret;
+}
+
+/* Detach a file descriptor to a TUN/TAP device. This file descriptor must have
+ * been attach to a device.
+ */
+int tap_fd_detach(int fd)
+{
+    struct ifreq ifr;
+    int ret;
+
+    memset(&ifr, 0, sizeof(ifr));
+
+    ifr.ifr_flags = IFF_DETACH_QUEUE;
+    ret = ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+
+    if (ret != 0) {
+        error_report("could not detach fd");
+    }
+
+    return ret;
+}
+
+int tap_get_ifname(int fd, char *ifname)
+{
+    struct ifreq ifr;
+
+    if (ioctl(fd, TUNGETIFF, &ifr) != 0) {
+        error_report("TUNGETIFF ioctl() failed: %s", strerror(errno));
+        return -1;
+    }
+
+    pstrcpy(ifname, sizeof(ifr.ifr_name), ifr.ifr_name);
+    return 0;
+}
diff --git a/net/tap-linux.h b/net/tap-linux.h
index 659e981..648d29f 100644
--- a/net/tap-linux.h
+++ b/net/tap-linux.h
@@ -29,6 +29,7 @@
 #define TUNSETSNDBUF   _IOW('T', 212, int)
 #define TUNGETVNETHDRSZ _IOR('T', 215, int)
 #define TUNSETVNETHDRSZ _IOW('T', 216, int)
+#define TUNSETQUEUE  _IOW('T', 217, int)
 
 #endif
 
@@ -36,6 +37,9 @@
 #define IFF_TAP                0x0002
 #define IFF_NO_PI      0x1000
 #define IFF_VNET_HDR   0x4000
+#define IFF_MULTI_QUEUE 0x0100
+#define IFF_ATTACH_QUEUE 0x0200
+#define IFF_DETACH_QUEUE 0x0400
 
 /* Features for GSO (TUNSETOFFLOAD). */
 #define TUN_F_CSUM     0x01    /* You can hand me unchecksummed packets. */
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index 5d6ac42..2df3ec1 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -173,7 +173,8 @@ static int tap_alloc(char *dev, size_t dev_size)
     return tap_fd;
 }
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int 
vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     char  dev[10]="";
     int fd;
@@ -225,3 +226,18 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_attach(int fd)
+{
+    return -1;
+}
+
+int tap_fd_detach(int fd)
+{
+    return -1;
+}
+
+int tap_fd_ifname(int fd, char *ifname)
+{
+    return -1;
+}
diff --git a/net/tap-win32.c b/net/tap-win32.c
index f9bd741..d7b1f7a 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -763,3 +763,13 @@ void tap_set_vnet_hdr_len(NetClientState *nc, int len)
 {
     assert(0);
 }
+
+int tap_attach(NetClientState *nc)
+{
+    assert(0);
+}
+
+int tap_detach(NetClientState *nc)
+{
+    assert(0);
+}
diff --git a/net/tap.c b/net/tap.c
index 1abfd44..01f826a 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -60,6 +60,7 @@ typedef struct TAPState {
     unsigned int write_poll : 1;
     unsigned int using_vnet_hdr : 1;
     unsigned int has_ufo: 1;
+    unsigned int enabled:1;
     VHostNetState *vhost_net;
     unsigned host_vnet_hdr_len;
 } TAPState;
@@ -73,9 +74,9 @@ static void tap_writable(void *opaque);
 static void tap_update_fd_handler(TAPState *s)
 {
     qemu_set_fd_handler2(s->fd,
-                         s->read_poll  ? tap_can_send : NULL,
-                         s->read_poll  ? tap_send     : NULL,
-                         s->write_poll ? tap_writable : NULL,
+                         s->read_poll && s->enabled ? tap_can_send : NULL,
+                         s->read_poll && s->enabled ? tap_send     : NULL,
+                         s->write_poll && s->enabled ? tap_writable : NULL,
                          s);
 }
 
@@ -340,6 +341,7 @@ static TAPState *net_tap_fd_init(NetClientState *peer,
     s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
     s->using_vnet_hdr = 0;
     s->has_ufo = tap_probe_has_ufo(s->fd);
+    s->enabled = 1;
     tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
     /*
      * Make sure host header length is set correctly in tap:
@@ -559,17 +561,10 @@ int net_init_bridge(const NetClientOptions *opts, const 
char *name,
 
 static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
                         const char *setup_script, char *ifname,
-                        size_t ifname_sz)
+                        size_t ifname_sz, int mq_required)
 {
     int fd, vnet_hdr_required;
 
-    if (tap->has_ifname) {
-        pstrcpy(ifname, ifname_sz, tap->ifname);
-    } else {
-        assert(ifname_sz > 0);
-        ifname[0] = '\0';
-    }
-
     if (tap->has_vnet_hdr) {
         *vnet_hdr = tap->vnet_hdr;
         vnet_hdr_required = *vnet_hdr;
@@ -578,7 +573,8 @@ static int net_tap_init(const NetdevTapOptions *tap, int 
*vnet_hdr,
         vnet_hdr_required = 0;
     }
 
-    TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required));
+    TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
+                      mq_required));
     if (fd < 0) {
         return -1;
     }
@@ -594,69 +590,37 @@ static int net_tap_init(const NetdevTapOptions *tap, int 
*vnet_hdr,
     return fd;
 }
 
-int net_init_tap(const NetClientOptions *opts, const char *name,
-                 NetClientState *peer)
-{
-    const NetdevTapOptions *tap;
-
-    int fd, vnet_hdr = 0;
-    const char *model;
-    TAPState *s;
+#define MAX_TAP_QUEUES 1024
 
-    /* for the no-fd, no-helper case */
-    const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */
-    char ifname[128];
-
-    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP);
-    tap = opts->tap;
-
-    if (tap->has_fd) {
-        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
-            tap->has_vnet_hdr || tap->has_helper) {
-            error_report("ifname=, script=, downscript=, vnet_hdr=, "
-                         "and helper= are invalid with fd=");
-            return -1;
-        }
-
-        fd = monitor_handle_fd_param(cur_mon, tap->fd);
-        if (fd == -1) {
-            return -1;
-        }
-
-        fcntl(fd, F_SETFL, O_NONBLOCK);
-
-        vnet_hdr = tap_probe_vnet_hdr(fd);
-
-        model = "tap";
-
-    } else if (tap->has_helper) {
-        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
-            tap->has_vnet_hdr) {
-            error_report("ifname=, script=, downscript=, and vnet_hdr= "
-                         "are invalid with helper=");
-            return -1;
-        }
-
-        fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE);
-        if (fd == -1) {
-            return -1;
-        }
+static int tap_fd(const StringList *fd, const char **fds)
+{
+    const StringList *c = fd;
+    size_t i = 0, num_opts = 0;
 
-        fcntl(fd, F_SETFL, O_NONBLOCK);
+    while (c) {
+        num_opts++;
+        c = c->next;
+    }
 
-        vnet_hdr = tap_probe_vnet_hdr(fd);
+    if (num_opts == 0) {
+        return 0;
+    }
 
-        model = "bridge";
+    c = fd;
+    while (c) {
+        fds[i++] = c->value->str;
+        c = c->next;
+    }
 
-    } else {
-        script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT;
-        fd = net_tap_init(tap, &vnet_hdr, script, ifname, sizeof ifname);
-        if (fd == -1) {
-            return -1;
-        }
+    return num_opts;
+}
 
-        model = "tap";
-    }
+static int __net_init_tap(const NetdevTapOptions *tap, NetClientState *peer,
+                          const char *model, const char *name,
+                          const char *ifname, const char *script,
+                          const char *downscript, int vnet_hdr, int fd)
+{
+    TAPState *s;
 
     s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
     if (!s) {
@@ -674,11 +638,6 @@ int net_init_tap(const NetClientOptions *opts, const char 
*name,
         snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
                  tap->helper);
     } else {
-        const char *downscript;
-
-        downscript = tap->has_downscript ? tap->downscript :
-                                           DEFAULT_NETWORK_DOWN_SCRIPT;
-
         snprintf(s->nc.info_str, sizeof(s->nc.info_str),
                  "ifname=%s,script=%s,downscript=%s", ifname, script,
                  downscript);
@@ -716,9 +675,150 @@ int net_init_tap(const NetClientOptions *opts, const char 
*name,
     return 0;
 }
 
+int net_init_tap(const NetClientOptions *opts, const char *name,
+                 NetClientState *peer)
+{
+    const NetdevTapOptions *tap;
+    const char *fds[MAX_TAP_QUEUES];
+    int fd, vnet_hdr = 0, i, queues;
+    /* for the no-fd, no-helper case */
+    const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */
+    const char *downscript = NULL;
+    char ifname[128];
+
+    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP);
+    tap = opts->tap;
+    queues = tap->has_queues ? tap->queues : 1;
+
+    if (tap->has_fd) {
+        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
+            tap->has_vnet_hdr || tap->has_helper) {
+            error_report("ifname=, script=, downscript=, vnet_hdr=, "
+                         "and helper= are invalid with fd=");
+            return -1;
+        }
+
+        if (queues != tap_fd(tap->fd, fds)) {
+            error_report("the number of fds were not equal to queues");
+            return -1;
+        }
+
+        for (i = 0; i < queues; i++) {
+            fd = monitor_handle_fd_param(cur_mon, fds[i]);
+            if (fd == -1) {
+                return -1;
+            }
+
+            fcntl(fd, F_SETFL, O_NONBLOCK);
+
+            if (i == 0) {
+                vnet_hdr = tap_probe_vnet_hdr(fd);
+            }
+
+            if (__net_init_tap(tap, peer, "tap", name, ifname,
+                               script, downscript, vnet_hdr, fd)) {
+                return -1;
+            }
+        }
+    } else if (tap->has_helper) {
+        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
+            tap->has_vnet_hdr) {
+            error_report("ifname=, script=, downscript=, and vnet_hdr= "
+                         "are invalid with helper=");
+            return -1;
+        }
+
+        /* FIXME: correct ? */
+        for (i = 0; i < queues; i++) {
+            fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE);
+            if (fd == -1) {
+                return -1;
+            }
+
+            fcntl(fd, F_SETFL, O_NONBLOCK);
+
+            if (i == 0) {
+                vnet_hdr = tap_probe_vnet_hdr(fd);
+            }
+
+            if (__net_init_tap(tap, peer, "bridge", name, ifname,
+                               script, downscript, vnet_hdr, fd)) {
+                return -1;
+            }
+        }
+    } else {
+        script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT;
+        downscript = tap->has_downscript ? tap->downscript :
+                                           DEFAULT_NETWORK_DOWN_SCRIPT;
+
+        if (tap->has_ifname) {
+            pstrcpy(ifname, sizeof ifname, tap->ifname);
+        } else {
+            ifname[0] = '\0';
+        }
+
+        for (i = 0; i < queues; i++) {
+            fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
+                              ifname, sizeof ifname, queues > 1);
+            if (fd == -1) {
+                return -1;
+            }
+
+            if (i == 0 && tap_get_ifname(fd, ifname) != 0) {
+                error_report("could not get ifname");
+                return -1;
+            }
+
+            if (__net_init_tap(tap, peer, "tap", name, ifname,
+                               i >= 1 ? "no" : script,
+                               i >= 1 ? "no" : downscript,
+                               vnet_hdr, fd)) {
+                return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+
 VHostNetState *tap_get_vhost_net(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
     assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
     return s->vhost_net;
 }
+
+int tap_attach(NetClientState *nc)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+    int ret;
+
+    if (s->enabled) {
+        return 0;
+    } else {
+        ret = tap_fd_attach(s->fd);
+        if (ret == 0) {
+            s->enabled = 1;
+            tap_update_fd_handler(s);
+        }
+        return ret;
+    }
+}
+
+int tap_detach(NetClientState *nc)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+    int ret;
+
+    if (s->enabled == 0) {
+        return 0;
+    } else {
+        ret = tap_fd_detach(s->fd);
+        if (ret == 0) {
+            qemu_purge_queued_packets(nc);
+            s->enabled = 0;
+            tap_update_fd_handler(s);
+        }
+        return ret;
+    }
+}
diff --git a/net/tap.h b/net/tap.h
index d44d83a..02f154e 100644
--- a/net/tap.h
+++ b/net/tap.h
@@ -32,7 +32,8 @@
 #define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
 #define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/qemu-ifdown"
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int 
vnet_hdr_required);
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required);
 
 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
 
@@ -49,6 +50,11 @@ int tap_probe_vnet_hdr_len(int fd, int len);
 int tap_probe_has_ufo(int fd);
 void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int 
ufo);
 void tap_fd_set_vnet_hdr_len(int fd, int len);
+int tap_fd_attach(int fd);
+int tap_fd_detach(int fd);
+int tap_attach(NetClientState *nc);
+int tap_detach(NetClientState *nc);
+int tap_get_ifname(int fd, char *ifname);
 
 int tap_get_fd(NetClientState *nc);
 
diff --git a/qapi-schema.json b/qapi-schema.json
index 5dfa052..583eb7c 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2465,7 +2465,7 @@
 { 'type': 'NetdevTapOptions',
   'data': {
     '*ifname':     'str',
-    '*fd':         'str',
+    '*fd':         ['String'],
     '*script':     'str',
     '*downscript': 'str',
     '*helper':     'str',
@@ -2473,7 +2473,8 @@
     '*vnet_hdr':   'bool',
     '*vhost':      'bool',
     '*vhostfd':    'str',
-    '*vhostforce': 'bool' } }
+    '*vhostforce': 'bool',
+    '*queues':     'uint32'} }
 
 ##
 # @NetdevSocketOptions
-- 
1.7.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]