qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] Adding support for Stateless Static NAT for TAP devices


From: John Basila
Subject: [Qemu-devel] Adding support for Stateless Static NAT for TAP devices
Date: Thu, 30 Aug 2012 09:12:19 +0300

When running multiple instances of QEMU from the same image file
(using -snapshot) and connecting each instance to a dedicated TAP
device, the Guest OS will most likely not be able to communicate
with the outside world as all packets leave the Guest OS from the
same IP and thus the Host OS will have difficulty returning the
packets to the correct TAP device/Guest OS. Stateless Static
Network Address Translation or SSNAT allows the QEMU to map the
network of the Guest OS to the network of the TAP device allowing
a unique IP address for each Guest OS that ease such case.
The only mandatory argument to the SSNAT is the Guest OS network
IP, the rest will be figured out from the underlying TAP device.

Signed-off-by: John Basila <address@hidden>
---
 net/tap.c        |  369 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 qapi-schema.json |    5 +-
 qemu-options.hx  |   10 ++-
 3 files changed, 381 insertions(+), 3 deletions(-)

diff --git a/net/tap.c b/net/tap.c
index 1971525..2408a49 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -39,16 +39,88 @@
 #include "qemu-char.h"
 #include "qemu-common.h"
 #include "qemu-error.h"
+#include "qemu_socket.h"

 #include "net/tap-linux.h"

 #include "hw/vhost_net.h"

+#include "checksum.h"
+
+#define ETH_P_ARP      0x0806  /* Address Resolution packet */
+#define ETH_P_IP       0x0800  /* Internet Protocol packet  */
+#define ETH_P_IPV6     0x86DD  /* IPv6 over blueblook */
+
+#define ETH_ALEN 6
+
+struct ethhdr {
+    unsigned char  h_dest[ETH_ALEN];   /* destination eth addr */
+    unsigned char  h_source[ETH_ALEN]; /* source ether addr    */
+    unsigned short h_proto;            /* packet type ID field */
+};
+
+#define IP_PROTO_TCP  6
+#define IP_PROTO_UDP 17
+#define IPV4_ADRESS_LENGTH             4
+
+struct arphdr {
+    unsigned short ar_hrd;      /* format of hardware address */
+    unsigned short ar_pro;      /* format of protocol address */
+    unsigned char  ar_hln;      /* length of hardware address */
+    unsigned char  ar_pln;      /* length of protocol address */
+    unsigned short ar_op;       /* ARP opcode (command)       */
+
+    /*
+     *  Ethernet looks like this : This bit is variable sized however...
+     */
+    unsigned char ar_sha[ETH_ALEN];                    /* sender hardware 
address */
+    unsigned char ar_sip[IPV4_ADRESS_LENGTH];  /* sender IP address       */
+    unsigned char ar_tha[ETH_ALEN];                    /* target hardware 
address */
+    unsigned char ar_tip[IPV4_ADRESS_LENGTH];  /* target IP address       */
+};
+
+#define IP_HEADER_LENGTH(ip) (((ip->ip_hlv)&0xf) << 2)
+
+/** An IPv4 packet header */
+struct iphdr {
+       uint8_t ip_hlv;      /**< Header length and version of the header      
*/
+       uint8_t ip_tos;      /**< Type of Service                              
*/
+       uint16_t ip_len;     /**< Length in octets, inlc. this header and data 
*/
+       uint16_t ip_id;      /**< ID is used to aid in assembling framents     
*/
+       uint16_t ip_off;     /**< Info about fragmentation (control, offset)   
*/
+       uint8_t ip_ttl;      /**< Time to Live                                 
*/
+       uint8_t ip_p;        /**< Next level protocol type                     
*/
+       uint16_t ip_sum;     /**< Header checksum                              
*/
+       uint32_t ip_src;     /**< Source IP address                            
*/
+       uint32_t ip_dst;     /**< Destination IP address                       
*/
+};
+
+/** UDP packet header */
+typedef struct udphdr {
+    uint16_t uh_sport; /* source port */
+    uint16_t uh_dport; /* destination port */
+    uint16_t uh_ulen;  /* udp length */
+    uint16_t uh_chksum;/* udp checksum */
+} udp_header;
+
+
 /* Maximum GSO packet size (64k) plus plenty of room for
  * the ethernet and virtio_net headers
  */
 #define TAP_BUFSIZE (4096 + 65536)

+typedef struct SSNATInfo {
+       unsigned int ssnat_active : 1;
+
+       struct in_addr  ssnat_ifaddr;
+       struct in_addr  ssnat_ifmask;
+       uint8_t                 ssnat_hwaddr[ETH_ALEN];
+
+       struct in_addr ssnat_guest;
+       struct in_addr ssnat_host;
+       struct in_addr ssnat_mask;
+} SSNATInfo;
+
 typedef struct TAPState {
     NetClientState nc;
     int fd;
@@ -59,6 +131,9 @@ typedef struct TAPState {
     unsigned int write_poll : 1;
     unsigned int using_vnet_hdr : 1;
     unsigned int has_ufo: 1;
+
+       SSNATInfo ssnat_info;
+
     VHostNetState *vhost_net;
     unsigned host_vnet_hdr_len;
 } TAPState;
@@ -154,11 +229,154 @@ static ssize_t tap_receive_raw(NetClientState *nc, const 
uint8_t *buf, size_t si
     return tap_write_packet(s, iov, iovcnt);
 }

+#define SSNAT_MAP_IP(_orig, _to, _mask) ( (_orig.s_addr & ~_mask.s_addr) | 
(_to.s_addr & _mask.s_addr) )
+#define SSNAT_IS_MATCH(_orig, _from, _mask)    ( (_orig.s_addr & _mask.s_addr) 
== (_from.s_addr & _mask.s_addr) )
+
+static void tap_ssnat_translate_arp(uint8_t* buf, size_t size, const struct 
in_addr from, const struct in_addr to, const struct in_addr mask)
+{
+       size_t packetSize = size;
+       uint8_t* pPacket = buf;
+
+       if(packetSize >= sizeof(struct arphdr))
+       {
+               struct arphdr* pArpHeader = (struct arphdr*)pPacket;
+               struct in_addr sourceAddress;
+               struct in_addr targetAddress;
+
+               memcpy(&sourceAddress, pArpHeader->ar_sip, 
sizeof(sourceAddress));
+               if( SSNAT_IS_MATCH(sourceAddress, from, mask) )
+               {
+                       sourceAddress.s_addr = SSNAT_MAP_IP(sourceAddress, to, 
mask);
+                       memcpy(pArpHeader->ar_sip, &sourceAddress, 
sizeof(sourceAddress));
+               }
+
+               memcpy(&targetAddress, pArpHeader->ar_tip, 
sizeof(targetAddress));
+               if( SSNAT_IS_MATCH(targetAddress, from, mask) )
+               {
+                       targetAddress.s_addr = SSNAT_MAP_IP(targetAddress, to, 
mask);
+                       memcpy(pArpHeader->ar_tip, &targetAddress, 
sizeof(targetAddress));
+               }
+       }
+}
+
+static void tap_ssnat_adjust_ip_checksums(uint8_t* pBuffer, const size_t size)
+{
+       size_t packetSize = size;
+       uint8_t* pPacket = pBuffer;
+
+       if(packetSize >= sizeof(struct iphdr))
+       {
+               struct iphdr* pIpHeader = (struct iphdr*)pPacket;
+               uint16_t* pCheckSumField = NULL;
+               uint32_t uiCheckSum = 0;
+
+               pIpHeader->ip_sum = 0;
+               if(IP_HEADER_LENGTH(pIpHeader) <= packetSize)
+               {
+                       /*pIpHeader->ip_sum = 
tap_ssnat_calculate_checksum(pIpHeader, IP_HEADER_LENGTH(pIpHeader));*/
+
+                       uiCheckSum = 
net_checksum_add(IP_HEADER_LENGTH(pIpHeader), (uint8_t*)pIpHeader);
+                       pIpHeader->ip_sum = 
htons(net_checksum_finish(uiCheckSum));
+               }
+
+               switch(pIpHeader->ip_p)
+               {
+                       case IP_PROTO_TCP:
+                       {
+                               struct tcphdr* pTcpHeader = (struct 
tcphdr*)(pPacket + IP_HEADER_LENGTH(pIpHeader));
+                               pCheckSumField = &pTcpHeader->check;
+                               break;
+                       }
+
+                       case IP_PROTO_UDP:
+                       {
+                               struct udphdr* pUdpHeader = (struct 
udphdr*)(pPacket + IP_HEADER_LENGTH(pIpHeader));
+                               pCheckSumField = &pUdpHeader->uh_chksum;
+                               break;
+                       }
+
+                       default:
+                               return;
+               }
+
+               *pCheckSumField = 0;
+               uiCheckSum =  net_checksum_add(ntohs(pIpHeader->ip_len) - 
IP_HEADER_LENGTH(pIpHeader), pPacket + IP_HEADER_LENGTH(pIpHeader));
+               uiCheckSum += net_checksum_add(sizeof(pIpHeader->ip_src) + 
sizeof(pIpHeader->ip_dst), (uint8_t*)&pIpHeader->ip_src);
+               uiCheckSum += pIpHeader->ip_p + ntohs(pIpHeader->ip_len) - 
IP_HEADER_LENGTH(pIpHeader);
+               *pCheckSumField = htons(net_checksum_finish(uiCheckSum));
+       }
+}
+
+static void tap_ssnat_translate_ip(uint8_t* buf, size_t size, const struct 
in_addr from, const struct in_addr to, const struct in_addr mask)
+{
+       size_t packetSize = size;
+       uint8_t* pPacket = buf;
+
+       if(packetSize >= sizeof(struct iphdr))
+       {
+               struct iphdr* pIpHeader = (struct iphdr*)pPacket;
+               struct in_addr sourceAddress;
+               struct in_addr targetAddress;
+               int iCalculateCheckSum = 0;
+
+               sourceAddress.s_addr = pIpHeader->ip_src;
+               targetAddress.s_addr = pIpHeader->ip_dst;
+               if( SSNAT_IS_MATCH(sourceAddress, from, mask) )
+               {
+                       pIpHeader->ip_src = SSNAT_MAP_IP(sourceAddress, to, 
mask);
+                       iCalculateCheckSum = 1;
+               }
+
+               if( SSNAT_IS_MATCH(targetAddress, from, mask) )
+               {
+                       pIpHeader->ip_dst = SSNAT_MAP_IP(targetAddress, to, 
mask);
+                       iCalculateCheckSum = 1;
+               }
+
+               if(iCalculateCheckSum)
+               {
+                       tap_ssnat_adjust_ip_checksums(pPacket, packetSize);
+               }
+       }
+}
+
+static void tap_ssnat_translate(uint8_t* buf, size_t size, const struct 
in_addr from, const struct in_addr to, const struct in_addr mask)
+{
+       size_t packetSize = size;
+       uint8_t* pPacket = buf;
+
+       if(packetSize >= sizeof(struct ethhdr))
+       {
+               struct ethhdr *pEthernetHeader = (struct ethhdr*)pPacket;
+               pPacket += sizeof(struct ethhdr);
+               packetSize -= sizeof(struct ethhdr);
+               switch(htons(pEthernetHeader->h_proto))
+               {
+                       case ETH_P_ARP:
+                                       tap_ssnat_translate_arp(pPacket, 
packetSize, from, to, mask);
+                               break;
+
+                       case ETH_P_IP:
+                                       tap_ssnat_translate_ip(pPacket, 
packetSize, from, to, mask);
+                               break;
+               }
+       }
+}
+
+static void tap_ssnat_reveive(TAPState *s, uint8_t* buf, size_t size)
+{
+       if(s->ssnat_info.ssnat_active)
+       {
+               tap_ssnat_translate(buf, size, s->ssnat_info.ssnat_guest, 
s->ssnat_info.ssnat_host,s->ssnat_info.ssnat_mask);
+       }
+}
+
 static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
     struct iovec iov[1];

+       tap_ssnat_reveive(s, (uint8_t*)buf, size);
     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
         return tap_receive_raw(nc, buf, size);
     }
@@ -189,6 +407,14 @@ static void tap_send_completed(NetClientState *nc, ssize_t 
len)
     tap_read_poll(s, 1);
 }

+static void tap_ssnat_send(TAPState *s, uint8_t *buf, size_t size)
+{
+       if(s->ssnat_info.ssnat_active)
+       {
+               tap_ssnat_translate(buf, size, s->ssnat_info.ssnat_host, 
s->ssnat_info.ssnat_guest, s->ssnat_info.ssnat_mask);
+       }
+}
+
 static void tap_send(void *opaque)
 {
     TAPState *s = opaque;
@@ -207,6 +433,8 @@ static void tap_send(void *opaque)
             size -= s->host_vnet_hdr_len;
         }

+               tap_ssnat_send(s, buf, size);
+
         size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
         if (size == 0) {
             tap_read_poll(s, 0);
@@ -586,6 +814,145 @@ static int net_tap_init(const NetdevTapOptions *tap, int 
*vnet_hdr,
     return fd;
 }

+static int get_str_sep(char *buf, int buf_size, const char **pp, int sep)
+{
+    const char *p, *p1;
+    int len;
+    p = *pp;
+    p1 = strchr(p, sep);
+    if (!p1)
+        return -1;
+    len = p1 - p;
+    p1++;
+    if (buf_size > 0) {
+        if (len > buf_size - 1)
+            len = buf_size - 1;
+        memcpy(buf, p, len);
+        buf[len] = '\0';
+    }
+    *pp = p1;
+    return 0;
+}
+
+static int ssnat_net_tap_set_ifinfo(const char* ifname, TAPState* s)
+{
+       int iReturnValue = -1;
+       struct ifreq ifr;
+       int fd = -1;
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if(fd > 0)
+       {
+               strncpy(ifr.ifr_name, ifname, IFNAMSIZ-1);
+
+               if(ioctl(fd, SIOCGIFHWADDR, &ifr) == 0)
+               {
+                       memcpy(s->ssnat_info.ssnat_hwaddr, 
(void*)ifr.ifr_hwaddr.sa_data, sizeof(s->ssnat_info.ssnat_hwaddr));
+
+                       if(ioctl(fd, SIOCGIFADDR, &ifr) == 0)
+                       {
+                               memcpy(&s->ssnat_info.ssnat_ifaddr, &(((struct 
sockaddr_in *)&ifr.ifr_addr)->sin_addr), sizeof(s->ssnat_info.ssnat_ifaddr));
+
+                               if(ioctl(fd, SIOCGIFNETMASK, &ifr) == 0)
+                               {
+                                       memcpy(&s->ssnat_info.ssnat_ifmask, 
&(((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr), 
sizeof(s->ssnat_info.ssnat_ifmask));
+                                       iReturnValue = 0;
+                               }
+                       }
+                       else
+                               error_report("failed to fetch the ip address of 
interface '%s'", ifname);
+               }
+               else
+                       error_report("failed to fetch the hardware address of 
interface '%s'", ifname);
+
+               close(fd);
+       }
+
+       return iReturnValue;
+}
+
+static int ssnat_net_tap_init(TAPState* s, const NetdevTapOptions *tap)
+{
+       int returnValue = -1;
+
+       if(tap->has_ssnat)
+       {
+               const char *ifname = NULL;
+               ifname = tap->ifname;
+               if(ifname)
+               {
+                       const char* ssnat_value = tap->ssnat;
+                       if(ssnat_net_tap_set_ifinfo(ifname, s) >= 0)
+                       {
+                               char ssnat_str[1024] = { 0 };
+                               const char* p = ssnat_value;
+
+                               pstrcpy(ssnat_str, sizeof(ssnat_str), 
ssnat_value);
+                               if(get_str_sep(ssnat_str, sizeof(ssnat_str), 
&p, ':') >= 0)
+                               {
+                                       if(ssnat_str[0])
+                                       {
+                                               if(inet_aton(ssnat_str, 
&s->ssnat_info.ssnat_guest))
+                                               {
+                                                       
if(get_str_sep(ssnat_str, sizeof(ssnat_str), &p, ':') >= 0)
+                                                       {
+                                                               if(ssnat_str[0])
+                                                               {
+                                                                       
if(inet_aton(ssnat_str, &s->ssnat_info.ssnat_mask))
+                                                                       {
+                                                                               
if(p[0])
+                                                                               
{
+                                                                               
        if(inet_aton(p, &s->ssnat_info.ssnat_host))
+                                                                               
        {
+                                                                               
                returnValue = 0;
+                                                                               
        }
+                                                                               
        else
+                                                                               
                error_report("invalid stateless static nat rule '%s', invalid 
host-side-ip", ssnat_value);
+                                                                               
}
+                                                                               
else
+                                                                               
        error_report("invalid stateless static nat rule '%s', empty 
host-side-ip", ssnat_value);
+                                                                       }
+                                                                       else
+                                                                               
error_report("invalid stateless static nat rule '%s', invalid netmask", 
ssnat_value);
+                                                               }
+                                                               else
+                                                                       
error_report("invalid stateless static nat rule '%s', empty netmask length", 
ssnat_value);
+                                                       }
+                                                       else
+                                                               
error_report("invalid stateless static nat rule '%s', incomplete rule", 
ssnat_value);
+                                               }
+                                               else
+                                                       error_report("invalid 
stateless static nat rule '%s', invalid guest-side-ip", ssnat_value);
+                                       }
+                                       else
+                                               error_report("invalid stateless 
static nat rule '%s', empty guest-side-ip", ssnat_value);
+                               }
+                               else
+                               {
+                                       if(inet_aton(p, 
&s->ssnat_info.ssnat_guest))
+                                       {
+                                               s->ssnat_info.ssnat_host = 
s->ssnat_info.ssnat_ifaddr;
+                                               s->ssnat_info.ssnat_mask = 
s->ssnat_info.ssnat_ifmask;
+
+                                               s->ssnat_info.ssnat_active = 1;
+                                               returnValue = 0;
+                                       }
+                                       else
+                                               error_report("invalid stateless 
static nat rule '%s'", ssnat_value);
+                               }
+                       }
+                       else
+                               error_report("failed to fetch the interface 
'%s' information", ifname);
+               }
+               else
+                       error_report("could not retrieve ifname attribute");
+       }
+       else
+               returnValue = 0;
+
+       return returnValue;
+}
+
 int net_init_tap(const NetClientOptions *opts, const char *name,
                  NetClientState *peer)
 {
@@ -705,7 +1072,7 @@ int net_init_tap(const NetClientOptions *opts, const char 
*name,
         return -1;
     }

-    return 0;
+    return ssnat_net_tap_init(s, tap);
 }

 VHostNetState *tap_get_vhost_net(NetClientState *nc)
diff --git a/qapi-schema.json b/qapi-schema.json
index bd8ad74..59aa127 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2105,6 +2105,8 @@
 #
 # @vhostforce: #optional vhost on for non-MSIX virtio guests
 #
+# @ssnat: #optional stateless static nat
+#
 # Since 1.2
 ##
 { 'type': 'NetdevTapOptions',
@@ -2118,7 +2120,8 @@
     '*vnet_hdr':   'bool',
     '*vhost':      'bool',
     '*vhostfd':    'str',
-    '*vhostforce': 'bool' } }
+    '*vhostforce': 'bool',
+    '*ssnat':      'str' } }

 ##
 # @NetdevSocketOptions
diff --git a/qemu-options.hx b/qemu-options.hx
index 3c411c4..c0aa852 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1268,7 +1268,9 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
     "-net tap[,vlan=n][,name=str],ifname=name\n"
     "                connect the host TAP network interface to VLAN 'n'\n"
 #else
-    "-net 
tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h][,vhostforce=on|off]\n"
+    "-net 
tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile]\n"
+    "        
[,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h]\n"
+    "        [,vhostforce=on|off][,ssnat=rule]\n"
     "                connect the host TAP network interface to VLAN 'n' \n"
     "                use network scripts 'file' (default=" 
DEFAULT_NETWORK_SCRIPT ")\n"
     "                to configure it and 'dfile' (default=" 
DEFAULT_NETWORK_DOWN_SCRIPT ")\n"
@@ -1285,6 +1287,12 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
     "                    (only has effect for virtio guests which use MSIX)\n"
     "                use vhostforce=on to force vhost on for non-MSIX virtio 
guests\n"
     "                use 'vhostfd=h' to connect to an already opened vhost net 
device\n"
+    "                use 'ssnat=rule' to create stateless static nat\n"
+    "                    rule: <guest-side-ip>[:<netmask>:<host-side-ip>]\n"
+    "                    for example: 
'ssnat=172.16.0.0:255.255.255.0:192.168.1.0' will result in translating\n"
+    "                                 the Guest machine IP addresses from 
172.16.0.x to 192.168.1.x\n"
+    "                      If only the guest-side-ip parameter is passed, the 
netmask and host-side-ip\n"
+    "                      will be taken from the interface passed via 
ifname\n"
     "-net bridge[,vlan=n][,name=str][,br=bridge][,helper=helper]\n"
     "                connects a host TAP network interface to a host bridge 
device 'br'\n"
     "                (default=" DEFAULT_BRIDGE_INTERFACE ") using the program 
'helper'\n"
--
1.7.2.5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]