[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] Adding support for Stateless Static NAT for TAP devices
From: |
Blue Swirl |
Subject: |
Re: [Qemu-devel] Adding support for Stateless Static NAT for TAP devices |
Date: |
Sat, 1 Sep 2012 10:37:00 +0000 |
On Thu, Aug 30, 2012 at 6:12 AM, John Basila <address@hidden> wrote:
> When running multiple instances of QEMU from the same image file
> (using -snapshot) and connecting each instance to a dedicated TAP
> device, the Guest OS will most likely not be able to communicate
> with the outside world as all packets leave the Guest OS from the
> same IP and thus the Host OS will have difficulty returning the
> packets to the correct TAP device/Guest OS. Stateless Static
> Network Address Translation or SSNAT allows the QEMU to map the
> network of the Guest OS to the network of the TAP device allowing
> a unique IP address for each Guest OS that ease such case.
> The only mandatory argument to the SSNAT is the Guest OS network
> IP, the rest will be figured out from the underlying TAP device.
>
> Signed-off-by: John Basila <address@hidden>
> ---
> net/tap.c | 369
> +++++++++++++++++++++++++++++++++++++++++++++++++++++-
> qapi-schema.json | 5 +-
> qemu-options.hx | 10 ++-
> 3 files changed, 381 insertions(+), 3 deletions(-)
>
> diff --git a/net/tap.c b/net/tap.c
> index 1971525..2408a49 100644
> --- a/net/tap.c
> +++ b/net/tap.c
> @@ -39,16 +39,88 @@
> #include "qemu-char.h"
> #include "qemu-common.h"
> #include "qemu-error.h"
> +#include "qemu_socket.h"
>
> #include "net/tap-linux.h"
>
> #include "hw/vhost_net.h"
>
> +#include "checksum.h"
> +
> +#define ETH_P_ARP 0x0806 /* Address Resolution packet */
> +#define ETH_P_IP 0x0800 /* Internet Protocol packet */
> +#define ETH_P_IPV6 0x86DD /* IPv6 over blueblook */
Probably not used. Would it be hard to add NAT for IPv6 too?
> +
> +#define ETH_ALEN 6
> +
> +struct ethhdr {
> + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
> + unsigned char h_source[ETH_ALEN]; /* source ether addr */
> + unsigned short h_proto; /* packet type ID field */
> +};
The definitions probably conflict with system headers. Can you use
those or slirp headers instead?
> +
> +#define IP_PROTO_TCP 6
> +#define IP_PROTO_UDP 17
> +#define IPV4_ADRESS_LENGTH 4
> +
> +struct arphdr {
> + unsigned short ar_hrd; /* format of hardware address */
> + unsigned short ar_pro; /* format of protocol address */
> + unsigned char ar_hln; /* length of hardware address */
> + unsigned char ar_pln; /* length of protocol address */
> + unsigned short ar_op; /* ARP opcode (command) */
> +
> + /*
> + * Ethernet looks like this : This bit is variable sized however...
> + */
> + unsigned char ar_sha[ETH_ALEN]; /* sender hardware
> address */
> + unsigned char ar_sip[IPV4_ADRESS_LENGTH]; /* sender IP address */
> + unsigned char ar_tha[ETH_ALEN]; /* target hardware
> address */
> + unsigned char ar_tip[IPV4_ADRESS_LENGTH]; /* target IP address */
> +};
> +
> +#define IP_HEADER_LENGTH(ip) (((ip->ip_hlv)&0xf) << 2)
Please add spaces around operators, like '&' here. I think
checkpatch.pl should catch this.
Macro arguments ('ip' here) should be surrounded by parentheses when used.
> +
> +/** An IPv4 packet header */
> +struct iphdr {
> + uint8_t ip_hlv; /**< Header length and version of the header
> */
Remove '*<'
> + uint8_t ip_tos; /**< Type of Service
> */
> + uint16_t ip_len; /**< Length in octets, inlc. this header and
> data */
incl
> + uint16_t ip_id; /**< ID is used to aid in assembling framents
> */
fragments
> + uint16_t ip_off; /**< Info about fragmentation (control, offset)
> */
> + uint8_t ip_ttl; /**< Time to Live
> */
> + uint8_t ip_p; /**< Next level protocol type
> */
> + uint16_t ip_sum; /**< Header checksum
> */
> + uint32_t ip_src; /**< Source IP address
> */
> + uint32_t ip_dst; /**< Destination IP address
> */
> +};
> +
> +/** UDP packet header */
> +typedef struct udphdr {
> + uint16_t uh_sport; /* source port */
> + uint16_t uh_dport; /* destination port */
> + uint16_t uh_ulen; /* udp length */
> + uint16_t uh_chksum;/* udp checksum */
> +} udp_header;
> +
> +
> /* Maximum GSO packet size (64k) plus plenty of room for
> * the ethernet and virtio_net headers
> */
> #define TAP_BUFSIZE (4096 + 65536)
>
> +typedef struct SSNATInfo {
> + unsigned int ssnat_active : 1;
bool
> +
> + struct in_addr ssnat_ifaddr;
> + struct in_addr ssnat_ifmask;
> + uint8_t ssnat_hwaddr[ETH_ALEN];
> +
> + struct in_addr ssnat_guest;
> + struct in_addr ssnat_host;
> + struct in_addr ssnat_mask;
> +} SSNATInfo;
> +
> typedef struct TAPState {
> NetClientState nc;
> int fd;
> @@ -59,6 +131,9 @@ typedef struct TAPState {
> unsigned int write_poll : 1;
> unsigned int using_vnet_hdr : 1;
> unsigned int has_ufo: 1;
> +
> + SSNATInfo ssnat_info;
This only accommodates a single rule, but it would be nice to support several.
> +
> VHostNetState *vhost_net;
> unsigned host_vnet_hdr_len;
> } TAPState;
> @@ -154,11 +229,154 @@ static ssize_t tap_receive_raw(NetClientState *nc,
> const uint8_t *buf, size_t si
> return tap_write_packet(s, iov, iovcnt);
> }
>
> +#define SSNAT_MAP_IP(_orig, _to, _mask) ( (_orig.s_addr & ~_mask.s_addr) |
> (_to.s_addr & _mask.s_addr) )
> +#define SSNAT_IS_MATCH(_orig, _from, _mask) ( (_orig.s_addr &
> _mask.s_addr) == (_from.s_addr & _mask.s_addr) )
> +
> +static void tap_ssnat_translate_arp(uint8_t* buf, size_t size, const struct
> in_addr from, const struct in_addr to, const struct in_addr mask)
> +{
> + size_t packetSize = size;
Please don't use Hungarian notation. Only struct and typedef names
should use CamelCase. The default indent is 4 and maximum line length
80 chars. Tabs may not be used. Maybe you have not read CODING_STYLE?
> + uint8_t* pPacket = buf;
> +
> + if(packetSize >= sizeof(struct arphdr))
Space after 'if', please.
> + {
Braces must cuddle with the line with 'if', 'else' etc.
> + struct arphdr* pArpHeader = (struct arphdr*)pPacket;
> + struct in_addr sourceAddress;
> + struct in_addr targetAddress;
> +
> + memcpy(&sourceAddress, pArpHeader->ar_sip,
> sizeof(sourceAddress));
> + if( SSNAT_IS_MATCH(sourceAddress, from, mask) )
> + {
> + sourceAddress.s_addr = SSNAT_MAP_IP(sourceAddress,
> to, mask);
> + memcpy(pArpHeader->ar_sip, &sourceAddress,
> sizeof(sourceAddress));
> + }
> +
> + memcpy(&targetAddress, pArpHeader->ar_tip,
> sizeof(targetAddress));
> + if( SSNAT_IS_MATCH(targetAddress, from, mask) )
> + {
> + targetAddress.s_addr = SSNAT_MAP_IP(targetAddress,
> to, mask);
> + memcpy(pArpHeader->ar_tip, &targetAddress,
> sizeof(targetAddress));
> + }
> + }
> +}
> +
> +static void tap_ssnat_adjust_ip_checksums(uint8_t* pBuffer, const size_t
> size)
> +{
> + size_t packetSize = size;
> + uint8_t* pPacket = pBuffer;
> +
> + if(packetSize >= sizeof(struct iphdr))
> + {
> + struct iphdr* pIpHeader = (struct iphdr*)pPacket;
> + uint16_t* pCheckSumField = NULL;
> + uint32_t uiCheckSum = 0;
> +
> + pIpHeader->ip_sum = 0;
> + if(IP_HEADER_LENGTH(pIpHeader) <= packetSize)
> + {
> + /*pIpHeader->ip_sum =
> tap_ssnat_calculate_checksum(pIpHeader, IP_HEADER_LENGTH(pIpHeader));*/
Why commented out code? Remove.
> +
> + uiCheckSum =
> net_checksum_add(IP_HEADER_LENGTH(pIpHeader), (uint8_t*)pIpHeader);
> + pIpHeader->ip_sum =
> htons(net_checksum_finish(uiCheckSum));
> + }
> +
> + switch(pIpHeader->ip_p)
> + {
> + case IP_PROTO_TCP:
Case labels should align with switch statement, the code block is indented.
> + {
> + struct tcphdr* pTcpHeader = (struct
> tcphdr*)(pPacket + IP_HEADER_LENGTH(pIpHeader));
> + pCheckSumField = &pTcpHeader->check;
> + break;
> + }
> +
> + case IP_PROTO_UDP:
> + {
> + struct udphdr* pUdpHeader = (struct
> udphdr*)(pPacket + IP_HEADER_LENGTH(pIpHeader));
> + pCheckSumField = &pUdpHeader->uh_chksum;
> + break;
> + }
> +
> + default:
> + return;
> + }
> +
> + *pCheckSumField = 0;
> + uiCheckSum = net_checksum_add(ntohs(pIpHeader->ip_len) -
> IP_HEADER_LENGTH(pIpHeader), pPacket + IP_HEADER_LENGTH(pIpHeader));
> + uiCheckSum += net_checksum_add(sizeof(pIpHeader->ip_src) +
> sizeof(pIpHeader->ip_dst), (uint8_t*)&pIpHeader->ip_src);
> + uiCheckSum += pIpHeader->ip_p + ntohs(pIpHeader->ip_len) -
> IP_HEADER_LENGTH(pIpHeader);
> + *pCheckSumField = htons(net_checksum_finish(uiCheckSum));
> + }
> +}
> +
> +static void tap_ssnat_translate_ip(uint8_t* buf, size_t size, const struct
> in_addr from, const struct in_addr to, const struct in_addr mask)
> +{
> + size_t packetSize = size;
> + uint8_t* pPacket = buf;
> +
> + if(packetSize >= sizeof(struct iphdr))
> + {
> + struct iphdr* pIpHeader = (struct iphdr*)pPacket;
> + struct in_addr sourceAddress;
> + struct in_addr targetAddress;
> + int iCalculateCheckSum = 0;
'bool'
> +
> + sourceAddress.s_addr = pIpHeader->ip_src;
> + targetAddress.s_addr = pIpHeader->ip_dst;
> + if( SSNAT_IS_MATCH(sourceAddress, from, mask) )
> + {
> + pIpHeader->ip_src = SSNAT_MAP_IP(sourceAddress, to,
> mask);
> + iCalculateCheckSum = 1;
'true'
> + }
> +
> + if( SSNAT_IS_MATCH(targetAddress, from, mask) )
> + {
> + pIpHeader->ip_dst = SSNAT_MAP_IP(targetAddress, to,
> mask);
> + iCalculateCheckSum = 1;
> + }
> +
> + if(iCalculateCheckSum)
> + {
> + tap_ssnat_adjust_ip_checksums(pPacket, packetSize);
> + }
> + }
> +}
> +
> +static void tap_ssnat_translate(uint8_t* buf, size_t size, const struct
> in_addr from, const struct in_addr to, const struct in_addr mask)
> +{
> + size_t packetSize = size;
> + uint8_t* pPacket = buf;
> +
> + if(packetSize >= sizeof(struct ethhdr))
> + {
> + struct ethhdr *pEthernetHeader = (struct ethhdr*)pPacket;
> + pPacket += sizeof(struct ethhdr);
> + packetSize -= sizeof(struct ethhdr);
> + switch(htons(pEthernetHeader->h_proto))
> + {
> + case ETH_P_ARP:
> + tap_ssnat_translate_arp(pPacket,
> packetSize, from, to, mask);
> + break;
The whole code block should use the same indent (except for case label).
> +
> + case ETH_P_IP:
> + tap_ssnat_translate_ip(pPacket,
> packetSize, from, to, mask);
> + break;
> + }
> + }
> +}
> +
> +static void tap_ssnat_reveive(TAPState *s, uint8_t* buf, size_t size)
'receive', but I'd just put the check to tap_receive() and call
tap_ssnat_translate() from there.
> +{
> + if(s->ssnat_info.ssnat_active)
> + {
> + tap_ssnat_translate(buf, size, s->ssnat_info.ssnat_guest,
> s->ssnat_info.ssnat_host,s->ssnat_info.ssnat_mask);
> + }
> +}
> +
> static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t
> size)
> {
> TAPState *s = DO_UPCAST(TAPState, nc, nc);
> struct iovec iov[1];
>
> + tap_ssnat_reveive(s, (uint8_t*)buf, size);
> if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
> return tap_receive_raw(nc, buf, size);
> }
> @@ -189,6 +407,14 @@ static void tap_send_completed(NetClientState *nc,
> ssize_t len)
> tap_read_poll(s, 1);
> }
>
> +static void tap_ssnat_send(TAPState *s, uint8_t *buf, size_t size)
> +{
> + if(s->ssnat_info.ssnat_active)
> + {
> + tap_ssnat_translate(buf, size, s->ssnat_info.ssnat_host,
> s->ssnat_info.ssnat_guest, s->ssnat_info.ssnat_mask);
> + }
> +}
> +
> static void tap_send(void *opaque)
> {
> TAPState *s = opaque;
> @@ -207,6 +433,8 @@ static void tap_send(void *opaque)
> size -= s->host_vnet_hdr_len;
> }
>
> + tap_ssnat_send(s, buf, size);
> +
> size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
> if (size == 0) {
> tap_read_poll(s, 0);
> @@ -586,6 +814,145 @@ static int net_tap_init(const NetdevTapOptions *tap,
> int *vnet_hdr,
> return fd;
> }
>
> +static int get_str_sep(char *buf, int buf_size, const char **pp, int sep)
Probably other net parsing functions, strtok() or GLib string
functions can be reused instead.
> +{
> + const char *p, *p1;
Odd indentation.
> + int len;
> + p = *pp;
> + p1 = strchr(p, sep);
> + if (!p1)
Missing braces.
> + return -1;
> + len = p1 - p;
> + p1++;
> + if (buf_size > 0) {
> + if (len > buf_size - 1)
Ditto.
> + len = buf_size - 1;
> + memcpy(buf, p, len);
> + buf[len] = '\0';
> + }
> + *pp = p1;
> + return 0;
> +}
> +
> +static int ssnat_net_tap_set_ifinfo(const char* ifname, TAPState* s)
Return bool? 'int' could be useful for errno passing.
> +{
> + int iReturnValue = -1;
> + struct ifreq ifr;
> + int fd = -1;
> +
> + fd = socket(AF_INET, SOCK_DGRAM, 0);
> + if(fd > 0)
>=
> + {
> + strncpy(ifr.ifr_name, ifname, IFNAMSIZ-1);
Missing NUL termination, please use pstrcpy().
> +
> + if(ioctl(fd, SIOCGIFHWADDR, &ifr) == 0)
> + {
> + memcpy(s->ssnat_info.ssnat_hwaddr,
> (void*)ifr.ifr_hwaddr.sa_data, sizeof(s->ssnat_info.ssnat_hwaddr));
> +
> + if(ioctl(fd, SIOCGIFADDR, &ifr) == 0)
> + {
> + memcpy(&s->ssnat_info.ssnat_ifaddr,
> &(((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr),
> sizeof(s->ssnat_info.ssnat_ifaddr));
> +
> + if(ioctl(fd, SIOCGIFNETMASK, &ifr) == 0)
> + {
> + memcpy(&s->ssnat_info.ssnat_ifmask,
> &(((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr),
> sizeof(s->ssnat_info.ssnat_ifmask));
> + iReturnValue = 0;
> + }
> + }
> + else
'else' must cuddle with the previous '}', missing braces for the block below.
> + error_report("failed to fetch the ip address
> of interface '%s'", ifname);
> + }
> + else
> + error_report("failed to fetch the hardware address of
> interface '%s'", ifname);
> +
> + close(fd);
> + }
> +
> + return iReturnValue;
> +}
> +
> +static int ssnat_net_tap_init(TAPState* s, const NetdevTapOptions *tap)
> +{
> + int returnValue = -1;
> +
> + if(tap->has_ssnat)
> + {
> + const char *ifname = NULL;
> + ifname = tap->ifname;
> + if(ifname)
> + {
> + const char* ssnat_value = tap->ssnat;
> + if(ssnat_net_tap_set_ifinfo(ifname, s) >= 0)
> + {
> + char ssnat_str[1024] = { 0 };
> + const char* p = ssnat_value;
> +
> + pstrcpy(ssnat_str, sizeof(ssnat_str),
> ssnat_value);
> + if(get_str_sep(ssnat_str, sizeof(ssnat_str),
> &p, ':') >= 0)
> + {
> + if(ssnat_str[0])
> + {
> + if(inet_aton(ssnat_str,
> &s->ssnat_info.ssnat_guest))
Please use getaddrinfo() instead.
> + {
> +
> if(get_str_sep(ssnat_str, sizeof(ssnat_str), &p, ':') >= 0)
> + {
> +
> if(ssnat_str[0])
> + {
> +
> if(inet_aton(ssnat_str, &s->ssnat_info.ssnat_mask))
It would be nice to support /24 style masks too.
> + {
> +
> if(p[0])
> +
> {
> +
> if(inet_aton(p, &s->ssnat_info.ssnat_host))
> +
> {
Pretty deep nesting. Please combine some expressions with '&&'.
> +
> returnValue = 0;
> +
> }
> +
> else
> +
> error_report("invalid stateless static nat rule '%s',
> invalid host-side-ip", ssnat_value);
> +
> }
> +
> else
> +
> error_report("invalid stateless static nat rule '%s', empty
> host-side-ip", ssnat_value);
> + }
> + else
> +
> error_report("invalid stateless static nat rule '%s', invalid netmask",
> ssnat_value);
> + }
> + else
> +
> error_report("invalid stateless static nat rule '%s', empty netmask length",
> ssnat_value);
> + }
> + else
> +
> error_report("invalid stateless static nat rule '%s', incomplete rule",
> ssnat_value);
> + }
> + else
> + error_report("invalid
> stateless static nat rule '%s', invalid guest-side-ip", ssnat_value);
> + }
> + else
> + error_report("invalid
> stateless static nat rule '%s', empty guest-side-ip", ssnat_value);
> + }
> + else
> + {
> + if(inet_aton(p,
> &s->ssnat_info.ssnat_guest))
> + {
> + s->ssnat_info.ssnat_host =
> s->ssnat_info.ssnat_ifaddr;
> + s->ssnat_info.ssnat_mask =
> s->ssnat_info.ssnat_ifmask;
> +
> + s->ssnat_info.ssnat_active =
> 1;
> + returnValue = 0;
> + }
> + else
> + error_report("invalid
> stateless static nat rule '%s'", ssnat_value);
> + }
> + }
> + else
> + error_report("failed to fetch the interface
> '%s' information", ifname);
> + }
> + else
> + error_report("could not retrieve ifname attribute");
> + }
> + else
> + returnValue = 0;
> +
> + return returnValue;
> +}
> +
> int net_init_tap(const NetClientOptions *opts, const char *name,
> NetClientState *peer)
> {
> @@ -705,7 +1072,7 @@ int net_init_tap(const NetClientOptions *opts, const
> char *name,
> return -1;
> }
>
> - return 0;
> + return ssnat_net_tap_init(s, tap);
> }
>
> VHostNetState *tap_get_vhost_net(NetClientState *nc)
> diff --git a/qapi-schema.json b/qapi-schema.json
> index bd8ad74..59aa127 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -2105,6 +2105,8 @@
> #
> # @vhostforce: #optional vhost on for non-MSIX virtio guests
> #
> +# @ssnat: #optional stateless static nat
NAT
> +#
> # Since 1.2
> ##
> { 'type': 'NetdevTapOptions',
> @@ -2118,7 +2120,8 @@
> '*vnet_hdr': 'bool',
> '*vhost': 'bool',
> '*vhostfd': 'str',
> - '*vhostforce': 'bool' } }
> + '*vhostforce': 'bool',
> + '*ssnat': 'str' } }
>
> ##
> # @NetdevSocketOptions
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 3c411c4..c0aa852 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -1268,7 +1268,9 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
> "-net tap[,vlan=n][,name=str],ifname=name\n"
> " connect the host TAP network interface to VLAN 'n'\n"
> #else
> - "-net
> tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h][,vhostforce=on|off]\n"
> + "-net
> tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile]\n"
> + "
> [,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h]\n"
> + " [,vhostforce=on|off][,ssnat=rule]\n"
> " connect the host TAP network interface to VLAN 'n' \n"
> " use network scripts 'file' (default="
> DEFAULT_NETWORK_SCRIPT ")\n"
> " to configure it and 'dfile' (default="
> DEFAULT_NETWORK_DOWN_SCRIPT ")\n"
> @@ -1285,6 +1287,12 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
> " (only has effect for virtio guests which use
> MSIX)\n"
> " use vhostforce=on to force vhost on for non-MSIX virtio
> guests\n"
> " use 'vhostfd=h' to connect to an already opened vhost
> net device\n"
> + " use 'ssnat=rule' to create stateless static nat\n"
> + " rule: <guest-side-ip>[:<netmask>:<host-side-ip>]\n"
> + " for example:
> 'ssnat=172.16.0.0:255.255.255.0:192.168.1.0' will result in translating\n"
> + " the Guest machine IP addresses from
> 172.16.0.x to 192.168.1.x\n"
guest
> + " If only the guest-side-ip parameter is passed,
> the netmask and host-side-ip\n"
> + " will be taken from the interface passed via
> ifname\n"
> "-net bridge[,vlan=n][,name=str][,br=bridge][,helper=helper]\n"
> " connects a host TAP network interface to a host bridge
> device 'br'\n"
> " (default=" DEFAULT_BRIDGE_INTERFACE ") using the
> program 'helper'\n"
> --
> 1.7.2.5
>
>
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: [Qemu-devel] Adding support for Stateless Static NAT for TAP devices,
Blue Swirl <=