Re: [Qemu-devel] [PATCH RFC 1/3] linux-user: add rtnetlink(7) support

qemu-devel
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH RFC 1/3] linux-user: add rtnetlink(7) support

From:	Laurent Vivier
Subject:	Re: [Qemu-devel] [PATCH RFC 1/3] linux-user: add rtnetlink(7) support
Date:	Sat, 14 May 2016 11:37:26 +0200
User-agent:	Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.0

Le 13/05/2016 à 18:40, Peter Maydell a écrit :
> On 30 January 2016 at 22:26, Laurent Vivier <address@hidden> wrote:
>> rtnetlink is needed to use iproute package (ip addr, ip route)
>> and dhcp client.
>>
>> Examples:
>>
>> Without this patch:
>>     # ip link
>>     Cannot open netlink socket: Address family not supported by protocol
>>     # ip addr
>>     Cannot open netlink socket: Address family not supported by protocol
>>     # ip route
>>     Cannot open netlink socket: Address family not supported by protocol
>>     # dhclient eth0
>>     Cannot open netlink socket: Address family not supported by protocol
>>     Cannot open netlink socket: Address family not supported by protocol
>>
>> With this patch:
>>     # ip link
>>     1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode 
>> DEFAULT
>>         link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>>     51: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel 
>> state UP mode DEFAULT qlen 1000
>>         link/ether 00:16:3e:89:6b:d7 brd ff:ff:ff:ff:ff:ff
>>     # ip addr show eth0
>>     51: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel 
>> state UP qlen 1000
>>         link/ether 00:16:3e:89:6b:d7 brd ff:ff:ff:ff:ff:ff
>>         inet 192.168.122.197/24 brd 192.168.122.255 scope global eth0
>>            valid_lft forever preferred_lft forever
>>         inet6 fe80::216:3eff:fe89:6bd7/64 scope link
>>            valid_lft forever preferred_lft forever
>>     # ip route
>>     default via 192.168.122.1 dev eth0
>>     192.168.122.0/24 dev eth0  proto kernel  scope link  src 192.168.122.197
>>     # ip addr flush eth0
>>     # ip addr add 192.168.122.10 dev eth0
>>     # ip addr show eth0
>>     51: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel 
>> state UP qlen 1000
>>         link/ether 00:16:3e:89:6b:d7 brd ff:ff:ff:ff:ff:ff
>>         inet 192.168.122.10/32 scope global eth0
>>            valid_lft forever preferred_lft forever
>>     # ip route add 192.168.122.0/24 via 192.168.122.10
>>     # ip route
>>         192.168.122.0/24 via 192.168.122.10 dev eth0
>>
>> Signed-off-by: Laurent Vivier <address@hidden>
>> ---
>>  linux-user/syscall.c | 477 
>> ++++++++++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 471 insertions(+), 6 deletions(-)
> 
> Apologies for not getting to this patch earlier. Mostly it looks
> OK but I have a few questions/suggestions below.
> 
>> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
>> index dac5518..a1ed2f5 100644
>> --- a/linux-user/syscall.c
>> +++ b/linux-user/syscall.c
>> @@ -100,6 +100,8 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
>>  #include <linux/filter.h>
>>  #include <linux/blkpg.h>
>>  #include "linux_loop.h"
>> +#include <linux/netlink.h>
>> +#include <linux/rtnetlink.h>
>>  #include "uname.h"
>>
>>  #include "qemu.h"
>> @@ -295,6 +297,14 @@ static TargetFdTrans **target_fd_trans;
>>
>>  static unsigned int target_fd_max;
>>
>> +static TargetFdDataFunc fd_trans_target_to_host_data(int fd)
>> +{
>> +    if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) {
>> +        return target_fd_trans[fd]->target_to_host_data;
>> +    }
>> +    return NULL;
>> +}
>> +
>>  static TargetFdDataFunc fd_trans_host_to_target_data(int fd)
>>  {
>>      if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) {
>> @@ -1222,6 +1232,11 @@ static inline abi_long 
>> host_to_target_sockaddr(abi_ulong target_addr,
>>          return -TARGET_EFAULT;
>>      memcpy(target_saddr, addr, len);
>>      target_saddr->sa_family = tswap16(addr->sa_family);
>> +    if (addr->sa_family == AF_NETLINK) {
>> +        struct sockaddr_nl *target_nl = (struct sockaddr_nl *)target_saddr;
>> +        target_nl->nl_pid = tswap32(target_nl->nl_pid);
>> +        target_nl->nl_groups = tswap32(target_nl->nl_groups);
>> +    }
>>      unlock_user(target_saddr, target_addr, len);
>>
>>      return 0;
>> @@ -1453,6 +1468,416 @@ static inline abi_long host_to_target_cmsg(struct 
>> target_msghdr *target_msgh,
>>      return 0;
>>  }
>>
>> +static void tswap_nlmsghdr(struct nlmsghdr *nlh)
>> +{
>> +    nlh->nlmsg_len = tswap32(nlh->nlmsg_len);
>> +    nlh->nlmsg_type = tswap16(nlh->nlmsg_type);
>> +    nlh->nlmsg_flags = tswap16(nlh->nlmsg_flags);
>> +    nlh->nlmsg_seq = tswap32(nlh->nlmsg_seq);
>> +    nlh->nlmsg_pid = tswap32(nlh->nlmsg_pid);
>> +}
>> +
>> +static abi_long host_to_target_for_each_nlmsg(struct nlmsghdr *nlh,
>> +                                              size_t len,
>> +                                              abi_long 
>> (*host_to_target_nlmsg)
>> +                                                       (struct nlmsghdr *))
>> +{
>> +    uint32_t nlmsg_len;
>> +    abi_long ret;
>> +
>> +    while (len > (int)sizeof(struct nlmsghdr)) {
> 
> What is this cast to int for ?
>

I agree it seems useless, I have copied some parts from the kernel :

/usr/include/linux/netlink.h

#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \
...

so the "(int)" comes from this.

>> +
>> +        nlmsg_len = nlh->nlmsg_len;
>> +        if (nlmsg_len < sizeof(struct nlmsghdr) ||
>> +            nlmsg_len > len) {
>> +            break;
>> +        }
>> +
>> +        if (nlh->nlmsg_type == NLMSG_DONE) {
>> +            tswap_nlmsghdr(nlh);
>> +            break;
>> +        }
> 
> Why not put this one inside the switch too? (you'd just need to
> change the 'break' to 'return 0'.)

You're right.
The idea was to break the while without exiting the function.

> 
>> +        switch (nlh->nlmsg_type) {
>> +        case NLMSG_NOOP:
>> +            break;
>> +        case NLMSG_ERROR: {
> 
> Odd brace placement.

OK

>> +                struct nlmsgerr *e = NLMSG_DATA(nlh);
>> +                e->error = tswap32(e->error);
>> +                tswap_nlmsghdr(&e->msg);
>> +                tswap_nlmsghdr(nlh);
>> +            }
>> +            return 0;
>> +        default:
>> +            ret = host_to_target_nlmsg(nlh);
>> +            if (ret < 0) {
>> +                tswap_nlmsghdr(nlh);
>> +                return ret;
>> +            }
>> +            break;
>> +        }
>> +        tswap_nlmsghdr(nlh);
>> +        len -= NLMSG_ALIGN(nlmsg_len);
>> +        nlh = (struct nlmsghdr *)(((char*)nlh) + NLMSG_ALIGN(nlmsg_len));
>> +    }
>> +    return 0;
>> +}
>> +
>> +static abi_long target_to_host_for_each_nlmsg(struct nlmsghdr *nlh,
>> +                                              size_t len,
>> +                                              abi_long 
>> (*target_to_host_nlmsg)
>> +                                                       (struct nlmsghdr *))
>> +{
>> +    int ret;
>> +
>> +    while (len > (int)sizeof(struct nlmsghdr)) {
>> +        if (tswap32(nlh->nlmsg_len) < sizeof(struct nlmsghdr) ||
>> +            tswap32(nlh->nlmsg_len) > len) {
>> +            break;
>> +        }
>> +        tswap_nlmsghdr(nlh);
>> +        if (nlh->nlmsg_type == NLMSG_DONE) {
>> +            break;
>> +        }
>> +        switch (nlh->nlmsg_type) {
>> +        case NLMSG_NOOP:
>> +            break;
>> +        case NLMSG_ERROR: {
>> +                struct nlmsgerr *e = NLMSG_DATA(nlh);
>> +                e->error = tswap32(e->error);
>> +                tswap_nlmsghdr(&e->msg);
>> +            }
>> +        default:
>> +            ret = target_to_host_nlmsg(nlh);
>> +            if (ret < 0) {
>> +                return ret;
>> +            }
>> +        }
>> +        len -= NLMSG_ALIGN(nlh->nlmsg_len);
>> +        nlh = (struct nlmsghdr *)(((char *)nlh) + 
>> NLMSG_ALIGN(nlh->nlmsg_len));
> 
> You could use 'nlh = NLMSG_NEXT(nlh, len);' here rather than
> manually adjusting len and nlh (though maybe you prefer the
> parallel with the host_to_target function, and there we can't
> use NLMSG_NEXT).

Yes, this is the reason.

>> +    }
>> +    return 0;
>> +}
>> +
>> +static abi_long host_to_target_for_each_rtattr(struct rtattr *rtattr,
>> +                                               size_t len,
>> +                                               abi_long 
>> (*host_to_target_rtattr)
>> +                                                        (struct rtattr *))
>> +{
>> +    unsigned short rta_len;
>> +    abi_long ret;
>> +
>> +    while (len > (int)sizeof(struct rtattr)) {
>> +        rta_len = rtattr->rta_len;
>> +        if (rta_len < sizeof(struct rtattr) ||
>> +            rta_len > len) {
>> +            rtattr->rta_len = tswap16(rtattr->rta_len);
>> +            rtattr->rta_type = tswap16(rtattr->rta_type);
> 
> In the code above for iterating nlmsgs, we don't try to
> touch the fields if the length claims to be out of bounds;
> here we do. Why the difference ?

This series has the "RFC" tag because there are some issues with them,
and in some cases I've tried to add workaround when the guest client
crashes. And I think it was the case for the RTATTR scan and not for the
NLMSG case. I will remove this here and check what happens.

>> +            break;
>> +        }
>> +        ret = host_to_target_rtattr(rtattr);
>> +        rtattr->rta_len = tswap16(rtattr->rta_len);
>> +        rtattr->rta_type = tswap16(rtattr->rta_type);
>> +        if (ret < 0) {
>> +            return ret;
>> +        }
>> +        len -= RTA_ALIGN(rta_len);
>> +        rtattr = (struct rtattr *)(((char *)rtattr) + RTA_ALIGN(rta_len));
>> +    }
>> +    return 0;
>> +}
>> +
>> +static abi_long host_to_target_data_link_rtattr(struct rtattr *rtattr)
>> +{
>> +    uint32_t *u32;
>> +
>> +    switch (rtattr->rta_type) {
>> +    case IFLA_ADDRESS:
>> +    case IFLA_BROADCAST:
>> +    case IFLA_IFNAME:
> 
> Comments here about the types we are converting (or not) would be nice.

OK

> 
>> +        break;
>> +    case IFLA_MTU:
>> +    case IFLA_LINK:
>> +    case IFLA_WEIGHT:
>> +    case IFLA_TXQLEN:
>> +    case IFLA_CARRIER_CHANGES:
>> +    case IFLA_NUM_RX_QUEUES:
>> +    case IFLA_NUM_TX_QUEUES:
>> +    case IFLA_PROMISCUITY:
>> +    case IFLA_EXT_MASK:
>> +    case IFLA_LINK_NETNSID:
>> +    case IFLA_GROUP:
>> +    case IFLA_MASTER:
>> +    case IFLA_NUM_VF:
>> +    case IFLA_STATS:
>> +        u32 = RTA_DATA(rtattr);
>> +        *u32 = tswap32(*u32);
>> +        break;
>> +    case IFLA_QDISC:
>> +    case IFLA_COST:
>> +    case IFLA_MAP:
>> +    case IFLA_OPERSTATE:
>> +    case IFLA_LINKMODE:
>> +    case IFLA_CARRIER:
>> +    case IFLA_STATS64:
>> +    case IFLA_AF_SPEC:
>> +    case IFLA_LINKINFO:
> 
> What order are these cases in (ie why is this set of "do nothing"
> different from the ones at the top of the switch) ?

I've tried to keep the order of then enum in
/usr/include/linux/if_link.h. It is easier to debug because the assigned
numbers are not explicit.

> 
>> +        break;
>> +    default:
> 
> Should we log something about an unsupported IFLA type here?

Yes, I use that to debug too. But it is "fprintf()".
Is "gemu_log()" a good choice?

> 
>> +        break;
> 
> 
> Looking at
> http://lxr.free-electrons.com/source/net/core/rtnetlink.c
> I'm not sure all of these are right. I think IFLA_MAP means we have a
> pointer to a struct rtnl_link_ifmap, for instance, which would need
> some of its fields swapping. IFLA_STATS is an rtnl_link_stats
> struct, and so on.

Yes, you're right. I'll check that.

>> +    }
>> +    return 0;
>> +}
>> +
>> +static abi_long host_to_target_data_addr_rtattr(struct rtattr *rtattr)
>> +{
>> +    uint32_t *u32;
>> +    struct ifa_cacheinfo *ci;
>> +
>> +    switch (rtattr->rta_type) {
>> +    case IFA_FLAGS:
>> +        u32 = RTA_DATA(rtattr);
>> +        *u32 = tswap32(*u32);
>> +        break;
>> +    case IFA_LOCAL:
>> +    case IFA_ADDRESS:
>> +    case IFA_BROADCAST:
>> +    case IFA_LABEL:
>> +        break;
>> +    case IFA_CACHEINFO:
>> +        ci = RTA_DATA(rtattr);
>> +        ci->ifa_prefered = tswap32(ci->ifa_prefered);
>> +        ci->ifa_valid = tswap32(ci->ifa_valid);
>> +        ci->cstamp = tswap32(ci->cstamp);
>> +        ci->tstamp = tswap32(ci->tstamp);
>> +        break;
>> +    default:
>> +        break;
> 
> Log about unsupported values?

OK

> 
>> +    }
>> +    return 0;
>> +}
>> +
>> +static abi_long host_to_target_data_route_rtattr(struct rtattr *rtattr)
>> +{
>> +    uint32_t *u32;
>> +    switch (rtattr->rta_type) {
>> +    case RTA_GATEWAY:
>> +    case RTA_DST:
>> +    case RTA_PREFSRC:
>> +        break;
>> +    case RTA_PRIORITY:
>> +    case RTA_TABLE:
>> +    case RTA_OIF:
>> +        u32 = RTA_DATA(rtattr);
>> +        *u32 = tswap32(*u32);
>> +        break;
> 
> This doesn't seem to be the complete list of RTA_ values; what
> decides which ones are listed here?

Only testing. There are too many values (and the structure of the value
can depend on the interface type). I've in fact some logs in the default
case, to log undecoded entries and then I boot a container
(ppc64/ppc/m68k/sh4/...) and I try to implement missing entries.

> 
>> +    default:
>> +        break;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static abi_long host_to_target_link_rtattr(struct rtattr *rtattr,
>> +                                         uint32_t rtattr_len)
>> +{
>> +    return host_to_target_for_each_rtattr(rtattr, rtattr_len,
>> +                                          host_to_target_data_link_rtattr);
>> +}
>> +
>> +static abi_long host_to_target_addr_rtattr(struct rtattr *rtattr,
>> +                                         uint32_t rtattr_len)
>> +{
>> +    return host_to_target_for_each_rtattr(rtattr, rtattr_len,
>> +                                          host_to_target_data_addr_rtattr);
>> +}
>> +
>> +static abi_long host_to_target_route_rtattr(struct rtattr *rtattr,
>> +                                         uint32_t rtattr_len)
>> +{
>> +    return host_to_target_for_each_rtattr(rtattr, rtattr_len,
>> +                                          host_to_target_data_route_rtattr);
>> +}
>> +
>> +static abi_long host_to_target_data_route(struct nlmsghdr *nlh)
>> +{
>> +    uint32_t nlmsg_len;
>> +    struct ifinfomsg *ifi;
>> +    struct ifaddrmsg *ifa;
>> +    struct rtmsg *rtm;
>> +
>> +    nlmsg_len = nlh->nlmsg_len;
>> +    switch (nlh->nlmsg_type) {
>> +    case RTM_NEWLINK:
>> +    case RTM_DELLINK:
>> +    case RTM_GETLINK:
>> +        ifi = NLMSG_DATA(nlh);
>> +        ifi->ifi_type = tswap16(ifi->ifi_type);
>> +        ifi->ifi_index = tswap32(ifi->ifi_index);
>> +        ifi->ifi_flags = tswap32(ifi->ifi_flags);
>> +        ifi->ifi_change = tswap32(ifi->ifi_change);
>> +        host_to_target_link_rtattr(IFLA_RTA(ifi),
>> +                                   nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)));
>> +        break;
>> +    case RTM_NEWADDR:
>> +    case RTM_DELADDR:
>> +    case RTM_GETADDR:
>> +        ifa = NLMSG_DATA(nlh);
>> +        ifa->ifa_index = tswap32(ifa->ifa_index);
>> +        host_to_target_addr_rtattr(IFA_RTA(ifa),
>> +                                   nlmsg_len - NLMSG_LENGTH(sizeof(*ifa)));
>> +        break;
>> +    case RTM_NEWROUTE:
>> +    case RTM_DELROUTE:
>> +    case RTM_GETROUTE:
>> +        rtm = NLMSG_DATA(nlh);
>> +        rtm->rtm_flags = tswap32(rtm->rtm_flags);
>> +        host_to_target_route_rtattr(RTM_RTA(rtm),
>> +                                    nlmsg_len - NLMSG_LENGTH(sizeof(*rtm)));
>> +        break;
>> +    default:
>> +        return -TARGET_EINVAL;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static inline abi_long host_to_target_nlmsg_route(struct nlmsghdr *nlh,
>> +                                                  size_t len)
>> +{
>> +    return host_to_target_for_each_nlmsg(nlh, len, 
>> host_to_target_data_route);
>> +}
>> +
>> +static abi_long target_to_host_for_each_rtattr(struct rtattr *rtattr,
>> +                                               size_t len,
>> +                                               abi_long 
>> (*target_to_host_rtattr)
>> +                                                        (struct rtattr *))
>> +{
>> +    abi_long ret;
>> +
>> +    while (len >= (int)sizeof(struct rtattr)) {
>> +        rtattr->rta_len = tswap16(rtattr->rta_len);
>> +        rtattr->rta_type = tswap16(rtattr->rta_type);
>> +        if (rtattr->rta_len < sizeof(struct rtattr) ||
>> +            rtattr->rta_len > len) {
>> +            break;
>> +        }
> 
> Length check after swap of len/type again.

I don't understand: why "again"?

>> +        ret = target_to_host_rtattr(rtattr);
>> +        if (ret < 0) {
>> +            return ret;
>> +        }
>> +        len -= RTA_ALIGN(rtattr->rta_len);
>> +        rtattr = (struct rtattr *)(((char *)rtattr) +
>> +                 RTA_ALIGN(rtattr->rta_len));
>> +    }
>> +    return 0;
>> +}
>> +
>> +static abi_long target_to_host_data_link_rtattr(struct rtattr *rtattr)
>> +{
>> +    switch (rtattr->rta_type) {
>> +    default:
>> +        break;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static abi_long target_to_host_data_addr_rtattr(struct rtattr *rtattr)
>> +{
>> +    switch (rtattr->rta_type) {
>> +    case IFA_LOCAL:
>> +    case IFA_ADDRESS:
>> +        break;
>> +    default:
>> +        break;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static abi_long target_to_host_data_route_rtattr(struct rtattr *rtattr)
>> +{
>> +    uint32_t *u32;
>> +    switch (rtattr->rta_type) {
>> +    case RTA_DST:
>> +    case RTA_SRC:
>> +    case RTA_GATEWAY:
>> +        /* nothing to do */
>> +        break;
>> +    case RTA_OIF:
>> +        u32 = RTA_DATA(rtattr);
>> +        *u32 = tswap32(*u32);
>> +        break;
>> +    default:
>> +        break;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static void target_to_host_link_rtattr(struct rtattr *rtattr,
>> +                                       uint32_t rtattr_len)
>> +{
>> +    target_to_host_for_each_rtattr(rtattr, rtattr_len,
>> +                                   target_to_host_data_link_rtattr);
>> +}
>> +
>> +static void target_to_host_addr_rtattr(struct rtattr *rtattr,
>> +                                     uint32_t rtattr_len)
>> +{
>> +    target_to_host_for_each_rtattr(rtattr, rtattr_len,
>> +                                   target_to_host_data_addr_rtattr);
>> +}
>> +
>> +static void target_to_host_route_rtattr(struct rtattr *rtattr,
>> +                                     uint32_t rtattr_len)
>> +{
>> +    target_to_host_for_each_rtattr(rtattr, rtattr_len,
>> +                                   target_to_host_data_route_rtattr);
>> +}
>> +
>> +static abi_long target_to_host_data_route(struct nlmsghdr *nlh)
>> +{
>> +    struct ifinfomsg *ifi;
>> +    struct ifaddrmsg *ifa;
>> +    struct rtmsg *rtm;
>> +
>> +    switch (nlh->nlmsg_type) {
>> +    case RTM_GETLINK:
>> +        break;
>> +    case RTM_NEWLINK:
>> +    case RTM_DELLINK:
>> +        ifi = NLMSG_DATA(nlh);
>> +        ifi->ifi_type = tswap16(ifi->ifi_type);
>> +        ifi->ifi_index = tswap32(ifi->ifi_index);
>> +        ifi->ifi_flags = tswap32(ifi->ifi_flags);
>> +        ifi->ifi_change = tswap32(ifi->ifi_change);
>> +        target_to_host_link_rtattr(IFLA_RTA(ifi), nlh->nlmsg_len -
>> +                                   NLMSG_LENGTH(sizeof(*ifi)));
>> +        break;
>> +    case RTM_GETADDR:
>> +    case RTM_NEWADDR:
>> +    case RTM_DELADDR:
>> +        ifa = NLMSG_DATA(nlh);
>> +        ifa->ifa_index = tswap32(ifa->ifa_index);
>> +        target_to_host_addr_rtattr(IFA_RTA(ifa), nlh->nlmsg_len -
>> +                                   NLMSG_LENGTH(sizeof(*ifa)));
>> +        break;
>> +    case RTM_GETROUTE:
>> +        break;
>> +    case RTM_NEWROUTE:
>> +    case RTM_DELROUTE:
>> +        rtm = NLMSG_DATA(nlh);
>> +        rtm->rtm_flags = tswap32(rtm->rtm_flags);
>> +        target_to_host_route_rtattr(RTM_RTA(rtm), nlh->nlmsg_len -
>> +                                    NLMSG_LENGTH(sizeof(*rtm)));
>> +        break;
>> +    default:
>> +        return -TARGET_EOPNOTSUPP;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static abi_long target_to_host_nlmsg_route(struct nlmsghdr *nlh, size_t len)
>> +{
>> +    return target_to_host_for_each_nlmsg(nlh, len, 
>> target_to_host_data_route);
>> +}
>> +
>>  /* do_setsockopt() Must return target values and target errnos. */
>>  static abi_long do_setsockopt(int sockfd, int level, int optname,
>>                                abi_ulong optval_addr, socklen_t optlen)
>> @@ -2103,6 +2528,21 @@ static TargetFdTrans target_packet_trans = {
>>      .target_to_host_addr = packet_target_to_host_sockaddr,
>>  };
>>
>> +static abi_long netlink_route_target_to_host(void *buf, size_t len)
>> +{
>> +    return target_to_host_nlmsg_route(buf, len);
>> +}
>> +
>> +static abi_long netlink_route_host_to_target(void *buf, size_t len)
>> +{
>> +    return host_to_target_nlmsg_route(buf, len);
>> +}
>> +
>> +static TargetFdTrans target_netlink_route_trans = {
>> +    .target_to_host_data = netlink_route_target_to_host,
>> +    .host_to_target_data = netlink_route_host_to_target,
>> +};
>> +
>>  /* do_socket() Must return target values and target errnos. */
>>  static abi_long do_socket(int domain, int type, int protocol)
>>  {
>> @@ -2114,9 +2554,6 @@ static abi_long do_socket(int domain, int type, int 
>> protocol)
>>          return ret;
>>      }
>>
>> -    if (domain == PF_NETLINK)
>> -        return -TARGET_EAFNOSUPPORT;
>> -
>>      if (domain == AF_PACKET ||
>>          (domain == AF_INET && type == SOCK_PACKET)) {
>>          protocol = tswap16(protocol);
>> @@ -2130,6 +2567,16 @@ static abi_long do_socket(int domain, int type, int 
>> protocol)
>>               * if socket type is SOCK_PACKET, bind by name
>>               */
>>              fd_trans_register(ret, &target_packet_trans);
>> +        } else if (domain == PF_NETLINK) {
>> +            switch (protocol) {
>> +            case NETLINK_ROUTE:
>> +                fd_trans_register(ret, &target_netlink_route_trans);
>> +                break;
>> +            default:
>> +                close(ret);
>> +                ret = -EPFNOSUPPORT;
> 
> Can we handle the "PF_NETLINK but unsupported protocol" check
> before we try to open the host socket, rather than having to
> back it out here?

Yes

>> +                break;
>> +            }
>>          }
>>      }
>>      return ret;
>> @@ -2214,14 +2661,25 @@ static abi_long do_sendrecvmsg_locked(int fd, struct 
>> target_msghdr *msgp,
>>      msg.msg_iov = vec;
>>
>>      if (send) {
>> -        ret = target_to_host_cmsg(&msg, msgp);
>> -        if (ret == 0)
>> +        if (fd_trans_target_to_host_data(fd)) {
>> +            ret = fd_trans_target_to_host_data(fd)(msg.msg_iov->iov_base,
>> +                                                   msg.msg_iov->iov_len);
>> +        } else {
>> +            ret = target_to_host_cmsg(&msg, msgp);
>> +        }
>> +        if (ret == 0) {
>>              ret = get_errno(sendmsg(fd, &msg, flags));
>> +        }
>>      } else {
>>          ret = get_errno(recvmsg(fd, &msg, flags));
>>          if (!is_error(ret)) {
>>              len = ret;
>> -            ret = host_to_target_cmsg(msgp, &msg);
>> +            if (fd_trans_host_to_target_data(fd)) {
>> +                ret = 
>> fd_trans_host_to_target_data(fd)(msg.msg_iov->iov_base,
>> +                                                       
>> msg.msg_iov->iov_len);
>> +            } else {
>> +                ret = host_to_target_cmsg(msgp, &msg);
>> +            }
>>              if (!is_error(ret)) {
>>                  msgp->msg_namelen = tswap32(msg.msg_namelen);
>>                  if (msg.msg_name != NULL) {
>> @@ -2448,6 +2906,13 @@ static abi_long do_sendto(int fd, abi_ulong msg, 
>> size_t len, int flags,
>>      host_msg = lock_user(VERIFY_READ, msg, len, 1);
>>      if (!host_msg)
>>          return -TARGET_EFAULT;
>> +    if (fd_trans_target_to_host_data(fd)) {
>> +        ret = fd_trans_target_to_host_data(fd)(host_msg, len);
>> +        if (ret < 0) {
>> +            unlock_user(host_msg, msg, 0);
>> +            return ret;
>> +        }
>> +    }
>>      if (target_addr) {
>>          addr = alloca(addrlen+1);
>>          ret = target_to_host_sockaddr(fd, addr, target_addr, addrlen);
>> --
>> 2.5.0
> 
> This felt like a pretty long patch to wade through on review; if
> there's an easy way to split it up that would be nice for v2,
> but I don't insist on it if there doesn't seem to be a nice
> splitting point.

I like to have small patches, but this one is hard to split.
I will try, but I'm not sure I can.

> 
> thanks
> -- PMM
> 

Thanks for the review,
Laurent
[Prev in Thread]
Current Thread
[Next in Thread]
Re: [Qemu-devel] [PATCH RFC 1/3] linux-user: add rtnetlink(7) support, Peter Maydell, 2016/05/13
- Re: [Qemu-devel] [PATCH RFC 1/3] linux-user: add rtnetlink(7) support, Laurent Vivier <=
  - Re: [Qemu-devel] [PATCH RFC 1/3] linux-user: add rtnetlink(7) support, Peter Maydell, 2016/05/14
Prev by Date: Re: [Qemu-devel] [PATCH] Fix QEMU PPC e500v1 efscmp* instructions
Next by Date: Re: [Qemu-devel] [V10 1/4] hw/i386: Introduce AMD IOMMU
Previous by thread: Re: [Qemu-devel] [PATCH RFC 1/3] linux-user: add rtnetlink(7) support
Next by thread: Re: [Qemu-devel] [PATCH RFC 1/3] linux-user: add rtnetlink(7) support
Index(es):
- Date
- Thread