qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH V9 3/8] Introduce HostPCIDevice to access a pci


From: Anthony PERARD
Subject: Re: [Qemu-devel] [PATCH V9 3/8] Introduce HostPCIDevice to access a pci device on the host.
Date: Fri, 23 Mar 2012 13:56:27 +0000

On Wed, Mar 21, 2012 at 20:30, Michael S. Tsirkin <address@hidden> wrote:
> On Wed, Mar 21, 2012 at 06:29:00PM +0000, Anthony PERARD wrote:
>> Signed-off-by: Anthony PERARD <address@hidden>
>> Acked-by: Stefano Stabellini <address@hidden>
>
> So this interface is really LinuxSysfsPCIDevice.
> For example the assumption that you can just open
> device by pci address is broken with vfio.
> Domain number is also not something anyone
> besides linux knows about.
>
> If I were you I would just call it xen- ....
> and if it comes in handy it can be later renamed.

Ok, I will rename that XenHostPCIDevice.

>> ---
>>  Makefile.target      |    3 +
>>  hw/host-pci-device.c |  278 
>> ++++++++++++++++++++++++++++++++++++++++++++++++++
>>  hw/host-pci-device.h |   75 ++++++++++++++
>>  3 files changed, 356 insertions(+), 0 deletions(-)
>>  create mode 100644 hw/host-pci-device.c
>>  create mode 100644 hw/host-pci-device.h
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index 63cf769..0ccfd5b 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -232,6 +232,9 @@ obj-$(CONFIG_NO_XEN) += xen-stub.o
>>
>>  obj-i386-$(CONFIG_XEN) += xen_platform.o
>>
>> +# Xen PCI Passthrough
>> +obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += host-pci-device.o
>> +
>>  # Inter-VM PCI shared memory
>>  CONFIG_IVSHMEM =
>>  ifeq ($(CONFIG_KVM), y)
>> diff --git a/hw/host-pci-device.c b/hw/host-pci-device.c
>> new file mode 100644
>> index 0000000..3dacb30
>> --- /dev/null
>> +++ b/hw/host-pci-device.c
>> @@ -0,0 +1,278 @@
>> +/*
>> + * Copyright (C) 2011       Citrix Ltd.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>> + * the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "qemu-common.h"
>> +#include "host-pci-device.h"
>> +
>> +#define PCI_MAX_EXT_CAP \
>> +    ((PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE) / (PCI_CAP_SIZEOF + 
>> 4))
>
> namespace pollution.
> name all things HOST_PCI_....
>
> in this case, open-coding will make things clearer.
>
>
>> +
>> +enum error_code {
>
> seems unused. So why name the type?
>
>> +    ERROR_SYNTAX = 1,
>
> We return -1 on error, just do that and you won't need ERROR_SYNTAX.

Ok, I'll remove this.

>> +};
>> +
>> +static int path_to(const HostPCIDevice *d,
>> +                   const char *name, char *buf, ssize_t size)
>> +{
>> +    return snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%x/%s",
>> +                    d->domain, d->bus, d->dev, d->func, name);
>> +}
>
> users ignore return value. Also, want to check no overflow
> and assert?

I will check the return value in this function and then return 0 or -1.

>> +
>> +static int get_resource(HostPCIDevice *d)
>> +{
>> +    int i, rc = 0;
>> +    FILE *f;
>> +    char path[PATH_MAX];
>> +    unsigned long long start, end, flags, size;
>> +
>> +    path_to(d, "resource", path, sizeof (path));
>
> I think this might not fit, snprintf needs an extra byte for \0.

I just checked: snprintf writes at most size bytes, including the \0, so we
should just give the size of the buffer.

>> +    f = fopen(path, "r");
>> +    if (!f) {
>> +        fprintf(stderr, "Error: Can't open %s: %s\n", path, 
>> strerror(errno));
>> +        return -errno;
>> +    }
>> +
>> +    for (i = 0; i < PCI_NUM_REGIONS; i++) {
>> +        if (fscanf(f, "%llx %llx %llx", &start, &end, &flags) != 3) {
>
> People mentioned that scanf is not a good way to parse input.
> Applies here.

Ok, I'll do a manual parsing :(.

>> +            fprintf(stderr, "Error: Syntax error in %s\n", path);
>> +            rc = ERROR_SYNTAX;
>> +            break;
>> +        }
>> +        if (start) {
>> +            size = end - start + 1;
>> +        } else {
>> +            size = 0;
>> +        }
>> +
>> +        if (i < PCI_ROM_SLOT) {
>> +            d->io_regions[i].base_addr = start;
>> +            d->io_regions[i].size = size;
>> +            d->io_regions[i].flags = flags;
>> +        } else {
>> +            d->rom.base_addr = start;
>> +            d->rom.size = size;
>> +            d->rom.flags = flags;
>> +        }
>> +    }
>> +
>> +    fclose(f);
>> +    return rc;
>> +}
>> +
>> +static int get_hex_value(HostPCIDevice *d, const char *name,
>> +                         unsigned long *pvalue)
>
> why long?

To be a bit generic, I suppose, but I only use this function for
vendor_id and device_id, so I probably just need an int.

>> +{
>> +    char path[PATH_MAX];
>> +    FILE *f;
>> +    unsigned long value;
>> +
>> +    path_to(d, name, path, sizeof (path));
>> +    f = fopen(path, "r");
>> +    if (!f) {
>> +        fprintf(stderr, "Error: Can't open %s: %s\n", path, 
>> strerror(errno));
>> +        return -errno;
>> +    }
>> +    if (fscanf(f, "%lx\n", &value) != 1) {
>> +        fprintf(stderr, "Error: Syntax error in %s\n", path);
>> +        fclose(f);
>> +        return ERROR_SYNTAX;
>> +    }
>> +    fclose(f);
>> +    *pvalue = value;
>> +    return 0;
>> +}
>> +
>> +static bool pci_dev_is_virtfn(HostPCIDevice *d)
>> +{
>> +    char path[PATH_MAX];
>> +    struct stat buf;
>> +
>> +    path_to(d, "physfn", path, sizeof (path));
>> +    return !stat(path, &buf);
>> +}
>> +
>
> Don't start names with pci_.
> It would also be better to avoid things like path_to IMO.

Do you mean avoiding the name or the purpose of the function path_to?
For the name, I can probably rename it to sysfs_device_path().

>> +static int host_pci_config_fd(HostPCIDevice *d)
>
> So this opens if needed, and returns.
> Why not explicitly open on get?
> then you won't need these hacks.

Ok, I'll change that.

>> +{
>> +    char path[PATH_MAX];
>> +
>> +    if (d->config_fd < 0) {
>> +        path_to(d, "config", path, sizeof (path));
>
> sizeof path
>
>> +        d->config_fd = open(path, O_RDWR);
>> +        if (d->config_fd < 0) {
>> +            fprintf(stderr, "HostPCIDevice: Can not open '%s': %s\n",
>> +                    path, strerror(errno));
>
> strerror is not thread safe
>
>> +        }
>> +    }
>> +    return d->config_fd;
>> +}
>> +static int host_pci_config_read(HostPCIDevice *d, int pos, void *buf, int 
>> len)
>> +{
>> +    int fd = host_pci_config_fd(d);
>
> You open file on each access?
>
>> +    int res = 0;
>
> why initialize here?
>
>> +
>> +again:
>> +    res = pread(fd, buf, len, pos);
>> +    if (res != len) {
>> +        if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
>> +            goto again;
>
> code loops with while or for.

ok.

>> +        }
>> +        fprintf(stderr, "%s: read failed: %s (fd: %i)\n",
>> +                __func__, strerror(errno), fd);
>> +        return -errno;
>> +    }
>> +    return 0;
>> +}
>> +static int host_pci_config_write(HostPCIDevice *d,
>> +                                 int pos, const void *buf, int len)
>> +{
>> +    int fd = host_pci_config_fd(d);
>> +    int res = 0;
>> +
>> +again:
>> +    res = pwrite(fd, buf, len, pos);
>> +    if (res != len) {
>> +        if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
>> +            goto again;
>> +        }
>> +        fprintf(stderr, "%s: write failed: %s\n",
>> +                __func__, strerror(errno));
>> +        return -errno;
>> +    }
>> +    return 0;
>> +}
>> +
>
> same comments as above. also,
> Don't report errors with fprintf.
>
>> +int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p)
>> +{
>> +    uint8_t buf;
>> +    int rc = host_pci_config_read(d, pos, &buf, 1);
>> +    if (rc == 0) {
>
> !rc.
>
>> +        *p = buf;
>
> why not pass in p directly?
>
>> +    }
>> +    return rc;
>> +}
>> +int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p)
>> +{
>> +    uint16_t buf;
>> +    int rc = host_pci_config_read(d, pos, &buf, 2);
>> +    if (rc == 0) {
>
> !rc.
>
>> +        *p = le16_to_cpu(buf);
>> +    }
>> +    return rc;
>> +}
>
> This looks wrong wrt endian-ness.

It seems that PCI config space registers are little-endian, so a
word/dword read from the PCI config space should be converted
from little-endian to the CPU's endianness.

>> +int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p)
>> +{
>> +    uint32_t buf;
>> +    int rc = host_pci_config_read(d, pos, &buf, 4);
>> +    if (rc == 0) {
>> +        *p = le32_to_cpu(buf);
>> +    }
>> +    return rc;
>> +}
>
> Add empty lines between {}

It looks nicer when I fold the functions to see only one line each :), but
I will add these empty lines.

>> +int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len)
>> +{
>> +    return host_pci_config_read(d, pos, buf, len);
>> +}
>
> when would this be useful?

It's used to initialize the "emulated" config space (of pci.h) and
every time a pci config read or write is issued by the guest.

>> +
>> +int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data)
>> +{
>> +    return host_pci_config_write(d, pos, &data, 1);
>> +}
>> +int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data)
>> +{
>> +    data = cpu_to_le16(data);
>> +    return host_pci_config_write(d, pos, &data, 2);
>> +}
>> +int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data)
>> +{
>> +    data = cpu_to_le32(data);
>> +    return host_pci_config_write(d, pos, &data, 4);
>> +}
>> +int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len)
>> +{
>> +    return host_pci_config_write(d, pos, buf, len);
>> +}
>> +
>> +uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *d, uint32_t cap)
>
> Why 32? Ext config offsets are < 12 bit.

No apparent reason, the user of this function was just expecting a uint32.

>> +{
>> +    uint32_t header = 0;
>> +    int max_cap = PCI_MAX_EXT_CAP;
>> +    int pos = PCI_CONFIG_SPACE_SIZE;
>> +
>> +    do {
>> +        if (host_pci_get_long(d, pos, &header)) {
>> +            break;
>> +        }
>> +        /*
>> +         * If we have no capabilities, this is indicated by cap ID,
>> +         * cap version and next pointer all being 0.
>> +         */
>> +        if (header == 0) {
>> +            break;
>> +        }
>> +
>> +        if (PCI_EXT_CAP_ID(header) == cap) {
>> +            return pos;
>> +        }
>> +
>> +        pos = PCI_EXT_CAP_NEXT(header);
>> +        if (pos < PCI_CONFIG_SPACE_SIZE) {
>> +            break;
>> +        }
>> +
>> +        max_cap--;
>> +    } while (max_cap > 0);
>> +
>> +    return 0;
>> +}
>> +
>> +HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func)
>
> Why skip domain in the interface?
> Also, HostPCIDevice structure is public so there is little value
> in allocating, just get it by pointer and init/cleanup.

You mean like pci_bus_new_inplace? Ok, I'll do that.

>> +{
>> +    HostPCIDevice *d = NULL;
>> +    unsigned long v = 0;
>> +
>> +    d = g_new0(HostPCIDevice, 1);
>> +
>> +    d->config_fd = -1;
>> +    d->domain = 0;
>> +    d->bus = bus;
>> +    d->dev = dev;
>> +    d->func = func;
>> +
>> +    if (host_pci_config_fd(d) == -1) {
>> +        goto error;
>> +    }
>> +    if (get_resource(d) != 0) {
>
> just get_resource(d).
>
>> +        goto error;
>> +    }
>> +
>> +    if (get_hex_value(d, "vendor", &v)) {
>> +        goto error;
>> +    }
>> +    d->vendor_id = v;
>> +    if (get_hex_value(d, "device", &v)) {
>> +        goto error;
>> +    }
>> +    d->device_id = v;
>> +    d->is_virtfn = pci_dev_is_virtfn(d);
>> +
>> +    return d;
>> +error:
>> +    if (d->config_fd >= 0) {
>> +        close(d->config_fd);
>> +    }
>> +    g_free(d);
>> +    return NULL;
>> +}
>> +
>> +void host_pci_device_put(HostPCIDevice *d)
>> +{
>> +    if (d->config_fd >= 0) {
>> +        close(d->config_fd);
>> +    }
>> +    g_free(d);
>> +}
>> diff --git a/hw/host-pci-device.h b/hw/host-pci-device.h
>> new file mode 100644
>> index 0000000..c8880eb
>> --- /dev/null
>> +++ b/hw/host-pci-device.h
>> @@ -0,0 +1,75 @@
>> +#ifndef HW_HOST_PCI_DEVICE
>> +#  define HW_HOST_PCI_DEVICE
>
> Don't put space after #.
>
> Also HOST_PCI_DEVICE_H would be less likely to confuse.
>
>> +
>> +#include "pci.h"
>> +
>> +/*
>> + * from linux/ioport.h
>> + * IO resources have these defined flags.
>> + */
>> +#define IORESOURCE_BITS         0x000000ff      /* Bus-specific bits */
>> +
>> +#define IORESOURCE_TYPE_BITS    0x00000f00      /* Resource type */
>> +#define IORESOURCE_IO           0x00000100
>> +#define IORESOURCE_MEM          0x00000200
>> +#define IORESOURCE_IRQ          0x00000400
>> +#define IORESOURCE_DMA          0x00000800
>> +
>> +#define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
>> +#define IORESOURCE_READONLY     0x00002000
>> +#define IORESOURCE_CACHEABLE    0x00004000
>> +#define IORESOURCE_RANGELENGTH  0x00008000
>> +#define IORESOURCE_SHADOWABLE   0x00010000
>> +
>> +#define IORESOURCE_SIZEALIGN    0x00020000      /* size indicates alignment 
>> */
>> +#define IORESOURCE_STARTALIGN   0x00040000      /* start field is alignment 
>> */
>> +
>> +#define IORESOURCE_MEM_64       0x00100000
>> +
>> +    /* Userland may not map this resource */
>> +#define IORESOURCE_EXCLUSIVE    0x08000000
>> +#define IORESOURCE_DISABLED     0x10000000
>> +#define IORESOURCE_UNSET        0x20000000
>> +#define IORESOURCE_AUTO         0x40000000
>> +    /* Driver has marked this resource busy */
>> +#define IORESOURCE_BUSY         0x80000000
>> +
>
> Why do above make sense in an API?
> Abstract it in some reasonable way, don't just expose
> flags from sysfs as is.

Ok.

>> +
>
> kill extra empty lines
>
>> +typedef struct HostPCIIORegion {
>> +    unsigned long flags;
>> +    pcibus_t base_addr;
>> +    pcibus_t size;
>> +} HostPCIIORegion;
>> +
>> +typedef struct HostPCIDevice {
>> +    uint16_t domain;
>> +    uint8_t bus;
>> +    uint8_t dev;
>> +    uint8_t func;
>> +
>> +    uint16_t vendor_id;
>> +    uint16_t device_id;
>> +
>> +    HostPCIIORegion io_regions[PCI_NUM_REGIONS - 1];
>> +    HostPCIIORegion rom;
>> +
>> +    bool is_virtfn;
>> +
>> +    int config_fd;
>> +} HostPCIDevice;
>> +
>> +HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func);
>> +void host_pci_device_put(HostPCIDevice *pci_dev);
>> +
>> +int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p);
>> +int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p);
>> +int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p);
>> +int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len);
>> +int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data);
>> +int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data);
>> +int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data);
>> +int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len);
>> +
>> +uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *s, uint32_t cap);
>> +
>> +#endif /* !HW_HOST_PCI_DEVICE */
>> --
>> Anthony PERARD
>



-- 
Anthony PERARD



reply via email to

[Prev in Thread] Current Thread [Next in Thread]