qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [V12 3/4] hw/i386: Introduce AMD IOMMU


From: David Kiarie
Subject: Re: [Qemu-devel] [V12 3/4] hw/i386: Introduce AMD IOMMU
Date: Mon, 4 Jul 2016 08:06:00 +0300

On Wed, Jun 22, 2016 at 11:24 PM, Jan Kiszka <address@hidden> wrote:
> On 2016-06-15 14:21, David Kiarie wrote:
>> +
>> +/* System Software might never read from some of this fields but anyways */
>
> No read-modify-write accesses observed in the field? And fields like
> AMDVI_MMIO_STATUS or AMDVI_MMIO_EXT_FEATURES sound a lot like they are
> rather about reading than writing. Misleading comment?

Yeah, misleading comment. AMDVI_MMIO_EXT_FEATURES is read only while
some AMDVI_MMIO_STATUS is r/w1c and yes, I'm enforcing that in the
code.

>
>> +static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
>> +{
>> +    AMDVIState *s = opaque;
>> +
>> +    uint64_t val = -1;
>> +    if (addr + size > AMDVI_MMIO_SIZE) {
>> +        trace_amdvi_mmio_read("error: addr outside region: max ",
>> +                (uint64_t)AMDVI_MMIO_SIZE, addr, size);
>> +        return (uint64_t)-1;
>> +    }
>> +
>> +    if (size == 2) {
>> +        val = amdvi_readw(s, addr);
>> +    } else if (size == 4) {
>> +        val = amdvi_readl(s, addr);
>> +    } else if (size == 8) {
>> +        val = amdvi_readq(s, addr);
>> +    }
>> +
>> +    switch (addr & ~0x07) {
>> +    case AMDVI_MMIO_DEVICE_TABLE:
>> +        trace_amdvi_mmio_read("MMIO_DEVICE_TABLE", addr, size, addr & 
>> ~0x07);
>> +        break;
>> +
>> +    case AMDVI_MMIO_COMMAND_BASE:
>> +        trace_amdvi_mmio_read("MMIO_COMMAND_BASE", addr, size, addr & 
>> ~0x07);
>> +        break;
>> +
>> +    case AMDVI_MMIO_EVENT_BASE:
>> +        trace_amdvi_mmio_read("MMIO_EVENT_BASE", addr, size, addr & ~0x07);
>> +        break;
>> +
>> +    case AMDVI_MMIO_CONTROL:
>> +        trace_amdvi_mmio_read("MMIO_MMIO_CONTROL", addr, size, addr & 
>> ~0x07);
>> +        break;
>> +
>> +    case AMDVI_MMIO_EXCL_BASE:
>> +        trace_amdvi_mmio_read("MMIO_EXCL_BASE", addr, size, addr & ~0x07);
>> +        break;
>> +
>> +    case AMDVI_MMIO_EXCL_LIMIT:
>> +        trace_amdvi_mmio_read("MMIO_EXCL_LIMIT", addr, size, addr & ~0x07);
>> +        break;
>> +
>> +    case AMDVI_MMIO_COMMAND_HEAD:
>> +        trace_amdvi_mmio_read("MMIO_COMMAND_HEAD", addr, size, addr & 
>> ~0x07);
>> +        break;
>> +
>> +    case AMDVI_MMIO_COMMAND_TAIL:
>> +        trace_amdvi_mmio_read("MMIO_COMMAND_TAIL", addr, size, addr & 
>> ~0x07);
>> +        break;
>> +
>> +    case AMDVI_MMIO_EVENT_HEAD:
>> +        trace_amdvi_mmio_read("MMIO_EVENT_HEAD", addr, size, addr & ~0x07);
>> +        break;
>> +
>> +    case AMDVI_MMIO_EVENT_TAIL:
>> +        trace_amdvi_mmio_read("MMIO_EVENT_TAIL", addr, size, addr & ~0x07);
>> +        break;
>> +
>> +    case AMDVI_MMIO_STATUS:
>> +        trace_amdvi_mmio_read("MMIO_STATUS", addr, size, addr & ~0x07);
>> +        break;
>> +
>> +    case AMDVI_MMIO_EXT_FEATURES:
>> +        trace_amdvi_mmio_read("MMIO_EXT_FEATURES", addr, size, addr & 
>> ~0x07);
>> +        break;
>
> What about a lookup table for that name?

I can't find an obvious way to index a table given the register address.

>
>> +
>> +    default:
>> +        trace_amdvi_mmio_read("UNHANDLED READ", addr, size, addr & ~0x07);
>> +    }
>> +    return val;
>> +}
>> +
>> +static void amdvi_handle_control_write(AMDVIState *s)
>> +{
>> +    /*
>> +     * read whatever is already written in case
>> +     * software is writing in chucks less than 8 bytes
>> +     */
>> +    unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
>> +    s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);
>> +
>> +    s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
>> +    s->evtlog_enabled = s->enabled && !!(control &
>> +                        AMDVI_MMIO_CONTROL_EVENTLOGEN);
>> +
>> +    s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
>> +    s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
>> +    s->cmdbuf_enabled = s->enabled && !!(control &
>> +                        AMDVI_MMIO_CONTROL_CMDBUFLEN);
>> +
>> +    /* update the flags depending on the control register */
>> +    if (s->cmdbuf_enabled) {
>> +        amdvi_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
>> +    } else {
>> +        amdvi_and_assignq(s, AMDVI_MMIO_STATUS, 
>> ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
>> +    }
>> +    if (s->evtlog_enabled) {
>> +        amdvi_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
>> +    } else {
>> +        amdvi_and_assignq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
>> +    }
>> +
>> +    trace_amdvi_control_status(control);
>> +
>> +    amdvi_cmdbuf_run(s);
>> +}
>> +
>> +static inline void amdvi_handle_devtab_write(AMDVIState *s)
>> +
>> +{
>> +    uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
>> +    s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);
>> +
>> +    /* set device table length */
>> +    s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1 *
>> +                    (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
>> +                     AMDVI_MMIO_DEVTAB_ENTRY_SIZE));
>> +}
>> +
>> +static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
>> +{
>> +    s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
>> +                     & AMDVI_MMIO_CMDBUF_HEAD_MASK;
>> +    amdvi_cmdbuf_run(s);
>> +}
>> +
>> +static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
>> +{
>> +    s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
>> +                & AMDVI_MMIO_CMDBUF_BASE_MASK;
>> +    s->cmdbuf_len = 1UL << (s->mmior[AMDVI_MMIO_CMDBUF_SIZE_BYTE]
>> +                    & AMDVI_MMIO_CMDBUF_SIZE_MASK);
>> +    s->cmdbuf_head = s->cmdbuf_tail = 0;
>> +}
>> +
>> +static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
>> +{
>> +    s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
>> +                     & AMDVI_MMIO_CMDBUF_TAIL_MASK;
>> +    amdvi_cmdbuf_run(s);
>> +}
>> +
>> +static inline void amdvi_handle_excllim_write(AMDVIState *s)
>> +{
>> +    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
>> +    s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
>> +                    AMDVI_MMIO_EXCL_LIMIT_LOW;
>> +}
>> +
>> +static inline void amdvi_handle_evtbase_write(AMDVIState *s)
>> +{
>> +    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
>> +    s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
>> +    s->evtlog_len = 1UL << (*(uint64_t 
>> *)&s->mmior[AMDVI_MMIO_EVTLOG_SIZE_BYTE]
>> +                    & AMDVI_MMIO_EVTLOG_SIZE_MASK);
>> +}
>> +
>> +static inline void amdvi_handle_evttail_write(AMDVIState *s)
>> +{
>> +    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
>> +    s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
>> +}
>> +
>> +static inline void amdvi_handle_evthead_write(AMDVIState *s)
>> +{
>> +    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
>> +    s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
>> +}
>> +
>> +static inline void amdvi_handle_pprbase_write(AMDVIState *s)
>> +{
>> +    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
>> +    s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
>> +    s->pprlog_len = 1UL << (*(uint64_t 
>> *)&s->mmior[AMDVI_MMIO_PPRLOG_SIZE_BYTE]
>> +                    & AMDVI_MMIO_PPRLOG_SIZE_MASK);
>> +}
>> +
>> +static inline void amdvi_handle_pprhead_write(AMDVIState *s)
>> +{
>> +    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
>> +    s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
>> +}
>> +
>> +static inline void amdvi_handle_pprtail_write(AMDVIState *s)
>> +{
>> +    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
>> +    s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
>> +}
>> +
>> +/* FIXME: something might go wrong if System Software writes in chunks
>> + * of one byte but linux writes in chunks of 4 bytes so currently it
>> + * works correctly with linux but will definitely be busted if software
>> + * reads/writes 8 bytes
>
> What does the spec tell us about non-dword accesses? If they aren't
> allowed, filter them out completely at the beginning.

Non-dword accesses are allowed. Spec 2.62

".... Accesses must be aligned to the size of the access and the size
in bytes must be a power
of two. Software may use accesses as small as one byte..... "

Linux uses dword accesses though but even in this case a change in the
order of access whereby the high part of the register is accessed
first then the lower accessed could cause a problem (??)

>
>> + */
>> +static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
>> +                             unsigned size)
>> +{
>> +    AMDVIState *s = opaque;
>> +    unsigned long offset = addr & 0x07;
>> +
>> +    if (addr + size > AMDVI_MMIO_SIZE) {
>> +        trace_amdvi_mmio_write("error: addr outside region: max ",
>> +                (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
>> +        return;
>> +    }
>> +
>> +    switch (addr & ~0x07) {
>> +    case AMDVI_MMIO_CONTROL:
>> +        trace_amdvi_mmio_write("MMIO_CONTROL", addr, size, val, offset);
>> +        if (size == 2) {
>> +            amdvi_writew(s, addr, val);
>> +        } else if (size == 4) {
>> +            amdvi_writel(s, addr,  val);
>> +        } else if (size == 8) {
>> +            amdvi_writeq(s, addr, val);
>> +        }
>
> This pattern repeats and screams for being factored out into a helper
> function.
>
>> +        amdvi_handle_control_write(s);
>> +        break;
>> +
>> +    case AMDVI_MMIO_DEVICE_TABLE:
>> +        trace_amdvi_mmio_write("MMIO_DEVICE_TABLE", addr, size, val, 
>> offset);
>> +        if (size == 2) {
>> +            amdvi_writew(s, addr, val);
>> +        } else if (size == 4) {
>> +            amdvi_writel(s, addr, val);
>> +        } else if (size == 8) {
>> +            amdvi_writeq(s, addr, val);
>> +        }
>> +
>> +       /*  set device table address
>> +        *   This also suffers from inability to tell whether software
>> +        *   is done writing
>> +        */
>> +
>> +        if (offset || (size == 8)) {
>> +            amdvi_handle_devtab_write(s);
>> +        }
>> +        break;
>> +
>> +    case AMDVI_MMIO_COMMAND_HEAD:
>> +        trace_amdvi_mmio_write("MMIO_COMMAND_HEAD", addr, size, val, 
>> offset);
>> +        if (size == 2) {
>> +            amdvi_writew(s, addr, val);
>> +        } else if (size == 4) {
>> +            amdvi_writel(s, addr, val);
>> +        } else if (size == 8) {
>> +            amdvi_writeq(s, addr, val);
>> +        }
>> +
>> +        amdvi_handle_cmdhead_write(s);
>> +        break;
>> +
>> +    case AMDVI_MMIO_COMMAND_BASE:
>> +        trace_amdvi_mmio_write("MMIO_COMMAND_BASE", addr, size, val, 
>> offset);
>> +        if (size == 2) {
>> +            amdvi_writew(s, addr, val);
>> +        } else if (size == 4) {
>> +            amdvi_writel(s, addr, val);
>> +        } else if (size == 8) {
>> +            amdvi_writeq(s, addr, val);
>> +        }
>> +
>> +        /* FIXME - make sure System Software has finished writing incase
>> +         * it writes in chucks less than 8 bytes in a robust way.As for
>> +         * now, this hacks works for the linux driver
>> +         */
>> +        if (offset || (size == 8)) {
>> +            amdvi_handle_cmdbase_write(s);
>> +        }
>> +        break;
>> +
>> +    case AMDVI_MMIO_COMMAND_TAIL:
>> +        trace_amdvi_mmio_write("MMIO_COMMAND_TAIL", addr, size, val, 
>> offset);
>> +        if (size == 2) {
>> +            amdvi_writew(s, addr, val);
>> +        } else if (size == 4) {
>> +            amdvi_writel(s, addr, val);
>> +        } else if (size == 8) {
>> +            amdvi_writeq(s, addr, val);
>> +        }
>> +        amdvi_handle_cmdtail_write(s);
>> +        break;
>> +
>> +    case AMDVI_MMIO_EVENT_BASE:
>> +        trace_amdvi_mmio_write("MMIO_EVENT_BASE", addr, size, val, offset);
>> +        if (size == 2) {
>> +            amdvi_writew(s, addr, val);
>> +        } else if (size == 4) {
>> +            amdvi_writel(s, addr, val);
>> +        } else if (size == 8) {
>> +            amdvi_writeq(s, addr, val);
>> +        }
>> +        amdvi_handle_evtbase_write(s);
>> +        break;
>> +
>> +    case AMDVI_MMIO_EVENT_HEAD:
>> +        trace_amdvi_mmio_write("MMIO_EVENT_HEAD", addr, size, val, offset);
>> +        if (size == 2) {
>> +            amdvi_writew(s, addr, val);
>> +        } else if (size == 4) {
>> +            amdvi_writel(s, addr, val);
>> +        } else if (size == 8) {
>> +            amdvi_writeq(s, addr, val);
>> +        }
>> +        amdvi_handle_evthead_write(s);
>> +        break;
>> +
>> +    case AMDVI_MMIO_EVENT_TAIL:
>> +        trace_amdvi_mmio_write("MMIO_EVENT_TAIL", addr, size, val, offset);
>> +        if (size == 2) {
>> +            amdvi_writew(s, addr, val);
>> +        } else if (size == 4) {
>> +            amdvi_writel(s, addr, val);
>> +        } else if (size == 8) {
>> +            amdvi_writeq(s, addr, val);
>> +        }
>> +        amdvi_handle_evttail_write(s);
>> +        break;
>> +
>> +    case AMDVI_MMIO_EXCL_LIMIT:
>> +        trace_amdvi_mmio_write("MMIO_EXCL_LIMIT", addr, size, val, offset);
>> +        if (size == 2) {
>> +            amdvi_writew(s, addr, val);
>> +        } else if (size == 4) {
>> +            amdvi_writel(s, addr, val);
>> +        } else if (size == 8) {
>> +            amdvi_writeq(s, addr, val);
>> +        }
>> +        amdvi_handle_excllim_write(s);
>> +        break;
>> +
>> +        /* PPR log base - unused for now */
>> +    case AMDVI_MMIO_PPR_BASE:
>> +        trace_amdvi_mmio_write("MMIO_PPR_BASE", addr, size, val, offset);
>> +        if (size == 2) {
>> +            amdvi_writew(s, addr, val);
>> +        } else if (size == 4) {
>> +            amdvi_writel(s, addr, val);
>> +        } else if (size == 8) {
>> +            amdvi_writeq(s, addr, val);
>> +        }
>> +        amdvi_handle_pprbase_write(s);
>> +        break;
>> +        /* PPR log head - also unused for now */
>> +    case AMDVI_MMIO_PPR_HEAD:
>> +        trace_amdvi_mmio_write("MMIO_PPR_HEAD", addr, size, val, offset);
>> +        if (size == 2) {
>> +            amdvi_writew(s, addr, val);
>> +        } else if (size == 4) {
>> +            amdvi_writel(s, addr, val);
>> +        } else if (size == 8) {
>> +            amdvi_writeq(s, addr, val);
>> +        }
>> +        amdvi_handle_pprhead_write(s);
>> +        break;
>> +        /* PPR log tail - unused for now */
>> +    case AMDVI_MMIO_PPR_TAIL:
>> +        trace_amdvi_mmio_write("MMIO_PPR_TAIL", addr, size, val, offset);
>> +        if (size == 2) {
>> +            amdvi_writew(s, addr, val);
>> +        } else if (size == 4) {
>> +            amdvi_writel(s, addr, val);
>> +        } else if (size == 8) {
>> +            amdvi_writeq(s, addr, val);
>> +        }
>> +        amdvi_handle_pprtail_write(s);
>> +        break;
>> +
>> +        /* ignore write to ext_features */
>> +    default:
>> +        trace_amdvi_mmio_write("UNHANDLED MMIO", addr, size, val, offset);
>> +    }
>> +
>> +}
>> +
>> +
>> +/* PCI SIG constants */
>> +#define PCI_BUS_MAX 256
>> +#define PCI_SLOT_MAX 32
>> +#define PCI_FUNC_MAX 8
>> +#define PCI_DEVFN_MAX 256
>
> Shouldn't those four go to the pci header?

The macros/defines in PCI header are picked from linux while some of
these are not picked from linux. I'v prefixed them with AMDVI_ though.

>
>> +
>> +/* IOTLB */
>> +#define AMDVI_IOTLB_MAX_SIZE 1024
>> +#define AMDVI_DEVID_SHIFT    36
>> +
>> +/* extended feature support */
>> +#define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
>> +        AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_GA | \
>> +        AMDVI_FEATURE_HE | AMDVI_GATS_MODE | AMDVI_HATS_MODE)
>> +
>> +
>> +    /* IOTLB */
>> +    GHashTable *iotlb;
>> +} AMDVIState;
>> +
>> +#endif
>>
>
> I didn't look for the AMDVi logic itself, and I still need to redo some
> test myself (but that may take a while). Except for the few remarks, the
> code looks for me like being in pretty good shape!
>
> Jan
>



reply via email to

[Prev in Thread] Current Thread [Next in Thread]