[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [V12 3/4] hw/i386: Introduce AMD IOMMU
From: |
David Kiarie |
Subject: |
Re: [Qemu-devel] [V12 3/4] hw/i386: Introduce AMD IOMMU |
Date: |
Mon, 4 Jul 2016 08:06:00 +0300 |
On Wed, Jun 22, 2016 at 11:24 PM, Jan Kiszka <address@hidden> wrote:
> On 2016-06-15 14:21, David Kiarie wrote:
>> +
>> +/* System Software might never read from some of this fields but anyways */
>
> No read-modify-write accesses observed in the field? And fields like
> AMDVI_MMIO_STATUS or AMDVI_MMIO_EXT_FEATURES sound a lot like they are
> rather about reading than writing. Misleading comment?
Yeah, misleading comment. AMDVI_MMIO_EXT_FEATURES is read only while
some AMDVI_MMIO_STATUS is r/w1c and yes, I'm enforcing that in the
code.
>
>> +static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
>> +{
>> + AMDVIState *s = opaque;
>> +
>> + uint64_t val = -1;
>> + if (addr + size > AMDVI_MMIO_SIZE) {
>> + trace_amdvi_mmio_read("error: addr outside region: max ",
>> + (uint64_t)AMDVI_MMIO_SIZE, addr, size);
>> + return (uint64_t)-1;
>> + }
>> +
>> + if (size == 2) {
>> + val = amdvi_readw(s, addr);
>> + } else if (size == 4) {
>> + val = amdvi_readl(s, addr);
>> + } else if (size == 8) {
>> + val = amdvi_readq(s, addr);
>> + }
>> +
>> + switch (addr & ~0x07) {
>> + case AMDVI_MMIO_DEVICE_TABLE:
>> + trace_amdvi_mmio_read("MMIO_DEVICE_TABLE", addr, size, addr &
>> ~0x07);
>> + break;
>> +
>> + case AMDVI_MMIO_COMMAND_BASE:
>> + trace_amdvi_mmio_read("MMIO_COMMAND_BASE", addr, size, addr &
>> ~0x07);
>> + break;
>> +
>> + case AMDVI_MMIO_EVENT_BASE:
>> + trace_amdvi_mmio_read("MMIO_EVENT_BASE", addr, size, addr & ~0x07);
>> + break;
>> +
>> + case AMDVI_MMIO_CONTROL:
>> + trace_amdvi_mmio_read("MMIO_MMIO_CONTROL", addr, size, addr &
>> ~0x07);
>> + break;
>> +
>> + case AMDVI_MMIO_EXCL_BASE:
>> + trace_amdvi_mmio_read("MMIO_EXCL_BASE", addr, size, addr & ~0x07);
>> + break;
>> +
>> + case AMDVI_MMIO_EXCL_LIMIT:
>> + trace_amdvi_mmio_read("MMIO_EXCL_LIMIT", addr, size, addr & ~0x07);
>> + break;
>> +
>> + case AMDVI_MMIO_COMMAND_HEAD:
>> + trace_amdvi_mmio_read("MMIO_COMMAND_HEAD", addr, size, addr &
>> ~0x07);
>> + break;
>> +
>> + case AMDVI_MMIO_COMMAND_TAIL:
>> + trace_amdvi_mmio_read("MMIO_COMMAND_TAIL", addr, size, addr &
>> ~0x07);
>> + break;
>> +
>> + case AMDVI_MMIO_EVENT_HEAD:
>> + trace_amdvi_mmio_read("MMIO_EVENT_HEAD", addr, size, addr & ~0x07);
>> + break;
>> +
>> + case AMDVI_MMIO_EVENT_TAIL:
>> + trace_amdvi_mmio_read("MMIO_EVENT_TAIL", addr, size, addr & ~0x07);
>> + break;
>> +
>> + case AMDVI_MMIO_STATUS:
>> + trace_amdvi_mmio_read("MMIO_STATUS", addr, size, addr & ~0x07);
>> + break;
>> +
>> + case AMDVI_MMIO_EXT_FEATURES:
>> + trace_amdvi_mmio_read("MMIO_EXT_FEATURES", addr, size, addr &
>> ~0x07);
>> + break;
>
> What about a lookup table for that name?
I can't find an obvious way to index a table given the register address.
>
>> +
>> + default:
>> + trace_amdvi_mmio_read("UNHANDLED READ", addr, size, addr & ~0x07);
>> + }
>> + return val;
>> +}
>> +
>> +static void amdvi_handle_control_write(AMDVIState *s)
>> +{
>> + /*
>> + * read whatever is already written in case
>> + * software is writing in chucks less than 8 bytes
>> + */
>> + unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
>> + s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);
>> +
>> + s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
>> + s->evtlog_enabled = s->enabled && !!(control &
>> + AMDVI_MMIO_CONTROL_EVENTLOGEN);
>> +
>> + s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
>> + s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
>> + s->cmdbuf_enabled = s->enabled && !!(control &
>> + AMDVI_MMIO_CONTROL_CMDBUFLEN);
>> +
>> + /* update the flags depending on the control register */
>> + if (s->cmdbuf_enabled) {
>> + amdvi_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
>> + } else {
>> + amdvi_and_assignq(s, AMDVI_MMIO_STATUS,
>> ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
>> + }
>> + if (s->evtlog_enabled) {
>> + amdvi_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
>> + } else {
>> + amdvi_and_assignq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
>> + }
>> +
>> + trace_amdvi_control_status(control);
>> +
>> + amdvi_cmdbuf_run(s);
>> +}
>> +
>> +static inline void amdvi_handle_devtab_write(AMDVIState *s)
>> +
>> +{
>> + uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
>> + s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);
>> +
>> + /* set device table length */
>> + s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1 *
>> + (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
>> + AMDVI_MMIO_DEVTAB_ENTRY_SIZE));
>> +}
>> +
>> +static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
>> +{
>> + s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
>> + & AMDVI_MMIO_CMDBUF_HEAD_MASK;
>> + amdvi_cmdbuf_run(s);
>> +}
>> +
>> +static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
>> +{
>> + s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
>> + & AMDVI_MMIO_CMDBUF_BASE_MASK;
>> + s->cmdbuf_len = 1UL << (s->mmior[AMDVI_MMIO_CMDBUF_SIZE_BYTE]
>> + & AMDVI_MMIO_CMDBUF_SIZE_MASK);
>> + s->cmdbuf_head = s->cmdbuf_tail = 0;
>> +}
>> +
>> +static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
>> +{
>> + s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
>> + & AMDVI_MMIO_CMDBUF_TAIL_MASK;
>> + amdvi_cmdbuf_run(s);
>> +}
>> +
>> +static inline void amdvi_handle_excllim_write(AMDVIState *s)
>> +{
>> + uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
>> + s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
>> + AMDVI_MMIO_EXCL_LIMIT_LOW;
>> +}
>> +
>> +static inline void amdvi_handle_evtbase_write(AMDVIState *s)
>> +{
>> + uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
>> + s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
>> + s->evtlog_len = 1UL << (*(uint64_t
>> *)&s->mmior[AMDVI_MMIO_EVTLOG_SIZE_BYTE]
>> + & AMDVI_MMIO_EVTLOG_SIZE_MASK);
>> +}
>> +
>> +static inline void amdvi_handle_evttail_write(AMDVIState *s)
>> +{
>> + uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
>> + s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
>> +}
>> +
>> +static inline void amdvi_handle_evthead_write(AMDVIState *s)
>> +{
>> + uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
>> + s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
>> +}
>> +
>> +static inline void amdvi_handle_pprbase_write(AMDVIState *s)
>> +{
>> + uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
>> + s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
>> + s->pprlog_len = 1UL << (*(uint64_t
>> *)&s->mmior[AMDVI_MMIO_PPRLOG_SIZE_BYTE]
>> + & AMDVI_MMIO_PPRLOG_SIZE_MASK);
>> +}
>> +
>> +static inline void amdvi_handle_pprhead_write(AMDVIState *s)
>> +{
>> + uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
>> + s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
>> +}
>> +
>> +static inline void amdvi_handle_pprtail_write(AMDVIState *s)
>> +{
>> + uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
>> + s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
>> +}
>> +
>> +/* FIXME: something might go wrong if System Software writes in chunks
>> + * of one byte but linux writes in chunks of 4 bytes so currently it
>> + * works correctly with linux but will definitely be busted if software
>> + * reads/writes 8 bytes
>
> What does the spec tell us about non-dword accesses? If they aren't
> allowed, filter them out completely at the beginning.
Non-dword accesses are allowed. Spec 2.62
".... Accesses must be aligned to the size of the access and the size
in bytes must be a power
of two. Software may use accesses as small as one byte..... "
Linux uses dword accesses though but even in this case a change in the
order of access whereby the high part of the register is accessed
first then the lower accessed could cause a problem (??)
>
>> + */
>> +static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
>> + unsigned size)
>> +{
>> + AMDVIState *s = opaque;
>> + unsigned long offset = addr & 0x07;
>> +
>> + if (addr + size > AMDVI_MMIO_SIZE) {
>> + trace_amdvi_mmio_write("error: addr outside region: max ",
>> + (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
>> + return;
>> + }
>> +
>> + switch (addr & ~0x07) {
>> + case AMDVI_MMIO_CONTROL:
>> + trace_amdvi_mmio_write("MMIO_CONTROL", addr, size, val, offset);
>> + if (size == 2) {
>> + amdvi_writew(s, addr, val);
>> + } else if (size == 4) {
>> + amdvi_writel(s, addr, val);
>> + } else if (size == 8) {
>> + amdvi_writeq(s, addr, val);
>> + }
>
> This pattern repeats and screams for being factored out into a helper
> function.
>
>> + amdvi_handle_control_write(s);
>> + break;
>> +
>> + case AMDVI_MMIO_DEVICE_TABLE:
>> + trace_amdvi_mmio_write("MMIO_DEVICE_TABLE", addr, size, val,
>> offset);
>> + if (size == 2) {
>> + amdvi_writew(s, addr, val);
>> + } else if (size == 4) {
>> + amdvi_writel(s, addr, val);
>> + } else if (size == 8) {
>> + amdvi_writeq(s, addr, val);
>> + }
>> +
>> + /* set device table address
>> + * This also suffers from inability to tell whether software
>> + * is done writing
>> + */
>> +
>> + if (offset || (size == 8)) {
>> + amdvi_handle_devtab_write(s);
>> + }
>> + break;
>> +
>> + case AMDVI_MMIO_COMMAND_HEAD:
>> + trace_amdvi_mmio_write("MMIO_COMMAND_HEAD", addr, size, val,
>> offset);
>> + if (size == 2) {
>> + amdvi_writew(s, addr, val);
>> + } else if (size == 4) {
>> + amdvi_writel(s, addr, val);
>> + } else if (size == 8) {
>> + amdvi_writeq(s, addr, val);
>> + }
>> +
>> + amdvi_handle_cmdhead_write(s);
>> + break;
>> +
>> + case AMDVI_MMIO_COMMAND_BASE:
>> + trace_amdvi_mmio_write("MMIO_COMMAND_BASE", addr, size, val,
>> offset);
>> + if (size == 2) {
>> + amdvi_writew(s, addr, val);
>> + } else if (size == 4) {
>> + amdvi_writel(s, addr, val);
>> + } else if (size == 8) {
>> + amdvi_writeq(s, addr, val);
>> + }
>> +
>> + /* FIXME - make sure System Software has finished writing incase
>> + * it writes in chucks less than 8 bytes in a robust way.As for
>> + * now, this hacks works for the linux driver
>> + */
>> + if (offset || (size == 8)) {
>> + amdvi_handle_cmdbase_write(s);
>> + }
>> + break;
>> +
>> + case AMDVI_MMIO_COMMAND_TAIL:
>> + trace_amdvi_mmio_write("MMIO_COMMAND_TAIL", addr, size, val,
>> offset);
>> + if (size == 2) {
>> + amdvi_writew(s, addr, val);
>> + } else if (size == 4) {
>> + amdvi_writel(s, addr, val);
>> + } else if (size == 8) {
>> + amdvi_writeq(s, addr, val);
>> + }
>> + amdvi_handle_cmdtail_write(s);
>> + break;
>> +
>> + case AMDVI_MMIO_EVENT_BASE:
>> + trace_amdvi_mmio_write("MMIO_EVENT_BASE", addr, size, val, offset);
>> + if (size == 2) {
>> + amdvi_writew(s, addr, val);
>> + } else if (size == 4) {
>> + amdvi_writel(s, addr, val);
>> + } else if (size == 8) {
>> + amdvi_writeq(s, addr, val);
>> + }
>> + amdvi_handle_evtbase_write(s);
>> + break;
>> +
>> + case AMDVI_MMIO_EVENT_HEAD:
>> + trace_amdvi_mmio_write("MMIO_EVENT_HEAD", addr, size, val, offset);
>> + if (size == 2) {
>> + amdvi_writew(s, addr, val);
>> + } else if (size == 4) {
>> + amdvi_writel(s, addr, val);
>> + } else if (size == 8) {
>> + amdvi_writeq(s, addr, val);
>> + }
>> + amdvi_handle_evthead_write(s);
>> + break;
>> +
>> + case AMDVI_MMIO_EVENT_TAIL:
>> + trace_amdvi_mmio_write("MMIO_EVENT_TAIL", addr, size, val, offset);
>> + if (size == 2) {
>> + amdvi_writew(s, addr, val);
>> + } else if (size == 4) {
>> + amdvi_writel(s, addr, val);
>> + } else if (size == 8) {
>> + amdvi_writeq(s, addr, val);
>> + }
>> + amdvi_handle_evttail_write(s);
>> + break;
>> +
>> + case AMDVI_MMIO_EXCL_LIMIT:
>> + trace_amdvi_mmio_write("MMIO_EXCL_LIMIT", addr, size, val, offset);
>> + if (size == 2) {
>> + amdvi_writew(s, addr, val);
>> + } else if (size == 4) {
>> + amdvi_writel(s, addr, val);
>> + } else if (size == 8) {
>> + amdvi_writeq(s, addr, val);
>> + }
>> + amdvi_handle_excllim_write(s);
>> + break;
>> +
>> + /* PPR log base - unused for now */
>> + case AMDVI_MMIO_PPR_BASE:
>> + trace_amdvi_mmio_write("MMIO_PPR_BASE", addr, size, val, offset);
>> + if (size == 2) {
>> + amdvi_writew(s, addr, val);
>> + } else if (size == 4) {
>> + amdvi_writel(s, addr, val);
>> + } else if (size == 8) {
>> + amdvi_writeq(s, addr, val);
>> + }
>> + amdvi_handle_pprbase_write(s);
>> + break;
>> + /* PPR log head - also unused for now */
>> + case AMDVI_MMIO_PPR_HEAD:
>> + trace_amdvi_mmio_write("MMIO_PPR_HEAD", addr, size, val, offset);
>> + if (size == 2) {
>> + amdvi_writew(s, addr, val);
>> + } else if (size == 4) {
>> + amdvi_writel(s, addr, val);
>> + } else if (size == 8) {
>> + amdvi_writeq(s, addr, val);
>> + }
>> + amdvi_handle_pprhead_write(s);
>> + break;
>> + /* PPR log tail - unused for now */
>> + case AMDVI_MMIO_PPR_TAIL:
>> + trace_amdvi_mmio_write("MMIO_PPR_TAIL", addr, size, val, offset);
>> + if (size == 2) {
>> + amdvi_writew(s, addr, val);
>> + } else if (size == 4) {
>> + amdvi_writel(s, addr, val);
>> + } else if (size == 8) {
>> + amdvi_writeq(s, addr, val);
>> + }
>> + amdvi_handle_pprtail_write(s);
>> + break;
>> +
>> + /* ignore write to ext_features */
>> + default:
>> + trace_amdvi_mmio_write("UNHANDLED MMIO", addr, size, val, offset);
>> + }
>> +
>> +}
>> +
>> +
>> +/* PCI SIG constants */
>> +#define PCI_BUS_MAX 256
>> +#define PCI_SLOT_MAX 32
>> +#define PCI_FUNC_MAX 8
>> +#define PCI_DEVFN_MAX 256
>
> Shouldn't those four go to the pci header?
The macros/defines in PCI header are picked from linux while some of
these are not picked from linux. I'v prefixed them with AMDVI_ though.
>
>> +
>> +/* IOTLB */
>> +#define AMDVI_IOTLB_MAX_SIZE 1024
>> +#define AMDVI_DEVID_SHIFT 36
>> +
>> +/* extended feature support */
>> +#define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
>> + AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_GA | \
>> + AMDVI_FEATURE_HE | AMDVI_GATS_MODE | AMDVI_HATS_MODE)
>> +
>> +
>> + /* IOTLB */
>> + GHashTable *iotlb;
>> +} AMDVIState;
>> +
>> +#endif
>>
>
> I didn't look for the AMDVi logic itself, and I still need to redo some
> test myself (but that may take a while). Except for the few remarks, the
> code looks for me like being in pretty good shape!
>
> Jan
>
- Re: [Qemu-devel] [V12 3/4] hw/i386: Introduce AMD IOMMU,
David Kiarie <=