+ }
+
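+ /*
+ * When the context cache grows past LIMIT_CACHE_CTX, swap in a fresh
+ * empty table; the old table is released once concurrent lookups have
+ * dropped their references.
+ */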
+ if (g_hash_table_size(s->ctx_cache) >= LIMIT_CACHE_CTX) {
+ ctx_cache = g_hash_table_new_full(__ctx_hash, __ctx_equal,
+ g_free, NULL);
+ g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
+ }
+
+ ctx = g_new0(RISCVIOMMUContext, 1);
+ ctx->devid = devid;
+ ctx->pasid = pasid;
+
+ int fault = riscv_iommu_ctx_fetch(s, ctx);
+ if (!fault) {
+ g_hash_table_add(ctx_cache, ctx);
+ *ref = ctx_cache;
+ return ctx;
+ }
+
+ g_hash_table_unref(ctx_cache);
+ *ref = NULL;
+
+ riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
+ fault, !!pasid, 0, 0);
+
+ g_free(ctx);
+ return NULL;
+}
+
+static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
+{
+ if (ref) {
+ g_hash_table_unref((GHashTable *)ref);
+ }
+}
+
+/* Find or allocate address space for a given device */
+static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
+{
+ RISCVIOMMUSpace *as;
+
+ /* FIXME: PCIe bus remapping for attached endpoints. */
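+ /* 16-bit BDF: bus in [15:8], device in [7:3], function in [2:0]. */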
+ devid |= s->bus << 8;
+
+ qemu_mutex_lock(&s->core_lock);
+ QLIST_FOREACH(as, &s->spaces, list) {
+ if (as->devid == devid) {
+ break;
+ }
+ }
+ qemu_mutex_unlock(&s->core_lock);
+
+ if (as == NULL) {
+ char name[64];
+ as = g_new0(RISCVIOMMUSpace, 1);
+
+ as->iommu = s;
+ as->devid = devid;
+
+ snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
+ PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid));
+
+ /* IOVA address space, untranslated addresses */
+ memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr),
+ TYPE_RISCV_IOMMU_MEMORY_REGION,
+ OBJECT(as), "riscv_iommu", UINT64_MAX);
+ address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name);
+
+ qemu_mutex_lock(&s->core_lock);
+ QLIST_INSERT_HEAD(&s->spaces, as, list);
+ qemu_mutex_unlock(&s->core_lock);
+
+ trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid),
+ PCI_SLOT(as->devid), PCI_FUNC(as->devid));
+ }
+ return &as->iova_as;
+}
+
+static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
+ IOMMUTLBEntry *iotlb)
+{
+ bool enable_pasid;
+ bool enable_pri;
+ int fault;
+
+ /*
+ * TC[32] is reserved for custom extensions, used here to temporarily
+ * enable automatic page-request generation for ATS queries.
+ */
+ enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
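+ /*
+ * TC.PDTV set means the device context carries a process directory
+ * table pointer, so the PASID qualifies translations and is reported
+ * with faults.
+ */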
+ enable_pasid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);
+
+ /* Translate using device directory / page table information. */
+ fault = riscv_iommu_spa_fetch(s, ctx, iotlb);
+
+ if (enable_pri && fault) {
+ struct riscv_iommu_pq_record pr = {0};
+ if (enable_pasid) {
+ pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV,
+ RISCV_IOMMU_PREQ_HDR_PID, ctx->pasid);
+ }
+ pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid);
+ pr.payload = (iotlb->iova & TARGET_PAGE_MASK) |
+ RISCV_IOMMU_PREQ_PAYLOAD_M;
+ riscv_iommu_pri(s, &pr);
+ return fault;
+ }
+
+ if (fault) {
+ unsigned ttype;
+
+ if (iotlb->perm & IOMMU_RW) {
+ ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
+ } else {
+ ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD;
+ }
+
+ riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pasid,
+ iotlb->iova, iotlb->translated_addr);
+ return fault;
+ }
+
+ return 0;
+}
+
+/* IOMMU Command Interface */
+static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify,
+ uint64_t addr, uint32_t data)
+{
+ /*
+ * ATS processing in this implementation of the IOMMU is synchronous;
+ * there is no need to wait for completions here.
+ */
+ if (!notify) {
+ return MEMTX_OK;
+ }
+
+ return dma_memory_write(s->target_as, addr, &data, sizeof(data),
+ MEMTXATTRS_UNSPECIFIED);
+}
+
+static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
+{
+ uint64_t old_ddtp = s->ddtp;
+ uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
+ unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE);
+ unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE);
+ bool ok = false;
+
+ /*
+ * Check for allowed DDTP.MODE transitions:
+ * {OFF, BARE} -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
+ * {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
+ */
+ if (new_mode == old_mode ||
+ new_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
+ new_mode == RISCV_IOMMU_DDTP_MODE_BARE) {
+ ok = true;
+ } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL ||
+ new_mode == RISCV_IOMMU_DDTP_MODE_2LVL ||
+ new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) {
+ ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
+ old_mode == RISCV_IOMMU_DDTP_MODE_BARE;
+ }
+
+ if (ok) {
+ /* clear reserved and busy bits, report back sanitized version */
+ new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN,
+ RISCV_IOMMU_DDTP_MODE, new_mode);
+ } else {
+ new_ddtp = old_ddtp;
+ }
+ s->ddtp = new_ddtp;
+
+ riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp);
+}
+
+/* Dispatch key: the command function code combined with the 7-bit opcode. */
+#define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))
+
+static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
+{
+ struct riscv_iommu_command cmd;
+ MemTxResult res;
+ dma_addr_t addr;
+ uint32_t tail, head, ctrl;
+ uint64_t cmd_opcode;
+ GHFunc func;
+
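+ /*
+ * The command queue is a power-of-two ring: software produces entries
+ * at CQT, the IOMMU consumes at CQH, and cq_mask wraps both indices.
+ */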
+ ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
+ tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
+ head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;
+
+ /* Check for pending error or queue processing disabled */
+ if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
+ !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
+ return;
+ }
+
+ while (tail != head) {
+ addr = s->cq_addr + head * sizeof(cmd);
+ res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
+ MEMTXATTRS_UNSPECIFIED);
+
+ if (res != MEMTX_OK) {
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
+ RISCV_IOMMU_CQCSR_CQMF, 0);
+ goto fault;
+ }
+
+ trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);
+
+ cmd_opcode = get_field(cmd.dword0,
+ RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);
+
+ switch (cmd_opcode) {
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
+ RISCV_IOMMU_CMD_IOFENCE_OPCODE):
+ res = riscv_iommu_iofence(s,
+ cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1,
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));
+
+ if (res != MEMTX_OK) {
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
+ RISCV_IOMMU_CQCSR_CQMF, 0);
+ goto fault;
+ }
+ break;
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
+ RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
+ if (cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV) {
+ /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
+ goto cmd_ill;
+ }
+ /* translation cache not implemented yet */
+ break;
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
+ RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
+ /* translation cache not implemented yet */
+ break;
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
+ RISCV_IOMMU_CMD_IODIR_OPCODE):
+ if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
+ /* invalidate all device context cache mappings */
+ func = __ctx_inval_all;
+ } else {
+ /* invalidate all device context matching DID */
+ func = __ctx_inval_devid;
+ }
+ riscv_iommu_ctx_inval(s, func,
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
+ break;
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
+ RISCV_IOMMU_CMD_IODIR_OPCODE):
+ if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
+ /* illegal command arguments IODIR_PDT & DV == 0 */
+ goto cmd_ill;
+ } else {
+ func = __ctx_inval_devid_pasid;
+ }
+ riscv_iommu_ctx_inval(s, func,
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
+ break;
+
+ default:
+ cmd_ill:
+ /* Invalid instruction, do not advance instruction index. */
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
+ RISCV_IOMMU_CQCSR_CMD_ILL, 0);
+ goto fault;
+ }
+
+ /* Advance and update head pointer after command completes. */
+ head = (head + 1) & s->cq_mask;
+ riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
+ }
+ return;
+
+fault:
+ if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
+ riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
+ }
+}
+
+static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
+{
+ uint64_t base;
+ uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
+ uint32_t ctrl_clr;
+ bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN);
+ bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON);
+
+ if (enable && !active) {
+ base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);
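+ /* CQB.LOG2SZ holds log2(queue entries) - 1: size is 2 << LOG2SZ */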
+ s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
+ s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN));
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
+ stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
+ stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
+ ctrl_set = RISCV_IOMMU_CQCSR_CQON;
+ ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF |
+ RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
+ RISCV_IOMMU_CQCSR_FENCE_W_IP;
+ } else if (!enable && active) {
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
+ ctrl_set = 0;
+ ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON;
+ } else {
+ ctrl_set = 0;
+ ctrl_clr = RISCV_IOMMU_CQCSR_BUSY;
+ }
+
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr);
+}
+
+static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
+{
+ uint64_t base;
+ uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
+ uint32_t ctrl_clr;
+ bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN);
+ bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON);
+
+ if (enable && !active) {
+ base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);
+ s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
+ s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN));
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
+ stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
+ stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);
+ ctrl_set = RISCV_IOMMU_FQCSR_FQON;
+ ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF |
+ RISCV_IOMMU_FQCSR_FQOF;
+ } else if (!enable && active) {
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
+ ctrl_set = 0;
+ ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON;
+ } else {
+ ctrl_set = 0;
+ ctrl_clr = RISCV_IOMMU_FQCSR_BUSY;
+ }
+
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr);
+}
+
+static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
+{
+ uint64_t base;
+ uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
+ uint32_t ctrl_clr;
+ bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN);
+ bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON);
+
+ if (enable && !active) {
+ base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
+ s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
+ s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN));
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
+ stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
+ stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
+ ctrl_set = RISCV_IOMMU_PQCSR_PQON;
+ ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF |
+ RISCV_IOMMU_PQCSR_PQOF;
+ } else if (!enable && active) {
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
+ ctrl_set = 0;
+ ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON;
+ } else {
+ ctrl_set = 0;
+ ctrl_clr = RISCV_IOMMU_PQCSR_BUSY;
+ }
+
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
+}
+
+typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);
+
+static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
+{
+ uint32_t cqcsr, fqcsr, pqcsr;
+ uint32_t ipsr_set = 0;
+ uint32_t ipsr_clr = 0;
+
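+ /*
+ * Pending bits not cleared by this write are re-evaluated against the
+ * matching queue CSR and kept only while an enabled interrupt cause is
+ * still active; bits cleared by the write stay cleared.
+ */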
+ if (data & RISCV_IOMMU_IPSR_CIP) {
+ cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
+
+ if (cqcsr & RISCV_IOMMU_CQCSR_CIE &&
+ (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP ||
+ cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL ||
+ cqcsr & RISCV_IOMMU_CQCSR_CMD_TO ||
+ cqcsr & RISCV_IOMMU_CQCSR_CQMF)) {
+ ipsr_set |= RISCV_IOMMU_IPSR_CIP;
+ } else {
+ ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
+ }
+ } else {
+ ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
+ }
+
+ if (data & RISCV_IOMMU_IPSR_FIP) {
+ fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
+
+ if (fqcsr & RISCV_IOMMU_FQCSR_FIE &&
+ (fqcsr & RISCV_IOMMU_FQCSR_FQOF ||
+ fqcsr & RISCV_IOMMU_FQCSR_FQMF)) {
+ ipsr_set |= RISCV_IOMMU_IPSR_FIP;
+ } else {
+ ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
+ }
+ } else {
+ ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
+ }
+
+ if (data & RISCV_IOMMU_IPSR_PIP) {
+ pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
+
+ if (pqcsr & RISCV_IOMMU_PQCSR_PIE &&
+ (pqcsr & RISCV_IOMMU_PQCSR_PQOF ||
+ pqcsr & RISCV_IOMMU_PQCSR_PQMF)) {
+ ipsr_set |= RISCV_IOMMU_IPSR_PIP;
+ } else {
+ ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
+ }
+ } else {
+ ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
+ }
+
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
+}
+
+static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size, MemTxAttrs attrs)
+{
+ riscv_iommu_process_fn *process_fn = NULL;
+ RISCVIOMMUState *s = opaque;
+ uint32_t regb = addr & ~3;
+ uint32_t busy = 0;
+ uint64_t val = 0;
+
+ if ((addr & (size - 1)) != 0) {
+ /* Unsupported MMIO alignment or access size */
+ return MEMTX_ERROR;
+ }
+
+ if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
+ /* Unsupported MMIO access location. */
+ return MEMTX_ACCESS_ERROR;
+ }
+
+ /* Track actionable MMIO write. */
+ switch (regb) {
+ case RISCV_IOMMU_REG_DDTP:
+ case RISCV_IOMMU_REG_DDTP + 4:
+ process_fn = riscv_iommu_process_ddtp;
+ regb = RISCV_IOMMU_REG_DDTP;
+ busy = RISCV_IOMMU_DDTP_BUSY;
+ break;
+
+ case RISCV_IOMMU_REG_CQT:
+ process_fn = riscv_iommu_process_cq_tail;
+ break;
+
+ case RISCV_IOMMU_REG_CQCSR:
+ process_fn = riscv_iommu_process_cq_control;
+ busy = RISCV_IOMMU_CQCSR_BUSY;
+ break;
+
+ case RISCV_IOMMU_REG_FQCSR:
+ process_fn = riscv_iommu_process_fq_control;
+ busy = RISCV_IOMMU_FQCSR_BUSY;
+ break;
+
+ case RISCV_IOMMU_REG_PQCSR:
+ process_fn = riscv_iommu_process_pq_control;
+ busy = RISCV_IOMMU_PQCSR_BUSY;
+ break;
+
+ case RISCV_IOMMU_REG_IPSR:
+ /*
+ * IPSR has a special update procedure: compute the merged
+ * value, process it, and return.
+ */
+ if (size == 4) {
+ uint32_t ro = ldl_le_p(&s->regs_ro[addr]);
+ uint32_t wc = ldl_le_p(&s->regs_wc[addr]);
+ uint32_t rw = ldl_le_p(&s->regs_rw[addr]);
+ stl_le_p(&val, ((rw & ro) | (data & ~ro)) & ~(data & wc));
+ } else if (size == 8) {
+ uint64_t ro = ldq_le_p(&s->regs_ro[addr]);
+ uint64_t wc = ldq_le_p(&s->regs_wc[addr]);
+ uint64_t rw = ldq_le_p(&s->regs_rw[addr]);
+ stq_le_p(&val, ((rw & ro) | (data & ~ro)) & ~(data & wc));
+ }
+
+ riscv_iommu_update_ipsr(s, val);
+
+ return MEMTX_OK;
+
+ default:
+ break;
+ }
+
+ /*
+ * Register updates might not be synchronized with the core logic.
+ * If system software updates a register while the relevant BUSY bit
+ * is set, the IOMMU behavior for additional writes to that register
+ * is UNSPECIFIED.
+ */
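+ /*
+ * Merged register value: read-only bits keep their current state,
+ * writable bits take the written data, and write-1-to-clear bits are
+ * cleared wherever the written data is 1:
+ * new = ((rw & ro) | (data & ~ro)) & ~(data & wc)
+ */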
+ qemu_spin_lock(&s->regs_lock);
+ if (size == 1) {
+ uint8_t ro = s->regs_ro[addr];
+ uint8_t wc = s->regs_wc[addr];
+ uint8_t rw = s->regs_rw[addr];
+ s->regs_rw[addr] = ((rw & ro) | (data & ~ro)) & ~(data & wc);
+ } else if (size == 2) {
+ uint16_t ro = lduw_le_p(&s->regs_ro[addr]);
+ uint16_t wc = lduw_le_p(&s->regs_wc[addr]);
+ uint16_t rw = lduw_le_p(&s->regs_rw[addr]);
+ stw_le_p(&s->regs_rw[addr], ((rw & ro) | (data & ~ro)) & ~(data & wc));
+ } else if (size == 4) {
+ uint32_t ro = ldl_le_p(&s->regs_ro[addr]);
+ uint32_t wc = ldl_le_p(&s->regs_wc[addr]);
+ uint32_t rw = ldl_le_p(&s->regs_rw[addr]);
+ stl_le_p(&s->regs_rw[addr], ((rw & ro) | (data & ~ro)) & ~(data & wc));
+ } else if (size == 8) {
+ uint64_t ro = ldq_le_p(&s->regs_ro[addr]);
+ uint64_t wc = ldq_le_p(&s->regs_wc[addr]);
+ uint64_t rw = ldq_le_p(&s->regs_rw[addr]);
+ stq_le_p(&s->regs_rw[addr], ((rw & ro) | (data & ~ro)) & ~(data & wc));
+ }
+
+ /* Busy flag update, MSB 4-byte register. */
+ if (busy) {
+ uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
+ stl_le_p(&s->regs_rw[regb], rw | busy);
+ }
+ qemu_spin_unlock(&s->regs_lock);
+
+ if (process_fn) {
+ qemu_mutex_lock(&s->core_lock);
+ process_fn(s);
+ qemu_mutex_unlock(&s->core_lock);
+ }
+
+ return MEMTX_OK;
+}
+
+static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
+ uint64_t *data, unsigned size, MemTxAttrs attrs)
+{
+ RISCVIOMMUState *s = opaque;
+ uint64_t val = -1;
+ uint8_t *ptr;
+
+ if ((addr & (size - 1)) != 0) {
+ /* Unsupported MMIO alignment. */
+ return MEMTX_ERROR;
+ }
+
+ if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
+ return MEMTX_ACCESS_ERROR;
+ }
+
+ ptr = &s->regs_rw[addr];
+
+ if (size == 1) {
+ val = (uint64_t)*ptr;
+ } else if (size == 2) {
+ val = lduw_le_p(ptr);
+ } else if (size == 4) {
+ val = ldl_le_p(ptr);
+ } else if (size == 8) {
+ val = ldq_le_p(ptr);
+ } else {
+ return MEMTX_ERROR;
+ }
+
+ *data = val;
+
+ return MEMTX_OK;
+}
+
+static const MemoryRegionOps riscv_iommu_mmio_ops = {
+ .read_with_attrs = riscv_iommu_mmio_read,
+ .write_with_attrs = riscv_iommu_mmio_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ .unaligned = false,
+ },
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ }
+};
+
+/*
+ * Translations matching an MSI pattern check are redirected to the
+ * "riscv-iommu-trap" memory region as untranslated addresses, for
+ * additional MSI/MRIF interception by the IOMMU interrupt remapping
+ * implementation.
+ * Note: device emulation code generating an MSI is expected to provide
+ * valid memory transaction attributes with requester_id set.
+ */
+static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size, MemTxAttrs attrs)
+{
+ RISCVIOMMUState *s = (RISCVIOMMUState *)opaque;
+ RISCVIOMMUContext *ctx;
+ MemTxResult res;
+ void *ref;
+ uint32_t devid = attrs.requester_id;
+
+ if (attrs.unspecified) {
+ return MEMTX_ACCESS_ERROR;
+ }
+
+ /* FIXME: PCIe bus remapping for attached endpoints. */
+ devid |= s->bus << 8;
+
+ ctx = riscv_iommu_ctx(s, devid, 0, &ref);
+ if (ctx == NULL) {
+ res = MEMTX_ACCESS_ERROR;
+ } else {
+ res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
+ }
+ riscv_iommu_ctx_put(s, ref);
+ return res;
+}
+
+static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
+ uint64_t *data, unsigned size, MemTxAttrs attrs)
+{
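+ /* Only MSI/MRIF writes are trapped; reads through this region fail. */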
+ return MEMTX_ACCESS_ERROR;
+}
+
+static const MemoryRegionOps riscv_iommu_trap_ops = {
+ .read_with_attrs = riscv_iommu_trap_read,
+ .write_with_attrs = riscv_iommu_trap_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ .unaligned = true,
+ },
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ }
+};
+
+static void riscv_iommu_realize(DeviceState *dev, Error **errp)
+{
+ RISCVIOMMUState *s = RISCV_IOMMU(dev);
+
+ s->cap = s->version & RISCV_IOMMU_CAP_VERSION;
+ if (s->enable_msi) {
+ s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
+ }
+ /* Report QEMU target physical address space limits */
+ s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
+ TARGET_PHYS_ADDR_SPACE_BITS);
+
+ /* TODO: method to report supported PASID bits */
+ s->pasid_bits = 8; /* restricted to size of MemTxAttrs.pasid */
+ s->cap |= RISCV_IOMMU_CAP_PD8;
+
+ /* Out-of-reset translation mode: OFF (DMA disabled) or BARE (passthrough) */
+ s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
+ RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);
+
+ /* register storage */
+ s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
+ s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
+ s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
+
+ /* Mark all registers read-only */
+ memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);
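+ /* Writable fields are re-enabled below by clearing their regs_ro bits. */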
+
+ /*
+ * Register complete MMIO space, including MSI/PBA registers.
+ * Note, PCIDevice implementation will add overlapping MR for MSI/PBA,
+ * managed directly by the PCIDevice implementation.
+ */
+ memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
+ "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);
+
+ /* Set power-on register state */
+ stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
+ stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
+ stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
+ ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
+ stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
+ ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
+ stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
+ ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
+ stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
+ ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
+ stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
+ RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
+ RISCV_IOMMU_CQCSR_BUSY);
+ stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
+ RISCV_IOMMU_FQCSR_FQOF);
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
+ RISCV_IOMMU_FQCSR_BUSY);
+ stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
+ RISCV_IOMMU_PQCSR_PQOF);
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
+ RISCV_IOMMU_PQCSR_BUSY);
+ stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_IVEC], 0);
+ stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
+
+ /* Memory region for downstream access, if specified. */
+ if (s->target_mr) {
+ s->target_as = g_new0(AddressSpace, 1);
+ address_space_init(s->target_as, s->target_mr,
+ "riscv-iommu-downstream");
+ } else {
+ /* Fallback to global system memory. */
+ s->target_as = &address_space_memory;
+ }
+
+ /* Memory region for untranslated MRIF/MSI writes */
+ memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
+ "riscv-iommu-trap", ~0ULL);
+ address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");
+
+ /* Device translation context cache */
+ s->ctx_cache = g_hash_table_new_full(__ctx_hash, __ctx_equal,
+ g_free, NULL);
+
+ s->iommus.le_next = NULL;
+ s->iommus.le_prev = NULL;
+ QLIST_INIT(&s->spaces);
+ qemu_mutex_init(&s->core_lock);
+ qemu_spin_init(&s->regs_lock);
+}
+
+static void riscv_iommu_unrealize(DeviceState *dev)
+{
+ RISCVIOMMUState *s = RISCV_IOMMU(dev);
+
+ qemu_mutex_destroy(&s->core_lock);
+ g_hash_table_unref(s->ctx_cache);
+}
+
+static Property riscv_iommu_properties[] = {
+ DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
+ RISCV_IOMMU_SPEC_DOT_VER),
+ DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
+ DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
+ DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
+ DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
+ TYPE_MEMORY_REGION, MemoryRegion *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void riscv_iommu_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
+ dc->user_creatable = false;
+ dc->realize = riscv_iommu_realize;
+ dc->unrealize = riscv_iommu_unrealize;
+ device_class_set_props(dc, riscv_iommu_properties);
+}
+
+static const TypeInfo riscv_iommu_info = {
+ .name = TYPE_RISCV_IOMMU,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(RISCVIOMMUState),
+ .class_init = riscv_iommu_class_init,
+};
+
+static const char *IOMMU_FLAG_STR[] = {
+ "NA",
+ "RO",
+ "WR",
+ "RW",
+};
+
+/* RISC-V IOMMU Memory Region - Address Translation Space */
+static IOMMUTLBEntry riscv_iommu_memory_region_translate(
+ IOMMUMemoryRegion *iommu_mr, hwaddr addr,
+ IOMMUAccessFlags flag, int iommu_idx)
+{
+ RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
+ RISCVIOMMUContext *ctx;
+ void *ref;
+ IOMMUTLBEntry iotlb = {
+ .iova = addr,
+ .target_as = as->iommu->target_as,
+ .addr_mask = ~0ULL,
+ .perm = flag,
+ };
+
+ ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
+ if (ctx == NULL) {
+ /* Translation disabled or invalid. */
+ iotlb.addr_mask = 0;
+ iotlb.perm = IOMMU_NONE;
+ } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb)) {
+ /* Translation disabled or fault reported. */
+ iotlb.addr_mask = 0;
+ iotlb.perm = IOMMU_NONE;
+ }
+
+ /* Trace all dma translations with original access flags. */
+ trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
+ PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
+ IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
+ iotlb.translated_addr);
+
+ riscv_iommu_ctx_put(as->iommu, ref);
+
+ return iotlb;
+}
+
+static int riscv_iommu_memory_region_notify(
+ IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
+ IOMMUNotifierFlag new, Error **errp)
+{
+ RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
+
+ if (old == IOMMU_NOTIFIER_NONE) {
+ as->notifier = true;
+ trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
+ } else if (new == IOMMU_NOTIFIER_NONE) {
+ as->notifier = false;
+ trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
+ }
+
+ return 0;
+}
+
+static inline bool pci_is_iommu(PCIDevice *pdev)
+{
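+ /* PCI class 0x0806: base class 08h (system peripheral), IOMMU (06h). */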
+ return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806;
+}
+
+static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
+{
+ RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
+ PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
+ AddressSpace *as = NULL;
+
+ if (pdev && pci_is_iommu(pdev)) {
+ return s->target_as;
+ }
+
+ /* Find first registered IOMMU device */
+ while (s->iommus.le_prev) {
+ s = *(s->iommus.le_prev);
+ }
+
+ /* Find first matching IOMMU */
+ while (s != NULL && as == NULL) {
+ as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
+ s = s->iommus.le_next;
+ }
+
+ return as ? as : &address_space_memory;
+}
+
+static const PCIIOMMUOps riscv_iommu_ops = {
+ .get_address_space = riscv_iommu_find_as,
+};
+
+void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
+ Error **errp)
+{
+ if (bus->iommu_ops &&
+ bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
+ /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
+ RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
+ QLIST_INSERT_AFTER(last, iommu, iommus);
+ } else if (!bus->iommu_ops && !bus->iommu_opaque) {
+ pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
+ } else {
+ error_setg(errp, "can't register secondary IOMMU for PCI bus
#%d",
+ pci_bus_num(bus));
+ }
+}
+
+static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
+ MemTxAttrs attrs)
+{
+ return attrs.unspecified ? RISCV_IOMMU_NOPASID : (int)attrs.pasid;
+}
+
+static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
+{
+ RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
+ return 1 << as->iommu->pasid_bits;
+}
+
+static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
+
+ imrc->translate = riscv_iommu_memory_region_translate;
+ imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
+ imrc->attrs_to_index = riscv_iommu_memory_region_index;
+ imrc->num_indexes = riscv_iommu_memory_region_index_len;
+}
+
+static const TypeInfo riscv_iommu_memory_region_info = {
+ .parent = TYPE_IOMMU_MEMORY_REGION,
+ .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
+ .class_init = riscv_iommu_memory_region_init,
+};
+
+static void riscv_iommu_register_mr_types(void)
+{
+ type_register_static(&riscv_iommu_memory_region_info);
+ type_register_static(&riscv_iommu_info);
+}
+
+type_init(riscv_iommu_register_mr_types);
diff --git a/hw/riscv/riscv-iommu.h b/hw/riscv/riscv-iommu.h
new file mode 100644
index 0000000000..31d3907d33
--- /dev/null
+++ b/hw/riscv/riscv-iommu.h
@@ -0,0 +1,141 @@
+/*
+ * QEMU emulation of a RISC-V IOMMU
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_IOMMU_STATE_H
+#define HW_RISCV_IOMMU_STATE_H
+
+#include "qemu/osdep.h"
+#include "qom/object.h"
+
+#include "hw/riscv/iommu.h"
+
+struct RISCVIOMMUState {
+ /*< private >*/
+ DeviceState parent_obj;
+
+ /*< public >*/
+ uint32_t version; /* Reported interface version number */
+ uint32_t pasid_bits; /* process identifier width */
+ uint32_t bus; /* PCI bus mapping for non-root endpoints */
+
+ uint64_t cap; /* IOMMU supported capabilities */
+ uint64_t fctl; /* IOMMU enabled features */
+
+ bool enable_off; /* Enable out-of-reset OFF mode (DMA disabled) */
+ bool enable_msi; /* Enable MSI remapping */
+
+ /* IOMMU Internal State */
+ uint64_t ddtp; /* Validated Device Directory Tree Root Pointer */
+
+ dma_addr_t cq_addr; /* Command queue base physical address */
+ dma_addr_t fq_addr; /* Fault/event queue base physical address */
+ dma_addr_t pq_addr; /* Page request queue base physical address */
+
+ uint32_t cq_mask; /* Command queue index bit mask */
+ uint32_t fq_mask; /* Fault/event queue index bit mask */
+ uint32_t pq_mask; /* Page request queue index bit mask */
+
+ /* interrupt notifier */
+ void (*notify)(RISCVIOMMUState *iommu, unsigned vector);
+
+ /* IOMMU State Machine */
+ QemuThread core_proc; /* Background processing thread */
+ QemuMutex core_lock; /* Global IOMMU lock, used for cache/regs updates */
+ QemuCond core_cond; /* Background processing wake up signal */
+ unsigned core_exec; /* Processing thread execution actions */
+
+ /* IOMMU target address space */
+ AddressSpace *target_as;
+ MemoryRegion *target_mr;
+
+ /* MSI / MRIF access trap */
+ AddressSpace trap_as;
+ MemoryRegion trap_mr;
+
+ GHashTable *ctx_cache; /* Device translation Context Cache */
+
+ /* MMIO Hardware Interface */
+ MemoryRegion regs_mr;
+ QemuSpin regs_lock;
+ uint8_t *regs_rw; /* register state (user write) */
+ uint8_t *regs_wc; /* write-1-to-clear mask */
+ uint8_t *regs_ro; /* read-only mask */
+
+ QLIST_ENTRY(RISCVIOMMUState) iommus;
+ QLIST_HEAD(, RISCVIOMMUSpace) spaces;
+};
+
+void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
+ Error **errp);
+
+/* private helpers */
+
+/* Register helper functions */
+static inline uint32_t riscv_iommu_reg_mod32(RISCVIOMMUState *s,
+ unsigned idx, uint32_t set, uint32_t clr)
+{
+ uint32_t val;
+ qemu_spin_lock(&s->regs_lock);
+ val = ldl_le_p(s->regs_rw + idx);
+ stl_le_p(s->regs_rw + idx, (val & ~clr) | set);
+ qemu_spin_unlock(&s->regs_lock);
+ return val;
+}
+
+static inline void riscv_iommu_reg_set32(RISCVIOMMUState *s,
+ unsigned idx, uint32_t set)
+{
+ qemu_spin_lock(&s->regs_lock);
+ stl_le_p(s->regs_rw + idx, set);
+ qemu_spin_unlock(&s->regs_lock);
+}
+
+static inline uint32_t riscv_iommu_reg_get32(RISCVIOMMUState *s,
+ unsigned idx)
+{
+ return ldl_le_p(s->regs_rw + idx);
+}
+
+static inline uint64_t riscv_iommu_reg_mod64(RISCVIOMMUState *s,
+ unsigned idx, uint64_t set, uint64_t clr)
+{
+ uint64_t val;
+ qemu_spin_lock(&s->regs_lock);
+ val = ldq_le_p(s->regs_rw + idx);
+ stq_le_p(s->regs_rw + idx, (val & ~clr) | set);
+ qemu_spin_unlock(&s->regs_lock);
+ return val;
+}
+
+static inline void riscv_iommu_reg_set64(RISCVIOMMUState *s,
+ unsigned idx, uint64_t set)
+{
+ qemu_spin_lock(&s->regs_lock);
+ stq_le_p(s->regs_rw + idx, set);
+ qemu_spin_unlock(&s->regs_lock);
+}
+
+static inline uint64_t riscv_iommu_reg_get64(RISCVIOMMUState *s,
+ unsigned idx)
+{
+ return ldq_le_p(s->regs_rw + idx);
+}
+
+
+#endif
diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events
new file mode 100644
index 0000000000..42a97caffa
--- /dev/null
+++ b/hw/riscv/trace-events
@@ -0,0 +1,11 @@
+# See documentation at docs/devel/tracing.rst
+
+# riscv-iommu.c
+riscv_iommu_new(const char *id, unsigned b, unsigned d, unsigned f) "%s: device attached %04x:%02x.%d"
+riscv_iommu_flt(const char *id, unsigned b, unsigned d, unsigned f, uint64_t reason, uint64_t iova) "%s: fault %04x:%02x.%u reason: 0x%"PRIx64" iova: 0x%"PRIx64
+riscv_iommu_pri(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova) "%s: page request %04x:%02x.%u iova: 0x%"PRIx64
+riscv_iommu_dma(const char *id, unsigned b, unsigned d, unsigned f, unsigned pasid, const char *dir, uint64_t iova, uint64_t phys) "%s: translate %04x:%02x.%u #%u %s 0x%"PRIx64" -> 0x%"PRIx64
+riscv_iommu_msi(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova, uint64_t phys) "%s: translate %04x:%02x.%u MSI 0x%"PRIx64" -> 0x%"PRIx64
+riscv_iommu_cmd(const char *id, uint64_t l, uint64_t u) "%s: command 0x%"PRIx64" 0x%"PRIx64
+riscv_iommu_notifier_add(const char *id) "%s: dev-iotlb notifier added"
+riscv_iommu_notifier_del(const char *id) "%s: dev-iotlb notifier removed"
diff --git a/hw/riscv/trace.h b/hw/riscv/trace.h
new file mode 100644
index 0000000000..8c0e3ca1f3
--- /dev/null
+++ b/hw/riscv/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-hw_riscv.h"
diff --git a/include/hw/riscv/iommu.h b/include/hw/riscv/iommu.h
new file mode 100644
index 0000000000..070ee69973
--- /dev/null
+++ b/include/hw/riscv/iommu.h
@@ -0,0 +1,36 @@
+/*
+ * QEMU emulation of a RISC-V IOMMU
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_IOMMU_H
+#define HW_RISCV_IOMMU_H
+
+#include "qemu/osdep.h"
+#include "qom/object.h"
+
+#define TYPE_RISCV_IOMMU "riscv-iommu"
+OBJECT_DECLARE_SIMPLE_TYPE(RISCVIOMMUState, RISCV_IOMMU)
+typedef struct RISCVIOMMUState RISCVIOMMUState;
+
+#define TYPE_RISCV_IOMMU_MEMORY_REGION "riscv-iommu-mr"
+typedef struct RISCVIOMMUSpace RISCVIOMMUSpace;
+
+#define TYPE_RISCV_IOMMU_PCI "riscv-iommu-pci"
+OBJECT_DECLARE_SIMPLE_TYPE(RISCVIOMMUStatePci, RISCV_IOMMU_PCI)
+typedef struct RISCVIOMMUStatePci RISCVIOMMUStatePci;
+
+#endif
diff --git a/meson.build b/meson.build
index a9de71d450..8099d8271c 100644
--- a/meson.build
+++ b/meson.build
@@ -3319,6 +3319,7 @@ if have_system
'hw/pci-host',
'hw/ppc',
'hw/rtc',
+ 'hw/riscv',
'hw/s390x',
'hw/scsi',
'hw/sd',