[PATCH 6/6] Add the AEHD implementation.
From: Haitao Shan
Subject: [PATCH 6/6] Add the AEHD implementation.
Date: Thu, 2 Mar 2023 18:26:17 -0800
Implement the AEHD accelerator, including the AEHD AccelClass,
AccelCPUClass, and AccelOpsClass.
Signed-off-by: Haitao Shan <hshan@google.com>
---
hw/i386/x86.c | 2 +-
include/exec/ram_addr.h | 2 -
include/sysemu/aehd.h | 87 ++
include/sysemu/hw_accel.h | 1 +
target/i386/aehd/aehd-accel-ops.c | 119 ++
target/i386/aehd/aehd-accel-ops.h | 22 +
target/i386/aehd/aehd-all.c | 1020 +++++++++++++++
target/i386/aehd/aehd-cpu.c | 150 +++
target/i386/aehd/aehd-cpu.h | 41 +
target/i386/aehd/aehd-stub.c | 22 +
target/i386/aehd/aehd.c | 1915 +++++++++++++++++++++++++++++
target/i386/aehd/aehd_i386.h | 26 +
target/i386/aehd/aehd_int.h | 2 +-
target/i386/aehd/meson.build | 4 +
target/i386/cpu.c | 12 +-
target/i386/cpu.h | 5 +-
target/i386/helper.c | 3 +
target/i386/meson.build | 1 +
18 files changed, 3428 insertions(+), 6 deletions(-)
create mode 100644 target/i386/aehd/aehd-accel-ops.c
create mode 100644 target/i386/aehd/aehd-accel-ops.h
create mode 100644 target/i386/aehd/aehd-cpu.c
create mode 100644 target/i386/aehd/aehd-cpu.h
create mode 100644 target/i386/aehd/aehd-stub.c
create mode 100644 target/i386/aehd/aehd_i386.h
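
For reference, a usage sketch once the series is applied (selecting the
accelerator by the name "aehd" is an assumption based on the
TYPE_AEHD_ACCEL and ACCEL_OPS_NAME("aehd") registrations in this patch):

    qemu-system-x86_64 -accel aehd -m 4096 -smp 4 disk.img
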
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index ffc6f97ce0..fdf090f25d 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1251,7 +1251,7 @@ bool x86_machine_is_smm_enabled(const X86MachineState *x86ms)
return false;
}
- if (tcg_enabled() || qtest_enabled()) {
+ if (tcg_enabled() || aehd_enabled() || qtest_enabled()) {
smm_available = true;
} else if (kvm_enabled()) {
smm_available = kvm_has_smm();
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index f4fb6a2111..4ff1745c30 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -332,7 +332,6 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
xen_hvm_modified_memory(start, length);
}
-#if !defined(_WIN32)
static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
ram_addr_t start,
ram_addr_t pages)
@@ -424,7 +423,6 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
}
}
}
-#endif /* not _WIN32 */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
ram_addr_t length,
diff --git a/include/sysemu/aehd.h b/include/sysemu/aehd.h
index 534dd95e3c..f5846ee27e 100644
--- a/include/sysemu/aehd.h
+++ b/include/sysemu/aehd.h
@@ -28,8 +28,17 @@
# define CONFIG_AEHD_IS_POSSIBLE
#endif
+#ifdef CONFIG_AEHD_IS_POSSIBLE
+
+extern bool aehd_allowed;
+#define aehd_enabled() (aehd_allowed)
+
+#else /* !CONFIG_AEHD_IS_POSSIBLE */
+
#define aehd_enabled() (0)
+#endif /* !CONFIG_AEHD_IS_POSSIBLE */
+
struct aehd_run;
struct aehd_lapic_state;
struct aehd_irq_routing_entry;
@@ -43,6 +52,9 @@ DECLARE_INSTANCE_CHECKER(AEHDState, AEHD_STATE,
extern AEHDState *aehd_state;
+/* external API */
+bool aehd_has_free_slot(MachineState *ms);
+
#ifdef NEED_CPU_H
#include "cpu.h"
@@ -57,6 +69,40 @@ int aehd_vcpu_ioctl(CPUState *cpu, int type, void *input, size_t input_size,
/* Arch specific hooks */
+void aehd_arch_pre_run(CPUState *cpu, struct aehd_run *run);
+MemTxAttrs aehd_arch_post_run(CPUState *cpu, struct aehd_run *run);
+
+int aehd_arch_handle_exit(CPUState *cpu, struct aehd_run *run);
+
+int aehd_arch_handle_ioapic_eoi(CPUState *cpu, struct aehd_run *run);
+
+int aehd_arch_process_async_events(CPUState *cpu);
+
+int aehd_arch_get_registers(CPUState *cpu);
+
+/* state subset only touched by the VCPU itself during runtime */
+#define AEHD_PUT_RUNTIME_STATE 1
+/* state subset modified during VCPU reset */
+#define AEHD_PUT_RESET_STATE 2
+/* full state set, modified during initialization or on vmload */
+#define AEHD_PUT_FULL_STATE 3
+
+int aehd_arch_put_registers(CPUState *cpu, int level);
+
+int aehd_arch_init(MachineState *ms, AEHDState *s);
+
+int aehd_arch_init_vcpu(CPUState *cpu);
+
+bool aehd_vcpu_id_is_valid(int vcpu_id);
+
+/* Returns VCPU ID to be used on AEHD_CREATE_VCPU ioctl() */
+unsigned long aehd_arch_vcpu_id(CPUState *cpu);
+
+void aehd_arch_init_irq_routing(AEHDState *s);
+
+int aehd_arch_fixup_msi_route(struct aehd_irq_routing_entry *route,
+ uint64_t address, uint32_t data, PCIDevice *dev);
+
/* Notify arch about newly added MSI routes */
int aehd_arch_add_msi_route_post(struct aehd_irq_routing_entry *route,
int vector, PCIDevice *dev);
@@ -71,11 +117,52 @@ void aehd_irqchip_add_irq_route(AEHDState *s, int gsi, int irqchip, int pin);
void aehd_put_apic_state(DeviceState *d, struct aehd_lapic_state *kapic);
void aehd_get_apic_state(DeviceState *d, struct aehd_lapic_state *kapic);
+bool aehd_arch_stop_on_emulation_error(CPUState *cpu);
+
+int aehd_check_extension(AEHDState *s, unsigned int extension);
+
+int aehd_vm_check_extension(AEHDState *s, unsigned int extension);
+
+uint32_t aehd_arch_get_supported_cpuid(AEHDState *env, uint32_t function,
+ uint32_t index, int reg);
+
#endif /* NEED_CPU_H */
+void aehd_raise_event(CPUState *cpu);
+void aehd_cpu_synchronize_state(CPUState *cpu);
+
+/**
+ * aehd_irqchip_add_msi_route - Add MSI route for specific vector
+ * @s: AEHD state
+ * @vector: which vector to add. This can be either an MSI or
+ * MSI-X vector. The function will automatically detect whether
+ * MSI or MSI-X is enabled, and fetch the corresponding MSI
+ * message.
+ * @dev: Owner PCI device to add the route. If @dev is specified
+ * as @NULL, an empty MSI message will be initialized.
+ * @return: virq (>=0) on success, errno (<0) on failure.
+ */
+int aehd_irqchip_add_msi_route(AEHDState *s, int vector, PCIDevice *dev);
+int aehd_irqchip_update_msi_route(AEHDState *s, int virq, MSIMessage msg,
+ PCIDevice *dev);
void aehd_irqchip_commit_routes(AEHDState *s);
void aehd_irqchip_release_virq(AEHDState *s, int virq);
+void aehd_irqchip_set_qemuirq_gsi(AEHDState *s, qemu_irq irq, int gsi);
void aehd_pc_setup_irq_routing(bool pci_enabled);
+void aehd_init_irq_routing(AEHDState *s);
+
+/**
+ * aehd_arch_irqchip_create:
+ * @ms: The MachineState pointer
+ * @s: The AEHDState pointer
+ *
+ * Allow architectures to create an in-kernel irq chip themselves.
+ *
+ * Returns: < 0: error
+ * 0: irq chip was not created
+ * > 0: irq chip was created
+ */
+int aehd_arch_irqchip_create(MachineState *ms, AEHDState *s);
#endif
diff --git a/include/sysemu/hw_accel.h b/include/sysemu/hw_accel.h
index 22903a55f7..a9e5494e08 100644
--- a/include/sysemu/hw_accel.h
+++ b/include/sysemu/hw_accel.h
@@ -14,6 +14,7 @@
#include "hw/core/cpu.h"
#include "sysemu/hax.h"
#include "sysemu/kvm.h"
+#include "sysemu/aehd.h"
#include "sysemu/hvf.h"
#include "sysemu/whpx.h"
#include "sysemu/nvmm.h"
diff --git a/target/i386/aehd/aehd-accel-ops.c b/target/i386/aehd/aehd-accel-ops.c
new file mode 100644
index 0000000000..49e6f0287c
--- /dev/null
+++ b/target/i386/aehd/aehd-accel-ops.c
@@ -0,0 +1,119 @@
+/*
+ * QEMU AEHD support
+ *
+ * Copyright IBM, Corp. 2008
+ * Red Hat, Inc. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Glauber Costa <gcosta@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "sysemu/aehd.h"
+#include "sysemu/runstate.h"
+#include "sysemu/cpus.h"
+#include "qemu/guest-random.h"
+#include "qapi/error.h"
+
+#include "aehd-accel-ops.h"
+
+static void *aehd_vcpu_thread_fn(void *arg)
+{
+ CPUState *cpu = arg;
+ int r;
+
+ rcu_register_thread();
+
+ qemu_mutex_lock_iothread();
+ qemu_thread_get_self(cpu->thread);
+ cpu->thread_id = qemu_get_thread_id();
+ cpu->can_do_io = 1;
+ current_cpu = cpu;
+
+ r = aehd_init_vcpu(cpu);
+ if (r < 0) {
+ fprintf(stderr, "aehd_init_vcpu failed: %s\n", strerror(-r));
+ exit(1);
+ }
+
+ /* signal CPU creation */
+ cpu_thread_signal_created(cpu);
+ qemu_guest_random_seed_thread_part2(cpu->random_seed);
+
+ do {
+ if (cpu_can_run(cpu)) {
+ r = aehd_cpu_exec(cpu);
+ if (r == EXCP_DEBUG) {
+ cpu_handle_guest_debug(cpu);
+ }
+ }
+ qemu_wait_io_event(cpu);
+ } while (!cpu->unplug || cpu_can_run(cpu));
+
+ aehd_destroy_vcpu(cpu);
+ cpu_thread_signal_destroyed(cpu);
+ qemu_mutex_unlock_iothread();
+ rcu_unregister_thread();
+ return NULL;
+}
+
+static void aehd_start_vcpu_thread(CPUState *cpu)
+{
+ char thread_name[VCPU_THREAD_NAME_SIZE];
+
+ cpu->thread = g_malloc0(sizeof(QemuThread));
+ cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+ qemu_cond_init(cpu->halt_cond);
+ snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/AEHD",
+ cpu->cpu_index);
+ qemu_thread_create(cpu->thread, thread_name, aehd_vcpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE);
+#ifdef _WIN32
+ cpu->hThread = qemu_thread_get_handle(cpu->thread);
+#endif
+}
+
+static bool aehd_vcpu_thread_is_idle(CPUState *cpu)
+{
+ return false;
+}
+
+static void aehd_kick_vcpu_thread(CPUState *cpu)
+{
+ cpu_exit(cpu);
+ aehd_raise_event(cpu);
+}
+
+static void aehd_accel_ops_class_init(ObjectClass *oc, void *data)
+{
+ AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
+
+ ops->create_vcpu_thread = aehd_start_vcpu_thread;
+ ops->cpu_thread_is_idle = aehd_vcpu_thread_is_idle;
+ ops->synchronize_post_reset = aehd_cpu_synchronize_post_reset;
+ ops->synchronize_post_init = aehd_cpu_synchronize_post_init;
+ ops->synchronize_state = aehd_cpu_synchronize_state;
+ ops->synchronize_pre_loadvm = aehd_cpu_synchronize_pre_loadvm;
+ ops->kick_vcpu_thread = aehd_kick_vcpu_thread;
+}
+
+static const TypeInfo aehd_accel_ops_type = {
+ .name = ACCEL_OPS_NAME("aehd"),
+
+ .parent = TYPE_ACCEL_OPS,
+ .class_init = aehd_accel_ops_class_init,
+ .abstract = true,
+};
+
+static void aehd_accel_ops_register_types(void)
+{
+ type_register_static(&aehd_accel_ops_type);
+}
+type_init(aehd_accel_ops_register_types);
diff --git a/target/i386/aehd/aehd-accel-ops.h b/target/i386/aehd/aehd-accel-ops.h
new file mode 100644
index 0000000000..8ee4f5bd55
--- /dev/null
+++ b/target/i386/aehd/aehd-accel-ops.h
@@ -0,0 +1,22 @@
+/*
+ * Accelerator CPUS Interface
+ *
+ * Copyright 2020 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef AEHD_CPUS_H
+#define AEHD_CPUS_H
+
+#include "sysemu/cpus.h"
+
+int aehd_init_vcpu(CPUState *cpu);
+int aehd_cpu_exec(CPUState *cpu);
+void aehd_destroy_vcpu(CPUState *cpu);
+void aehd_cpu_synchronize_post_reset(CPUState *cpu);
+void aehd_cpu_synchronize_post_init(CPUState *cpu);
+void aehd_cpu_synchronize_pre_loadvm(CPUState *cpu);
+
+#endif /* AEHD_CPUS_H */
diff --git a/target/i386/aehd/aehd-all.c b/target/i386/aehd/aehd-all.c
index 4c6a670cb7..8e16106ba4 100644
--- a/target/i386/aehd/aehd-all.c
+++ b/target/i386/aehd/aehd-all.c
@@ -39,6 +39,7 @@
#include "qapi/qapi-visit-common.h"
#include "sysemu/hw_accel.h"
#include "sysemu/aehd-interface.h"
+#include "aehd-accel-ops.h"
#include "aehd_int.h"
#include "hw/boards.h"
@@ -51,7 +52,508 @@
do { } while (0)
#endif
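+/*
+ * vCPU handles are parked here when a CPU is unplugged and are reused
+ * by aehd_get_vcpu() if a vCPU with the same id is created again.
+ */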
+struct AEHDParkedVcpu {
+ unsigned long vcpu_id;
+ HANDLE aehd_fd;
+ QLIST_ENTRY(AEHDParkedVcpu) node;
+};
+
AEHDState *aehd_state;
+bool aehd_allowed;
+
+static AEHDSlot *aehd_get_free_slot(AEHDMemoryListener *gml)
+{
+ AEHDState *s = aehd_state;
+ int i;
+
+ for (i = 0; i < s->nr_slots; i++) {
+ if (gml->slots[i].memory_size == 0) {
+ return &gml->slots[i];
+ }
+ }
+
+ return NULL;
+}
+
+bool aehd_has_free_slot(MachineState *ms)
+{
+ AEHDState *s = AEHD_STATE(ms->accelerator);
+
+ return aehd_get_free_slot(&s->memory_listener);
+}
+
+static AEHDSlot *aehd_alloc_slot(AEHDMemoryListener *gml)
+{
+ AEHDSlot *slot = aehd_get_free_slot(gml);
+
+ if (slot) {
+ return slot;
+ }
+
+ fprintf(stderr, "%s: no free slot available\n", __func__);
+ abort();
+}
+
+static AEHDSlot *aehd_lookup_matching_slot(AEHDMemoryListener *gml,
+ hwaddr start_addr,
+ hwaddr size)
+{
+ AEHDState *s = aehd_state;
+ int i;
+
+ for (i = 0; i < s->nr_slots; i++) {
+ AEHDSlot *mem = &gml->slots[i];
+
+ if (start_addr == mem->start_addr && size == mem->memory_size) {
+ return mem;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Calculate and align the start address and the size of the section.
+ * Return the size. If the size is 0, the aligned section is empty.
+ */
+static hwaddr aehd_align_section(MemoryRegionSection *section,
+ hwaddr *start)
+{
+ hwaddr size = int128_get64(section->size);
+ hwaddr delta, aligned;
+
+ /*
+ * AEHD works in page size chunks, but the function may be called
+ * with sub-page size and an unaligned start address. Pad the start
+ * address to the next page boundary and truncate the size to the
+ * previous one.
+ */
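+ /*
+ * Worked example (illustrative numbers, not part of the original
+ * patch): with 4 KiB pages, offset_within_address_space = 0x1234 and
+ * size = 0x3000 give aligned = 0x2000 and delta = 0xdcc, so *start
+ * becomes 0x2000 and 0x2000 bytes are returned.
+ */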
+ aligned = ROUND_UP(section->offset_within_address_space,
+ qemu_real_host_page_size());
+ delta = aligned - section->offset_within_address_space;
+ *start = aligned;
+ if (delta > size) {
+ return 0;
+ }
+
+ return (size - delta) & qemu_real_host_page_mask();
+}
+
+static int aehd_set_user_memory_region(AEHDMemoryListener *gml, AEHDSlot *slot)
+{
+ AEHDState *s = aehd_state;
+ struct aehd_userspace_memory_region mem;
+ int r;
+
+ mem.slot = slot->slot | (gml->as_id << 16);
+ mem.guest_phys_addr = slot->start_addr;
+ mem.userspace_addr = (uint64_t)slot->ram;
+ mem.flags = slot->flags;
+
+ if (slot->memory_size && mem.flags & AEHD_MEM_READONLY) {
+ /*
+ * Set the slot size to 0 before setting the slot to the desired
+ * value. This is needed based on KVM commit 75d61fbc.
+ */
+ mem.memory_size = 0;
+ r = aehd_vm_ioctl(s, AEHD_SET_USER_MEMORY_REGION,
+ &mem, sizeof(mem), NULL, 0);
+ }
+ mem.memory_size = slot->memory_size;
+ r = aehd_vm_ioctl(s, AEHD_SET_USER_MEMORY_REGION,
+ &mem, sizeof(mem), NULL, 0);
+ return r;
+}
+
+void aehd_destroy_vcpu(CPUState *cpu)
+{
+ struct AEHDParkedVcpu *vcpu = NULL;
+ int ret = 0;
+
+ DPRINTF("aehd_destroy_vcpu\n");
+
+ ret = aehd_vcpu_ioctl(cpu, AEHD_VCPU_MUNMAP, NULL, 0, NULL, 0);
+ fprintf(stderr, "aehd munmap %d\n", ret);
+
+ vcpu = g_malloc0(sizeof(*vcpu));
+ vcpu->vcpu_id = aehd_arch_vcpu_id(cpu);
+ vcpu->aehd_fd = cpu->aehd_fd;
+ QLIST_INSERT_HEAD(&aehd_state->aehd_parked_vcpus, vcpu, node);
+}
+
+static HANDLE aehd_get_vcpu(AEHDState *s, unsigned long vcpu_id)
+{
+ struct AEHDParkedVcpu *cpu;
+ HANDLE vcpu_fd = INVALID_HANDLE_VALUE;
+ int ret;
+
+ QLIST_FOREACH(cpu, &s->aehd_parked_vcpus, node) {
+ if (cpu->vcpu_id == vcpu_id) {
+ HANDLE aehd_fd;
+
+ QLIST_REMOVE(cpu, node);
+ aehd_fd = cpu->aehd_fd;
+ g_free(cpu);
+ return aehd_fd;
+ }
+ }
+
+ ret = aehd_vm_ioctl(s, AEHD_CREATE_VCPU, &vcpu_id, sizeof(vcpu_id),
+ &vcpu_fd, sizeof(vcpu_fd));
+ if (ret) {
+ return INVALID_HANDLE_VALUE;
+ }
+
+ return vcpu_fd;
+}
+
+int aehd_init_vcpu(CPUState *cpu)
+{
+ AEHDState *s = aehd_state;
+ long mmap_size;
+ int ret;
+ HANDLE vcpu_fd;
+
+ DPRINTF("aehd_init_vcpu\n");
+
+ vcpu_fd = aehd_get_vcpu(s, aehd_arch_vcpu_id(cpu));
+ if (vcpu_fd == INVALID_HANDLE_VALUE) {
+ DPRINTF("aehd_create_vcpu failed\n");
+ ret = -EFAULT;
+ goto err;
+ }
+
+ cpu->aehd_fd = vcpu_fd;
+ cpu->aehd_state = s;
+ cpu->vcpu_dirty = true;
+
+ ret = aehd_ioctl(s, AEHD_GET_VCPU_MMAP_SIZE, NULL, 0,
+ &mmap_size, sizeof(mmap_size));
+ if (ret) {
+ DPRINTF("AEHD_GET_VCPU_MMAP_SIZE failed\n");
+ goto err;
+ }
+
+ ret = aehd_vcpu_ioctl(cpu, AEHD_VCPU_MMAP, NULL, 0,
+ &cpu->aehd_run, sizeof(cpu->aehd_run));
+ if (ret) {
+ DPRINTF("mmap'ing vcpu state failed\n");
+ goto err;
+ }
+
+ ret = aehd_arch_init_vcpu(cpu);
+err:
+ return ret;
+}
+
+/*
+ * dirty pages logging control
+ */
+
+static int aehd_mem_flags(MemoryRegion *mr)
+{
+ bool readonly = mr->readonly || memory_region_is_romd(mr);
+ int flags = 0;
+
+ if (memory_region_get_dirty_log_mask(mr) != 0) {
+ flags |= AEHD_MEM_LOG_DIRTY_PAGES;
+ }
+ if (readonly) {
+ flags |= AEHD_MEM_READONLY;
+ }
+ return flags;
+}
+
+static int aehd_slot_update_flags(AEHDMemoryListener *gml, AEHDSlot *mem,
+ MemoryRegion *mr)
+{
+ int old_flags;
+
+ old_flags = mem->flags;
+ mem->flags = aehd_mem_flags(mr);
+
+ /* If nothing changed effectively, no need to issue ioctl */
+ if (mem->flags == old_flags) {
+ return 0;
+ }
+
+ return aehd_set_user_memory_region(gml, mem);
+}
+
+static int aehd_section_update_flags(AEHDMemoryListener *gml,
+ MemoryRegionSection *section)
+{
+ hwaddr start_addr, size;
+ AEHDSlot *mem;
+
+ size = aehd_align_section(section, &start_addr);
+ if (!size) {
+ return 0;
+ }
+
+ mem = aehd_lookup_matching_slot(gml, start_addr, size);
+ if (!mem) {
+ /* We don't have a slot if we want to trap every access. */
+ return 0;
+ }
+
+ return aehd_slot_update_flags(gml, mem, section->mr);
+}
+
+static void aehd_log_start(MemoryListener *listener,
+ MemoryRegionSection *section,
+ int old, int new)
+{
+ AEHDMemoryListener *gml = container_of(listener, AEHDMemoryListener,
+ listener);
+ int r;
+
+ if (old != 0) {
+ return;
+ }
+
+ r = aehd_section_update_flags(gml, section);
+ if (r < 0) {
+ fprintf(stderr, "%s: dirty pages log change\n", __func__);
+ abort();
+ }
+}
+
+static void aehd_log_stop(MemoryListener *listener,
+ MemoryRegionSection *section,
+ int old, int new)
+{
+ AEHDMemoryListener *gml = container_of(listener, AEHDMemoryListener,
+ listener);
+ int r;
+
+ if (new != 0) {
+ return;
+ }
+
+ r = aehd_section_update_flags(gml, section);
+ if (r < 0) {
+ fprintf(stderr, "%s: dirty pages log change\n", __func__);
+ abort();
+ }
+}
+
+/* get aehd's dirty pages bitmap and update qemu's */
+static int aehd_get_dirty_pages_log_range(MemoryRegionSection *section,
+ unsigned long *bitmap)
+{
+ ram_addr_t start = section->offset_within_region +
+ memory_region_get_ram_addr(section->mr);
+ ram_addr_t pages = int128_get64(section->size) /
+ qemu_real_host_page_size();
+
+ cpu_physical_memory_set_dirty_lebitmap(bitmap, start, pages);
+ return 0;
+}
+
+#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1))
+
+/**
+ * aehd_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
+ *
+ * This function updates qemu's dirty bitmap using
+ * cpu_physical_memory_set_dirty_lebitmap().
+ *
+ * @gml: the AEHD memory listener owning the slots
+ * @section: the memory region section whose dirty log is synced
+ */
+static int aehd_physical_sync_dirty_bitmap(AEHDMemoryListener *gml,
+ MemoryRegionSection *section)
+{
+ AEHDState *s = aehd_state;
+ struct aehd_dirty_log d = {};
+ AEHDSlot *mem;
+ hwaddr start_addr, size;
+
+ size = aehd_align_section(section, &start_addr);
+ if (size) {
+ mem = aehd_lookup_matching_slot(gml, start_addr, size);
+ if (!mem) {
+ /* We don't have a slot if we want to trap every access. */
+ return 0;
+ }
+
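+ /* One bit per target page, rounded up to whole host longs, in bytes. */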
+ size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
+ HOST_LONG_BITS) / 8;
+ d.dirty_bitmap = g_malloc0(size);
+
+ d.slot = mem->slot | (gml->as_id << 16);
+ if (aehd_vm_ioctl(s, AEHD_GET_DIRTY_LOG, &d, sizeof(d),
+ &d, sizeof(d))) {
+ DPRINTF("ioctl failed %d\n", errno);
+ g_free(d.dirty_bitmap);
+ return -1;
+ }
+
+ aehd_get_dirty_pages_log_range(section, d.dirty_bitmap);
+ g_free(d.dirty_bitmap);
+ }
+
+ return 0;
+}
+
+int aehd_check_extension(AEHDState *s, unsigned int extension)
+{
+ int ret;
+ int result;
+ HANDLE hDevice = s->fd;
+
+ if (hDevice == INVALID_HANDLE_VALUE) {
+ DPRINTF("Invalid HANDLE for aehd device!\n");
+ return 0;
+ }
+
+ ret = aehd_ioctl(s, AEHD_CHECK_EXTENSION, &extension, sizeof(extension),
+ &result, sizeof(result));
+
+ if (ret) {
+ DPRINTF("Failed to get aehd capabilities: %lx\n", GetLastError());
+ return 0;
+ }
+
+ return result;
+}
+
+int aehd_vm_check_extension(AEHDState *s, unsigned int extension)
+{
+ int ret;
+ int result;
+
+ ret = aehd_vm_ioctl(s, AEHD_CHECK_EXTENSION, &extension, sizeof(extension),
+ &result, sizeof(result));
+ if (ret < 0) {
+ /* VM wide version not implemented, use global one instead */
+ result = aehd_check_extension(s, extension);
+ }
+
+ return result;
+}
+
+static void aehd_set_phys_mem(AEHDMemoryListener *gml,
+ MemoryRegionSection *section, bool add)
+{
+ AEHDSlot *mem;
+ int err;
+ MemoryRegion *mr = section->mr;
+ bool writeable = !mr->readonly && !mr->rom_device;
+ hwaddr start_addr, size;
+ void *ram;
+
+ if (!memory_region_is_ram(mr)) {
+ if (writeable) {
+ return;
+ } else if (!mr->romd_mode) {
+ /*
+ * If the memory device is not in romd_mode, then we actually want
+ * to remove the aehd memory slot so all accesses will trap.
+ */
+ add = false;
+ }
+ }
+
+ size = aehd_align_section(section, &start_addr);
+ if (!size) {
+ return;
+ }
+
+ /* use aligned delta to align the ram address */
+ ram = memory_region_get_ram_ptr(mr) + section->offset_within_region +
+ (start_addr - section->offset_within_address_space);
+
+ if (!add) {
+ mem = aehd_lookup_matching_slot(gml, start_addr, size);
+ if (!mem) {
+ return;
+ }
+ if (mem->flags & AEHD_MEM_LOG_DIRTY_PAGES) {
+ aehd_physical_sync_dirty_bitmap(gml, section);
+ }
+
+ /* unregister the slot */
+ mem->memory_size = 0;
+ err = aehd_set_user_memory_region(gml, mem);
+ if (err) {
+ fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
+ __func__, strerror(-err));
+ abort();
+ }
+ return;
+ }
+
+ /* register the new slot */
+ mem = aehd_alloc_slot(gml);
+ mem->memory_size = size;
+ mem->start_addr = start_addr;
+ mem->ram = ram;
+ mem->flags = aehd_mem_flags(mr);
+
+ err = aehd_set_user_memory_region(gml, mem);
+ if (err) {
+ fprintf(stderr, "%s: error registering slot: %s\n", __func__,
+ strerror(-err));
+ abort();
+ }
+}
+
+static void aehd_region_add(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ AEHDMemoryListener *gml = container_of(listener, AEHDMemoryListener,
+ listener);
+
+ memory_region_ref(section->mr);
+ aehd_set_phys_mem(gml, section, true);
+}
+
+static void aehd_region_del(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ AEHDMemoryListener *gml = container_of(listener, AEHDMemoryListener,
+ listener);
+
+ aehd_set_phys_mem(gml, section, false);
+ memory_region_unref(section->mr);
+}
+
+static void aehd_log_sync(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ AEHDMemoryListener *gml = container_of(listener, AEHDMemoryListener,
+ listener);
+ int r;
+
+ r = aehd_physical_sync_dirty_bitmap(gml, section);
+ if (r < 0) {
+ fprintf(stderr, "%s: sync dirty bitmap\n", __func__);
+ abort();
+ }
+}
+
+void aehd_memory_listener_register(AEHDState *s, AEHDMemoryListener *gml,
+ AddressSpace *as, int as_id)
+{
+ int i;
+
+ gml->slots = g_malloc0(s->nr_slots * sizeof(AEHDSlot));
+ gml->as_id = as_id;
+
+ for (i = 0; i < s->nr_slots; i++) {
+ gml->slots[i].slot = i;
+ }
+
+ gml->listener.region_add = aehd_region_add;
+ gml->listener.region_del = aehd_region_del;
+ gml->listener.log_start = aehd_log_start;
+ gml->listener.log_stop = aehd_log_stop;
+ gml->listener.log_sync = aehd_log_sync;
+ gml->listener.priority = 10;
+
+ memory_listener_register(&gml->listener, as);
+}
int aehd_set_irq(AEHDState *s, int irq, int level)
{
@@ -86,6 +588,25 @@ static void clear_gsi(AEHDState *s, unsigned int gsi)
clear_bit(gsi, s->used_gsi_bitmap);
}
+void aehd_init_irq_routing(AEHDState *s)
+{
+ int gsi_count, i;
+
+ gsi_count = aehd_check_extension(s, AEHD_CAP_IRQ_ROUTING) - 1;
+ if (gsi_count > 0) {
+ /* Round up so we can search ints using ffs */
+ s->used_gsi_bitmap = bitmap_new(gsi_count);
+ s->gsi_count = gsi_count;
+ }
+
+ s->irq_routes = g_malloc0(sizeof(*s->irq_routes));
+ s->nr_allocated_irq_routes = 0;
+
+ for (i = 0; i < AEHD_MSI_HASHTAB_SIZE; i++) {
+ QTAILQ_INIT(&s->msi_hashtab[i]);
+ }
+}
+
void aehd_irqchip_commit_routes(AEHDState *s)
{
int ret;
@@ -124,6 +645,30 @@ static void aehd_add_routing_entry(AEHDState *s,
set_gsi(s, entry->gsi);
}
+static int aehd_update_routing_entry(AEHDState *s,
+ struct aehd_irq_routing_entry *new_entry)
+{
+ struct aehd_irq_routing_entry *entry;
+ int n;
+
+ for (n = 0; n < s->irq_routes->nr; n++) {
+ entry = &s->irq_routes->entries[n];
+ if (entry->gsi != new_entry->gsi) {
+ continue;
+ }
+
+ if (!memcmp(entry, new_entry, sizeof *entry)) {
+ return 0;
+ }
+
+ *entry = *new_entry;
+
+ return 0;
+ }
+
+ return -ESRCH;
+}
+
void aehd_irqchip_add_irq_route(AEHDState *s, int irq, int irqchip, int pin)
{
struct aehd_irq_routing_entry e = {};
@@ -247,6 +792,459 @@ int aehd_irqchip_send_msi(AEHDState *s, MSIMessage msg)
return aehd_set_irq(s, route->kroute.gsi, 1);
}
+int aehd_irqchip_add_msi_route(AEHDState *s, int vector, PCIDevice *dev)
+{
+ struct aehd_irq_routing_entry kroute = {};
+ int virq;
+ MSIMessage msg = {0, 0};
+
+ if (dev) {
+ msg = pci_get_msi_message(dev, vector);
+ }
+
+ virq = aehd_irqchip_get_virq(s);
+ if (virq < 0) {
+ return virq;
+ }
+
+ kroute.gsi = virq;
+ kroute.type = AEHD_IRQ_ROUTING_MSI;
+ kroute.flags = 0;
+ kroute.u.msi.address_lo = (uint32_t)msg.address;
+ kroute.u.msi.address_hi = msg.address >> 32;
+ kroute.u.msi.data = le32_to_cpu(msg.data);
+
+ aehd_add_routing_entry(s, &kroute);
+ aehd_arch_add_msi_route_post(&kroute, vector, dev);
+ aehd_irqchip_commit_routes(s);
+
+ return virq;
+}
+
+int aehd_irqchip_update_msi_route(AEHDState *s, int virq, MSIMessage msg,
+ PCIDevice *dev)
+{
+ struct aehd_irq_routing_entry kroute = {};
+
+ kroute.gsi = virq;
+ kroute.type = AEHD_IRQ_ROUTING_MSI;
+ kroute.flags = 0;
+ kroute.u.msi.address_lo = (uint32_t)msg.address;
+ kroute.u.msi.address_hi = msg.address >> 32;
+ kroute.u.msi.data = le32_to_cpu(msg.data);
+
+ return aehd_update_routing_entry(s, &kroute);
+}
+
+void aehd_irqchip_set_qemuirq_gsi(AEHDState *s, qemu_irq irq, int gsi)
+{
+ g_hash_table_insert(s->gsimap, irq, GINT_TO_POINTER(gsi));
+}
+
+static void aehd_irqchip_create(MachineState *machine, AEHDState *s)
+{
+ int ret;
+
+ /*
+ * First probe and see if there's an arch-specific hook to create the
+ * in-kernel irqchip for us
+ */
+ ret = aehd_arch_irqchip_create(machine, s);
+ if (ret == 0) {
+ ret = aehd_vm_ioctl(s, AEHD_CREATE_IRQCHIP, NULL, 0, NULL, 0);
+ }
+ if (ret < 0) {
+ fprintf(stderr, "Create kernel irqchip failed: %s\n", strerror(-ret));
+ exit(1);
+ }
+
+ aehd_init_irq_routing(s);
+
+ s->gsimap = g_hash_table_new(g_direct_hash, g_direct_equal);
+}
+
+/*
+ * Find number of supported CPUs using the recommended
+ * procedure from the kernel API documentation to cope with
+ * older kernels that may be missing capabilities.
+ */
+static int aehd_recommended_vcpus(AEHDState *s)
+{
+ int ret = aehd_check_extension(s, AEHD_CAP_NR_VCPUS);
+ return (ret) ? ret : 4;
+}
+
+static int aehd_max_vcpus(AEHDState *s)
+{
+ int ret = aehd_check_extension(s, AEHD_CAP_MAX_VCPUS);
+ return (ret) ? ret : aehd_recommended_vcpus(s);
+}
+
+static int aehd_max_vcpu_id(AEHDState *s)
+{
+ int ret = aehd_check_extension(s, AEHD_CAP_MAX_VCPU_ID);
+ return (ret) ? ret : aehd_max_vcpus(s);
+}
+
+bool aehd_vcpu_id_is_valid(int vcpu_id)
+{
+ AEHDState *s = AEHD_STATE(current_machine->accelerator);
+ return vcpu_id >= 0 && vcpu_id < aehd_max_vcpu_id(s);
+}
+
+static HANDLE aehd_open_device(void)
+{
+ HANDLE hDevice;
+
+ hDevice = CreateFile("\\\\.\\aehd", GENERIC_READ | GENERIC_WRITE, 0, NULL,
+ CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+
+ if (hDevice == INVALID_HANDLE_VALUE) {
+ fprintf(stderr, "Failed to open the aehd device! Error code %lx\n",
+ GetLastError());
+ }
+ return hDevice;
+}
+
+static int aehd_init(MachineState *ms)
+{
+ struct {
+ const char *name;
+ int num;
+ } num_cpus[] = {
+ { "SMP", ms->smp.cpus },
+ { "hotpluggable", ms->smp.max_cpus },
+ { NULL, }
+ }, *nc = num_cpus;
+ int soft_vcpus_limit, hard_vcpus_limit;
+ AEHDState *s;
+ int ret;
+ int type = 0;
+ HANDLE vmfd;
+
+ s = AEHD_STATE(ms->accelerator);
+
+ /*
+ * On systems where the kernel can support different base page
+ * sizes, host page size may be different from TARGET_PAGE_SIZE,
+ * even with AEHD. TARGET_PAGE_SIZE is assumed to be the minimum
+ * page size for the system though.
+ */
+ assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size());
+
+ QLIST_INIT(&s->aehd_parked_vcpus);
+ s->vmfd = INVALID_HANDLE_VALUE;
+ s->fd = aehd_open_device();
+ if (s->fd == INVALID_HANDLE_VALUE) {
+ fprintf(stderr, "Could not access AEHD kernel module: %m\n");
+ ret = -ENODEV;
+ goto err;
+ }
+
+ s->nr_slots = aehd_check_extension(s, AEHD_CAP_NR_MEMSLOTS);
+
+ /* If unspecified, use the default value */
+ if (!s->nr_slots) {
+ s->nr_slots = 32;
+ }
+
+ /* check the vcpu limits */
+ soft_vcpus_limit = aehd_recommended_vcpus(s);
+ hard_vcpus_limit = aehd_max_vcpus(s);
+
+ while (nc->name) {
+ if (nc->num > soft_vcpus_limit) {
+ fprintf(stderr,
+ "Warning: Number of %s cpus requested (%d) exceeds "
+ "the recommended cpus supported by AEHD (%d)\n",
+ nc->name, nc->num, soft_vcpus_limit);
+
+ if (nc->num > hard_vcpus_limit) {
+ fprintf(stderr, "Number of %s cpus requested (%d) exceeds "
+ "the maximum cpus supported by AEHD (%d)\n",
+ nc->name, nc->num, hard_vcpus_limit);
+ exit(1);
+ }
+ }
+ nc++;
+ }
+
+ do {
+ ret = aehd_ioctl(s, AEHD_CREATE_VM, &type, sizeof(type),
+ &vmfd, sizeof(vmfd));
+ } while (ret == -EINTR);
+
+ if (ret < 0) {
+ fprintf(stderr, "ioctl(AEHD_CREATE_VM) failed: %d %s\n", -ret,
+ strerror(-ret));
+ goto err;
+ }
+
+ s->vmfd = vmfd;
+
+ ret = aehd_arch_init(ms, s);
+ if (ret < 0) {
+ goto err;
+ }
+
+ aehd_irqchip_create(ms, s);
+
+ aehd_state = s;
+
+ aehd_memory_listener_register(s, &s->memory_listener,
+ &address_space_memory, 0);
+
+ printf("AEHD is operational\n");
+
+ return 0;
+
+err:
+ assert(ret < 0);
+ if (s->vmfd != INVALID_HANDLE_VALUE) {
+ CloseHandle(s->vmfd);
+ }
+ if (s->fd != INVALID_HANDLE_VALUE) {
+ CloseHandle(s->fd);
+ }
+ g_free(s->memory_listener.slots);
+
+ return ret;
+}
+
+static void aehd_handle_io(uint16_t port, MemTxAttrs attrs, void *data,
+ int direction, int size, uint32_t count)
+{
+ int i;
+ uint8_t *ptr = data;
+
+ for (i = 0; i < count; i++) {
+ address_space_rw(&address_space_io, port, attrs,
+ ptr, size,
+ direction == AEHD_EXIT_IO_OUT);
+ ptr += size;
+ }
+}
+
+static int aehd_handle_internal_error(CPUState *cpu, struct aehd_run *run)
+{
+ fprintf(stderr, "AEHD internal error. Suberror: %d\n",
+ run->internal.suberror);
+
+ int i;
+
+ for (i = 0; i < run->internal.ndata; ++i) {
+ fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
+ i, (uint64_t)run->internal.data[i]);
+ }
+
+ if (run->internal.suberror == AEHD_INTERNAL_ERROR_EMULATION) {
+ fprintf(stderr, "emulation failure\n");
+ if (!aehd_arch_stop_on_emulation_error(cpu)) {
+ cpu_dump_state(cpu, stderr, CPU_DUMP_CODE);
+ return EXCP_INTERRUPT;
+ }
+ }
+ /*
+ * FIXME: Should trigger a qmp message to let management know
+ * something went wrong.
+ */
+ return -1;
+}
+
+void aehd_raise_event(CPUState *cpu)
+{
+ AEHDState *s = aehd_state;
+ struct aehd_run *run = cpu->aehd_run;
+ unsigned long vcpu_id = aehd_arch_vcpu_id(cpu);
+
+ if (!run) {
+ return;
+ }
+ run->user_event_pending = 1;
+ aehd_vm_ioctl(s, AEHD_KICK_VCPU, &vcpu_id, sizeof(vcpu_id), NULL, 0);
+}
+
+static void do_aehd_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
+{
+ if (!cpu->vcpu_dirty) {
+ aehd_arch_get_registers(cpu);
+ cpu->vcpu_dirty = true;
+ }
+}
+
+void aehd_cpu_synchronize_state(CPUState *cpu)
+{
+ if (!cpu->vcpu_dirty) {
+ run_on_cpu(cpu, do_aehd_cpu_synchronize_state, RUN_ON_CPU_NULL);
+ }
+}
+
+static void do_aehd_cpu_synchronize_post_reset(CPUState *cpu,
+ run_on_cpu_data arg)
+{
+ aehd_arch_put_registers(cpu, AEHD_PUT_RESET_STATE);
+ cpu->vcpu_dirty = false;
+}
+
+void aehd_cpu_synchronize_post_reset(CPUState *cpu)
+{
+ run_on_cpu(cpu, do_aehd_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
+}
+
+static void do_aehd_cpu_synchronize_post_init(CPUState *cpu,
+ run_on_cpu_data arg)
+{
+ aehd_arch_put_registers(cpu, AEHD_PUT_FULL_STATE);
+ cpu->vcpu_dirty = false;
+}
+
+void aehd_cpu_synchronize_post_init(CPUState *cpu)
+{
+ run_on_cpu(cpu, do_aehd_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
+}
+
+static void do_aehd_cpu_synchronize_pre_loadvm(CPUState *cpu,
+ run_on_cpu_data arg)
+{
+ cpu->vcpu_dirty = true;
+}
+
+void aehd_cpu_synchronize_pre_loadvm(CPUState *cpu)
+{
+ run_on_cpu(cpu, do_aehd_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
+}
+
+int aehd_cpu_exec(CPUState *cpu)
+{
+ struct aehd_run *run = cpu->aehd_run;
+ int ret, run_ret;
+
+ DPRINTF("aehd_cpu_exec()\n");
+
+ if (aehd_arch_process_async_events(cpu)) {
+ cpu->exit_request = 0;
+ return EXCP_HLT;
+ }
+
+ qemu_mutex_unlock_iothread();
+
+ do {
+ MemTxAttrs attrs;
+
+ if (cpu->vcpu_dirty) {
+ aehd_arch_put_registers(cpu, AEHD_PUT_RUNTIME_STATE);
+ cpu->vcpu_dirty = false;
+ }
+
+ aehd_arch_pre_run(cpu, run);
+ if (cpu->exit_request) {
+ DPRINTF("interrupt exit requested\n");
+ /*
+ * AEHD requires us to reenter the kernel after IO exits to complete
+ * instruction emulation. This self-signal will ensure that we
+ * leave ASAP again.
+ */
+ qemu_cpu_kick(cpu);
+ }
+
+ run_ret = aehd_vcpu_ioctl(cpu, AEHD_RUN, NULL, 0, NULL, 0);
+
+ attrs = aehd_arch_post_run(cpu, run);
+
+ if (run_ret < 0) {
+ if (run_ret == -EINTR || run_ret == -EAGAIN) {
+ DPRINTF("io window exit\n");
+ ret = EXCP_INTERRUPT;
+ break;
+ }
+ fprintf(stderr, "error: aehd run failed %s\n",
+ strerror(-run_ret));
+ ret = -1;
+ break;
+ }
+
+ switch (run->exit_reason) {
+ case AEHD_EXIT_IO:
+ DPRINTF("handle_io\n");
+ /* Called outside BQL */
+ aehd_handle_io(run->io.port, attrs,
+ (uint8_t *)run + run->io.data_offset,
+ run->io.direction,
+ run->io.size,
+ run->io.count);
+ ret = 0;
+ break;
+ case AEHD_EXIT_MMIO:
+ DPRINTF("handle_mmio\n");
+ /* Called outside BQL */
+ address_space_rw(&address_space_memory,
+ run->mmio.phys_addr, attrs,
+ run->mmio.data,
+ run->mmio.len,
+ run->mmio.is_write);
+ ret = 0;
+ break;
+ case AEHD_EXIT_IRQ_WINDOW_OPEN:
+ DPRINTF("irq_window_open\n");
+ ret = EXCP_INTERRUPT;
+ break;
+ case AEHD_EXIT_INTR:
+ DPRINTF("aehd raise event exiting\n");
+ ret = EXCP_INTERRUPT;
+ break;
+ case AEHD_EXIT_SHUTDOWN:
+ DPRINTF("shutdown\n");
+ qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+ ret = EXCP_INTERRUPT;
+ break;
+ case AEHD_EXIT_UNKNOWN:
+ fprintf(stderr, "AEHD: unknown exit, hardware reason %" PRIx64
"\n",
+ (uint64_t)run->hw.hardware_exit_reason);
+ ret = -1;
+ break;
+ case AEHD_EXIT_INTERNAL_ERROR:
+ ret = aehd_handle_internal_error(cpu, run);
+ break;
+ case AEHD_EXIT_SYSTEM_EVENT:
+ switch (run->system_event.type) {
+ case AEHD_SYSTEM_EVENT_SHUTDOWN:
+ qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+ ret = EXCP_INTERRUPT;
+ break;
+ case AEHD_SYSTEM_EVENT_RESET:
+ qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+ ret = EXCP_INTERRUPT;
+ break;
+ case AEHD_SYSTEM_EVENT_CRASH:
+ aehd_cpu_synchronize_state(cpu);
+ qemu_mutex_lock_iothread();
+ qemu_system_guest_panicked(cpu_get_crash_info(cpu));
+ qemu_mutex_unlock_iothread();
+ ret = 0;
+ break;
+ default:
+ DPRINTF("aehd_arch_handle_exit\n");
+ ret = aehd_arch_handle_exit(cpu, run);
+ break;
+ }
+ break;
+ default:
+ DPRINTF("aehd_arch_handle_exit\n");
+ ret = aehd_arch_handle_exit(cpu, run);
+ break;
+ }
+ } while (ret == 0);
+
+ qemu_mutex_lock_iothread();
+
+ if (ret < 0) {
+ cpu_dump_state(cpu, stderr, CPU_DUMP_CODE);
+ vm_stop(RUN_STATE_INTERNAL_ERROR);
+ }
+
+ cpu->exit_request = 0;
+ return ret;
+}
+
int aehd_ioctl(AEHDState *s, int type, void *input, size_t input_size,
void *output, size_t output_size)
{
@@ -327,3 +1325,25 @@ int aehd_vcpu_ioctl(CPUState *cpu, int type, void *input, size_t input_size,
}
return ret;
}
+
+static void aehd_accel_class_init(ObjectClass *oc, void *data)
+{
+ AccelClass *ac = ACCEL_CLASS(oc);
+ ac->name = "AEHD";
+ ac->init_machine = aehd_init;
+ ac->allowed = &aehd_allowed;
+}
+
+static const TypeInfo aehd_accel_type = {
+ .name = TYPE_AEHD_ACCEL,
+ .parent = TYPE_ACCEL,
+ .class_init = aehd_accel_class_init,
+ .instance_size = sizeof(AEHDState),
+};
+
+static void aehd_type_init(void)
+{
+ type_register_static(&aehd_accel_type);
+}
+
+type_init(aehd_type_init);
diff --git a/target/i386/aehd/aehd-cpu.c b/target/i386/aehd/aehd-cpu.c
new file mode 100644
index 0000000000..63ddcba85f
--- /dev/null
+++ b/target/i386/aehd/aehd-cpu.c
@@ -0,0 +1,150 @@
+/*
+ * x86 AEHD CPU type initialization
+ *
+ * Copyright 2021 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "host-cpu.h"
+#include "aehd-cpu.h"
+#include "qapi/error.h"
+#include "sysemu/sysemu.h"
+#include "hw/boards.h"
+
+#include "aehd_i386.h"
+#include "hw/core/accel-cpu.h"
+
+static bool aehd_cpu_realizefn(CPUState *cs, Error **errp)
+{
+ return host_cpu_realizefn(cs, errp);
+}
+
+static void aehd_cpu_max_instance_init(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ AEHDState *s = aehd_state;
+
+ host_cpu_max_instance_init(cpu);
+
+ env->cpuid_min_level =
+ aehd_arch_get_supported_cpuid(s, 0x0, 0, R_EAX);
+ env->cpuid_min_xlevel =
+ aehd_arch_get_supported_cpuid(s, 0x80000000, 0, R_EAX);
+ env->cpuid_min_xlevel2 =
+ aehd_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX);
+}
+
+static void aehd_cpu_xsave_init(void)
+{
+ static bool first = true;
+ uint32_t eax, ebx, ecx, edx;
+ int i;
+
+ if (!first) {
+ return;
+ }
+ first = false;
+
+ /* x87 and SSE states are in the legacy region of the XSAVE area. */
+ x86_ext_save_areas[XSTATE_FP_BIT].offset = 0;
+ x86_ext_save_areas[XSTATE_SSE_BIT].offset = 0;
+
+ for (i = XSTATE_SSE_BIT + 1; i < XSAVE_STATE_AREA_COUNT; i++) {
+ ExtSaveArea *esa = &x86_ext_save_areas[i];
+
+ if (!esa->size) {
+ continue;
+ }
+ if ((x86_cpu_get_supported_feature_word(esa->feature, false) &
+ esa->bits) != esa->bits) {
+ continue;
+ }
+ host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
+ if (eax != 0) {
+ assert(esa->size == eax);
+ esa->offset = ebx;
+ esa->ecx = ecx;
+ }
+ }
+}
+
+/*
+ * AEHD-specific features that are automatically added/removed
+ * from cpudef models when AEHD is enabled.
+ * Only for builtin_x86_defs models initialized with x86_register_cpudef_types.
+ *
+ * NOTE: features can be enabled by default only if they were
+ * already available in the oldest kernel version supported
+ * by the AEHD accelerator (see "OS requirements" section at
+ * docs/system/target-i386.rst)
+ */
+static PropValue aehd_default_props[] = {
+ { "x2apic", "on" },
+ { "acpi", "off" },
+ { "monitor", "off" },
+ { "svm", "off" },
+ { NULL, NULL },
+};
+
+/*
+ * Only for builtin_x86_defs models initialized with x86_register_cpudef_types.
+ */
+void x86_cpu_change_aehd_default(const char *prop, const char *value)
+{
+ PropValue *pv;
+ for (pv = aehd_default_props; pv->prop; pv++) {
+ if (!strcmp(pv->prop, prop)) {
+ pv->value = value;
+ break;
+ }
+ }
+
+ /*
+ * It is valid to call this function only for properties that
+ * are already present in the aehd_default_props table.
+ */
+ assert(pv->prop);
+}
+
+static void aehd_cpu_instance_init(CPUState *cs)
+{
+ X86CPU *cpu = X86_CPU(cs);
+ X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu);
+
+ host_cpu_instance_init(cpu);
+
+ if (xcc->model) {
+ /* Special cases not set in the X86CPUDefinition structs: */
+ x86_cpu_apply_props(cpu, aehd_default_props);
+ }
+
+ if (cpu->max_features) {
+ aehd_cpu_max_instance_init(cpu);
+ }
+
+ aehd_cpu_xsave_init();
+}
+
+static void aehd_cpu_accel_class_init(ObjectClass *oc, void *data)
+{
+ AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);
+
+ acc->cpu_realizefn = aehd_cpu_realizefn;
+ acc->cpu_instance_init = aehd_cpu_instance_init;
+}
+static const TypeInfo aehd_cpu_accel_type_info = {
+ .name = ACCEL_CPU_NAME("aehd"),
+
+ .parent = TYPE_ACCEL_CPU,
+ .class_init = aehd_cpu_accel_class_init,
+ .abstract = true,
+};
+static void aehd_cpu_accel_register_types(void)
+{
+ type_register_static(&aehd_cpu_accel_type_info);
+}
+type_init(aehd_cpu_accel_register_types);
diff --git a/target/i386/aehd/aehd-cpu.h b/target/i386/aehd/aehd-cpu.h
new file mode 100644
index 0000000000..a0227c1121
--- /dev/null
+++ b/target/i386/aehd/aehd-cpu.h
@@ -0,0 +1,41 @@
+/*
+ * i386 AEHD CPU type and functions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef AEHD_CPU_H
+#define AEHD_CPU_H
+
+#ifdef CONFIG_AEHD
+/*
+ * Change the value of an AEHD-specific default
+ *
+ * If value is NULL, no default will be set and the original
+ * value from the CPU model table will be kept.
+ *
+ * It is valid to call this function only for properties that
+ * are already present in the aehd_default_props table.
+ */
+void x86_cpu_change_aehd_default(const char *prop, const char *value);
+
+#else /* !CONFIG_AEHD */
+
+#define x86_cpu_change_aehd_default(a, b)
+
+#endif /* CONFIG_AEHD */
+
+#endif /* AEHD_CPU_H */
diff --git a/target/i386/aehd/aehd-stub.c b/target/i386/aehd/aehd-stub.c
new file mode 100644
index 0000000000..25e0682091
--- /dev/null
+++ b/target/i386/aehd/aehd-stub.c
@@ -0,0 +1,22 @@
+/*
+ * QEMU AEHD x86 specific function stubs
+ *
+ * Copyright Linaro Limited 2012
+ *
+ * Author: Peter Maydell <peter.maydell@linaro.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "aehd_i386.h"
+
+#ifndef __OPTIMIZE__
+uint32_t aehd_arch_get_supported_cpuid(AEHDState *env, uint32_t function,
+ uint32_t index, int reg)
+{
+ abort();
+}
+#endif
diff --git a/target/i386/aehd/aehd.c b/target/i386/aehd/aehd.c
index 4890a75553..d269b42473 100644
--- a/target/i386/aehd/aehd.c
+++ b/target/i386/aehd/aehd.c
@@ -16,6 +16,7 @@
#include "qapi/error.h"
#include "cpu.h"
+#include "aehd_i386.h"
#include "aehd_int.h"
#include "sysemu/aehd-interface.h"
#include "sysemu/sysemu.h"
@@ -49,6 +50,1920 @@
do { } while (0)
#endif
+/*
+ * A 4096-byte buffer can hold the 8-byte aehd_msrs header, plus
+ * 255 aehd_msr_entry structs
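+ * (8 + 255 * 16 = 4088 bytes, assuming 16-byte aehd_msr_entry structs)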
+ */
+#define MSR_BUF_SIZE 4096
+
+#ifndef BUS_MCEERR_AR
+#define BUS_MCEERR_AR 4
+#endif
+#ifndef BUS_MCEERR_AO
+#define BUS_MCEERR_AO 5
+#endif
+
+static bool has_msr_star;
+static bool has_msr_hsave_pa;
+static bool has_msr_tsc_aux;
+static bool has_msr_tsc_adjust;
+static bool has_msr_tsc_deadline;
+static bool has_msr_feature_control;
+static bool has_msr_misc_enable;
+static bool has_msr_smbase;
+static bool has_msr_bndcfgs;
+static bool has_msr_mtrr;
+static bool has_msr_xss;
+
+static bool has_msr_architectural_pmu;
+static uint32_t num_architectural_pmu_counters;
+
+static int has_xsave;
+static int has_xcrs;
+
+static struct aehd_cpuid *cpuid_cache;
+
+static struct aehd_cpuid *try_get_cpuid(AEHDState *s, int max)
+{
+ struct aehd_cpuid *cpuid;
+ int r, size;
+
+ size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
+ cpuid = g_malloc0(size);
+ cpuid->nent = max;
+ r = aehd_ioctl(s, AEHD_GET_SUPPORTED_CPUID,
+ cpuid, size, cpuid, size);
+ if (r == 0 && cpuid->nent >= max) {
+ r = -E2BIG;
+ }
+ if (r < 0) {
+ if (r == -E2BIG) {
+ g_free(cpuid);
+ return NULL;
+ } else {
+ fprintf(stderr, "AEHD_GET_SUPPORTED_CPUID failed: %s\n",
+ strerror(-r));
+ exit(1);
+ }
+ }
+ return cpuid;
+}
+
+/*
+ * Run AEHD_GET_SUPPORTED_CPUID ioctl(), allocating a buffer large enough
+ * for all entries.
+ */
+static struct aehd_cpuid *get_supported_cpuid(AEHDState *s)
+{
+ struct aehd_cpuid *cpuid;
+ int max = 1;
+
+ if (cpuid_cache != NULL) {
+ return cpuid_cache;
+ }
+ while ((cpuid = try_get_cpuid(s, max)) == NULL) {
+ max *= 2;
+ }
+ cpuid_cache = cpuid;
+ return cpuid;
+}
+
+/*
+ * Returns the value for a specific register on the cpuid entry
+ */
+static uint32_t cpuid_entry_get_reg(struct aehd_cpuid_entry *entry, int reg)
+{
+ uint32_t ret = 0;
+ switch (reg) {
+ case R_EAX:
+ ret = entry->eax;
+ break;
+ case R_EBX:
+ ret = entry->ebx;
+ break;
+ case R_ECX:
+ ret = entry->ecx;
+ break;
+ case R_EDX:
+ ret = entry->edx;
+ break;
+ }
+ return ret;
+}
+
+/*
+ * Find matching entry for function/index on aehd_cpuid struct
+ */
+static struct aehd_cpuid_entry *cpuid_find_entry(struct aehd_cpuid *cpuid,
+ uint32_t function,
+ uint32_t index)
+{
+ int i;
+ for (i = 0; i < cpuid->nent; ++i) {
+ if (cpuid->entries[i].function == function &&
+ cpuid->entries[i].index == index) {
+ return &cpuid->entries[i];
+ }
+ }
+ /* not found: */
+ return NULL;
+}
+
+uint32_t aehd_arch_get_supported_cpuid(AEHDState *s, uint32_t function,
+ uint32_t index, int reg)
+{
+ struct aehd_cpuid *cpuid;
+ uint32_t ret = 0;
+ uint32_t cpuid_1_edx;
+
+ cpuid = get_supported_cpuid(s);
+
+ struct aehd_cpuid_entry *entry = cpuid_find_entry(cpuid, function, index);
+ if (entry) {
+ ret = cpuid_entry_get_reg(entry, reg);
+ }
+
+ /* Fixups for the data returned by AEHD, below */
+
+ if (function == 1 && reg == R_ECX) {
+ /*
+ * We can set the hypervisor flag, even if AEHD does not return it on
+ * GET_SUPPORTED_CPUID
+ */
+ ret |= CPUID_EXT_HYPERVISOR;
+ } else if (function == 6 && reg == R_EAX) {
+ ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */
+ } else if (function == 0x80000001 && reg == R_EDX) {
+ /*
+ * On Intel, aehd returns cpuid according to the Intel spec,
+ * so add missing bits according to the AMD spec:
+ */
+ cpuid_1_edx = aehd_arch_get_supported_cpuid(s, 1, 0, R_EDX);
+ ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES;
+ }
+
+ return ret;
+}
+
+static void cpu_update_state(void *opaque, bool running, RunState state)
+{
+ CPUX86State *env = opaque;
+
+ if (running) {
+ env->tsc_valid = false;
+ }
+}
+
+unsigned long aehd_arch_vcpu_id(CPUState *cs)
+{
+ X86CPU *cpu = X86_CPU(cs);
+ return cpu->apic_id;
+}
+
+static Error *invtsc_mig_blocker;
+
+#define AEHD_MAX_CPUID_ENTRIES 100
+
+int aehd_arch_init_vcpu(CPUState *cs)
+{
+ struct {
+ struct aehd_cpuid cpuid;
+ struct aehd_cpuid_entry entries[AEHD_MAX_CPUID_ENTRIES];
+ } cpuid_data;
+ /*
+ * The kernel defines these structs with padding fields so there
+ * should be no extra padding in our cpuid_data struct.
+ */
+ QEMU_BUILD_BUG_ON(sizeof(cpuid_data) !=
+ sizeof(struct aehd_cpuid) +
+ sizeof(struct aehd_cpuid_entry) * AEHD_MAX_CPUID_ENTRIES);
+ X86CPU *cpu = X86_CPU(cs);
+ CPUX86State *env = &cpu->env;
+ uint32_t limit, i, j, cpuid_i;
+ uint32_t unused;
+ struct aehd_cpuid_entry *c;
+ int r;
+ Error *local_err = NULL;
+
+ memset(&cpuid_data, 0, sizeof(cpuid_data));
+
+ cpuid_i = 0;
+
+ cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
+
+ for (i = 0; i <= limit; i++) {
+ if (cpuid_i == AEHD_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "unsupported level value: 0x%x\n", limit);
+ abort();
+ }
+ c = &cpuid_data.entries[cpuid_i++];
+
+ switch (i) {
+ case 2: {
+ /* Keep reading function 2 till all the input is received */
+ int times;
+
+ c->function = i;
+ c->flags = AEHD_CPUID_FLAG_STATEFUL_FUNC |
+ AEHD_CPUID_FLAG_STATE_READ_NEXT;
+ cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ times = c->eax & 0xff;
+
+ for (j = 1; j < times; ++j) {
+ if (cpuid_i == AEHD_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "cpuid_data is full, no space for "
+ "cpuid(eax:2):eax & 0xf = 0x%x\n", times);
+ abort();
+ }
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = i;
+ c->flags = AEHD_CPUID_FLAG_STATEFUL_FUNC;
+ cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ }
+ break;
+ }
+ case 4:
+ case 0xb:
+ case 0xd:
+ for (j = 0; ; j++) {
+ if (i == 0xd && j == 64) {
+ break;
+ }
+ c->function = i;
+ c->flags = AEHD_CPUID_FLAG_SIGNIFCANT_INDEX;
+ c->index = j;
+ cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
+
+ if (i == 4 && c->eax == 0) {
+ break;
+ }
+ if (i == 0xb && !(c->ecx & 0xff00)) {
+ break;
+ }
+ if (i == 0xd && c->eax == 0) {
+ continue;
+ }
+ if (cpuid_i == AEHD_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "cpuid_data is full, no space for "
+ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
+ abort();
+ }
+ c = &cpuid_data.entries[cpuid_i++];
+ }
+ break;
+ default:
+ c->function = i;
+ c->flags = 0;
+ cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ break;
+ }
+ }
+
+ if (limit >= 0x0a) {
+ uint32_t ver;
+
+ cpu_x86_cpuid(env, 0x0a, 0, &ver, &unused, &unused, &unused);
+ if ((ver & 0xff) > 0) {
+ has_msr_architectural_pmu = true;
+ num_architectural_pmu_counters = (ver & 0xff00) >> 8;
+
+ /*
+ * Shouldn't be more than 32, since that's the number of bits
+ * available in EBX to tell us _which_ counters are available.
+ * Play it safe.
+ */
+ if (num_architectural_pmu_counters > MAX_GP_COUNTERS) {
+ num_architectural_pmu_counters = MAX_GP_COUNTERS;
+ }
+ }
+ }
+
+ cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
+
+ for (i = 0x80000000; i <= limit; i++) {
+ if (cpuid_i == AEHD_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
+ abort();
+ }
+ c = &cpuid_data.entries[cpuid_i++];
+
+ c->function = i;
+ c->flags = 0;
+ cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ }
+
+ cpuid_data.cpuid.nent = cpuid_i;
+
+ qemu_add_vm_change_state_handler(cpu_update_state, env);
+
+ c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0);
+ if (c) {
+ has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) ||
+ !!(c->ecx & CPUID_EXT_SMX);
+ }
+
+ c = cpuid_find_entry(&cpuid_data.cpuid, 0x80000007, 0);
+ if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) &&
+ invtsc_mig_blocker == NULL) {
+ error_setg(&invtsc_mig_blocker,
+ "State blocked by non-migratable CPU device"
+ " (invtsc flag)");
+ r = migrate_add_blocker(invtsc_mig_blocker, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ error_free(invtsc_mig_blocker);
+ return r;
+ }
+ }
+
+ cpuid_data.cpuid.padding = 0;
+ r = aehd_vcpu_ioctl(cs, AEHD_SET_CPUID, &cpuid_data, sizeof(cpuid_data),
+ NULL, 0);
+ if (r) {
+ return r;
+ }
+
+ if (has_xsave) {
+ env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
+ memset(env->xsave_buf, 0, env->xsave_buf_len);
+ }
+ cpu->aehd_msr_buf = g_malloc0(MSR_BUF_SIZE);
+
+ if (env->features[FEAT_1_EDX] & CPUID_MTRR) {
+ has_msr_mtrr = true;
+ }
+ if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) {
+ has_msr_tsc_aux = false;
+ }
+
+ return 0;
+}
+
+void aehd_arch_reset_vcpu(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+
+ env->exception_injected = -1;
+ env->interrupt_injected = -1;
+ env->xcr0 = 1;
+ env->mp_state = cpu_is_bsp(cpu) ? AEHD_MP_STATE_RUNNABLE :
+ AEHD_MP_STATE_UNINITIALIZED;
+}
+
+void aehd_arch_do_init_vcpu(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+
+ /* APs get directly into wait-for-SIPI state. */
+ if (env->mp_state == AEHD_MP_STATE_UNINITIALIZED) {
+ env->mp_state = AEHD_MP_STATE_INIT_RECEIVED;
+ }
+}
+
+static int aehd_get_supported_msrs(AEHDState *s)
+{
+ static int aehd_supported_msrs;
+ int ret = 0;
+ unsigned long msr_list_size;
+
+ /* first time */
+ if (aehd_supported_msrs == 0) {
+ struct aehd_msr_list msr_list, *aehd_msr_list;
+
+ aehd_supported_msrs = -1;
+
+ /*
+ * Obtain MSR list from AEHD. These are the MSRs that we must
+ * save/restore
+ */
+ msr_list.nmsrs = 0;
+ ret = aehd_ioctl(s, AEHD_GET_MSR_INDEX_LIST,
+ &msr_list, sizeof(msr_list),
+ &msr_list, sizeof(msr_list));
+ if (ret < 0 && ret != -E2BIG) {
+ return ret;
+ }
+
+ msr_list_size = sizeof(msr_list) + msr_list.nmsrs *
+ sizeof(msr_list.indices[0]);
+ aehd_msr_list = g_malloc0(msr_list_size);
+
+ aehd_msr_list->nmsrs = msr_list.nmsrs;
+ ret = aehd_ioctl(s, AEHD_GET_MSR_INDEX_LIST,
+ aehd_msr_list, msr_list_size,
+ aehd_msr_list, msr_list_size);
+ if (ret >= 0) {
+ int i;
+
+ for (i = 0; i < aehd_msr_list->nmsrs; i++) {
+ if (aehd_msr_list->indices[i] == MSR_STAR) {
+ has_msr_star = true;
+ continue;
+ }
+ if (aehd_msr_list->indices[i] == MSR_VM_HSAVE_PA) {
+ has_msr_hsave_pa = true;
+ continue;
+ }
+ if (aehd_msr_list->indices[i] == MSR_TSC_AUX) {
+ has_msr_tsc_aux = true;
+ continue;
+ }
+ if (aehd_msr_list->indices[i] == MSR_TSC_ADJUST) {
+ has_msr_tsc_adjust = true;
+ continue;
+ }
+ if (aehd_msr_list->indices[i] == MSR_IA32_TSCDEADLINE) {
+ has_msr_tsc_deadline = true;
+ continue;
+ }
+ if (aehd_msr_list->indices[i] == MSR_IA32_SMBASE) {
+ has_msr_smbase = true;
+ continue;
+ }
+ if (aehd_msr_list->indices[i] == MSR_IA32_MISC_ENABLE) {
+ has_msr_misc_enable = true;
+ continue;
+ }
+ if (aehd_msr_list->indices[i] == MSR_IA32_BNDCFGS) {
+ has_msr_bndcfgs = true;
+ continue;
+ }
+ if (aehd_msr_list->indices[i] == MSR_IA32_XSS) {
+ has_msr_xss = true;
+ continue;
+ }
+ }
+ }
+
+ g_free(aehd_msr_list);
+ }
+
+ return ret;
+}
+
+static Notifier smram_machine_done;
+static AEHDMemoryListener smram_listener;
+static AddressSpace smram_address_space;
+static MemoryRegion smram_as_root;
+static MemoryRegion smram_as_mem;
+
+static void register_smram_listener(Notifier *n, void *unused)
+{
+ MemoryRegion *smram =
+ (MemoryRegion *) object_resolve_path("/machine/smram", NULL);
+
+ /* Outer container... */
+ memory_region_init(&smram_as_root, OBJECT(aehd_state),
+ "mem-container-smram", ~0ull);
+ memory_region_set_enabled(&smram_as_root, true);
+
+ /*
+ * ... with two regions inside: normal system memory with low
+ * priority, and...
+ */
+ memory_region_init_alias(&smram_as_mem, OBJECT(aehd_state), "mem-smram",
+ get_system_memory(), 0, ~0ull);
+ memory_region_add_subregion_overlap(&smram_as_root, 0, &smram_as_mem, 0);
+ memory_region_set_enabled(&smram_as_mem, true);
+
+ if (smram) {
+ /* ... SMRAM with higher priority */
+ memory_region_add_subregion_overlap(&smram_as_root, 0, smram, 10);
+ memory_region_set_enabled(smram, true);
+ }
+
+ address_space_init(&smram_address_space, &smram_as_root, "AEHD-SMRAM");
+ aehd_memory_listener_register(aehd_state, &smram_listener,
+ &smram_address_space, 1);
+}
+
+int aehd_arch_init(MachineState *ms, AEHDState *s)
+{
+ /* Allows up to 16M BIOSes. */
+ uint64_t identity_base = 0xfeffc000;
+ uint64_t tss_base;
+ int ret;
+
+ has_xsave = aehd_check_extension(s, AEHD_CAP_XSAVE);
+
+ has_xcrs = aehd_check_extension(s, AEHD_CAP_XCRS);
+
+ ret = aehd_get_supported_msrs(s);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /*
+ * On older Intel CPUs, AEHD uses vm86 mode to emulate 16-bit code directly.
+ * In order to use vm86 mode, an EPT identity map and a TSS are needed.
+ * Since these must be part of guest physical memory, we need to allocate
+ * them, both by setting their start addresses in the kernel and by
+ * creating a corresponding e820 entry. We need 4 pages before the BIOS.
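+ * (Here that is the identity map page at 0xfeffc000 plus the TSS
+ * starting one page later at 0xfeffd000; the e820 entry below
+ * reserves the 0x4000 bytes in total.)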
+ */
+ ret = aehd_vm_ioctl(s, AEHD_SET_IDENTITY_MAP_ADDR,
+ &identity_base, sizeof(identity_base), NULL, 0);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Set TSS base one page after EPT identity map. */
+ tss_base = identity_base + 0x1000;
+ ret = aehd_vm_ioctl(s, AEHD_SET_TSS_ADDR, &tss_base, sizeof(tss_base),
+ NULL, 0);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Tell fw_cfg to notify the BIOS to reserve the range. */
+ ret = e820_add_entry(identity_base, 0x4000, E820_RESERVED);
+ if (ret < 0) {
+ fprintf(stderr, "e820_add_entry() table is full\n");
+ return ret;
+ }
+
+ if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE) &&
+ x86_machine_is_smm_enabled(X86_MACHINE(ms))) {
+ smram_machine_done.notify = register_smram_listener;
+ qemu_add_machine_init_done_notifier(&smram_machine_done);
+ }
+ return 0;
+}
+
+static void set_v8086_seg(struct aehd_segment *lhs, const SegmentCache *rhs)
+{
+ lhs->selector = rhs->selector;
+ lhs->base = rhs->base;
+ lhs->limit = rhs->limit;
+ lhs->type = 3;
+ lhs->present = 1;
+ lhs->dpl = 3;
+ lhs->db = 0;
+ lhs->s = 1;
+ lhs->l = 0;
+ lhs->g = 0;
+ lhs->avl = 0;
+ lhs->unusable = 0;
+}
+
+static void set_seg(struct aehd_segment *lhs, const SegmentCache *rhs)
+{
+ unsigned flags = rhs->flags;
+ lhs->selector = rhs->selector;
+ lhs->base = rhs->base;
+ lhs->limit = rhs->limit;
+ lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
+ lhs->present = (flags & DESC_P_MASK) != 0;
+ lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3;
+ lhs->db = (flags >> DESC_B_SHIFT) & 1;
+ lhs->s = (flags & DESC_S_MASK) != 0;
+ lhs->l = (flags >> DESC_L_SHIFT) & 1;
+ lhs->g = (flags & DESC_G_MASK) != 0;
+ lhs->avl = (flags & DESC_AVL_MASK) != 0;
+ lhs->unusable = !lhs->present;
+ lhs->padding = 0;
+}
+
+static void get_seg(SegmentCache *lhs, const struct aehd_segment *rhs)
+{
+ lhs->selector = rhs->selector;
+ lhs->base = rhs->base;
+ lhs->limit = rhs->limit;
+ if (rhs->unusable) {
+ lhs->flags = 0;
+ } else {
+ lhs->flags = (rhs->type << DESC_TYPE_SHIFT) |
+ (rhs->present * DESC_P_MASK) |
+ (rhs->dpl << DESC_DPL_SHIFT) |
+ (rhs->db << DESC_B_SHIFT) |
+ (rhs->s * DESC_S_MASK) |
+ (rhs->l << DESC_L_SHIFT) |
+ (rhs->g * DESC_G_MASK) |
+ (rhs->avl * DESC_AVL_MASK);
+ }
+}
+
+static void aehd_getput_reg(__u64 *aehd_reg, target_ulong *qemu_reg, int set)
+{
+ if (set) {
+ *aehd_reg = *qemu_reg;
+ } else {
+ *qemu_reg = *aehd_reg;
+ }
+}
+
+static int aehd_getput_regs(X86CPU *cpu, int set)
+{
+ CPUX86State *env = &cpu->env;
+ struct aehd_regs regs;
+ int ret = 0;
+
+    if (!set) {
+        ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_GET_REGS, NULL, 0,
+                              &regs, sizeof(regs));
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    aehd_getput_reg(&regs.rax, &env->regs[R_EAX], set);
+    aehd_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
+    aehd_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
+    aehd_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
+    aehd_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
+    aehd_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
+    aehd_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
+    aehd_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
+#ifdef TARGET_X86_64
+    aehd_getput_reg(&regs.r8, &env->regs[8], set);
+    aehd_getput_reg(&regs.r9, &env->regs[9], set);
+    aehd_getput_reg(&regs.r10, &env->regs[10], set);
+    aehd_getput_reg(&regs.r11, &env->regs[11], set);
+    aehd_getput_reg(&regs.r12, &env->regs[12], set);
+    aehd_getput_reg(&regs.r13, &env->regs[13], set);
+    aehd_getput_reg(&regs.r14, &env->regs[14], set);
+    aehd_getput_reg(&regs.r15, &env->regs[15], set);
+#endif
+
+    aehd_getput_reg(&regs.rflags, &env->eflags, set);
+    aehd_getput_reg(&regs.rip, &env->eip, set);
+
+    if (set) {
+        ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_SET_REGS, &regs, sizeof(regs),
+                              NULL, 0);
+    }
+
+ return ret;
+}
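
Since one helper serves both directions, the call sites read as follows (a usage sketch):

    aehd_getput_regs(x86_cpu, 1);   /* set: QEMU state -> AEHD_SET_REGS */
    aehd_getput_regs(x86_cpu, 0);   /* get: AEHD_GET_REGS -> QEMU state */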
+
+static int aehd_put_fpu(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ struct aehd_fpu fpu;
+ int i;
+
+ memset(&fpu, 0, sizeof fpu);
+ fpu.fsw = env->fpus & ~(7 << 11);
+ fpu.fsw |= (env->fpstt & 7) << 11;
+ fpu.fcw = env->fpuc;
+ fpu.last_opcode = env->fpop;
+ fpu.last_ip = env->fpip;
+ fpu.last_dp = env->fpdp;
+ for (i = 0; i < 8; ++i) {
+ fpu.ftwx |= (!env->fptags[i]) << i;
+ }
+ memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
+ for (i = 0; i < CPU_NB_REGS; i++) {
+ stq_p(&fpu.xmm[i][0], env->xmm_regs[i].ZMM_Q(0));
+ stq_p(&fpu.xmm[i][8], env->xmm_regs[i].ZMM_Q(1));
+ }
+ fpu.mxcsr = env->mxcsr;
+
+ return aehd_vcpu_ioctl(CPU(cpu), AEHD_SET_FPU, &fpu, sizeof(fpu), NULL, 0);
+}
+
+static int aehd_put_xsave(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ void *xsave = env->xsave_buf;
+
+ if (!has_xsave) {
+ return aehd_put_fpu(cpu);
+ }
+ x86_cpu_xsave_all_areas(cpu, xsave, env->xsave_buf_len);
+
+    return aehd_vcpu_ioctl(CPU(cpu), AEHD_SET_XSAVE, xsave,
+                           env->xsave_buf_len, NULL, 0);
+}
+
+static int aehd_put_xcrs(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ struct aehd_xcrs xcrs = {};
+
+ if (!has_xcrs) {
+ return 0;
+ }
+
+ xcrs.nr_xcrs = 1;
+ xcrs.flags = 0;
+ xcrs.xcrs[0].xcr = 0;
+ xcrs.xcrs[0].value = env->xcr0;
+ return aehd_vcpu_ioctl(CPU(cpu), AEHD_SET_XCRS, &xcrs, sizeof(xcrs),
+ NULL, 0);
+}
+
+static int aehd_put_sregs(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ struct aehd_sregs sregs;
+
+ memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
+ if (env->interrupt_injected >= 0) {
+ sregs.interrupt_bitmap[env->interrupt_injected / 64] |=
+ (uint64_t)1 << (env->interrupt_injected % 64);
+ }
+
+ if ((env->eflags & VM_MASK)) {
+ set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
+ set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
+ set_v8086_seg(&sregs.es, &env->segs[R_ES]);
+ set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
+ set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
+ set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
+ } else {
+ set_seg(&sregs.cs, &env->segs[R_CS]);
+ set_seg(&sregs.ds, &env->segs[R_DS]);
+ set_seg(&sregs.es, &env->segs[R_ES]);
+ set_seg(&sregs.fs, &env->segs[R_FS]);
+ set_seg(&sregs.gs, &env->segs[R_GS]);
+ set_seg(&sregs.ss, &env->segs[R_SS]);
+ }
+
+ set_seg(&sregs.tr, &env->tr);
+ set_seg(&sregs.ldt, &env->ldt);
+
+ sregs.idt.limit = env->idt.limit;
+ sregs.idt.base = env->idt.base;
+ memset(sregs.idt.padding, 0, sizeof sregs.idt.padding);
+ sregs.gdt.limit = env->gdt.limit;
+ sregs.gdt.base = env->gdt.base;
+ memset(sregs.gdt.padding, 0, sizeof sregs.gdt.padding);
+
+ sregs.cr0 = env->cr[0];
+ sregs.cr2 = env->cr[2];
+ sregs.cr3 = env->cr[3];
+ sregs.cr4 = env->cr[4];
+
+ sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state);
+ sregs.apic_base = cpu_get_apic_base(cpu->apic_state);
+
+ sregs.efer = env->efer;
+
+ return aehd_vcpu_ioctl(CPU(cpu), AEHD_SET_SREGS, &sregs, sizeof(sregs),
+ NULL, 0);
+}
+
+static void aehd_msr_buf_reset(X86CPU *cpu)
+{
+ memset(cpu->aehd_msr_buf, 0, MSR_BUF_SIZE);
+}
+
+static void aehd_msr_entry_add(X86CPU *cpu, uint32_t index, uint64_t value)
+{
+ struct aehd_msrs *msrs = cpu->aehd_msr_buf;
+ void *limit = ((void *)msrs) + MSR_BUF_SIZE;
+ struct aehd_msr_entry *entry = &msrs->entries[msrs->nmsrs];
+
+ assert((void *)(entry + 1) <= limit);
+
+ entry->index = index;
+ entry->reserved = 0;
+ entry->data = value;
+ msrs->nmsrs++;
+}
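
Every MSR transfer below follows the same three-step shape; a sketch using only the helpers above:

    aehd_msr_buf_reset(cpu);                        /* nmsrs = 0 */
    aehd_msr_entry_add(cpu, MSR_PAT, env->pat);     /* queue entries */
    ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_SET_MSRS,  /* one batched ioctl */
                          cpu->aehd_msr_buf,
                          sizeof(struct aehd_msrs) +
                          cpu->aehd_msr_buf->nmsrs *
                          sizeof(struct aehd_msr_entry),
                          cpu->aehd_msr_buf, sizeof(struct aehd_msrs));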
+
+static int aehd_put_tscdeadline_msr(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ int ret;
+
+ if (!has_msr_tsc_deadline) {
+ return 0;
+ }
+
+ aehd_msr_buf_reset(cpu);
+ aehd_msr_entry_add(cpu, MSR_IA32_TSCDEADLINE, env->tsc_deadline);
+
+ ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_SET_MSRS, cpu->aehd_msr_buf,
+ sizeof(struct aehd_msrs) +
+ sizeof(struct aehd_msr_entry),
+ cpu->aehd_msr_buf, sizeof(struct aehd_msrs));
+ if (ret < 0) {
+ return ret;
+ } else {
+ ret = cpu->aehd_msr_buf->nmsrs;
+ }
+
+ assert(ret == 1);
+ return 0;
+}
+
+/*
+ * Provide a separate write service for the feature control MSR in order to
+ * kick the VCPU out of VMXON or even guest mode on reset. This has to be done
+ * before writing any other state because forcibly leaving nested mode
+ * invalidates the VCPU state.
+ */
+static int aehd_put_msr_feature_control(X86CPU *cpu)
+{
+ int ret;
+
+ if (!has_msr_feature_control) {
+ return 0;
+ }
+
+ aehd_msr_buf_reset(cpu);
+ aehd_msr_entry_add(cpu, MSR_IA32_FEATURE_CONTROL,
+ cpu->env.msr_ia32_feature_control);
+
+ ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_SET_MSRS, cpu->aehd_msr_buf,
+ sizeof(struct aehd_msrs) +
+ sizeof(struct aehd_msr_entry),
+ cpu->aehd_msr_buf, sizeof(struct aehd_msrs));
+ if (ret < 0) {
+ return ret;
+ } else {
+ ret = cpu->aehd_msr_buf->nmsrs;
+ }
+
+ assert(ret == 1);
+ return 0;
+}
+
+static int aehd_put_msrs(X86CPU *cpu, int level)
+{
+ CPUX86State *env = &cpu->env;
+ int i;
+ int ret;
+
+ aehd_msr_buf_reset(cpu);
+
+ aehd_msr_entry_add(cpu, MSR_IA32_SYSENTER_CS, env->sysenter_cs);
+ aehd_msr_entry_add(cpu, MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
+ aehd_msr_entry_add(cpu, MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
+ aehd_msr_entry_add(cpu, MSR_PAT, env->pat);
+ if (has_msr_star) {
+ aehd_msr_entry_add(cpu, MSR_STAR, env->star);
+ }
+ if (has_msr_hsave_pa) {
+ aehd_msr_entry_add(cpu, MSR_VM_HSAVE_PA, env->vm_hsave);
+ }
+ if (has_msr_tsc_aux) {
+ aehd_msr_entry_add(cpu, MSR_TSC_AUX, env->tsc_aux);
+ }
+ if (has_msr_tsc_adjust) {
+ aehd_msr_entry_add(cpu, MSR_TSC_ADJUST, env->tsc_adjust);
+ }
+ if (has_msr_misc_enable) {
+ aehd_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE,
+ env->msr_ia32_misc_enable);
+ }
+ if (has_msr_smbase) {
+ aehd_msr_entry_add(cpu, MSR_IA32_SMBASE, env->smbase);
+ }
+ if (has_msr_bndcfgs) {
+ aehd_msr_entry_add(cpu, MSR_IA32_BNDCFGS, env->msr_bndcfgs);
+ }
+ if (has_msr_xss) {
+ aehd_msr_entry_add(cpu, MSR_IA32_XSS, env->xss);
+ }
+#ifdef TARGET_X86_64
+ aehd_msr_entry_add(cpu, MSR_CSTAR, env->cstar);
+ aehd_msr_entry_add(cpu, MSR_KERNELGSBASE, env->kernelgsbase);
+ aehd_msr_entry_add(cpu, MSR_FMASK, env->fmask);
+ aehd_msr_entry_add(cpu, MSR_LSTAR, env->lstar);
+#endif
+ /*
+ * The following MSRs have side effects on the guest or are too heavy
+ * for normal writeback. Limit them to reset or full state updates.
+ */
+ if (level >= AEHD_PUT_RESET_STATE) {
+ aehd_msr_entry_add(cpu, MSR_IA32_TSC, env->tsc);
+ if (has_msr_architectural_pmu) {
+ /* Stop the counter. */
+ aehd_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
+ aehd_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ /* Set the counter values. */
+ for (i = 0; i < MAX_FIXED_COUNTERS; i++) {
+ aehd_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i,
+ env->msr_fixed_counters[i]);
+ }
+ for (i = 0; i < num_architectural_pmu_counters; i++) {
+ aehd_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i,
+ env->msr_gp_counters[i]);
+ aehd_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i,
+ env->msr_gp_evtsel[i]);
+ }
+ aehd_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS,
+ env->msr_global_status);
+ aehd_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+ env->msr_global_ovf_ctrl);
+
+ /* Now start the PMU. */
+ aehd_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL,
+ env->msr_fixed_ctr_ctrl);
+ aehd_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ env->msr_global_ctrl);
+ }
+ if (has_msr_mtrr) {
+ uint64_t phys_mask = MAKE_64BIT_MASK(0, cpu->phys_bits);
+
+ aehd_msr_entry_add(cpu, MSR_MTRRdefType, env->mtrr_deftype);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix64K_00000, env->mtrr_fixed[0]);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix16K_80000, env->mtrr_fixed[1]);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix16K_A0000, env->mtrr_fixed[2]);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_C0000, env->mtrr_fixed[3]);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_C8000, env->mtrr_fixed[4]);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_D0000, env->mtrr_fixed[5]);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_D8000, env->mtrr_fixed[6]);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_E0000, env->mtrr_fixed[7]);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_E8000, env->mtrr_fixed[8]);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_F0000, env->mtrr_fixed[9]);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_F8000, env->mtrr_fixed[10]);
+ for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
+ /*
+ * The CPU GPs if we write to a bit above the physical limit of
+ * the host CPU (and AEHD emulates that)
+ */
+ uint64_t mask = env->mtrr_var[i].mask;
+ mask &= phys_mask;
+
+ aehd_msr_entry_add(cpu, MSR_MTRRphysBase(i),
+ env->mtrr_var[i].base);
+ aehd_msr_entry_add(cpu, MSR_MTRRphysMask(i), mask);
+ }
+ }
+
+ /*
+ * Note: MSR_IA32_FEATURE_CONTROL is written separately, see
+ * aehd_put_msr_feature_control.
+ */
+ }
+
+ ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_SET_MSRS, cpu->aehd_msr_buf,
+ sizeof(struct aehd_msrs) + cpu->aehd_msr_buf->nmsrs *
+ sizeof(struct aehd_msr_entry),
+ cpu->aehd_msr_buf, sizeof(struct aehd_msrs));
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+}
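
The level argument gates the heavyweight writes; roughly (AEHD_PUT_RUNTIME_STATE is assumed here by analogy with KVM's KVM_PUT_RUNTIME_STATE and does not appear in this hunk):

    aehd_put_msrs(x86_cpu, AEHD_PUT_RUNTIME_STATE); /* cheap MSRs only */
    aehd_put_msrs(x86_cpu, AEHD_PUT_RESET_STATE);   /* also TSC, PMU, MTRRs */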
+
+static int aehd_get_fpu(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ struct aehd_fpu fpu;
+ int i, ret;
+
+ ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_GET_FPU, NULL, 0, &fpu, sizeof(fpu));
+ if (ret < 0) {
+ return ret;
+ }
+
+ env->fpstt = (fpu.fsw >> 11) & 7;
+ env->fpus = fpu.fsw;
+ env->fpuc = fpu.fcw;
+ env->fpop = fpu.last_opcode;
+ env->fpip = fpu.last_ip;
+ env->fpdp = fpu.last_dp;
+ for (i = 0; i < 8; ++i) {
+ env->fptags[i] = !((fpu.ftwx >> i) & 1);
+ }
+ memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
+ for (i = 0; i < CPU_NB_REGS; i++) {
+ env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.xmm[i][0]);
+ env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.xmm[i][8]);
+ }
+ env->mxcsr = fpu.mxcsr;
+
+ return 0;
+}
+
+static int aehd_get_xsave(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ void *xsave = env->xsave_buf;
+ int ret;
+
+ if (!has_xsave) {
+ return aehd_get_fpu(cpu);
+ }
+
+    ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_GET_XSAVE, NULL, 0,
+                          xsave, env->xsave_buf_len);
+ if (ret < 0) {
+ return ret;
+ }
+ x86_cpu_xrstor_all_areas(cpu, xsave, env->xsave_buf_len);
+
+ return 0;
+}
+
+static int aehd_get_xcrs(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ int i, ret;
+ struct aehd_xcrs xcrs;
+
+ if (!has_xcrs) {
+ return 0;
+ }
+
+ ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_GET_XCRS, NULL, 0,
+ &xcrs, sizeof(xcrs));
+ if (ret < 0) {
+ return ret;
+ }
+
+ for (i = 0; i < xcrs.nr_xcrs; i++) {
+ /* Only support xcr0 now */
+ if (xcrs.xcrs[i].xcr == 0) {
+ env->xcr0 = xcrs.xcrs[i].value;
+ break;
+ }
+ }
+ return 0;
+}
+
+static int aehd_get_sregs(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ struct aehd_sregs sregs;
+ uint32_t hflags;
+ int bit, i, ret;
+
+ ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_GET_SREGS, NULL, 0,
+ &sregs, sizeof(sregs));
+ if (ret < 0) {
+ return ret;
+ }
+
+ /*
+ * There can only be one pending IRQ set in the bitmap at a time, so try
+ * to find it and save its number instead (-1 for none).
+ */
+ env->interrupt_injected = -1;
+ for (i = 0; i < ARRAY_SIZE(sregs.interrupt_bitmap); i++) {
+ if (sregs.interrupt_bitmap[i]) {
+ bit = ctz64(sregs.interrupt_bitmap[i]);
+ env->interrupt_injected = i * 64 + bit;
+ break;
+ }
+ }
+
+ get_seg(&env->segs[R_CS], &sregs.cs);
+ get_seg(&env->segs[R_DS], &sregs.ds);
+ get_seg(&env->segs[R_ES], &sregs.es);
+ get_seg(&env->segs[R_FS], &sregs.fs);
+ get_seg(&env->segs[R_GS], &sregs.gs);
+ get_seg(&env->segs[R_SS], &sregs.ss);
+
+ get_seg(&env->tr, &sregs.tr);
+ get_seg(&env->ldt, &sregs.ldt);
+
+ env->idt.limit = sregs.idt.limit;
+ env->idt.base = sregs.idt.base;
+ env->gdt.limit = sregs.gdt.limit;
+ env->gdt.base = sregs.gdt.base;
+
+ env->cr[0] = sregs.cr0;
+ env->cr[2] = sregs.cr2;
+ env->cr[3] = sregs.cr3;
+ env->cr[4] = sregs.cr4;
+
+ env->efer = sregs.efer;
+
+ /* changes to apic base and cr8/tpr are read back via aehd_arch_post_run */
+
+#define HFLAG_COPY_MASK \
+ (~(HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
+ HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
+ HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
+ HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK))
+
+ hflags = env->hflags & HFLAG_COPY_MASK;
+ hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+ hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
+ hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
+ (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
+ hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
+
+ if (env->cr[4] & CR4_OSFXSR_MASK) {
+ hflags |= HF_OSFXSR_MASK;
+ }
+
+ if (env->efer & MSR_EFER_LMA) {
+ hflags |= HF_LMA_MASK;
+ }
+
+ if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
+ hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
+ } else {
+ hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
+ (DESC_B_SHIFT - HF_CS32_SHIFT);
+ hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
+ (DESC_B_SHIFT - HF_SS32_SHIFT);
+ if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK) ||
+ !(hflags & HF_CS32_MASK)) {
+ hflags |= HF_ADDSEG_MASK;
+ } else {
+ hflags |= ((env->segs[R_DS].base | env->segs[R_ES].base |
+ env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT;
+ }
+ }
+ env->hflags = hflags;
+
+ return 0;
+}
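
A worked instance of the hflags reconstruction above, for a hypothetical 64-bit kernel context (CR0.PE=1, EFER.LMA=1, CS.L=1, SS.DPL=0, CR4.OSFXSR=0):

    hflags = HF_PE_MASK                                  /* CR0.PE */
           | HF_LMA_MASK                                 /* EFER.LMA */
           | HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK; /* LMA && CS.L */
    /* CPL is 0, so no HF_CPL bits; HF_ADDSEG is skipped in the LMA path. */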
+
+static int aehd_get_msrs(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ struct aehd_msr_entry *msrs = cpu->aehd_msr_buf->entries;
+ int ret, i;
+ uint64_t mtrr_top_bits;
+ uint64_t bufsize;
+
+ aehd_msr_buf_reset(cpu);
+
+ aehd_msr_entry_add(cpu, MSR_IA32_SYSENTER_CS, 0);
+ aehd_msr_entry_add(cpu, MSR_IA32_SYSENTER_ESP, 0);
+ aehd_msr_entry_add(cpu, MSR_IA32_SYSENTER_EIP, 0);
+ aehd_msr_entry_add(cpu, MSR_PAT, 0);
+ if (has_msr_star) {
+ aehd_msr_entry_add(cpu, MSR_STAR, 0);
+ }
+ if (has_msr_hsave_pa) {
+ aehd_msr_entry_add(cpu, MSR_VM_HSAVE_PA, 0);
+ }
+ if (has_msr_tsc_aux) {
+ aehd_msr_entry_add(cpu, MSR_TSC_AUX, 0);
+ }
+ if (has_msr_tsc_adjust) {
+ aehd_msr_entry_add(cpu, MSR_TSC_ADJUST, 0);
+ }
+ if (has_msr_tsc_deadline) {
+ aehd_msr_entry_add(cpu, MSR_IA32_TSCDEADLINE, 0);
+ }
+ if (has_msr_misc_enable) {
+ aehd_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE, 0);
+ }
+ if (has_msr_smbase) {
+ aehd_msr_entry_add(cpu, MSR_IA32_SMBASE, 0);
+ }
+ if (has_msr_feature_control) {
+ aehd_msr_entry_add(cpu, MSR_IA32_FEATURE_CONTROL, 0);
+ }
+ if (has_msr_bndcfgs) {
+ aehd_msr_entry_add(cpu, MSR_IA32_BNDCFGS, 0);
+ }
+ if (has_msr_xss) {
+ aehd_msr_entry_add(cpu, MSR_IA32_XSS, 0);
+ }
+
+ if (!env->tsc_valid) {
+ aehd_msr_entry_add(cpu, MSR_IA32_TSC, 0);
+ env->tsc_valid = !runstate_is_running();
+ }
+
+#ifdef TARGET_X86_64
+ aehd_msr_entry_add(cpu, MSR_CSTAR, 0);
+ aehd_msr_entry_add(cpu, MSR_KERNELGSBASE, 0);
+ aehd_msr_entry_add(cpu, MSR_FMASK, 0);
+ aehd_msr_entry_add(cpu, MSR_LSTAR, 0);
+#endif
+ if (has_msr_architectural_pmu) {
+ aehd_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
+ aehd_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ aehd_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS, 0);
+ aehd_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL, 0);
+ for (i = 0; i < MAX_FIXED_COUNTERS; i++) {
+ aehd_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, 0);
+ }
+ for (i = 0; i < num_architectural_pmu_counters; i++) {
+ aehd_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i, 0);
+ aehd_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i, 0);
+ }
+ }
+
+ if (has_msr_mtrr) {
+ aehd_msr_entry_add(cpu, MSR_MTRRdefType, 0);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix64K_00000, 0);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix16K_80000, 0);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix16K_A0000, 0);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_C0000, 0);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_C8000, 0);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_D0000, 0);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_D8000, 0);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_E0000, 0);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_E8000, 0);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_F0000, 0);
+ aehd_msr_entry_add(cpu, MSR_MTRRfix4K_F8000, 0);
+ for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
+ aehd_msr_entry_add(cpu, MSR_MTRRphysBase(i), 0);
+ aehd_msr_entry_add(cpu, MSR_MTRRphysMask(i), 0);
+ }
+ }
+
+ bufsize = sizeof(struct aehd_msrs) + cpu->aehd_msr_buf->nmsrs *
+ sizeof(struct aehd_msr_entry);
+ ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_GET_MSRS, cpu->aehd_msr_buf, bufsize,
+ cpu->aehd_msr_buf, bufsize);
+ if (ret < 0) {
+ return ret;
+ } else {
+ ret = cpu->aehd_msr_buf->nmsrs;
+ }
+
+    /*
+     * MTRR masks: Each mask consists of 5 parts
+     * a  10..0 : must be zero
+     * b  11    : valid bit
+     * c n-1..12: actual mask bits
+     * d  51..n : reserved, must be zero
+     * e  63..52: reserved, must be zero
+     *
+     * 'n' is the number of physical bits supported by the CPU and is
+     * apparently always <= 52. We know our 'n' but don't know what
+     * the destination's 'n' is; it might be smaller, in which case
+     * it masks (c) on loading. It might be larger, in which case
+     * we fill 'd' so that d..c is consistent irrespective of the 'n'
+     * we're migrating to.
+     */
+
+ if (cpu->fill_mtrr_mask) {
+ QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > 52);
+ assert(cpu->phys_bits <= TARGET_PHYS_ADDR_SPACE_BITS);
+ mtrr_top_bits = MAKE_64BIT_MASK(cpu->phys_bits, 52 - cpu->phys_bits);
+ } else {
+ mtrr_top_bits = 0;
+ }
+
+ for (i = 0; i < ret; i++) {
+ uint32_t index = msrs[i].index;
+ switch (index) {
+ case MSR_IA32_SYSENTER_CS:
+ env->sysenter_cs = msrs[i].data;
+ break;
+ case MSR_IA32_SYSENTER_ESP:
+ env->sysenter_esp = msrs[i].data;
+ break;
+ case MSR_IA32_SYSENTER_EIP:
+ env->sysenter_eip = msrs[i].data;
+ break;
+ case MSR_PAT:
+ env->pat = msrs[i].data;
+ break;
+ case MSR_STAR:
+ env->star = msrs[i].data;
+ break;
+#ifdef TARGET_X86_64
+ case MSR_CSTAR:
+ env->cstar = msrs[i].data;
+ break;
+ case MSR_KERNELGSBASE:
+ env->kernelgsbase = msrs[i].data;
+ break;
+ case MSR_FMASK:
+ env->fmask = msrs[i].data;
+ break;
+ case MSR_LSTAR:
+ env->lstar = msrs[i].data;
+ break;
+#endif
+ case MSR_IA32_TSC:
+ env->tsc = msrs[i].data;
+ break;
+ case MSR_TSC_AUX:
+ env->tsc_aux = msrs[i].data;
+ break;
+ case MSR_TSC_ADJUST:
+ env->tsc_adjust = msrs[i].data;
+ break;
+ case MSR_IA32_TSCDEADLINE:
+ env->tsc_deadline = msrs[i].data;
+ break;
+ case MSR_VM_HSAVE_PA:
+ env->vm_hsave = msrs[i].data;
+ break;
+ case MSR_MCG_STATUS:
+ env->mcg_status = msrs[i].data;
+ break;
+ case MSR_MCG_CTL:
+ env->mcg_ctl = msrs[i].data;
+ break;
+ case MSR_MCG_EXT_CTL:
+ env->mcg_ext_ctl = msrs[i].data;
+ break;
+ case MSR_IA32_MISC_ENABLE:
+ env->msr_ia32_misc_enable = msrs[i].data;
+ break;
+ case MSR_IA32_SMBASE:
+ env->smbase = msrs[i].data;
+ break;
+ case MSR_IA32_FEATURE_CONTROL:
+ env->msr_ia32_feature_control = msrs[i].data;
+ break;
+ case MSR_IA32_BNDCFGS:
+ env->msr_bndcfgs = msrs[i].data;
+ break;
+ case MSR_IA32_XSS:
+ env->xss = msrs[i].data;
+ break;
+ default:
+ if (msrs[i].index >= MSR_MC0_CTL &&
+ msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
+ env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data;
+ }
+ break;
+ case MSR_CORE_PERF_FIXED_CTR_CTRL:
+ env->msr_fixed_ctr_ctrl = msrs[i].data;
+ break;
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ env->msr_global_ctrl = msrs[i].data;
+ break;
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ env->msr_global_status = msrs[i].data;
+ break;
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ env->msr_global_ovf_ctrl = msrs[i].data;
+ break;
+        case MSR_CORE_PERF_FIXED_CTR0 ...
+             MSR_CORE_PERF_FIXED_CTR0 + MAX_FIXED_COUNTERS - 1:
+            env->msr_fixed_counters[index - MSR_CORE_PERF_FIXED_CTR0] =
+                msrs[i].data;
+            break;
+ case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR0 + MAX_GP_COUNTERS - 1:
+ env->msr_gp_counters[index - MSR_P6_PERFCTR0] = msrs[i].data;
+ break;
+ case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1:
+ env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data;
+ break;
+ case MSR_MTRRdefType:
+ env->mtrr_deftype = msrs[i].data;
+ break;
+ case MSR_MTRRfix64K_00000:
+ env->mtrr_fixed[0] = msrs[i].data;
+ break;
+ case MSR_MTRRfix16K_80000:
+ env->mtrr_fixed[1] = msrs[i].data;
+ break;
+ case MSR_MTRRfix16K_A0000:
+ env->mtrr_fixed[2] = msrs[i].data;
+ break;
+ case MSR_MTRRfix4K_C0000:
+ env->mtrr_fixed[3] = msrs[i].data;
+ break;
+ case MSR_MTRRfix4K_C8000:
+ env->mtrr_fixed[4] = msrs[i].data;
+ break;
+ case MSR_MTRRfix4K_D0000:
+ env->mtrr_fixed[5] = msrs[i].data;
+ break;
+ case MSR_MTRRfix4K_D8000:
+ env->mtrr_fixed[6] = msrs[i].data;
+ break;
+ case MSR_MTRRfix4K_E0000:
+ env->mtrr_fixed[7] = msrs[i].data;
+ break;
+ case MSR_MTRRfix4K_E8000:
+ env->mtrr_fixed[8] = msrs[i].data;
+ break;
+ case MSR_MTRRfix4K_F0000:
+ env->mtrr_fixed[9] = msrs[i].data;
+ break;
+ case MSR_MTRRfix4K_F8000:
+ env->mtrr_fixed[10] = msrs[i].data;
+ break;
+ case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1):
+ if (index & 1) {
+ env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data |
+ mtrr_top_bits;
+ } else {
+ env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data;
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
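
To make the mask fill concrete, a worked example for a hypothetical host with cpu->phys_bits = 40:

    mtrr_top_bits = MAKE_64BIT_MASK(40, 52 - 40);  /* 0x000fff0000000000 */
    /* Bits 51..40 are forced on, so a destination with wider physical
       addressing reads back the same effective mask (parts d..c). */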
+
+static int aehd_put_mp_state(X86CPU *cpu)
+{
+ struct aehd_mp_state mp_state = { .mp_state = cpu->env.mp_state };
+
+ return aehd_vcpu_ioctl(CPU(cpu), AEHD_SET_MP_STATE,
+ &mp_state, sizeof(mp_state), NULL, 0);
+}
+
+static int aehd_get_mp_state(X86CPU *cpu)
+{
+ CPUState *cs = CPU(cpu);
+ CPUX86State *env = &cpu->env;
+ struct aehd_mp_state mp_state;
+ int ret;
+
+ ret = aehd_vcpu_ioctl(cs, AEHD_GET_MP_STATE, NULL, 0,
+ &mp_state, sizeof(mp_state));
+ if (ret < 0) {
+ return ret;
+ }
+ env->mp_state = mp_state.mp_state;
+ cs->halted = (mp_state.mp_state == AEHD_MP_STATE_HALTED);
+ return 0;
+}
+
+static int aehd_get_apic(X86CPU *cpu)
+{
+ DeviceState *apic = cpu->apic_state;
+ struct aehd_lapic_state gapic;
+ int ret;
+
+ if (apic) {
+ ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_GET_LAPIC, NULL, 0,
+ &gapic, sizeof(gapic));
+ if (ret < 0) {
+ return ret;
+ }
+
+ aehd_get_apic_state(apic, &gapic);
+ }
+ return 0;
+}
+
+static int aehd_put_apic(X86CPU *cpu)
+{
+ DeviceState *apic = cpu->apic_state;
+ struct aehd_lapic_state gapic;
+
+ if (apic) {
+ aehd_put_apic_state(apic, &gapic);
+
+ return aehd_vcpu_ioctl(CPU(cpu), AEHD_SET_LAPIC,
+ &gapic, sizeof(gapic), NULL, 0);
+ }
+ return 0;
+}
+
+static int aehd_put_vcpu_events(X86CPU *cpu, int level)
+{
+ CPUState *cs = CPU(cpu);
+ CPUX86State *env = &cpu->env;
+ struct aehd_vcpu_events events = {};
+
+ events.exception.injected = env->exception_injected;
+ events.exception.nr = env->exception_nr;
+ events.exception.has_error_code = env->has_error_code;
+ events.exception.error_code = env->error_code;
+
+ events.interrupt.injected = (env->interrupt_injected >= 0);
+ events.interrupt.nr = env->interrupt_injected;
+ events.interrupt.soft = env->soft_interrupt;
+
+ events.nmi.injected = env->nmi_injected;
+ events.nmi.pending = env->nmi_pending;
+ events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);
+
+ events.sipi_vector = env->sipi_vector;
+
+ if (has_msr_smbase) {
+ events.smi.smm = !!(env->hflags & HF_SMM_MASK);
+ events.smi.smm_inside_nmi = !!(env->hflags2 & HF2_SMM_INSIDE_NMI_MASK);
+ /*
+ * As soon as these are moved to the kernel, remove them
+ * from cs->interrupt_request.
+ */
+ events.smi.pending = cs->interrupt_request & CPU_INTERRUPT_SMI;
+ events.smi.latched_init = cs->interrupt_request & CPU_INTERRUPT_INIT;
+ cs->interrupt_request &= ~(CPU_INTERRUPT_INIT | CPU_INTERRUPT_SMI);
+ events.flags |= AEHD_VCPUEVENT_VALID_SMM;
+ }
+
+ if (level >= AEHD_PUT_RESET_STATE) {
+        events.flags |= AEHD_VCPUEVENT_VALID_NMI_PENDING |
+                        AEHD_VCPUEVENT_VALID_SIPI_VECTOR;
+ }
+
+ return aehd_vcpu_ioctl(CPU(cpu), AEHD_SET_VCPU_EVENTS,
+ &events, sizeof(events), NULL, 0);
+}
+
+static int aehd_get_vcpu_events(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ struct aehd_vcpu_events events;
+ int ret;
+
+ memset(&events, 0, sizeof(events));
+ ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_GET_VCPU_EVENTS,
+ NULL, 0, &events, sizeof(events));
+ if (ret < 0) {
+ return ret;
+ }
+
+    env->exception_injected = events.exception.injected;
+    env->exception_nr = events.exception.nr;
+ env->has_error_code = events.exception.has_error_code;
+ env->error_code = events.exception.error_code;
+
+ env->interrupt_injected =
+ events.interrupt.injected ? events.interrupt.nr : -1;
+ env->soft_interrupt = events.interrupt.soft;
+
+ env->nmi_injected = events.nmi.injected;
+ env->nmi_pending = events.nmi.pending;
+ if (events.nmi.masked) {
+ env->hflags2 |= HF2_NMI_MASK;
+ } else {
+ env->hflags2 &= ~HF2_NMI_MASK;
+ }
+
+ if (events.flags & AEHD_VCPUEVENT_VALID_SMM) {
+ if (events.smi.smm) {
+ env->hflags |= HF_SMM_MASK;
+ } else {
+ env->hflags &= ~HF_SMM_MASK;
+ }
+ if (events.smi.pending) {
+ cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
+ } else {
+ cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
+ }
+ if (events.smi.smm_inside_nmi) {
+ env->hflags2 |= HF2_SMM_INSIDE_NMI_MASK;
+ } else {
+ env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK;
+ }
+ if (events.smi.latched_init) {
+ cpu_interrupt(CPU(cpu), CPU_INTERRUPT_INIT);
+ } else {
+ cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_INIT);
+ }
+ }
+
+ env->sipi_vector = events.sipi_vector;
+
+ return 0;
+}
+
+static int aehd_put_debugregs(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ struct aehd_debugregs dbgregs;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ dbgregs.db[i] = env->dr[i];
+ }
+ dbgregs.dr6 = env->dr[6];
+ dbgregs.dr7 = env->dr[7];
+ dbgregs.flags = 0;
+
+ return aehd_vcpu_ioctl(CPU(cpu), AEHD_SET_DEBUGREGS,
+ &dbgregs, sizeof(dbgregs), NULL, 0);
+}
+
+static int aehd_get_debugregs(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ struct aehd_debugregs dbgregs;
+ int i, ret;
+
+    ret = aehd_vcpu_ioctl(CPU(cpu), AEHD_GET_DEBUGREGS, NULL, 0,
+                          &dbgregs, sizeof(dbgregs));
+ if (ret < 0) {
+ return ret;
+ }
+ for (i = 0; i < 4; i++) {
+ env->dr[i] = dbgregs.db[i];
+ }
+ env->dr[4] = env->dr[6] = dbgregs.dr6;
+ env->dr[5] = env->dr[7] = dbgregs.dr7;
+
+ return 0;
+}
+
+int aehd_arch_put_registers(CPUState *cpu, int level)
+{
+ X86CPU *x86_cpu = X86_CPU(cpu);
+ int ret;
+
+ assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
+
+ if (level >= AEHD_PUT_RESET_STATE) {
+ ret = aehd_put_msr_feature_control(x86_cpu);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ ret = aehd_getput_regs(x86_cpu, 1);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = aehd_put_xsave(x86_cpu);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = aehd_put_xcrs(x86_cpu);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = aehd_put_sregs(x86_cpu);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = aehd_put_msrs(x86_cpu, level);
+ if (ret < 0) {
+ return ret;
+ }
+ if (level >= AEHD_PUT_RESET_STATE) {
+ ret = aehd_put_mp_state(x86_cpu);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = aehd_put_apic(x86_cpu);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ ret = aehd_put_tscdeadline_msr(x86_cpu);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = aehd_put_vcpu_events(x86_cpu, level);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = aehd_put_debugregs(x86_cpu);
+ if (ret < 0) {
+ return ret;
+ }
+ return 0;
+}
+
+int aehd_arch_get_registers(CPUState *cs)
+{
+ X86CPU *cpu = X86_CPU(cs);
+ int ret;
+
+ assert(cpu_is_stopped(cs) || qemu_cpu_is_self(cs));
+
+ ret = aehd_getput_regs(cpu, 0);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = aehd_get_xsave(cpu);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = aehd_get_xcrs(cpu);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = aehd_get_sregs(cpu);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = aehd_get_msrs(cpu);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = aehd_get_mp_state(cpu);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = aehd_get_apic(cpu);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = aehd_get_vcpu_events(cpu);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = aehd_get_debugregs(cpu);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = 0;
+ out:
+ cpu_sync_bndcs_hflags(&cpu->env);
+ return ret;
+}
+
+void aehd_arch_pre_run(CPUState *cpu, struct aehd_run *run)
+{
+ X86CPU *x86_cpu = X86_CPU(cpu);
+ CPUX86State *env = &x86_cpu->env;
+ int ret;
+
+ /* Inject NMI */
+ if (cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
+ if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
+ qemu_mutex_lock_iothread();
+ cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
+ qemu_mutex_unlock_iothread();
+ DPRINTF("injected NMI\n");
+ ret = aehd_vcpu_ioctl(cpu, AEHD_NMI, NULL, 0, NULL, 0);
+ if (ret < 0) {
+ fprintf(stderr, "AEHD: injection failed, NMI lost (%s)\n",
+ strerror(-ret));
+ }
+ }
+ if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
+ qemu_mutex_lock_iothread();
+ cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
+ qemu_mutex_unlock_iothread();
+ DPRINTF("injected SMI\n");
+ ret = aehd_vcpu_ioctl(cpu, AEHD_SMI, NULL, 0, NULL, 0);
+ if (ret < 0) {
+ fprintf(stderr, "AEHD: injection failed, SMI lost (%s)\n",
+ strerror(-ret));
+ }
+ }
+ }
+
+ /*
+ * Force the VCPU out of its inner loop to process any INIT requests
+ * or (for userspace APIC, but it is cheap to combine the checks here)
+ * pending TPR access reports.
+ */
+ if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
+ if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
+ !(env->hflags & HF_SMM_MASK)) {
+ cpu->exit_request = 1;
+ }
+ if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
+ cpu->exit_request = 1;
+ }
+ }
+}
+
+MemTxAttrs aehd_arch_post_run(CPUState *cpu, struct aehd_run *run)
+{
+ X86CPU *x86_cpu = X86_CPU(cpu);
+ CPUX86State *env = &x86_cpu->env;
+
+ if (run->flags & AEHD_RUN_X86_SMM) {
+ env->hflags |= HF_SMM_MASK;
+ } else {
+ env->hflags &= ~HF_SMM_MASK;
+ }
+ if (run->if_flag) {
+ env->eflags |= IF_MASK;
+ } else {
+ env->eflags &= ~IF_MASK;
+ }
+
+ cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8);
+ cpu_set_apic_base(x86_cpu->apic_state, run->apic_base);
+
+ return cpu_get_mem_attrs(env);
+}
+
+int aehd_arch_process_async_events(CPUState *cs)
+{
+ X86CPU *cpu = X86_CPU(cs);
+ CPUX86State *env = &cpu->env;
+
+ if (cs->interrupt_request & CPU_INTERRUPT_MCE) {
+ /* We must not raise CPU_INTERRUPT_MCE if it's not supported. */
+ assert(env->mcg_cap);
+
+ cs->interrupt_request &= ~CPU_INTERRUPT_MCE;
+
+ aehd_cpu_synchronize_state(cs);
+
+        if (env->exception_nr == EXCP08_DBLE) {
+ /* this means triple fault */
+ qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+ cs->exit_request = 1;
+ return 0;
+ }
+        env->exception_nr = EXCP12_MCHK;
+        env->exception_injected = 1;
+        env->has_error_code = 0;
+
+ cs->halted = 0;
+ if (env->mp_state == AEHD_MP_STATE_HALTED) {
+ env->mp_state = AEHD_MP_STATE_RUNNABLE;
+ }
+ }
+
+ if ((cs->interrupt_request & CPU_INTERRUPT_INIT) &&
+ !(env->hflags & HF_SMM_MASK)) {
+ aehd_cpu_synchronize_state(cs);
+ do_cpu_init(cpu);
+ }
+
+ return 0;
+}
+
+static int aehd_handle_halt(X86CPU *cpu)
+{
+ CPUState *cs = CPU(cpu);
+ CPUX86State *env = &cpu->env;
+
+ if (!((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
+ (env->eflags & IF_MASK)) &&
+ !(cs->interrupt_request & CPU_INTERRUPT_NMI)) {
+ cs->halted = 1;
+ return EXCP_HLT;
+ }
+
+ return 0;
+}
+
+static int aehd_handle_tpr_access(X86CPU *cpu)
+{
+ CPUState *cs = CPU(cpu);
+ struct aehd_run *run = cs->aehd_run;
+
+ apic_handle_tpr_access_report(cpu->apic_state, run->tpr_access.rip,
+ run->tpr_access.is_write ? TPR_ACCESS_WRITE
+ : TPR_ACCESS_READ);
+ return 1;
+}
+
+static bool host_supports_vmx(void)
+{
+ uint32_t ecx, unused;
+
+ host_cpuid(1, 0, &unused, &unused, &ecx, &unused);
+ return ecx & CPUID_EXT_VMX;
+}
+
+#define VMX_INVALID_GUEST_STATE 0x80000021
+
+int aehd_arch_handle_exit(CPUState *cs, struct aehd_run *run)
+{
+ X86CPU *cpu = X86_CPU(cs);
+ uint64_t code;
+ int ret;
+
+ switch (run->exit_reason) {
+ case AEHD_EXIT_HLT:
+ DPRINTF("handle_hlt\n");
+ qemu_mutex_lock_iothread();
+ ret = aehd_handle_halt(cpu);
+ qemu_mutex_unlock_iothread();
+ break;
+ case AEHD_EXIT_SET_TPR:
+ ret = 0;
+ break;
+ case AEHD_EXIT_TPR_ACCESS:
+ qemu_mutex_lock_iothread();
+ ret = aehd_handle_tpr_access(cpu);
+ qemu_mutex_unlock_iothread();
+ break;
+ case AEHD_EXIT_FAIL_ENTRY:
+ code = run->fail_entry.hardware_entry_failure_reason;
+ fprintf(stderr, "AEHD: entry failed, hardware error 0x%" PRIx64 "\n",
+ code);
+ if (host_supports_vmx() && code == VMX_INVALID_GUEST_STATE) {
+            fprintf(stderr,
+                    "\nIf you're running a guest on an Intel machine without "
+                    "unrestricted mode\n"
+                    "support, the failure is most likely due to the guest "
+                    "entering an invalid\n"
+                    "state for Intel VT. For example, the guest may be "
+                    "running in big real mode,\n"
+                    "which is not supported on less recent Intel processors."
+                    "\n\n");
+ }
+ ret = -1;
+ break;
+ case AEHD_EXIT_EXCEPTION:
+ fprintf(stderr, "AEHD: exception %d exit (error code 0x%x)\n",
+ run->ex.exception, run->ex.error_code);
+ ret = -1;
+ break;
+ case AEHD_EXIT_IOAPIC_EOI:
+ ioapic_eoi_broadcast(run->eoi.vector);
+ ret = 0;
+ break;
+ default:
+ fprintf(stderr, "AEHD: unknown exit reason %d\n", run->exit_reason);
+ ret = -1;
+ break;
+ }
+
+ return ret;
+}
+
+bool aehd_arch_stop_on_emulation_error(CPUState *cs)
+{
+ X86CPU *cpu = X86_CPU(cs);
+ CPUX86State *env = &cpu->env;
+
+ aehd_cpu_synchronize_state(cs);
+ return !(env->cr[0] & CR0_PE_MASK) ||
+ ((env->segs[R_CS].selector & 3) != 3);
+}
+
+int aehd_arch_irqchip_create(MachineState *ms, AEHDState *s)
+{
+ return 0;
+}
+
typedef struct MSIRouteEntry MSIRouteEntry;
struct MSIRouteEntry {
diff --git a/target/i386/aehd/aehd_i386.h b/target/i386/aehd/aehd_i386.h
new file mode 100644
index 0000000000..622a9b1d63
--- /dev/null
+++ b/target/i386/aehd/aehd_i386.h
@@ -0,0 +1,26 @@
+/*
+ * QEMU AEHD support
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * Copyright (c) 2017 Intel Corporation
+ * Written by:
+ * Haitao Shan <hshan@google.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_AEHD_I386_H
+#define QEMU_AEHD_I386_H
+
+#include "sysemu/aehd.h"
+
+void aehd_arch_reset_vcpu(X86CPU *cs);
+void aehd_arch_do_init_vcpu(X86CPU *cs);
+
+#endif /* QEMU_AEHD_I386_H */
diff --git a/target/i386/aehd/aehd_int.h b/target/i386/aehd/aehd_int.h
index 113f3ebf8e..738dfa72ad 100644
--- a/target/i386/aehd/aehd_int.h
+++ b/target/i386/aehd/aehd_int.h
@@ -45,6 +45,6 @@ struct AEHDState {
};
void aehd_memory_listener_register(AEHDState *s, AEHDMemoryListener *kml,
- AddressSpace *as, int as_id);
+ AddressSpace *as, int as_id);
#endif
diff --git a/target/i386/aehd/meson.build b/target/i386/aehd/meson.build
index 50880712db..b4c6364e96 100644
--- a/target/i386/aehd/meson.build
+++ b/target/i386/aehd/meson.build
@@ -1,4 +1,8 @@
+i386_ss.add(when: 'CONFIG_AEHD', if_false: files('aehd-stub.c'))
+
i386_softmmu_ss.add(when: 'CONFIG_AEHD', if_true: files(
'aehd-all.c',
'aehd.c',
+ 'aehd-cpu.c',
+ 'aehd-accel-ops.c',
))
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index f3fbb0bf8b..83c250e64d 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -27,6 +27,7 @@
#include "sysemu/reset.h"
#include "sysemu/hvf.h"
#include "kvm/kvm_i386.h"
+#include "aehd/aehd_i386.h"
#include "sev.h"
#include "qapi/error.h"
#include "qapi/qapi-visit-machine.h"
@@ -1487,7 +1488,7 @@ uint32_t xsave_area_size(uint64_t mask, bool compacted)
static inline bool accel_uses_host_cpuid(void)
{
- return kvm_enabled() || hvf_enabled();
+ return kvm_enabled() || hvf_enabled() || aehd_enabled();
}
static inline uint64_t x86_cpu_xsave_xcr0_components(X86CPU *cpu)
@@ -5067,6 +5068,13 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord
w,
wi->msr.index);
break;
}
+ } else if (aehd_enabled()) {
+ if (wi->type != CPUID_FEATURE_WORD) {
+ return 0;
+ }
+ r = aehd_arch_get_supported_cpuid(aehd_state, wi->cpuid.eax,
+ wi->cpuid.ecx,
+ wi->cpuid.reg);
} else if (hvf_enabled()) {
if (wi->type != CPUID_FEATURE_WORD) {
return 0;
@@ -6161,6 +6169,8 @@ static void x86_cpu_reset_hold(Object *obj)
if (kvm_enabled()) {
kvm_arch_reset_vcpu(cpu);
+ } else if (aehd_enabled()) {
+ aehd_arch_reset_vcpu(cpu);
}
x86_cpu_set_sgxlepubkeyhash(env);
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index ea650e68a3..4b18f5442f 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1793,7 +1793,7 @@ typedef struct CPUArchState {
int64_t user_tsc_khz; /* for sanity check only */
uint64_t apic_bus_freq;
uint64_t tsc;
-#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
+#if defined(CONFIG_KVM) || defined(CONFIG_AEHD) || defined(CONFIG_HVF)
void *xsave_buf;
uint32_t xsave_buf_len;
#endif
@@ -1825,6 +1825,7 @@ typedef struct CPUArchState {
} CPUX86State;
struct kvm_msrs;
+struct aehd_msrs;
/**
* X86CPU:
@@ -1968,6 +1969,8 @@ struct ArchCPU {
struct kvm_msrs *kvm_msr_buf;
+ struct aehd_msrs *aehd_msr_buf;
+
int32_t node_id; /* NUMA node this CPU belongs to */
int32_t socket_id;
int32_t die_id;
diff --git a/target/i386/helper.c b/target/i386/helper.c
index 8857444819..fbf7b12d7a 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -23,6 +23,7 @@
#include "exec/exec-all.h"
#include "sysemu/runstate.h"
#include "kvm/kvm_i386.h"
+#include "aehd/aehd_i386.h"
#ifndef CONFIG_USER_ONLY
#include "sysemu/hw_accel.h"
#include "monitor/monitor.h"
@@ -596,6 +597,8 @@ void do_cpu_init(X86CPU *cpu)
if (kvm_enabled()) {
kvm_arch_do_init_vcpu(cpu);
+ } else if (aehd_enabled()) {
+ aehd_arch_do_init_vcpu(cpu);
}
apic_init_reset(cpu->apic_state);
}
diff --git a/target/i386/meson.build b/target/i386/meson.build
index 76a90b73d5..57ddfbd4a4 100644
--- a/target/i386/meson.build
+++ b/target/i386/meson.build
@@ -10,6 +10,7 @@ i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c'))
# x86 cpu type
i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c'))
+i386_ss.add(when: 'CONFIG_AEHD', if_true: files('host-cpu.c'))
i386_ss.add(when: 'CONFIG_HVF', if_true: files('host-cpu.c'))
i386_softmmu_ss = ss.source_set()
--
2.40.0.rc0.216.gc4246ad0f0-goog