[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH] RFC: vmcoreinfo device
From: |
Marc-André Lureau |
Subject: |
[Qemu-devel] [PATCH] RFC: vmcoreinfo device |
Date: |
Mon, 24 Apr 2017 17:03:55 +0400 |
The VM coreinfo (vmcoreinfo) device is an emulated device which
exposes a 4k memory range to the guest to store various information
useful to debug the guest OS. (It is greatly inspired by the VMGENID
device implementation.)
This is an early-boot alternative to the qemu-ga VMDUMP_INFO event
proposed in "[PATCH 00/21] WIP: dump: add kaslr support".
If deemed more appropriate, we can consider writing to fw_cfg directly
instead of guest memory, now that qemu 2.9 supports it again.
The proof-of-concept kernel module:
https://github.com/elmarco/vmgenid-test/blob/master/qemuvmci-test.c
Signed-off-by: Marc-André Lureau <address@hidden>
---
include/hw/acpi/aml-build.h | 1 +
include/hw/acpi/vmcoreinfo.h | 36 +++++++
include/hw/compat.h | 4 +
hw/acpi/aml-build.c | 2 +
hw/acpi/vmcoreinfo.c | 207 +++++++++++++++++++++++++++++++++++++
hw/i386/acpi-build.c | 14 +++
default-configs/i386-softmmu.mak | 1 +
default-configs/x86_64-softmmu.mak | 1 +
docs/specs/vmcoreinfo.txt | 138 +++++++++++++++++++++++++
hw/acpi/Makefile.objs | 1 +
10 files changed, 405 insertions(+)
create mode 100644 include/hw/acpi/vmcoreinfo.h
create mode 100644 hw/acpi/vmcoreinfo.c
create mode 100644 docs/specs/vmcoreinfo.txt
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 00c21f160c..fd479115e1 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -211,6 +211,7 @@ struct AcpiBuildTables {
GArray *rsdp;
GArray *tcpalog;
GArray *vmgenid;
+ GArray *vmcoreinfo;
BIOSLinker *linker;
} AcpiBuildTables;
diff --git a/include/hw/acpi/vmcoreinfo.h b/include/hw/acpi/vmcoreinfo.h
new file mode 100644
index 0000000000..63196aeee0
--- /dev/null
+++ b/include/hw/acpi/vmcoreinfo.h
@@ -0,0 +1,36 @@
+#ifndef ACPI_VMCOREINFO_H
+#define ACPI_VMCOREINFO_H
+
+#include "hw/acpi/bios-linker-loader.h"
+#include "hw/qdev.h"
+
+#define VMCOREINFO_DEVICE "vmcoreinfo"
+#define VMCOREINFO_FW_CFG_FILE "etc/vmcoreinfo"
+#define VMCOREINFO_ADDR_FW_CFG_FILE "etc/vmcoreinfo-addr"
+
+#define VMCOREINFO_FW_CFG_SIZE 4096 /* Occupy a page of memory */
+#define VMCOREINFO_OFFSET 40 /* allow space for
+ * OVMF SDT Header Probe Suppressor
+ */
+
+#define VMCOREINFO(obj) OBJECT_CHECK(VmcoreinfoState, (obj), VMCOREINFO_DEVICE)
+
+typedef struct VmcoreinfoState {
+    DeviceState parent_obj;        /* QOM parent instance (TYPE_DEVICE) */
+    uint8_t vmcoreinfo_addr_le[8]; /* guest-written address, little-endian */
+    bool write_pointer_available;  /* "x-write-pointer-available" property */
+} VmcoreinfoState;
+
+/*
+ * Look up the vmcoreinfo device by type name.
+ * The result is NULL unless there is exactly one such device.
+ */
+static inline Object *find_vmcoreinfo_dev(void)
+{
+    Object *dev = object_resolve_path_type("", VMCOREINFO_DEVICE, NULL);
+
+    return dev;
+}
+
+/* Append the vmcoreinfo SSDT to table_data, size the vmci blob and queue the
+ * linker commands that allocate the blob and patch its address.
+ */
+void vmcoreinfo_build_acpi(VmcoreinfoState *vis, GArray *table_data,
+ GArray *vmci, BIOSLinker *linker);
+/* Register the "etc/vmcoreinfo" and "etc/vmcoreinfo-addr" fw_cfg files */
+void vmcoreinfo_add_fw_cfg(VmcoreinfoState *vis, FWCfgState *s, GArray *vmci);
+/* Read the version 0 contents (paddr, size) from guest memory.
+ * Returns false and sets errp if the address has not been written yet or
+ * the version field is not 0.
+ */
+bool vmcoreinfo_get(VmcoreinfoState *vis, uint64_t *paddr, uint32_t *size,
+ Error **errp);
+
+#endif
diff --git a/include/hw/compat.h b/include/hw/compat.h
index 5d5be91daf..d0c9b71902 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -135,6 +135,10 @@
.driver = "vmgenid",\
.property = "x-write-pointer-available",\
.value = "off",\
+ },{\
+ .driver = "vmcoreinfo",\
+ .property = "x-write-pointer-available",\
+ .value = "off",\
},
#define HW_COMPAT_2_3 \
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index c6f2032dec..cd639586a4 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1560,6 +1560,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables)
tables->table_data = g_array_new(false, true /* clear */, 1);
tables->tcpalog = g_array_new(false, true /* clear */, 1);
tables->vmgenid = g_array_new(false, true /* clear */, 1);
+ tables->vmcoreinfo = g_array_new(false, true /* clear */, 1);
tables->linker = bios_linker_loader_init();
}
@@ -1570,6 +1571,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre)
g_array_free(tables->table_data, true);
g_array_free(tables->tcpalog, mfre);
g_array_free(tables->vmgenid, mfre);
+ g_array_free(tables->vmcoreinfo, mfre);
}
/* Build rsdt table */
diff --git a/hw/acpi/vmcoreinfo.c b/hw/acpi/vmcoreinfo.c
new file mode 100644
index 0000000000..dec4feac1e
--- /dev/null
+++ b/hw/acpi/vmcoreinfo.c
@@ -0,0 +1,207 @@
+/*
+ * Virtual Machine coreinfo device
+ * (based on Virtual Machine Generation ID Device)
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ * Copyright (C) 2017 Skyport Systems.
+ *
+ * Authors: Marc-André Lureau <address@hidden>
+ * Ben Warren <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu/osdep.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/vmcoreinfo.h"
+#include "hw/nvram/fw_cfg.h"
+#include "sysemu/sysemu.h"
+#include "qapi/error.h"
+
+/*
+ * Build the vmcoreinfo SSDT and the matching linker/loader commands.
+ *
+ * - resizes the vmci blob to VMCOREINFO_FW_CFG_SIZE
+ * - builds an SSDT holding the VCIA dword and the \_SB.VMCI device
+ *   (_HID "QEMUVMCI", _STA and ADDR methods) and appends it to table_data
+ * - queues linker commands to allocate the blob and to patch its address
+ *   into both the "etc/vmcoreinfo-addr" fw_cfg file and the VCIA dword
+ *
+ * Note: the vis argument is not used by this function.
+ */
+void vmcoreinfo_build_acpi(VmcoreinfoState *vis, GArray *table_data,
+ GArray *vmci, BIOSLinker *linker)
+{
+ Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx;
+ uint32_t vgia_offset;
+
+ g_array_set_size(vmci, VMCOREINFO_FW_CFG_SIZE);
+
+ /* Put this in a separate SSDT table */
+ ssdt = init_aml_allocator();
+
+ /* Reserve space for header */
+ acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader));
+
+ /* Storage address */
+ /* NOTE(review): presumably build_append_named_dword() returns the offset of
+ * the dword value inside ssdt->buf, so adding the current table_data->len
+ * gives its offset once the SSDT is appended below -- confirm.
+ */
+ vgia_offset = table_data->len +
+ build_append_named_dword(ssdt->buf, "VCIA");
+ scope = aml_scope("\\_SB");
+ dev = aml_device("VMCI");
+ aml_append(dev, aml_name_decl("_HID", aml_string("QEMUVMCI")));
+
+ /* Simple status method to check that address is linked and non-zero */
+ method = aml_method("_STA", 0, AML_NOTSERIALIZED);
+ addr = aml_local(0);
+ aml_append(method, aml_store(aml_int(0xf), addr));
+ if_ctx = aml_if(aml_equal(aml_name("VCIA"), aml_int(0)));
+ aml_append(if_ctx, aml_store(aml_int(0), addr));
+ aml_append(method, if_ctx);
+ aml_append(method, aml_return(addr));
+ aml_append(dev, method);
+
+ /* the ADDR method returns two 32-bit words representing the lower and
+ * upper halves of the physical address of the vmcoreinfo area
+ */
+ method = aml_method("ADDR", 0, AML_NOTSERIALIZED);
+
+ addr = aml_local(0);
+ aml_append(method, aml_store(aml_package(2), addr));
+
+ /* element 0: VCIA + VMCOREINFO_OFFSET, element 1: always 0 */
+ aml_append(method, aml_store(aml_add(aml_name("VCIA"),
+ aml_int(VMCOREINFO_OFFSET), NULL),
+ aml_index(addr, aml_int(0))));
+ aml_append(method, aml_store(aml_int(0), aml_index(addr, aml_int(1))));
+ aml_append(method, aml_return(addr));
+
+ aml_append(dev, method);
+ aml_append(scope, dev);
+ aml_append(ssdt, scope);
+
+ g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
+
+ /* Allocate guest memory */
+ bios_linker_loader_alloc(linker, VMCOREINFO_FW_CFG_FILE, vmci, 4096,
+ false /* page boundary, high memory */);
+
+ /* Patch address of vmcoreinfo fw_cfg blob into the ADDR fw_cfg
+ * blob so QEMU can read the info from there. The address is
+ * expected to be < 4GB, but write 64 bits anyway.
+ * The address that is patched in is offset in order to implement
+ * the "OVMF SDT Header probe suppressor"
+ * see docs/specs/vmcoreinfo.txt for more details.
+ */
+ bios_linker_loader_write_pointer(linker,
+ VMCOREINFO_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t),
+ VMCOREINFO_FW_CFG_FILE, VMCOREINFO_OFFSET);
+
+ /* Patch address of vmcoreinfo into the AML so OSPM can retrieve
+ * and read it. Note that while we provide storage for 64 bits, only
+ * the least-significant 32 get patched into AML.
+ */
+ bios_linker_loader_add_pointer(linker,
+ ACPI_BUILD_TABLE_FILE, vgia_offset, sizeof(uint32_t),
+ VMCOREINFO_FW_CFG_FILE, 0);
+
+ /* Fill in the header reserved above; the SSDT is now the last
+ * ssdt->buf->len bytes of table_data.
+ */
+ build_header(linker, table_data,
+ (void *)(table_data->data + table_data->len - ssdt->buf->len),
+ "SSDT", ssdt->buf->len, 1, NULL, "VMCOREIN");
+ free_aml_allocator();
+}
+
+/*
+ * Publish the two fw_cfg files used by the device: the blob backing the
+ * guest memory allocation and the file that receives the allocated address.
+ */
+void vmcoreinfo_add_fw_cfg(VmcoreinfoState *vis, FWCfgState *s, GArray *vmci)
+{
+    void *addr_blob = vis->vmcoreinfo_addr_le;
+    size_t addr_blob_len = ARRAY_SIZE(vis->vmcoreinfo_addr_le);
+
+    /* Read-only fw_cfg file used for the vmcoreinfo allocation */
+    /* XXX: linker could learn to allocate without backing fw_cfg? */
+    fw_cfg_add_file(s, VMCOREINFO_FW_CFG_FILE, vmci->data,
+                    VMCOREINFO_FW_CFG_SIZE);
+
+    /* Read-write fw_cfg file through which the address is reported back */
+    fw_cfg_add_file_callback(s, VMCOREINFO_ADDR_FW_CFG_FILE, NULL, NULL,
+                             addr_blob, addr_blob_len, false);
+}
+
+/*
+ * Read the version 0 vmcoreinfo contents from guest memory.
+ *
+ * @vis:   device state holding the address written through fw_cfg
+ * @paddr: out, 64-bit little-endian value stored 4 bytes after the version
+ * @size:  out, 32-bit little-endian value stored 12 bytes after the version
+ * @errp:  set when false is returned
+ *
+ * Returns true on success; false if the address has not been written yet
+ * or the version field in guest memory is not 0.
+ */
+bool vmcoreinfo_get(VmcoreinfoState *vis,
+                    uint64_t *paddr, uint32_t *size,
+                    Error **errp)
+{
+    uint32_t vmcoreinfo_addr;
+    uint32_t version;
+
+    assert(vis);
+    assert(paddr);
+    assert(size);
+
+    /* Only the low 32 bits of the 8-byte little-endian address are used */
+    memcpy(&vmcoreinfo_addr, vis->vmcoreinfo_addr_le, sizeof(vmcoreinfo_addr));
+    vmcoreinfo_addr = le32_to_cpu(vmcoreinfo_addr);
+    if (!vmcoreinfo_addr) {
+        error_setg(errp, "BIOS has not yet written the address of %s",
+                   VMCOREINFO_DEVICE);
+        return false;
+    }
+
+    /* No byte swap needed: the only accepted value, 0, is endian-neutral */
+    cpu_physical_memory_read(vmcoreinfo_addr, &version, sizeof(version));
+    if (version != 0) {
+        error_setg(errp, "Unknown %s memory version", VMCOREINFO_DEVICE);
+        return false;
+    }
+
+    /* The length must be the size of the pointee, not of the pointer:
+     * sizeof(size) would copy a pointer's worth of bytes into a uint32_t.
+     */
+    cpu_physical_memory_read(vmcoreinfo_addr + 4, paddr, sizeof(*paddr));
+    *paddr = le64_to_cpu(*paddr);
+    cpu_physical_memory_read(vmcoreinfo_addr + 12, size, sizeof(*size));
+    *size = le32_to_cpu(*size);
+
+    return true;
+}
+
+/* Migration description: carries the guest-written address bytes */
+static const VMStateDescription vmstate_vmcoreinfo = {
+    .name = "vmcoreinfo",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8_ARRAY(vmcoreinfo_addr_le, VmcoreinfoState,
+                            sizeof(uint64_t)),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+/* Device properties. "x-write-pointer-available" defaults to true;
+ * vmcoreinfo_realize() refuses to realize the device when it is off.
+ */
+static Property vmcoreinfo_properties[] = {
+ DEFINE_PROP_BOOL("x-write-pointer-available", VmcoreinfoState,
+ write_pointer_available, true),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Realize callback. Reports an error through errp when the
+ * "x-write-pointer-available" property is off, or when the device cannot
+ * be looked up unambiguously.
+ */
+static void vmcoreinfo_realize(DeviceState *dev, Error **errp)
+{
+    VmcoreinfoState *vis = VMCOREINFO(dev);
+
+    if (!vis->write_pointer_available) {
+        error_setg(errp, "%s requires DMA write support in fw_cfg, "
+                   "which this machine type does not provide",
+                   VMCOREINFO_DEVICE);
+        return;
+    }
+
+    /* This device exists, so a failed lookup means there are several:
+     * find_vmcoreinfo_dev() only succeeds for exactly one instance.
+     */
+    if (find_vmcoreinfo_dev() == NULL) {
+        error_setg(errp, "at most one %s device is permitted",
+                   VMCOREINFO_DEVICE);
+    }
+}
+
+/* Class initializer: installs properties, migration state and realize hook */
+static void vmcoreinfo_device_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+
+    device_class->hotpluggable = false;
+    device_class->props = vmcoreinfo_properties;
+    device_class->vmsd = &vmstate_vmcoreinfo;
+    device_class->realize = vmcoreinfo_realize;
+}
+
+/* QOM type description: a TYPE_DEVICE subtype named VMCOREINFO_DEVICE whose
+ * instances are VmcoreinfoState-sized.
+ */
+static const TypeInfo vmcoreinfo_device_info = {
+ .name = VMCOREINFO_DEVICE,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(VmcoreinfoState),
+ .class_init = vmcoreinfo_device_class_init,
+};
+
+/* Register the vmcoreinfo device type; hooked up via type_init() below */
+static void vmcoreinfo_register_types(void)
+{
+ type_register_static(&vmcoreinfo_device_info);
+}
+
+type_init(vmcoreinfo_register_types)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 2073108577..97f04401c4 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -43,6 +43,7 @@
#include "sysemu/tpm.h"
#include "hw/acpi/tpm.h"
#include "hw/acpi/vmgenid.h"
+#include "hw/acpi/vmcoreinfo.h"
#include "sysemu/tpm_backend.h"
#include "hw/timer/mc146818rtc_regs.h"
#include "sysemu/numa.h"
@@ -2612,6 +2613,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
GArray *tables_blob = tables->table_data;
AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL };
Object *vmgenid_dev;
+ Object *vmcoreinfo_dev;
acpi_get_pm_info(&pm);
acpi_get_misc_info(&misc);
@@ -2661,6 +2663,12 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
vmgenid_build_acpi(VMGENID(vmgenid_dev), tables_blob,
tables->vmgenid, tables->linker);
}
+ vmcoreinfo_dev = find_vmcoreinfo_dev();
+ if (vmcoreinfo_dev) {
+ acpi_add_table(table_offsets, tables_blob);
+ vmcoreinfo_build_acpi(VMCOREINFO(vmcoreinfo_dev), tables_blob,
+ tables->vmcoreinfo, tables->linker);
+ }
if (misc.has_hpet) {
acpi_add_table(table_offsets, tables_blob);
@@ -2833,6 +2841,7 @@ void acpi_setup(void)
AcpiBuildTables tables;
AcpiBuildState *build_state;
Object *vmgenid_dev;
+ Object *vmcoreinfo_dev;
if (!pcms->fw_cfg) {
ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n");
@@ -2874,6 +2883,11 @@ void acpi_setup(void)
vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), pcms->fw_cfg,
tables.vmgenid);
}
+ vmcoreinfo_dev = find_vmcoreinfo_dev();
+ if (vmcoreinfo_dev) {
+ vmcoreinfo_add_fw_cfg(VMCOREINFO(vmcoreinfo_dev), pcms->fw_cfg,
+ tables.vmcoreinfo);
+ }
if (!pcmc->rsdp_in_ram) {
/*
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 029e95202a..5b0acdb715 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -60,3 +60,4 @@ CONFIG_SMBIOS=y
CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
CONFIG_PXB=y
CONFIG_ACPI_VMGENID=y
+CONFIG_ACPI_VMCOREINFO=y
diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak
index d1d7432f74..71033e26fa 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -60,3 +60,4 @@ CONFIG_SMBIOS=y
CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
CONFIG_PXB=y
CONFIG_ACPI_VMGENID=y
+CONFIG_ACPI_VMCOREINFO=y
diff --git a/docs/specs/vmcoreinfo.txt b/docs/specs/vmcoreinfo.txt
new file mode 100644
index 0000000000..70d9716fe0
--- /dev/null
+++ b/docs/specs/vmcoreinfo.txt
@@ -0,0 +1,138 @@
+VIRTUAL MACHINE COREINFO DEVICE
+===============================
+
+Copyright (C) 2017 Red Hat, Inc.
+
+This work is licensed under the terms of the GNU GPL, version 2 or later.
+See the COPYING file in the top-level directory.
+
+===
+
+The VM coreinfo (vmcoreinfo) device is an emulated device which
+exposes a 4k memory range to the guest to store various information
+useful to debug the guest OS.
+
+QEMU Implementation
+-------------------
+
+The vmcoreinfo device is put in its own ACPI descriptor table, in a
+Secondary System Description Table, or SSDT.
+
+The following is a dump of the contents from a running system:
+
+# iasl -p ./SSDT -d /sys/firmware/acpi/tables/SSDT
+/*
+ * Intel ACPI Component Architecture
+ * AML/ASL+ Disassembler version 20160831-64
+ * Copyright (c) 2000 - 2016 Intel Corporation
+ *
+ * Disassembling to symbolic ASL+ operators
+ *
+ * Disassembly of /sys/firmware/acpi/tables/SSDT, Mon Apr 24 15:59:53 2017
+ *
+ * Original Table Header:
+ * Signature "SSDT"
+ * Length 0x00000086 (134)
+ * Revision 0x01
+ * Checksum 0x5C
+ * OEM ID "BOCHS "
+ * OEM Table ID "VMCOREIN"
+ * OEM Revision 0x00000001 (1)
+ * Compiler ID "BXPC"
+ * Compiler Version 0x00000001 (1)
+ */
+DefinitionBlock ("", "SSDT", 1, "BOCHS ", "VMCOREIN", 0x00000001)
+{
+ Name (VCIA, 0x3FFFF000)
+ Scope (\_SB)
+ {
+ Device (VMCI)
+ {
+ Name (_HID, "QEMUVMCI") // _HID: Hardware ID
+ Method (_STA, 0, NotSerialized) // _STA: Status
+ {
+ Local0 = 0x0F
+ If (VCIA == Zero)
+ {
+ Local0 = Zero
+ }
+
+ Return (Local0)
+ }
+
+ Method (ADDR, 0, NotSerialized)
+ {
+ Local0 = Package (0x02) {}
+ Local0 [Zero] = (VCIA + 0x28)
+ Local0 [One] = Zero
+ Return (Local0)
+ }
+ }
+ }
+}
+
+
+Design Details:
+---------------
+
+QEMU must be able to read the contents of the device memory,
+specifically when starting a memory dump. In order to do this, QEMU
+must know the address that has been allocated.
+
+The mechanism chosen for this memory sharing is writeable fw_cfg blobs.
+These are data objects that are visible to both QEMU and guests, and are
+addressable as sequential files.
+
+More information about fw_cfg can be found in "docs/specs/fw_cfg.txt"
+
+Two fw_cfg blobs are used in this case:
+
+etc/vmcoreinfo - used to allocate memory range, read-only to the guest
+etc/vmcoreinfo-addr - contains the address of the allocated range
+ - writeable by the guest
+
+
+QEMU sends the following commands to the guest at startup:
+
+1. Allocate memory for vmcoreinfo fw_cfg blob.
+2. Write the address of vmcoreinfo into the SSDT (VCIA ACPI variable as
+ shown above in the iasl dump). Note that this change is not propagated
+ back to QEMU.
+3. Write the address of vmcoreinfo back to QEMU's copy of vmcoreinfo-addr
+ via the fw_cfg DMA interface.
+
+After step 3, QEMU is able to read the contents of vmcoreinfo.
+
+The value of vmcoreinfo-addr is persisted via the VMState mechanism.
+
+
+Storage Format:
+---------------
+
+The content is expected to use little-endian format.
+
+In order to implement an OVMF "SDT Header Probe Suppressor", the
+vmcoreinfo blob starts with 40 bytes of padding:
+
++-----------------------------------+
+| SSDT with OEM Table ID = VMCOREIN |
++-----------------------------------+
+| ... | TOP OF PAGE
+| VCIA dword object ----------------|-----> +---------------------------+
+| ... | | fw-allocated array for |
+| _STA method referring to VCIA | | "etc/vmcoreinfo" |
+| ... | +---------------------------+
+| ADDR method referring to VCIA | | 0: OVMF SDT Header probe |
+| ... | | suppressor |
++-----------------------------------+ | 40: uint32 version field |
+ | 44: info contents |
+ | .... |
+ +---------------------------+
+ END OF PAGE
+
+Version 0 content:
+
+ uint64 paddr:
+ Physical address of the Linux vmcoreinfo ELF note.
+ uint32 size:
+ Size of the vmcoreinfo ELF note.
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 11c35bcb44..9623078f95 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -6,6 +6,7 @@ common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o
common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
+common-obj-$(CONFIG_ACPI_VMCOREINFO) += vmcoreinfo.o
common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
common-obj-y += acpi_interface.o
--
2.12.0.191.gc5d8de91d
- [Qemu-devel] [PATCH] RFC: vmcoreinfo device,
Marc-André Lureau <=