[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v3 23/32] nvdimm: build ACPI NFIT table
From: |
Michael S. Tsirkin |
Subject: |
Re: [Qemu-devel] [PATCH v3 23/32] nvdimm: build ACPI NFIT table |
Date: |
Mon, 12 Oct 2015 14:27:08 +0300 |
On Sun, Oct 11, 2015 at 11:52:55AM +0800, Xiao Guangrong wrote:
> NFIT is defined in ACPI 6.0: 5.2.25 NVDIMM Firmware Interface Table (NFIT)
>
> Currently, we only support PMEM mode. Each device has 3 structures:
> - SPA structure, defines the PMEM region info
>
> - MEM DEV structure, it has the @handle which is used to associate specified
> ACPI NVDIMM device we will introduce in later patch.
> Also we can happily ignored the memory device's interleave, the real
> nvdimm hardware access is hidden behind host
>
> - DCR structure, it defines vendor ID used to associate specified vendor
> nvdimm driver. Since we only implement PMEM mode this time, Command
> window and Data window are not needed
>
> Signed-off-by: Xiao Guangrong <address@hidden>
> ---
> hw/i386/acpi-build.c | 4 +
> hw/mem/nvdimm/acpi.c | 209
> ++++++++++++++++++++++++++++++++++++++++++++++-
> hw/mem/nvdimm/internal.h | 13 +++
> hw/mem/nvdimm/nvdimm.c | 25 ++++++
> include/hw/mem/nvdimm.h | 2 +
> 5 files changed, 252 insertions(+), 1 deletion(-)
>
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 95e0c65..c637dc8 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -1661,6 +1661,7 @@ static bool acpi_has_iommu(void)
> static
> void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
> {
> + PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
I don't like more code poking at machine directly.
I know srat does it, and I don't like it. Any chance you can add
acpi_get_nvdumm_info to get all you need from nvdimm state?
> GArray *table_offsets;
> unsigned facs, ssdt, dsdt, rsdt;
> AcpiCpuInfo cpu;
> @@ -1742,6 +1743,9 @@ void acpi_build(PcGuestInfo *guest_info,
> AcpiBuildTables *tables)
> build_dmar_q35(tables_blob, tables->linker);
> }
>
> + nvdimm_build_acpi_table(&pcms->nvdimm_memory, table_offsets, tables_blob,
> + tables->linker);
> +
> /* Add tables supplied by user (if any) */
> for (u = acpi_table_first(); u; u = acpi_table_next(u)) {
> unsigned len = acpi_table_len(u);
> diff --git a/hw/mem/nvdimm/acpi.c b/hw/mem/nvdimm/acpi.c
> index b640874..62b1e02 100644
> --- a/hw/mem/nvdimm/acpi.c
> +++ b/hw/mem/nvdimm/acpi.c
> @@ -32,6 +32,46 @@
> #include "hw/mem/nvdimm.h"
> #include "internal.h"
>
> +static void nfit_spa_uuid_pm(uuid_le *uuid)
> +{
> + uuid_le uuid_pm = UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43, 0x0d,
> + 0x33, 0x18, 0xb7, 0x8c, 0xdb);
> + memcpy(uuid, &uuid_pm, sizeof(uuid_pm));
> +}
> +
Just add a static constant:
const uint8_t nfit_spa_uuid[] = {0x79, 0xd3, ..... }
then memcpy instead of a wrapper.
> +enum {
> + NFIT_STRUCTURE_SPA = 0,
> + NFIT_STRUCTURE_MEMDEV = 1,
> + NFIT_STRUCTURE_IDT = 2,
> + NFIT_STRUCTURE_SMBIOS = 3,
> + NFIT_STRUCTURE_DCR = 4,
> + NFIT_STRUCTURE_BDW = 5,
> + NFIT_STRUCTURE_FLUSH = 6,
> +};
> +
> +enum {
> + EFI_MEMORY_UC = 0x1ULL,
> + EFI_MEMORY_WC = 0x2ULL,
> + EFI_MEMORY_WT = 0x4ULL,
> + EFI_MEMORY_WB = 0x8ULL,
> + EFI_MEMORY_UCE = 0x10ULL,
> + EFI_MEMORY_WP = 0x1000ULL,
> + EFI_MEMORY_RP = 0x2000ULL,
> + EFI_MEMORY_XP = 0x4000ULL,
> + EFI_MEMORY_NV = 0x8000ULL,
> + EFI_MEMORY_MORE_RELIABLE = 0x10000ULL,
> +};
> +
> +/*
> + * NVDIMM Firmware Interface Table
> + * @signature: "NFIT"
> + */
> +struct nfit {
> + ACPI_TABLE_HEADER_DEF
> + uint32_t reserved;
> +} QEMU_PACKED;
> +typedef struct nfit nfit;
> +
> /* System Physical Address Range Structure */
> struct nfit_spa {
> uint16_t type;
> @@ -40,13 +80,21 @@ struct nfit_spa {
> uint16_t flags;
> uint32_t reserved;
> uint32_t proximity_domain;
> - uint8_t type_guid[16];
> + uuid_le type_guid;
> uint64_t spa_base;
> uint64_t spa_length;
> uint64_t mem_attr;
> } QEMU_PACKED;
> typedef struct nfit_spa nfit_spa;
>
> +/*
> + * Control region is strictly for management during hot add/online
> + * operation.
> + */
> +#define SPA_FLAGS_ADD_ONLINE_ONLY (1)
unused
> +/* Data in Proximity Domain field is valid. */
> +#define SPA_FLAGS_PROXIMITY_VALID (1 << 1)
> +
> /* Memory Device to System Physical Address Range Mapping Structure */
> struct nfit_memdev {
> uint16_t type;
> @@ -91,12 +139,20 @@ struct nfit_dcr {
> } QEMU_PACKED;
> typedef struct nfit_dcr nfit_dcr;
>
> +#define REVSISON_ID 1
> +#define NFIT_FIC1 0x201
> +
> static uint64_t nvdimm_device_structure_size(uint64_t slots)
> {
> /* each nvdimm has three structures. */
> return slots * (sizeof(nfit_spa) + sizeof(nfit_memdev) +
> sizeof(nfit_dcr));
> }
>
> +static uint64_t get_nfit_total_size(uint64_t slots)
> +{
> + return sizeof(struct nfit) + nvdimm_device_structure_size(slots);
> +}
> +
> static uint64_t nvdimm_acpi_memory_size(uint64_t slots, uint64_t page_size)
> {
> uint64_t size = nvdimm_device_structure_size(slots);
> @@ -118,3 +174,154 @@ void nvdimm_init_memory_state(NVDIMMState *state,
> MemoryRegion*system_memory,
> NVDIMM_ACPI_MEM_SIZE);
> memory_region_add_subregion(system_memory, state->base, &state->mr);
> }
> +
> +static uint32_t nvdimm_slot_to_sn(int slot)
> +{
> + return 0x123456 + slot;
> +}
> +
> +static uint32_t nvdimm_slot_to_handle(int slot)
> +{
> + return slot + 1;
> +}
> +
> +static uint16_t nvdimm_slot_to_spa_index(int slot)
> +{
> + return (slot + 1) << 1;
> +}
> +
> +static uint32_t nvdimm_slot_to_dcr_index(int slot)
> +{
> + return nvdimm_slot_to_spa_index(slot) + 1;
> +}
> +
There are lots of magic numbers here with no comments.
Pls explain the logic in code comments.
> +static int build_structure_spa(void *buf, NVDIMMDevice *nvdimm)
Pls document the specific chapter that this implements.
same everywhere else.
> +{
> + nfit_spa *nfit_spa;
> + uint64_t addr = object_property_get_int(OBJECT(nvdimm), DIMM_ADDR_PROP,
> + NULL);
> + uint64_t size = object_property_get_int(OBJECT(nvdimm), DIMM_SIZE_PROP,
> + NULL);
> + uint32_t node = object_property_get_int(OBJECT(nvdimm), DIMM_NODE_PROP,
> + NULL);
> + int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP,
> + NULL);
> +
> + nfit_spa = buf;
> +
> + nfit_spa->type = cpu_to_le16(NFIT_STRUCTURE_SPA);
Don't do these 1-time enums. They are hard to match against spec.
nfit_spa->type = cpu_to_le16(0 /* System Physical Address Range
Structure */);
same everywhere else.
> + nfit_spa->length = cpu_to_le16(sizeof(*nfit_spa));
> + nfit_spa->spa_index = cpu_to_le16(nvdimm_slot_to_spa_index(slot));
> + nfit_spa->flags = cpu_to_le16(SPA_FLAGS_PROXIMITY_VALID);
> + nfit_spa->proximity_domain = cpu_to_le32(node);
> + nfit_spa_uuid_pm(&nfit_spa->type_guid);
> + nfit_spa->spa_base = cpu_to_le64(addr);
> + nfit_spa->spa_length = cpu_to_le64(size);
> + nfit_spa->mem_attr = cpu_to_le64(EFI_MEMORY_WB | EFI_MEMORY_NV);
> +
> + return sizeof(*nfit_spa);
> +}
> +
> +static int build_structure_memdev(void *buf, NVDIMMDevice *nvdimm)
> +{
> + nfit_memdev *nfit_memdev;
> + uint64_t addr = object_property_get_int(OBJECT(nvdimm), DIMM_ADDR_PROP,
> + NULL);
> + uint64_t size = object_property_get_int(OBJECT(nvdimm), DIMM_SIZE_PROP,
> + NULL);
> + int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP,
> + NULL);
> + uint32_t handle = nvdimm_slot_to_handle(slot);
> +
> + nfit_memdev = buf;
> + nfit_memdev->type = cpu_to_le16(NFIT_STRUCTURE_MEMDEV);
> + nfit_memdev->length = cpu_to_le16(sizeof(*nfit_memdev));
> + nfit_memdev->nfit_handle = cpu_to_le32(handle);
> + /* point to nfit_spa. */
> + nfit_memdev->spa_index = cpu_to_le16(nvdimm_slot_to_spa_index(slot));
> + /* point to nfit_dcr. */
> + nfit_memdev->dcr_index = cpu_to_le16(nvdimm_slot_to_dcr_index(slot));
> + nfit_memdev->region_len = cpu_to_le64(size);
> + nfit_memdev->region_dpa = cpu_to_le64(addr);
> + /* Only one interleave for pmem. */
> + nfit_memdev->interleave_ways = cpu_to_le16(1);
> +
> + return sizeof(*nfit_memdev);
> +}
> +
> +static int build_structure_dcr(void *buf, NVDIMMDevice *nvdimm)
> +{
> + nfit_dcr *nfit_dcr;
> + int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP,
> + NULL);
> + uint32_t sn = nvdimm_slot_to_sn(slot);
> +
> + nfit_dcr = buf;
> + nfit_dcr->type = cpu_to_le16(NFIT_STRUCTURE_DCR);
> + nfit_dcr->length = cpu_to_le16(sizeof(*nfit_dcr));
> + nfit_dcr->dcr_index = cpu_to_le16(nvdimm_slot_to_dcr_index(slot));
> + nfit_dcr->vendor_id = cpu_to_le16(0x8086);
> + nfit_dcr->device_id = cpu_to_le16(1);
> + nfit_dcr->revision_id = cpu_to_le16(REVSISON_ID);
> + nfit_dcr->serial_number = cpu_to_le32(sn);
> + nfit_dcr->fic = cpu_to_le16(NFIT_FIC1);
> +
> + return sizeof(*nfit_dcr);
> +}
> +
> +static void build_device_structure(GSList *device_list, char *buf)
> +{
> + buf += sizeof(nfit);
> +
> + for (; device_list; device_list = device_list->next) {
> + NVDIMMDevice *nvdimm = device_list->data;
> +
> + /* build System Physical Address Range Description Table. */
> + buf += build_structure_spa(buf, nvdimm);
> +
> + /*
> + * build Memory Device to System Physical Address Range Mapping
> + * Table.
> + */
> + buf += build_structure_memdev(buf, nvdimm);
> +
> + /* build Control Region Descriptor Table. */
> + buf += build_structure_dcr(buf, nvdimm);
> + }
> +}
> +
> +static void build_nfit(GSList *device_list, GArray *table_offsets,
> + GArray *table_data, GArray *linker)
> +{
> + size_t total;
> + char *buf;
> + int nfit_start, nr;
> +
> + nr = g_slist_length(device_list);
> + total = get_nfit_total_size(nr);
> +
> + nfit_start = table_data->len;
> + acpi_add_table(table_offsets, table_data);
> +
> + buf = acpi_data_push(table_data, total);
> + build_device_structure(device_list, buf);
This seems fragile. Should build_device_structure overflow
a buffer we'll corrupt memory.
Current code does use acpi_data_push but only for trivial
things like fixed size headers.
Can't you use glib to dynamically append things to table
as they are generated?
> +
> + build_header(linker, table_data, (void *)(table_data->data + nfit_start),
> + "NFIT", table_data->len - nfit_start, 1);
> +}
> +
> +void nvdimm_build_acpi_table(NVDIMMState *state, GArray *table_offsets,
> + GArray *table_data, GArray *linker)
> +{
> + GSList *device_list = nvdimm_get_built_list();
> +
> + if (!memory_region_size(&state->mr)) {
> + assert(!device_list);
> + return;
> + }
> +
> + if (device_list) {
> + build_nfit(device_list, table_offsets, table_data, linker);
> + g_slist_free(device_list);
> + }
> +}
> diff --git a/hw/mem/nvdimm/internal.h b/hw/mem/nvdimm/internal.h
> index c4ba750..5551448 100644
> --- a/hw/mem/nvdimm/internal.h
> +++ b/hw/mem/nvdimm/internal.h
> @@ -14,4 +14,17 @@
> #define NVDIMM_INTERNAL_H
>
> #define MIN_NAMESPACE_LABEL_SIZE (128UL << 10)
> +
> +struct uuid_le {
> + uint8_t b[16];
> +};
> +typedef struct uuid_le uuid_le;
> +
> +#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
> +((uuid_le) \
> +{ { (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
> + (b) & 0xff, ((b) >> 8) & 0xff, (c) & 0xff, ((c) >> 8) & 0xff, \
> + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) } })
> +
Please avoid polluting the global namespace.
Prefix everything with NVDIMM.
> +GSList *nvdimm_get_built_list(void);
You are adding an extern function with no comment
about it's purpose anywhere. Pls fix this.
The name isn't pretty. What does "built" mean?
List of what? Is this a device list?
> #endif
This header is too small to be worth it.
nvdimm_get_built_list seems to be the only interface -
just stick it in the header you have under include.
> diff --git a/hw/mem/nvdimm/nvdimm.c b/hw/mem/nvdimm/nvdimm.c
> index 0850e82..bc8c577 100644
> --- a/hw/mem/nvdimm/nvdimm.c
> +++ b/hw/mem/nvdimm/nvdimm.c
> @@ -26,6 +26,31 @@
> #include "hw/mem/nvdimm.h"
> #include "internal.h"
>
> +static int nvdimm_built_list(Object *obj, void *opaque)
> +{
> + GSList **list = opaque;
> +
> + if (object_dynamic_cast(obj, TYPE_NVDIMM)) {
> + DeviceState *dev = DEVICE(obj);
> +
> + /* only realized NVDIMMs matter */
> + if (dev->realized) {
> + *list = g_slist_append(*list, dev);
> + }
> + }
> +
> + object_child_foreach(obj, nvdimm_built_list, opaque);
> + return 0;
> +}
> +
> +GSList *nvdimm_get_built_list(void)
> +{
> + GSList *list = NULL;
> +
> + object_child_foreach(qdev_get_machine(), nvdimm_built_list, &list);
> + return list;
> +}
> +
> static MemoryRegion *nvdimm_get_memory_region(DIMMDevice *dimm)
> {
> NVDIMMDevice *nvdimm = NVDIMM(dimm);
> diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
> index aa95961..0a6bda4 100644
> --- a/include/hw/mem/nvdimm.h
> +++ b/include/hw/mem/nvdimm.h
> @@ -49,4 +49,6 @@ typedef struct NVDIMMState NVDIMMState;
>
> void nvdimm_init_memory_state(NVDIMMState *state, MemoryRegion*system_memory,
> MachineState *machine , uint64_t page_size);
> +void nvdimm_build_acpi_table(NVDIMMState *state, GArray *table_offsets,
> + GArray *table_data, GArray *linker);
> #endif
> --
> 1.8.3.1
- Re: [Qemu-devel] [PATCH v3 01/32] acpi: add aml_derefof, (continued)
- [Qemu-devel] [PATCH v3 02/32] acpi: add aml_sizeof, Xiao Guangrong, 2015/10/10
- [Qemu-devel] [PATCH v3 17/32] dimm: abstract dimm device from pc-dimm, Xiao Guangrong, 2015/10/10
- [Qemu-devel] [PATCH v3 18/32] dimm: get mapped memory region from DIMMDeviceClass->get_memory_region, Xiao Guangrong, 2015/10/10
- [Qemu-devel] [PATCH v3 13/32] pc-dimm: make pc_existing_dimms_capacity static and rename it, Xiao Guangrong, 2015/10/10
- [Qemu-devel] [PATCH v3 15/32] stubs: rename qmp_pc_dimm_device_list.c, Xiao Guangrong, 2015/10/10
- [Qemu-devel] [PATCH v3 21/32] nvdimm: implement NVDIMM device abstract, Xiao Guangrong, 2015/10/10
- [Qemu-devel] [PATCH v3 24/32] nvdimm: init the address region used by DSM method, Xiao Guangrong, 2015/10/10
- [Qemu-devel] [PATCH v3 23/32] nvdimm: build ACPI NFIT table, Xiao Guangrong, 2015/10/10
- Re: [Qemu-devel] [PATCH v3 23/32] nvdimm: build ACPI NFIT table,
Michael S. Tsirkin <=
- Re: [Qemu-devel] [PATCH v3 23/32] nvdimm: build ACPI NFIT table, Dan Williams, 2015/10/12
[Qemu-devel] [PATCH v3 25/32] nvdimm: build ACPI nvdimm devices, Xiao Guangrong, 2015/10/10
[Qemu-devel] [PATCH v3 32/32] nvdimm: add maintain info, Xiao Guangrong, 2015/10/10