qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 4/4] add BIOS support for an ACPI SRAT table (needed for NUMA support)


From: Andre Przywara
Subject: [Qemu-devel] [PATCH 4/4] add BIOS support for an ACPI SRAT table (needed for NUMA support)
Date: Tue, 31 Mar 2009 15:28:57 +0200

Signed-off-by: Andre Przywara <address@hidden>
---
 .../bios-pq/0012_add-SRAT-ACPI-table-support.patch |  307 ++++++++++++++++++++
 pc-bios/bios-pq/series                             |    1 +
 2 files changed, 308 insertions(+), 0 deletions(-)
 create mode 100644 pc-bios/bios-pq/0012_add-SRAT-ACPI-table-support.patch

diff --git a/pc-bios/bios-pq/0012_add-SRAT-ACPI-table-support.patch b/pc-bios/bios-pq/0012_add-SRAT-ACPI-table-support.patch
new file mode 100644
index 0000000..a98c072
--- /dev/null
+++ b/pc-bios/bios-pq/0012_add-SRAT-ACPI-table-support.patch
@@ -0,0 +1,307 @@
+From 40b77280bf956be2e1d5cbd6b2662e861b480112 Mon Sep 17 00:00:00 2001
+From: Andre Przywara <address@hidden>
+Date: Fri, 20 Mar 2009 00:35:06 +0100
+Subject: [PATCH] add SRAT ACPI table support
+
+Take NUMA topology info from the QEMU firmware configuration interface
+(number of nodes, node for each (V)CPU and amount of memory) and build
+a SRAT table describing this topology for the guest OS. Handles more than
+4 GB of RAM by including a hole for 32bit PCI memory mapping.
+---
+ bios/rombios32.c |  175 ++++++++++++++++++++++++++++++++++++++++++++++++++----
+ 1 files changed, 164 insertions(+), 11 deletions(-)
+
+diff --git a/bios/rombios32.c b/bios/rombios32.c
+index 7be4216..02379c0 100644
+--- a/bios/rombios32.c
++++ b/bios/rombios32.c
+@@ -451,6 +451,11 @@ int pm_sci_int;
+ unsigned long bios_table_cur_addr;
+ unsigned long bios_table_end_addr;
+ 
++static inline uint64_t le64_to_cpu(uint64_t x)
++{
++    return x;
++}
++
+ void wrmsr_smp(uint32_t index, uint64_t val)
+ {
+     static struct { uint32_t ecx, eax, edx; } *p = (void *)SMP_MSR_ADDR;
+@@ -469,6 +474,7 @@ void wrmsr_smp(uint32_t index, uint64_t val)
+ #define QEMU_CFG_SIGNATURE  0x00
+ #define QEMU_CFG_ID         0x01
+ #define QEMU_CFG_UUID       0x02
++#define QEMU_CFG_NUMA       0x0D
+ #define QEMU_CFG_ARCH_LOCAL     0x8000
+ #define QEMU_CFG_ACPI_TABLES  (QEMU_CFG_ARCH_LOCAL + 0)
+ 
+@@ -519,6 +525,14 @@ static int acpi_load_table(int i, uint32_t addr, uint16_t *len)
+     qemu_cfg_read((uint8_t*)addr, *len);
+     return 0;
+ }
++
++uint64_t qemu_cfg_get64 (void)
++{
++    uint64_t ret;
++
++    qemu_cfg_read((uint8_t*)&ret, 8);
++    return le64_to_cpu(ret);
++}
+ #endif
+ 
+ void uuid_probe(void)
+@@ -1273,7 +1287,7 @@ struct rsdt_descriptor_rev1
+ {
+       ACPI_TABLE_HEADER_DEF                           /* ACPI common table header */
+ #ifdef BX_QEMU
+-      uint32_t                             table_offset_entry [4]; /* Array of pointers to other */
++      uint32_t                             table_offset_entry [5]; /* Array of pointers to other */
+ #else
+       uint32_t                             table_offset_entry [3]; /* Array of pointers to other */
+ #endif
+@@ -1381,7 +1395,7 @@ struct multiple_apic_table
+ } __attribute__((__packed__));
+ 
+ 
+-/* Values for Type in APIC_HEADER_DEF */
++/* Values for Type in APIC sub-headers */
+ 
+ #define APIC_PROCESSOR          0
+ #define APIC_IO                 1
+@@ -1394,18 +1408,18 @@ struct multiple_apic_table
+ #define APIC_XRUPT_SOURCE       8
+ #define APIC_RESERVED           9           /* 9 and greater are reserved */
+ 
+-/*
+- * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
+- */
+-#define APIC_HEADER_DEF                     /* Common APIC sub-structure header */\
++#define ACPI_SUB_HEADER_DEF                 /* Common ACPI sub-structure header */\
+       uint8_t                              type; \
+       uint8_t                              length;
+ 
++/*
++ * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
++ */
+ /* Sub-structures for MADT */
+ 
+ struct madt_processor_apic
+ {
+-      APIC_HEADER_DEF
++      ACPI_SUB_HEADER_DEF
+       uint8_t                              processor_id;           /* ACPI processor id */
+       uint8_t                              local_apic_id;          /* Processor's local APIC id */
+ #if 0
+@@ -1416,6 +1430,43 @@ struct madt_processor_apic
+ #endif
+ } __attribute__((__packed__));
+ 
++/*
++ * SRAT (NUMA topology description) table
++ */
++
++#define SRAT_PROCESSOR          0
++#define SRAT_MEMORY             1
++
++struct system_resource_affinity_table
++{
++    ACPI_TABLE_HEADER_DEF
++    uint32_t    reserved1;
++    uint32_t    reserved2[2];
++};
++
++struct srat_processor_affinity
++{
++    ACPI_SUB_HEADER_DEF
++    uint8_t     proximity_lo;
++    uint8_t     local_apic_id;
++    uint32_t    flags;
++    uint8_t     local_sapic_eid;
++    uint8_t     proximity_hi[3];
++    uint32_t    reserved;
++};
++
++struct srat_memory_affinity
++{
++    ACPI_SUB_HEADER_DEF
++    uint8_t     proximity[4];
++    uint16_t    reserved1;
++    uint32_t    base_addr_low,base_addr_high;
++    uint32_t    length_low,length_high;
++    uint32_t    reserved2;
++    uint32_t    flags;
++    uint32_t    reserved3[2];
++};
++
+ #ifdef BX_QEMU
+ /*
+  *  * ACPI 2.0 Generic Address Space definition.
+@@ -1444,7 +1495,7 @@ struct acpi_20_hpet {
+ 
+ struct madt_io_apic
+ {
+-      APIC_HEADER_DEF
++      ACPI_SUB_HEADER_DEF
+       uint8_t                              io_apic_id;             /* I/O APIC ID */
+       uint8_t                              reserved;               /* Reserved - must be zero */
+       uint32_t                             address;                /* APIC physical address */
+@@ -1455,7 +1506,7 @@ struct madt_io_apic
+ #ifdef BX_QEMU
+ struct madt_int_override
+ {
+-      APIC_HEADER_DEF
++      ACPI_SUB_HEADER_DEF
+       uint8_t                bus;     /* Identifies ISA Bus */
+       uint8_t                source;  /* Bus-relative interrupt source */
+       uint32_t               gsi;     /* GSI that source will signal */
+@@ -1559,6 +1610,21 @@ int acpi_build_processor_ssdt(uint8_t *ssdt)
+     return ssdt_ptr - ssdt;
+ }
+ 
++static void acpi_build_srat_memory(struct srat_memory_affinity *numamem,
++    uint64_t base, uint64_t len, int node, int enabled)
++{
++     numamem->type = SRAT_MEMORY;
++     numamem->length = sizeof(*numamem);
++     memset (numamem->proximity, 0 ,4);
++     numamem->proximity[0] = node;
++     numamem->flags = cpu_to_le32(!!enabled);
++     numamem->base_addr_low = base & 0xFFFFFFFF;
++     numamem->base_addr_high = base >> 32;
++     numamem->length_low = len & 0xFFFFFFFF;
++     numamem->length_high = len >> 32;
++     return;
++}
++
+ /* base_addr must be a multiple of 4KB */
+ void acpi_bios_init(void)
+ {
+@@ -1569,12 +1635,15 @@ void acpi_bios_init(void)
+     struct multiple_apic_table *madt;
+     uint8_t *dsdt, *ssdt;
+ #ifdef BX_QEMU
++    struct system_resource_affinity_table *srat;
+     struct acpi_20_hpet *hpet;
+     uint32_t hpet_addr;
+ #endif
+     uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, ssdt_addr;
+     uint32_t acpi_tables_size, madt_addr, madt_size, rsdt_size;
++    uint32_t srat_addr,srat_size;
+     uint16_t i, external_tables;
++    int nb_numa_nodes;
+ 
+     /* reserve memory space for tables */
+ #ifdef BX_USE_EBDA_TABLES
+@@ -1616,6 +1685,25 @@ void acpi_bios_init(void)
+     ssdt_addr = addr;
+     ssdt = (void *)(addr);
+     addr += acpi_build_processor_ssdt(ssdt);
++#ifdef BX_QEMU
++    qemu_cfg_select(QEMU_CFG_NUMA);
++    nb_numa_nodes = qemu_cfg_get64();
++#else
++    nb_numa_nodes = 0;
++#endif
++    if (nb_numa_nodes > 0) {
++        addr = (addr + 7) & ~7;
++        srat_addr = addr;
++        srat_size = sizeof(*srat) +
++            sizeof(struct srat_processor_affinity) * smp_cpus +
++            sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2);
++        srat = (void *)(addr);
++        addr += srat_size;
++    } else {
++        srat_addr = addr;
++        srat = (void*)(addr);
++        srat_size = 0;
++    }
+ 
+     addr = (addr + 7) & ~7;
+     madt_addr = addr;
+@@ -1725,6 +1813,69 @@ void acpi_bios_init(void)
+ 
+     memset(rsdt, 0, rsdt_size);
+ #ifdef BX_QEMU
++    /* SRAT */
++    if (nb_numa_nodes > 0) {
++        struct srat_processor_affinity *core;
++        struct srat_memory_affinity *numamem;
++        int slots;
++        uint64_t mem_len, mem_base, next_base = 0, curnode;
++
++        qemu_cfg_select(QEMU_CFG_NUMA);
++        qemu_cfg_get64();
++        memset (srat, 0 , srat_size);
++        srat->reserved1=1;
++ 
++        core = (void*)(srat + 1);
++        for (i = 0; i < smp_cpus; ++i) {
++             core->type = SRAT_PROCESSOR;
++             core->length = sizeof(*core);
++             core->local_apic_id = i;
++             curnode = qemu_cfg_get64();
++             core->proximity_lo = curnode;
++             memset (core->proximity_hi, 0, 3);
++             core->local_sapic_eid = 0;
++             if (i < smp_cpus)
++                 core->flags = cpu_to_le32(1);
++             else
++                 core->flags = 0;
++             core++;
++        }
++
++        /* the memory map is a bit tricky, it contains at least one hole
++         * from 640k-1M and possibly another one from 3.5G-4G.
++         */
++        numamem = (void*)core; slots = 0;
++        acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1);
++        next_base = 1024 * 1024; numamem++;slots++;
++        for (i = 1; i < nb_numa_nodes + 1; ++i) {
++            mem_base = next_base;
++            mem_len = qemu_cfg_get64();
++            if (i == 1) mem_len -= 1024 * 1024;
++            next_base = mem_base + mem_len;
++ 
++            /* Cut out the PCI hole */
++            if (mem_base <= ram_size && next_base > ram_size) {
++                mem_len -= next_base - ram_size;
++                if (mem_len > 0) {
++                    acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
++                    numamem++; slots++;
++                }
++                mem_base = 1ULL << 32;
++                mem_len = next_base - ram_size;
++                next_base += (1ULL << 32) - ram_size;
++            }
++            acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
++            numamem++; slots++;
++        }
++        for (; slots < nb_numa_nodes + 2; slots++) {
++            acpi_build_srat_memory(numamem, 0, 0, 0, 0);
++            numamem++;
++        }
++
++         acpi_build_table_header((struct acpi_table_header *)srat,
++                                "SRAT", srat_size, 1);
++    }
++
+     /* HPET */
+     memset(hpet, 0, sizeof(*hpet));
+     /* Note timer_block_id value must be kept in sync with value advertised by
+@@ -1753,9 +1904,11 @@ void acpi_bios_init(void)
+     rsdt->table_offset_entry[2] = cpu_to_le32(ssdt_addr);
+ #ifdef BX_QEMU
+     rsdt->table_offset_entry[3] = cpu_to_le32(hpet_addr);
++    if (nb_numa_nodes > 0)
++        rsdt->table_offset_entry[4] = cpu_to_le32(srat_addr);
+ #endif
+-    acpi_build_table_header((struct acpi_table_header *)rsdt,
+-                            "RSDT", rsdt_size, 1);
++    acpi_build_table_header((struct acpi_table_header *)rsdt, "RSDT",
++        rsdt_size - (nb_numa_nodes > 0? 0: sizeof(uint32_t)), 1);
+ 
+     acpi_tables_size = addr - base_addr;
+ 
+-- 
+1.6.1.3
+
diff --git a/pc-bios/bios-pq/series b/pc-bios/bios-pq/series
index 5a29df9..bd04a1a 100644
--- a/pc-bios/bios-pq/series
+++ b/pc-bios/bios-pq/series
@@ -9,3 +9,4 @@
 0009_qemu-bios-pci-hotplug-support.patch
 0010_bios-mark-the-acpi-sci-interrupt-as-connected-to-irq-9.patch
 0011_read-additional-acpi-tables-from-a-vm.patch
+0012_add-SRAT-ACPI-table-support.patch
-- 
1.6.1.3






reply via email to

[Prev in Thread] Current Thread [Next in Thread]