qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH v5 10/14] pc: Add dimm paravirt SRAT info


From: Hu Tao
Subject: [Qemu-devel] [PATCH v5 10/14] pc: Add dimm paravirt SRAT info
Date: Wed, 26 Jun 2013 17:13:33 +0800

The numa_fw_cfg paravirt interface is extended to include SRAT information for
all hotplug-able dimms. There are 3 words for each hotplug-able memory slot,
denoting start address, size and node proximity. The new info is appended after
existing numa info, so that the fw_cfg layout does not break.  This information
is used by SeaBIOS to build hotplug memory device objects at runtime.
nb_numa_nodes is set to 1 by default (not 0), so that we always pass srat info
to SeaBIOS.

v3->v4: numa_fw_cfg needs to be initialized after the memory controller sets up
dimm ranges.  Make changes for pc_piix and pc_q35 to set numa_fw_cfg after
i440fx initialization.

v2->v3: setting nb_numa_nodes to 1 is not needed

v1->v2:
Dimm SRAT info (#dimms) is appended at end of existing numa fw_cfg in order not
to break existing layout
Documentation of the new fwcfg layout is included in docs/specs/fwcfg.txt

Signed-off-by: Vasilis Liaskovitis <address@hidden>
Signed-off-by: Hu Tao <address@hidden>
---
 docs/specs/fwcfg.txt    | 28 ++++++++++++++++++++++++++++
 hw/i386/pc.c            | 30 ++++++++++++++++++++++++------
 hw/i386/pc_piix.c       |  1 +
 hw/i386/pc_q35.c        |  7 +++++--
 include/hw/i386/pc.h    |  1 +
 include/sysemu/sysemu.h |  1 +
 6 files changed, 60 insertions(+), 8 deletions(-)
 create mode 100644 docs/specs/fwcfg.txt

diff --git a/docs/specs/fwcfg.txt b/docs/specs/fwcfg.txt
new file mode 100644
index 0000000..e6fcd8f
--- /dev/null
+++ b/docs/specs/fwcfg.txt
@@ -0,0 +1,28 @@
+QEMU<->BIOS Paravirt Documentation
+--------------------------------------
+
+This document describes paravirt data structures passed from QEMU to BIOS.
+
+fw_cfg SRAT paravirt info
+--------------------
+The SRAT info passed from QEMU to BIOS has the following layout:
+
+-----------------------------------------------------------------------------------------------
+#nodes | cpu0_pxm | cpu1_pxm | ... | cpulast_pxm | node0_mem | node1_mem | ... | nodelast_mem
+
+-----------------------------------------------------------------------------------------------
+#dimms | dimm0_start | dimm0_sz | dimm0_pxm | ... | dimmlast_start | dimmlast_sz | dimmlast_pxm
+
+Entry 0 contains the number of numa nodes (nb_numa_nodes).
+
+Entries 1..max_cpus: The next max_cpus entries describe node proximity for each
+one of the vCPUs in the system.
+
+Entries max_cpus+1..max_cpus+nb_numa_nodes:  The next nb_numa_nodes entries
+describe the memory size for each one of the NUMA nodes in the system.
+
+Entry max_cpus+nb_numa_nodes+1 contains the number of memory dimms (nb_hp_dimms).
+
+The last 3 * nb_hp_dimms entries are organized in triplets: Each triplet contains
+the physical address offset, size (in bytes), and node proximity for the
+respective dimm.
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 65838a6..b51d3b5 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -55,6 +55,7 @@
 #include "hw/acpi/acpi.h"
 #include "hw/cpu/icc_bus.h"
 #include "hw/boards.h"
+#include "hw/mem-hotplug/dimm.h"
 
 /* debug PC/ISA interrupts */
 //#define DEBUG_IRQ
@@ -606,8 +607,6 @@ static FWCfgState *bochs_bios_init(void)
     FWCfgState *fw_cfg;
     uint8_t *smbios_table;
     size_t smbios_len;
-    uint64_t *numa_fw_cfg;
-    int i, j;
     unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
 
     fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
@@ -640,11 +639,25 @@ static FWCfgState *bochs_bios_init(void)
                      &e820_table, sizeof(e820_table));
 
     fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg));
+
+    return fw_cfg;
+}
+
+void bochs_meminfo_bios_init(void *fw_cfg)
+{
+    uint64_t *numa_fw_cfg;
+    uint64_t *hp_dimms_fw_cfg;
+    int i, j;
+    unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
+
     /* allocate memory for the NUMA channel: one (64bit) word for the number
      * of nodes, one word for each VCPU->node and one word for each node to
      * hold the amount of memory.
+     * Finally one word for the number of hotplug memory slots and three words
+     * for each hotplug memory slot (start address, size and node proximity).
      */
-    numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes);
+    numa_fw_cfg = g_new0(uint64_t,
+                         2 + apic_id_limit + nb_numa_nodes  + 3 * nb_hp_dimms);
     numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
     for (i = 0; i < max_cpus; i++) {
         unsigned int apic_id = x86_cpu_apic_id_from_index(i);
@@ -659,11 +672,16 @@ static FWCfgState *bochs_bios_init(void)
     for (i = 0; i < nb_numa_nodes; i++) {
         numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]);
     }
+
+    numa_fw_cfg[1 + apic_id_limit + nb_numa_nodes] = cpu_to_le64(nb_hp_dimms);
+
+    hp_dimms_fw_cfg = numa_fw_cfg + 2 + apic_id_limit + nb_numa_nodes;
+    if (nb_hp_dimms) {
+        dimm_setup_fwcfg_layout(hp_dimms_fw_cfg);
+    }
     fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
-                     (1 + apic_id_limit + nb_numa_nodes) *
+                     (2 + apic_id_limit + nb_numa_nodes + 3 * nb_hp_dimms) *
                      sizeof(*numa_fw_cfg));
-
-    return fw_cfg;
 }
 
 static long get_file_size(FILE *f)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index fb056df..6e18343 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -138,6 +138,7 @@ static void pc_init1(MemoryRegion *system_memory,
     if (!xen_enabled()) {
         fw_cfg = pc_memory_init(kernel_filename, kernel_cmdline, 
initrd_filename,
                                 below_4g_mem_size, above_4g_mem_size);
+        bochs_meminfo_bios_init(fw_cfg);
     }
 
     if (kvm_irqchip_in_kernel()) {
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 5fe14bb..2c14977 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -74,6 +74,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
     ICH9LPCState *ich9_lpc;
     PCIDevice *ahci;
     DeviceState *icc_bridge;
+    void *fw_cfg = NULL;
 
     icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
     object_property_add_child(qdev_get_machine(), "icc-bridge",
@@ -97,8 +98,9 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
 
     /* allocate ram and load rom/bios */
     if (!xen_enabled()) {
-        pc_memory_init(kernel_filename, kernel_cmdline,
-                       initrd_filename, below_4g_mem_size, above_4g_mem_size);
+        fw_cfg = pc_memory_init(kernel_filename, kernel_cmdline,
+                                initrd_filename, below_4g_mem_size,
+                                above_4g_mem_size);
     }
 
     /* irq lines */
@@ -116,6 +118,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
     q35_host->mch.address_space_io = get_system_io();
     /* pci */
     qdev_init_nofail(DEVICE(q35_host));
+    bochs_meminfo_bios_init(fw_cfg);
     host_bus = q35_host->host.pci.bus;
     /* create ISA bus */
     lpc = pci_create_simple_multifunction(host_bus, PCI_DEVFN(ICH9_LPC_DEV,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 959b92b..4a29e6e 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -231,6 +231,7 @@ int pvpanic_init(ISABus *bus);
 #define E820_UNUSABLE   5
 
 int e820_add_entry(uint64_t, uint64_t, uint32_t);
+void bochs_meminfo_bios_init(void *fw_cfg);
 
 #define PC_COMPAT_1_5 \
         {\
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 2fb71af..2644faa 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -132,6 +132,7 @@ extern QEMUClock *rtc_clock;
 extern int nb_numa_nodes;
 extern uint64_t node_mem[MAX_NODES];
 extern unsigned long *node_cpumask[MAX_NODES];
+extern int nb_hp_dimms;
 
 #define MAX_OPTION_ROMS 16
 typedef struct QEMUOptionRom {
-- 
1.8.3.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]