[PATCH 86/86] numa: remove deprecated implicit RAM distribution between nodes

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 86/86] numa: remove deprecated implicit RAM distribution between nodes

From:	Igor Mammedov
Subject:	[PATCH 86/86] numa: remove deprecated implicit RAM distribution between nodes
Date:	Tue, 31 Dec 2019 14:04:10 +0100

The feature has been deprecated since 4.1 (4bb4a273),
so remove it.

As a result, if RAM distribution isn't specified explicitly,
the machine won't start, and the CLI must be changed to explicitly
assign RAM to nodes using one of these options:
  -numa node,memdev  (5.0 and newer machine types)
  -numa node,mem     (4.2 and older machine types)
It's recommended to use the "memdev" variant for new virtual machines
and to use "mem" only when it's necessary to migrate an already existing
virtual machine that was started with implicit RAM distribution.

Signed-off-by: Igor Mammedov <address@hidden>
---
 include/hw/boards.h   |  3 ---
 include/sysemu/numa.h |  4 ----
 hw/core/machine.c     |  6 -----
 hw/core/numa.c        | 61 +++++----------------------------------------------
 hw/i386/pc_piix.c     |  1 -
 hw/i386/pc_q35.c      |  1 -
 hw/ppc/spapr.c        |  7 ------
 qemu-deprecated.texi  |  8 -------
 qemu-options.hx       | 16 +++++++-------
 9 files changed, 13 insertions(+), 94 deletions(-)

diff --git a/include/hw/boards.h b/include/hw/boards.h
index 9f69883..aadc9af 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -192,12 +192,9 @@ struct MachineClass {
     int minimum_page_bits;
     bool has_hotpluggable_cpus;
     bool ignore_memory_transaction_failures;
-    int numa_mem_align_shift;
     const char **valid_cpu_types;
     strList *allowed_dynamic_sysbus_devices;
     bool auto_enable_numa_with_memhp;
-    void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes,
-                                 int nb_nodes, ram_addr_t size);
     bool ignore_boot_device_suffixes;
     bool smbus_no_migration_support;
     bool nvdimm_supported;
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 21f6a5a..5470238 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -43,10 +43,6 @@ void parse_numa_opts(MachineState *ms);
 void numa_complete_configuration(MachineState *ms);
 void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms);
 extern QemuOptsList qemu_numa_opts;
-void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
-                                 int nb_nodes, ram_addr_t size);
-void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
-                                  int nb_nodes, ram_addr_t size);
 void numa_cpu_pre_plug(const struct CPUArchId *slot, DeviceState *dev,
                        Error **errp);
 bool numa_uses_legacy_mem(void);
diff --git a/hw/core/machine.c b/hw/core/machine.c
index a938052..b1ce8c5 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -715,12 +715,6 @@ static void machine_class_init(ObjectClass *oc, void *data)
     mc->rom_file_has_mr = true;
     mc->smp_parse = smp_parse;
 
-    /* numa node memory size aligned on 8MB by default.
-     * On Linux, each node's border has to be 8MB aligned
-     */
-    mc->numa_mem_align_shift = 23;
-    mc->numa_auto_assign_ram = numa_default_auto_assign_ram;
-
     object_class_property_add_str(oc, "kernel",
         machine_get_kernel, machine_set_kernel, &error_abort);
     object_class_property_set_description(oc, "kernel",
diff --git a/hw/core/numa.c b/hw/core/numa.c
index 96776ff..522e689 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -330,42 +330,6 @@ static void complete_init_numa_distance(MachineState *ms)
     }
 }
 
-void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
-                                 int nb_nodes, ram_addr_t size)
-{
-    int i;
-    uint64_t usedmem = 0;
-
-    /* Align each node according to the alignment
-     * requirements of the machine class
-     */
-
-    for (i = 0; i < nb_nodes - 1; i++) {
-        nodes[i].node_mem = (size / nb_nodes) &
-                            ~((1 << mc->numa_mem_align_shift) - 1);
-        usedmem += nodes[i].node_mem;
-    }
-    nodes[i].node_mem = size - usedmem;
-}
-
-void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
-                                  int nb_nodes, ram_addr_t size)
-{
-    int i;
-    uint64_t usedmem = 0, node_mem;
-    uint64_t granularity = size / nb_nodes;
-    uint64_t propagate = 0;
-
-    for (i = 0; i < nb_nodes - 1; i++) {
-        node_mem = (granularity + propagate) &
-                   ~((1 << mc->numa_mem_align_shift) - 1);
-        propagate = granularity + propagate - node_mem;
-        nodes[i].node_mem = node_mem;
-        usedmem += node_mem;
-    }
-    nodes[i].node_mem = size - usedmem;
-}
-
 static void numa_init_memdev_container(MachineState *ms, MemoryRegion *ram)
 {
     int i;
@@ -435,30 +399,15 @@ void numa_complete_configuration(MachineState *ms)
             ms->numa_state->num_nodes = MAX_NODES;
         }
 
-        /* If no memory size is given for any node, assume the default case
-         * and distribute the available memory equally across all nodes
-         */
-        for (i = 0; i < ms->numa_state->num_nodes; i++) {
-            if (numa_info[i].node_mem != 0) {
-                break;
-            }
-        }
-        if (i == ms->numa_state->num_nodes) {
-            assert(mc->numa_auto_assign_ram);
-            mc->numa_auto_assign_ram(mc, numa_info,
-                                     ms->numa_state->num_nodes, ram_size);
-            if (!qtest_enabled()) {
-                warn_report("Default splitting of RAM between nodes is deprecated,"
-                            " Use '-numa node,memdev' to explictly define RAM"
-                            " allocation per node");
-            }
-        }
-
         numa_total = 0;
         for (i = 0; i < ms->numa_state->num_nodes; i++) {
             numa_total += numa_info[i].node_mem;
         }
-        if (numa_total != ram_size) {
+        if (numa_total == 0) {
+            error_report(" Use '-numa node,memdev' to explictly assign RAM"
+                         " to node");
+            exit(EXIT_FAILURE);
+        } else if (numa_total != ram_size) {
             error_report("total memory for NUMA nodes (0x%" PRIx64 ")"
                          " should equal RAM size (0x" RAM_ADDR_FMT ")",
                          numa_total, ram_size);
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index b2805fe..3a1edba 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -533,7 +533,6 @@ static void pc_i440fx_2_9_machine_options(MachineClass *m)
     pc_i440fx_2_10_machine_options(m);
     compat_props_add(m->compat_props, hw_compat_2_9, hw_compat_2_9_len);
     compat_props_add(m->compat_props, pc_compat_2_9, pc_compat_2_9_len);
-    m->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
 }
 
 DEFINE_I440FX_MACHINE(v2_9, "pc-i440fx-2.9", NULL,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index f15711b..ed6c7be 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -467,7 +467,6 @@ static void pc_q35_2_10_machine_options(MachineClass *m)
     pc_q35_2_11_machine_options(m);
     compat_props_add(m->compat_props, hw_compat_2_10, hw_compat_2_10_len);
     compat_props_add(m->compat_props, pc_compat_2_10, pc_compat_2_10_len);
-    m->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
     m->auto_enable_numa_with_memhp = false;
 }
 
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6527962..1cc7407 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4380,11 +4380,6 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     xic->ics_resend = spapr_ics_resend;
     xic->icp_get = spapr_icp_get;
     ispc->print_info = spapr_pic_print_info;
-    /* Force NUMA node memory size to be a multiple of
-     * SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity
-     * in which LMBs are represented and hot-added
-     */
-    mc->numa_mem_align_shift = 28;
     mc->auto_enable_numa = true;
 
     smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
@@ -4637,7 +4632,6 @@ static void spapr_machine_2_9_class_options(MachineClass *mc)
     spapr_machine_2_10_class_options(mc);
     compat_props_add(mc->compat_props, hw_compat_2_9, hw_compat_2_9_len);
     compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
-    mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
     smc->pre_2_10_has_unused_icps = true;
     smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED;
 }
@@ -4657,7 +4651,6 @@ static void spapr_machine_2_8_class_options(MachineClass *mc)
     spapr_machine_2_9_class_options(mc);
     compat_props_add(mc->compat_props, hw_compat_2_8, hw_compat_2_8_len);
     compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
-    mc->numa_mem_align_shift = 23;
 }
 
 DEFINE_SPAPR_MACHINE(2_8, "2.8", false);
diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi
index d9b07ba..05e6082 100644
--- a/qemu-deprecated.texi
+++ b/qemu-deprecated.texi
@@ -89,14 +89,6 @@ error in the future.
 The @code{-realtime mlock=on|off} argument has been replaced by the
 @code{-overcommit mem-lock=on|off} argument.
 
-@subsection -numa node (without memory specified) (since 4.1)
-
-Splitting RAM by default between NUMA nodes has the same issues as @option{mem}
-parameter described above with the difference that the role of the user plays
-QEMU using implicit generic or board specific splitting rule.
-Use @option{memdev} with @var{memory-backend-ram} backend or @option{mem} (if
-it's supported by used machine type) to define mapping explictly instead.
-
 @subsection RISC-V -bios (since 4.1)
 
 QEMU 4.1 introduced support for the -bios option in QEMU for RISC-V for the
diff --git a/qemu-options.hx b/qemu-options.hx
index 088dd32..4872f11 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -168,14 +168,14 @@ If any on the three values is given, the total number of CPUs @var{n} can be omi
 ETEXI
 
 DEF("numa", HAS_ARG, QEMU_OPTION_numa,
-    "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
-    "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
+    "-numa node,mem=size[,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
+    "-numa node,memdev=id[,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
     "-numa dist,src=source,dst=destination,val=distance\n"
     "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n",
     QEMU_ARCH_ALL)
 STEXI
-@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
-@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
+@item -numa node,mem=@var{size}[,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
+@itemx -numa node,memdev=@var{id}[,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
 @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
 @itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
 @findex -numa
@@ -193,7 +193,7 @@ split between them.
 For example, the following option assigns VCPUs 0, 1, 2 and 5 to
 a NUMA node:
 @example
--numa node,cpus=0-2,cpus=5
+-numa node,cpus=0-2,cpus=5,memdev=ram-backend-id
 @end example
 
 @samp{cpu} option is a new alternative to @samp{cpus} option
@@ -210,14 +210,14 @@ For example:
 @example
 -M pc \
 -smp 1,sockets=2,maxcpus=2 \
--numa node,nodeid=0 -numa node,nodeid=1 \
+-m 512M -object memory-backend-ram,id=mem,size=512M \
+-numa node,nodeid=0,memdev=mem -numa node,nodeid=1 \
 -numa cpu,node-id=0,socket-id=0 -numa cpu,node-id=1,socket-id=1
 @end example
 
 Legacy @samp{mem} assigns a given RAM amount to a node (not supported for 5.0
 and newer machine types). @samp{memdev} assigns RAM from a given memory backend
-device to a node. If @samp{mem} and @samp{memdev} are omitted in all nodes, RAM
-is split equally between them.
+device to a node.
 
 @samp{mem} and @samp{memdev} are mutually exclusive. Furthermore,
 if one node uses @samp{memdev}, all of them have to use it.
-- 
2.7.4

[Prev in Thread]

Current Thread

[Next in Thread]

Re: [PATCH 76/86] post conversion default_ram_id cleanup, (continued)
- [PATCH 77/86] exec: cleanup qemu_minrampagesize()/qemu_maxrampagesize(), Igor Mammedov, 2019/12/31
- [PATCH 78/86] exec: drop bogus mem_path from qemu_ram_alloc_from_fd(), Igor Mammedov, 2019/12/31
- [PATCH 80/86] hostmem: introduce "prealloc-threads" property, Igor Mammedov, 2019/12/31
- [PATCH 83/86] tests:numa-test: make top level args dynamic and g_autofree(cli) cleanups, Igor Mammedov, 2019/12/31
- [PATCH 79/86] make mem_path local variable, Igor Mammedov, 2019/12/31
- [PATCH 85/86] numa: make exit() usage consistent, Igor Mammedov, 2019/12/31
- [PATCH 84/86] tests:numa-test: use explicit memdev to specify node RAM, Igor Mammedov, 2019/12/31
- [PATCH 82/86] numa: forbid '-numa node, mem' for 5.0 and newer machine types, Igor Mammedov, 2019/12/31
- [PATCH 81/86] hostmem: fix strict bind policy, Igor Mammedov, 2019/12/31
- [PATCH 86/86] numa: remove deprecated implicit RAM distribution between nodes, Igor Mammedov <=
- [PATCH 17/86] arm:imx25_pdk: drop RAM size fixup, Igor Mammedov, 2019/12/31
- [PATCH 38/86] arm:xlnx-versal-virt: use memdev for RAM, Igor Mammedov, 2019/12/31
  - Re: [PATCH 38/86] arm:xlnx-versal-virt: use memdev for RAM, Philippe Mathieu-Daudé, 2019/12/31
- Re: [PATCH 00/86] refactor main RAM allocation to use hostmem backend, Philippe Mathieu-Daudé, 2019/12/31
  - Re: [PATCH 00/86] refactor main RAM allocation to use hostmem backend, Igor Mammedov, 2019/12/31

Prev by Date: [PATCH 81/86] hostmem: fix strict bind policy
Next by Date: [PATCH 17/86] arm:imx25_pdk: drop RAM size fixup
Previous by thread: [PATCH 81/86] hostmem: fix strict bind policy
Next by thread: [PATCH 17/86] arm:imx25_pdk: drop RAM size fixup
Index(es):
- Date
- Thread