qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 1/4] added -numa cmdline parameter parser


From: Andre Przywara
Subject: [Qemu-devel] [PATCH 1/4] added -numa cmdline parameter parser
Date: Tue, 31 Mar 2009 15:28:54 +0200

From: Andre Przywara <address@hidden>


Signed-off-by: Andre Przywara <address@hidden>
---
 qemu-options.hx |    8 ++++
 sysemu.h        |    7 +++-
 vl.c            |  114 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index 6c58e2a..f3f1389 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -40,6 +40,14 @@ CPUs are supported. On Sparc32 target, Linux limits the 
number of usable CPUs
 to 4.
 ETEXI
 
+DEF("numa", HAS_ARG, QEMU_OPTION_numa,
+    "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n")
+STEXI
address@hidden -numa @var{opts}
+Simulate a multi node NUMA system. If mem and cpus are omitted, resources
+are split equally.
+ETEXI
+
 DEF("fda", HAS_ARG, QEMU_OPTION_fda,
     "-fda/-fdb file  use 'file' as floppy disk 0/1 image\n")
 DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "")
diff --git a/sysemu.h b/sysemu.h
index 3eab34b..b83a66c 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -108,6 +108,11 @@ extern const char *bootp_filename;
 extern int kqemu_allowed;
 #endif
 
+#define MAX_NODES 64
+extern int nb_numa_nodes;
+extern uint64_t node_mem[MAX_NODES];
+extern uint64_t node_cpumask[MAX_NODES];
+
 #define MAX_OPTION_ROMS 16
 extern const char *option_rom[MAX_OPTION_ROMS];
 extern int nb_option_roms;
@@ -248,7 +253,7 @@ void do_usb_add(Monitor *mon, const char *devname);
 void do_usb_del(Monitor *mon, const char *devname);
 void usb_info(Monitor *mon);
 
-const char *get_opt_name(char *buf, int buf_size, const char *p);
+const char *get_opt_name(char *buf, int buf_size, const char *p, char delim);
 const char *get_opt_value(char *buf, int buf_size, const char *p);
 int get_param_value(char *buf, int buf_size,
                     const char *tag, const char *str);
diff --git a/vl.c b/vl.c
index 5e6c621..3f9c713 100644
--- a/vl.c
+++ b/vl.c
@@ -261,6 +261,10 @@ const char *prom_envs[MAX_PROM_ENVS];
 int nb_drives_opt;
 struct drive_opt drives_opt[MAX_DRIVES];
 
+int nb_numa_nodes;
+uint64_t node_mem[MAX_NODES];
+uint64_t node_cpumask[MAX_NODES];
+
 static CPUState *cur_cpu;
 static CPUState *next_cpu;
 static int event_pending = 1;
@@ -1860,12 +1864,12 @@ static int socket_init(void)
 }
 #endif
 
-const char *get_opt_name(char *buf, int buf_size, const char *p)
+const char *get_opt_name(char *buf, int buf_size, const char *p, char delim)
 {
     char *q;
 
     q = buf;
-    while (*p != '\0' && *p != '=') {
+    while (*p != '\0' && *p != delim) {
         if (q && (q - buf) < buf_size - 1)
             *q++ = *p;
         p++;
@@ -1905,7 +1909,7 @@ int get_param_value(char *buf, int buf_size,
 
     p = str;
     for(;;) {
-        p = get_opt_name(option, sizeof(option), p);
+        p = get_opt_name(option, sizeof(option), p, '=');
         if (*p != '=')
             break;
         p++;
@@ -1930,7 +1934,7 @@ int check_params(char *buf, int buf_size,
 
     p = str;
     for(;;) {
-        p = get_opt_name(buf, buf_size, p);
+        p = get_opt_name(buf, buf_size, p, '=');
         if (*p != '=')
             return -1;
         p++;
@@ -2623,6 +2627,53 @@ int drive_init(struct drive_opt *arg, int snapshot, void 
*opaque)
     return drives_table_idx;
 }
 
+static void numa_add(const char* optarg)
+{
+char option[128];
+char *endptr;
+unsigned long long value, endvalue;
+int nodenr;
+
+    optarg = get_opt_name(option, 128, optarg, ',') + 1;
+    if (!strcmp(option, "node")) {
+        if (get_param_value(option, 128, "nodeid", optarg) == 0) {
+            nodenr = nb_numa_nodes;
+        } else {
+            nodenr = strtoull(option, NULL, 10);
+        }
+
+        if (get_param_value(option, 128, "mem", optarg) == 0) {
+            node_mem[nodenr] = 0;
+        } else
+        {
+            value = strtoull(option, &endptr, 0);
+            switch (*endptr) {
+            case 0: case 'M': case 'm':
+                value <<= 20;
+                break;
+            case 'G': case 'g':
+                value <<= 30;
+                break;
+            }
+            node_mem[nodenr] = value;
+        }
+        if (get_param_value(option, 128, "cpus", optarg) == 0) {
+            node_cpumask[nodenr] = 0;
+        } else {
+            value = strtoull(option, &endptr, 10);
+            if (*endptr == '-') {
+                endvalue = strtoull(endptr+1, &endptr, 10);
+                value = (1 << (endvalue + 1)) - (1 << value);
+            } else {
+                value = 1 << value;
+            }
+            node_cpumask[nodenr] = value;
+        }
+        nb_numa_nodes++;
+    }
+    return;
+}
+
 /***********************************************************/
 /* USB devices */
 
@@ -4337,12 +4388,18 @@ int main(int argc, char **argv, char **envp)
         virtio_consoles[i] = NULL;
     virtio_console_index = 0;
 
+    for (i = 0; i < MAX_NODES; i++) {
+        node_cpumask[i] = 0;
+        node_mem[i] = 0;
+    }
+
     usb_devices_index = 0;
 
     nb_net_clients = 0;
     nb_bt_opts = 0;
     nb_drives = 0;
     nb_drives_opt = 0;
+    nb_numa_nodes = 0;
     hda_index = -1;
 
     nb_nics = 0;
@@ -4492,6 +4549,13 @@ int main(int argc, char **argv, char **envp)
                                     ",trans=none" : "");
                 }
                 break;
+            case QEMU_OPTION_numa:
+                if (nb_numa_nodes >= MAX_NODES) {
+                    fprintf(stderr, "qemu: too many NUMA nodes\n");
+                    exit(1);
+                }
+                numa_add(optarg);
+                break;
             case QEMU_OPTION_nographic:
                 nographic = 1;
                 break;
@@ -5192,6 +5256,48 @@ int main(int argc, char **argv, char **envp)
         }
     }
 
+    if (nb_numa_nodes > 0) {
+        int i;
+
+        if (nb_numa_nodes > smp_cpus) {
+            nb_numa_nodes = smp_cpus;
+        }
+
+        /* If no memory size if given for any node, assume the default case
+         * and distribute the available memory equally across all nodes
+         */
+        for (i = 0; i < nb_numa_nodes; i++) {
+            if (node_mem[i] != 0)
+                break;
+        }
+        if (i == nb_numa_nodes) {
+            uint64_t usedmem = 0;
+
+            /* On Linux, the each node's border has to be 8MB aligned,
+             * the final node gets the rest.
+             */
+            for (i = 0; i < nb_numa_nodes - 1; i++) {
+                node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1);
+                usedmem += node_mem[i];
+            }
+            node_mem[i] = ram_size - usedmem;
+        }
+
+        for (i = 0; i < nb_numa_nodes; i++) {
+            if (node_cpumask[i] != 0)
+                break;
+        }
+        /* assigning the VCPUs round-robin is easier to implement, guest OSes
+         * must cope with this anyway, because there are BIOSes out there in
+         * real machines which also use this scheme.
+         */
+        if (i == nb_numa_nodes) {
+            for (i = 0; i < smp_cpus; i++) {
+                node_cpumask[i % nb_numa_nodes] |= 1<<i;
+            }
+        }
+    }
+
     if (kvm_enabled()) {
         int ret;
 
-- 
1.6.1.3






reply via email to

[Prev in Thread] Current Thread [Next in Thread]