qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 1/1] tcg: add perfmap and jitdump


From: Alex Bennée
Subject: Re: [PATCH 1/1] tcg: add perfmap and jitdump
Date: Mon, 07 Nov 2022 11:04:16 +0000
User-agent: mu4e 1.9.1; emacs 28.2.50

Ilya Leoshkevich <iii@linux.ibm.com> writes:

> Add ability to dump /tmp/perf-<pid>.map and jit-<pid>.dump.
> The first one allows the perf tool to map samples to each individual
> translation block. The second one adds the ability to resolve symbol
> names, line numbers and inspect JITed code.
>
> Example of use:
>
>     perf record qemu-x86_64 -perfmap ./a.out
>     perf report
>
> or
>
>     perf record -k 1 qemu-x86_64 -jitdump ./a.out
>     perf inject -j -i perf.data -o perf.data.jitted
>     perf report -i perf.data.jitted
>
> Co-developed-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> Co-developed-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> ---
>  accel/tcg/debuginfo.c     | 108 +++++++++++++
>  accel/tcg/debuginfo.h     |  54 +++++++
>  accel/tcg/meson.build     |   2 +
>  accel/tcg/perf.c          | 333 ++++++++++++++++++++++++++++++++++++++
>  accel/tcg/perf.h          |  28 ++++
>  accel/tcg/translate-all.c |   3 +
>  docs/devel/tcg.rst        |  20 +++
>  linux-user/elfload.c      |   3 +
>  linux-user/exit.c         |   2 +
>  linux-user/main.c         |  15 ++
>  linux-user/meson.build    |   1 +
>  meson.build               |   8 +
>  qemu-options.hx           |  20 +++
>  softmmu/vl.c              |  11 ++
>  tcg/tcg.c                 |   2 +
>  15 files changed, 610 insertions(+)
>  create mode 100644 accel/tcg/debuginfo.c
>  create mode 100644 accel/tcg/debuginfo.h
>  create mode 100644 accel/tcg/perf.c
>  create mode 100644 accel/tcg/perf.h
>
> diff --git a/accel/tcg/debuginfo.c b/accel/tcg/debuginfo.c
> new file mode 100644
> index 0000000000..904eb23103
> --- /dev/null
> +++ b/accel/tcg/debuginfo.c
> @@ -0,0 +1,108 @@
> +/*
> + * Debug information support.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include "qemu/osdep.h"
> +
> +#include <elfutils/libdwfl.h>
> +
> +#include "debuginfo.h"
> +
> +static QemuMutex lock;
> +static Dwfl *dwfl;
> +static const Dwfl_Callbacks dwfl_callbacks = {
> +    .find_elf = NULL,
> +    .find_debuginfo = dwfl_standard_find_debuginfo,
> +    .section_address = NULL,
> +    .debuginfo_path = NULL,
> +};
> +
> +__attribute__((constructor))
> +static void debuginfo_init(void)
> +{
> +    qemu_mutex_init(&lock);
> +}
> +
> +bool debuginfo_report_elf(const char *image_name, int image_fd,
> +                          target_ulong load_bias)
> +{
> +    qemu_mutex_lock(&lock);

You can wrap this up with a WITH_QEMU_LOCK_GUARD(&lock) { ... } block (or a
plain QEMU_LOCK_GUARD(&lock); at the top of the function) and avoid having
to unlock on every exit path.

> +
> +    if (dwfl == NULL) {
> +        dwfl = dwfl_begin(&dwfl_callbacks);
> +    } else {
> +        dwfl_report_begin_add(dwfl);
> +    }
> +
> +    if (dwfl == NULL) {
> +        qemu_mutex_unlock(&lock);
> +        return false;
> +    }
> +
> +    dwfl_report_elf(dwfl, image_name, image_name, image_fd, load_bias, true);
> +    dwfl_report_end(dwfl, NULL, NULL);
> +    qemu_mutex_unlock(&lock);
> +    return true;
> +}
> +
> +bool debuginfo_get_symbol(target_ulong address,
> +                          const char **symbol, target_ulong *offset)
> +{
> +    Dwfl_Module *dwfl_module;
> +    GElf_Off dwfl_offset;
> +    GElf_Sym dwfl_sym;
> +
> +    qemu_mutex_lock(&lock);
> +
> +    if (dwfl == NULL) {
> +        qemu_mutex_unlock(&lock);
> +        return false;
> +    }
> +
> +    dwfl_module = dwfl_addrmodule(dwfl, address);
> +    if (dwfl_module == NULL) {
> +        qemu_mutex_unlock(&lock);
> +        return false;
> +    }
> +
> +    *symbol = dwfl_module_addrinfo(dwfl_module, address, &dwfl_offset,
> +                                   &dwfl_sym, NULL, NULL, NULL);
> +    if (*symbol == NULL) {
> +        qemu_mutex_unlock(&lock);
> +        return false;
> +    }
> +    *offset = dwfl_offset;
> +    qemu_mutex_unlock(&lock);
> +    return true;
> +}
> +
> +bool debuginfo_get_line(target_ulong address,
> +                        const char **file, int *line)
> +{
> +    Dwfl_Module *dwfl_module;
> +    Dwfl_Line *dwfl_line;
> +
> +    qemu_mutex_lock(&lock);

ditto.

> +
> +    if (dwfl == NULL) {
> +        qemu_mutex_unlock(&lock);
> +        return false;
> +    }
> +
> +    dwfl_module = dwfl_addrmodule(dwfl, address);
> +    if (dwfl_module == NULL) {
> +        qemu_mutex_unlock(&lock);
> +        return false;
> +    }
> +
> +    dwfl_line = dwfl_module_getsrc(dwfl_module, address);
> +    if (dwfl_line == NULL) {
> +        qemu_mutex_unlock(&lock);
> +        return false;
> +    }
> +    *file = dwfl_lineinfo(dwfl_line, NULL, line, 0, NULL, NULL);
> +    qemu_mutex_unlock(&lock);
> +    return true;
> +}
> diff --git a/accel/tcg/debuginfo.h b/accel/tcg/debuginfo.h
> new file mode 100644
> index 0000000000..f4f22aa786
> --- /dev/null
> +++ b/accel/tcg/debuginfo.h
> @@ -0,0 +1,54 @@
> +/*
> + * Debug information support.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef ACCEL_TCG_DEBUGINFO_H
> +#define ACCEL_TCG_DEBUGINFO_H
> +
> +#include "exec/cpu-defs.h"
> +
> +#ifdef CONFIG_LIBDW
> +/*
> + * Load debuginfo for the specified guest ELF image.
> + * Return true on success, false on failure.
> + */
> +bool debuginfo_report_elf(const char *image_name, int image_fd,
> +                          target_ulong load_bias);
> +
> +/*
> + * Find a symbol name associated with the specified guest PC.
> + * Return true on success, false if there is no associated symbol.
> + */
> +bool debuginfo_get_symbol(target_ulong address,
> +                          const char **symbol, target_ulong *offset);
> +
> +/*
> + * Find a line number associated with the specified guest PC.
> + * Return true on success, false if there is no associated line number.
> + */
> +bool debuginfo_get_line(target_ulong address,
> +                        const char **file, int *line);
> +#else
> +static inline bool debuginfo_report_elf(const char *image_name, int image_fd,
> +                                        target_ulong load_bias)
> +{
> +    return false;
> +}
> +
> +static inline bool debuginfo_get_symbol(target_ulong address,
> +                                        const char **symbol,
> +                                        target_ulong *offset)
> +{
> +    return false;
> +}
> +
> +static inline bool debuginfo_get_line(target_ulong address,
> +                                      const char **file, int *line)
> +{
> +    return false;
> +}
> +#endif
> +
> +#endif
> diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
> index 7a0a79d731..e206e3471b 100644
> --- a/accel/tcg/meson.build
> +++ b/accel/tcg/meson.build
> @@ -1,5 +1,6 @@
>  tcg_ss = ss.source_set()
>  tcg_ss.add(files(
> +  'perf.c',
>    'tcg-all.c',
>    'cpu-exec-common.c',
>    'cpu-exec.c',
> @@ -11,6 +12,7 @@ tcg_ss.add(files(
>  tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c'))
>  tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
>  tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')])
> +tcg_ss.add(when: libdw, if_true: files('debuginfo.c'))
>  specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
>  
>  specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
> diff --git a/accel/tcg/perf.c b/accel/tcg/perf.c
> new file mode 100644
> index 0000000000..80b5a1bf8b
> --- /dev/null
> +++ b/accel/tcg/perf.c
> @@ -0,0 +1,333 @@
> +/*
> + * Linux perf perf-<pid>.map and jit-<pid>.dump integration.
> + *
> + * The jitdump spec can be found at [1].
> + *
> + * [1] 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/perf/Documentation/jitdump-specification.txt
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include "qemu/osdep.h"
> +#include "elf.h"
> +#include "qemu/timer.h"
> +#include "tcg/tcg.h"
> +
> +#include "debuginfo.h"
> +#include "perf.h"
> +
> +static FILE *safe_fopen_w(const char *path)
> +{
> +    int saved_errno;
> +    FILE *f;
> +    int fd;
> +
> +    /* Delete the old file, if any. */
> +    unlink(path);
> +
> +    /* Avoid symlink attacks by using O_CREAT | O_EXCL. */
> +    fd = open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
> +    if (fd == -1) {
> +        return NULL;
> +    }
> +
> +    /* Convert fd to FILE*. */
> +    f = fdopen(fd, "w");
> +    if (f == NULL) {
> +        saved_errno = errno;
> +        close(fd);
> +        errno = saved_errno;
> +        return NULL;
> +    }
> +
> +    return f;
> +}
> +
> +static FILE *perfmap;
> +
> +void perf_enable_perfmap(void)
> +{
> +    char map_file[32];
> +
> +    snprintf(map_file, sizeof(map_file), "/tmp/perf-%d.map", getpid());
> +    perfmap = safe_fopen_w(map_file);
> +    if (perfmap == NULL) {
> +        warn_report("Could not open %s: %s, proceeding without perfmap",
> +                     map_file, strerror(errno));
> +    }
> +}
> +
> +static FILE *jitdump;
> +
> +#define JITHEADER_MAGIC 0x4A695444
> +#define JITHEADER_VERSION 1
> +
> +struct jitheader {
> +    uint32_t magic;
> +    uint32_t version;
> +    uint32_t total_size;
> +    uint32_t elf_mach;
> +    uint32_t pad1;
> +    uint32_t pid;
> +    uint64_t timestamp;
> +    uint64_t flags;
> +};
> +
> +enum jit_record_type {
> +    JIT_CODE_LOAD = 0,
> +    JIT_CODE_DEBUG_INFO = 2,
> +};
> +
> +struct jr_prefix {
> +    uint32_t id;
> +    uint32_t total_size;
> +    uint64_t timestamp;
> +};
> +
> +struct jr_code_load {
> +    struct jr_prefix p;
> +
> +    uint32_t pid;
> +    uint32_t tid;
> +    uint64_t vma;
> +    uint64_t code_addr;
> +    uint64_t code_size;
> +    uint64_t code_index;
> +};
> +
> +struct debug_entry {
> +    uint64_t addr;
> +    int lineno;
> +    int discrim;
> +    const char name[];
> +};
> +
> +struct jr_code_debug_info {
> +    struct jr_prefix p;
> +
> +    uint64_t code_addr;
> +    uint64_t nr_entry;
> +    struct debug_entry entries[];
> +};
> +
> +static uint32_t get_e_machine(void)
> +{
> +    Elf64_Ehdr elf_header;
> +    FILE *exe;
> +    size_t n;
> +
> +    QEMU_BUILD_BUG_ON(offsetof(Elf32_Ehdr, e_machine) !=
> +                      offsetof(Elf64_Ehdr, e_machine));
> +
> +    exe = fopen("/proc/self/exe", "r");
> +    if (exe == NULL) {
> +        return EM_NONE;
> +    }
> +
> +    n = fread(&elf_header, sizeof(elf_header), 1, exe);
> +    fclose(exe);
> +    if (n != 1) {
> +        return EM_NONE;
> +    }
> +
> +    return elf_header.e_machine;
> +}
> +
> +void perf_enable_jitdump(void)
> +{
> +    struct jitheader header;
> +    char jitdump_file[32];
> +#ifdef CONFIG_LINUX
> +    void *perf_marker;
> +#endif
> +
> +    if (!use_rt_clock) {
> +        warn_report("CLOCK_MONOTONIC is not available, proceeding without 
> jitdump");
> +        return;
> +    }
> +
> +    snprintf(jitdump_file, sizeof(jitdump_file), "jit-%d.dump", getpid());
> +    jitdump = safe_fopen_w(jitdump_file);
> +    if (jitdump == NULL) {
> +        warn_report("Could not open %s: %s, proceeding without jitdump",
> +                     jitdump_file, strerror(errno));
> +        return;
> +    }
> +
> +#ifdef CONFIG_LINUX
> +    /*
> +     * `perf inject` will see that the mapped file name in the corresponding
> +     * PERF_RECORD_MMAP or PERF_RECORD_MMAP2 event is of the form jit-%d.dump
> +     * and will process it as a jitdump file.
> +     */
> +    perf_marker = mmap(NULL, qemu_real_host_page_size(), PROT_READ | 
> PROT_EXEC,
> +                       MAP_PRIVATE, fileno(jitdump), 0);
> +    if (perf_marker == MAP_FAILED) {
> +        warn_report("Could not map %s: %s, proceeding without jitdump",
> +                     jitdump_file, strerror(errno));
> +        fclose(jitdump);
> +        jitdump = NULL;
> +        return;
> +    }
> +#endif
> +
> +    header.magic = JITHEADER_MAGIC;
> +    header.version = JITHEADER_VERSION;
> +    header.total_size = sizeof(header);
> +    header.elf_mach = get_e_machine();
> +    header.pad1 = 0;
> +    header.pid = getpid();
> +    header.timestamp = get_clock();
> +    header.flags = 0;
> +    fwrite(&header, sizeof(header), 1, jitdump);
> +}
> +
> +void perf_report_prologue(void *start, size_t size)
> +{
> +    if (perfmap) {
> +        fprintf(perfmap, "%"PRIxPTR" %zx tcg-prologue-buffer\n",
> +                (uintptr_t)start, size);
> +    }
> +}
> +
> +/*
> + * Append a single line mapping to a JIT_CODE_DEBUG_INFO jitdump entry.
> + * Return 1 on success, 0 if there is no line number information for 
> guest_pc.
> + */
> +static int append_debug_entry(GArray *raw, void *host_pc,
> +                              target_ulong guest_pc)
> +{
> +    struct debug_entry ent;
> +    const char *file;
> +    int line;
> +
> +    if (!debuginfo_get_line(guest_pc, &file, &line)) {
> +        return 0;
> +    }
> +
> +    ent.addr = (uint64_t)host_pc;
> +    ent.lineno = line;
> +    ent.discrim = 0;
> +    g_array_append_vals(raw, &ent, sizeof(ent));
> +    g_array_append_vals(raw, file, strlen(file) + 1);
> +    return 1;
> +}
> +
> +/* Write a JIT_CODE_DEBUG_INFO jitdump entry. */
> +static void write_jr_code_debug_info(void *start, size_t size, int icount)
> +{
> +    GArray *raw = g_array_new(false, false, 1);
> +    struct jr_code_debug_info rec;
> +    struct debug_entry ent;
> +    target_ulong guest_pc;
> +    void *host_pc;
> +    int insn;
> +
> +    /* Reserve space for the header. */
> +    g_array_set_size(raw, sizeof(rec));
> +
> +    /* Create debug entries. */
> +    rec.nr_entry = 0;
> +    for (insn = 0; insn < icount; insn++) {
> +        host_pc = start;
> +        if (insn != 0) {
> +            host_pc += tcg_ctx->gen_insn_end_off[insn - 1];
> +        }
> +        guest_pc = tcg_ctx->gen_insn_data[insn][0];
> +        rec.nr_entry += append_debug_entry(raw, host_pc, guest_pc);
> +    }
> +
> +    /* Trailing debug_entry. */
> +    ent.addr = (uint64_t)start + size;
> +    ent.lineno = 0;
> +    ent.discrim = 0;
> +    g_array_append_vals(raw, &ent, sizeof(ent));
> +    g_array_append_vals(raw, "", 1);
> +    rec.nr_entry++;
> +
> +    /* Create header. */
> +    rec.p.id = JIT_CODE_DEBUG_INFO;
> +    rec.p.total_size = raw->len;
> +    rec.p.timestamp = get_clock();
> +    rec.code_addr = (uint64_t)start;
> +    memcpy(raw->data, &rec, sizeof(rec));
> +
> +    /* Flush. */
> +    fwrite(raw->data, raw->len, 1, jitdump);
> +    g_array_unref(raw);
> +}
> +
> +/* Write a JIT_CODE_LOAD jitdump entry. */
> +static void write_jr_code_load(void *start, size_t size,
> +                               const char *symbol, const char *suffix)
> +{
> +    static uint64_t code_index;
> +    struct jr_code_load rec;
> +    size_t suffix_size;
> +    size_t name_size;
> +
> +    name_size = strlen(symbol);
> +    suffix_size = strlen(suffix) + 1;
> +    rec.p.id = JIT_CODE_LOAD;
> +    rec.p.total_size = sizeof(rec) + name_size + suffix_size + size;
> +    rec.p.timestamp = get_clock();
> +    rec.pid = getpid();
> +    rec.tid = gettid();
> +    rec.vma = (uint64_t)start;
> +    rec.code_addr = (uint64_t)start;
> +    rec.code_size = size;
> +    rec.code_index = code_index++;
> +    fwrite(&rec, sizeof(rec), 1, jitdump);
> +    fwrite(symbol, name_size, 1, jitdump);
> +    fwrite(suffix, suffix_size, 1, jitdump);
> +    fwrite(start, size, 1, jitdump);
> +}
> +
> +void perf_report_code(void *start, size_t size, int icount, uint64_t pc)
> +{
> +    char suffix[32] = "";
> +    char symbol_buf[32];
> +    const char *symbol;
> +    target_ulong offset;
> +
> +    /* Symbolize guest PC. */
> +    if (perfmap || jitdump) {
> +        if (!debuginfo_get_symbol(pc, &symbol, &offset)) {
> +            snprintf(symbol_buf, sizeof(symbol_buf), "subject-%"PRIx64, pc);
> +            symbol = symbol_buf;
> +            offset = 0;
> +        }
> +        if (offset != 0) {
> +            snprintf(suffix, sizeof(suffix), "+0x%"PRIx64, (uint64_t)offset);
> +        }
> +    }
> +
> +    /* Emit a perfmap entry if needed. */
> +    if (perfmap) {
> +        flockfile(perfmap);
> +        fprintf(perfmap, "%"PRIxPTR" %zx %s%s\n",
> +                (uintptr_t)start, size, symbol, suffix);
> +        funlockfile(perfmap);
> +    }
> +
> +    /* Emit jitdump entries if needed. */
> +    if (jitdump) {
> +        flockfile(jitdump);
> +        write_jr_code_debug_info(start, size, icount);
> +        write_jr_code_load(start, size, symbol, suffix);
> +        funlockfile(jitdump);
> +    }
> +}
> +
> +void perf_exit(void)
> +{
> +    if (perfmap) {
> +        fclose(perfmap);
> +        perfmap = NULL;
> +    }
> +
> +    if (jitdump) {
> +        fclose(jitdump);
> +        jitdump = NULL;
> +    }
> +}
> diff --git a/accel/tcg/perf.h b/accel/tcg/perf.h
> new file mode 100644
> index 0000000000..df54be9ccd
> --- /dev/null
> +++ b/accel/tcg/perf.h
> @@ -0,0 +1,28 @@
> +/*
> + * Linux perf perf-<pid>.map and jit-<pid>.dump integration.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef ACCEL_TCG_PERF_H
> +#define ACCEL_TCG_PERF_H
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +
> +/* Start writing perf-<pid>.map. */
> +void perf_enable_perfmap(void);
> +
> +/* Start writing jit-<pid>.dump. */
> +void perf_enable_jitdump(void);
> +
> +/* Add information about TCG prologue to profiler maps. */
> +void perf_report_prologue(void *start, size_t size);
> +
> +/* Add information about JITted guest code to profiler maps. */
> +void perf_report_code(void *start, size_t size, int icount, uint64_t pc);
> +
> +/* Stop writing perf-<pid>.map and/or jit-<pid>.dump. */
> +void perf_exit(void);
> +
> +#endif
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 4ed75a13e1..b9e8d8066f 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -62,6 +62,7 @@
>  #include "tb-hash.h"
>  #include "tb-context.h"
>  #include "internal.h"
> +#include "perf.h"
>  
>  /* #define DEBUG_TB_INVALIDATE */
>  /* #define DEBUG_TB_FLUSH */
> @@ -1492,6 +1493,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>      }
>      tb->tc.size = gen_code_size;
>  
> +    perf_report_code(gen_code_buf, gen_code_size, tb->icount, tb->pc);
> +

I think the recent code refactoring means this needs updating. If it's
the guest PC, I think that is already in a local variable.

>  #ifdef CONFIG_PROFILER
>      qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
>      qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
> diff --git a/docs/devel/tcg.rst b/docs/devel/tcg.rst
> index a65fb7b1c4..63e42b0426 100644
> --- a/docs/devel/tcg.rst
> +++ b/docs/devel/tcg.rst
> @@ -188,3 +188,23 @@ memory areas instead calls out to C code for device 
> emulation.
>  Finally, the MMU helps tracking dirty pages and pages pointed to by
>  translation blocks.
>  
> +Profiling JITted code
> +---------------------
> +
> +The Linux ``perf`` tool will treat all JITted code as a single block as
> +unlike the main code it can't use debug information to link individual
> +program counter samples with larger functions. To overcome this
> +limitation you can use the ``-perfmap`` or the ``-jitdump`` option to 
> generate
> +map files. ``-perfmap`` is lightweight and produces only guest-host mappings.
> +``-jitdump`` additionally saves JITed code and guest debug information (if
> +available); its output needs to be integrated with the ``perf.data`` file
> +before the final report can be viewed.

I think this needs to be a bit clearer. Does jitdump only make sense if
the guest has debug information? Running:

  perf record -k 1 ./qemu-system-aarch64 -jitdump -monitor none \
    -display none -chardev stdio,id=output  \
    -M virt -cpu max -display none \
    -semihosting-config enable=on,target=native,chardev=output \
    -kernel tests/tcg/aarch64-softmmu/memory
  perf inject -j -i perf.data -o perf.data.jitted
  perf report -i perf.data.jitted

gives me pretty much the same report as -perfmap does. I think this is
because we only properly look at the guest ELF data for linux-user. Is
that right?

> +
> +.. code::
> +
> +  perf record $QEMU -perfmap $REMAINING_ARGS
> +  perf report
> +
> +  perf record -k 1 $QEMU -jitdump $REMAINING_ARGS
> +  perf inject -j -i perf.data -o perf.data.jitted
> +  perf report -i perf.data.jitted
> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> index 20894b633f..5928c14dfc 100644
> --- a/linux-user/elfload.c
> +++ b/linux-user/elfload.c
> @@ -19,6 +19,7 @@
>  #include "qemu/selfmap.h"
>  #include "qapi/error.h"
>  #include "target_signal.h"
> +#include "accel/tcg/debuginfo.h"
>  
>  #ifdef _ARCH_PPC64
>  #undef ARCH_DLINFO
> @@ -3261,6 +3262,8 @@ static void load_elf_image(const char *image_name, int 
> image_fd,
>          load_symbols(ehdr, image_fd, load_bias);
>      }
>  
> +    debuginfo_report_elf(image_name, image_fd, load_bias);
> +
>      mmap_unlock();
>  
>      close(image_fd);
> diff --git a/linux-user/exit.c b/linux-user/exit.c
> index fa6ef0b9b4..607b6da9fc 100644
> --- a/linux-user/exit.c
> +++ b/linux-user/exit.c
> @@ -17,6 +17,7 @@
>   *  along with this program; if not, see <http://www.gnu.org/licenses/>.
>   */
>  #include "qemu/osdep.h"
> +#include "accel/tcg/perf.h"
>  #include "exec/gdbstub.h"
>  #include "qemu.h"
>  #include "user-internals.h"
> @@ -38,4 +39,5 @@ void preexit_cleanup(CPUArchState *env, int code)
>  #endif
>          gdb_exit(code);
>          qemu_plugin_user_exit();
> +        perf_exit();
>  }
> diff --git a/linux-user/main.c b/linux-user/main.c
> index a17fed045b..4290651c3c 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -53,6 +53,7 @@
>  #include "signal-common.h"
>  #include "loader.h"
>  #include "user-mmap.h"
> +#include "accel/tcg/perf.h"
>  
>  #ifdef CONFIG_SEMIHOSTING
>  #include "semihosting/semihost.h"
> @@ -423,6 +424,16 @@ static void handle_arg_abi_call0(const char *arg)
>  }
>  #endif
>  
> +static void handle_arg_perfmap(const char *arg)
> +{
> +    perf_enable_perfmap();
> +}
> +
> +static void handle_arg_jitdump(const char *arg)
> +{
> +    perf_enable_jitdump();
> +}
> +
>  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
>  
>  #ifdef CONFIG_PLUGIN
> @@ -493,6 +504,10 @@ static const struct qemu_argument arg_table[] = {
>      {"xtensa-abi-call0", "QEMU_XTENSA_ABI_CALL0", false, 
> handle_arg_abi_call0,
>       "",           "assume CALL0 Xtensa ABI"},
>  #endif
> +    {"perfmap",    "QEMU_PERFMAP",     false, handle_arg_perfmap,
> +     "",           "Generate a /tmp/perf-${pid}.map file for perf"},
> +    {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
> +     "",           "Generate a jit-${pid}.dump file for perf"},
>      {NULL, NULL, false, NULL, NULL, NULL}
>  };
>  
> diff --git a/linux-user/meson.build b/linux-user/meson.build
> index de4320af05..7171dc60be 100644
> --- a/linux-user/meson.build
> +++ b/linux-user/meson.build
> @@ -22,6 +22,7 @@ linux_user_ss.add(files(
>    'uname.c',
>  ))
>  linux_user_ss.add(rt)
> +linux_user_ss.add(libdw)
>  
>  linux_user_ss.add(when: 'TARGET_HAS_BFLT', if_true: files('flatload.c'))
>  linux_user_ss.add(when: 'TARGET_I386', if_true: files('vm86.c'))
> diff --git a/meson.build b/meson.build
> index b686dfef75..be625efcc5 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1631,6 +1631,12 @@ if libbpf.found() and not cc.links('''
>    endif
>  endif
>  
> +# libdw
> +libdw = dependency('libdw',
> +                   method: 'pkg-config',
> +                   kwargs: static_kwargs,
> +                   required: false)
> +
>  #################
>  # config-host.h #
>  #################
> @@ -1897,6 +1903,7 @@ config_host_data.set('CONFIG_DBUS_DISPLAY', 
> dbus_display)
>  config_host_data.set('CONFIG_CFI', get_option('cfi'))
>  config_host_data.set('CONFIG_SELINUX', selinux.found())
>  config_host_data.set('CONFIG_XEN_BACKEND', xen.found())
> +config_host_data.set('CONFIG_LIBDW', libdw.found())
>  if xen.found()
>    # protect from xen.version() having less than three components
>    xen_version = xen.version().split('.') + ['0', '0']
> @@ -3937,6 +3944,7 @@ summary_info += {'libudev':           libudev}
>  # Dummy dependency, keep .found()
>  summary_info += {'FUSE lseek':        fuse_lseek.found()}
>  summary_info += {'selinux':           selinux}
> +summary_info += {'libdw':             libdw}
>  summary(summary_info, bool_yn: true, section: 'Dependencies')
>  
>  if not supported_cpus.contains(cpu)
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 95b998a13b..b0c64c4a31 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -4799,6 +4799,26 @@ SRST
>      Enable synchronization profiling.
>  ERST
>  
> +#ifdef CONFIG_TCG
> +DEF("perfmap", 0, QEMU_OPTION_perfmap,
> +    "-perfmap        generate a /tmp/perf-${pid}.map file for perf\n",
> +    QEMU_ARCH_ALL)
> +SRST
> +``-perfmap``
> +    Generate a map file for Linux perf tools that will allow basic profiling
> +    information to be broken down into basic blocks.
> +ERST
> +
> +DEF("jitdump", 0, QEMU_OPTION_jitdump,
> +    "-jitdump        generate a jit-${pid}.dump file for perf\n",
> +    QEMU_ARCH_ALL)
> +SRST
> +``-jitdump``
> +    Generate a dump file for Linux perf tools that maps basic blocks to 
> symbol
> +    names, line numbers and JITted code.
> +ERST
> +#endif
> +
>  DEFHEADING()
>  
>  DEFHEADING(Generic object creation:)
> diff --git a/softmmu/vl.c b/softmmu/vl.c
> index b464da25bc..40e371a0c2 100644
> --- a/softmmu/vl.c
> +++ b/softmmu/vl.c
> @@ -96,6 +96,9 @@
>  #include "fsdev/qemu-fsdev.h"
>  #endif
>  #include "sysemu/qtest.h"
> +#ifdef CONFIG_TCG
> +#include "accel/tcg/perf.h"
> +#endif
>  
>  #include "disas/disas.h"
>  
> @@ -2900,6 +2903,14 @@ void qemu_init(int argc, char **argv)
>              case QEMU_OPTION_DFILTER:
>                  qemu_set_dfilter_ranges(optarg, &error_fatal);
>                  break;
> +#ifdef CONFIG_TCG
> +            case QEMU_OPTION_perfmap:
> +                perf_enable_perfmap();
> +                break;
> +            case QEMU_OPTION_jitdump:
> +                perf_enable_jitdump();
> +                break;
> +#endif
>              case QEMU_OPTION_seed:
>                  qemu_guest_random_seed_main(optarg, &error_fatal);
>                  break;
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 612a12f58f..cd1ccf2bff 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -61,6 +61,7 @@
>  #include "exec/log.h"
>  #include "tcg/tcg-ldst.h"
>  #include "tcg-internal.h"
> +#include "accel/tcg/perf.h"
>  
>  #ifdef CONFIG_TCG_INTERPRETER
>  #include <ffi.h>
> @@ -749,6 +750,7 @@ void tcg_prologue_init(TCGContext *s)
>  #endif
>  
>      prologue_size = tcg_current_code_size(s);
> +    perf_report_prologue(s->code_gen_ptr, prologue_size);
>  
>  #ifndef CONFIG_TCG_INTERPRETER
>      flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),


-- 
Alex Bennée



reply via email to

[Prev in Thread] Current Thread [Next in Thread]