>From 497ed0672f7fe08d9654a0e5c11b682bea43a59e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 5 Oct 2011 08:29:39 +0200 Subject: [PATCH 0/3] TLS wrappers and thread-local cpu_single_env This series adds thread-local storage wrappers that use __thread or pthread_getspecific on POSIX systems and the .tls segment on Windows, prepares the Windows port for a thread-local cpu_single_env, and then makes cpu_single_env thread-local. Paolo Bonzini (3): qemu-threads: add TLS wrappers Prepare Windows port for thread-local cpu_single_env Make cpu_single_env thread-local configure | 20 +++++++++++++++++ coroutine-win32.c | 7 ++++- cpu-all.h | 4 ++- cpus.c | 13 +++++++--- exec.c | 2 +- qemu-thread-posix.c | 42 ++++++++++++++++++++++++++++++++--- qemu-thread-win32.c | 16 +++++++++++++ qemu-tls-gcc.h | 25 +++++++++++++++++++++ qemu-tls-pthread.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ qemu-tls-win32.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 234 insertions(+), 12 deletions(-) create mode 100644 qemu-tls-gcc.h create mode 100644 qemu-tls-pthread.h create mode 100644 qemu-tls-win32.h -- 1.7.6 >From d8c3c4e789f9b86a66042a9181333e1a096b6b93 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 16 Aug 2011 10:37:44 -0700 Subject: [PATCH 1/3] qemu-threads: add TLS wrappers Win32 emulated TLS is slow and is not available on all versions of GCC; some versions of Unix only have pthread_getspecific as a means to access TLS. Actually, Win32 does have decent native TLS support, but GCC does not map __thread to it. Unlike ELF TLS, however, it is perfectly possible to declare such TLS variables with simple C code! For pthread_getspecific we similarly allocate a memory block for each thread; we have to compute all the offsets at load time, which is also cheaper than doing a pthread_key_create for each variable. Not optimal, but it works. This patch adds wrappers to qemu-thread that will use __thread or pthread_getspecific on POSIX systems, and the .tls segment on Windows. It does make the declarations somewhat uglier, but not too much. 
Signed-off-by: Paolo Bonzini --- configure | 20 +++++++++++++++++ coroutine-win32.c | 7 ++++- qemu-thread-posix.c | 42 ++++++++++++++++++++++++++++++++--- qemu-thread-win32.c | 16 +++++++++++++ qemu-tls-gcc.h | 25 +++++++++++++++++++++ qemu-tls-pthread.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ qemu-tls-win32.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 221 insertions(+), 6 deletions(-) create mode 100644 qemu-tls-gcc.h create mode 100644 qemu-tls-pthread.h create mode 100644 qemu-tls-win32.h diff --git a/configure b/configure index 59b1494..50d7b54 100755 --- a/configure +++ b/configure @@ -1215,6 +1215,23 @@ EOF fi ########################################## +# __thread check + +if test "$mingw32" = "yes" ; then + tls_model=win32 +else + cat > $TMPC << EOF +__thread int x; +int main() { return x; } +EOF + if compile_prog "" "" ; then + tls_model=gcc + else + tls_model=pthread + fi +fi + +########################################## # zlib check if test "$zlib" != "no" ; then @@ -2697,6 +2714,7 @@ echo "Documentation $docs" [ ! 
-z "$uname_release" ] && \ echo "uname -r $uname_release" echo "NPTL support $nptl" +echo "TLS support $tls_model" echo "GUEST_BASE $guest_base" echo "PIE user targets $user_pie" echo "vde support $vde" @@ -3580,6 +3598,8 @@ if test "$target_linux_user" = "yes" -o "$target_bsd_user" = "yes" ; then esac fi +symlink $source_path/qemu-tls-$tls_model.h qemu-tls.h + # use included Linux headers if test "$linux" = "yes" ; then includes="-I\$(SRC_PATH)/linux-headers $includes" diff --git a/coroutine-win32.c b/coroutine-win32.c index 4179609..708e220 100644 --- a/coroutine-win32.c +++ b/coroutine-win32.c @@ -24,6 +24,7 @@ #include "qemu-common.h" #include "qemu-coroutine-int.h" +#include "qemu-tls.h" typedef struct { @@ -33,8 +34,10 @@ typedef struct CoroutineAction action; } CoroutineWin32; -static __thread CoroutineWin32 leader; -static __thread Coroutine *current; +static DEFINE_TLS(CoroutineWin32, tls_leader); +static DEFINE_TLS(Coroutine *, tls_current); +#define leader get_tls(tls_leader) +#define current get_tls(tls_current) CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, CoroutineAction action) diff --git a/qemu-thread-posix.c b/qemu-thread-posix.c index ac3c0c9..acd04ff 100644 --- a/qemu-thread-posix.c +++ b/qemu-thread-posix.c @@ -18,6 +18,9 @@ #include #include #include "qemu-thread.h" +#include "qemu-common.h" +#include "qemu-tls.h" +#include "qemu-barrier.h" static void error_exit(int err, const char *msg) { @@ -115,18 +118,44 @@ void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex) error_exit(err, __func__); } +size_t tls_size; +pthread_key_t tls_key; + +static void __attribute__((constructor(102))) tls_init_thread(void) +{ + /* It's easier to always create the key, even if using GCC tls. 
*/ + pthread_key_create(&tls_key, g_free); + _tls_init_thread(); +} + +typedef struct QemuThreadData { + void *(*start_routine)(void *); + void *arg; +} QemuThreadData; + +static void *start_routine_wrapper(void *arg) +{ + QemuThreadData args = *(QemuThreadData *) arg; + g_free(arg); + _tls_init_thread(); + return args.start_routine(args.arg); +} + void qemu_thread_create(QemuThread *thread, - void *(*start_routine)(void*), + void *(*start_routine)(void *), void *arg) { + sigset_t set, oldset; + QemuThreadData *args = g_malloc(sizeof(QemuThreadData)); int err; - /* Leave signal handling to the iothread. */ - sigset_t set, oldset; + args->start_routine = start_routine; + args->arg = arg; + /* Leave signal handling to the iothread. */ sigfillset(&set); pthread_sigmask(SIG_SETMASK, &set, &oldset); - err = pthread_create(&thread->thread, NULL, start_routine, arg); + err = pthread_create(&thread->thread, NULL, start_routine_wrapper, args); if (err) error_exit(err, __func__); diff --git a/qemu-thread-win32.c b/qemu-thread-win32.c index db8e744..118d92f 100644 --- a/qemu-thread-win32.c +++ b/qemu-thread-win32.c @@ -16,6 +16,22 @@ #include #include +/* TLS support. */ + +int __attribute__((section(".tls$000"))) _tls_start = 0; +int __attribute__((section(".tls$ZZZ"))) _tls_end = 0; +int _tls_index = 0; + +const IMAGE_TLS_DIRECTORY _tls_used __attribute__((used, section(".rdata$T"))) = { + (ULONG)(ULONG_PTR) &_tls_start, /* start of tls data */ + (ULONG)(ULONG_PTR) &_tls_end, /* end of tls data */ + (ULONG)(ULONG_PTR) &_tls_index, /* address of tls_index */ + (ULONG) 0, /* pointer to callbacks */ + (ULONG) 0, /* size of tls zero fill */ + (ULONG) 0 /* characteristics */ +}; + + static void error_exit(int err, const char *msg) { char *pstr; diff --git a/qemu-tls-gcc.h b/qemu-tls-gcc.h new file mode 100644 index 0000000..8cff148 --- /dev/null +++ b/qemu-tls-gcc.h @@ -0,0 +1,24 @@ +/* + * TLS with __thread + * + * Copyright Red Hat, Inc. 
2011 + * + * Authors: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_TLS_GCC_H +#define QEMU_TLS_GCC_H + +#define DECLARE_TLS(type, x) extern __thread type tls__##x +#define DEFINE_TLS(type, x) __thread type tls__##x +#define get_tls(x) tls__##x + +static inline size_t tls_init(size_t size, size_t alignment) { return 0; } +static inline void _tls_init_thread(void) {} + +#endif diff --git a/qemu-tls-pthread.h b/qemu-tls-pthread.h new file mode 100644 index 0000000..ef97528 --- /dev/null +++ b/qemu-tls-pthread.h @@ -0,0 +1,57 @@ +/* + * TLS with pthread_getspecific + * + * Copyright Red Hat, Inc. 2011 + * + * Authors: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_TLS_PTHREAD_H +#define QEMU_TLS_PTHREAD_H + +#include <pthread.h> +#include <glib.h> + +#define DECLARE_TLS(type, x) \ + extern size_t tls_offset__##x; \ + extern type tls_dummy__##x + +#define DEFINE_TLS(type, x) \ + size_t tls_offset__##x; \ + static void __attribute__((constructor(101))) tls_init__##x(void) \ + { \ + tls_offset__##x = tls_init(sizeof(type), __alignof__(type)); \ + } \ + extern type tls_dummy__##x + +extern size_t tls_size; +extern pthread_key_t tls_key; + +static inline size_t tls_init(size_t size, size_t alignment) +{ + size_t tls_offset = (tls_size + alignment - 1) & -alignment; + tls_size = tls_offset + size; + return tls_offset; +} + +static inline void _tls_init_thread(void) +{ + void *mem = tls_size == 0 ? 
NULL : g_malloc0(tls_size); + pthread_setspecific(tls_key, mem); +} + +static inline __attribute__((__const__)) void *_get_tls(size_t offset) +{ + char *base = pthread_getspecific(tls_key); + return &base[offset]; +} + +#define get_tls(x) \ + (*(__typeof__(&tls_dummy__##x)) _get_tls(tls_offset__##x)) + +#endif diff --git a/qemu-tls-win32.h b/qemu-tls-win32.h new file mode 100644 index 0000000..d04d48b --- /dev/null +++ b/qemu-tls-win32.h @@ -0,0 +1,59 @@ +/* + * TLS with Win32 .tls sections + * + * Copyright Red Hat, Inc. 2011 + * + * Authors: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_TLS_WIN32_H +#define QEMU_TLS_WIN32_H + +#include <windows.h> +#include <winnt.h> + +typedef struct _TEB { + NT_TIB NtTib; + void *EnvironmentPointer; + void *x[3]; + char **ThreadLocalStoragePointer; +} TEB, *PTEB; + +/* 1) The initial contents of TLS variables are placed in the .tls section. */ + +#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x) +#define DEFINE_TLS(type, x) type tls__##x __attribute__((section(".tls$AAA"))) + +/* 2) _tls_index holds the number of our module. The executable should be + zero, DLLs are numbered 1 and up. The loader fills it in for us. */ + +extern int _tls_index; +extern int _tls_start; +static inline void _tls_init_thread(void) {} + +/* 3) Thus, Teb->ThreadLocalStoragePointer[_tls_index] is the base of + the TLS segment for this (thread, module) pair. Each segment has + the same layout as this module's .tls segment and is initialized + with the content of the .tls segment; offset 0 is the _tls_start variable. + So, get_tls passes us the offset of the passed variable relative to + _tls_start, and we return that same offset plus the base of the segment. 
*/ + +static inline __attribute__((__const__)) void *_get_tls(size_t offset) +{ + PTEB Teb = NtCurrentTeb(); + return (char *)(Teb->ThreadLocalStoragePointer[_tls_index]) + offset; +} + +/* 4) get_tls, in addition to computing the offset, returns an lvalue. + "I got it. Magic." */ + +#define get_tls(x) \ + (*(__typeof__(tls__##x) *) \ + _get_tls((ULONG_PTR)&(tls__##x) - (ULONG_PTR)&_tls_start)) + +#endif -- 1.7.6 >From b10531473a833cf5e925f00461134b0bcd2295bb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 29 Aug 2011 17:03:55 +0200 Subject: [PATCH 2/3] Prepare Windows port for thread-local cpu_single_env Windows does not execute cpu_signal in VCPU-thread context, so it won't be able to use cpu_single_env there. However, it has the CPUState available, so nothing is lost. Signed-off-by: Paolo Bonzini --- cpus.c | 13 +++++++++---- 1 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cpus.c b/cpus.c index 8978779..822ce7a 100644 --- a/cpus.c +++ b/cpus.c @@ -176,10 +176,10 @@ static void cpu_handle_guest_debug(CPUState *env) env->stopped = 1; } -static void cpu_signal(int sig) +static inline void do_cpu_kick(CPUState *env) { - if (cpu_single_env) { - cpu_exit(cpu_single_env); + if (env) { + cpu_exit(env); } exit_request = 1; } @@ -437,6 +437,11 @@ static void qemu_kvm_init_cpu_signals(CPUState *env) } } +static void cpu_signal(int sig) +{ + do_cpu_kick(cpu_single_env); +} + static void qemu_tcg_init_cpu_signals(void) { sigset_t set; @@ -708,7 +713,7 @@ static void qemu_cpu_kick_thread(CPUState *env) #else /* _WIN32 */ if (!qemu_cpu_is_self(env)) { SuspendThread(env->thread->thread); - cpu_signal(0); + do_cpu_kick(env); ResumeThread(env->thread->thread); } #endif -- 1.7.6 >From 497ed0672f7fe08d9654a0e5c11b682bea43a59e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 29 Aug 2011 17:04:01 +0200 Subject: [PATCH 3/3] Make cpu_single_env thread-local Signed-off-by: Paolo Bonzini --- cpu-all.h | 4 +++- exec.c | 2 +- 2 files changed, 4 insertions(+), 2 
deletions(-) diff --git a/cpu-all.h b/cpu-all.h index 42a5fa0..da457dc 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -20,6 +20,7 @@ #define CPU_ALL_H #include "qemu-common.h" +#include "qemu-tls.h" #include "cpu-common.h" /* some important defines: @@ -334,7 +335,8 @@ void cpu_dump_statistics(CPUState *env, FILE *f, fprintf_function cpu_fprintf, void QEMU_NORETURN cpu_abort(CPUState *env, const char *fmt, ...) GCC_FMT_ATTR(2, 3); extern CPUState *first_cpu; -extern CPUState *cpu_single_env; +DECLARE_TLS(CPUState *,tls_cpu_single_env); +#define cpu_single_env get_tls(tls_cpu_single_env) /* Flags for use in ENV->INTERRUPT_PENDING. diff --git a/exec.c b/exec.c index d0cbf15..66b82db 100644 --- a/exec.c +++ b/exec.c @@ -120,7 +120,7 @@ static MemoryRegion *system_io; CPUState *first_cpu; /* current CPU in the current thread. It is only valid inside cpu_exec() */ -CPUState *cpu_single_env; +DEFINE_TLS(CPUState *,tls_cpu_single_env); /* 0 = Do not count executed instructions. 1 = Precise instruction counting. 2 = Adaptive rate instruction counting. */ -- 1.7.6