[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v3 06/34] tcg: Add EXCP_ATOMIC
From: |
Alex Bennée |
Subject: |
Re: [Qemu-devel] [PATCH v3 06/34] tcg: Add EXCP_ATOMIC |
Date: |
Mon, 12 Sep 2016 15:16:48 +0100 |
User-agent: |
mu4e 0.9.17; emacs 25.1.12 |
Richard Henderson <address@hidden> writes:
> When we cannot emulate an atomic operation within a parallel
> context, this exception allows us to stop the world and try
> again in a serial context.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> cpu-exec-common.c | 6 +++++
> cpu-exec.c | 23 +++++++++++++++++++
> cpus.c | 6 +++++
> include/exec/cpu-all.h | 1 +
> include/exec/exec-all.h | 1 +
> include/qemu-common.h | 1 +
> linux-user/main.c | 59
> ++++++++++++++++++++++++++++++++++++++++++++++++-
> tcg/tcg.h | 1 +
> translate-all.c | 1 +
> 9 files changed, 98 insertions(+), 1 deletion(-)
>
> diff --git a/cpu-exec-common.c b/cpu-exec-common.c
> index 0cb4ae6..767d9c6 100644
> --- a/cpu-exec-common.c
> +++ b/cpu-exec-common.c
> @@ -77,3 +77,9 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
> }
> siglongjmp(cpu->jmp_env, 1);
> }
> +
> +void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
> +{
> + cpu->exception_index = EXCP_ATOMIC;
> + cpu_loop_exit_restore(cpu, pc);
> +}
> diff --git a/cpu-exec.c b/cpu-exec.c
> index 5d9710a..8d77516 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -225,6 +225,29 @@ static void cpu_exec_nocache(CPUState *cpu, int
> max_cycles,
> }
> #endif
>
> +void cpu_exec_step(CPUState *cpu)
> +{
> + CPUArchState *env = (CPUArchState *)cpu->env_ptr;
> + TranslationBlock *tb;
> + target_ulong cs_base, pc;
> + uint32_t flags;
> + bool old_tb_flushed;
> +
> + old_tb_flushed = cpu->tb_flushed;
> + cpu->tb_flushed = false;
Advanced warning, these disappear soon when the async safe work (plus
safe tb flush) patches get merged.
> +
> + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> + tb = tb_gen_code(cpu, pc, cs_base, flags,
> + 1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
> + tb->orig_tb = NULL;
> + cpu->tb_flushed |= old_tb_flushed;
> + /* execute the generated code */
> + trace_exec_tb_nocache(tb, pc);
> + cpu_tb_exec(cpu, tb);
> + tb_phys_invalidate(tb, -1);
> + tb_free(tb);
> +}
> +
> struct tb_desc {
> target_ulong pc;
> target_ulong cs_base;
> diff --git a/cpus.c b/cpus.c
> index 84c3520..a01bbbd 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1575,6 +1575,12 @@ static void tcg_exec_all(void)
> if (r == EXCP_DEBUG) {
> cpu_handle_guest_debug(cpu);
> break;
> + } else if (r == EXCP_ATOMIC) {
> + /* ??? When we begin running cpus in parallel, we should
> + stop all cpus, clear parallel_cpus, and interpret a
> + single insn with cpu_exec_step. In the meantime,
> + we should never get here. */
> + abort();
Possibly more correct would be:
g_assert(parallel_cpus == false);
abort();
As mentioned in other patches I was seeing this triggered by
TLB_NOTDIRTY writes. However I think the fix is fairly simple, see
bellow:
> }
> } else if (cpu->stop || cpu->stopped) {
> if (cpu->unplug) {
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index b6a7059..1f7e9d8 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -31,6 +31,7 @@
> #define EXCP_DEBUG 0x10002 /* cpu stopped after a breakpoint or
> singlestep */
> #define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external
> event) */
> #define EXCP_YIELD 0x10004 /* cpu wants to yield timeslice to another */
> +#define EXCP_ATOMIC 0x10005 /* stop-the-world and emulate atomic */
>
> /* some important defines:
> *
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index d008296..08424ae 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -71,6 +71,7 @@ static inline void cpu_list_lock(void)
> void cpu_exec_init(CPUState *cpu, Error **errp);
> void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
> void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc);
> +void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc);
>
> #if !defined(CONFIG_USER_ONLY)
> void cpu_reloading_memory_map(void);
> diff --git a/include/qemu-common.h b/include/qemu-common.h
> index 9e8b0bd..f814317 100644
> --- a/include/qemu-common.h
> +++ b/include/qemu-common.h
> @@ -80,6 +80,7 @@ void tcg_exec_init(unsigned long tb_size);
> bool tcg_enabled(void);
>
> void cpu_exec_init_all(void);
> +void cpu_exec_step(CPUState *cpu);
>
> /**
> * Sends a (part of) iovec down a socket, yielding when the socket is full,
> or
> diff --git a/linux-user/main.c b/linux-user/main.c
> index f2f4d2f..8d5c948 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -182,13 +182,25 @@ static inline void start_exclusive(void)
> }
>
> /* Finish an exclusive operation. */
> -static inline void __attribute__((unused)) end_exclusive(void)
> +static inline void end_exclusive(void)
> {
> pending_cpus = 0;
> pthread_cond_broadcast(&exclusive_resume);
> pthread_mutex_unlock(&exclusive_lock);
> }
>
> +static void step_atomic(CPUState *cpu)
> +{
> + start_exclusive();
> +
> + /* Since we got here, we know that parallel_cpus must be true. */
> + parallel_cpus = false;
> + cpu_exec_step(cpu);
> + parallel_cpus = true;
> +
> + end_exclusive();
> +}
> +
Paolo's safe work patches bring the start/end_exclusive functions into
cpu-exec-common so I think after that has been merged this function
can also be moved and called directly from the MTTCG loop on an
EXCP_ATOMIC exit.
> /* Wait for exclusive ops to finish, and begin cpu execution. */
> static inline void cpu_exec_start(CPUState *cpu)
> {
> @@ -440,6 +452,9 @@ void cpu_loop(CPUX86State *env)
> }
> }
> break;
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> pc = env->segs[R_CS].base + env->eip;
> EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x -
> aborting\n",
> @@ -932,6 +947,9 @@ void cpu_loop(CPUARMState *env)
> case EXCP_YIELD:
> /* nothing to do here for user-mode, just resume guest code */
> break;
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> error:
> EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x -
> aborting\n", trapnr);
> @@ -1131,6 +1149,9 @@ void cpu_loop(CPUARMState *env)
> case EXCP_YIELD:
> /* nothing to do here for user-mode, just resume guest code */
> break;
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x -
> aborting\n", trapnr);
> abort();
> @@ -1220,6 +1241,9 @@ void cpu_loop(CPUUniCore32State *env)
> }
> }
> break;
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> goto error;
> }
> @@ -1492,6 +1516,9 @@ void cpu_loop (CPUSPARCState *env)
> }
> }
> break;
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> printf ("Unhandled trap: 0x%x\n", trapnr);
> cpu_dump_state(cs, stderr, fprintf, 0);
> @@ -2040,6 +2067,9 @@ void cpu_loop(CPUPPCState *env)
> case EXCP_INTERRUPT:
> /* just indicate that signals should be handled asap */
> break;
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> cpu_abort(cs, "Unknown exception 0x%d. Aborting\n", trapnr);
> break;
> @@ -2713,6 +2743,9 @@ done_syscall:
> }
> }
> break;
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> error:
> EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x -
> aborting\n", trapnr);
> @@ -2799,6 +2832,9 @@ void cpu_loop(CPUOpenRISCState *env)
> case EXCP_NR:
> qemu_log_mask(CPU_LOG_INT, "\nNR\n");
> break;
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> EXCP_DUMP(env, "\nqemu: unhandled CPU exception %#x -
> aborting\n",
> trapnr);
> @@ -2874,6 +2910,9 @@ void cpu_loop(CPUSH4State *env)
> queue_signal(env, info.si_signo, &info);
> break;
>
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> printf ("Unhandled trap: 0x%x\n", trapnr);
> cpu_dump_state(cs, stderr, fprintf, 0);
> @@ -2939,6 +2978,9 @@ void cpu_loop(CPUCRISState *env)
> }
> }
> break;
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> printf ("Unhandled trap: 0x%x\n", trapnr);
> cpu_dump_state(cs, stderr, fprintf, 0);
> @@ -3053,6 +3095,9 @@ void cpu_loop(CPUMBState *env)
> }
> }
> break;
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> printf ("Unhandled trap: 0x%x\n", trapnr);
> cpu_dump_state(cs, stderr, fprintf, 0);
> @@ -3154,6 +3199,9 @@ void cpu_loop(CPUM68KState *env)
> }
> }
> break;
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x -
> aborting\n", trapnr);
> abort();
> @@ -3389,6 +3437,9 @@ void cpu_loop(CPUAlphaState *env)
> case EXCP_INTERRUPT:
> /* Just indicate that signals should be handled asap. */
> break;
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> printf ("Unhandled trap: 0x%x\n", trapnr);
> cpu_dump_state(cs, stderr, fprintf, 0);
> @@ -3516,6 +3567,9 @@ void cpu_loop(CPUS390XState *env)
> queue_signal(env, info.si_signo, &info);
> break;
>
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> fprintf(stderr, "Unhandled trap: 0x%x\n", trapnr);
> cpu_dump_state(cs, stderr, fprintf, 0);
> @@ -3768,6 +3822,9 @@ void cpu_loop(CPUTLGState *env)
> case TILEGX_EXCP_REG_UDN_ACCESS:
> gen_sigill_reg(env);
> break;
> + case EXCP_ATOMIC:
> + step_atomic(cs);
> + break;
> default:
> fprintf(stderr, "trapnr is %d[0x%x].\n", trapnr, trapnr);
> g_assert_not_reached();
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 1bcabca..00498fc 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -700,6 +700,7 @@ struct TCGContext {
> };
>
> extern TCGContext tcg_ctx;
> +extern bool parallel_cpus;
>
> static inline void tcg_set_insn_param(int op_idx, int arg, TCGArg v)
> {
> diff --git a/translate-all.c b/translate-all.c
> index 0dd6466..f97fc1e 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -119,6 +119,7 @@ static void *l1_map[V_L1_SIZE];
>
> /* code generation context */
> TCGContext tcg_ctx;
> +bool parallel_cpus;
So lets add some words to this to distinguish between this and the mttcg
enabled flags and its relation to -smp. Something like:
parallel_cpus indicates to the front-ends if code needs to be
generated taking into account multiple threads of execution. It will
be true for linux-user after the first thread clone and if system mode
if MTTCG is enabled. On the transition from false->true any code
generated while false needs to be invalidated. It may be temporally
set to false when generating non-cached code in an exclusive context.
>
> /* translation block context */
> #ifdef CONFIG_USER_ONLY
--
Alex Bennée
- [Qemu-devel] [PATCH v3 00/34] cmpxchg-based emulation of atomics, Richard Henderson, 2016/09/03
- [Qemu-devel] [PATCH v3 07/34] HACK: Always enable parallel_cpus, Richard Henderson, 2016/09/03
- [Qemu-devel] [PATCH v3 11/34] cputlb: Move most of iotlb code out of line, Richard Henderson, 2016/09/03
- [Qemu-devel] [PATCH v3 04/34] int128: Use __int128 if available, Richard Henderson, 2016/09/03
- [Qemu-devel] [PATCH v3 09/34] cputlb: Move probe_write out of softmmu_template.h, Richard Henderson, 2016/09/03