qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC v2 11/11] tcg: enable thread-per-vCPU


From: Sergey Fedorov
Subject: Re: [Qemu-devel] [RFC v2 11/11] tcg: enable thread-per-vCPU
Date: Fri, 27 May 2016 16:57:12 +0300
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.8.0

On 05/04/16 18:32, Alex Bennée wrote:
> From: KONRAD Frederic <address@hidden>
>
> This allows the user to switch on multi-thread behaviour and spawn a
> thread per-vCPU. For a simple test like:
>
>   ./arm/run ./arm/locking-test.flat -smp 4 -tcg mttcg=on
>
> Will now use 4 vCPU threads and have an expected FAIL (instead of the
> unexpected PASS) as the default mode of the test has no protection when
> incrementing a shared variable.
>
> However we still default to a single thread for all vCPUs as individual
> front-end and back-ends need additional fixes to safely support:
>   - atomic behaviour
>   - tb invalidation
>   - memory ordering
>
> The function default_mttcg_enabled can be tweaked as support is added.
>
> As assumptions about tcg_current_cpu are no longer relevant to the
> single-threaded kick routine we need to save the current information
> somewhere else for the timer.

It was hard to read this patch the first time through. It seems to pursue
multiple goals at the same time and needs to be split into at least the
following parts:
 1. Make 'cpu->thread_kicked' access atomic
 2. Remove global 'exit_request' and use per-CPU 'exit_request'
 3. Change how 'current_cpu' is set
 4. Reorganize round-robin CPU TCG thread function
 5. Enable 'mmap_lock' for system mode emulation (do we really want this?)
 6. Enable 'tb_lock' for system mode emulation
 7. Introduce per-CPU TCG thread function

It would be easier and faster to review these changes as separate
patches. I attempted to mark each individual change with the numbers
from the list above.
 
>
> Signed-off-by: KONRAD Frederic <address@hidden>
> Signed-off-by: Paolo Bonzini <address@hidden>
> [AJB: Some fixes, conditionally, commit rewording]
> Signed-off-by: Alex Bennée <address@hidden>
>
> ---
> v1 (ajb):
>   - fix merge conflicts
>   - maintain single-thread approach
> v2
>   - re-base fixes (no longer has tb_find_fast lock tweak ahead)
>   - remove bogus break condition on cpu->stop/stopped
>   - only process exiting cpus exit_request
>   - handle all cpus idle case (fixes shutdown issues)
>   - sleep on EXCP_HALTED in mttcg mode (prevent crash on start-up)
>   - move icount timer into helper
> ---
>  cpu-exec-common.c       |   1 -
>  cpu-exec.c              |  15 ----
>  cpus.c                  | 216 
> +++++++++++++++++++++++++++++++++---------------
>  include/exec/exec-all.h |   4 -
>  translate-all.c         |   8 --
>  5 files changed, 150 insertions(+), 94 deletions(-)
>
> diff --git a/cpu-exec-common.c b/cpu-exec-common.c
> index 1b1731c..3d7eaa3 100644
> --- a/cpu-exec-common.c
> +++ b/cpu-exec-common.c
> @@ -23,7 +23,6 @@
>  #include "exec/memory-internal.h"
>  
>  bool exit_request;
> -CPUState *tcg_current_cpu;

This relates to item 2. There are no more references to this global
'exit_request', so let's just remove it.

>  
>  /* exit the current TB from a signal handler. The host registers are
>     restored in a state compatible with the CPU emulator
> diff --git a/cpu-exec.c b/cpu-exec.c
> index f558508..42cec05 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -292,7 +292,6 @@ static TranslationBlock *tb_find_slow(CPUState *cpu,
>          goto found;
>      }
>  
> -#ifdef CONFIG_USER_ONLY

5, 6.

>      /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
>       * taken outside tb_lock.  Since we're momentarily dropping
>       * tb_lock, there's a chance that our desired tb has been
> @@ -306,15 +305,12 @@ static TranslationBlock *tb_find_slow(CPUState *cpu,
>          mmap_unlock();
>          goto found;
>      }
> -#endif

5, 6.

>  
>      /* if no translated code available, then translate it now */
>      cpu->tb_invalidated_flag = false;
>      tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
>  
> -#ifdef CONFIG_USER_ONLY
>      mmap_unlock();
> -#endif

5.

>  
>  found:
>      /* we add the TB in the virtual pc hash table */
> @@ -388,13 +384,8 @@ int cpu_exec(CPUState *cpu)
>          cpu->halted = 0;
>      }
>  
> -    atomic_mb_set(&tcg_current_cpu, cpu);
>      rcu_read_lock();
>  
> -    if (unlikely(atomic_mb_read(&exit_request))) {
> -        cpu->exit_request = 1;
> -    }
> -

2.

>      cc->cpu_exec_enter(cpu);
>  
>      /* Calculate difference between guest clock and host clock.
> @@ -515,7 +506,6 @@ int cpu_exec(CPUState *cpu)
>                  }
>                  if (unlikely(cpu->exit_request
>                               || replay_has_interrupt())) {
> -                    cpu->exit_request = 0;

2.

>                      cpu->exception_index = EXCP_INTERRUPT;
>                      cpu_loop_exit(cpu);
>                  }
> @@ -629,10 +619,5 @@ int cpu_exec(CPUState *cpu)
>      cc->cpu_exec_exit(cpu);
>      rcu_read_unlock();
>  
> -    /* fail safe : never use current_cpu outside cpu_exec() */
> -    current_cpu = NULL;

We still want to reset 'current_cpu' when leaving cpu_exec(), don't we?

> -
> -    /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
> -    atomic_set(&tcg_current_cpu, NULL);

2.

>      return ret;
>  }
> diff --git a/cpus.c b/cpus.c
> index 02fae13..f7c7359 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -966,10 +966,7 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), 
> void *data)
>  
>      qemu_cpu_kick(cpu);
>      while (!atomic_mb_read(&wi.done)) {
> -        CPUState *self_cpu = current_cpu;
> -
>          qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
> -        current_cpu = self_cpu;

Don't we need to keep it for the sake of the single-threaded CPU loop?

>      }
>  }
>  
> @@ -1031,13 +1028,13 @@ static void flush_queued_work(CPUState *cpu)
>  
>  static void qemu_wait_io_event_common(CPUState *cpu)
>  {
> +    atomic_mb_set(&cpu->thread_kicked, false);
>      if (cpu->stop) {
>          cpu->stop = false;
>          cpu->stopped = true;
>          qemu_cond_broadcast(&qemu_pause_cond);
>      }
>      flush_queued_work(cpu);
> -    cpu->thread_kicked = false;

1.

>  }
>  
>  static void qemu_tcg_wait_io_event(CPUState *cpu)
> @@ -1046,9 +1043,7 @@ static void qemu_tcg_wait_io_event(CPUState *cpu)
>          qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
>      }
>  
> -    CPU_FOREACH(cpu) {
> -        qemu_wait_io_event_common(cpu);
> -    }
> +    qemu_wait_io_event_common(cpu);

4?

>  }
>  
>  static void qemu_kvm_wait_io_event(CPUState *cpu)
> @@ -1115,6 +1110,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
>      qemu_thread_get_self(cpu->thread);
>      cpu->thread_id = qemu_get_thread_id();
>      cpu->can_do_io = 1;
> +    current_cpu = cpu;

3.

>  
>      sigemptyset(&waitset);
>      sigaddset(&waitset, SIG_IPI);
> @@ -1123,9 +1119,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
>      cpu->created = true;
>      qemu_cond_signal(&qemu_cpu_cond);
>  
> -    current_cpu = cpu;
>      while (1) {
> -        current_cpu = NULL;

3.

>          qemu_mutex_unlock_iothread();
>          do {
>              int sig;
> @@ -1136,7 +1130,6 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
>              exit(1);
>          }
>          qemu_mutex_lock_iothread();
> -        current_cpu = cpu;

3.

>          qemu_wait_io_event_common(cpu);
>      }
>  
> @@ -1153,32 +1146,52 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
>   * elsewhere.
>   */
>  static int tcg_cpu_exec(CPUState *cpu);
> -static void qemu_cpu_kick_no_halt(void);
> +
> +struct kick_info {
> +    QEMUTimer *timer;
> +    CPUState  *cpu;
> +};
>  
>  static void kick_tcg_thread(void *opaque)
>  {
> -    QEMUTimer *self = *(QEMUTimer **) opaque;
> -    timer_mod(self,
> +    struct kick_info *info = (struct kick_info *) opaque;
> +    CPUState *cpu = atomic_mb_read(&info->cpu);
> +
> +    timer_mod(info->timer,
>                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
>                NANOSECONDS_PER_SECOND / 10);
> -    qemu_cpu_kick_no_halt();
> +
> +    if (cpu) {
> +        cpu_exit(cpu);
> +    }

2. Maybe a separate change.

>  }
>  
> -static void *qemu_tcg_cpu_thread_fn(void *arg)
> +static void handle_icount_deadline(void)
> +{
> +    if (use_icount) {
> +        int64_t deadline =
> +            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
> +
> +        if (deadline == 0) {
> +            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
> +        }
> +    }
> +}

4.

> +
> +static void *qemu_tcg_single_cpu_thread_fn(void *arg)

7.

>  {
> +    struct kick_info info;
>      CPUState *cpu = arg;
> -    QEMUTimer *kick_timer;

2. Maybe a separate change.

>  
>      rcu_register_thread();
>  
>      qemu_mutex_lock_iothread();
>      qemu_thread_get_self(cpu->thread);
>  
> -    CPU_FOREACH(cpu) {
> -        cpu->thread_id = qemu_get_thread_id();
> -        cpu->created = true;
> -        cpu->can_do_io = 1;
> -    }
> +    cpu->thread_id = qemu_get_thread_id();
> +    cpu->created = true;
> +    cpu->can_do_io = 1;
> +    current_cpu = cpu;

4?

>      qemu_cond_signal(&qemu_cpu_cond);
>  
>      /* wait for initial kick-off after machine start */
> @@ -1193,18 +1206,24 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
>  
>      /* Set to kick if we have to do more than one vCPU */
>      if (CPU_NEXT(first_cpu)) {
> -        kick_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,  kick_tcg_thread, 
> &kick_timer);
> -        timer_mod(kick_timer,
> +        info.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,  kick_tcg_thread, 
> &info);
> +        info.cpu = NULL;
> +        smp_wmb();
> +        timer_mod(info.timer,
>                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
>                    NANOSECONDS_PER_SECOND / 10);

2. Maybe a separate change.

>      }
>  
>      /* process any pending work */
> -    atomic_mb_set(&exit_request, 1);
> +    CPU_FOREACH(cpu) {
> +        atomic_mb_set(&cpu->exit_request, 1);
> +    }

2.

>  
>      cpu = first_cpu;
>  
>      while (1) {
> +        bool nothing_ran = true;
> +

4.

>          /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
>          qemu_account_warp_timer();
>  
> @@ -1212,34 +1231,107 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
>              cpu = first_cpu;
>          }
>  
> -        for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
> +        for (; cpu != NULL && !cpu->exit_request; cpu = CPU_NEXT(cpu)) {
> +            atomic_mb_set(&info.cpu, cpu);

2.

>  
>              qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
>                                (cpu->singlestep_enabled & SSTEP_NOTIMER) == 
> 0);
>  
>              if (cpu_can_run(cpu)) {
>                  int r = tcg_cpu_exec(cpu);
> +                nothing_ran = false;

4.

>                  if (r == EXCP_DEBUG) {
>                      cpu_handle_guest_debug(cpu);
>                      break;
>                  }
> -            } else if (cpu->stop || cpu->stopped) {
> -                break;

4.

>              }
>  
>          } /* for cpu.. */
>  
> -        /* Pairs with smp_wmb in qemu_cpu_kick.  */
> -        atomic_mb_set(&exit_request, 0);

2.

> +        atomic_mb_set(&info.cpu, NULL);

2. Maybe a separate change.

> +
> +        handle_icount_deadline();

4.

>  
> -        if (use_icount) {
> -            int64_t deadline = 
> qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
> +        /* We exit in one of three conditions:
> +         *  - cpu is set, because exit_request is true
> +         *  - cpu is not set, because we have looped around
> +         *  - cpu is not set and nothing ran
> +         */
>  
> -            if (deadline == 0) {
> -                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
> +        if (cpu) {
> +            g_assert(cpu->exit_request);
> +            /* Pairs with smp_wmb in qemu_cpu_kick.  */
> +            atomic_mb_set(&cpu->exit_request, 0);
> +            qemu_tcg_wait_io_event(cpu);
> +        } else if (nothing_ran) {
> +            while (all_cpu_threads_idle()) {
> +                qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

2, 4.

>              }
>          }
> -        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
> +    }
> +
> +    return NULL;
> +}
> +
> +/* Multi-threaded TCG
> + *
> + * In the multi-threaded case each vCPU has its own thread. The TLS
> + * variable current_cpu can be used deep in the code to find the
> + * current CPUState for a given thread.
> + */
> +
> +static void *qemu_tcg_cpu_thread_fn(void *arg)
> +{
> +    CPUState *cpu = arg;
> +
> +    rcu_register_thread();
> +
> +    qemu_mutex_lock_iothread();
> +    qemu_thread_get_self(cpu->thread);
> +
> +    cpu->thread_id = qemu_get_thread_id();
> +    cpu->created = true;
> +    cpu->can_do_io = 1;
> +    current_cpu = cpu;
> +    qemu_cond_signal(&qemu_cpu_cond);
> +
> +    /* process any pending work */
> +    atomic_mb_set(&cpu->exit_request, 1);
> +
> +    while (1) {
> +        bool sleep = false;
> +
> +        if (cpu_can_run(cpu)) {
> +            int r = tcg_cpu_exec(cpu);
> +            switch (r)
> +            {
> +            case EXCP_DEBUG:
> +                cpu_handle_guest_debug(cpu);
> +                break;
> +            case EXCP_HALTED:
> +                /* during start-up the vCPU is reset and the thread is
> +                 * kicked several times. If we don't ensure we go back
> +                 * to sleep in the halted state we won't cleanly
> +                 * start-up when the vCPU is enabled.
> +                 */
> +                sleep = true;
> +                break;
> +            default:
> +                /* Ignore everything else? */
> +                break;
> +            }
> +        } else {
> +            sleep = true;
> +        }
> +
> +        handle_icount_deadline();
> +
> +        if (sleep) {
> +            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
> +        }
> +
> +        atomic_mb_set(&cpu->exit_request, 0);
> +        qemu_tcg_wait_io_event(cpu);

7.

>      }
>  
>      return NULL;
> @@ -1264,24 +1356,11 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>  #endif
>  }
>  
> -static void qemu_cpu_kick_no_halt(void)
> -{
> -    CPUState *cpu;
> -    /* Ensure whatever caused the exit has reached the CPU threads before
> -     * writing exit_request.
> -     */
> -    atomic_mb_set(&exit_request, 1);
> -    cpu = atomic_mb_read(&tcg_current_cpu);
> -    if (cpu) {
> -        cpu_exit(cpu);
> -    }
> -}

2.

> -
>  void qemu_cpu_kick(CPUState *cpu)
>  {
>      qemu_cond_broadcast(cpu->halt_cond);
>      if (tcg_enabled()) {
> -        qemu_cpu_kick_no_halt();
> +        cpu_exit(cpu);

2.

>      } else {
>          qemu_cpu_kick_thread(cpu);
>      }
> @@ -1347,13 +1426,6 @@ void pause_all_vcpus(void)
>  
>      if (qemu_in_vcpu_thread()) {
>          cpu_stop_current();
> -        if (!kvm_enabled()) {
> -            CPU_FOREACH(cpu) {
> -                cpu->stop = false;
> -                cpu->stopped = true;
> -            }
> -            return;
> -        }

4.

>      }
>  
>      while (!all_vcpus_paused()) {
> @@ -1387,29 +1459,41 @@ void resume_all_vcpus(void)
>  static void qemu_tcg_init_vcpu(CPUState *cpu)
>  {
>      char thread_name[VCPU_THREAD_NAME_SIZE];
> -    static QemuCond *tcg_halt_cond;
> -    static QemuThread *tcg_cpu_thread;
> +    static QemuCond *single_tcg_halt_cond;
> +    static QemuThread *single_tcg_cpu_thread;
>  
> -    /* share a single thread for all cpus with TCG */
> -    if (!tcg_cpu_thread) {
> +    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
>          cpu->thread = g_malloc0(sizeof(QemuThread));
>          cpu->halt_cond = g_malloc0(sizeof(QemuCond));
>          qemu_cond_init(cpu->halt_cond);
> -        tcg_halt_cond = cpu->halt_cond;
> -        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
> +
> +        if (qemu_tcg_mttcg_enabled()) {
> +            /* create a thread per vCPU with TCG (MTTCG) */
> +            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
>                   cpu->cpu_index);
> -        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
> -                           cpu, QEMU_THREAD_JOINABLE);
> +
> +            qemu_thread_create(cpu->thread, thread_name, 
> qemu_tcg_cpu_thread_fn,
> +                               cpu, QEMU_THREAD_JOINABLE);
> +
> +        } else {
> +            /* share a single thread for all cpus with TCG */
> +            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
> +            qemu_thread_create(cpu->thread, thread_name, 
> qemu_tcg_single_cpu_thread_fn,
> +                               cpu, QEMU_THREAD_JOINABLE);
> +
> +            single_tcg_halt_cond = cpu->halt_cond;
> +            single_tcg_cpu_thread = cpu->thread;
> +        }
>  #ifdef _WIN32
>          cpu->hThread = qemu_thread_get_handle(cpu->thread);
>  #endif
>          while (!cpu->created) {
>              qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
>          }
> -        tcg_cpu_thread = cpu->thread;
>      } else {
> -        cpu->thread = tcg_cpu_thread;
> -        cpu->halt_cond = tcg_halt_cond;
> +        /* For non-MTTCG cases we share the thread */
> +        cpu->thread = single_tcg_cpu_thread;
> +        cpu->halt_cond = single_tcg_halt_cond;
>      }
>  }

7.

>  
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 60716ae..cde4b7a 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -479,8 +479,4 @@ bool memory_region_is_unassigned(MemoryRegion *mr);
>  /* vl.c */
>  extern int singlestep;
>  
> -/* cpu-exec.c, accessed with atomic_mb_read/atomic_mb_set */
> -extern CPUState *tcg_current_cpu;
> -extern bool exit_request;
> -

2.

>  #endif
> diff --git a/translate-all.c b/translate-all.c
> index 0c377bb..8e70583 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -122,36 +122,28 @@ static void *l1_map[V_L1_SIZE];
>  TCGContext tcg_ctx;
>  
>  /* translation block context */
> -#ifdef CONFIG_USER_ONLY
>  __thread int have_tb_lock;
> -#endif
>  
>  void tb_lock(void)
>  {
> -#ifdef CONFIG_USER_ONLY
>      assert(!have_tb_lock);
>      qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
>      have_tb_lock++;
> -#endif
>  }
>  
>  void tb_unlock(void)
>  {
> -#ifdef CONFIG_USER_ONLY
>      assert(have_tb_lock);
>      have_tb_lock--;
>      qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
> -#endif
>  }
>  
>  void tb_lock_reset(void)
>  {
> -#ifdef CONFIG_USER_ONLY
>      if (have_tb_lock) {
>          qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
>          have_tb_lock = 0;
>      }
> -#endif
>  }
>  

6.

>  static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);

Kind regards,
Sergey



reply via email to

[Prev in Thread] Current Thread [Next in Thread]