From: Alex Bennée
Subject: Re: [Qemu-devel] [RFC PATCH] Use atomic cmpxchg to atomically check the exclusive value in a STREX
Date: Tue, 09 Jun 2015 14:59:26 +0100

address@hidden writes:

> From: KONRAD Frederic <address@hidden>
>
> This mechanism replaces the existing load/store exclusive mechanism, which
> seems to be broken for multithreaded execution.
> It follows the intention of the existing mechanism: it stores the target
> address and data values during a load operation, and checks that they remain
> unchanged before a store.
>
> In common with the older approach, this provides weaker semantics than
> required, in that a different processor could write the same value with a
> non-exclusive store and the exclusive store would still succeed; in practice
> this seems to be irrelevant.
>
> The old implementation didn't correctly store its values as globals, but
> rather kept a local copy per CPU.
>
> This new mechanism stores the values globally and also uses the atomic
> cmpxchg macros to ensure atomicity; it is therefore very efficient and
> thread-safe.
>
> Signed-off-by: KONRAD Frederic <address@hidden>
> ---
>  target-arm/cpu.c       |  21 +++++++
>  target-arm/cpu.h       |   6 ++
>  target-arm/helper.c    |  12 ++++
>  target-arm/helper.h    |   6 ++
>  target-arm/op_helper.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  target-arm/translate.c |  98 ++++++---------------------------
>  6 files changed, 207 insertions(+), 82 deletions(-)
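
For readers following along, the shape of the new scheme is a
compare-and-swap guarded monitor. A minimal user-space sketch of the idea
(plain C11 atomics standing in for QEMU's atomic_cmpxchg macro; this is an
illustration, not the patch's code):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    static uint32_t exclusive_addr = (uint32_t)-1; /* per-CPU in the patch */
    static uint32_t exclusive_val;

    /* LDREX: load, and arm the monitor with the address/value pair. */
    static uint32_t ldrex(_Atomic uint32_t *mem, uint32_t addr)
    {
        uint32_t v = atomic_load(mem);
        exclusive_addr = addr;
        exclusive_val = v;
        return v;
    }

    /* STREX: succeed (return 0) only if the monitor is still armed for
     * this address and memory still holds the value we loaded. */
    static int strex(_Atomic uint32_t *mem, uint32_t addr, uint32_t newval)
    {
        uint32_t expected;
        bool ok;

        if (exclusive_addr != addr) {
            return 1;                      /* monitor lost */
        }
        expected = exclusive_val;
        ok = atomic_compare_exchange_strong(mem, &expected, newval);
        exclusive_addr = (uint32_t)-1;     /* one shot, either way */
        return ok ? 0 : 1;
    }

This also makes the caveat above concrete: a racing store of the *same*
value still lets the compare-exchange succeed (the usual ABA weakness),
exactly as with the old code.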
>
> diff --git a/target-arm/cpu.c b/target-arm/cpu.c
> index 4a888ab..0400061 100644
> --- a/target-arm/cpu.c
> +++ b/target-arm/cpu.c
> @@ -31,6 +31,26 @@
>  #include "sysemu/kvm.h"
>  #include "kvm_arm.h"
>  
> +/* Protect the cpu_exclusive_* variables. */
> +__thread bool cpu_have_exclusive_lock;
> +QemuMutex cpu_exclusive_lock;
> +
> +inline void arm_exclusive_lock(void)
> +{
> +    if (!cpu_have_exclusive_lock) {
> +        qemu_mutex_lock(&cpu_exclusive_lock);
> +        cpu_have_exclusive_lock = true;
> +    }
> +}
> +
> +inline void arm_exclusive_unlock(void)
> +{
> +    if (cpu_have_exclusive_lock) {
> +        cpu_have_exclusive_lock = false;
> +        qemu_mutex_unlock(&cpu_exclusive_lock);
> +    }
> +}
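
(A note for other readers: this pair implements "lock unless this thread
already holds it", not a counted recursive mutex - a second lock call from
the same thread is a no-op and a *single* unlock releases the mutex. That
is what lets raise_exception() later in the patch take the lock
unconditionally even when the trapping helper already held it. A standalone
sketch of the pattern, with hypothetical names:

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
    static __thread bool have_lock;     /* one flag per thread */

    static void lock_once(void)
    {
        if (!have_lock) {               /* no-op when already held */
            pthread_mutex_lock(&big_lock);
            have_lock = true;
        }
    }

    static void unlock_once(void)
    {
        if (have_lock) {                /* no-op when not held */
            have_lock = false;
            pthread_mutex_unlock(&big_lock);
        }
    }

The consequence is that nesting is tolerated but not counted: the first
unlock_once() really unlocks, so callers must not rely on balanced depth.)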
> +
>  static void arm_cpu_set_pc(CPUState *cs, vaddr value)
>  {
>      ARMCPU *cpu = ARM_CPU(cs);
> @@ -425,6 +445,7 @@ static void arm_cpu_initfn(Object *obj)
>          cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
>          if (!inited) {
>              inited = true;
> +            qemu_mutex_init(&cpu_exclusive_lock);
>              arm_translate_init();
>          }
>      }
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index 21b5b8e..257ed05 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -506,6 +506,9 @@ static inline bool is_a64(CPUARMState *env)
>  int cpu_arm_signal_handler(int host_signum, void *pinfo,
>                             void *puc);
>  
> +int arm_get_phys_addr(CPUARMState *env, target_ulong address, int access_type,
> +                      hwaddr *phys_ptr, int *prot, target_ulong *page_size);
> +
>  /**
>   * pmccntr_sync
>   * @env: CPUARMState
> @@ -1922,4 +1925,7 @@ enum {
>      QEMU_PSCI_CONDUIT_HVC = 2,
>  };
>  
> +void arm_exclusive_lock(void);
> +void arm_exclusive_unlock(void);
> +
>  #endif
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 3da0c05..31ee1e5 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -23,6 +23,14 @@ static inline int get_phys_addr(CPUARMState *env, target_ulong address,
>  #define PMCRE   0x1
>  #endif
>  
> +int arm_get_phys_addr(CPUARMState *env, target_ulong address, int access_type,
> +                      hwaddr *phys_ptr, int *prot, target_ulong *page_size)
> +{
> +    MemTxAttrs attrs = {};
> +    return get_phys_addr(env, address, access_type, cpu_mmu_index(env),
> +                         phys_ptr, &attrs, prot, page_size);
> +}
> +
>  static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
>  {
>      int nregs;
> @@ -4731,6 +4739,10 @@ void arm_cpu_do_interrupt(CPUState *cs)
>  
>      arm_log_exception(cs->exception_index);
>  
> +    arm_exclusive_lock();
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
> +
>      if (arm_is_psci_call(cpu, cs->exception_index)) {
>          arm_handle_psci_call(cpu);
>          qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n");
> diff --git a/target-arm/helper.h b/target-arm/helper.h
> index fc885de..63c2809 100644
> --- a/target-arm/helper.h
> +++ b/target-arm/helper.h
> @@ -529,6 +529,12 @@ DEF_HELPER_2(dc_zva, void, env, i64)
>  DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>  DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>  
> +DEF_HELPER_4(atomic_cmpxchg64, i32, env, i32, i64, i32)
> +DEF_HELPER_2(atomic_check, i32, env, i32)
> +DEF_HELPER_1(atomic_release, void, env)

Apologies for this being split over several mails; I didn't notice before
that atomic_check/release don't seem to be called at all in this patch.
Is there later work that uses them?

> +DEF_HELPER_1(atomic_clear, void, env)
> +DEF_HELPER_3(atomic_claim, void, env, i32, i64)
> +
>  #ifdef TARGET_AARCH64
>  #include "helper-a64.h"
>  #endif
> diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
> index 7583ae7..81dd403 100644
> --- a/target-arm/op_helper.c
> +++ b/target-arm/op_helper.c
> @@ -30,12 +30,157 @@ static void raise_exception(CPUARMState *env, uint32_t excp,
>      CPUState *cs = CPU(arm_env_get_cpu(env));
>  
>      assert(!excp_is_internal(excp));
> +    arm_exclusive_lock();
>      cs->exception_index = excp;
>      env->exception.syndrome = syndrome;
>      env->exception.target_el = target_el;
> +    /*
> +     * We MAY already have the lock - in which case we are exiting the
> +     * instruction due to an exception. Otherwise we had better make sure
> +     * we are not about to enter a STREX anyway.
> +     */
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
>      cpu_loop_exit(cs);
>  }
>  
> +/* NB return 1 for fail, 0 for pass */
> +uint32_t HELPER(atomic_cmpxchg64)(CPUARMState *env, uint32_t addr,
> +                                  uint64_t newval, uint32_t size)
> +{
> +    ARMCPU *cpu = arm_env_get_cpu(env);
> +    CPUState *cs = CPU(cpu);
> +
> +    bool result = false;
> +    hwaddr len = 8 << size;
> +
> +    hwaddr paddr;
> +    target_ulong page_size;
> +    int prot;
> +
> +    arm_exclusive_lock();
> +
> +    if (env->exclusive_addr != addr) {
> +        arm_exclusive_unlock();
> +        return 1;
> +    }
> +
> +    if (arm_get_phys_addr(env, addr, 1, &paddr, &prot, &page_size) != 0) {
> +        tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, cpu_mmu_index(env), 0);
> +        if (arm_get_phys_addr(env, addr, 1, &paddr, &prot, &page_size) != 0) {
> +            arm_exclusive_unlock();
> +            return 1;
> +        }
> +    }
> +
> +    switch (size) {
> +    case 0:
> +    {
> +        uint8_t oldval, *p;
> +        p = address_space_map(cs->as, paddr, &len, true);
> +        if (len == 8 << size) {
> +            oldval = (uint8_t)env->exclusive_val;
> +            result = (atomic_cmpxchg(p, oldval, (uint8_t)newval) == oldval);
> +        }
> +        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
> +    }
> +    break;
> +    case 1:
> +    {
> +        uint16_t oldval, *p;
> +        p = address_space_map(cs->as, paddr, &len, true);
> +        if (len == 8 << size) {
> +            oldval = (uint16_t)env->exclusive_val;
> +            result = (atomic_cmpxchg(p, oldval, (uint16_t)newval) == oldval);
> +        }
> +        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
> +    }
> +    break;
> +    case 2:
> +    {
> +        uint32_t oldval, *p;
> +        p = address_space_map(cs->as, paddr, &len, true);
> +        if (len == 8 << size) {
> +            oldval = (uint32_t)env->exclusive_val;
> +            result = (atomic_cmpxchg(p, oldval, (uint32_t)newval) == oldval);
> +        }
> +        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
> +    }
> +    break;
> +    case 3:
> +    {
> +        uint64_t oldval, *p;
> +        p = address_space_map(cs->as, paddr, &len, true);
> +        if (len == 8 << size) {
> +            oldval = (uint64_t)env->exclusive_val;
> +            result = (atomic_cmpxchg(p, oldval, (uint64_t)newval) == oldval);
> +        }
> +        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
> +    }
> +    break;
> +    default:
> +        abort();
> +    break;
> +    }
> +
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
> +    if (result) {
> +        return 0;
> +    } else {
> +        return 1;
> +    }
> +}
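
Two observations here. First, len is initialised to 8 << size, which reads
like a bit count, but as far as I can see address_space_map() takes a
length in bytes, so presumably 1 << size was intended (unless I'm
misreading the size encoding). Second, the four cases differ only in the
pointee type, so they could be folded with a small macro. An untested
sketch, assuming atomic_cmpxchg expands to a type-generic
__sync_val_compare_and_swap as elsewhere in QEMU:

    #define CMPXCHG_CASE(type)                                             \
        do {                                                               \
            type oldval, *p;                                               \
            p = address_space_map(cs->as, paddr, &len, true);              \
            if (len == 8 << size) {    /* keeps the original check */      \
                oldval = (type)env->exclusive_val;                         \
                result = atomic_cmpxchg(p, oldval, (type)newval) == oldval;\
            }                                                              \
            address_space_unmap(cs->as, p, len, true, result ? 8 : 0);     \
        } while (0)

    switch (size) {
    case 0: CMPXCHG_CASE(uint8_t);  break;
    case 1: CMPXCHG_CASE(uint16_t); break;
    case 2: CMPXCHG_CASE(uint32_t); break;
    case 3: CMPXCHG_CASE(uint64_t); break;
    default: abort();
    }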
> +
> +
> +uint32_t HELPER(atomic_check)(CPUARMState *env, uint32_t addr)
> +{
> +    arm_exclusive_lock();
> +    if (env->exclusive_addr == addr) {
> +        return true;
> +    }
> +    /* we failed to keep the address tagged, so we fail */
> +    env->exclusive_addr = -1;  /* for efficiency */
> +    arm_exclusive_unlock();
> +    return false;
> +}
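
Also note the asymmetry: on success this returns with the exclusive lock
still held, with atomic_release() presumably being the other half of the
pair. That contract deserves a comment; the intended shape of a caller
seems to be something like this (hypothetical, since nothing in this patch
calls it):

    if (helper_atomic_check(env, addr)) {  /* returns with lock held */
        /* ... perform the exclusive sequence ... */
        helper_atomic_release(env);        /* clears the tag and unlocks */
    }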
> +
> +void HELPER(atomic_release)(CPUARMState *env)
> +{
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
> +}
> +
> +void HELPER(atomic_clear)(CPUARMState *env)
> +{
> +    /* make sure no STREX is about to start */
> +    arm_exclusive_lock();
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
> +}
> +
> +
> +void HELPER(atomic_claim)(CPUARMState *env, uint32_t addr, uint64_t val)
> +{
> +    CPUState *cpu;
> +    CPUARMState *current_cpu;
> +
> +    /* ensure that there are no STREX's executing */
> +    arm_exclusive_lock();
> +
> +    CPU_FOREACH(cpu) {
> +        current_cpu = &ARM_CPU(cpu)->env;
> +        if (current_cpu->exclusive_addr == addr) {
> +            /* We steal the atomic of this CPU. */
> +            current_cpu->exclusive_addr = -1;
> +        }
> +    }
> +
> +    env->exclusive_val = val;
> +    env->exclusive_addr = addr;
> +    arm_exclusive_unlock();
> +}
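
The steal here is the key serialisation point: under the global lock,
arming this CPU's monitor disarms any other CPU watching the same address,
so at most one later STREX can find its tag intact. That is stricter than
real hardware, where each CPU has a local monitor, but failing a STREX
spuriously is the architecturally safe direction since guests must retry.
Schematically (a model of the rule, not QEMU code):

    #include <stdint.h>

    #define NCPUS 2
    struct cpu { uint32_t excl_addr, excl_val; };
    static struct cpu cpus[NCPUS] = {
        { .excl_addr = (uint32_t)-1 },
        { .excl_addr = (uint32_t)-1 },
    };

    /* Called with the global exclusive lock held. */
    static void claim(int n, uint32_t addr, uint32_t val)
    {
        for (int i = 0; i < NCPUS; i++) {
            if (i != n && cpus[i].excl_addr == addr) {
                cpus[i].excl_addr = (uint32_t)-1;   /* steal the tag */
            }
        }
        cpus[n].excl_val = val;
        cpus[n].excl_addr = addr;
    }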
> +
>  static int exception_target_el(CPUARMState *env)
>  {
>      int target_el = MAX(1, arm_current_el(env));
> @@ -582,7 +727,6 @@ void HELPER(exception_return)(CPUARMState *env)
>  
>      aarch64_save_sp(env, cur_el);
>  
> -    env->exclusive_addr = -1;
>  
>      /* We must squash the PSTATE.SS bit to zero unless both of the
>       * following hold:
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 39692d7..ba4eb65 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -65,8 +65,8 @@ TCGv_ptr cpu_env;
>  static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
>  static TCGv_i32 cpu_R[16];
>  static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
> -static TCGv_i64 cpu_exclusive_addr;
>  static TCGv_i64 cpu_exclusive_val;
> +static TCGv_i64 cpu_exclusive_addr;
>  #ifdef CONFIG_USER_ONLY
>  static TCGv_i64 cpu_exclusive_test;
>  static TCGv_i32 cpu_exclusive_info;
> @@ -7391,6 +7391,7 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>                                 TCGv_i32 addr, int size)
>  {
>      TCGv_i32 tmp = tcg_temp_new_i32();
> +    TCGv_i64 val = tcg_temp_new_i64();
>  
>      s->is_ldex = true;
>  
> @@ -7415,20 +7416,20 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>  
>          tcg_gen_addi_i32(tmp2, addr, 4);
>          gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
> +        tcg_gen_concat_i32_i64(val, tmp, tmp3);
>          tcg_temp_free_i32(tmp2);
> -        tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3);
>          store_reg(s, rt2, tmp3);
>      } else {
> -        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
> +        tcg_gen_extu_i32_i64(val, tmp);
>      }
> -
> +    gen_helper_atomic_claim(cpu_env, addr, val);
> +    tcg_temp_free_i64(val);
>      store_reg(s, rt, tmp);
> -    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
>  }
>  
>  static void gen_clrex(DisasContext *s)
>  {
> -    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
> +    gen_helper_atomic_clear(cpu_env);
>  }
>  
>  #ifdef CONFIG_USER_ONLY
> @@ -7445,84 +7446,19 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>                                  TCGv_i32 addr, int size)
>  {
>      TCGv_i32 tmp;
> -    TCGv_i64 val64, extaddr;
> -    TCGLabel *done_label;
> -    TCGLabel *fail_label;
> -
> -    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
> -         [addr] = {Rt};
> -         {Rd} = 0;
> -       } else {
> -         {Rd} = 1;
> -       } */
> -    fail_label = gen_new_label();
> -    done_label = gen_new_label();
> -    extaddr = tcg_temp_new_i64();
> -    tcg_gen_extu_i32_i64(extaddr, addr);
> -    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
> -    tcg_temp_free_i64(extaddr);
> -
> -    tmp = tcg_temp_new_i32();
> -    switch (size) {
> -    case 0:
> -        gen_aa32_ld8u(tmp, addr, get_mem_index(s));
> -        break;
> -    case 1:
> -        gen_aa32_ld16u(tmp, addr, get_mem_index(s));
> -        break;
> -    case 2:
> -    case 3:
> -        gen_aa32_ld32u(tmp, addr, get_mem_index(s));
> -        break;
> -    default:
> -        abort();
> -    }
> -
> -    val64 = tcg_temp_new_i64();
> -    if (size == 3) {
> -        TCGv_i32 tmp2 = tcg_temp_new_i32();
> -        TCGv_i32 tmp3 = tcg_temp_new_i32();
> -        tcg_gen_addi_i32(tmp2, addr, 4);
> -        gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
> -        tcg_temp_free_i32(tmp2);
> -        tcg_gen_concat_i32_i64(val64, tmp, tmp3);
> -        tcg_temp_free_i32(tmp3);
> -    } else {
> -        tcg_gen_extu_i32_i64(val64, tmp);
> -    }
> -    tcg_temp_free_i32(tmp);
> -
> -    tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
> -    tcg_temp_free_i64(val64);
> +    TCGv_i32 tmp2;
> +    TCGv_i64 val = tcg_temp_new_i64();
> +    TCGv_i32 tmp_size = tcg_const_i32(size);
>  
>      tmp = load_reg(s, rt);
> -    switch (size) {
> -    case 0:
> -        gen_aa32_st8(tmp, addr, get_mem_index(s));
> -        break;
> -    case 1:
> -        gen_aa32_st16(tmp, addr, get_mem_index(s));
> -        break;
> -    case 2:
> -    case 3:
> -        gen_aa32_st32(tmp, addr, get_mem_index(s));
> -        break;
> -    default:
> -        abort();
> -    }
> +    tmp2 = load_reg(s, rt2);
> +    tcg_gen_concat_i32_i64(val, tmp, tmp2);
>      tcg_temp_free_i32(tmp);
> -    if (size == 3) {
> -        tcg_gen_addi_i32(addr, addr, 4);
> -        tmp = load_reg(s, rt2);
> -        gen_aa32_st32(tmp, addr, get_mem_index(s));
> -        tcg_temp_free_i32(tmp);
> -    }
> -    tcg_gen_movi_i32(cpu_R[rd], 0);
> -    tcg_gen_br(done_label);
> -    gen_set_label(fail_label);
> -    tcg_gen_movi_i32(cpu_R[rd], 1);
> -    gen_set_label(done_label);
> -    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
> +    tcg_temp_free_i32(tmp2);
> +
> +    gen_helper_atomic_cmpxchg64(cpu_R[rd], cpu_env, addr, val, tmp_size);
> +    tcg_temp_free_i64(val);
> +    tcg_temp_free_i32(tmp_size);
>  }
>  #endif
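
Stepping back, the bar all of this has to clear is the guest's LDREX/STREX
retry loop, which in C11 terms is the usual compare-exchange loop:

    #include <stdatomic.h>
    #include <stdint.h>

    /* What guest atomic-add code does with LDREX/STREX, expressed in C11. */
    static uint32_t guest_atomic_add(_Atomic uint32_t *p, uint32_t n)
    {
        uint32_t old, new;
        do {
            old = atomic_load(p);                    /* LDREX */
            new = old + n;
        } while (!atomic_compare_exchange_weak(p, &old, new)); /* STREX */
        return new;
    }

With the old non-atomic load/compare/store sequence in gen_store_exclusive,
two vCPU threads could both pass the comparison and both store, losing an
update; funnelling both the check and the store through a single host
cmpxchg closes that window, which I take to be the real point of the patch.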

-- 
Alex Bennée
