[Qemu-devel] [RFC v7 16/16] target-arm: aarch64: add atomic instructions
From: Alvise Rigo
Subject: [Qemu-devel] [RFC v7 16/16] target-arm: aarch64: add atomic instructions
Date: Fri, 29 Jan 2016 10:32:45 +0100
Use the new LL/SC runtime helpers to handle the aarch64 atomic instructions
in softmmu_llsc_template.h.
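
For readers new to the series, the contract implemented by those helpers
can be pictured with the following toy model (an illustrative, self-contained
C sketch only; names like ToyCPU, ldlink_i64 and stcond_i64 are invented here
and do not match the real softmmu_llsc_template.h code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Toy model of the LL/SC contract: ldlink opens an exclusive pair and
     * marks a location; stcond stores only if the protected location is
     * still intact and, either way, closes the pair. */
    typedef struct {
        bool     ll_sc_context;  /* an LL/SC pair is currently open */
        unsigned excl_idx;       /* the protected location */
        uint64_t mem[16];        /* toy guest memory, 8-byte words */
    } ToyCPU;

    static uint64_t ldlink_i64(ToyCPU *cpu, unsigned idx)
    {
        cpu->ll_sc_context = true;     /* open the pair */
        cpu->excl_idx = idx;           /* protect this location */
        return cpu->mem[idx];
    }

    static uint64_t stcond_i64(ToyCPU *cpu, unsigned idx, uint64_t val)
    {
        /* Fail if no pair is open or the protected location was lost. */
        uint64_t fail = !cpu->ll_sc_context || cpu->excl_idx != idx;

        if (!fail) {
            cpu->mem[idx] = val;
        }
        cpu->ll_sc_context = false;    /* any stcond closes the pair */
        return fail;                   /* 0 on success, like STXR */
    }

    int main(void)
    {
        ToyCPU cpu = { 0 };
        uint64_t v = ldlink_i64(&cpu, 3);

        /* The first store-conditional succeeds (prints 0); the second
         * finds the pair already closed and fails (prints 1). */
        printf("%d\n", (int)stcond_i64(&cpu, 3, v + 1));
        printf("%d\n", (int)stcond_i64(&cpu, 3, v + 2));
        return 0;
    }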
The STXP emulation required a dedicated helper to handle the paired
doubleword case.
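
Why a dedicated helper is needed can be seen directly in the toy model
above (again purely illustrative, not this series' code): composing the
128-bit store-exclusive out of two 64-bit store-conditionals cannot work,
because the first stcond closes the LL/SC pair and the second is then
bound to fail.

    /* Broken 128-bit store-exclusive built from two 64-bit stconds. */
    static uint64_t broken_stxp(ToyCPU *cpu, unsigned idx,
                                uint64_t lo, uint64_t hi)
    {
        uint64_t fail = stcond_i64(cpu, idx, lo);      /* closes the pair... */
        return fail | stcond_i64(cpu, idx + 1, hi);    /* ...always fails */
    }

This is why helper_stxp_i128 below performs two plain 64-bit stores while
keeping the exclusive state (excl_succeeded) set across both, and resets
the LL/SC context only once, at the end.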
Suggested-by: Jani Kokkonen <address@hidden>
Suggested-by: Claudio Fontana <address@hidden>
Signed-off-by: Alvise Rigo <address@hidden>
---
 configure                  |   6 +-
 target-arm/helper-a64.c    |  55 +++++++++++++++++++
 target-arm/helper-a64.h    |   4 ++
 target-arm/op_helper.c     |   8 +++
 target-arm/translate-a64.c | 134 ++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 204 insertions(+), 3 deletions(-)
diff --git a/configure b/configure
index 915efcc..38121ff 100755
--- a/configure
+++ b/configure
@@ -5873,9 +5873,11 @@ echo "QEMU_CFLAGS+=$cflags" >> $config_target_mak
 # Use the LL/SC tcg backend for exclusive instructions on arm/aarch64
 # softmmu targets
 if test "$arm_tcg_use_llsc" = "yes" ; then
-  if test "$target" = "arm-softmmu" ; then
+  case "$target" in
+  arm-softmmu | aarch64-softmmu)
     echo "CONFIG_ARM_USE_LDST_EXCL=y" >> $config_target_mak
-  fi
+    ;;
+  esac
 fi

done # for target in $targets
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index c7bfb4d..dcee66f 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -26,6 +26,7 @@
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
+#include "tcg/tcg.h"
#include <zlib.h> /* For crc32 */
/* C2.4.7 Multiply and divide */
@@ -443,3 +444,57 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
     /* Linux crc32c converts the output to one's complement. */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
 }
+
+#ifdef CONFIG_ARM_USE_LDST_EXCL
+/* STXP emulation for a pair of 64-bit doublewords. We cannot simply issue
+ * two stcond_i64 accesses, otherwise the first one would conclude the LL/SC
+ * pair. Instead, two normal 64-bit accesses are used and the CPUState is
+ * updated accordingly. */
+target_ulong HELPER(stxp_i128)(CPUArchState *env, target_ulong addr,
+                               uint64_t vall, uint64_t valh,
+                               uint32_t mmu_idx)
+{
+    CPUState *cpu = ENV_GET_CPU(env);
+    TCGMemOpIdx op;
+    target_ulong ret = 0;
+
+    if (!cpu->ll_sc_context) {
+        cpu->excl_succeeded = false;
+        ret = 1;
+        goto out;
+    }
+
+    op = make_memop_idx(MO_BEQ, mmu_idx);
+
+    /* According to section C6.6.191 of ARM ARM DDI 0487A.h, the access has to
+     * be quadword aligned. For the time being, we do not support paired STXPs
+     * to MMIO memory; this will become trivial once the softmmu supports
+     * 128-bit memory accesses. */
+    if (addr & 0xf) {
+        /* TODO: Do unaligned access */
+    }
+
+    /* Setting excl_succeeded to true will make the store exclusive. */
+    cpu->excl_succeeded = true;
+    helper_ret_stq_mmu(env, addr, vall, op, GETRA());
+
+    if (!cpu->excl_succeeded) {
+        ret = 1;
+        goto out;
+    }
+
+    helper_ret_stq_mmu(env, addr + 8, valh, op, GETRA());
+    if (!cpu->excl_succeeded) {
+        ret = 1;
+    } else {
+        cpu->excl_succeeded = false;
+    }
+
+out:
+    /* Unset the LL/SC context */
+    cpu->ll_sc_context = false;
+    cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
+
+    return ret;
+}
+#endif
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index 1d3d10f..c416a83 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -46,3 +46,7 @@ DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
 DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
 DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
 DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
+#ifdef CONFIG_ARM_USE_LDST_EXCL
+/* STXP helper */
+DEF_HELPER_5(stxp_i128, i64, env, i64, i64, i64, i32)
+#endif
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 404c13b..146fc9a 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -34,6 +34,14 @@ static void raise_exception(CPUARMState *env, uint32_t excp,
     cs->exception_index = excp;
     env->exception.syndrome = syndrome;
     env->exception.target_el = target_el;
+#ifdef CONFIG_ARM_USE_LDST_EXCL
+    HELPER(atomic_clear)(env);
+    /* If the exception happens in the middle of an LL/SC, we need to
+     * clear excl_succeeded to prevent the normal store following the
+     * exception from being wrongly interpreted as exclusive.
+     */
+    cs->excl_succeeded = 0;
+#endif
     cpu_loop_exit(cs);
 }
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 80f6c20..f34e957 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -37,8 +37,10 @@
 static TCGv_i64 cpu_X[32];
 static TCGv_i64 cpu_pc;

+#if !defined(CONFIG_ARM_USE_LDST_EXCL)
 /* Load/store exclusive handling */
 static TCGv_i64 cpu_exclusive_high;
+#endif

 static const char *regnames[] = {
     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
@@ -94,8 +96,10 @@ void a64_translate_init(void)
                                           regnames[i]);
     }

+#if !defined(CONFIG_ARM_USE_LDST_EXCL)
     cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
         offsetof(CPUARMState, exclusive_high), "exclusive_high");
+#endif
 }

 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
@@ -1219,7 +1223,11 @@ static void handle_hint(DisasContext *s, uint32_t insn,

 static void gen_clrex(DisasContext *s, uint32_t insn)
 {
+#ifndef CONFIG_ARM_USE_LDST_EXCL
     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
+#else
+    gen_helper_atomic_clear(cpu_env);
+#endif
 }

 /* CLREX, DSB, DMB, ISB */
@@ -1685,7 +1693,11 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
 }

 /*
- * Load/Store exclusive instructions are implemented by remembering
+ * If the softmmu is enabled, the translation of Load/Store exclusive
+ * instructions will rely on the gen_helper_{ldlink,stcond} helpers,
+ * offloading most of the work to the softmmu_llsc_template.h functions.
+ *
+ * Otherwise, the instructions are implemented by remembering
  * the value/address loaded, and seeing if these are the same
  * when the store is performed. This is not actually the architecturally
  * mandated semantics, but it works for typical guest code sequences
@@ -1695,6 +1707,66 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
  * this sequence is effectively atomic. In user emulation mode we
  * throw an exception and handle the atomic operation elsewhere.
  */
+#ifdef CONFIG_ARM_USE_LDST_EXCL
+static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
+                               TCGv_i64 addr, int size, bool is_pair)
+{
+    /* In case @is_pair is set, we have to guarantee that at least the 128
+     * bits accessed by a Load Exclusive Pair (64-bit variant) are protected.
+     * Since we do not have 128-bit helpers, we split the access into two
+     * halves; the first of them sets the exclusive region to cover at least
+     * 128 bits (this is why aarch64 has a custom
+     * cc->cpu_set_excl_protected_range hook which covers 128 bits).
+     */
+    TCGv_i32 mem_idx = tcg_temp_new_i32();
+
+    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
+
+    g_assert(size <= 3);
+
+    if (size < 3) {
+        TCGv_i32 tmp = tcg_temp_new_i32();
+
+        switch (size) {
+        case 0:
+            gen_helper_ldlink_i8(tmp, cpu_env, addr, mem_idx);
+            break;
+        case 1:
+            gen_helper_ldlink_i16(tmp, cpu_env, addr, mem_idx);
+            break;
+        case 2:
+            gen_helper_ldlink_i32(tmp, cpu_env, addr, mem_idx);
+            break;
+        default:
+            abort();
+        }
+
+        TCGv_i64 tmp64 = tcg_temp_new_i64();
+        tcg_gen_ext_i32_i64(tmp64, tmp);
+        tcg_gen_mov_i64(cpu_reg(s, rt), tmp64);
+
+        tcg_temp_free_i32(tmp);
+        tcg_temp_free_i64(tmp64);
+    } else {
+        gen_helper_ldlink_i64(cpu_reg(s, rt), cpu_env, addr, mem_idx);
+    }
+
+    if (is_pair) {
+        TCGMemOp memop = MO_TE + size;
+        TCGv_i64 addr2 = tcg_temp_new_i64();
+        TCGv_i64 hitmp = tcg_temp_new_i64();
+
+        g_assert(size >= 2);
+        tcg_gen_addi_i64(addr2, addr, 1 << size);
+        tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
+        tcg_temp_free_i64(addr2);
+        tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
+        tcg_temp_free_i64(hitmp);
+    }
+
+    tcg_temp_free_i32(mem_idx);
+}
+#else
 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                                TCGv_i64 addr, int size, bool is_pair)
 {
@@ -1723,6 +1795,7 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
     tcg_temp_free_i64(tmp);
     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
 }
+#endif

 #ifdef CONFIG_USER_ONLY
 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
@@ -1733,6 +1806,65 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                      size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
     gen_exception_internal_insn(s, 4, EXCP_STREX);
 }
+#elif defined(CONFIG_ARM_USE_LDST_EXCL)
+static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
+                                TCGv_i64 addr, int size, int is_pair)
+{
+    /* Don't bother checking whether we are actually in an exclusive context,
+     * since the helpers take care of it. */
+    TCGv_i32 mem_idx = tcg_temp_new_i32();
+
+    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
+
+    g_assert(size <= 3);
+    if (is_pair) {
+        if (size == 3) {
+            gen_helper_stxp_i128(cpu_reg(s, rd), cpu_env, addr, cpu_reg(s, rt),
+                                 cpu_reg(s, rt2), mem_idx);
+        } else if (size == 2) {
+            /* Paired single word case. After merging the two registers into
+             * one, we use a single stcond_i64 to store the value to memory. */
+            TCGv_i64 val = tcg_temp_new_i64();
+
+            /* Merge rt (low half) and rt2 (high half) into one 64-bit value. */
+            tcg_gen_concat32_i64(val, cpu_reg(s, rt), cpu_reg(s, rt2));
+            gen_helper_stcond_i64(cpu_reg(s, rd), cpu_env, addr, val, mem_idx);
+
+            tcg_temp_free_i64(val);
+        } else {
+            abort();
+        }
+    } else {
+        if (size < 3) {
+            TCGv_i32 val = tcg_temp_new_i32();
+
+            tcg_gen_extrl_i64_i32(val, cpu_reg(s, rt));
+
+            switch (size) {
+            case 0:
+                gen_helper_stcond_i8(cpu_reg(s, rd), cpu_env, addr, val,
+                                     mem_idx);
+                break;
+            case 1:
+                gen_helper_stcond_i16(cpu_reg(s, rd), cpu_env, addr, val,
+                                      mem_idx);
+                break;
+            case 2:
+                gen_helper_stcond_i32(cpu_reg(s, rd), cpu_env, addr, val,
+                                      mem_idx);
+                break;
+            default:
+                abort();
+            }
+            tcg_temp_free_i32(val);
+        } else {
+            gen_helper_stcond_i64(cpu_reg(s, rd), cpu_env, addr, cpu_reg(s, rt),
+                                  mem_idx);
+        }
+    }
+
+    tcg_temp_free_i32(mem_idx);
+}
 #else
 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                 TCGv_i64 inaddr, int size, int is_pair)
--
2.7.0