[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v6 15/36] tcg: Add guest load/store primitives for TCGv_i128
From: Alex Bennée
Subject: Re: [PATCH v6 15/36] tcg: Add guest load/store primitives for TCGv_i128
Date: Wed, 01 Feb 2023 09:52:13 +0000
User-agent: mu4e 1.9.18; emacs 29.0.60
Richard Henderson <richard.henderson@linaro.org> writes:
> These are not yet considering atomicity of the 16-byte value;
> this is a direct replacement for the current target code which
> uses a pair of 8-byte operations.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> include/exec/cpu_ldst.h | 10 +++
> include/tcg/tcg-op.h | 2 +
> accel/tcg/cputlb.c | 112 +++++++++++++++++++++++++++++++++
> accel/tcg/user-exec.c | 66 ++++++++++++++++++++
> tcg/tcg-op.c | 134 ++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 324 insertions(+)
>
> diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
> index d0c7c0d5fe..09b55cc0ee 100644
> --- a/include/exec/cpu_ldst.h
> +++ b/include/exec/cpu_ldst.h
> @@ -220,6 +220,11 @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr,
> uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr,
> MemOpIdx oi, uintptr_t ra);
>
> +Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
> + MemOpIdx oi, uintptr_t ra);
> +Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
> + MemOpIdx oi, uintptr_t ra);
> +
> void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val,
> MemOpIdx oi, uintptr_t ra);
> void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
> @@ -235,6 +240,11 @@ void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
> void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
> MemOpIdx oi, uintptr_t ra);
>
> +void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
> + MemOpIdx oi, uintptr_t ra);
> +void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
> + MemOpIdx oi, uintptr_t ra);
> +
> uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
> uint32_t cmpv, uint32_t newv,
> MemOpIdx oi, uintptr_t retaddr);
> diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
> index c4276767d1..e5f5b63c37 100644
> --- a/include/tcg/tcg-op.h
> +++ b/include/tcg/tcg-op.h
> @@ -845,6 +845,8 @@ void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp);
> void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp);
> void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp);
> void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
> +void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
> +void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
>
> static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
> {
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index 4e040a1cb9..e3604ad313 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -2187,6 +2187,64 @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
> return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
> }
>
> +Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
> + MemOpIdx oi, uintptr_t ra)
> +{
> + MemOp mop = get_memop(oi);
> + int mmu_idx = get_mmuidx(oi);
> + MemOpIdx new_oi;
> + unsigned a_bits;
> + uint64_t h, l;
> +
> + tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
> + a_bits = get_alignment_bits(mop);
> +
> + /* Handle CPU specific unaligned behaviour */
> + if (addr & ((1 << a_bits) - 1)) {
> + cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
> + mmu_idx, ra);
> + }
> +
> + /* Construct an unaligned 64-bit replacement MemOpIdx. */
> + mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
> + new_oi = make_memop_idx(mop, mmu_idx);
> +
> + h = helper_be_ldq_mmu(env, addr, new_oi, ra);
> + l = helper_be_ldq_mmu(env, addr + 8, new_oi, ra);
> +
> + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
> + return int128_make128(l, h);
> +}
> +
> +Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
> + MemOpIdx oi, uintptr_t ra)
> +{
> + MemOp mop = get_memop(oi);
> + int mmu_idx = get_mmuidx(oi);
> + MemOpIdx new_oi;
> + unsigned a_bits;
> + uint64_t h, l;
> +
> + tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
Why not use validate_memop() for this, as is done elsewhere in cputlb.c?
<snip>
>
> +void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
> + MemOpIdx oi, uintptr_t ra)
> +{
> + MemOp mop = get_memop(oi);
> + int mmu_idx = get_mmuidx(oi);
> + MemOpIdx new_oi;
> + unsigned a_bits;
> +
> + tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
ditto for the others
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index cb83d2375d..33ef325f6e 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -3109,6 +3109,140 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
> }
> }
>
I'm confused, because the TCG ops in this patch still use i64 operations and
the atomic usage hasn't been introduced yet. Is it worth splitting the patch?
Anyway:
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> +static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
> +{
> + MemOp mop_1 = orig, mop_2;
> +
> + tcg_debug_assert((orig & MO_SIZE) == MO_128);
> + tcg_debug_assert((orig & MO_SIGN) == 0);
> +
> + /* Use a memory ordering implemented by the host. */
> + if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
> + mop_1 &= ~MO_BSWAP;
> + }
> +
> + /* Reduce the size to 64-bit. */
> + mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
> +
> + /* Retain the alignment constraints of the original. */
> + switch (orig & MO_AMASK) {
> + case MO_UNALN:
> + case MO_ALIGN_2:
> + case MO_ALIGN_4:
> + mop_2 = mop_1;
> + break;
> + case MO_ALIGN_8:
> + /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
> + mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
> + mop_2 = mop_1;
> + break;
> + case MO_ALIGN:
> + /* Second has 8-byte alignment; first has 16-byte alignment. */
> + mop_2 = mop_1;
> + mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
> + break;
> + case MO_ALIGN_16:
> + case MO_ALIGN_32:
> + case MO_ALIGN_64:
> + /* Second has 8-byte alignment; first retains original. */
> + mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
> + break;
> + default:
> + g_assert_not_reached();
> + }
> + ret[0] = mop_1;
> + ret[1] = mop_2;
> +}
> +
> +void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
> +{
> + MemOp mop[2];
> + TCGv addr_p8;
> + TCGv_i64 x, y;
> +
> + canonicalize_memop_i128_as_i64(mop, memop);
> +
> + tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
> + addr = plugin_prep_mem_callbacks(addr);
> +
> + /* TODO: respect atomicity of the operation. */
> + /* TODO: allow the tcg backend to see the whole operation. */
> +
> + /*
> + * Since there are no global TCGv_i128, there is no visible state
> + * changed if the second load faults. Load directly into the two
> + * subwords.
> + */
> + if ((memop & MO_BSWAP) == MO_LE) {
> + x = TCGV128_LOW(val);
> + y = TCGV128_HIGH(val);
> + } else {
> + x = TCGV128_HIGH(val);
> + y = TCGV128_LOW(val);
> + }
> +
> + gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
> +
> + if ((mop[0] ^ memop) & MO_BSWAP) {
> + tcg_gen_bswap64_i64(x, x);
> + }
> +
> + addr_p8 = tcg_temp_new();
> + tcg_gen_addi_tl(addr_p8, addr, 8);
> + gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
> + tcg_temp_free(addr_p8);
> +
> + if ((mop[0] ^ memop) & MO_BSWAP) {
> + tcg_gen_bswap64_i64(y, y);
> + }
> +
> + plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
> + QEMU_PLUGIN_MEM_R);
> +}
> +
> +void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
> +{
> + MemOp mop[2];
> + TCGv addr_p8;
> + TCGv_i64 x, y;
> +
> + canonicalize_memop_i128_as_i64(mop, memop);
> +
> + tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
> + addr = plugin_prep_mem_callbacks(addr);
> +
> + /* TODO: respect atomicity of the operation. */
> + /* TODO: allow the tcg backend to see the whole operation. */
> +
> + if ((memop & MO_BSWAP) == MO_LE) {
> + x = TCGV128_LOW(val);
> + y = TCGV128_HIGH(val);
> + } else {
> + x = TCGV128_HIGH(val);
> + y = TCGV128_LOW(val);
> + }
> +
> + addr_p8 = tcg_temp_new();
> + if ((mop[0] ^ memop) & MO_BSWAP) {
> + TCGv_i64 t = tcg_temp_new_i64();
> +
> + tcg_gen_bswap64_i64(t, x);
> + gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
> + tcg_gen_bswap64_i64(t, y);
> + tcg_gen_addi_tl(addr_p8, addr, 8);
> + gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
> + tcg_temp_free_i64(t);
> + } else {
> + gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
> + tcg_gen_addi_tl(addr_p8, addr, 8);
> + gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
> + }
> + tcg_temp_free(addr_p8);
> +
> + plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
> + QEMU_PLUGIN_MEM_W);
> +}
> +
> static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
> {
> switch (opc & MO_SSIZE) {
--
Alex Bennée
Virtualisation Tech Lead @ Linaro
- Re: [PATCH v6 15/36] tcg: Add guest load/store primitives for TCGv_i128,
Alex Bennée <=