From: alvise rigo
Subject: Re: [Qemu-devel] [RFC v3 06/13] target-i386: translate: implement qemu_ldlink and qemu_stcond ops
Date: Fri, 17 Jul 2015 15:27:45 +0200
On Fri, Jul 17, 2015 at 2:56 PM, Alex Bennée <address@hidden> wrote:
>
> Alvise Rigo <address@hidden> writes:
>
>> Implement the strex and ldrex instructions relying on TCG's qemu_ldlink and
>> qemu_stcond. For the time being only 32-bit configurations are supported.
>>
>> Suggested-by: Jani Kokkonen <address@hidden>
>> Suggested-by: Claudio Fontana <address@hidden>
>> Signed-off-by: Alvise Rigo <address@hidden>
>> ---
>> tcg/i386/tcg-target.c | 136 ++++++++++++++++++++++++++++++++++++++++++--------
>> 1 file changed, 114 insertions(+), 22 deletions(-)
>>
>> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
>> index 0d7c99c..d8250a9 100644
>> --- a/tcg/i386/tcg-target.c
>> +++ b/tcg/i386/tcg-target.c
>> @@ -1141,6 +1141,17 @@ static void * const qemu_ld_helpers[16] = {
>> [MO_BEQ] = helper_be_ldq_mmu,
>> };
>>
>> +/* LoadLink helpers, only unsigned. Use the macro below to access them. */
>> +static void * const qemu_ldex_helpers[16] = {
>> + [MO_LEUL] = helper_le_ldlinkul_mmu,
>> +};
>> +
>> +#define LDEX_HELPER(mem_op) \
>> +({ \
>> + assert(mem_op & MO_EXCL); \
>> + qemu_ldex_helpers[((int)mem_op - MO_EXCL)]; \
>> +})
>> +
>> /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
>> *                    uintxx_t val, int mmu_idx, uintptr_t ra)
>> */
>> @@ -1154,6 +1165,17 @@ static void * const qemu_st_helpers[16] = {
>> [MO_BEQ] = helper_be_stq_mmu,
>> };
>>
>> +/* StoreConditional helpers. Use the macro below to access them. */
>> +static void * const qemu_stex_helpers[16] = {
>> + [MO_LEUL] = helper_le_stcondl_mmu,
>> +};
>> +
>> +#define STEX_HELPER(mem_op) \
>> +({ \
>> + assert(mem_op & MO_EXCL); \
>> + qemu_stex_helpers[(int)mem_op - MO_EXCL]; \
>> +})
>> +
>
> Same comments as for target-arm.
>
> Do we need to be protecting backends with HAS_LDST_EXCL defines or some
> such macro hackery? What currently happens if you use the new TCG ops
> when the backend doesn't support them? Is supporting all backends a
> prerequisite for the series?
I think the ideal approach would be to have all the backends
implement the slow path for atomic instructions, so that the
HAS_LDST_EXCL macro would not be needed. A frontend could then
choose whether or not to rely on the slow path.
So, ideally, yes, it's a prerequisite.
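Until all backends are converted, though, a guard modeled on the existing
TCG_TARGET_HAS_* convention could keep things building. Just a rough sketch
of the idea (the flag name and the tcg_gen_qemu_ldlink_i32 signature are
illustrative assumptions, not something this series defines as-is; types
such as TCGv_i32 come from tcg/tcg.h):

    /* A converted backend would advertise the new ops in its tcg-target.h
     * (hypothetical flag name): */
    #define TCG_TARGET_HAS_ldst_excl 1

    /* tcg.h would default the flag to 0 for every other backend: */
    #ifndef TCG_TARGET_HAS_ldst_excl
    #define TCG_TARGET_HAS_ldst_excl 0
    #endif

    /* A frontend could then choose at translation/build time, e.g. for a
     * 32-bit exclusive load (signature assumed to mirror
     * tcg_gen_qemu_ld_i32): */
    #if TCG_TARGET_HAS_ldst_excl
        tcg_gen_qemu_ldlink_i32(val, addr, mem_idx, MO_LEUL | MO_EXCL);
    #else
        /* fall back to a plain load plus the pre-existing
         * non-atomic exclusive emulation */
        tcg_gen_qemu_ld_i32(val, addr, mem_idx, MO_LEUL);
    #endif

That said, converting all the backends and dropping the flag entirely
would keep the frontends simpler.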
Regards,
alvise
>
>> /* Perform the TLB load and compare.
>>
>> Inputs:
>> @@ -1249,6 +1271,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
>> * for a load or store, so that we can later generate the correct helper code
>> */
>> static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
>> + TCGReg llsc_success,
>> TCGReg datalo, TCGReg datahi,
>> TCGReg addrlo, TCGReg addrhi,
>> tcg_insn_unit *raddr,
>> @@ -1257,6 +1280,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
>> TCGLabelQemuLdst *label = new_ldst_label(s);
>>
>> label->is_ld = is_ld;
>> + label->llsc_success = llsc_success;
>> label->oi = oi;
>> label->datalo_reg = datalo;
>> label->datahi_reg = datahi;
>> @@ -1311,7 +1335,11 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
>> (uintptr_t)l->raddr);
>> }
>>
>> - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
>> + if (opc & MO_EXCL) {
>> + tcg_out_call(s, LDEX_HELPER(opc));
>> + } else {
>> + tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
>> + }
>>
>> data_reg = l->datalo_reg;
>> switch (opc & MO_SSIZE) {
>> @@ -1415,9 +1443,16 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
>> }
>> }
>>
>> - /* "Tail call" to the helper, with the return address back inline. */
>> - tcg_out_push(s, retaddr);
>> - tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
>> + if (opc & MO_EXCL) {
>> + tcg_out_call(s, STEX_HELPER(opc));
>> + /* Save the output of the StoreConditional */
>> + tcg_out_mov(s, TCG_TYPE_I32, l->llsc_success, TCG_REG_EAX);
>> + tcg_out_jmp(s, l->raddr);
>> + } else {
>> + /* "Tail call" to the helper, with the return address back inline.
>> */
>> + tcg_out_push(s, retaddr);
>> + tcg_out_jmp(s, qemu_st_helpers[opc]);
>> + }
>> }
>> #elif defined(__x86_64__) && defined(__linux__)
>> # include <asm/prctl.h>
>> @@ -1530,7 +1565,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
>> /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
>> EAX. It will be useful once fixed registers globals are less
>> common. */
>> -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
>> +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64,
>> + bool isLoadLink)
>> {
>> TCGReg datalo, datahi, addrlo;
>> TCGReg addrhi __attribute__((unused));
>> @@ -1553,14 +1589,34 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
>> mem_index = get_mmuidx(oi);
>> s_bits = opc & MO_SIZE;
>>
>> - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
>> - label_ptr, offsetof(CPUTLBEntry, addr_read));
>> + if (isLoadLink) {
>> + TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ?
>> +             TCG_TYPE_I64 : TCG_TYPE_I32;
>> + /* The JMP address will be patched afterwards,
>> + * in tcg_out_qemu_ld_slow_path (two times when
>> + * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */
>> + tcg_out_mov(s, t, TCG_REG_L1, addrlo);
>> +
>> + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
>> + /* Store the second part of the address. */
>> + tcg_out_mov(s, t, TCG_REG_L0, addrhi);
>> + /* We add 4 to include the jmp that follows. */
>> + label_ptr[1] = s->code_ptr + 4;
>> + }
>>
>> - /* TLB Hit. */
>> - tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
>> + tcg_out_opc(s, OPC_JMP_long, 0, 0, 0);
>> + label_ptr[0] = s->code_ptr;
>> + s->code_ptr += 4;
>> + } else {
>> + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
>> + label_ptr, offsetof(CPUTLBEntry, addr_read));
>> +
>> + /* TLB Hit. */
>> + tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
>> + }
>>
>> /* Record the current context of a load into ldst label */
>> - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
>> + add_qemu_ldst_label(s, true, oi, 0, datalo, datahi, addrlo, addrhi,
>> s->code_ptr, label_ptr);
>> #else
>> {
>> @@ -1663,9 +1719,10 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
>> }
>> }
>>
>> -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>> +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64,
>> + bool isStoreCond)
>> {
>> - TCGReg datalo, datahi, addrlo;
>> + TCGReg datalo, datahi, addrlo, llsc_success;
>> TCGReg addrhi __attribute__((unused));
>> TCGMemOpIdx oi;
>> TCGMemOp opc;
>> @@ -1675,6 +1732,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>> tcg_insn_unit *label_ptr[2];
>> #endif
>>
>> + /* The stcond variant has one more param */
>> + llsc_success = (isStoreCond ? *args++ : 0);
>> +
>> datalo = *args++;
>> datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
>> addrlo = *args++;
>> @@ -1686,15 +1746,35 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>> mem_index = get_mmuidx(oi);
>> s_bits = opc & MO_SIZE;
>>
>> - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
>> - label_ptr, offsetof(CPUTLBEntry, addr_write));
>> + if (isStoreCond) {
>> + TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ?
>> +             TCG_TYPE_I64 : TCG_TYPE_I32;
>> + /* The JMP address will be filled afterwards,
>> + * in tcg_out_qemu_ld_slow_path (two times when
>> + * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */
>> + tcg_out_mov(s, t, TCG_REG_L1, addrlo);
>> +
>> + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
>> + /* Store the second part of the address. */
>> + tcg_out_mov(s, t, TCG_REG_L0, addrhi);
>> + /* We add 4 to include the jmp that follows. */
>> + label_ptr[1] = s->code_ptr + 4;
>> + }
>>
>> - /* TLB Hit. */
>> - tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
>> + tcg_out_opc(s, OPC_JMP_long, 0, 0, 0);
>> + label_ptr[0] = s->code_ptr;
>> + s->code_ptr += 4;
>> + } else {
>> + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
>> + label_ptr, offsetof(CPUTLBEntry, addr_write));
>> +
>> + /* TLB Hit. */
>> + tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
>> + }
>>
>> /* Record the current context of a store into ldst label */
>> - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
>> - s->code_ptr, label_ptr);
>> + add_qemu_ldst_label(s, false, oi, llsc_success, datalo, datahi, addrlo,
>> + addrhi, s->code_ptr, label_ptr);
>> #else
>> {
>> int32_t offset = GUEST_BASE;
>> @@ -1955,16 +2035,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>> break;
>>
>> case INDEX_op_qemu_ld_i32:
>> - tcg_out_qemu_ld(s, args, 0);
>> + tcg_out_qemu_ld(s, args, 0, 0);
>> + break;
>> + case INDEX_op_qemu_ldlink_i32:
>> + tcg_out_qemu_ld(s, args, 0, 1);
>> break;
>> case INDEX_op_qemu_ld_i64:
>> - tcg_out_qemu_ld(s, args, 1);
>> + tcg_out_qemu_ld(s, args, 1, 0);
>> break;
>> case INDEX_op_qemu_st_i32:
>> - tcg_out_qemu_st(s, args, 0);
>> + tcg_out_qemu_st(s, args, 0, 0);
>> + break;
>> + case INDEX_op_qemu_stcond_i32:
>> + tcg_out_qemu_st(s, args, 0, 1);
>> break;
>> case INDEX_op_qemu_st_i64:
>> - tcg_out_qemu_st(s, args, 1);
>> + tcg_out_qemu_st(s, args, 1, 0);
>> break;
>>
>> OP_32_64(mulu2):
>> @@ -2186,17 +2272,23 @@ static const TCGTargetOpDef x86_op_defs[] = {
>>
>> #if TCG_TARGET_REG_BITS == 64
>> { INDEX_op_qemu_ld_i32, { "r", "L" } },
>> + { INDEX_op_qemu_ldlink_i32, { "r", "L" } },
>> { INDEX_op_qemu_st_i32, { "L", "L" } },
>> + { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } },
>> { INDEX_op_qemu_ld_i64, { "r", "L" } },
>> { INDEX_op_qemu_st_i64, { "L", "L" } },
>> #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
>> { INDEX_op_qemu_ld_i32, { "r", "L" } },
>> + { INDEX_op_qemu_ldlink_i32, { "r", "L" } },
>> { INDEX_op_qemu_st_i32, { "L", "L" } },
>> + { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } },
>> { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
>> { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
>> #else
>> { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
>> + { INDEX_op_qemu_ldlink_i32, { "r", "L", "L" } },
>> { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
>> + { INDEX_op_qemu_stcond_i32, { "r", "L", "L", "L" } },
>> { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
>> { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
>> #endif
>
> --
> Alex Bennée