[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 27/30] tcg/i386: Examine MemOp for atomicity and alignment
From: |
Richard Henderson |
Subject: |
[PATCH v2 27/30] tcg/i386: Examine MemOp for atomicity and alignment |
Date: |
Wed, 15 Feb 2023 16:57:36 -1000 |
No change to the ultimate load/store routines yet, so some
atomicity conditions are not yet honored, but this plumbs the change
to alignment through the adjacent functions.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/i386/tcg-target.c.inc | 128 ++++++++++++++++++++++++++++++--------
1 file changed, 101 insertions(+), 27 deletions(-)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 21442c9339..6ee7bc5a9a 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1746,6 +1746,83 @@ tcg_out_testi(TCGContext *s, TCGReg r, uint32_t i)
}
}
+/*
+ * Return the maximum atomicity (atmax) that the inline fast path must
+ * provide for @opc, and store in *@out_al the alignment, in log2 bytes,
+ * that the fast path must test.  The alignment may be larger than that
+ * specified in @opc; the slow path helper diagnoses the correct alignment.
+ */
+static MemOp atom_and_align_for_opc(TCGContext *s, MemOp opc, MemOp *out_al)
+{
+ MemOp align = get_alignment_bits(opc);
+ MemOp atom, atmax, atsub, size = opc & MO_SIZE;
+
+ /* When serialized (CF_PARALLEL clear), no further atomicity required. */
+ if (s->gen_tb->cflags & CF_PARALLEL) {
+ atom = opc & MO_ATOM_MASK;
+ } else {
+ atom = MO_ATOM_NONE;
+ }
+
+ atmax = opc & MO_ATMAX_MASK;
+ if (atmax == MO_ATMAX_SIZE) {
+ atmax = size;
+ } else {
+ atmax = atmax >> MO_ATMAX_SHIFT;
+ }
+
+ switch (atom) {
+ case MO_ATOM_NONE:
+ /* No specific atomicity is required: discard atmax from @opc. */
+ atmax = MO_8;
+ atsub = MO_8;
+ break;
+ case MO_ATOM_IFALIGN:
+ /* If unaligned, the subobjects are bytes; atmax is kept as decoded. */
+ atsub = MO_8;
+ break;
+ case MO_ATOM_WITHIN16:
+ /* If unaligned, there are subobjects if atmax < size. */
+ atsub = (atmax < size ? atmax : MO_8);
+ atmax = size;
+ break;
+ case MO_ATOM_SUBALIGN:
+ /* If unaligned but not odd, there are subobjects up to atmax - 1. */
+ atsub = (atmax == MO_8 ? MO_8 : atmax - 1);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /*
+ * Per Intel Architecture SDM, Volume 3 Section 8.1.1,
+ * - Pentium family guarantees atomicity of aligned <= 64-bit.
+ * - P6 family guarantees atomicity of unaligned <= 64-bit
+ * which fit within a cache line.
+ * - AVX guarantees atomicity of aligned 128-bit VMOVDQA (et al).
+ *
+ * There is no language in the Intel manual specifying what happens
+ * with the partial memory operations when crossing a cache line.
+ * When there is required atomicity of subobjects (atsub != MO_8) and
+ * @opc does not already force alignment to atmax, we must perform an
+ * additional runtime test for alignment and then perform either
+ * the full operation, or two half-sized operations.
+ *
+ * For x86_64, and MO_64, we do not have a scratch register with
+ * which to do this. Only allow splitting for MO_64 on i386
+ * (TCG_TARGET_REG_BITS == 32), where the data is already separated,
+ * or for MO_128 on x86_64.  Otherwise, require full alignment
+ * (align = size) and fall back to the helper for the misaligned case.
+ */
+ if (align < atmax
+ && atsub != MO_8
+ && size != (TCG_TARGET_REG_BITS == 64 ? MO_128 : MO_64)) {
+ align = size;
+ }
+
+ *out_al = align;
+ return atmax;
+}
+
/*
* helper signature: helper_ld*_mmu(CPUState *env, target_ulong addr,
* int mmu_idx, uintptr_t ra)
@@ -1987,7 +2064,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
* First argument register is clobbered.
*/
static void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
- int mem_index, MemOp opc,
+ int mem_index, MemOp a_bits, MemOp s_bits,
tcg_insn_unit **label_ptr, int which)
{
const TCGReg r0 = TCG_REG_L0;
@@ -1995,8 +2072,6 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
TCGType ttype = TCG_TYPE_I32;
TCGType tlbtype = TCG_TYPE_I32;
int trexw = 0, hrexw = 0, tlbrexw = 0;
- unsigned a_bits = get_alignment_bits(opc);
- unsigned s_bits = opc & MO_SIZE;
unsigned a_mask = (1 << a_bits) - 1;
unsigned s_mask = (1 << s_bits) - 1;
target_ulong tlb_mask;
@@ -2124,7 +2199,8 @@ static inline int setup_guest_base_seg(void)
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg base, int index, intptr_t ofs,
- int seg, TCGType type, MemOp memop)
+ int seg, TCGType type, MemOp memop,
+ MemOp atom, MemOp align)
{
bool use_movbe = false;
int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW);
@@ -2225,11 +2301,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type)
TCGReg datalo, datahi, addrlo;
TCGReg addrhi __attribute__((unused));
MemOpIdx oi;
- MemOp opc;
+ MemOp opc, atom, align;
tcg_insn_unit *label_ptr[2] = { };
-#ifndef CONFIG_SOFTMMU
- unsigned a_bits;
-#endif
datalo = *args++;
switch (type) {
@@ -2246,26 +2319,27 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type)
addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
oi = *args++;
opc = get_memop(oi);
+ atom = atom_and_align_for_opc(s, opc, &align);
#if defined(CONFIG_SOFTMMU)
- tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
+ tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), align, opc & MO_SIZE,
label_ptr, offsetof(CPUTLBEntry, addr_read));
/* TLB Hit. */
- tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, type, opc);
+ tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, type,
+ opc, atom, align);
/* Record the current context of a load into ldst label */
add_qemu_ldst_label(s, true, type, oi, datalo, datahi,
TCG_REG_L1, addrhi, s->code_ptr, label_ptr);
#else
- a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, addrlo, a_bits, label_ptr);
+ if (align) {
+ tcg_out_test_alignment(s, addrlo, align, label_ptr);
}
tcg_out_qemu_ld_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
x86_guest_base_offset, x86_guest_base_seg,
- type, opc);
- if (a_bits) {
+ type, opc, atom, align);
+ if (align) {
add_qemu_ldst_label(s, true, type, oi, datalo, datahi,
addrlo, addrhi, s->code_ptr, label_ptr);
}
@@ -2274,7 +2348,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type)
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg base, int index, intptr_t ofs,
- int seg, MemOp memop)
+ int seg, MemOp memop,
+ MemOp atom, MemOp align)
{
bool use_movbe = false;
int movop = OPC_MOVL_EvGv;
@@ -2329,11 +2404,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGType type)
TCGReg datalo, datahi, addrlo;
TCGReg addrhi __attribute__((unused));
MemOpIdx oi;
- MemOp opc;
+ MemOp opc, atom, align;
tcg_insn_unit *label_ptr[2] = { };
-#ifndef CONFIG_SOFTMMU
- unsigned a_bits;
-#endif
datalo = *args++;
switch (type) {
@@ -2350,25 +2422,27 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGType type)
addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
oi = *args++;
opc = get_memop(oi);
+ atom = atom_and_align_for_opc(s, opc, &align);
#if defined(CONFIG_SOFTMMU)
- tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
+ tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), align, opc & MO_SIZE,
label_ptr, offsetof(CPUTLBEntry, addr_write));
/* TLB Hit. */
- tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
+ tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0,
+ opc, atom, align);
/* Record the current context of a store into ldst label */
add_qemu_ldst_label(s, false, type, oi, datalo, datahi,
TCG_REG_L1, addrhi, s->code_ptr, label_ptr);
#else
- a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, addrlo, a_bits, label_ptr);
+ if (align) {
+ tcg_out_test_alignment(s, addrlo, align, label_ptr);
}
tcg_out_qemu_st_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
- x86_guest_base_offset, x86_guest_base_seg, opc);
- if (a_bits) {
+ x86_guest_base_offset, x86_guest_base_seg,
+ opc, atom, align);
+ if (align) {
add_qemu_ldst_label(s, false, type, oi, datalo, datahi,
addrlo, addrhi, s->code_ptr, label_ptr);
}
--
2.34.1
- [PATCH v2 17/30] tcg/aarch64: Detect have_lse, have_lse2 for linux, (continued)
- [PATCH v2 17/30] tcg/aarch64: Detect have_lse, have_lse2 for linux, Richard Henderson, 2023/02/15
- [PATCH v2 19/30] accel/tcg: Add have_lse2 support in ldst_atomicity, Richard Henderson, 2023/02/15
- [PATCH v2 18/30] tcg/aarch64: Detect have_lse, have_lse2 for darwin, Richard Henderson, 2023/02/15
- [PATCH v2 20/30] tcg: Introduce TCG_OPF_TYPE_MASK, Richard Henderson, 2023/02/15
- [PATCH v2 22/30] tcg/i386: Introduce tcg_out_mov2, Richard Henderson, 2023/02/15
- [PATCH v2 21/30] tcg: Add INDEX_op_qemu_{ld,st}_i128, Richard Henderson, 2023/02/15
- [PATCH v2 23/30] tcg/i386: Introduce tcg_out_testi, Richard Henderson, 2023/02/15
- [PATCH v2 24/30] tcg/i386: Use full load/store helpers in user-only mode, Richard Henderson, 2023/02/15
- [PATCH v2 25/30] tcg/i386: Replace is64 with type in qemu_ld/st routines, Richard Henderson, 2023/02/15
- [PATCH v2 26/30] tcg/i386: Mark Win64 call-saved vector regs as reserved, Richard Henderson, 2023/02/15
- [PATCH v2 27/30] tcg/i386: Examine MemOp for atomicity and alignment,
Richard Henderson <=
- [PATCH v2 28/30] tcg/i386: Support 128-bit load/store with have_atomic16, Richard Henderson, 2023/02/15
- [PATCH v2 29/30] tcg/i386: Add vex_v argument to tcg_out_vex_modrm_pool, Richard Henderson, 2023/02/15
- [PATCH v2 30/30] tcg/i386: Honor 64-bit atomicity in 32-bit mode, Richard Henderson, 2023/02/15