[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 02/11] Hexagon (target/hexagon) Fix predicated assignment to .tmp
From: |
Taylor Simpson |
Subject: |
[PULL 02/11] Hexagon (target/hexagon) Fix predicated assignment to .tmp and .cur |
Date: |
Thu, 10 Nov 2022 16:52:05 -0800 |
Here are example instructions with a predicated .tmp/.cur assignment
if (p1) v12.tmp = vmem(r7 + #0)
if (p0) v12.cur = vmem(r9 + #0)
The .tmp/.cur indicates that references to v12 in the same packet
take the result of the load. However, when the predicate is false,
the value at the start of the packet should be used. After the packet
commits, the .tmp value is dropped, but the .cur value is maintained.
To fix this bug, we preload the original value from the HVX register
into the temporary used for the result.
Test cases added to tests/tcg/hexagon/hvx_misc.c
Acked-by: Richard Henderson <richard.henderson@linaro.org>
Co-authored-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Message-Id: <20221108162906.3166-3-tsimpson@quicinc.com>
---
target/hexagon/translate.h | 6 +++
tests/tcg/hexagon/hvx_misc.c | 72 +++++++++++++++++++++++++++++++++
target/hexagon/gen_tcg_funcs.py | 12 ++++++
3 files changed, 90 insertions(+)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 115e29b84f..b8fcf615e8 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -86,6 +86,12 @@ static inline bool is_preloaded(DisasContext *ctx, int num)
return test_bit(num, ctx->regs_written);
}
+static inline bool is_vreg_preloaded(DisasContext *ctx, int num)
+{
+ return test_bit(num, ctx->vregs_updated) ||
+ test_bit(num, ctx->vregs_updated_tmp);
+}
+
intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
int num, bool alloc_ok);
intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c
index 6e2c9ab3cd..53d5c9b44f 100644
--- a/tests/tcg/hexagon/hvx_misc.c
+++ b/tests/tcg/hexagon/hvx_misc.c
@@ -541,6 +541,75 @@ static void test_vshuff(void)
check_output_b(__LINE__, 1);
}
+static void test_load_tmp_predicated(void)
+{
+ void *p0 = buffer0;
+ void *p1 = buffer1;
+ void *pout = output;
+ bool pred = true;
+
+ for (int i = 0; i < BUFSIZE; i++) {
+ /*
+ * Load into v12 as .tmp with a predicate
+ * When the predicate is true, we get the vector from buffer1[i]
+ * When the predicate is false, we get a vector of all 1's
+ * Regardless of the predicate, the next packet should have
+ * a vector of all 1's
+ */
+ asm("v3 = vmem(%0 + #0)\n\t"
+ "r1 = #1\n\t"
+ "v12 = vsplat(r1)\n\t"
+ "p1 = !cmp.eq(%3, #0)\n\t"
+ "{\n\t"
+ " if (p1) v12.tmp = vmem(%1 + #0)\n\t"
+ " v4.w = vadd(v12.w, v3.w)\n\t"
+ "}\n\t"
+ "v4.w = vadd(v4.w, v12.w)\n\t"
+ "vmem(%2 + #0) = v4\n\t"
+ : : "r"(p0), "r"(p1), "r"(pout), "r"(pred)
+ : "r1", "p1", "v12", "v3", "v4", "v6", "memory");
+ p0 += sizeof(MMVector);
+ p1 += sizeof(MMVector);
+ pout += sizeof(MMVector);
+
+ for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+ expect[i].w[j] =
+ pred ? buffer0[i].w[j] + buffer1[i].w[j] + 1
+ : buffer0[i].w[j] + 2;
+ }
+ pred = !pred;
+ }
+
+ check_output_w(__LINE__, BUFSIZE);
+}
+
+static void test_load_cur_predicated(void)
+{
+ bool pred = true;
+ for (int i = 0; i < BUFSIZE; i++) {
+ asm volatile("p0 = !cmp.eq(%3, #0)\n\t"
+ "v3 = vmem(%0+#0)\n\t"
+ /*
+ * Preload v4 to make sure that the assignment from the
+ * packet below is not being ignored when pred is false.
+ */
+ "r0 = #0x01237654\n\t"
+ "v4 = vsplat(r0)\n\t"
+ "{\n\t"
+ " if (p0) v3.cur = vmem(%1+#0)\n\t"
+ " v4 = v3\n\t"
+ "}\n\t"
+ "vmem(%2+#0) = v4\n\t"
+ :
+ : "r"(&buffer0[i]), "r"(&buffer1[i]),
+ "r"(&output[i]), "r"(pred)
+ : "r0", "p0", "v3", "v4", "memory");
+ expect[i] = pred ? buffer1[i] : buffer0[i];
+ pred = !pred;
+ }
+ check_output_w(__LINE__, BUFSIZE);
+}
+
int main()
{
init_buffers();
@@ -578,6 +647,9 @@ int main()
test_vshuff();
+ test_load_tmp_predicated();
+ test_load_cur_predicated();
+
puts(err ? "FAIL" : "PASS");
return err ? 1 : 0;
}
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
index 02a6565685..ca5fde91cc 100755
--- a/target/hexagon/gen_tcg_funcs.py
+++ b/target/hexagon/gen_tcg_funcs.py
@@ -173,6 +173,18 @@ def genptr_decl(f, tag, regtype, regid, regno):
f.write(" ctx_future_vreg_off(ctx, %s%sN," % \
(regtype, regid))
f.write(" 1, true);\n");
+ if 'A_CONDEXEC' in hex_common.attribdict[tag]:
+ f.write(" if (!is_vreg_preloaded(ctx, %s)) {\n" % (regN))
+ f.write(" intptr_t src_off =")
+ f.write(" offsetof(CPUHexagonState, VRegs[%s%sN]);\n"% \
+ (regtype, regid))
+ f.write(" tcg_gen_gvec_mov(MO_64, %s%sV_off,\n" % \
+ (regtype, regid))
+ f.write(" src_off,\n")
+ f.write(" sizeof(MMVector),\n")
+ f.write(" sizeof(MMVector));\n")
+ f.write(" }\n")
+
if (not hex_common.skip_qemu_helper(tag)):
f.write(" TCGv_ptr %s%sV = tcg_temp_new_ptr();\n" % \
(regtype, regid))
--
2.17.1
- [PULL 00/11] Hexagon bug fixes and performance improvement, Taylor Simpson, 2022/11/10
- [PULL 03/11] Hexagon (target/hexagon) Add overrides for S2_asr_r_r_sat/S2_asl_r_r_sat, Taylor Simpson, 2022/11/10
- [PULL 02/11] Hexagon (target/hexagon) Fix predicated assignment to .tmp and .cur,
Taylor Simpson <=
- [PULL 05/11] Hexagon (target/hexagon) Remove PC from the runtime state, Taylor Simpson, 2022/11/10
- [PULL 08/11] Hexagon (target/hexagon) Add overrides for compound compare and jump, Taylor Simpson, 2022/11/10
- [PULL 01/11] Hexagon (target/hexagon) Add pkt and insn to DisasContext, Taylor Simpson, 2022/11/10
- [PULL 09/11] Hexagon (target/hexagon) Add overrides for various forms of jump, Taylor Simpson, 2022/11/10
- [PULL 11/11] Hexagon (target/hexagon) Use direct block chaining for tight loops, Taylor Simpson, 2022/11/10
- [PULL 07/11] Hexagon (target/hexagon) Add overrides for direct call instructions, Taylor Simpson, 2022/11/10
- [PULL 06/11] Hexagon (target/hexagon) Remove next_PC from runtime state, Taylor Simpson, 2022/11/10
- [PULL 04/11] Hexagon (target/hexagon) Only use branch_taken when packet has multi cof, Taylor Simpson, 2022/11/10
- [PULL 10/11] Hexagon (target/hexagon) Use direct block chaining for direct jump/branch, Taylor Simpson, 2022/11/10
- Re: [PULL 00/11] Hexagon bug fixes and performance improvement, Stefan Hajnoczi, 2022/11/10