[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 19/23] i386: Destructive FP helpers for AVX
From: |
Paolo Bonzini |
Subject: |
[PATCH 19/23] i386: Destructive FP helpers for AVX |
Date: |
Sat, 27 Aug 2022 01:12:00 +0200 |
From: Paul Brook <paul@nowt.org>
Prepare the horizontal arithmetic vector helpers for AVX.
These currently use a dummy Reg-typed variable to store the result, then
assign the whole register. This will cause 128-bit operations to corrupt
the upper half of the register, so replace it with explicit temporaries
and element assignments.
Signed-off-by: Paul Brook <paul@nowt.org>
Message-Id: <20220424220204.2493824-18-paul@nowt.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/ops_sse.h | 70 +++++++++++++++++++++----------------------
1 file changed, 35 insertions(+), 35 deletions(-)
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 17d04888c5..ed2f04ded5 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -940,45 +940,45 @@ void helper_insertq_i(CPUX86State *env, ZMMReg *d, int
index, int length)
d->ZMM_Q(0) = helper_insertq(d->ZMM_Q(0), index, length);
}
-void glue(helper_haddps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
-
- r.ZMM_S(0) = float32_add(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status);
- r.ZMM_S(1) = float32_add(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
- r.ZMM_S(2) = float32_add(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
- r.ZMM_S(3) = float32_add(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
- MOVE(*d, r);
+#define SSE_HELPER_HPS(name, F) \
+void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \
+{ \
+ Reg *v = d; \
+ float32 r[2 << SHIFT]; \
+ int i, j; \
+ for (i = j = 0; j < 4; i++, j += 2) { \
+ r[i] = F(v->ZMM_S(j), v->ZMM_S(j + 1), &env->sse_status); \
+ } \
+ for (j = 0; j < 4; i++, j += 2) { \
+ r[i] = F(s->ZMM_S(j), s->ZMM_S(j + 1), &env->sse_status); \
+ } \
+ for (i = 0; i < 2 << SHIFT; i++) { \
+ d->ZMM_S(i) = r[i]; \
+ } \
}
-void glue(helper_haddpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
+SSE_HELPER_HPS(haddps, float32_add)
+SSE_HELPER_HPS(hsubps, float32_sub)
- r.ZMM_D(0) = float64_add(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
- r.ZMM_D(1) = float64_add(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
- MOVE(*d, r);
+#define SSE_HELPER_HPD(name, F) \
+void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \
+{ \
+ Reg *v = d; \
+ float64 r[2 << SHIFT]; \
+ int i, j; \
+ for (i = j = 0; j < 2; i++, j += 2) { \
+ r[i] = F(v->ZMM_D(j), v->ZMM_D(j + 1), &env->sse_status); \
+ } \
+ for (j = 0; j < 2; i++, j += 2) { \
+ r[i] = F(s->ZMM_D(j), s->ZMM_D(j + 1), &env->sse_status); \
+ } \
+ for (i = 0; i < 1 << SHIFT; i++) { \
+ d->ZMM_D(i) = r[i]; \
+ } \
}
-void glue(helper_hsubps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
-
- r.ZMM_S(0) = float32_sub(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status);
- r.ZMM_S(1) = float32_sub(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
- r.ZMM_S(2) = float32_sub(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
- r.ZMM_S(3) = float32_sub(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
- MOVE(*d, r);
-}
-
-void glue(helper_hsubpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
-
- r.ZMM_D(0) = float64_sub(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
- r.ZMM_D(1) = float64_sub(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
- MOVE(*d, r);
-}
+SSE_HELPER_HPD(haddpd, float64_add)
+SSE_HELPER_HPD(hsubpd, float64_sub)
void glue(helper_addsubps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{
@@ -1999,7 +1999,7 @@ void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, Reg
*d, Reg *s,
int i, j;
uint16_t r[8];
- for (j = 0; j < 4 << SHIFT; j++) {
+ for (j = 0; j < 4 << SHIFT; ) {
int s0 = (j * 2) + ((offset & 3) << 2);
int d0 = (j * 2) + ((offset & 4) << 0);
for (i = 0; i < 8; i++, d0++) {
--
2.37.1
- Re: [PATCH 07/23] i386: check SSE table flags instead of hardcoding opcodes, (continued)
- [PATCH 08/23] i386: isolate MMX code more, Paolo Bonzini, 2022/08/26
- [PATCH 09/23] i386: Add size suffix to vector FP helpers, Paolo Bonzini, 2022/08/26
- [PATCH 11/23] i386: Add CHECK_NO_VEX, Paolo Bonzini, 2022/08/26
- [PATCH 10/23] i386: do not cast gen_helper_* function pointers, Paolo Bonzini, 2022/08/26
- [PATCH 12/23] i386: Rewrite vector shift helper, Paolo Bonzini, 2022/08/26
- [PATCH 23/23] i386: AVX+AES helpers prep, Paolo Bonzini, 2022/08/26
- [PATCH 19/23] i386: Destructive FP helpers for AVX,
Paolo Bonzini <=
- [PATCH 13/23] i386: Rewrite simple integer vector helpers, Paolo Bonzini, 2022/08/26
- [PATCH 14/23] i386: Misc integer AVX helper prep, Paolo Bonzini, 2022/08/26
- [PATCH 15/23] i386: Destructive vector helpers for AVX, Paolo Bonzini, 2022/08/26
- [PATCH 20/23] i386: Misc AVX helper prep, Paolo Bonzini, 2022/08/26
- [PATCH 22/23] i386: AVX pclmulqdq prep, Paolo Bonzini, 2022/08/26
- [PATCH 16/23] i386: Floating point arithmetic helper AVX prep, Paolo Bonzini, 2022/08/26
- [PATCH 18/23] i386: Dot product AVX helper prep, Paolo Bonzini, 2022/08/26
- [PATCH 17/23] i386: reimplement AVX comparison helpers, Paolo Bonzini, 2022/08/26