[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 11/35] target/i386: Prepare ops_sse_header.h for 256 bit AVX
From: Paolo Bonzini
Subject: [PATCH 11/35] target/i386: Prepare ops_sse_header.h for 256 bit AVX
Date: Thu, 13 Oct 2022 23:46:27 +0200

From: Paul Brook <paul@nowt.org>
Adjust all #ifdefs to match the ones in ops_sse.h.
Signed-off-by: Paul Brook <paul@nowt.org>
Message-Id: <20220424220204.2493824-23-paul@nowt.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/ops_sse_header.h | 114 +++++++++++++++++++++++------------
1 file changed, 75 insertions(+), 39 deletions(-)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 400b24c091..9d9a115df4 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -43,7 +43,7 @@ DEF_HELPER_3(glue(pslld, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(psrlq, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(psllq, SUFFIX), void, env, Reg, Reg)
-#if SHIFT == 1
+#if SHIFT >= 1
DEF_HELPER_3(glue(psrldq, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
#endif
@@ -101,7 +101,7 @@ SSE_HELPER_L(pcmpeql, FCMPEQ)
SSE_HELPER_W(pmullw, FMULLW)
#if SHIFT == 0
-SSE_HELPER_W(pmulhrw, FMULHRW)
+DEF_HELPER_3(glue(pmulhrw, SUFFIX), void, env, Reg, Reg)
#endif
SSE_HELPER_W(pmulhuw, FMULHUW)
SSE_HELPER_W(pmulhw, FMULHW)
@@ -113,7 +113,9 @@ DEF_HELPER_3(glue(pmuludq, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(pmaddwd, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(psadbw, SUFFIX), void, env, Reg, Reg)
+#if SHIFT < 2
DEF_HELPER_4(glue(maskmov, SUFFIX), void, env, Reg, Reg, tl)
+#endif
DEF_HELPER_2(glue(movl_mm_T0, SUFFIX), void, Reg, i32)
#ifdef TARGET_X86_64
DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64)
@@ -122,38 +124,63 @@ DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64)
#if SHIFT == 0
DEF_HELPER_3(glue(pshufw, SUFFIX), void, Reg, Reg, int)
#else
-DEF_HELPER_3(glue(shufps, SUFFIX), void, Reg, Reg, int)
-DEF_HELPER_3(glue(shufpd, SUFFIX), void, Reg, Reg, int)
DEF_HELPER_3(glue(pshufd, SUFFIX), void, Reg, Reg, int)
DEF_HELPER_3(glue(pshuflw, SUFFIX), void, Reg, Reg, int)
DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int)
#endif
-#if SHIFT == 1
+#if SHIFT >= 1
/* FPU ops */
/* XXX: not accurate */
-#define SSE_HELPER_S(name, F) \
- DEF_HELPER_3(glue(name ## ps, SUFFIX), void, env, Reg, Reg) \
- DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \
- DEF_HELPER_3(glue(name ## pd, SUFFIX), void, env, Reg, Reg) \
+#define SSE_HELPER_P4(name) \
+ DEF_HELPER_3(glue(name ## ps, SUFFIX), void, env, Reg, Reg) \
+ DEF_HELPER_3(glue(name ## pd, SUFFIX), void, env, Reg, Reg)
+
+#define SSE_HELPER_P3(name, ...) \
+ DEF_HELPER_3(glue(name ## ps, SUFFIX), void, env, Reg, Reg) \
+ DEF_HELPER_3(glue(name ## pd, SUFFIX), void, env, Reg, Reg)
+
+#if SHIFT == 1
+#define SSE_HELPER_S4(name) \
+ SSE_HELPER_P4(name) \
+ DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \
DEF_HELPER_3(name ## sd, void, env, Reg, Reg)
+#define SSE_HELPER_S3(name) \
+ SSE_HELPER_P3(name) \
+ DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \
+ DEF_HELPER_3(name ## sd, void, env, Reg, Reg)
+#else
+#define SSE_HELPER_S4(name, ...) SSE_HELPER_P4(name)
+#define SSE_HELPER_S3(name, ...) SSE_HELPER_P3(name)
+#endif
-SSE_HELPER_S(add, FPU_ADD)
-SSE_HELPER_S(sub, FPU_SUB)
-SSE_HELPER_S(mul, FPU_MUL)
-SSE_HELPER_S(div, FPU_DIV)
-SSE_HELPER_S(min, FPU_MIN)
-SSE_HELPER_S(max, FPU_MAX)
-SSE_HELPER_S(sqrt, FPU_SQRT)
+DEF_HELPER_3(glue(shufps, SUFFIX), void, Reg, Reg, int)
+DEF_HELPER_3(glue(shufpd, SUFFIX), void, Reg, Reg, int)
+SSE_HELPER_S4(add)
+SSE_HELPER_S4(sub)
+SSE_HELPER_S4(mul)
+SSE_HELPER_S4(div)
+SSE_HELPER_S4(min)
+SSE_HELPER_S4(max)
+
+SSE_HELPER_S3(sqrt)
DEF_HELPER_3(glue(cvtps2pd, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(cvtpd2ps, SUFFIX), void, env, Reg, Reg)
-DEF_HELPER_3(cvtss2sd, void, env, Reg, Reg)
-DEF_HELPER_3(cvtsd2ss, void, env, Reg, Reg)
DEF_HELPER_3(glue(cvtdq2ps, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(cvtdq2pd, SUFFIX), void, env, Reg, Reg)
+
+DEF_HELPER_3(glue(cvtps2dq, SUFFIX), void, env, ZMMReg, ZMMReg)
+DEF_HELPER_3(glue(cvtpd2dq, SUFFIX), void, env, ZMMReg, ZMMReg)
+
+DEF_HELPER_3(glue(cvttps2dq, SUFFIX), void, env, ZMMReg, ZMMReg)
+DEF_HELPER_3(glue(cvttpd2dq, SUFFIX), void, env, ZMMReg, ZMMReg)
+
+#if SHIFT == 1
+DEF_HELPER_3(cvtss2sd, void, env, Reg, Reg)
+DEF_HELPER_3(cvtsd2ss, void, env, Reg, Reg)
DEF_HELPER_3(cvtpi2ps, void, env, ZMMReg, MMXReg)
DEF_HELPER_3(cvtpi2pd, void, env, ZMMReg, MMXReg)
DEF_HELPER_3(cvtsi2ss, void, env, ZMMReg, i32)
@@ -164,8 +191,6 @@ DEF_HELPER_3(cvtsq2ss, void, env, ZMMReg, i64)
DEF_HELPER_3(cvtsq2sd, void, env, ZMMReg, i64)
#endif
-DEF_HELPER_3(glue(cvtps2dq, SUFFIX), void, env, ZMMReg, ZMMReg)
-DEF_HELPER_3(glue(cvtpd2dq, SUFFIX), void, env, ZMMReg, ZMMReg)
DEF_HELPER_3(cvtps2pi, void, env, MMXReg, ZMMReg)
DEF_HELPER_3(cvtpd2pi, void, env, MMXReg, ZMMReg)
DEF_HELPER_2(cvtss2si, s32, env, ZMMReg)
@@ -175,8 +200,6 @@ DEF_HELPER_2(cvtss2sq, s64, env, ZMMReg)
DEF_HELPER_2(cvtsd2sq, s64, env, ZMMReg)
#endif
-DEF_HELPER_3(glue(cvttps2dq, SUFFIX), void, env, ZMMReg, ZMMReg)
-DEF_HELPER_3(glue(cvttpd2dq, SUFFIX), void, env, ZMMReg, ZMMReg)
DEF_HELPER_3(cvttps2pi, void, env, MMXReg, ZMMReg)
DEF_HELPER_3(cvttpd2pi, void, env, MMXReg, ZMMReg)
DEF_HELPER_2(cvttss2si, s32, env, ZMMReg)
@@ -185,27 +208,24 @@ DEF_HELPER_2(cvttsd2si, s32, env, ZMMReg)
DEF_HELPER_2(cvttss2sq, s64, env, ZMMReg)
DEF_HELPER_2(cvttsd2sq, s64, env, ZMMReg)
#endif
+#endif
DEF_HELPER_3(glue(rsqrtps, SUFFIX), void, env, ZMMReg, ZMMReg)
-DEF_HELPER_3(rsqrtss, void, env, ZMMReg, ZMMReg)
DEF_HELPER_3(glue(rcpps, SUFFIX), void, env, ZMMReg, ZMMReg)
+#if SHIFT == 1
+DEF_HELPER_3(rsqrtss, void, env, ZMMReg, ZMMReg)
DEF_HELPER_3(rcpss, void, env, ZMMReg, ZMMReg)
DEF_HELPER_3(extrq_r, void, env, ZMMReg, ZMMReg)
DEF_HELPER_4(extrq_i, void, env, ZMMReg, int, int)
DEF_HELPER_3(insertq_r, void, env, ZMMReg, ZMMReg)
DEF_HELPER_5(insertq_i, void, env, ZMMReg, ZMMReg, int, int)
-DEF_HELPER_3(glue(haddps, SUFFIX), void, env, ZMMReg, ZMMReg)
-DEF_HELPER_3(glue(haddpd, SUFFIX), void, env, ZMMReg, ZMMReg)
-DEF_HELPER_3(glue(hsubps, SUFFIX), void, env, ZMMReg, ZMMReg)
-DEF_HELPER_3(glue(hsubpd, SUFFIX), void, env, ZMMReg, ZMMReg)
-DEF_HELPER_3(glue(addsubps, SUFFIX), void, env, ZMMReg, ZMMReg)
-DEF_HELPER_3(glue(addsubpd, SUFFIX), void, env, ZMMReg, ZMMReg)
+#endif
-#define SSE_HELPER_CMP(name, F, C) \
- DEF_HELPER_3(glue(name ## ps, SUFFIX), void, env, Reg, Reg) \
- DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \
- DEF_HELPER_3(glue(name ## pd, SUFFIX), void, env, Reg, Reg) \
- DEF_HELPER_3(name ## sd, void, env, Reg, Reg)
+SSE_HELPER_P4(hadd)
+SSE_HELPER_P4(hsub)
+SSE_HELPER_P4(addsub)
+
+#define SSE_HELPER_CMP(name, F, C) SSE_HELPER_S4(name)
SSE_HELPER_CMP(cmpeq, FPU_CMPQ, FPU_EQ)
SSE_HELPER_CMP(cmplt, FPU_CMPS, FPU_LT)
@@ -216,10 +236,13 @@ SSE_HELPER_CMP(cmpnlt, FPU_CMPS, !FPU_LT)
SSE_HELPER_CMP(cmpnle, FPU_CMPS, !FPU_LE)
SSE_HELPER_CMP(cmpord, FPU_CMPQ, !FPU_UNORD)
+#if SHIFT == 1
DEF_HELPER_3(ucomiss, void, env, Reg, Reg)
DEF_HELPER_3(comiss, void, env, Reg, Reg)
DEF_HELPER_3(ucomisd, void, env, Reg, Reg)
DEF_HELPER_3(comisd, void, env, Reg, Reg)
+#endif
+
DEF_HELPER_2(glue(movmskps, SUFFIX), i32, env, Reg)
DEF_HELPER_2(glue(movmskpd, SUFFIX), i32, env, Reg)
#endif
@@ -236,7 +259,7 @@ DEF_HELPER_3(glue(packssdw, SUFFIX), void, env, Reg, Reg)
UNPCK_OP(l, 0)
UNPCK_OP(h, 1)
-#if SHIFT == 1
+#if SHIFT >= 1
DEF_HELPER_3(glue(punpcklqdq, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(punpckhqdq, SUFFIX), void, env, Reg, Reg)
#endif
@@ -283,7 +306,7 @@ DEF_HELPER_3(glue(psignd, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_4(glue(palignr, SUFFIX), void, env, Reg, Reg, s32)
/* SSE4.1 op helpers */
-#if SHIFT == 1
+#if SHIFT >= 1
DEF_HELPER_3(glue(pblendvb, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(blendvps, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(blendvpd, SUFFIX), void, env, Reg, Reg)
@@ -312,22 +335,30 @@ DEF_HELPER_3(glue(pmaxsd, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(pmaxuw, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(pmaxud, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(pmulld, SUFFIX), void, env, Reg, Reg)
+#if SHIFT == 1
DEF_HELPER_3(glue(phminposuw, SUFFIX), void, env, Reg, Reg)
+#endif
DEF_HELPER_4(glue(roundps, SUFFIX), void, env, Reg, Reg, i32)
DEF_HELPER_4(glue(roundpd, SUFFIX), void, env, Reg, Reg, i32)
+#if SHIFT == 1
DEF_HELPER_4(glue(roundss, SUFFIX), void, env, Reg, Reg, i32)
DEF_HELPER_4(glue(roundsd, SUFFIX), void, env, Reg, Reg, i32)
+#endif
DEF_HELPER_4(glue(blendps, SUFFIX), void, env, Reg, Reg, i32)
DEF_HELPER_4(glue(blendpd, SUFFIX), void, env, Reg, Reg, i32)
DEF_HELPER_4(glue(pblendw, SUFFIX), void, env, Reg, Reg, i32)
DEF_HELPER_4(glue(dpps, SUFFIX), void, env, Reg, Reg, i32)
+#if SHIFT == 1
DEF_HELPER_4(glue(dppd, SUFFIX), void, env, Reg, Reg, i32)
+#endif
DEF_HELPER_4(glue(mpsadbw, SUFFIX), void, env, Reg, Reg, i32)
#endif
/* SSE4.2 op helpers */
-#if SHIFT == 1
+#if SHIFT >= 1
DEF_HELPER_3(glue(pcmpgtq, SUFFIX), void, env, Reg, Reg)
+#endif
+#if SHIFT == 1
DEF_HELPER_4(glue(pcmpestri, SUFFIX), void, env, Reg, Reg, i32)
DEF_HELPER_4(glue(pcmpestrm, SUFFIX), void, env, Reg, Reg, i32)
DEF_HELPER_4(glue(pcmpistri, SUFFIX), void, env, Reg, Reg, i32)
@@ -336,13 +367,15 @@ DEF_HELPER_3(crc32, tl, i32, tl, i32)
#endif
/* AES-NI op helpers */
-#if SHIFT == 1
+#if SHIFT >= 1
DEF_HELPER_3(glue(aesdec, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(aesdeclast, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(aesenc, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(aesenclast, SUFFIX), void, env, Reg, Reg)
+#if SHIFT == 1
DEF_HELPER_3(glue(aesimc, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32)
+#endif
DEF_HELPER_4(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, i32)
#endif
@@ -354,6 +387,9 @@ DEF_HELPER_4(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, i32)
#undef SSE_HELPER_W
#undef SSE_HELPER_L
#undef SSE_HELPER_Q
-#undef SSE_HELPER_S
+#undef SSE_HELPER_S3
+#undef SSE_HELPER_S4
+#undef SSE_HELPER_P3
+#undef SSE_HELPER_P4
#undef SSE_HELPER_CMP
#undef UNPCK_OP
--
2.37.3
- [PATCH 14/35] target/i386: provide 3-operand versions of unary scalar helpers, (continued)
- [PATCH 14/35] target/i386: provide 3-operand versions of unary scalar helpers, Paolo Bonzini, 2022/10/13
- [PATCH 28/35] target/i386: reimplement 0x0f 0x10-0x17, add AVX, Paolo Bonzini, 2022/10/13
- [PATCH 35/35] target/i386: remove old SSE decoder, Paolo Bonzini, 2022/10/13
- [PATCH 18/35] target/i386: reimplement 0x0f 0xd8-0xdf, 0xe8-0xef, 0xf8-0xff, add AVX, Paolo Bonzini, 2022/10/13
- [PATCH 17/35] target/i386: reimplement 0x0f 0x60-0x6f, add AVX, Paolo Bonzini, 2022/10/13
- [PATCH 15/35] target/i386: implement additional AVX comparison operators, Paolo Bonzini, 2022/10/13
- [PATCH 20/35] target/i386: reimplement 0x0f 0x78-0x7f, add AVX, Paolo Bonzini, 2022/10/13
- [PATCH 30/35] target/i386: implement XSAVE and XRSTOR of AVX registers, Paolo Bonzini, 2022/10/13
- [PATCH 21/35] target/i386: reimplement 0x0f 0x70-0x77, add AVX, Paolo Bonzini, 2022/10/13
- [PATCH 32/35] target/i386: Enable AVX cpuid bits when using TCG, Paolo Bonzini, 2022/10/13
- [PATCH 11/35] target/i386: Prepare ops_sse_header.h for 256 bit AVX, Paolo Bonzini <=
- [PATCH 12/35] target/i386: extend helpers to support VEX.V 3- and 4- operand encodings, Paolo Bonzini, 2022/10/13
- [PATCH 24/35] target/i386: reimplement 0x0f 0x3a, add AVX, Paolo Bonzini, 2022/10/13
- [PATCH 31/35] target/i386: implement VLDMXCSR/VSTMXCSR, Paolo Bonzini, 2022/10/13
- [PATCH 23/35] target/i386: clarify (un)signedness of immediates from 0F3Ah opcodes, Paolo Bonzini, 2022/10/13
- [PATCH 27/35] target/i386: reimplement 0x0f 0xc2, 0xc4-0xc6, add AVX, Paolo Bonzini, 2022/10/13
- [PATCH 34/35] target/i386: move 3DNow to the new decoder, Paolo Bonzini, 2022/10/13
- [PATCH 22/35] target/i386: reimplement 0x0f 0xd0-0xd7, 0xe0-0xe7, 0xf0-0xf7, add AVX, Paolo Bonzini, 2022/10/13
- [PATCH 25/35] target/i386: Use tcg gvec ops for pmovmskb, Paolo Bonzini, 2022/10/13
- [PATCH 33/35] tests/tcg: extend SSE tests to AVX, Paolo Bonzini, 2022/10/13
- [PATCH 26/35] target/i386: reimplement 0x0f 0x38, add AVX, Paolo Bonzini, 2022/10/13