[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 30/42] i386: Implement VPERMIL
From: |
Paul Brook |
Subject: |
[PATCH v2 30/42] i386: Implement VPERMIL |
Date: |
Sun, 24 Apr 2022 23:01:52 +0100 |
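There are some potentially surprising differences between vpermilpd and
vpermilps (the variable form of vpermilpd takes its selector from bit 1 of each
64-bit control element, whereas vpermilps uses bits 1:0 of each 32-bit control
element), but overall this is pretty straightforward.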
Signed-off-by: Paul Brook <paul@nowt.org>
---
target/i386/ops_sse.h | 82 ++++++++++++++++++++++++++++++++++++
target/i386/ops_sse_header.h | 4 ++
target/i386/tcg/translate.c | 4 ++
3 files changed, 90 insertions(+)
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 4115c9a257..9b92b9790a 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -3113,6 +3113,88 @@ void glue(helper_vbroadcastq, SUFFIX)(CPUX86State *env,
Reg *d, Reg *s)
#endif
}
+/*
+ * Variable VPERMILPD: permute the 64-bit elements of v into d under control
+ * of s.  Elements are handled in pairs; for every destination element the
+ * selector is bit 1 of the matching 64-bit element of s (bit 0 is not looked
+ * at), and it picks one of the two elements of v belonging to the same pair.
+ * Only the first pair is processed unless SHIFT == 2, in which case elements
+ * 2 and 3 are processed as a second pair.  env is not used.
+ */
+void glue(helper_vpermilpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s)
+{
+#if SHIFT == 2
+    const int nb_elem = 4;
+#else
+    const int nb_elem = 2;
+#endif
+    int base;
+
+    for (base = 0; base < nb_elem; base += 2) {
+        /*
+         * Both elements of the pair are read before either is stored --
+         * presumably so that d may be the same register as v or s.
+         */
+        uint64_t lo = v->Q(base + ((s->Q(base) >> 1) & 1));
+        uint64_t hi = v->Q(base + ((s->Q(base + 1) >> 1) & 1));
+
+        d->Q(base) = lo;
+        d->Q(base + 1) = hi;
+    }
+}
+
+/*
+ * Variable VPERMILPS: permute the 32-bit elements of v into d under control
+ * of s.  Elements are handled in groups of four; for every destination
+ * element the selector is the low two bits of the matching 32-bit element of
+ * s, and it picks one of the four elements of v belonging to the same group.
+ * Only the first group is processed unless SHIFT == 2, in which case
+ * elements 4..7 are processed as a second group.  env is not used.
+ */
+void glue(helper_vpermilps, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s)
+{
+#if SHIFT == 2
+    const int nb_elem = 8;
+#else
+    const int nb_elem = 4;
+#endif
+    uint32_t picked[4];
+    int base, i;
+
+    for (base = 0; base < nb_elem; base += 4) {
+        /*
+         * Gather the whole group first, store it afterwards -- presumably
+         * so that d may be the same register as v or s.
+         */
+        for (i = 0; i < 4; i++) {
+            picked[i] = v->L(base + (s->L(base + i) & 3));
+        }
+        for (i = 0; i < 4; i++) {
+            d->L(base + i) = picked[i];
+        }
+    }
+}
+
+/*
+ * Immediate VPERMILPD: permute the 64-bit elements of s into d.  Destination
+ * element n is selected by bit n of order, which picks one of the two
+ * elements of s in the same pair as element n.  Only the first pair (bits 0
+ * and 1) is processed unless SHIFT == 2, in which case elements 2 and 3 are
+ * processed as a second pair using bits 2 and 3.  env is not used.
+ */
+void glue(helper_vpermilpd_imm, SUFFIX)(CPUX86State *env,
+                                        Reg *d, Reg *s, uint32_t order)
+{
+#if SHIFT == 2
+    const int nb_elem = 4;
+#else
+    const int nb_elem = 2;
+#endif
+    int base;
+
+    for (base = 0; base < nb_elem; base += 2) {
+        /*
+         * Both elements of the pair are read before either is stored --
+         * presumably so that d may be the same register as s.
+         */
+        uint64_t lo = s->Q(base + ((order >> base) & 1));
+        uint64_t hi = s->Q(base + ((order >> (base + 1)) & 1));
+
+        d->Q(base) = lo;
+        d->Q(base + 1) = hi;
+    }
+}
+
+/*
+ * Immediate VPERMILPS: permute the 32-bit elements of s into d.  order holds
+ * four 2-bit selectors (bits 1:0, 3:2, 5:4 and 7:6), one per destination
+ * element of a group of four; each picks one of the four elements of s in
+ * the same group.  Only the first group is processed unless SHIFT == 2, in
+ * which case elements 4..7 are processed as a second group that reuses the
+ * very same four selectors.  env is not used.
+ */
+void glue(helper_vpermilps_imm, SUFFIX)(CPUX86State *env,
+                                        Reg *d, Reg *s, uint32_t order)
+{
+#if SHIFT == 2
+    const int nb_elem = 8;
+#else
+    const int nb_elem = 4;
+#endif
+    uint32_t picked[4];
+    int base, i;
+
+    for (base = 0; base < nb_elem; base += 4) {
+        /*
+         * Gather the whole group first, store it afterwards -- presumably
+         * so that d may be the same register as s.
+         */
+        for (i = 0; i < 4; i++) {
+            picked[i] = s->L(base + ((order >> (2 * i)) & 3));
+        }
+        for (i = 0; i < 4; i++) {
+            d->L(base + i) = picked[i];
+        }
+    }
+}
+
#if SHIFT == 2
void glue(helper_vbroadcastdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 51e02cd4fa..c52169a030 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -417,6 +417,10 @@ DEF_HELPER_3(glue(vbroadcastb, SUFFIX), void, env, Reg,
Reg)
DEF_HELPER_3(glue(vbroadcastw, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(vbroadcastl, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(vbroadcastq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(vpermilpd, SUFFIX), void, env, Reg, Reg, Reg) /* d, v, s */
+DEF_HELPER_4(glue(vpermilps, SUFFIX), void, env, Reg, Reg, Reg) /* d, v, s */
+DEF_HELPER_4(glue(vpermilpd_imm, SUFFIX), void, env, Reg, Reg, i32) /* d, s, order */
+DEF_HELPER_4(glue(vpermilps_imm, SUFFIX), void, env, Reg, Reg, i32) /* d, s, order */
#if SHIFT == 2
DEF_HELPER_3(glue(vbroadcastdq, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_1(vzeroall, void, env)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 59ab1dc562..358c3ecb0b 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3251,6 +3251,8 @@ static const struct SSEOpHelper_table6 sse_op_table6[256]
= {
[0x09] = BINARY_OP_MMX(psignw, SSSE3),
[0x0a] = BINARY_OP_MMX(psignd, SSSE3),
[0x0b] = BINARY_OP_MMX(pmulhrsw, SSSE3),
+ [0x0c] = BINARY_OP(vpermilps, AVX, 0),
+ [0x0d] = BINARY_OP(vpermilpd, AVX, 0),
[0x10] = BLENDV_OP(pblendvb, SSE41, SSE_OPF_MMX),
[0x14] = BLENDV_OP(blendvps, SSE41, 0),
[0x15] = BLENDV_OP(blendvpd, SSE41, 0),
@@ -3311,6 +3313,8 @@ static const struct SSEOpHelper_table6 sse_op_table6[256]
= {
/* prefix [66] 0f 3a */
static const struct SSEOpHelper_table7 sse_op_table7[256] = {
+ [0x04] = UNARY_OP(vpermilps_imm, AVX, 0),
+ [0x05] = UNARY_OP(vpermilpd_imm, AVX, 0),
[0x08] = UNARY_OP(roundps, SSE41, 0),
[0x09] = UNARY_OP(roundpd, SSE41, 0),
#define gen_helper_roundss_ymm NULL
--
2.36.0
- [PATCH v2 06/42] i386: Add CHECK_NO_VEX, (continued)
- [PATCH v2 06/42] i386: Add CHECK_NO_VEX, Paul Brook, 2022/04/24
- [PATCH v2 02/42] i386: DPPS rounding fix, Paul Brook, 2022/04/24
- [PATCH v2 09/42] i386: Helper macro for 256 bit AVX helpers, Paul Brook, 2022/04/24
- [PATCH v2 07/42] Enforce VEX encoding restrictions, Paul Brook, 2022/04/24
- [PATCH v2 08/42] i386: Add ZMM_OFFSET macro, Paul Brook, 2022/04/24
- [PATCH v2 04/42] i386: Rework sse_op_table1, Paul Brook, 2022/04/24
- [PATCH v2 05/42] i386: Rework sse_op_table6/7, Paul Brook, 2022/04/24
- [PATCH v2 03/42] Add AVX_EN hflag, Paul Brook, 2022/04/24
- [PATCH v2 10/42] i386: Rewrite vector shift helper, Paul Brook, 2022/04/24
- [PATCH v2 17/42] i386: Destructive FP helpers for AVX, Paul Brook, 2022/04/24
- [PATCH v2 30/42] i386: Implement VPERMIL,
Paul Brook <=
- [PATCH v2 33/42] i386: Implement VMASKMOV, Paul Brook, 2022/04/24
- [PATCH v2 29/42] i386: Implement VBROADCAST, Paul Brook, 2022/04/24
- [PATCH v2 41/42] AVX tests, Paul Brook, 2022/04/24
- [PATCH v2 16/42] i386: Dot product AVX helper prep, Paul Brook, 2022/04/24
- [PATCH v2 37/42] i386: Implement VBLENDV, Paul Brook, 2022/04/24
- [PATCH v2 39/42] i386: Enable AVX cpuid bits when using TCG, Paul Brook, 2022/04/24
- [PATCH v2 25/42] i386: VEX.V encodings (3 operand), Paul Brook, 2022/04/24
- [PATCH v2 11/42] i386: Rewrite simple integer vector helpers, Paul Brook, 2022/04/24
- [PATCH v2 14/42] i386: Add size suffix to vector FP helpers, Paul Brook, 2022/04/24
- [PATCH v2 38/42] i386: Implement VPBLENDD, Paul Brook, 2022/04/24