[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 28/39] target/i386: Rewrite vector shift helper
From: |
Paolo Bonzini |
Subject: |
[PULL 28/39] target/i386: Rewrite vector shift helper |
Date: |
Thu, 1 Sep 2022 20:24:18 +0200 |
From: Paul Brook <paul@nowt.org>
Rewrite the vector shift helpers in preperation for AVX support (3 operand
form and 256 bit vectors).
For now keep the existing two operand interface.
No functional changes to existing helpers.
Signed-off-by: Paul Brook <paul@nowt.org>
Message-Id: <20220424220204.2493824-11-paul@nowt.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/ops_sse.h | 247 +++++++++++++++++++-----------------------
1 file changed, 112 insertions(+), 135 deletions(-)
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 2c0090a647..a4a09226e3 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -40,6 +40,8 @@
#define SUFFIX _xmm
#endif
+#define LANE_WIDTH (SHIFT ? 16 : 8)
+
/*
* Copy the relevant parts of a Reg value around. In the case where
* sizeof(Reg) > SIZE, these helpers operate only on the lower bytes of
@@ -56,198 +58,173 @@
#define MOVE(d, r) memcpy(&(d).B(0), &(r).B(0), SIZE)
#endif
-void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
-{
- int shift;
+#if SHIFT == 0
+#define FPSRL(x, c) ((x) >> shift)
+#define FPSRAW(x, c) ((int16_t)(x) >> shift)
+#define FPSRAL(x, c) ((int32_t)(x) >> shift)
+#define FPSLL(x, c) ((x) << shift)
+#endif
- if (s->Q(0) > 15) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
+{
+ Reg *s = d;
+ int shift;
+ if (c->Q(0) > 15) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->W(0) >>= shift;
- d->W(1) >>= shift;
- d->W(2) >>= shift;
- d->W(3) >>= shift;
-#if SHIFT == 1
- d->W(4) >>= shift;
- d->W(5) >>= shift;
- d->W(6) >>= shift;
- d->W(7) >>= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 4 << SHIFT; i++) {
+ d->W(i) = FPSRL(s->W(i), shift);
+ }
}
}
-void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
+ if (c->Q(0) > 15) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
+ } else {
+ shift = c->B(0);
+ for (int i = 0; i < 4 << SHIFT; i++) {
+ d->W(i) = FPSLL(s->W(i), shift);
+ }
+ }
+}
- if (s->Q(0) > 15) {
+void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
+{
+ Reg *s = d;
+ int shift;
+ if (c->Q(0) > 15) {
shift = 15;
} else {
- shift = s->B(0);
+ shift = c->B(0);
+ }
+ for (int i = 0; i < 4 << SHIFT; i++) {
+ d->W(i) = FPSRAW(s->W(i), shift);
}
- d->W(0) = (int16_t)d->W(0) >> shift;
- d->W(1) = (int16_t)d->W(1) >> shift;
- d->W(2) = (int16_t)d->W(2) >> shift;
- d->W(3) = (int16_t)d->W(3) >> shift;
-#if SHIFT == 1
- d->W(4) = (int16_t)d->W(4) >> shift;
- d->W(5) = (int16_t)d->W(5) >> shift;
- d->W(6) = (int16_t)d->W(6) >> shift;
- d->W(7) = (int16_t)d->W(7) >> shift;
-#endif
}
-void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 15) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 31) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->W(0) <<= shift;
- d->W(1) <<= shift;
- d->W(2) <<= shift;
- d->W(3) <<= shift;
-#if SHIFT == 1
- d->W(4) <<= shift;
- d->W(5) <<= shift;
- d->W(6) <<= shift;
- d->W(7) <<= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 2 << SHIFT; i++) {
+ d->L(i) = FPSRL(s->L(i), shift);
+ }
}
}
-void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 31) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 31) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->L(0) >>= shift;
- d->L(1) >>= shift;
-#if SHIFT == 1
- d->L(2) >>= shift;
- d->L(3) >>= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 2 << SHIFT; i++) {
+ d->L(i) = FPSLL(s->L(i), shift);
+ }
}
}
-void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 31) {
+ if (c->Q(0) > 31) {
shift = 31;
} else {
- shift = s->B(0);
+ shift = c->B(0);
+ }
+ for (int i = 0; i < 2 << SHIFT; i++) {
+ d->L(i) = FPSRAL(s->L(i), shift);
}
- d->L(0) = (int32_t)d->L(0) >> shift;
- d->L(1) = (int32_t)d->L(1) >> shift;
-#if SHIFT == 1
- d->L(2) = (int32_t)d->L(2) >> shift;
- d->L(3) = (int32_t)d->L(3) >> shift;
-#endif
}
-void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 31) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 63) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->L(0) <<= shift;
- d->L(1) <<= shift;
-#if SHIFT == 1
- d->L(2) <<= shift;
- d->L(3) <<= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = FPSRL(s->Q(i), shift);
+ }
}
}
-void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 63) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 63) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->Q(0) >>= shift;
-#if SHIFT == 1
- d->Q(1) >>= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = FPSLL(s->Q(i), shift);
+ }
}
}
-void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+#if SHIFT >= 1
+void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
- int shift;
+ Reg *s = d;
+ int shift, i, j;
- if (s->Q(0) > 63) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
- } else {
- shift = s->B(0);
- d->Q(0) <<= shift;
-#if SHIFT == 1
- d->Q(1) <<= shift;
-#endif
- }
-}
-
-#if SHIFT == 1
-void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
-{
- int shift, i;
-
- shift = s->L(0);
+ shift = c->L(0);
if (shift > 16) {
shift = 16;
}
- for (i = 0; i < 16 - shift; i++) {
- d->B(i) = d->B(i + shift);
- }
- for (i = 16 - shift; i < 16; i++) {
- d->B(i) = 0;
+ for (j = 0; j < 8 << SHIFT; j += LANE_WIDTH) {
+ for (i = 0; i < 16 - shift; i++) {
+ d->B(j + i) = s->B(j + i + shift);
+ }
+ for (i = 16 - shift; i < 16; i++) {
+ d->B(j + i) = 0;
+ }
}
}
-void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
- int shift, i;
+ Reg *s = d;
+ int shift, i, j;
- shift = s->L(0);
+ shift = c->L(0);
if (shift > 16) {
shift = 16;
}
- for (i = 15; i >= shift; i--) {
- d->B(i) = d->B(i - shift);
- }
- for (i = 0; i < shift; i++) {
- d->B(i) = 0;
+ for (j = 0; j < 8 << SHIFT; j += LANE_WIDTH) {
+ for (i = 15; i >= shift; i--) {
+ d->B(j + i) = s->B(j + i - shift);
+ }
+ for (i = 0; i < shift; i++) {
+ d->B(j + i) = 0;
+ }
}
}
#endif
--
2.37.2
- [PULL 26/39] target/i386: Add CHECK_NO_VEX, (continued)
- [PULL 26/39] target/i386: Add CHECK_NO_VEX, Paolo Bonzini, 2022/09/01
- [PULL 27/39] target/i386: rewrite destructive 3DNow operations, Paolo Bonzini, 2022/09/01
- [PULL 25/39] target/i386: do not cast gen_helper_* function pointers, Paolo Bonzini, 2022/09/01
- [PULL 24/39] target/i386: Add size suffix to vector FP helpers, Paolo Bonzini, 2022/09/01
- [PULL 30/39] target/i386: Misc integer AVX helper prep, Paolo Bonzini, 2022/09/01
- [PULL 33/39] target/i386: reimplement AVX comparison helpers, Paolo Bonzini, 2022/09/01
- [PULL 31/39] target/i386: Destructive vector helpers for AVX, Paolo Bonzini, 2022/09/01
- [PULL 29/39] target/i386: Rewrite simple integer vector helpers, Paolo Bonzini, 2022/09/01
- [PULL 34/39] target/i386: Dot product AVX helper prep, Paolo Bonzini, 2022/09/01
- [PULL 37/39] target/i386: Rewrite blendv helpers, Paolo Bonzini, 2022/09/01
- [PULL 28/39] target/i386: Rewrite vector shift helper,
Paolo Bonzini <=
- [PULL 32/39] target/i386: Floating point arithmetic helper AVX prep, Paolo Bonzini, 2022/09/01
- [PULL 38/39] target/i386: AVX pclmulqdq prep, Paolo Bonzini, 2022/09/01
- [PULL 35/39] target/i386: Destructive FP helpers for AVX, Paolo Bonzini, 2022/09/01
- [PULL 36/39] target/i386: Misc AVX helper prep, Paolo Bonzini, 2022/09/01
- [PULL 39/39] target/i386: AVX+AES helpers prep, Paolo Bonzini, 2022/09/01
- Re: [PULL 00/39] i386, SCSI, build system changes for 2022-09-01, Stefan Hajnoczi, 2022/09/05