qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 10/42] i386: Rewrite vector shift helper


From: Paul Brook
Subject: [PATCH v2 10/42] i386: Rewrite vector shift helper
Date: Sun, 24 Apr 2022 23:01:32 +0100

Rewrite the vector shift helpers in preperation for AVX support (3 operand
form and 256 bit vectors).

For now keep the existing two operand interface.

No functional changes to existing helpers.

Signed-off-by: Paul Brook <paul@nowt.org>
---
 target/i386/ops_sse.h | 250 ++++++++++++++++++++++--------------------
 1 file changed, 133 insertions(+), 117 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 23daab6b50..9297c96d04 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -63,199 +63,215 @@
 #define MOVE(d, r) memcpy(&(d).B(0), &(r).B(0), SIZE)
 #endif
 
-void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+#if SHIFT == 0
+#define SHIFT_HELPER_BODY(n, elem, F) do {      \
+    d->elem(0) = F(s->elem(0), shift);          \
+    if ((n) > 1) {                              \
+        d->elem(1) = F(s->elem(1), shift);      \
+    }                                           \
+    if ((n) > 2) {                              \
+        d->elem(2) = F(s->elem(2), shift);      \
+        d->elem(3) = F(s->elem(3), shift);      \
+    }                                           \
+    if ((n) > 4) {                              \
+        d->elem(4) = F(s->elem(4), shift);      \
+        d->elem(5) = F(s->elem(5), shift);      \
+        d->elem(6) = F(s->elem(6), shift);      \
+        d->elem(7) = F(s->elem(7), shift);      \
+    }                                           \
+    if ((n) > 8) {                              \
+        d->elem(8) = F(s->elem(8), shift);      \
+        d->elem(9) = F(s->elem(9), shift);      \
+        d->elem(10) = F(s->elem(10), shift);    \
+        d->elem(11) = F(s->elem(11), shift);    \
+        d->elem(12) = F(s->elem(12), shift);    \
+        d->elem(13) = F(s->elem(13), shift);    \
+        d->elem(14) = F(s->elem(14), shift);    \
+        d->elem(15) = F(s->elem(15), shift);    \
+    }                                           \
+    } while (0)
+
+#define FPSRL(x, c) ((x) >> shift)
+#define FPSRAW(x, c) ((int16_t)(x) >> shift)
+#define FPSRAL(x, c) ((int32_t)(x) >> shift)
+#define FPSLL(x, c) ((x) << shift)
+#endif
+
+void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+    Reg *s = d;
     int shift;
-
-    if (s->Q(0) > 15) {
+    if (c->Q(0) > 15) {
         d->Q(0) = 0;
-#if SHIFT == 1
-        d->Q(1) = 0;
-#endif
+        XMM_ONLY(d->Q(1) = 0;)
+        YMM_ONLY(
+                d->Q(2) = 0;
+                d->Q(3) = 0;
+                )
     } else {
-        shift = s->B(0);
-        d->W(0) >>= shift;
-        d->W(1) >>= shift;
-        d->W(2) >>= shift;
-        d->W(3) >>= shift;
-#if SHIFT == 1
-        d->W(4) >>= shift;
-        d->W(5) >>= shift;
-        d->W(6) >>= shift;
-        d->W(7) >>= shift;
-#endif
+        shift = c->B(0);
+        SHIFT_HELPER_BODY(4 << SHIFT, W, FPSRL);
     }
 }
 
-void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+    Reg *s = d;
     int shift;
-
-    if (s->Q(0) > 15) {
-        shift = 15;
+    if (c->Q(0) > 15) {
+        d->Q(0) = 0;
+        XMM_ONLY(d->Q(1) = 0;)
+        YMM_ONLY(
+                d->Q(2) = 0;
+                d->Q(3) = 0;
+                )
     } else {
-        shift = s->B(0);
+        shift = c->B(0);
+        SHIFT_HELPER_BODY(4 << SHIFT, W, FPSLL);
     }
-    d->W(0) = (int16_t)d->W(0) >> shift;
-    d->W(1) = (int16_t)d->W(1) >> shift;
-    d->W(2) = (int16_t)d->W(2) >> shift;
-    d->W(3) = (int16_t)d->W(3) >> shift;
-#if SHIFT == 1
-    d->W(4) = (int16_t)d->W(4) >> shift;
-    d->W(5) = (int16_t)d->W(5) >> shift;
-    d->W(6) = (int16_t)d->W(6) >> shift;
-    d->W(7) = (int16_t)d->W(7) >> shift;
-#endif
 }
 
-void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+    Reg *s = d;
     int shift;
-
-    if (s->Q(0) > 15) {
-        d->Q(0) = 0;
-#if SHIFT == 1
-        d->Q(1) = 0;
-#endif
+    if (c->Q(0) > 15) {
+        shift = 15;
     } else {
-        shift = s->B(0);
-        d->W(0) <<= shift;
-        d->W(1) <<= shift;
-        d->W(2) <<= shift;
-        d->W(3) <<= shift;
-#if SHIFT == 1
-        d->W(4) <<= shift;
-        d->W(5) <<= shift;
-        d->W(6) <<= shift;
-        d->W(7) <<= shift;
-#endif
+        shift = c->B(0);
     }
+    SHIFT_HELPER_BODY(4 << SHIFT, W, FPSRAW);
 }
 
-void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+    Reg *s = d;
     int shift;
-
-    if (s->Q(0) > 31) {
+    if (c->Q(0) > 31) {
         d->Q(0) = 0;
-#if SHIFT == 1
-        d->Q(1) = 0;
-#endif
+        XMM_ONLY(d->Q(1) = 0;)
+        YMM_ONLY(
+                d->Q(2) = 0;
+                d->Q(3) = 0;
+                )
     } else {
-        shift = s->B(0);
-        d->L(0) >>= shift;
-        d->L(1) >>= shift;
-#if SHIFT == 1
-        d->L(2) >>= shift;
-        d->L(3) >>= shift;
-#endif
+        shift = c->B(0);
+        SHIFT_HELPER_BODY(2 << SHIFT, L, FPSRL);
     }
 }
 
-void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+    Reg *s = d;
     int shift;
-
-    if (s->Q(0) > 31) {
-        shift = 31;
+    if (c->Q(0) > 31) {
+        d->Q(0) = 0;
+        XMM_ONLY(d->Q(1) = 0;)
+        YMM_ONLY(
+                d->Q(2) = 0;
+                d->Q(3) = 0;
+                )
     } else {
-        shift = s->B(0);
+        shift = c->B(0);
+        SHIFT_HELPER_BODY(2 << SHIFT, L, FPSLL);
     }
-    d->L(0) = (int32_t)d->L(0) >> shift;
-    d->L(1) = (int32_t)d->L(1) >> shift;
-#if SHIFT == 1
-    d->L(2) = (int32_t)d->L(2) >> shift;
-    d->L(3) = (int32_t)d->L(3) >> shift;
-#endif
 }
 
-void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+    Reg *s = d;
     int shift;
-
-    if (s->Q(0) > 31) {
-        d->Q(0) = 0;
-#if SHIFT == 1
-        d->Q(1) = 0;
-#endif
+    if (c->Q(0) > 31) {
+        shift = 31;
     } else {
-        shift = s->B(0);
-        d->L(0) <<= shift;
-        d->L(1) <<= shift;
-#if SHIFT == 1
-        d->L(2) <<= shift;
-        d->L(3) <<= shift;
-#endif
+        shift = c->B(0);
     }
+    SHIFT_HELPER_BODY(2 << SHIFT, L, FPSRAL);
 }
 
-void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+    Reg *s = d;
     int shift;
-
-    if (s->Q(0) > 63) {
+    if (c->Q(0) > 63) {
         d->Q(0) = 0;
-#if SHIFT == 1
-        d->Q(1) = 0;
-#endif
+        XMM_ONLY(d->Q(1) = 0;)
+        YMM_ONLY(
+                d->Q(2) = 0;
+                d->Q(3) = 0;
+                )
     } else {
-        shift = s->B(0);
-        d->Q(0) >>= shift;
-#if SHIFT == 1
-        d->Q(1) >>= shift;
-#endif
+        shift = c->B(0);
+        SHIFT_HELPER_BODY(1 << SHIFT, Q, FPSRL);
     }
 }
 
-void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+    Reg *s = d;
     int shift;
-
-    if (s->Q(0) > 63) {
+    if (c->Q(0) > 63) {
         d->Q(0) = 0;
-#if SHIFT == 1
-        d->Q(1) = 0;
-#endif
+        XMM_ONLY(d->Q(1) = 0;)
+        YMM_ONLY(
+                d->Q(2) = 0;
+                d->Q(3) = 0;
+                )
     } else {
-        shift = s->B(0);
-        d->Q(0) <<= shift;
-#if SHIFT == 1
-        d->Q(1) <<= shift;
-#endif
+        shift = c->B(0);
+        SHIFT_HELPER_BODY(1 << SHIFT, Q, FPSLL);
     }
 }
 
-#if SHIFT == 1
-void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+#if SHIFT >= 1
+void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+    Reg *s = d;
     int shift, i;
 
-    shift = s->L(0);
+    shift = c->L(0);
     if (shift > 16) {
         shift = 16;
     }
     for (i = 0; i < 16 - shift; i++) {
-        d->B(i) = d->B(i + shift);
+        d->B(i) = s->B(i + shift);
     }
     for (i = 16 - shift; i < 16; i++) {
         d->B(i) = 0;
     }
+#if SHIFT == 2
+    for (i = 0; i < 16 - shift; i++) {
+        d->B(i + 16) = s->B(i + 16 + shift);
+    }
+    for (i = 16 - shift; i < 16; i++) {
+        d->B(i + 16) = 0;
+    }
+#endif
 }
 
-void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+    Reg *s = d;
     int shift, i;
 
-    shift = s->L(0);
+    shift = c->L(0);
     if (shift > 16) {
         shift = 16;
     }
     for (i = 15; i >= shift; i--) {
-        d->B(i) = d->B(i - shift);
+        d->B(i) = s->B(i - shift);
     }
     for (i = 0; i < shift; i++) {
         d->B(i) = 0;
     }
+#if SHIFT == 2
+    for (i = 15; i >= shift; i--) {
+        d->B(i + 16) = s->B(i + 16 - shift);
+    }
+    for (i = 0; i < shift; i++) {
+        d->B(i + 16) = 0;
+    }
+#endif
 }
 #endif
 
-- 
2.36.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]