qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC PATCH v4 72/75] target/i386: convert psadbw helper to


From: Jan Bobek
Subject: [Qemu-devel] [RFC PATCH v4 72/75] target/i386: convert psadbw helper to gvec style
Date: Wed, 21 Aug 2019 13:29:48 -0400

Make these helpers suitable for use with tcg_gen_gvec_* functions.

Signed-off-by: Jan Bobek <address@hidden>
---
 target/i386/ops_sse.h        | 64 +++++++++++++++---------------------
 target/i386/ops_sse_header.h |  2 +-
 target/i386/translate.c      |  9 +++--
 3 files changed, 32 insertions(+), 43 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 384a835662..b866ead1c8 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -412,6 +412,15 @@ static inline int satsw(int x)
     }
 }
 
+static inline int abs1(int x)
+{
+    if (x < 0) {
+        return -x;
+    } else {
+        return x;
+    }
+}
+
 #define FMULHRW(a, b) (((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16)
 #endif
 
@@ -510,52 +519,33 @@ void glue(helper_pmaddwd, SUFFIX)(Reg *d, Reg *a, Reg *b, 
uint32_t desc)
     glue(clear_high, SUFFIX)(d, oprsz, maxsz);
 }
 
-#if SHIFT == 0
-static inline int abs1(int a)
+void glue(helper_psadbw, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc)
 {
-    if (a < 0) {
-        return -a;
-    } else {
-        return a;
+    const intptr_t oprsz = simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
+
+    for (intptr_t i = 0; i * sizeof(uint64_t) < oprsz; ++i) {
+        const uint64_t t0 = abs1(a->B(8 * i + 0) - b->B(8 * i + 0));
+        const uint64_t t1 = abs1(a->B(8 * i + 1) - b->B(8 * i + 1));
+        const uint64_t t2 = abs1(a->B(8 * i + 2) - b->B(8 * i + 2));
+        const uint64_t t3 = abs1(a->B(8 * i + 3) - b->B(8 * i + 3));
+        const uint64_t t4 = abs1(a->B(8 * i + 4) - b->B(8 * i + 4));
+        const uint64_t t5 = abs1(a->B(8 * i + 5) - b->B(8 * i + 5));
+        const uint64_t t6 = abs1(a->B(8 * i + 6) - b->B(8 * i + 6));
+        const uint64_t t7 = abs1(a->B(8 * i + 7) - b->B(8 * i + 7));
+        d->Q(i) = t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7;
     }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
 }
-#endif
-void glue(helper_psadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
-{
-    unsigned int val;
 
-    val = 0;
-    val += abs1(d->B(0) - s->B(0));
-    val += abs1(d->B(1) - s->B(1));
-    val += abs1(d->B(2) - s->B(2));
-    val += abs1(d->B(3) - s->B(3));
-    val += abs1(d->B(4) - s->B(4));
-    val += abs1(d->B(5) - s->B(5));
-    val += abs1(d->B(6) - s->B(6));
-    val += abs1(d->B(7) - s->B(7));
-    d->Q(0) = val;
-#if SHIFT == 1
-    val = 0;
-    val += abs1(d->B(8) - s->B(8));
-    val += abs1(d->B(9) - s->B(9));
-    val += abs1(d->B(10) - s->B(10));
-    val += abs1(d->B(11) - s->B(11));
-    val += abs1(d->B(12) - s->B(12));
-    val += abs1(d->B(13) - s->B(13));
-    val += abs1(d->B(14) - s->B(14));
-    val += abs1(d->B(15) - s->B(15));
-    d->Q(1) = val;
-#endif
-}
-
-void glue(helper_maskmov, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+void glue(helper_maskmov, SUFFIX)(CPUX86State *env, Reg *a, Reg *b,
                                   target_ulong a0)
 {
     int i;
 
     for (i = 0; i < (8 << SHIFT); i++) {
-        if (s->B(i) & 0x80) {
-            cpu_stb_data_ra(env, a0 + i, d->B(i), GETPC());
+        if (b->B(i) & 0x80) {
+            cpu_stb_data_ra(env, a0 + i, a->B(i), GETPC());
         }
     }
 }
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 18d39ca649..ec7d1fc686 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -74,7 +74,7 @@ DEF_HELPER_4(glue(pavgw, SUFFIX), void, Reg, Reg, Reg, i32)
 DEF_HELPER_4(glue(pmuludq, SUFFIX), void, Reg, Reg, Reg, i32)
 DEF_HELPER_4(glue(pmaddwd, SUFFIX), void, Reg, Reg, Reg, i32)
 
-DEF_HELPER_3(glue(psadbw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(psadbw, SUFFIX), void, Reg, Reg, Reg, i32)
 DEF_HELPER_4(glue(maskmov, SUFFIX), void, env, Reg, Reg, tl)
 DEF_HELPER_2(glue(movl_mm_T0, SUFFIX), void, Reg, i32)
 #ifdef TARGET_X86_64
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 55607db09c..6bffbaee4c 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2806,7 +2806,6 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
     [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, 
gen_helper_cvtpd2dq },
     [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntq */
     [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
-    [0xf6] = MMX_OP2(psadbw),
     [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
                (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
 };
@@ -6256,10 +6255,10 @@ DEF_GEN_INSN3_GVEC(pavgw, Pq, Pq, Qq, 3_ool, MM_OPRSZ, 
MM_MAXSZ, pavgw_mmx)
 DEF_GEN_INSN3_GVEC(pavgw, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, 
pavgw_xmm)
 DEF_GEN_INSN3_GVEC(vpavgw, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, 
pavgw_xmm)
 DEF_GEN_INSN3_GVEC(vpavgw, Vqq, Hqq, Wqq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, 
pavgw_xmm)
-DEF_GEN_INSN3_HELPER_EPP(psadbw, psadbw_mmx, Pq, Pq, Qq)
-DEF_GEN_INSN3_HELPER_EPP(psadbw, psadbw_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsadbw, psadbw_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsadbw, psadbw_xmm, Vqq, Hqq, Wqq)
+DEF_GEN_INSN3_GVEC(psadbw, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, psadbw_mmx)
+DEF_GEN_INSN3_GVEC(psadbw, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, 
psadbw_xmm)
+DEF_GEN_INSN3_GVEC(vpsadbw, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, 
psadbw_xmm)
+DEF_GEN_INSN3_GVEC(vpsadbw, Vqq, Hqq, Wqq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, 
psadbw_xmm)
 DEF_GEN_INSN4_HELPER_EPPI(mpsadbw, mpsadbw_xmm, Vdq, Vdq, Wdq, Ib)
 DEF_GEN_INSN4_HELPER_EPPI(vmpsadbw, mpsadbw_xmm, Vdq, Hdq, Wdq, Ib)
 DEF_GEN_INSN4_HELPER_EPPI(vmpsadbw, mpsadbw_xmm, Vqq, Hqq, Wqq, Ib)
-- 
2.20.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]