[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 3/6] target/i386: Use tcg gvec for pcmp{eq,gt}*
From: Richard Henderson
Subject: [PATCH 3/6] target/i386: Use tcg gvec for pcmp{eq,gt}*
Date: Mon, 22 Aug 2022 15:37:19 -0700
As pcmpeqb is used by strlen et al, this is the highest-overhead
SSE operation, at 2.5%. It's simple to include the other compares
at the same time.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/ops_sse.h | 8 --------
target/i386/ops_sse_header.h | 8 --------
target/i386/tcg/translate.c | 31 +++++++++++++++++++++++++------
3 files changed, 25 insertions(+), 22 deletions(-)
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 535440f882..94440a9dc5 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -420,14 +420,6 @@ SSE_HELPER_Q(helper_pandn, FANDN)
SSE_HELPER_Q(helper_por, FOR)
SSE_HELPER_Q(helper_pxor, FXOR)
-SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
-SSE_HELPER_W(helper_pcmpgtw, FCMPGTW)
-SSE_HELPER_L(helper_pcmpgtl, FCMPGTL)
-
-SSE_HELPER_B(helper_pcmpeqb, FCMPEQ)
-SSE_HELPER_W(helper_pcmpeqw, FCMPEQ)
-SSE_HELPER_L(helper_pcmpeql, FCMPEQ)
-
SSE_HELPER_W(helper_pmullw, FMULLW)
#if SHIFT == 0
SSE_HELPER_W(helper_pmulhrw, FMULHRW)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index cef28f2aae..b9f957daf8 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -91,14 +91,6 @@ SSE_HELPER_Q(pandn, FANDN)
SSE_HELPER_Q(por, FOR)
SSE_HELPER_Q(pxor, FXOR)
-SSE_HELPER_B(pcmpgtb, FCMPGTB)
-SSE_HELPER_W(pcmpgtw, FCMPGTW)
-SSE_HELPER_L(pcmpgtl, FCMPGTL)
-
-SSE_HELPER_B(pcmpeqb, FCMPEQ)
-SSE_HELPER_W(pcmpeqw, FCMPEQ)
-SSE_HELPER_L(pcmpeql, FCMPEQ)
-
SSE_HELPER_W(pmullw, FMULLW)
#if SHIFT == 0
SSE_HELPER_W(pmulhrw, FMULHRW)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index c1f1f6f66b..467d018b68 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2847,9 +2847,9 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
[0x61] = MMX_OP2(punpcklwd),
[0x62] = MMX_OP2(punpckldq),
[0x63] = MMX_OP2(packsswb),
- [0x64] = MMX_OP2(pcmpgtb),
- [0x65] = MMX_OP2(pcmpgtw),
- [0x66] = MMX_OP2(pcmpgtl),
+ [0x64] = { SSE_DUMMY, SSE_DUMMY }, /* pcmpgtb */
+ [0x65] = { SSE_DUMMY, SSE_DUMMY }, /* pcmpgtw */
+ [0x66] = { SSE_DUMMY, SSE_DUMMY }, /* pcmpgtl */
[0x67] = MMX_OP2(packuswb),
[0x68] = MMX_OP2(punpckhbw),
[0x69] = MMX_OP2(punpckhwd),
@@ -2866,9 +2866,9 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
[0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
[0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
[0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
- [0x74] = MMX_OP2(pcmpeqb),
- [0x75] = MMX_OP2(pcmpeqw),
- [0x76] = MMX_OP2(pcmpeql),
+ [0x74] = { SSE_DUMMY, SSE_DUMMY }, /* pcmpeqb */
+ [0x75] = { SSE_DUMMY, SSE_DUMMY }, /* pcmpeqw */
+ [0x76] = { SSE_DUMMY, SSE_DUMMY }, /* pcmpeql */
[0x77] = { SSE_DUMMY }, /* emms */
[0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
[0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
@@ -4415,6 +4415,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
return;
}
} else {
+ int vec_len = is_xmm ? 16 : 8;
+ int xmm_ofs = is_xmm ? offsetof(ZMMReg, ZMM_X(0)) : 0;
+
/* generic MMX or SSE operation */
switch(b) {
case 0x70: /* pshufx insn */
@@ -4532,6 +4535,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
break;
+ case 0x64: /* pcmpgtb */
+ case 0x65: /* pcmpgtw */
+ case 0x66: /* pcmpgtl */
+ op1_offset += xmm_ofs;
+ op2_offset += xmm_ofs;
+ tcg_gen_gvec_cmp(TCG_COND_GT, b - 0x64, op1_offset, op1_offset,
+ op2_offset, vec_len, vec_len);
+ break;
+ case 0x74: /* pcmpeqb */
+ case 0x75: /* pcmpeqw */
+ case 0x76: /* pcmpeql */
+ op1_offset += xmm_ofs;
+ op2_offset += xmm_ofs;
+ tcg_gen_gvec_cmp(TCG_COND_EQ, b - 0x74, op1_offset, op1_offset,
+ op2_offset, vec_len, vec_len);
+ break;
default:
tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
--
2.34.1
- [PATCH 0/6] target/i386: Host vector ops for high-usage SSE, Richard Henderson, 2022/08/22
- [PATCH 1/6] target/i386: Define XMMReg and access macros, Richard Henderson, 2022/08/22
- [PATCH 2/6] target/i386: Use tcg gvec for gen_op_movo, Richard Henderson, 2022/08/22
- [PATCH 3/6] target/i386: Use tcg gvec for pcmp{eq,gt}*, Richard Henderson <=
- [PATCH 5/6] target/i386: Use tcg gvec for pand, pandn, por, pxor, Richard Henderson, 2022/08/22
- [PATCH 4/6] target/i386: Use tcg gvec for p{add,sub}*, Richard Henderson, 2022/08/22
- [PATCH 6/6] target/i386: Use tcg gvec ops for pmovmskb, Richard Henderson, 2022/08/22