[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 10/22] target/i386: add X86_SPECIALs for MOVSX and MOVZX
From: |
Paolo Bonzini |
Subject: |
[PATCH 10/22] target/i386: add X86_SPECIALs for MOVSX and MOVZX |
Date: |
Fri, 22 Dec 2023 19:15:51 +0100 |
Usually the registers are just moved into s->T0 without much care for
their operand size. However, in some cases we can get more efficient
code if the operand fetching logic and the emission function agree on
whether the operand is loaded sign-extended or zero-extended.
The current uses are mostly demonstrative and only reduce the code
in the emission functions, because the instructions do not support
memory operands. However, the logic is generic and applies to several
more instructions such as MOVSXD (aka movslq), one-byte shift
instructions, multiplications, XLAT, and indirect calls/jumps.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.c.inc | 18 ++++++++++----
target/i386/tcg/decode-new.h | 4 +++
target/i386/tcg/emit.c.inc | 42 +++++++++++++++++---------------
3 files changed, 40 insertions(+), 24 deletions(-)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 00fdb243857..d7a86d96c0c 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -156,6 +156,8 @@
#define op0_Rd .special = X86_SPECIAL_Op0_Rd,
#define op2_Ry .special = X86_SPECIAL_Op2_Ry,
#define avx_movx .special = X86_SPECIAL_AVXExtMov,
+#define sextT0 .special = X86_SPECIAL_SExtT0,
+#define zextT0 .special = X86_SPECIAL_ZExtT0,
#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
@@ -571,8 +573,8 @@ static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
[5] = {
X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)),
{},
- X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 cpuid(BMI2)),
- X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 cpuid(BMI2)),
+ X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
+ X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
{},
},
[6] = {
@@ -583,10 +585,10 @@ static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
{},
},
[7] = {
- X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 cpuid(BMI1)),
+ X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)),
X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
- X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
- X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
+ X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 sextT0 cpuid(BMI1)),
+ X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)),
{},
},
};
@@ -1905,6 +1907,12 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
}
break;
+ case X86_SPECIAL_SExtT0:
+ case X86_SPECIAL_ZExtT0:
+ /* Handled in gen_load. */
+ assert(decode.op[1].unit == X86_OP_INT);
+ break;
+
default:
break;
}
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index b253f7457ae..70b6717227f 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -191,6 +191,10 @@ typedef enum X86InsnSpecial {
* become P/P/Q/N, and size "x" becomes "q".
*/
X86_SPECIAL_MMX,
+
+ /* When loaded into s->T0, register operand 1 is zero/sign extended. */
+ X86_SPECIAL_SExtT0,
+ X86_SPECIAL_ZExtT0,
} X86InsnSpecial;
/*
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index f5e44117eab..4c2006fdd09 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -232,9 +232,30 @@ static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
break;
case X86_OP_INT:
if (op->has_ea) {
- gen_op_ld_v(s, op->ot, v, s->A0);
+ if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) {
+ gen_op_ld_v(s, op->ot | MO_SIGN, v, s->A0);
+ } else {
+ gen_op_ld_v(s, op->ot, v, s->A0);
+ }
+
+ } else if (op->ot == MO_8 && byte_reg_is_xH(s, op->n)) {
+ if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) {
+ tcg_gen_sextract_tl(v, cpu_regs[op->n - 4], 8, 8);
+ } else {
+ tcg_gen_extract_tl(v, cpu_regs[op->n - 4], 8, 8);
+ }
+
+ } else if (op->ot < MO_TL && v == s->T0 &&
+ (decode->e.special == X86_SPECIAL_SExtT0 ||
+ decode->e.special == X86_SPECIAL_ZExtT0)) {
+ if (decode->e.special == X86_SPECIAL_SExtT0) {
+ tcg_gen_ext_tl(v, cpu_regs[op->n], op->ot | MO_SIGN);
+ } else {
+ tcg_gen_ext_tl(v, cpu_regs[op->n], op->ot);
+ }
+
} else {
- gen_op_mov_v_reg(s, op->ot, v, op->n);
+ tcg_gen_mov_tl(v, cpu_regs[op->n]);
}
break;
case X86_OP_IMM:
@@ -1084,9 +1105,6 @@ static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
* Shifts larger than operand size get zeros.
*/
tcg_gen_ext8u_tl(s->A0, s->T1);
- if (TARGET_LONG_BITS == 64 && ot == MO_32) {
- tcg_gen_ext32u_tl(s->T0, s->T0);
- }
tcg_gen_shr_tl(s->T0, s->T0, s->A0);
tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero);
@@ -1428,19 +1446,11 @@ static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
- MemOp ot = decode->op[1].ot;
- if (ot < MO_64) {
- tcg_gen_ext32u_tl(s->T0, s->T0);
- }
gen_helper_pdep(s->T0, s->T0, s->T1);
}
static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
- MemOp ot = decode->op[1].ot;
- if (ot < MO_64) {
- tcg_gen_ext32u_tl(s->T0, s->T0);
- }
gen_helper_pext(s->T0, s->T0, s->T1);
}
@@ -1796,9 +1806,6 @@ static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
mask = ot == MO_64 ? 63 : 31;
tcg_gen_andi_tl(s->T1, s->T1, mask);
- if (ot != MO_64) {
- tcg_gen_ext32s_tl(s->T0, s->T0);
- }
tcg_gen_sar_tl(s->T0, s->T0, s->T1);
}
@@ -1873,9 +1880,6 @@ static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
mask = ot == MO_64 ? 63 : 31;
tcg_gen_andi_tl(s->T1, s->T1, mask);
- if (ot != MO_64) {
- tcg_gen_ext32u_tl(s->T0, s->T0);
- }
tcg_gen_shr_tl(s->T0, s->T0, s->T1);
}
--
2.43.0
- Re: [PATCH 08/22] target/i386: avoid trunc and ext for MULX and RORX, (continued)
- [PATCH 11/22] target/i386: do not decode string source/destination into decode->mem, Paolo Bonzini, 2023/12/22
- [PATCH 12/22] target/i386: do not clobber A0 in POP translation, Paolo Bonzini, 2023/12/22
- [PATCH 14/22] target/i386: split eflags computation out of gen_compute_eflags, Paolo Bonzini, 2023/12/22
- [PATCH 18/22] target/i386: prepare for implementation of STOS/SCAS in new decoder, Paolo Bonzini, 2023/12/22
- [PATCH 06/22] target/i386: document more deviations from the manual, Paolo Bonzini, 2023/12/22
- [PATCH 10/22] target/i386: add X86_SPECIALs for MOVSX and MOVZX,
Paolo Bonzini <=
- [PATCH 13/22] target/i386: do not clobber T0 on string operations, Paolo Bonzini, 2023/12/22
- [PATCH 16/22] target/i386: do not use s->tmp0 for jumps on ECX ==/!= 0, Paolo Bonzini, 2023/12/22
- [PATCH 15/22] target/i386: do not use s->tmp4 for push, Paolo Bonzini, 2023/12/22
- [PATCH 19/22] target/i386: move operand load and writeback out of gen_cmovcc1, Paolo Bonzini, 2023/12/22
- [PATCH 20/22] target/i386: adjust decoding of J operand, Paolo Bonzini, 2023/12/22