[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 45/57] target-i386: Implement MOVBE
From: |
Richard Henderson |
Subject: |
[Qemu-devel] [PATCH 45/57] target-i386: Implement MOVBE |
Date: |
Tue, 19 Feb 2013 09:40:19 -0800 |
Signed-off-by: Richard Henderson <address@hidden>
---
target-i386/cpu.c | 16 +++++--
target-i386/translate.c | 122 ++++++++++++++++++++++++++++++++++++++----------
2 files changed, 110 insertions(+), 28 deletions(-)
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index dfcf86e..0f19533 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -389,10 +389,15 @@ typedef struct x86_def_t {
CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */
#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | \
CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | CPUID_EXT_POPCNT | \
- CPUID_EXT_HYPERVISOR)
+ CPUID_EXT_MOVBE | CPUID_EXT_HYPERVISOR)
/* missing:
- CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_EST,
- CPUID_EXT_TM2, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_XSAVE */
+ CPUID_EXT_PCLMULQDQ, CPUID_EXT_DTES64, CPUID_EXT_DSCPL,
+ CPUID_EXT_VMX, CPUID_EXT_SMX, CPUID_EXT_EST, CPUID_EXT_TM2,
+ CPUID_EXT_CID, CPUID_EXT_FMA, CPUID_EXT_XTPR, CPUID_EXT_PDCM,
+ CPUID_EXT_PCID, CPUID_EXT_DCA, CPUID_EXT_SSE41, CPUID_EXT_SSE42,
+ CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AES,
+ CPUID_EXT_XSAVE, CPUID_EXT_OSXSAVE, CPUID_EXT_AVX,
+ CPUID_EXT_F16C, CPUID_EXT_RDRAND */
#define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT)
@@ -402,6 +407,11 @@ typedef struct x86_def_t {
CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
#define TCG_SVM_FEATURES 0
#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP)
+ /* missing:
+ CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_BMI1, CPUID_7_0_EBX_HLE,
+ CPUID_7_0_EBX_AVX2, CPUID_7_0_EBX_BMI2, CPUID_7_0_EBX_ERMS,
+ CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED,
+ CPUID_7_0_EBX_ADX */
/* built-in CPU model definitions
*/
diff --git a/target-i386/translate.c b/target-i386/translate.c
index f824b99..5a91ff1 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -3837,11 +3837,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
reg = ((modrm >> 3) & 7) | rex_r;
gen_op_mov_reg_T0(OT_LONG, reg);
break;
+
case 0x138:
- if (s->prefix & PREFIX_REPNZ)
- goto crc32;
case 0x038:
b = modrm;
+ if ((b & 0xf0) == 0xf0) {
+ goto do_0f_38_fx;
+ }
modrm = cpu_ldub_code(env, s->pc++);
rm = modrm & 7;
reg = ((modrm >> 3) & 7) | rex_r;
@@ -3914,36 +3916,106 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
set_cc_op(s, CC_OP_EFLAGS);
}
break;
- case 0x338: /* crc32 */
- crc32:
- b = modrm;
+
+ case 0x238:
+ case 0x338:
+ do_0f_38_fx:
+ /* Various integer extensions at 0f 38 f[0-f]. */
+ b = modrm | (b1 << 8);
modrm = cpu_ldub_code(env, s->pc++);
reg = ((modrm >> 3) & 7) | rex_r;
- if (b != 0xf0 && b != 0xf1)
- goto illegal_op;
- if (!(s->cpuid_ext_features & CPUID_EXT_SSE42))
- goto illegal_op;
+ switch (b) {
+ case 0x3f0: /* crc32 Gd,Eb */
+ case 0x3f1: /* crc32 Gd,Ey */
+ do_crc32:
+ if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
+ goto illegal_op;
+ }
+ if ((b & 0xff) == 0xf0) {
+ ot = OT_BYTE;
+ } else if (s->dflag != 2) {
+ ot = (s->prefix & PREFIX_DATA ? OT_WORD : OT_LONG);
+ } else {
+ ot = OT_QUAD;
+ }
- if (b == 0xf0)
- ot = OT_BYTE;
- else if (b == 0xf1 && s->dflag != 2)
- if (s->prefix & PREFIX_DATA)
- ot = OT_WORD;
- else
- ot = OT_LONG;
- else
- ot = OT_QUAD;
+ gen_op_mov_TN_reg(OT_LONG, 0, reg);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
+ cpu_T[0], tcg_const_i32(8 << ot));
- gen_op_mov_TN_reg(OT_LONG, 0, reg);
- tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
- cpu_T[0], tcg_const_i32(8 << ot));
+ ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+ gen_op_mov_reg_T0(ot, reg);
+ break;
- ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
- gen_op_mov_reg_T0(ot, reg);
+ case 0x1f0: /* crc32 or movbe */
+ case 0x1f1:
+ /* For these insns, the f2 prefix is supposed to have priority
+ over the 66 prefix, but that's not what we implement above
+ when setting b1. */
+ if (s->prefix & PREFIX_REPNZ) {
+ goto do_crc32;
+ }
+ /* FALLTHRU */
+ case 0x0f0: /* movbe Gy,My */
+ case 0x0f1: /* movbe My,Gy */
+ if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
+ goto illegal_op;
+ }
+ if (s->dflag != 2) {
+ ot = (s->prefix & PREFIX_DATA ? OT_WORD : OT_LONG);
+ } else {
+ ot = OT_QUAD;
+ }
+
+ /* Load the data incoming to the bswap. Note that the TCG
+ implementation of bswap requires the input be zero
+ extended. In the case of the loads, we simply know that
+ gen_op_ld_v via gen_ldst_modrm does that already. */
+ if ((b & 1) == 0) {
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ } else {
+ switch (ot) {
+ case OT_WORD:
+ tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[reg]);
+ break;
+ default:
+ tcg_gen_ext32u_tl(cpu_T[0], cpu_regs[reg]);
+ break;
+ case OT_QUAD:
+ tcg_gen_mov_tl(cpu_T[0], cpu_regs[reg]);
+ break;
+ }
+ }
+
+ switch (ot) {
+ case OT_WORD:
+ tcg_gen_bswap16_tl(cpu_T[0], cpu_T[0]);
+ break;
+ default:
+ tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]);
+ break;
+#ifdef TARGET_X86_64
+ case OT_QUAD:
+ tcg_gen_bswap64_tl(cpu_T[0], cpu_T[0]);
+ break;
+#endif
+ }
+
+ if ((b & 1) == 0) {
+ gen_op_mov_reg_T0(ot, reg);
+ } else {
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+ }
+ break;
+
+ default:
+ goto illegal_op;
+ }
break;
+
case 0x03a:
case 0x13a:
b = modrm;
--
1.8.1.2
- [Qemu-devel] [PATCH 37/57] target-i386: introduce gen_jcc1_noeob, (continued)
- [Qemu-devel] [PATCH 37/57] target-i386: introduce gen_jcc1_noeob, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 39/57] target-i386: optimize flags checking after sub using CC_SRCT, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 42/57] target-i386: Use CC_SRC2 for ADC and SBB, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 28/57] target-i386: introduce CCPrepare, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 30/57] target-i386: use CCPrepare to generate conditional jumps, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 44/57] target-i386: Decode the VEX prefixes, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 40/57] target-i386: Don't reference ENV through most of cc helpers, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 43/57] target-i386: Tidy prefix parsing, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 08/57] target-i386: move eflags computation closer to gen_op_set_cc_op, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 48/57] target-i386: Implement BLSR, BLSMSK, BLSI, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 45/57] target-i386: Implement MOVBE,
Richard Henderson <=
- [Qemu-devel] [PATCH 32/57] target-i386: cleanup temporary macros for CCPrepare, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 47/57] target-i386: Implement BEXTR, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 27/57] target-i386: optimize setcc instructions, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 19/57] target-i386: no need to flush out cc_op before gen_eob, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 35/57] target-i386: kill cpu_T3, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 50/57] target-i386: Implement MULX, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 09/57] target-i386: compute eflags outside rcl/rcr helper, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 06/57] target-i386: drop cc_op argument of gen_jcc1, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 46/57] target-i386: Implement ANDN, Richard Henderson, 2013/02/19
- [Qemu-devel] [PATCH 54/57] target-i386: Implement ADX extension, Richard Henderson, 2013/02/19