[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH for-4.1 v3 09/17] tcg/i386: Implement tcg_out_dupm_vec
From: Richard Henderson
Subject: [Qemu-devel] [PATCH for-4.1 v3 09/17] tcg/i386: Implement tcg_out_dupm_vec
Date: Tue, 19 Mar 2019 10:21:18 -0700
At the same time, improve tcg_out_dupi_vec wrt broadcast
from the constant pool.
Signed-off-by: Richard Henderson <address@hidden>
---
tcg/i386/tcg-target.inc.c | 57 +++++++++++++++++++++++++++++----------
1 file changed, 43 insertions(+), 14 deletions(-)
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index ae88df440a..2b88f2054e 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -358,7 +358,6 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
#define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16)
#define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16)
-#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2)
#define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16)
#define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16)
#define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3)
@@ -457,6 +456,10 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_UD2 (0x0b | P_EXT)
#define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16)
#define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16)
+#define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16)
+#define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16)
+#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16)
+#define OPC_VBROADCASTSD (0x19 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
@@ -854,16 +857,17 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
return true;
}
+static const int avx2_dup_insn[4] = {
+ OPC_VPBROADCASTB, OPC_VPBROADCASTW,
+ OPC_VPBROADCASTD, OPC_VPBROADCASTQ,
+};
+
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg r, TCGReg a)
{
if (have_avx2) {
- static const int dup_insn[4] = {
- OPC_VPBROADCASTB, OPC_VPBROADCASTW,
- OPC_VPBROADCASTD, OPC_VPBROADCASTQ,
- };
int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
- tcg_out_vex_modrm(s, dup_insn[vece] + vex_l, r, 0, a);
+ tcg_out_vex_modrm(s, avx2_dup_insn[vece] + vex_l, r, 0, a);
} else {
switch (vece) {
case MO_8:
@@ -893,10 +897,35 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg r, TCGReg base, intptr_t offset)
{
- return false;
+ if (have_avx2) {
+ int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
+ tcg_out_vex_modrm_offset(s, avx2_dup_insn[vece] + vex_l,
+ r, 0, base, offset);
+ } else {
+ switch (vece) {
+ case MO_64:
+ tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSD, r, 0, base, offset);
+ break;
+ case MO_32:
+ tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset);
+ break;
+ case MO_16:
+ tcg_out_vex_modrm_offset(s, OPC_VPINSRW, r, r, base, offset);
+ tcg_out8(s, 0); /* imm8 */
+ tcg_out_dup_vec(s, type, vece, r, r);
+ break;
+ case MO_8:
+ tcg_out_vex_modrm_offset(s, OPC_VPINSRB, r, r, base, offset);
+ tcg_out8(s, 0); /* imm8 */
+ tcg_out_dup_vec(s, type, vece, r, r);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+ return true;
}
-
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
TCGReg ret, tcg_target_long arg)
{
@@ -917,16 +946,16 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
} else if (have_avx2) {
tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret);
} else {
- tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
+ tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD, ret);
}
new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
- } else if (have_avx2) {
- tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
- new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
} else {
- tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy, ret);
+ if (have_avx2) {
+ tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD + vex_l, ret);
+ } else {
+ tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
+ }
new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
- tcg_out_dup_vec(s, type, MO_32, ret, ret);
}
}
--
2.17.2
- [Qemu-devel] [PATCH for-4.1 v3 00/17] tcg/ppc: Add vector opcodes, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 01/17] target/arm: Fill in .opc for cmtst_op, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 02/17] tcg: Assert fixed_reg is read-only, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 04/17] tcg: Support cross-class moves without instruction support, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 03/17] tcg: Return bool success from tcg_out_mov, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 05/17] tcg: Allow add_vec, sub_vec, neg_vec, not_vec to be expanded, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 06/17] tcg: Promote tcg_out_{dup, dupi}_vec to backend interface, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 08/17] tcg: Add tcg_out_dupm_vec to the backend interface, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 09/17] tcg/i386: Implement tcg_out_dupm_vec, Richard Henderson <=
- [Qemu-devel] [PATCH for-4.1 v3 07/17] tcg: Manually expand INDEX_op_dup_vec, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 10/17] tcg/aarch64: Implement tcg_out_dupm_vec, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 11/17] tcg: Add INDEX_op_dup_mem_vec, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 13/17] tcg/ppc: Support vector shift by immediate, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 14/17] tcg/ppc: Support vector multiply, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 15/17] tcg/ppc: Update vector support to v2.06, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 16/17] tcg/ppc: Update vector support to v2.07, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 12/17] tcg/ppc: Initial backend support for Altivec, Richard Henderson, 2019/03/19
- [Qemu-devel] [PATCH for-4.1 v3 17/17] tcg/ppc: Update vector support to v3.00, Richard Henderson, 2019/03/19
- Re: [Qemu-devel] [PATCH for-4.1 v3 00/17] tcg/ppc: Add vector opcodes, no-reply, 2019/03/19