[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 31/38] target/ppc: Use vector variable shifts for VS
From: Richard Henderson
Subject: [Qemu-devel] [PATCH 31/38] target/ppc: Use vector variable shifts for VS{L, R, RA}{B, H, W, D}
Date: Fri, 19 Apr 2019 21:34:35 -1000
Signed-off-by: Richard Henderson <address@hidden>
---
target/ppc/helper.h | 24 ++--
target/ppc/int_helper.c | 6 +-
target/ppc/translate/vmx-impl.inc.c | 168 ++++++++++++++++++++++++++--
3 files changed, 172 insertions(+), 26 deletions(-)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 638a6e99c4..5416dc55ce 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -180,18 +180,18 @@ DEF_HELPER_3(vmuloub, void, avr, avr, avr)
DEF_HELPER_3(vmulouh, void, avr, avr, avr)
DEF_HELPER_3(vmulouw, void, avr, avr, avr)
DEF_HELPER_3(vmuluwm, void, avr, avr, avr)
-DEF_HELPER_3(vsrab, void, avr, avr, avr)
-DEF_HELPER_3(vsrah, void, avr, avr, avr)
-DEF_HELPER_3(vsraw, void, avr, avr, avr)
-DEF_HELPER_3(vsrad, void, avr, avr, avr)
-DEF_HELPER_3(vsrb, void, avr, avr, avr)
-DEF_HELPER_3(vsrh, void, avr, avr, avr)
-DEF_HELPER_3(vsrw, void, avr, avr, avr)
-DEF_HELPER_3(vsrd, void, avr, avr, avr)
-DEF_HELPER_3(vslb, void, avr, avr, avr)
-DEF_HELPER_3(vslh, void, avr, avr, avr)
-DEF_HELPER_3(vslw, void, avr, avr, avr)
-DEF_HELPER_3(vsld, void, avr, avr, avr)
+DEF_HELPER_4(vsrab, void, avr, avr, avr, i32)
+DEF_HELPER_4(vsrah, void, avr, avr, avr, i32)
+DEF_HELPER_4(vsraw, void, avr, avr, avr, i32)
+DEF_HELPER_4(vsrad, void, avr, avr, avr, i32)
+DEF_HELPER_4(vsrb, void, avr, avr, avr, i32)
+DEF_HELPER_4(vsrh, void, avr, avr, avr, i32)
+DEF_HELPER_4(vsrw, void, avr, avr, avr, i32)
+DEF_HELPER_4(vsrd, void, avr, avr, avr, i32)
+DEF_HELPER_4(vslb, void, avr, avr, avr, i32)
+DEF_HELPER_4(vslh, void, avr, avr, avr, i32)
+DEF_HELPER_4(vslw, void, avr, avr, avr, i32)
+DEF_HELPER_4(vsld, void, avr, avr, avr, i32)
DEF_HELPER_3(vslo, void, avr, avr, avr)
DEF_HELPER_3(vsro, void, avr, avr, avr)
DEF_HELPER_3(vsrv, void, avr, avr, avr)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 162add561e..35ec1ccdfb 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1770,7 +1770,8 @@ VSHIFT(r, 0)
#undef VSHIFT
#define VSL(suffix, element, mask) \
- void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
+ void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, \
+ ppc_avr_t *b, uint32_t desc) \
{ \
int i; \
\
@@ -1958,7 +1959,8 @@ VNEG(vnegd, s64)
#undef VNEG
#define VSR(suffix, element, mask) \
- void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
+ void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, \
+ ppc_avr_t *b, uint32_t desc) \
{ \
int i; \
\
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index c83e605a00..8cc2e99963 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -511,6 +511,150 @@ static void gen_vmrgow(DisasContext *ctx)
tcg_temp_free_i64(avr);
}
+static void gen_vsl_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(b);
+ tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
+ tcg_gen_and_vec(vece, b, b, t);
+ tcg_temp_free_vec(t);
+ tcg_gen_shlv_vec(vece, d, a, b);
+}
+
+static void gen_vslw_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_andi_i32(b, b, 31);
+ tcg_gen_shl_i32(d, a, b);
+}
+
+static void gen_vsld_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_andi_i64(b, b, 63);
+ tcg_gen_shl_i64(d, a, b);
+}
+
+static void gen__vsl(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode shlv_list[] = { INDEX_op_shlv_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = gen_vsl_vec,
+ .fno = gen_helper_vslb,
+ .opt_opc = shlv_list,
+ .vece = MO_8 },
+ { .fniv = gen_vsl_vec,
+ .fno = gen_helper_vslh,
+ .opt_opc = shlv_list,
+ .vece = MO_16 },
+ { .fni4 = gen_vslw_i32,
+ .fniv = gen_vsl_vec,
+ .fno = gen_helper_vslw,
+ .opt_opc = shlv_list,
+ .vece = MO_32 },
+ { .fni8 = gen_vsld_i64,
+ .fniv = gen_vsl_vec,
+ .fno = gen_helper_vsld,
+ .opt_opc = shlv_list,
+ .vece = MO_64 }
+ };
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+static void gen_vsr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(b);
+ tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
+ tcg_gen_and_vec(vece, b, b, t);
+ tcg_temp_free_vec(t);
+ tcg_gen_shrv_vec(vece, d, a, b);
+}
+
+static void gen_vsrw_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_andi_i32(b, b, 31);
+ tcg_gen_shr_i32(d, a, b);
+}
+
+static void gen_vsrd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_andi_i64(b, b, 63);
+ tcg_gen_shr_i64(d, a, b);
+}
+
+static void gen__vsr(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode shrv_list[] = { INDEX_op_shrv_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = gen_vsr_vec,
+ .fno = gen_helper_vsrb,
+ .opt_opc = shrv_list,
+ .vece = MO_8 },
+ { .fniv = gen_vsr_vec,
+ .fno = gen_helper_vsrh,
+ .opt_opc = shrv_list,
+ .vece = MO_16 },
+ { .fni4 = gen_vsrw_i32,
+ .fniv = gen_vsr_vec,
+ .fno = gen_helper_vsrw,
+ .opt_opc = shrv_list,
+ .vece = MO_32 },
+ { .fni8 = gen_vsrd_i64,
+ .fniv = gen_vsr_vec,
+ .fno = gen_helper_vsrd,
+ .opt_opc = shrv_list,
+ .vece = MO_64 }
+ };
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+static void gen_vsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(b);
+ tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
+ tcg_gen_and_vec(vece, b, b, t);
+ tcg_temp_free_vec(t);
+ tcg_gen_sarv_vec(vece, d, a, b);
+}
+
+static void gen_vsraw_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_andi_i32(b, b, 31);
+ tcg_gen_sar_i32(d, a, b);
+}
+
+static void gen_vsrad_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_andi_i64(b, b, 63);
+ tcg_gen_sar_i64(d, a, b);
+}
+
+static void gen__vsra(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode sarv_list[] = { INDEX_op_sarv_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = gen_vsra_vec,
+ .fno = gen_helper_vsrab,
+ .opt_opc = sarv_list,
+ .vece = MO_8 },
+ { .fniv = gen_vsra_vec,
+ .fno = gen_helper_vsrah,
+ .opt_opc = sarv_list,
+ .vece = MO_16 },
+ { .fni4 = gen_vsraw_i32,
+ .fniv = gen_vsra_vec,
+ .fno = gen_helper_vsraw,
+ .opt_opc = sarv_list,
+ .vece = MO_32 },
+ { .fni8 = gen_vsrad_i64,
+ .fniv = gen_vsra_vec,
+ .fno = gen_helper_vsrad,
+ .opt_opc = sarv_list,
+ .vece = MO_64 }
+ };
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
GEN_VXFORM(vmuloub, 4, 0);
GEN_VXFORM(vmulouh, 4, 1);
GEN_VXFORM(vmulouw, 4, 2);
@@ -526,21 +670,21 @@ GEN_VXFORM(vmuleuw, 4, 10);
GEN_VXFORM(vmulesb, 4, 12);
GEN_VXFORM(vmulesh, 4, 13);
GEN_VXFORM(vmulesw, 4, 14);
-GEN_VXFORM(vslb, 2, 4);
-GEN_VXFORM(vslh, 2, 5);
-GEN_VXFORM(vslw, 2, 6);
+GEN_VXFORM_V(vslb, MO_8, gen__vsl, 2, 4);
+GEN_VXFORM_V(vslh, MO_16, gen__vsl, 2, 5);
+GEN_VXFORM_V(vslw, MO_32, gen__vsl, 2, 6);
+GEN_VXFORM_V(vsld, MO_64, gen__vsl, 2, 23);
GEN_VXFORM(vrlwnm, 2, 6);
GEN_VXFORM_DUAL(vslw, PPC_ALTIVEC, PPC_NONE, \
vrlwnm, PPC_NONE, PPC2_ISA300)
-GEN_VXFORM(vsld, 2, 23);
-GEN_VXFORM(vsrb, 2, 8);
-GEN_VXFORM(vsrh, 2, 9);
-GEN_VXFORM(vsrw, 2, 10);
-GEN_VXFORM(vsrd, 2, 27);
-GEN_VXFORM(vsrab, 2, 12);
-GEN_VXFORM(vsrah, 2, 13);
-GEN_VXFORM(vsraw, 2, 14);
-GEN_VXFORM(vsrad, 2, 15);
+GEN_VXFORM_V(vsrb, MO_8, gen__vsr, 2, 8);
+GEN_VXFORM_V(vsrh, MO_16, gen__vsr, 2, 9);
+GEN_VXFORM_V(vsrw, MO_32, gen__vsr, 2, 10);
+GEN_VXFORM_V(vsrd, MO_64, gen__vsr, 2, 27);
+GEN_VXFORM_V(vsrab, MO_8, gen__vsra, 2, 12);
+GEN_VXFORM_V(vsrah, MO_16, gen__vsra, 2, 13);
+GEN_VXFORM_V(vsraw, MO_32, gen__vsra, 2, 14);
+GEN_VXFORM_V(vsrad, MO_64, gen__vsra, 2, 15);
GEN_VXFORM(vsrv, 2, 28);
GEN_VXFORM(vslv, 2, 29);
GEN_VXFORM(vslo, 6, 16);
--
2.17.1
- [Qemu-devel] [PATCH 18/38] tcg/i386: Support vector scalar shift opcodes, (continued)
- [Qemu-devel] [PATCH 18/38] tcg/i386: Support vector scalar shift opcodes, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 16/38] tcg: Specify optional vector requirements with a list, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 23/38] target/ppc: Use tcg_gen_abs_tl, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 24/38] target/s390x: Use tcg_gen_abs_i64, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 25/38] target/xtensa: Use tcg_gen_abs_i32, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 21/38] target/arm: Use tcg_gen_abs_i64 and tcg_gen_gvec_abs, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 34/38] tcg: Do not recreate INDEX_op_neg_vec unless supported, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 37/38] tcg/aarch64: Use MVNI for expansion of dupi, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 26/38] tcg/i386: Support vector absolute value, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 31/38] target/ppc: Use vector variable shifts for VS{L, R, RA}{B, H, W, D}, Richard Henderson <=
- [Qemu-devel] [PATCH 35/38] tcg: Introduce do_op3_nofail for vector expansion, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 33/38] tcg/aarch64: Do not advertise minmax for MO_64, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 29/38] tcg/i386: Support vector comparison select value, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 38/38] tcg/aarch64: Use ORRI and BICI for vector logical operations, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 28/38] tcg: Add support for vector comparison select, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 22/38] target/cris: Use tcg_gen_abs_tl, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 36/38] tcg: Expand vector minmax using cmp+cmpsel, Richard Henderson, 2019/04/20
- [Qemu-devel] [PATCH 20/38] tcg: Add support for vector absolute value, Richard Henderson, 2019/04/20