[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 20/36] target/arm: Convert Neon 3-reg-same VQRDMLAH/VQRDMLSH to d
From: |
Peter Maydell |
Subject: |
[PATCH 20/36] target/arm: Convert Neon 3-reg-same VQRDMLAH/VQRDMLSH to decodetree |
Date: |
Thu, 30 Apr 2020 19:09:47 +0100 |
Convert the Neon VQRDMLAH and VQRDMLSH insns in the 3-reg-same group
to decodetree. These don't use do_3same() because they want to
operate on VFP double registers, whose offsets are different from the
neon_reg_offset() calculations do_3same does.
Signed-off-by: Peter Maydell <address@hidden>
---
target/arm/translate-neon.inc.c | 57 +++++++++++++++++++++++++++++++++
target/arm/translate.c | 36 ++-------------------
target/arm/neon-dp.decode | 3 ++
3 files changed, 62 insertions(+), 34 deletions(-)
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 50b77b6d714..c8beb048fa2 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -712,3 +712,60 @@ DO_3SAME_GVEC3_NO_SZ_3(VMLS, mls_op)
DO_3SAME_GVEC3_SHIFT(VSHL_S, sshl_op)
DO_3SAME_GVEC3_SHIFT(VSHL_U, ushl_op)
+
+static bool do_vqrdmlah(DisasContext *s, arg_3same *a,
+ gen_helper_gvec_3_ptr *fn)
+{
+ int vec_size = a->q ? 16 : 8;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+ !dc_isar_feature(aa32_rdm, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!fn) {
+ return false; /* bad size */
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(1, a->vn),
+ vfp_reg_offset(1, a->vm),
+ cpu_env, vec_size, vec_size, 0, fn);
+ return true;
+}
+
+static bool trans_VQRDMLAH_3s(DisasContext *s, arg_3same *a)
+{
+ static gen_helper_gvec_3_ptr * const fns[] = {
+ NULL,
+ gen_helper_gvec_qrdmlah_s16,
+ gen_helper_gvec_qrdmlah_s32,
+ NULL,
+ };
+ return do_vqrdmlah(s, a, fns[a->size]);
+}
+
+static bool trans_VQRDMLSH_3s(DisasContext *s, arg_3same *a)
+{
+ static gen_helper_gvec_3_ptr * const fns[] = {
+ NULL,
+ gen_helper_gvec_qrdmlsh_s16,
+ gen_helper_gvec_qrdmlsh_s32,
+ NULL,
+ };
+ return do_vqrdmlah(s, a, fns[a->size]);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index ad60b7190f9..adc42362469 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -3629,22 +3629,6 @@ static const uint8_t neon_2rm_sizes[] = {
[NEON_2RM_VCVT_UF] = 0x4,
};
-
-/* Expand v8.1 simd helper. */
-static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
- int q, int rd, int rn, int rm)
-{
- if (dc_isar_feature(aa32_rdm, s)) {
- int opr_sz = (1 + q) * 8;
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), cpu_env,
- opr_sz, opr_sz, 0, fn);
- return 0;
- }
- return 1;
-}
-
static void gen_ceq0_i32(TCGv_i32 d, TCGv_i32 a)
{
tcg_gen_setcondi_i32(TCG_COND_EQ, d, a, 0);
@@ -4818,15 +4802,7 @@ static int disas_neon_data_insn(DisasContext *s,
uint32_t insn)
if (!u) {
break; /* VPADD */
}
- /* VQRDMLAH */
- switch (size) {
- case 1:
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
- q, rd, rn, rm);
- case 2:
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
- q, rd, rn, rm);
- }
+ /* VQRDMLAH : handled by decodetree */
return 1;
case NEON_3R_VFM_VQRDMLSH:
@@ -4837,15 +4813,7 @@ static int disas_neon_data_insn(DisasContext *s,
uint32_t insn)
}
break;
}
- /* VQRDMLSH */
- switch (size) {
- case 1:
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
- q, rd, rn, rm);
- case 2:
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
- q, rd, rn, rm);
- }
+ /* VQRDMLSH : handled by decodetree */
return 1;
case NEON_3R_VADD_VSUB:
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index ec3a92fe753..ce0db476c88 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -84,3 +84,6 @@ VMLS_3s 1111 001 1 0 . .. .... .... 1001 . . . 0
.... @3same
VMUL_3s 1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
VMUL_p_3s 1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
+
+VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
+VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
--
2.20.1
- [PATCH 16/36] target/arm: Convert Neon 3-reg-same VMAX/VMIN to decodetree, (continued)
- [PATCH 16/36] target/arm: Convert Neon 3-reg-same VMAX/VMIN to decodetree, Peter Maydell, 2020/04/30
- [PATCH 14/36] target/arm: Convert Neon 3-reg-same VADD/VSUB to decodetree, Peter Maydell, 2020/04/30
- [PATCH 19/36] target/arm: Convert Neon 3-reg-same VMUL, VMLA, VMLS, VSHL to decodetree, Peter Maydell, 2020/04/30
- [PATCH 17/36] target/arm: Convert Neon 3-reg-same comparisons to decodetree, Peter Maydell, 2020/04/30
- [PATCH 18/36] target/arm: Convert Neon 3-reg-same VQADD/VQSUB to decodetree, Peter Maydell, 2020/04/30
- [PATCH 20/36] target/arm: Convert Neon 3-reg-same VQRDMLAH/VQRDMLSH to decodetree,
Peter Maydell <=
- [PATCH 21/36] target/arm: Convert Neon 3-reg-same SHA to decodetree, Peter Maydell, 2020/04/30
- [PATCH 22/36] target/arm: Move gen_ function typedefs to translate.h, Peter Maydell, 2020/04/30
- [PATCH 23/36] target/arm: Convert Neon 64-bit element 3-reg-same insns, Peter Maydell, 2020/04/30
- [PATCH 24/36] target/arm: Convert Neon VHADD 3-reg-same insns, Peter Maydell, 2020/04/30