[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-arm] [PATCH v1 06/12] target/arm: Decode aa32 armv8.1 three sa
From: |
Alex Bennée |
Subject: |
Re: [Qemu-arm] [PATCH v1 06/12] target/arm: Decode aa32 armv8.1 three same |
Date: |
Mon, 13 Nov 2017 16:55:06 +0000 |
User-agent: |
mu4e 1.0-alpha2; emacs 26.0.90 |
Richard Henderson <address@hidden> writes:
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> target/arm/translate.c | 83
> ++++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 64 insertions(+), 19 deletions(-)
>
> diff --git a/target/arm/translate.c b/target/arm/translate.c
> index ab1a12a1b8..0cd58710b3 100644
> --- a/target/arm/translate.c
> +++ b/target/arm/translate.c
> @@ -25,6 +25,7 @@
> #include "disas/disas.h"
> #include "exec/exec-all.h"
> #include "tcg-op.h"
> +#include "tcg-op-gvec.h"
> #include "qemu/log.h"
> #include "qemu/bitops.h"
> #include "arm_ldst.h"
> @@ -5334,9 +5335,9 @@ static void gen_neon_narrow_op(int op, int u, int size,
> #define NEON_3R_VPMAX 20
> #define NEON_3R_VPMIN 21
> #define NEON_3R_VQDMULH_VQRDMULH 22
> -#define NEON_3R_VPADD 23
> +#define NEON_3R_VPADD_VQRDMLAH 23
> #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
> -#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
> +#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS : float fused multiply-add */
> #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
> #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
> #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
> @@ -5368,9 +5369,9 @@ static const uint8_t neon_3r_sizes[] = {
> [NEON_3R_VPMAX] = 0x7,
> [NEON_3R_VPMIN] = 0x7,
> [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
> - [NEON_3R_VPADD] = 0x7,
> + [NEON_3R_VPADD_VQRDMLAH] = 0x7,
> [NEON_3R_SHA] = 0xf, /* size field encodes op type */
> - [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
> + [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
> [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
> [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
> [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
> @@ -5556,6 +5557,7 @@ static const uint8_t neon_2rm_sizes[] = {
>
> static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
> {
> + void (*fn_gvec_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
> int op;
> int q;
> int rd, rn, rm;
> @@ -5600,12 +5602,12 @@ static int disas_neon_data_insn(DisasContext *s,
> uint32_t insn)
> if (q && ((rd | rn | rm) & 1)) {
> return 1;
> }
> - /*
> - * The SHA-1/SHA-256 3-register instructions require special
> treatment
> - * here, as their size field is overloaded as an op type selector,
> and
> - * they all consume their input in a single pass.
> - */
> - if (op == NEON_3R_SHA) {
> + switch (op) {
> + case NEON_3R_SHA:
> + /* The SHA-1/SHA-256 3-register instructions require special
> + * treatment here, as their size field is overloaded as an
> + * op type selector, and they all consume their input in a
> + * single pass. */
> if (!q) {
> return 1;
> }
> @@ -5642,6 +5644,53 @@ static int disas_neon_data_insn(DisasContext *s,
> uint32_t insn)
> tcg_temp_free_i32(tmp2);
> tcg_temp_free_i32(tmp3);
> return 0;
> +
> + case NEON_3R_VPADD_VQRDMLAH:
> + if (!u) {
> + break; /* VPADD */
> + }
> + /* VQRDMLAH */
> + switch (size) {
> + case 1:
> + fn_gvec_ptr = gen_helper_gvec_qrdmlah_s16;
> + break;
> + case 2:
> + fn_gvec_ptr = gen_helper_gvec_qrdmlah_s32;
> + break;
> + default:
> + return 1;
> + }
> + do_vqrdmlx:
> + if (arm_dc_feature(s, ARM_FEATURE_V8_1_SIMD)) {
> + int opr_sz = (1 + q) * 8;
> + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
> + vfp_reg_offset(1, rn),
> + vfp_reg_offset(1, rm), cpu_env,
> + opr_sz, opr_sz, 0, fn_gvec_ptr);
> + return 0;
> + }
> + return 1;
> +
> + case NEON_3R_VFM_VQRDMLSH:
> + if (!u) {
> + /* VFM, VFMS */
> + if ((5 & (1 << size)) == 0) {
> + return 1;
> + }
> + break;
> + }
> + /* VQRDMLSH */
> + switch (size) {
> + case 1:
> + fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s16;
> + break;
> + case 2:
> + fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s32;
> + break;
> + default:
> + return 1;
> + }
> + goto do_vqrdmlx;
Could we not take the opportunity to re-factor out the common bit rather
than make this mega function even more byzantine?
> }
> if (size == 3 && op != NEON_3R_LOGIC) {
> /* 64-bit element instructions. */
> @@ -5727,11 +5776,7 @@ static int disas_neon_data_insn(DisasContext *s,
> uint32_t insn)
> rm = rtmp;
> }
> break;
> - case NEON_3R_VPADD:
> - if (u) {
> - return 1;
> - }
> - /* Fall through */
> + case NEON_3R_VPADD_VQRDMLAH:
> case NEON_3R_VPMAX:
> case NEON_3R_VPMIN:
> pairwise = 1;
> @@ -5765,8 +5810,8 @@ static int disas_neon_data_insn(DisasContext *s,
> uint32_t insn)
> return 1;
> }
> break;
> - case NEON_3R_VFM:
> - if (!arm_dc_feature(s, ARM_FEATURE_VFP4) || u) {
> + case NEON_3R_VFM_VQRDMLSH:
> + if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
> return 1;
> }
> break;
> @@ -5963,7 +6008,7 @@ static int disas_neon_data_insn(DisasContext *s,
> uint32_t insn)
> }
> }
> break;
> - case NEON_3R_VPADD:
> + case NEON_3R_VPADD_VQRDMLAH:
> switch (size) {
> case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
> case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
> @@ -6062,7 +6107,7 @@ static int disas_neon_data_insn(DisasContext *s,
> uint32_t insn)
> }
> }
> break;
> - case NEON_3R_VFM:
> + case NEON_3R_VFM_VQRDMLSH:
> {
> /* VFMA, VFMS: fused multiply-add */
> TCGv_ptr fpstatus = get_fpstatus_ptr(1);
--
Alex Bennée
- Re: [Qemu-arm] [PATCH v1 06/12] target/arm: Decode aa32 armv8.1 three same,
Alex Bennée <=