[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 8/9] target/arm: Implement FP data-processing (2 source) for fp16
From: Alex Bennée
Subject: Re: [Qemu-devel] [PATCH 8/9] target/arm: Implement FP data-processing (2 source) for fp16
Date: Tue, 01 May 2018 12:13:38 +0100
User-agent: mu4e 1.1.0; emacs 26.1
Richard Henderson <address@hidden> writes:
> We missed all of the scalar fp16 binary operations.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> target/arm/translate-a64.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 73 insertions(+)
>
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 794ede7222..11b90b7eb0 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -532,6 +532,14 @@ static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
> return v;
> }
>
> +static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
> +{
> + TCGv_i32 v = tcg_temp_new_i32();
> +
> + tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
> + return v;
> +}
> +
> /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
> * If SVE is not enabled, then there are only 128 bits in the vector.
> */
> @@ -4968,6 +4976,61 @@ static void handle_fp_2src_double(DisasContext *s, int opcode,
> tcg_temp_free_i64(tcg_res);
> }
>
> +/* Floating-point data-processing (2 source) - half precision */
> +static void handle_fp_2src_half(DisasContext *s, int opcode,
> + int rd, int rn, int rm)
> +{
> + TCGv_i32 tcg_op1;
> + TCGv_i32 tcg_op2;
> + TCGv_i32 tcg_res;
> + TCGv_ptr fpst;
> +
> + tcg_res = tcg_temp_new_i32();
> + fpst = get_fpstatus_ptr(true);
> + tcg_op1 = read_fp_hreg(s, rn);
> + tcg_op2 = read_fp_hreg(s, rm);
> +
> + switch (opcode) {
> + case 0x0: /* FMUL */
> + gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
> + break;
> + case 0x1: /* FDIV */
> + gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
> + break;
> + case 0x2: /* FADD */
> + gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
> + break;
> + case 0x3: /* FSUB */
> + gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
> + break;
> + case 0x4: /* FMAX */
> + gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
> + break;
> + case 0x5: /* FMIN */
> + gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
> + break;
> + case 0x6: /* FMAXNM */
> + gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
> + break;
> + case 0x7: /* FMINNM */
> + gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
> + break;
> + case 0x8: /* FNMUL */
> + gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
> + tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
> + break;
> + default:
> + g_assert_not_reached();
> + }
> +
> + write_fp_sreg(s, rd, tcg_res);
If we are going to the trouble of adding a read_fp_hreg(), we might as
well do the same for the write case. Then we can also convert the various
read_vec_element_i32(s, tcg_vm, rm, 0, MO_16);
calls that we used before.
> +
> + tcg_temp_free_ptr(fpst);
> + tcg_temp_free_i32(tcg_op1);
> + tcg_temp_free_i32(tcg_op2);
> + tcg_temp_free_i32(tcg_res);
> +}
> +
> /* Floating point data-processing (2 source)
> * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
> * +---+---+---+-----------+------+---+------+--------+-----+------+------+
> @@ -5000,6 +5063,16 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn)
> }
> handle_fp_2src_double(s, opcode, rd, rn, rm);
> break;
> + case 3:
> + if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
> + unallocated_encoding(s);
> + return;
> + }
> + if (!fp_access_check(s)) {
> + return;
> + }
> + handle_fp_2src_half(s, opcode, rd, rn, rm);
> + break;
> default:
> unallocated_encoding(s);
> }
Otherwise:
Reviewed-by: Alex Bennée <address@hidden>
--
Alex Bennée
- Re: [Qemu-devel] [PATCH 8/9] target/arm: Implement FP data-processing (2 source) for fp16,
Alex Bennée <=