[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 40/47] target/arm: Implement fp16 for Neon VRINTX
From: Peter Maydell
Subject: [PULL 40/47] target/arm: Implement fp16 for Neon VRINTX
Date: Tue, 1 Sep 2020 16:18:16 +0100
Convert the Neon VRINTX insn to use gvec, and use this to implement
fp16 support for it.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200828183354.27913-42-peter.maydell@linaro.org
---
target/arm/helper.h | 3 +++
target/arm/vec_helper.c | 3 +++
target/arm/translate-neon.c.inc | 45 +++------------------------------
3 files changed, 9 insertions(+), 42 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 83f7804dfe9..cbdbf824d8d 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -640,6 +640,9 @@ DEF_HELPER_FLAGS_4(gvec_vcvt_rm_uh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vrint_rm_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vrint_rm_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vrintx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 7ddf1e791c9..20f153b47a1 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -750,6 +750,9 @@ DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16)
DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32)
DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64)
+DO_2OP(gvec_vrintx_h, float16_round_to_int, float16)
+DO_2OP(gvec_vrintx_s, float32_round_to_int, float32)
+
DO_2OP(gvec_sitos, helper_vfp_sitos, int32_t)
DO_2OP(gvec_uitos, helper_vfp_uitos, uint32_t)
DO_2OP(gvec_tosizs, helper_vfp_tosizs, float32)
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
index 77a85d468fb..5726afe4d72 100644
--- a/target/arm/translate-neon.c.inc
+++ b/target/arm/translate-neon.c.inc
@@ -3679,47 +3679,6 @@ static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
return do_2misc(s, a, fn[a->size]);
}
-static bool do_2misc_fp(DisasContext *s, arg_2misc *a,
- NeonGenOneSingleOpFn *fn)
-{
- int pass;
- TCGv_ptr fpst;
-
- /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return false;
- }
-
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vm) & 0x10)) {
- return false;
- }
-
- if (a->size != 2) {
- /* TODO: FP16 will be the size == 1 case */
- return false;
- }
-
- if ((a->vd | a->vm) & a->q) {
- return false;
- }
-
- if (!vfp_access_check(s)) {
- return true;
- }
-
- fpst = fpstatus_ptr(FPST_STD);
- for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
- TCGv_i32 tmp = neon_load_reg(a->vm, pass);
- fn(tmp, tmp, fpst);
- neon_store_reg(a->vd, pass, tmp);
- }
- tcg_temp_free_ptr(fpst);
-
- return true;
-}
-
#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \
static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
uint32_t rm_ofs, \
@@ -3758,12 +3717,14 @@ DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)
+DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)
+
static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
{
if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
return false;
}
- return do_2misc_fp(s, a, gen_helper_rints_exact);
+ return trans_VRINTX_impl(s, a);
}
#define DO_VEC_RMODE(INSN, RMODE, OP) \
--
2.20.1
- [PULL 30/47] target/arm: Implement fp16 for Neon VFMA, VMFS, (continued)
- [PULL 30/47] target/arm: Implement fp16 for Neon VFMA, VMFS, Peter Maydell, 2020/09/01
- [PULL 26/47] target/arm: Implement fp16 for VACGE, VACGT, Peter Maydell, 2020/09/01
- [PULL 31/47] target/arm: Implement fp16 for Neon fp compare-vs-0, Peter Maydell, 2020/09/01
- [PULL 33/47] target/arm: Implement fp16 for Neon VRSQRTS, Peter Maydell, 2020/09/01
- [PULL 34/47] target/arm: Implement fp16 for Neon pairwise fp ops, Peter Maydell, 2020/09/01
- [PULL 35/47] target/arm: Implement fp16 for Neon float-integer VCVT, Peter Maydell, 2020/09/01
- [PULL 32/47] target/arm: Implement fp16 for Neon VRECPS, Peter Maydell, 2020/09/01
- [PULL 36/47] target/arm: Convert Neon VCVT fixed-point to gvec, Peter Maydell, 2020/09/01
- [PULL 37/47] target/arm: Implement fp16 for Neon VCVT fixed-point, Peter Maydell, 2020/09/01
- [PULL 39/47] target/arm: Implement fp16 for Neon VRINT-with-specified-rounding-mode, Peter Maydell, 2020/09/01
- [PULL 40/47] target/arm: Implement fp16 for Neon VRINTX, Peter Maydell <=
- [PULL 38/47] target/arm: Implement fp16 for Neon VCVT with rounding modes, Peter Maydell, 2020/09/01
- [PULL 41/47] target/arm/vec_helper: Handle oprsz less than 16 bytes in indexed operations, Peter Maydell, 2020/09/01
- [PULL 42/47] target/arm/vec_helper: Add gvec fp indexed multiply-and-add operations, Peter Maydell, 2020/09/01
- [PULL 43/47] target/arm: Implement fp16 for Neon VMUL, VMLA, VMLS, Peter Maydell, 2020/09/01
- [PULL 44/47] target/arm: Enable FP16 in '-cpu max', Peter Maydell, 2020/09/01
- [PULL 45/47] hw/arm/sbsa-ref: add "reg" property to DT cpu nodes, Peter Maydell, 2020/09/01
- [PULL 47/47] hw/arm/sbsa-ref : Add embedded controller in secure memory, Peter Maydell, 2020/09/01
- [PULL 46/47] hw/misc/sbsa_ec : Add an embedded controller for sbsa-ref, Peter Maydell, 2020/09/01
- Re: [PULL 00/47] target-arm queue, Peter Maydell, 2020/09/01
- Re: [PULL 00/47] target-arm queue, no-reply, 2020/09/02