[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 16/18] target/arm: Implement MVE VCVT between single and half precision
From: Peter Maydell
Subject: [PATCH v2 16/18] target/arm: Implement MVE VCVT between single and half precision
Date: Thu, 26 Aug 2021 14:17:23 +0100
Implement the MVE VCVT instruction which converts between single
and half precision floating point.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
v1->v2: make do_vcvt_sh/do_vcvt_hs functions, not macros
---
target/arm/helper-mve.h | 5 +++
target/arm/mve.decode | 8 ++++
target/arm/mve_helper.c | 81 ++++++++++++++++++++++++++++++++++++++
target/arm/translate-mve.c | 14 +++++++
4 files changed, 108 insertions(+)
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index 6d4052a5269..f6345c7abbe 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -182,6 +182,11 @@ DEF_HELPER_FLAGS_4(mve_vcvt_rm_uh, TCG_CALL_NO_WG, void,
env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vcvt_rm_ss, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vcvt_rm_us, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcvtb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcvtt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcvtb_hs, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcvtt_hs, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
DEF_HELPER_FLAGS_3(mve_vmovnbb, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vmovnbh, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vmovntb, TCG_CALL_NO_WG, void, env, ptr, ptr)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index 410ea746fcf..32de4af3170 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -221,6 +221,8 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1
... 0 @2op
# The VSHLL T2 encoding is not a @2op pattern, but is here because it
# overlaps what would be size=0b11 VMULH/VRMULH
{
+ VCVTB_SH 111 0 1110 0 . 11 1111 ... 0 1110 0 0 . 0 ... 1 @1op_nosz
+
VMAXNMA 111 0 1110 0 . 11 1111 ... 0 1110 1 0 . 0 ... 1 @vmaxnma
size=2
VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1
@2_shll_esize_b
@@ -235,6 +237,8 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1
... 0 @2op
}
{
+ VCVTB_HS 111 1 1110 0 . 11 1111 ... 0 1110 0 0 . 0 ... 1 @1op_nosz
+
VMAXNMA 111 1 1110 0 . 11 1111 ... 0 1110 1 0 . 0 ... 1 @vmaxnma
size=1
VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1
@2_shll_esize_b
@@ -247,6 +251,8 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1
... 0 @2op
}
{
+ VCVTT_SH 111 0 1110 0 . 11 1111 ... 1 1110 0 0 . 0 ... 1 @1op_nosz
+
VMINNMA 111 0 1110 0 . 11 1111 ... 1 1110 1 0 . 0 ... 1 @vmaxnma
size=2
VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1
@2_shll_esize_b
VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1
@2_shll_esize_h
@@ -260,6 +266,8 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1
... 0 @2op
}
{
+ VCVTT_HS 111 1 1110 0 . 11 1111 ... 1 1110 0 0 . 0 ... 1 @1op_nosz
+
VMINNMA 111 1 1110 0 . 11 1111 ... 1 1110 1 0 . 0 ... 1 @vmaxnma
size=1
VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1
@2_shll_esize_b
VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1
@2_shll_esize_h
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index a793199fbee..1ed76ac5ed8 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -3332,3 +3332,84 @@ DO_VCVT_RMODE(vcvt_rm_sh, 2, uint16_t, helper_vfp_toshh)
DO_VCVT_RMODE(vcvt_rm_uh, 2, uint16_t, helper_vfp_touhh)
DO_VCVT_RMODE(vcvt_rm_ss, 4, uint32_t, helper_vfp_tosls)
DO_VCVT_RMODE(vcvt_rm_us, 4, uint32_t, helper_vfp_touls)
+
+/*
+ * VCVT between halfprec and singleprec. As usual for halfprec
+ * conversions, FZ16 is ignored and AHP is observed.
+ *
+ * do_vcvt_sh() is the single-to-half direction. The loop runs 16 / 4
+ * times; iteration e converts the 32-bit element m[H4(e)] of vm with
+ * float32_to_float16() and merges the 16-bit result into the element
+ * d[H2(e * 2 + top)] of vd, so top (0 or 1) chooses which of the two
+ * 16-bit slots belonging to iteration e receives the result.
+ *
+ * mask comes from mve_element_mask() and is shifted right by four bits
+ * per iteration, so its low four bits always belong to the current
+ * iteration. If all four are zero the iteration is skipped without
+ * converting anything.
+ *
+ * ieee is true when FPCR_AHP is clear in the FPSCR. flush_to_zero on
+ * standard_fp_status is saved, cleared for the duration of the loop
+ * and restored before mve_advance_vpt() is called.
+ */
+static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top)
+{
+ uint16_t *d = vd;
+ uint32_t *m = vm;
+ uint16_t r;
+ uint16_t mask = mve_element_mask(env);
+ bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP);
+ unsigned e;
+ float_status *fpst;
+ float_status scratch_fpst;
+ float_status *base_fpst = &env->vfp.standard_fp_status;
+ bool old_fz = get_flush_to_zero(base_fpst);
+ set_flush_to_zero(false, base_fpst);
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) {
+ if ((mask & MAKE_64BIT_MASK(0, 4)) == 0) {
+ continue;
+ }
+ fpst = base_fpst;
+ if (!(mask & 1)) {
+ /*
+ * We need the result but without updating flags: when bit 0 of
+ * this iteration's mask bits is clear, convert using a scratch
+ * copy of the status so that whatever the conversion records
+ * goes into the copy and base_fpst is left as it was.
+ */
+ scratch_fpst = *fpst;
+ fpst = &scratch_fpst;
+ }
+ r = float32_to_float16(m[H4(e)], ieee, fpst);
+ mergemask(&d[H2(e * 2 + top)], r, mask >> (top * 2));
+ }
+ set_flush_to_zero(old_fz, base_fpst);
+ mve_advance_vpt(env);
+}
+/*
+ * Half-to-single VCVT. The loop runs 16 / 4 times; iteration e
+ * converts the 16-bit element m[H2(e * 2 + top)] of vm with
+ * float16_to_float32() and merges the 32-bit result into d[H4(e)] of
+ * vd, so top (0 or 1) chooses which of the two 16-bit source slots
+ * belonging to iteration e is read.
+ *
+ * mask comes from mve_element_mask() and is shifted right by four bits
+ * per iteration; an iteration whose four mask bits are all zero is
+ * skipped. ieee is true when FPCR_AHP is clear in the FPSCR.
+ * flush_inputs_to_zero on standard_fp_status is saved, cleared for the
+ * duration of the loop and restored before mve_advance_vpt() is called.
+ */
+static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top)
+{
+ uint32_t *d = vd;
+ uint16_t *m = vm;
+ uint32_t r;
+ uint16_t mask = mve_element_mask(env);
+ bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP);
+ unsigned e;
+ float_status *fpst;
+ float_status scratch_fpst;
+ float_status *base_fpst = &env->vfp.standard_fp_status;
+ bool old_fiz = get_flush_inputs_to_zero(base_fpst);
+ set_flush_inputs_to_zero(false, base_fpst);
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) {
+ if ((mask & MAKE_64BIT_MASK(0, 4)) == 0) {
+ continue;
+ }
+ fpst = base_fpst;
+ if (!(mask & (1 << (top * 2)))) {
+ /*
+ * We need the result but without updating flags: the bit
+ * tested is bit (top * 2) of this iteration's mask bits
+ * (presumably the bit for the first byte of the source
+ * halfword -- confirm against mve_element_mask()). When it is
+ * clear, convert using a scratch copy of the status so that
+ * base_fpst is left as it was.
+ */
+ scratch_fpst = *fpst;
+ fpst = &scratch_fpst;
+ }
+ r = float16_to_float32(m[H2(e * 2 + top)], ieee, fpst);
+ mergemask(&d[H4(e)], r, mask);
+ }
+ set_flush_inputs_to_zero(old_fiz, base_fpst);
+ mve_advance_vpt(env);
+}
+/*
+ * Helper entry points for the four conversions. The "_sh" helpers call
+ * do_vcvt_sh() (float32_to_float16) and the "_hs" helpers call
+ * do_vcvt_hs() (float16_to_float32). In each pair the vcvtb helper
+ * passes top = 0 and the vcvtt helper passes top = 1.
+ */
+void HELPER(mve_vcvtb_sh)(CPUARMState *env, void *vd, void *vm)
+{
+ do_vcvt_sh(env, vd, vm, 0);
+}
+void HELPER(mve_vcvtt_sh)(CPUARMState *env, void *vd, void *vm)
+{
+ do_vcvt_sh(env, vd, vm, 1);
+}
+void HELPER(mve_vcvtb_hs)(CPUARMState *env, void *vd, void *vm)
+{
+ do_vcvt_hs(env, vd, vm, 0);
+}
+void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm)
+{
+ do_vcvt_hs(env, vd, vm, 1);
+}
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index e80a55eb62e..194ef99cc74 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -627,6 +627,20 @@ DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)
+#define DO_VCVT_SH(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_1op *a) \
+ { \
+ if (!dc_isar_feature(aa32_mve_fp, s)) { \
+ return false; /* aa32_mve_fp feature absent: reject */ \
+ } \
+ return do_1op(s, a, gen_helper_mve_##FN); /* pass helper to do_1op() */ \
+ } \
+
+DO_VCVT_SH(VCVTB_SH, vcvtb_sh) /* defines trans_VCVTB_SH */
+DO_VCVT_SH(VCVTT_SH, vcvtt_sh) /* defines trans_VCVTT_SH */
+DO_VCVT_SH(VCVTB_HS, vcvtb_hs) /* defines trans_VCVTB_HS */
+DO_VCVT_SH(VCVTT_HS, vcvtt_hs) /* defines trans_VCVTT_HS */
+
/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN) \
static bool trans_##INSN(DisasContext *s, arg_1op *a) \
--
2.20.1
- [PATCH v2 13/18] target/arm: Implement MVE VCVT between floating and fixed point, (continued)
- [PATCH v2 13/18] target/arm: Implement MVE VCVT between floating and fixed point, Peter Maydell, 2021/08/26
- [PATCH v2 08/18] target/arm: Implement MVE fp-with-scalar VFMA, VFMAS, Peter Maydell, 2021/08/26
- [PATCH v2 06/18] target/arm: Implement MVE VMAXNMA and VMINNMA, Peter Maydell, 2021/08/26
- [PATCH v2 12/18] target/arm: Implement MVE fp scalar comparisons, Peter Maydell, 2021/08/26
- [PATCH v2 15/18] target/arm: Implement MVE VCVT with specified rounding mode, Peter Maydell, 2021/08/26
- [PATCH v2 17/18] target/arm: Implement MVE VRINT insns, Peter Maydell, 2021/08/26
- [PATCH v2 10/18] target/arm: Implement MVE FP max/min across vector, Peter Maydell, 2021/08/26
- [PATCH v2 09/18] softfloat: Remove assertion preventing silencing of NaN in default-NaN mode, Peter Maydell, 2021/08/26
- [PATCH v2 14/18] target/arm: Implement MVE VCVT between fp and integer, Peter Maydell, 2021/08/26
- [PATCH v2 16/18] target/arm: Implement MVE VCVT between single and half precision,
Peter Maydell <=
- [PATCH v2 18/18] target/arm: Enable MVE in Cortex-M55, Peter Maydell, 2021/08/26