[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v2 6/6] target-arm: Use standard FPSCR for Neon half-precision operations
From: Aurelien Jarno
Subject: Re: [Qemu-devel] [PATCH v2 6/6] target-arm: Use standard FPSCR for Neon half-precision operations
Date: Wed, 9 Feb 2011 19:43:11 +0100
User-agent: Mutt/1.5.20 (2009-06-14)
On Wed, Feb 09, 2011 at 04:27:30PM +0000, Peter Maydell wrote:
> The Neon half-precision conversion operations (VCVT.F16.F32 and
> VCVT.F32.F16) use ARM standard floating-point arithmetic, unlike
> the VFP versions (VCVTB and VCVTT).
>
> Signed-off-by: Peter Maydell <address@hidden>
> ---
> target-arm/helper.c | 26 ++++++++++++++++++++++----
> target-arm/helpers.h | 2 ++
> target-arm/translate.c | 16 ++++++++--------
> 3 files changed, 32 insertions(+), 12 deletions(-)
Reviewed-by: Aurelien Jarno <address@hidden>
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 503278c..d36f0f3 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -2623,9 +2623,8 @@ VFP_CONV_FIX(ul, s, float32, uint32, u)
> #undef VFP_CONV_FIX
>
> /* Half precision conversions. */
> -float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
> +static float32 do_fcvt_f16_to_f32(uint32_t a, CPUState *env, float_status *s)
> {
> - float_status *s = &env->vfp.fp_status;
> int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
> float32 r = float16_to_float32(a, ieee, s);
> if (ieee) {
> @@ -2634,9 +2633,8 @@ float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
> return r;
> }
>
> -uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
> +static uint32_t do_fcvt_f32_to_f16(float32 a, CPUState *env, float_status *s)
> {
> - float_status *s = &env->vfp.fp_status;
> int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
> float16 r = float32_to_float16(a, ieee, s);
> if (ieee) {
> @@ -2645,6 +2643,26 @@ uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
> return r;
> }
>
> +float32 HELPER(neon_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
> +{
> + return do_fcvt_f16_to_f32(a, env, &env->vfp.standard_fp_status);
> +}
> +
> +float32 HELPER(neon_fcvt_f32_to_f16)(uint32_t a, CPUState *env)
> +{
> + return do_fcvt_f32_to_f16(a, env, &env->vfp.standard_fp_status);
> +}
> +
> +float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
> +{
> + return do_fcvt_f16_to_f32(a, env, &env->vfp.fp_status);
> +}
> +
> +float32 HELPER(vfp_fcvt_f32_to_f16)(uint32_t a, CPUState *env)
> +{
> + return do_fcvt_f32_to_f16(a, env, &env->vfp.fp_status);
> +}
> +
> float32 HELPER(recps_f32)(float32 a, float32 b, CPUState *env)
> {
> float_status *s = &env->vfp.fp_status;
> diff --git a/target-arm/helpers.h b/target-arm/helpers.h
> index 8a2564e..40264b4 100644
> --- a/target-arm/helpers.h
> +++ b/target-arm/helpers.h
> @@ -129,6 +129,8 @@ DEF_HELPER_3(vfp_ultod, f64, f64, i32, env)
>
> DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
> DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
> +DEF_HELPER_2(neon_fcvt_f16_to_f32, f32, i32, env)
> +DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env)
>
> DEF_HELPER_3(recps_f32, f32, f32, f32, env)
> DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index e4649e6..a867f55 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -5495,17 +5495,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
> tmp = new_tmp();
> tmp2 = new_tmp();
> tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
> - gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
> + gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
> tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
> - gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
> + gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
> tcg_gen_shli_i32(tmp2, tmp2, 16);
> tcg_gen_or_i32(tmp2, tmp2, tmp);
> tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
> - gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
> + gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
> tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
> neon_store_reg(rd, 0, tmp2);
> tmp2 = new_tmp();
> - gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
> + gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
> tcg_gen_shli_i32(tmp2, tmp2, 16);
> tcg_gen_or_i32(tmp2, tmp2, tmp);
> neon_store_reg(rd, 1, tmp2);
> @@ -5518,17 +5518,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
> tmp = neon_load_reg(rm, 0);
> tmp2 = neon_load_reg(rm, 1);
> tcg_gen_ext16u_i32(tmp3, tmp);
> - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
> tcg_gen_shri_i32(tmp3, tmp, 16);
> - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
> dead_tmp(tmp);
> tcg_gen_ext16u_i32(tmp3, tmp2);
> - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
> tcg_gen_shri_i32(tmp3, tmp2, 16);
> - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
> dead_tmp(tmp2);
> dead_tmp(tmp3);
> --
> 1.7.1
>
>
>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
address@hidden http://www.aurel32.net
- [Qemu-devel] [PATCH v2 0/6] target-arm: Fix floating point conversions, Peter Maydell, 2011/02/09
- [Qemu-devel] [PATCH v2 2/6] softfloat: Honour default_nan_mode for float-to-float conversions, Peter Maydell, 2011/02/09
- [Qemu-devel] [PATCH v2 4/6] softfloat: Correctly handle NaNs in float16_to_float32(), Peter Maydell, 2011/02/09
- [Qemu-devel] [PATCH v2 6/6] target-arm: Use standard FPSCR for Neon half-precision operations, Peter Maydell, 2011/02/09
- [Qemu-devel] [PATCH v2 3/6] softfloat: Fix single-to-half precision float conversions, Peter Maydell, 2011/02/09
- [Qemu-devel] [PATCH v2 1/6] softfloat: Add float16 type and float16 NaN handling functions, Peter Maydell, 2011/02/09
- [Qemu-devel] [PATCH v2 5/6] target-arm: Silence NaNs resulting from half-precision conversions, Peter Maydell, 2011/02/09