From: Peter Maydell
Subject: Re: [Qemu-devel] [PATCH v2 11/11] target-arm: Add support for AArch32 64bit VCVTB and VCVTT
Date: Tue, 28 Jan 2014 15:26:06 +0000

On 28 January 2014 11:22, Will Newton <address@hidden> wrote:
> Add support for the AArch32 floating-point half-precision to double-
> precision conversion VCVTB and VCVTT instructions.
>
> Signed-off-by: Will Newton <address@hidden>
> ---
>  target-arm/translate.c | 62 ++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 48 insertions(+), 14 deletions(-)
>
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index e701c0f..dfda2c4 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -3142,14 +3142,16 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
>                      VFP_DREG_N(rn, insn);
>                  }
>
> -                if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18))) {
> +                if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
> +                                 ((rn & 0x1e) == 0x6))) {
>                      /* Integer or single precision destination.  */
>                      rd = VFP_SREG_D(insn);

You should update the comment here, since the destination
is halfprec in the case you've added.

>                  } else {
>                      VFP_DREG_D(rd, insn);
>                  }
>                  if (op == 15 &&
> -                    (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14))) {
> +                    (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
> +                     ((rn & 0x1e) == 0x4))) {
>                      /* VCVT from int is always from S reg regardless of dp bit.
>                       * VCVT with immediate frac_bits has same format as SREG_M
>                       */

Again, the comment needs updating since you've added an extra case.

> @@ -3241,12 +3243,19 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
>                  case 5:
>                  case 6:
>                  case 7:
> -                    /* VCVTB, VCVTT: only present with the halfprec extension,
> -                     * UNPREDICTABLE if bit 8 is set (we choose to UNDEF)
> +                    /* VCVTB, VCVTT: only present with the halfprec extension
> +                     * UNPREDICTABLE if bit 8 is set prior to ARMv8
> +                     * (we choose to UNDEF)
>                       */
> -                    if (dp || !arm_feature(env, ARM_FEATURE_VFP_FP16)) {
> +                    if ((dp && !arm_feature(env, ARM_FEATURE_V8)) ||
> +                        !arm_feature(env, ARM_FEATURE_VFP_FP16)) {
>                          return 1;
>                      }
> +                    if ((rn & 0x1e) == 0x4) {
> +                        /* Single precision source */
> +                        gen_mov_F0_vreg(0, rm);
> +                        break;
> +                    }

This is confusing, because you're actually using this to catch the
case of a half-precision source. I think it will be clearer to say:

          if (!extract32(rn, 1, 1)) {
              /* Half precision source */
              gen_mov_F0_vreg(0, rm);
              break;
          }

which then catches the half-prec sources for both to-single
and to-double, and leaves just the "source is as per dp flag"
cases to fall through. (We didn't need to catch halfprec sources
separately when we only had 32<->16, since they both get handled
similarly, but now that we have the possibility of dp != 0 it's
clearer to handle them explicitly.)

>                      /* Otherwise fall through */
>                  default:
>                      /* One source operand.  */
> @@ -3394,21 +3403,39 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
>                      case 3: /* sqrt */
>                          gen_vfp_sqrt(dp);
>                          break;
> -                    case 4: /* vcvtb.f32.f16 */
> +                    case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
>                          tmp = gen_vfp_mrs();
>                          tcg_gen_ext16u_i32(tmp, tmp);
> -                        gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
> +                        if (dp) {
> +                            gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
> +                                                           cpu_env);
> +                        } else {
> +                            gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
> +                                                           cpu_env);
> +                        }
>                          tcg_temp_free_i32(tmp);
>                          break;
> -                    case 5: /* vcvtt.f32.f16 */
> +                    case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
>                          tmp = gen_vfp_mrs();
>                          tcg_gen_shri_i32(tmp, tmp, 16);
> -                        gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
> +                        if (dp) {
> +                            gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
> +                                                           cpu_env);
> +                        } else {
> +                            gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
> +                                                           cpu_env);
> +                        }
>                          tcg_temp_free_i32(tmp);
>                          break;
> -                    case 6: /* vcvtb.f16.f32 */
> +                    case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
>                          tmp = tcg_temp_new_i32();
> -                        gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
> +                        if (dp) {
> +                            gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
> +                                                           cpu_env);
> +                        } else {
> +                            gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
> +                                                           cpu_env);
> +                        }
>                          gen_mov_F0_vreg(0, rd);
>                          tmp2 = gen_vfp_mrs();
>                          tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
> @@ -3416,9 +3443,15 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
>                          tcg_temp_free_i32(tmp2);
>                          gen_vfp_msr(tmp);
>                          break;
> -                    case 7: /* vcvtt.f16.f32 */
> +                    case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
>                          tmp = tcg_temp_new_i32();
> -                        gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
> +                        if (dp) {
> +                            gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
> +                                                           cpu_env);
> +                        } else {
> +                            gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
> +                                                           cpu_env);
> +                        }
>                          tcg_gen_shli_i32(tmp, tmp, 16);
>                          gen_mov_F0_vreg(0, rd);
>                          tmp2 = gen_vfp_mrs();
> @@ -3553,7 +3586,8 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
>                  /* Write back the result.  */
>                  if (op == 15 && (rn >= 8 && rn <= 11))
>                      ; /* Comparison, do nothing.  */
> -                else if (op == 15 && dp && ((rn & 0x1c) == 0x18))
> +                else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
> +                                            (rn & 0x1e) == 0x6))
>                      /* VCVT double to int: always integer result. */
>                      gen_mov_vreg_F0(0, rd);
>                  else if (op == 15 && rn == 15)

Again, you need to update the comment because you've added a case.

Incidentally, my testing of this patch revealed a bug in
the softfloat float<->float conversion functions: if the
input is a NaN with the sign bit set and the top N bits of the
mantissa clear (where N is the number of mantissa bits in the
destination fp format), then we will incorrectly return a NaN
with the sign bit clear and only the topmost mantissa bit set,
whereas the ARM ARM demands that the sign bit be the same as
the input's. This is because the softfloat code for handling
"what an input NaN gets turned into for float/float conversion"
(i.e. the float*ToCommonNaN and commonNaNToFloat* functions)
doesn't do what the ARM ARM specifies; that behaviour should
probably be specifiable by the target architecture. At the moment
we try to work around this by always calling
float*_maybe_silence_nan() after the conversion, but that turns
out not to be sufficient to fix up the semantics...
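
(To make that concrete, here is a hand-worked example of the kind of
input I mean, for the f64 -> f16 direction; the bit patterns below are
derived from the IEEE layouts and the behaviour described above rather
than from running the code, so treat them as illustrative:)

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* f64 input: sign = 1, exponent all ones, mantissa = 1, i.e. a
         * NaN whose top 10 mantissa bits (the ones that would survive
         * into an f16) are all clear.
         */
        uint64_t in = 0xfff0000000000001ULL;

        /* ARM ARM semantics: keep the input sign, take the top fraction
         * bits (all zero here) and force the quiet bit.
         */
        unsigned arm_expected = 0xfe00;

        /* What we currently hand back instead: a NaN with the sign bit
         * clear and only the topmost mantissa bit set, i.e. softfloat's
         * default NaN for f16.
         */
        unsigned buggy_result = 0x7e00;

        printf("in = %016llx, expected = 0x%04x, got = 0x%04x\n",
               (unsigned long long)in, arm_expected, buggy_result);
        return 0;
    }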

thanks
-- PMM


