[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 046/102] softfloat: Add float64r32 arithmetic routines
From: Cédric Le Goater
Subject: [PULL 046/102] softfloat: Add float64r32 arithmetic routines
Date: Wed, 15 Dec 2021 18:03:01 +0100
From: Richard Henderson <richard.henderson@linaro.org>
These variants take a float64 as input, compute the result to
infinite precision (as we do with FloatParts), round the result
to the precision and dynamic range of float32, and then return
the result in the format of float64.
This is the operation PowerPC requires for its float32 operations.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211119160502.17432-28-richard.henderson@linaro.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
include/fpu/softfloat.h | 12 +++++
fpu/softfloat.c | 110 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 122 insertions(+)
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 0d3b40780762..d34b2c44d256 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -908,6 +908,18 @@ static inline bool float64_unordered_quiet(float64 a, float64 b,
*----------------------------------------------------------------------------*/
float64 float64_default_nan(float_status *status);
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations, rounding to single precision,
+| returning a result in double precision, with only one rounding step.
+*----------------------------------------------------------------------------*/
+
+float64 float64r32_add(float64, float64, float_status *status);
+float64 float64r32_sub(float64, float64, float_status *status);
+float64 float64r32_mul(float64, float64, float_status *status);
+float64 float64r32_div(float64, float64, float_status *status);
+float64 float64r32_muladd(float64, float64, float64, int, float_status *status);
+float64 float64r32_sqrt(float64, float_status *status);
+
/*----------------------------------------------------------------------------
| Software IEC/IEEE extended double-precision conversion routines.
*----------------------------------------------------------------------------*/
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 834ed3a054f7..7f524d437767 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1693,6 +1693,50 @@ static float64 float64_round_pack_canonical(FloatParts64 *p,
return float64_pack_raw(p);
}
+static float64 float64r32_round_pack_canonical(FloatParts64 *p,
+ float_status *s)
+{
+ parts_uncanon(p, s, &float32_params);
+
+ /*
+ * In parts_uncanon, we placed the fraction for float32 at the lsb.
+ * We need to adjust the fraction higher so that the least N bits are
+ * zero, and the fraction is adjacent to the float64 implicit bit.
+ */
+ switch (p->cls) {
+ case float_class_normal:
+ if (unlikely(p->exp == 0)) {
+ /*
+ * The result is denormal for float32, but can be represented
+ * in normalized form for float64. Adjust, per canonicalize.
+ */
+ int shift = frac_normalize(p);
+ p->exp = (float32_params.frac_shift -
+ float32_params.exp_bias - shift + 1 +
+ float64_params.exp_bias);
+ frac_shr(p, float64_params.frac_shift);
+ } else {
+ frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
+ p->exp += float64_params.exp_bias - float32_params.exp_bias;
+ }
+ break;
+ case float_class_snan:
+ case float_class_qnan:
+ frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
+ p->exp = float64_params.exp_max;
+ break;
+ case float_class_inf:
+ p->exp = float64_params.exp_max;
+ break;
+ case float_class_zero:
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ return float64_pack_raw(p);
+}
+
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
float_status *s)
{
@@ -1938,6 +1982,28 @@ float64_sub(float64 a, float64 b, float_status *s)
return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
+static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
+ bool subtract)
+{
+ FloatParts64 pa, pb, *pr;
+
+ float64_unpack_canonical(&pa, a, status);
+ float64_unpack_canonical(&pb, b, status);
+ pr = parts_addsub(&pa, &pb, status, subtract);
+
+ return float64r32_round_pack_canonical(pr, status);
+}
+
+float64 float64r32_add(float64 a, float64 b, float_status *status)
+{
+ return float64r32_addsub(a, b, status, false);
+}
+
+float64 float64r32_sub(float64 a, float64 b, float_status *status)
+{
+ return float64r32_addsub(a, b, status, true);
+}
+
static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
@@ -2069,6 +2135,17 @@ float64_mul(float64 a, float64 b, float_status *s)
f64_is_zon2, f64_addsubmul_post);
}
+float64 float64r32_mul(float64 a, float64 b, float_status *status)
+{
+ FloatParts64 pa, pb, *pr;
+
+ float64_unpack_canonical(&pa, a, status);
+ float64_unpack_canonical(&pb, b, status);
+ pr = parts_mul(&pa, &pb, status);
+
+ return float64r32_round_pack_canonical(pr, status);
+}
+
bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
@@ -2296,6 +2373,19 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}
+float64 float64r32_muladd(float64 a, float64 b, float64 c,
+ int flags, float_status *status)
+{
+ FloatParts64 pa, pb, pc, *pr;
+
+ float64_unpack_canonical(&pa, a, status);
+ float64_unpack_canonical(&pb, b, status);
+ float64_unpack_canonical(&pc, c, status);
+ pr = parts_muladd(&pa, &pb, &pc, flags, status);
+
+ return float64r32_round_pack_canonical(pr, status);
+}
+
bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
int flags, float_status *status)
{
@@ -2419,6 +2509,17 @@ float64_div(float64 a, float64 b, float_status *s)
f64_div_pre, f64_div_post);
}
+float64 float64r32_div(float64 a, float64 b, float_status *status)
+{
+ FloatParts64 pa, pb, *pr;
+
+ float64_unpack_canonical(&pa, a, status);
+ float64_unpack_canonical(&pb, b, status);
+ pr = parts_div(&pa, &pb, status);
+
+ return float64r32_round_pack_canonical(pr, status);
+}
+
bfloat16 QEMU_FLATTEN
bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
{
@@ -4285,6 +4386,15 @@ float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
return soft_f64_sqrt(ua.s, s);
}
+float64 float64r32_sqrt(float64 a, float_status *status)
+{
+ FloatParts64 p;
+
+ float64_unpack_canonical(&p, a, status);
+ parts_sqrt(&p, status, &float64_params);
+ return float64r32_round_pack_canonical(&p, status);
+}
+
bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
FloatParts64 p;
--
2.31.1
- [PULL 047/102] target/ppc: Add helpers for fmadds et al, (continued)
- [PULL 047/102] target/ppc: Add helpers for fmadds et al, Cédric Le Goater, 2021/12/15
- [PULL 033/102] target/ppc: Remove inline from do_fri, Cédric Le Goater, 2021/12/15
- [PULL 053/102] target/ppc: Use helper_todouble/tosingle in helper_xststdcsp, Cédric Le Goater, 2021/12/15
- [PULL 052/102] target/ppc: Update fres to new flags and float64r32, Cédric Le Goater, 2021/12/15
- [PULL 051/102] target/ppc: Add helper for frsqrtes, Cédric Le Goater, 2021/12/15
- [PULL 042/102] target/ppc: Use helper_todouble in do_frsp, Cédric Le Goater, 2021/12/15
- [PULL 049/102] target/ppc: Add helpers for fadds, fsubs, fdivs, Cédric Le Goater, 2021/12/15
- [PULL 054/102] target/ppc: Disable software TLB for the 7450 family, Cédric Le Goater, 2021/12/15
- [PULL 060/102] target/ppc: remove 401/403 CPUs, Cédric Le Goater, 2021/12/15
- [PULL 055/102] target/ppc: Disable unused facilities in the e600 CPU, Cédric Le Goater, 2021/12/15
- [PULL 046/102] softfloat: Add float64r32 arithmetic routines, Cédric Le Goater <=
- [PULL 062/102] ppc: Mark the 'taihu' machine as deprecated, Cédric Le Goater, 2021/12/15
- [PULL 066/102] ppc/ppc405: Change ppc405ep_init() return value, Cédric Le Goater, 2021/12/15
- [PULL 058/102] target/ppc: Remove 603e exception model, Cédric Le Goater, 2021/12/15
- [PULL 056/102] target/ppc: Remove the software TLB model of 7450 CPUs, Cédric Le Goater, 2021/12/15
- [PULL 063/102] ppc: Add trace-events for DCR accesses, Cédric Le Goater, 2021/12/15
- [PULL 064/102] ppc/ppc405: Convert printfs to trace-events, Cédric Le Goater, 2021/12/15
- [PULL 043/102] target/ppc: Update sqrt for new flags, Cédric Le Goater, 2021/12/15
- [PULL 045/102] target/ppc: Update fre to new flags, Cédric Le Goater, 2021/12/15
- [PULL 057/102] target/ppc: Fix MPCxxx FPU interrupt address, Cédric Le Goater, 2021/12/15
- [PULL 068/102] ppc/ppc405: Remove flash support, Cédric Le Goater, 2021/12/15