[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v3 12/22] fpu/softfloat: re-factor add/sub
From: |
Philippe Mathieu-Daudé |
Subject: |
Re: [Qemu-devel] [PATCH v3 12/22] fpu/softfloat: re-factor add/sub |
Date: |
Wed, 24 Jan 2018 11:33:06 -0300 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.5.2 |
On 01/24/2018 10:13 AM, Alex Bennée wrote:
> We can now add float16_add/sub and use the common decompose and
> canonicalize functions to have a single implementation for
> float16/32/64 add and sub functions.
>
> Signed-off-by: Alex Bennée <address@hidden>
> Signed-off-by: Richard Henderson <address@hidden>
>
> ---
>
> v3
> - renames for FloatParts
> - shorten internal names
> - fix comment style
> - use FloatFmt values for for extract/deposit in raw_pack/unpack
> - move #include bitops from previous patch
> - add is_nan helper
> - avoid temp variables where we can
> ---
> fpu/softfloat.c | 884
> +++++++++++++++++++++++++-----------------------
> include/fpu/softfloat.h | 4 +
> 2 files changed, 461 insertions(+), 427 deletions(-)
>
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index 568d555595..b179790889 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -83,6 +83,7 @@ this code that are retained.
> * target-dependent and needs the TARGET_* macros.
> */
> #include "qemu/osdep.h"
> +#include "qemu/bitops.h"
> #include "fpu/softfloat.h"
>
> /* We only need stdlib for abort() */
> @@ -270,6 +271,462 @@ static const FloatFmt float64_params = {
> FLOAT_PARAMS(11, 52)
> };
>
> +/* Unpack a float to parts, but do not canonicalize. */
> +static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
> +{
> + const int sign_pos = fmt.frac_size + fmt.exp_size;
> +
> + return (FloatParts) {
> + .cls = float_class_unclassified,
> + .sign = extract64(raw, sign_pos, 1),
> + .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
> + .frac = extract64(raw, 0, fmt.frac_size),
> + };
> +}
> +
> +static inline FloatParts float16_unpack_raw(float16 f)
> +{
> + return unpack_raw(float16_params, f);
> +}
> +
> +static inline FloatParts float32_unpack_raw(float32 f)
> +{
> + return unpack_raw(float32_params, f);
> +}
> +
> +static inline FloatParts float64_unpack_raw(float64 f)
> +{
> + return unpack_raw(float64_params, f);
> +}
> +
> +/* Pack a float from parts, but do not canonicalize. */
> +static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
> +{
> + const int sign_pos = fmt.frac_size + fmt.exp_size;
> + uint64_t ret = p.frac;
saving 1 line:
uint64_t ret = deposit64(p.frac, sign_pos, 1, p.sign);
and then eventually:
return deposit64(ret, fmt.frac_size, fmt.exp_size, p.exp);
> +
> + ret = deposit64(ret, fmt.frac_size, fmt.exp_size, p.exp);
> + ret = deposit64(ret, sign_pos, 1, p.sign);
> + return ret;
> +}
> +
> +static inline float16 float16_pack_raw(FloatParts p)
> +{
> + return make_float16(pack_raw(float16_params, p));
> +}
> +
> +static inline float32 float32_pack_raw(FloatParts p)
> +{
> + return make_float32(pack_raw(float32_params, p));
> +}
> +
> +static inline float64 float64_pack_raw(FloatParts p)
> +{
> + return make_float64(pack_raw(float64_params, p));
> +}
> +
> +/* Canonicalize EXP and FRAC, setting CLS. */
> +static FloatParts canonicalize(FloatParts part, const FloatFmt *parm,
> + float_status *status)
> +{
> + if (part.exp == parm->exp_max) {
> + if (part.frac == 0) {
> + part.cls = float_class_inf;
> + } else {
> +#ifdef NO_SIGNALING_NANS
> + part.cls = float_class_qnan;
> +#else
> + int64_t msb = part.frac << (parm->frac_shift + 2);
> + if ((msb < 0) == status->snan_bit_is_one) {
> + part.cls = float_class_snan;
> + } else {
> + part.cls = float_class_qnan;
> + }
> +#endif
> + }
> + } else if (part.exp == 0) {
> + if (likely(part.frac == 0)) {
> + part.cls = float_class_zero;
> + } else if (status->flush_inputs_to_zero) {
> + float_raise(float_flag_input_denormal, status);
> + part.cls = float_class_zero;
> + part.frac = 0;
> + } else {
> + int shift = clz64(part.frac) - 1;
> + part.cls = float_class_normal;
> + part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
> + part.frac <<= shift;
> + }
> + } else {
> + part.cls = float_class_normal;
> + part.exp -= parm->exp_bias;
> + part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac <<
> parm->frac_shift);
> + }
> + return part;
> +}
> +
> +/* Round and uncanonicalize a floating-point number by parts. There
> + * are FRAC_SHIFT bits that may require rounding at the bottom of the
> + * fraction; these bits will be removed. The exponent will be biased
> + * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
> + */
> +
> +static FloatParts round_canonical(FloatParts p, float_status *s,
> + const FloatFmt *parm)
> +{
> + const uint64_t frac_lsbm1 = parm->frac_lsbm1;
> + const uint64_t round_mask = parm->round_mask;
> + const uint64_t roundeven_mask = parm->roundeven_mask;
> + const int exp_max = parm->exp_max;
> + const int frac_shift = parm->frac_shift;
> + uint64_t frac, inc;
> + int exp, flags = 0;
> + bool overflow_norm;
> +
> + frac = p.frac;
> + exp = p.exp;
> +
> + switch (p.cls) {
> + case float_class_normal:
> + switch (s->float_rounding_mode) {
> + case float_round_nearest_even:
> + overflow_norm = false;
> + inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
> + break;
> + case float_round_ties_away:
> + overflow_norm = false;
> + inc = frac_lsbm1;
> + break;
> + case float_round_to_zero:
> + overflow_norm = true;
> + inc = 0;
> + break;
> + case float_round_up:
> + inc = p.sign ? 0 : round_mask;
> + overflow_norm = p.sign;
> + break;
> + case float_round_down:
> + inc = p.sign ? round_mask : 0;
> + overflow_norm = !p.sign;
> + break;
> + default:
> + g_assert_not_reached();
> + }
> +
> + exp += parm->exp_bias;
> + if (likely(exp > 0)) {
> + if (frac & round_mask) {
> + flags |= float_flag_inexact;
> + frac += inc;
> + if (frac & DECOMPOSED_OVERFLOW_BIT) {
> + frac >>= 1;
> + exp++;
> + }
> + }
> + frac >>= frac_shift;
> +
> + if (unlikely(exp >= exp_max)) {
> + flags |= float_flag_overflow | float_flag_inexact;
> + if (overflow_norm) {
> + exp = exp_max - 1;
> + frac = -1;
> + } else {
> + p.cls = float_class_inf;
> + goto do_inf;
> + }
> + }
> + } else if (s->flush_to_zero) {
> + flags |= float_flag_output_denormal;
> + p.cls = float_class_zero;
> + goto do_zero;
> + } else {
> + bool is_tiny = (s->float_detect_tininess
> + == float_tininess_before_rounding)
> + || (exp < 0)
> + || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
> +
> + shift64RightJamming(frac, 1 - exp, &frac);
> + if (frac & round_mask) {
> + /* Need to recompute round-to-even. */
> + if (s->float_rounding_mode == float_round_nearest_even) {
> + inc = ((frac & roundeven_mask) != frac_lsbm1
> + ? frac_lsbm1 : 0);
> + }
> + flags |= float_flag_inexact;
> + frac += inc;
> + }
> +
> + exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
> + frac >>= frac_shift;
> +
> + if (is_tiny && (flags & float_flag_inexact)) {
> + flags |= float_flag_underflow;
> + }
> + if (exp == 0 && frac == 0) {
> + p.cls = float_class_zero;
> + }
> + }
> + break;
> +
> + case float_class_zero:
> + do_zero:
> + exp = 0;
> + frac = 0;
> + break;
> +
> + case float_class_inf:
> + do_inf:
> + exp = exp_max;
> + frac = 0;
> + break;
> +
> + case float_class_qnan:
> + case float_class_snan:
> + exp = exp_max;
> + break;
> +
> + default:
> + g_assert_not_reached();
> + }
> +
> + float_raise(flags, s);
> + p.exp = exp;
> + p.frac = frac;
> + return p;
> +}
> +
> +static FloatParts float16_unpack_canonical(float16 f, float_status *s)
> +{
> + return canonicalize(float16_unpack_raw(f), &float16_params, s);
> +}
> +
> +static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
> +{
> + switch (p.cls) {
> + case float_class_dnan:
> + return float16_default_nan(s);
> + case float_class_msnan:
> + return float16_maybe_silence_nan(float16_pack_raw(p), s);
> + default:
> + p = round_canonical(p, s, &float16_params);
> + return float16_pack_raw(p);
> + }
> +}
> +
> +static FloatParts float32_unpack_canonical(float32 f, float_status *s)
> +{
> + return canonicalize(float32_unpack_raw(f), &float32_params, s);
> +}
> +
> +static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
> +{
> + switch (p.cls) {
> + case float_class_dnan:
> + return float32_default_nan(s);
> + case float_class_msnan:
> + return float32_maybe_silence_nan(float32_pack_raw(p), s);
> + default:
> + p = round_canonical(p, s, &float32_params);
> + return float32_pack_raw(p);
> + }
> +}
> +
> +static FloatParts float64_unpack_canonical(float64 f, float_status *s)
> +{
> + return canonicalize(float64_unpack_raw(f), &float64_params, s);
> +}
> +
> +static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
> +{
> + switch (p.cls) {
> + case float_class_dnan:
> + return float64_default_nan(s);
> + case float_class_msnan:
> + return float64_maybe_silence_nan(float64_pack_raw(p), s);
> + default:
> + p = round_canonical(p, s, &float64_params);
> + return float64_pack_raw(p);
> + }
> +}
> +
> +static FloatParts pick_nan_parts(FloatParts a, FloatParts b, float_status *s)
> +{
> + if (a.cls == float_class_snan || b.cls == float_class_snan) {
> + s->float_exception_flags |= float_flag_invalid;
> + }
> +
> + if (s->default_nan_mode) {
> + a.cls = float_class_dnan;
> + } else {
> + if (pickNaN(a.cls == float_class_qnan,
> + a.cls == float_class_snan,
> + b.cls == float_class_qnan,
> + b.cls == float_class_snan,
> + a.frac > b.frac
> + || (a.frac == b.frac && a.sign < b.sign))) {
> + a = b;
> + }
> + a.cls = float_class_msnan;
> + }
> + return a;
> +}
> +
> +/* Simple helper for checking if FloatClass is any NaN */
> +
> +static bool is_nan(FloatClass c)
> +{
> + return c >= float_class_qnan;
> +}
> +
> +/*
> + * Returns the result of adding or subtracting the values of the
> + * floating-point values `a' and `b'. The operation is performed
> + * according to the IEC/IEEE Standard for Binary Floating-Point
> + * Arithmetic.
> + */
> +
> +static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
> + float_status *s)
> +{
> + bool a_sign = a.sign;
> + bool b_sign = b.sign ^ subtract;
> +
> + if (a_sign != b_sign) {
> + /* Subtraction */
> +
> + if (a.cls == float_class_normal && b.cls == float_class_normal) {
> + if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
> + shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
> + a.frac = a.frac - b.frac;
> + } else {
> + shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
> + a.frac = b.frac - a.frac;
> + a.exp = b.exp;
> + a_sign ^= 1;
> + }
> +
> + if (a.frac == 0) {
> + a.cls = float_class_zero;
> + a.sign = s->float_rounding_mode == float_round_down;
> + } else {
> + int shift = clz64(a.frac) - 1;
> + a.frac = a.frac << shift;
> + a.exp = a.exp - shift;
> + a.sign = a_sign;
> + }
> + return a;
> + }
> + if (is_nan(a.cls) || is_nan(b.cls)) {
> + return pick_nan_parts(a, b, s);
> + }
> + if (a.cls == float_class_inf) {
> + if (b.cls == float_class_inf) {
> + float_raise(float_flag_invalid, s);
> + a.cls = float_class_dnan;
> + }
> + return a;
> + }
> + if (a.cls == float_class_zero && b.cls == float_class_zero) {
> + a.sign = s->float_rounding_mode == float_round_down;
> + return a;
> + }
> + if (a.cls == float_class_zero || b.cls == float_class_inf) {
> + b.sign = a_sign ^ 1;
> + return b;
> + }
> + if (b.cls == float_class_zero) {
> + return a;
> + }
> + } else {
> + /* Addition */
> + if (a.cls == float_class_normal && b.cls == float_class_normal) {
> + if (a.exp > b.exp) {
> + shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
> + } else if (a.exp < b.exp) {
> + shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
> + a.exp = b.exp;
> + }
> + a.frac += b.frac;
> + if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
> + a.frac >>= 1;
> + a.exp += 1;
> + }
> + return a;
> + }
> + if (is_nan(a.cls) || is_nan(b.cls)) {
> + return pick_nan_parts(a, b, s);
> + }
> + if (a.cls == float_class_inf || b.cls == float_class_zero) {
> + return a;
> + }
> + if (b.cls == float_class_inf || a.cls == float_class_zero) {
> + b.sign = b_sign;
> + return b;
> + }
> + }
> + g_assert_not_reached();
> +}
> +
> +/*
> + * Returns the result of adding or subtracting the floating-point
> + * values `a' and `b'. The operation is performed according to the
> + * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
> + */
> +
> +float16 float16_add(float16 a, float16 b, float_status *status)
> +{
> + FloatParts pa = float16_unpack_canonical(a, status);
> + FloatParts pb = float16_unpack_canonical(b, status);
> + FloatParts pr = addsub_floats(pa, pb, false, status);
> +
> + return float16_round_pack_canonical(pr, status);
> +}
> +
> +float32 float32_add(float32 a, float32 b, float_status *status)
> +{
> + FloatParts pa = float32_unpack_canonical(a, status);
> + FloatParts pb = float32_unpack_canonical(b, status);
> + FloatParts pr = addsub_floats(pa, pb, false, status);
> +
> + return float32_round_pack_canonical(pr, status);
> +}
> +
> +float64 float64_add(float64 a, float64 b, float_status *status)
> +{
> + FloatParts pa = float64_unpack_canonical(a, status);
> + FloatParts pb = float64_unpack_canonical(b, status);
> + FloatParts pr = addsub_floats(pa, pb, false, status);
> +
> + return float64_round_pack_canonical(pr, status);
> +}
> +
> +float16 float16_sub(float16 a, float16 b, float_status *status)
> +{
> + FloatParts pa = float16_unpack_canonical(a, status);
> + FloatParts pb = float16_unpack_canonical(b, status);
> + FloatParts pr = addsub_floats(pa, pb, true, status);
> +
> + return float16_round_pack_canonical(pr, status);
> +}
> +
> +float32 float32_sub(float32 a, float32 b, float_status *status)
> +{
> + FloatParts pa = float32_unpack_canonical(a, status);
> + FloatParts pb = float32_unpack_canonical(b, status);
> + FloatParts pr = addsub_floats(pa, pb, true, status);
> +
> + return float32_round_pack_canonical(pr, status);
> +}
> +
> +float64 float64_sub(float64 a, float64 b, float_status *status)
> +{
> + FloatParts pa = float64_unpack_canonical(a, status);
> + FloatParts pb = float64_unpack_canonical(b, status);
> + FloatParts pr = addsub_floats(pa, pb, true, status);
> +
> + return float64_round_pack_canonical(pr, status);
> +}
> +
>
> /*----------------------------------------------------------------------------
> | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
> | and 7, and returns the properly rounded 32-bit integer corresponding to the
> @@ -2081,220 +2538,6 @@ float32 float32_round_to_int(float32 a, float_status
> *status)
>
> }
>
> -/*----------------------------------------------------------------------------
> -| Returns the result of adding the absolute values of the single-precision
> -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
> -| before being returned. `zSign' is ignored if the result is a NaN.
> -| The addition is performed according to the IEC/IEEE Standard for Binary
> -| Floating-Point Arithmetic.
> -*----------------------------------------------------------------------------*/
> -
> -static float32 addFloat32Sigs(float32 a, float32 b, flag zSign,
> - float_status *status)
> -{
> - int aExp, bExp, zExp;
> - uint32_t aSig, bSig, zSig;
> - int expDiff;
> -
> - aSig = extractFloat32Frac( a );
> - aExp = extractFloat32Exp( a );
> - bSig = extractFloat32Frac( b );
> - bExp = extractFloat32Exp( b );
> - expDiff = aExp - bExp;
> - aSig <<= 6;
> - bSig <<= 6;
> - if ( 0 < expDiff ) {
> - if ( aExp == 0xFF ) {
> - if (aSig) {
> - return propagateFloat32NaN(a, b, status);
> - }
> - return a;
> - }
> - if ( bExp == 0 ) {
> - --expDiff;
> - }
> - else {
> - bSig |= 0x20000000;
> - }
> - shift32RightJamming( bSig, expDiff, &bSig );
> - zExp = aExp;
> - }
> - else if ( expDiff < 0 ) {
> - if ( bExp == 0xFF ) {
> - if (bSig) {
> - return propagateFloat32NaN(a, b, status);
> - }
> - return packFloat32( zSign, 0xFF, 0 );
> - }
> - if ( aExp == 0 ) {
> - ++expDiff;
> - }
> - else {
> - aSig |= 0x20000000;
> - }
> - shift32RightJamming( aSig, - expDiff, &aSig );
> - zExp = bExp;
> - }
> - else {
> - if ( aExp == 0xFF ) {
> - if (aSig | bSig) {
> - return propagateFloat32NaN(a, b, status);
> - }
> - return a;
> - }
> - if ( aExp == 0 ) {
> - if (status->flush_to_zero) {
> - if (aSig | bSig) {
> - float_raise(float_flag_output_denormal, status);
> - }
> - return packFloat32(zSign, 0, 0);
> - }
> - return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
> - }
> - zSig = 0x40000000 + aSig + bSig;
> - zExp = aExp;
> - goto roundAndPack;
> - }
> - aSig |= 0x20000000;
> - zSig = ( aSig + bSig )<<1;
> - --zExp;
> - if ( (int32_t) zSig < 0 ) {
> - zSig = aSig + bSig;
> - ++zExp;
> - }
> - roundAndPack:
> - return roundAndPackFloat32(zSign, zExp, zSig, status);
> -
> -}
> -
> -/*----------------------------------------------------------------------------
> -| Returns the result of subtracting the absolute values of the single-
> -| precision floating-point values `a' and `b'. If `zSign' is 1, the
> -| difference is negated before being returned. `zSign' is ignored if the
> -| result is a NaN. The subtraction is performed according to the IEC/IEEE
> -| Standard for Binary Floating-Point Arithmetic.
> -*----------------------------------------------------------------------------*/
> -
> -static float32 subFloat32Sigs(float32 a, float32 b, flag zSign,
> - float_status *status)
> -{
> - int aExp, bExp, zExp;
> - uint32_t aSig, bSig, zSig;
> - int expDiff;
> -
> - aSig = extractFloat32Frac( a );
> - aExp = extractFloat32Exp( a );
> - bSig = extractFloat32Frac( b );
> - bExp = extractFloat32Exp( b );
> - expDiff = aExp - bExp;
> - aSig <<= 7;
> - bSig <<= 7;
> - if ( 0 < expDiff ) goto aExpBigger;
> - if ( expDiff < 0 ) goto bExpBigger;
> - if ( aExp == 0xFF ) {
> - if (aSig | bSig) {
> - return propagateFloat32NaN(a, b, status);
> - }
> - float_raise(float_flag_invalid, status);
> - return float32_default_nan(status);
> - }
> - if ( aExp == 0 ) {
> - aExp = 1;
> - bExp = 1;
> - }
> - if ( bSig < aSig ) goto aBigger;
> - if ( aSig < bSig ) goto bBigger;
> - return packFloat32(status->float_rounding_mode == float_round_down, 0,
> 0);
> - bExpBigger:
> - if ( bExp == 0xFF ) {
> - if (bSig) {
> - return propagateFloat32NaN(a, b, status);
> - }
> - return packFloat32( zSign ^ 1, 0xFF, 0 );
> - }
> - if ( aExp == 0 ) {
> - ++expDiff;
> - }
> - else {
> - aSig |= 0x40000000;
> - }
> - shift32RightJamming( aSig, - expDiff, &aSig );
> - bSig |= 0x40000000;
> - bBigger:
> - zSig = bSig - aSig;
> - zExp = bExp;
> - zSign ^= 1;
> - goto normalizeRoundAndPack;
> - aExpBigger:
> - if ( aExp == 0xFF ) {
> - if (aSig) {
> - return propagateFloat32NaN(a, b, status);
> - }
> - return a;
> - }
> - if ( bExp == 0 ) {
> - --expDiff;
> - }
> - else {
> - bSig |= 0x40000000;
> - }
> - shift32RightJamming( bSig, expDiff, &bSig );
> - aSig |= 0x40000000;
> - aBigger:
> - zSig = aSig - bSig;
> - zExp = aExp;
> - normalizeRoundAndPack:
> - --zExp;
> - return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status);
> -
> -}
> -
> -/*----------------------------------------------------------------------------
> -| Returns the result of adding the single-precision floating-point values `a'
> -| and `b'. The operation is performed according to the IEC/IEEE Standard for
> -| Binary Floating-Point Arithmetic.
> -*----------------------------------------------------------------------------*/
> -
> -float32 float32_add(float32 a, float32 b, float_status *status)
> -{
> - flag aSign, bSign;
> - a = float32_squash_input_denormal(a, status);
> - b = float32_squash_input_denormal(b, status);
> -
> - aSign = extractFloat32Sign( a );
> - bSign = extractFloat32Sign( b );
> - if ( aSign == bSign ) {
> - return addFloat32Sigs(a, b, aSign, status);
> - }
> - else {
> - return subFloat32Sigs(a, b, aSign, status);
> - }
> -
> -}
> -
> -/*----------------------------------------------------------------------------
> -| Returns the result of subtracting the single-precision floating-point
> values
> -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
> -| for Binary Floating-Point Arithmetic.
> -*----------------------------------------------------------------------------*/
> -
> -float32 float32_sub(float32 a, float32 b, float_status *status)
> -{
> - flag aSign, bSign;
> - a = float32_squash_input_denormal(a, status);
> - b = float32_squash_input_denormal(b, status);
> -
> - aSign = extractFloat32Sign( a );
> - bSign = extractFloat32Sign( b );
> - if ( aSign == bSign ) {
> - return subFloat32Sigs(a, b, aSign, status);
> - }
> - else {
> - return addFloat32Sigs(a, b, aSign, status);
> - }
> -
> -}
> -
>
> /*----------------------------------------------------------------------------
> | Returns the result of multiplying the single-precision floating-point
> values
> | `a' and `b'. The operation is performed according to the IEC/IEEE Standard
> @@ -3891,219 +4134,6 @@ float64 float64_trunc_to_int(float64 a, float_status
> *status)
> return res;
> }
>
> -/*----------------------------------------------------------------------------
> -| Returns the result of adding the absolute values of the double-precision
> -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
> -| before being returned. `zSign' is ignored if the result is a NaN.
> -| The addition is performed according to the IEC/IEEE Standard for Binary
> -| Floating-Point Arithmetic.
> -*----------------------------------------------------------------------------*/
> -
> -static float64 addFloat64Sigs(float64 a, float64 b, flag zSign,
> - float_status *status)
> -{
> - int aExp, bExp, zExp;
> - uint64_t aSig, bSig, zSig;
> - int expDiff;
> -
> - aSig = extractFloat64Frac( a );
> - aExp = extractFloat64Exp( a );
> - bSig = extractFloat64Frac( b );
> - bExp = extractFloat64Exp( b );
> - expDiff = aExp - bExp;
> - aSig <<= 9;
> - bSig <<= 9;
> - if ( 0 < expDiff ) {
> - if ( aExp == 0x7FF ) {
> - if (aSig) {
> - return propagateFloat64NaN(a, b, status);
> - }
> - return a;
> - }
> - if ( bExp == 0 ) {
> - --expDiff;
> - }
> - else {
> - bSig |= LIT64( 0x2000000000000000 );
> - }
> - shift64RightJamming( bSig, expDiff, &bSig );
> - zExp = aExp;
> - }
> - else if ( expDiff < 0 ) {
> - if ( bExp == 0x7FF ) {
> - if (bSig) {
> - return propagateFloat64NaN(a, b, status);
> - }
> - return packFloat64( zSign, 0x7FF, 0 );
> - }
> - if ( aExp == 0 ) {
> - ++expDiff;
> - }
> - else {
> - aSig |= LIT64( 0x2000000000000000 );
> - }
> - shift64RightJamming( aSig, - expDiff, &aSig );
> - zExp = bExp;
> - }
> - else {
> - if ( aExp == 0x7FF ) {
> - if (aSig | bSig) {
> - return propagateFloat64NaN(a, b, status);
> - }
> - return a;
> - }
> - if ( aExp == 0 ) {
> - if (status->flush_to_zero) {
> - if (aSig | bSig) {
> - float_raise(float_flag_output_denormal, status);
> - }
> - return packFloat64(zSign, 0, 0);
> - }
> - return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
> - }
> - zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
> - zExp = aExp;
> - goto roundAndPack;
> - }
> - aSig |= LIT64( 0x2000000000000000 );
> - zSig = ( aSig + bSig )<<1;
> - --zExp;
> - if ( (int64_t) zSig < 0 ) {
> - zSig = aSig + bSig;
> - ++zExp;
> - }
> - roundAndPack:
> - return roundAndPackFloat64(zSign, zExp, zSig, status);
> -
> -}
> -
> -/*----------------------------------------------------------------------------
> -| Returns the result of subtracting the absolute values of the double-
> -| precision floating-point values `a' and `b'. If `zSign' is 1, the
> -| difference is negated before being returned. `zSign' is ignored if the
> -| result is a NaN. The subtraction is performed according to the IEC/IEEE
> -| Standard for Binary Floating-Point Arithmetic.
> -*----------------------------------------------------------------------------*/
> -
> -static float64 subFloat64Sigs(float64 a, float64 b, flag zSign,
> - float_status *status)
> -{
> - int aExp, bExp, zExp;
> - uint64_t aSig, bSig, zSig;
> - int expDiff;
> -
> - aSig = extractFloat64Frac( a );
> - aExp = extractFloat64Exp( a );
> - bSig = extractFloat64Frac( b );
> - bExp = extractFloat64Exp( b );
> - expDiff = aExp - bExp;
> - aSig <<= 10;
> - bSig <<= 10;
> - if ( 0 < expDiff ) goto aExpBigger;
> - if ( expDiff < 0 ) goto bExpBigger;
> - if ( aExp == 0x7FF ) {
> - if (aSig | bSig) {
> - return propagateFloat64NaN(a, b, status);
> - }
> - float_raise(float_flag_invalid, status);
> - return float64_default_nan(status);
> - }
> - if ( aExp == 0 ) {
> - aExp = 1;
> - bExp = 1;
> - }
> - if ( bSig < aSig ) goto aBigger;
> - if ( aSig < bSig ) goto bBigger;
> - return packFloat64(status->float_rounding_mode == float_round_down, 0,
> 0);
> - bExpBigger:
> - if ( bExp == 0x7FF ) {
> - if (bSig) {
> - return propagateFloat64NaN(a, b, status);
> - }
> - return packFloat64( zSign ^ 1, 0x7FF, 0 );
> - }
> - if ( aExp == 0 ) {
> - ++expDiff;
> - }
> - else {
> - aSig |= LIT64( 0x4000000000000000 );
> - }
> - shift64RightJamming( aSig, - expDiff, &aSig );
> - bSig |= LIT64( 0x4000000000000000 );
> - bBigger:
> - zSig = bSig - aSig;
> - zExp = bExp;
> - zSign ^= 1;
> - goto normalizeRoundAndPack;
> - aExpBigger:
> - if ( aExp == 0x7FF ) {
> - if (aSig) {
> - return propagateFloat64NaN(a, b, status);
> - }
> - return a;
> - }
> - if ( bExp == 0 ) {
> - --expDiff;
> - }
> - else {
> - bSig |= LIT64( 0x4000000000000000 );
> - }
> - shift64RightJamming( bSig, expDiff, &bSig );
> - aSig |= LIT64( 0x4000000000000000 );
> - aBigger:
> - zSig = aSig - bSig;
> - zExp = aExp;
> - normalizeRoundAndPack:
> - --zExp;
> - return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status);
> -
> -}
> -
> -/*----------------------------------------------------------------------------
> -| Returns the result of adding the double-precision floating-point values `a'
> -| and `b'. The operation is performed according to the IEC/IEEE Standard for
> -| Binary Floating-Point Arithmetic.
> -*----------------------------------------------------------------------------*/
> -
> -float64 float64_add(float64 a, float64 b, float_status *status)
> -{
> - flag aSign, bSign;
> - a = float64_squash_input_denormal(a, status);
> - b = float64_squash_input_denormal(b, status);
> -
> - aSign = extractFloat64Sign( a );
> - bSign = extractFloat64Sign( b );
> - if ( aSign == bSign ) {
> - return addFloat64Sigs(a, b, aSign, status);
> - }
> - else {
> - return subFloat64Sigs(a, b, aSign, status);
> - }
> -
> -}
> -
> -/*----------------------------------------------------------------------------
> -| Returns the result of subtracting the double-precision floating-point
> values
> -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
> -| for Binary Floating-Point Arithmetic.
> -*----------------------------------------------------------------------------*/
> -
> -float64 float64_sub(float64 a, float64 b, float_status *status)
> -{
> - flag aSign, bSign;
> - a = float64_squash_input_denormal(a, status);
> - b = float64_squash_input_denormal(b, status);
> -
> - aSign = extractFloat64Sign( a );
> - bSign = extractFloat64Sign( b );
> - if ( aSign == bSign ) {
> - return subFloat64Sigs(a, b, aSign, status);
> - }
> - else {
> - return addFloat64Sigs(a, b, aSign, status);
> - }
> -
> -}
>
>
> /*----------------------------------------------------------------------------
> | Returns the result of multiplying the double-precision floating-point
> values
> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
> index 23824a3000..693ece0974 100644
> --- a/include/fpu/softfloat.h
> +++ b/include/fpu/softfloat.h
> @@ -236,6 +236,10 @@ float64 float16_to_float64(float16 a, flag ieee,
> float_status *status);
>
> /*----------------------------------------------------------------------------
> | Software half-precision operations.
>
> *----------------------------------------------------------------------------*/
> +
> +float16 float16_add(float16, float16, float_status *status);
> +float16 float16_sub(float16, float16, float_status *status);
> +
> int float16_is_quiet_nan(float16, float_status *status);
> int float16_is_signaling_nan(float16, float_status *status);
> float16 float16_maybe_silence_nan(float16, float_status *status);
>
(note for myself, since I need more time for those 3)
all bug round_canonical() pick_nan_parts() addsub_floats():
Reviewed-by: Philippe Mathieu-Daudé <address@hidden>
- [Qemu-devel] [PATCH v3 09/22] fpu/softfloat: improve comments on ARM NaN propagation, (continued)
- [Qemu-devel] [PATCH v3 09/22] fpu/softfloat: improve comments on ARM NaN propagation, Alex Bennée, 2018/01/24
- [Qemu-devel] [PATCH v3 07/22] include/fpu/softfloat: implement float16_set_sign helper, Alex Bennée, 2018/01/24
- [Qemu-devel] [PATCH v3 08/22] include/fpu/softfloat: add some float16 constants, Alex Bennée, 2018/01/24
- [Qemu-devel] [PATCH v3 13/22] fpu/softfloat: re-factor mul, Alex Bennée, 2018/01/24
- [Qemu-devel] [PATCH v3 10/22] fpu/softfloat: move the extract functions to the top of the file, Alex Bennée, 2018/01/24
- [Qemu-devel] [PATCH v3 12/22] fpu/softfloat: re-factor add/sub, Alex Bennée, 2018/01/24
- Re: [Qemu-devel] [PATCH v3 12/22] fpu/softfloat: re-factor add/sub,
Philippe Mathieu-Daudé <=
- [Qemu-devel] [PATCH v3 14/22] fpu/softfloat: re-factor div, Alex Bennée, 2018/01/24
- [Qemu-devel] [PATCH v3 11/22] fpu/softfloat: define decompose structures, Alex Bennée, 2018/01/24
- [Qemu-devel] [PATCH v3 18/22] fpu/softfloat: re-factor int/uint to float, Alex Bennée, 2018/01/24
- [Qemu-devel] [PATCH v3 20/22] fpu/softfloat: re-factor minmax, Alex Bennée, 2018/01/24
- [Qemu-devel] [PATCH v3 16/22] fpu/softfloat: re-factor round_to_int, Alex Bennée, 2018/01/24