[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 41/72] softfloat: Introduce sh[lr]_double primitives
From: Richard Henderson
Subject: [PATCH 41/72] softfloat: Introduce sh[lr]_double primitives
Date: Fri, 7 May 2021 18:47:31 -0700
Provide x86_64 inline assembly for them, with a generic C fallback.
This avoids shuffling values through %cl in the x86 case.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat-macros.h | 36 ++++++++++++
fpu/softfloat.c | 102 +++++++++++++++++++++++++--------
2 files changed, 115 insertions(+), 23 deletions(-)
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index 672c1db555..ec4e27a595 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -85,6 +85,42 @@ this code that are retained.
#include "fpu/softfloat-types.h"
#include "qemu/host-utils.h"
+/**
+ * shl_double: double-word merging left shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count; the C fallback is only defined for 0 <= @c <= 63
+ *
+ * Return @l shifted left by @c bits, with the top @c bits of @r shifted in.
+ */
+static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+ asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c)); /* count: %cl or immediate */
+ return l;
+#else
+ return c ? (l << c) | (r >> (64 - c)) : l; /* c == 0 guarded: r >> 64 is undefined */
+#endif
+}
+
+/**
+ * shr_double: double-word merging right shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count; the C fallback is only defined for 0 <= @c <= 63
+ *
+ * Return @r shifted right by @c bits, with the low @c bits of @l shifted in.
+ */
+static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+ asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c)); /* count: %cl or immediate */
+ return r;
+#else
+ return c ? (r >> c) | (l << (64 - c)) : r; /* c == 0 guarded: l << 64 is undefined */
+#endif
+}
+
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'. If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index a9ee8498ae..a42c297828 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -956,15 +956,12 @@ static int frac128_normalize(FloatParts128 *a)
{
if (a->frac_hi) {
int shl = clz64(a->frac_hi);
- if (shl) {
- int shr = 64 - shl;
- a->frac_hi = (a->frac_hi << shl) | (a->frac_lo >> shr);
- a->frac_lo = (a->frac_lo << shl);
- }
+ a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
+ a->frac_lo <<= shl;
return shl;
} else if (a->frac_lo) {
int shl = clz64(a->frac_lo);
- a->frac_hi = (a->frac_lo << shl);
+ a->frac_hi = a->frac_lo << shl;
a->frac_lo = 0;
return shl + 64;
}
@@ -975,7 +972,7 @@ static int frac256_normalize(FloatParts256 *a)
{
uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
- int ret, shl, shr;
+ int ret, shl;
if (likely(a0)) {
shl = clz64(a0);
@@ -1005,11 +1002,10 @@ static int frac256_normalize(FloatParts256 *a)
ret += shl;
}
- shr = -shl & 63;
- a0 = (a0 << shl) | (a1 >> shr);
- a1 = (a1 << shl) | (a2 >> shr);
- a2 = (a2 << shl) | (a3 >> shr);
- a3 = (a3 << shl);
+ a0 = shl_double(a0, a1, shl);
+ a1 = shl_double(a1, a2, shl);
+ a2 = shl_double(a2, a3, shl);
+ a3 <<= shl;
done:
a->frac_hi = a0;
@@ -1028,7 +1024,20 @@ static void frac64_shl(FloatParts64 *a, int c)
static void frac128_shl(FloatParts128 *a, int c)
{
- shift128Left(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+ uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
+
+ if (c & 64) { /* bit 6 of the count set: move the low word up one whole word */
+ a0 = a1, a1 = 0;
+ }
+
+ c &= 63; /* remaining sub-word count */
+ if (c) { /* nothing left to shift when the remaining count is zero */
+ a0 = shl_double(a0, a1, c);
+ a1 = a1 << c;
+ }
+
+ a->frac_hi = a0;
+ a->frac_lo = a1;
}
#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
@@ -1040,19 +1049,68 @@ static void frac64_shr(FloatParts64 *a, int c)
static void frac128_shr(FloatParts128 *a, int c)
{
- shift128Right(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+ uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
+
+ if (c & 64) { /* bit 6 of the count set: move the high word down one whole word */
+ a1 = a0, a0 = 0;
+ }
+
+ c &= 63; /* remaining sub-word count */
+ if (c) { /* nothing left to shift when the remaining count is zero */
+ a1 = shr_double(a0, a1, c);
+ a0 = a0 >> c;
+ }
+
+ a->frac_hi = a0;
+ a->frac_lo = a1;
}
#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
static void frac64_shrjam(FloatParts64 *a, int c)
{
- shift64RightJamming(a->frac, c, &a->frac);
+ uint64_t a0 = a->frac;
+
+ if (likely(c != 0)) { /* c == 0 leaves a->frac untouched */
+ if (likely(c < 64)) {
+ a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0); /* jam: set bit 0 if any shifted-out bit was set */
+ } else {
+ a0 = a0 != 0; /* every bit shifted out: only the sticky bit remains */
+ }
+ a->frac = a0;
+ }
}
static void frac128_shrjam(FloatParts128 *a, int c)
{
- shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+ uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
+ uint64_t sticky = 0;
+
+ if (unlikely(c == 0)) {
+ return;
+ } else if (likely(c < 64)) {
+ /* sub-word shift only: handled by the common code below */
+ } else if (likely(c < 128)) {
+ sticky = a1; /* the whole low word is shifted out */
+ a1 = a0;
+ a0 = 0;
+ c &= 63; /* remaining sub-word count */
+ if (c == 0) {
+ goto done;
+ }
+ } else {
+ sticky = a0 | a1; /* both words are shifted out entirely */
+ a0 = a1 = 0;
+ goto done;
+ }
+
+ sticky |= shr_double(a1, 0, c); /* collect the bits about to fall off the low word */
+ a1 = shr_double(a0, a1, c);
+ a0 = a0 >> c;
+
+ done:
+ a->frac_lo = a1 | (sticky != 0); /* jam: fold any lost bits into bit 0 */
+ a->frac_hi = a0;
}
static void frac256_shrjam(FloatParts256 *a, int c)
@@ -1060,7 +1118,6 @@ static void frac256_shrjam(FloatParts256 *a, int c)
uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
uint64_t sticky = 0;
- int invc;
if (unlikely(c == 0)) {
return;
@@ -1085,12 +1142,11 @@ static void frac256_shrjam(FloatParts256 *a, int c)
goto done;
}
- invc = -c & 63;
- sticky |= a3 << invc;
- a3 = (a3 >> c) | (a2 << invc);
- a2 = (a2 >> c) | (a1 << invc);
- a1 = (a1 >> c) | (a0 << invc);
- a0 = (a0 >> c);
+ sticky |= shr_double(a3, 0, c);
+ a3 = shr_double(a2, a3, c);
+ a2 = shr_double(a1, a2, c);
+ a1 = shr_double(a0, a1, c);
+ a0 = a0 >> c;
done:
a->frac_lo = a3 | (sticky != 0);
--
2.25.1
- Re: [PATCH 34/72] softfloat: Move addsub_floats to softfloat-parts.c.inc, (continued)
- [PATCH 36/72] softfloat: Move mul_floats to softfloat-parts.c.inc, Richard Henderson, 2021/05/07
- [PATCH 38/72] softfloat: Use mulu64 for mul64To128, Richard Henderson, 2021/05/07
- [PATCH 40/72] softfloat: Tidy mul128By64To192, Richard Henderson, 2021/05/07
- [PATCH 43/72] softfloat: Split float_to_float, Richard Henderson, 2021/05/07
- [PATCH 44/72] softfloat: Convert float-to-float conversions with float128, Richard Henderson, 2021/05/07
- [PATCH 41/72] softfloat: Introduce sh[lr]_double primitives,
Richard Henderson <=
- [PATCH 42/72] softfloat: Move div_floats to softfloat-parts.c.inc, Richard Henderson, 2021/05/07
- [PATCH 48/72] softfloat: Move int_to_float to softfloat-parts.c.inc, Richard Henderson, 2021/05/07
- [PATCH 49/72] softfloat: Move uint_to_float to softfloat-parts.c.inc, Richard Henderson, 2021/05/07
- [PATCH 54/72] softfloat: Split out parts_uncanon_normal, Richard Henderson, 2021/05/07
- [PATCH 45/72] softfloat: Move round_to_int to softfloat-parts.c.inc, Richard Henderson, 2021/05/07
- [PATCH 46/72] softfloat: Move rount_to_int_and_pack to softfloat-parts.c.inc, Richard Henderson, 2021/05/07