lmi-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[lmi-commits] [lmi] master c9c50dc 07/23: Add a specialized midpoint fun


From: Greg Chicares
Subject: [lmi-commits] [lmi] master c9c50dc 07/23: Add a specialized midpoint function for root finding
Date: Tue, 27 Jul 2021 21:59:51 -0400 (EDT)

branch: master
commit c9c50dc8ba091894af2b95e2a1be78610a2e62bf
Author: Gregory W. Chicares <gchicares@sbcglobal.net>
Commit: Gregory W. Chicares <gchicares@sbcglobal.net>

    Add a specialized midpoint function for root finding
---
 zero.hpp      | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 zero_test.cpp |  93 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 204 insertions(+), 3 deletions(-)

diff --git a/zero.hpp b/zero.hpp
index dbaab9c..f602b1b 100644
--- a/zero.hpp
+++ b/zero.hpp
@@ -24,14 +24,18 @@
 
 #include "config.hpp"
 
+#include "math_functions.hpp"           // signum()
 #include "null_stream.hpp"
 #include "round_to.hpp"
 
 #include <cfloat>                       // DBL_EPSILON, DECIMAL_DIG
-#include <cmath>                        // fabs(), pow()
+#include <cmath>                        // fabs(), isfinite(), pow()
+#include <cstdint>                      // uint64_t
+#include <cstring>                      // memcpy()
 #include <functional>                   // function(), identity()
 #include <iomanip>                      // setprecision(), setw()
 #include <limits>
+#include <numeric>                      // midpoint()
 #include <ostream>
 #include <utility>                      // forward()
 
@@ -79,6 +83,114 @@ struct root_type
     int           n_iter   {0};
 };
 
+/// Specialized binary64 midpoint for root finding.
+///
+/// A 64-bit double can represent no more than 2^64 distinct values.
+/// Disregarding NaNs, they form (a permutation of) an ordered set,
+/// any of whose members can be found in 64 binary-search steps.
+/// However, bisection using the conventional arithmetic mean takes
+///   log2(DBL_MAX - -DBL_MAX) / DBL_TRUE_MIN
+///   = 1 + 1024 + 1074 = 2099
+/// instead of 64 steps to explore that range fully; and the maximum
+/// for Brent's method is the square of that number.
+///
+/// Consider:
+///   DBL_MAX       7fefffffffffffff
+///   DBL_MAX/2     7fdfffffffffffff
+///   DBL_TRUE_MIN  1000000000000000
+///   0.0           0000000000000000
+///  -DBL_MAX       ffefffffffffffff
+///  -DBL_MAX/2     ffdfffffffffffff
+///  -DBL_TRUE_MIN  8000000000000001
+///  -0.0           8000000000000000
+/// If a root is bounded by [0.0, DBL_MAX], then evaluating the
+/// objective function at the arithmetic mean chooses between two
+/// partitions
+///   [0000000000000000, 7fdfffffffffffff]
+///   [7fdfffffffffffff, 7fefffffffffffff]
+/// the larger of which contains about 99.95% of the elements.
+/// This function instead chooses a pivot that separates half
+/// the elements from the other half.
+///
+/// Precondition: neither argument is an infinity or a NaN;
+/// throw if violated.
+///
+/// The range [0x0, 0xffffffffffffffff] with infinities and NaNs
+/// removed is wellordered with respect to only one of the comparisons
+/// <(double) and <(uint64_t), but it can be split into two subranges
+///   [ DBL_MAX ≡ 0x7fefffffffffffff,  0.0 ≡ 0x0000000000000000]
+///   [-DBL_MAX ≡ 0xffefffffffffffff, -0.0 ≡ 0x8000000000000000]
+/// that are both wellordered, isomorphically, by those comparisons.
+/// Therefore, if the arguments are of opposite sign (both nonzero,
+/// one +, the other -) then return +0.0. This can happen only on the
+/// first iteration.
+///
+/// If both arguments are zero, then return +0.0. This case is not
+/// expected to arise in practice; treating is specially removes the
+/// only violation of the invariant that the result doesn't depend on
+/// the order of the arguments.
+///
+/// Otherwise, calculate and return a binary midpoint. If one argument
+/// is a zero, then first change its signbit, if needed, to match the
+/// other argument's. Finally, interpret both as unsigned integers,
+/// and return their arithmetic mean interpreted as binary64.
+
+inline double binary64_midpoint(double d0, double d1)
+{
+    static_assert(std::numeric_limits<double>::is_iec559);
+
+    using u_dbl_int_t = std::uint64_t;
+    static_assert(sizeof(u_dbl_int_t) == sizeof(double));
+
+    if(!std::isfinite(d0) || !std::isfinite(d1))
+        {throw "binary64_midpoint: non-finite argument";}
+
+    double const s0 = signum(d0);
+    double const s1 = signum(d1);
+    if(-1.0 == s0 * s1)
+        {return 0.0;}
+    else if(0.0 == s0 && 0.0 == s1)
+        {return 0.0;}
+    else if(0.0 == s0)
+        {d0 = std::copysign(d0, d1);}
+    else if(0.0 == s1)
+        {d1 = std::copysign(d1, d0);}
+    else {;} // Do nothing.
+
+    u_dbl_int_t u0;
+    u_dbl_int_t u1;
+    std::memcpy(&u0, &d0, sizeof d0);
+    std::memcpy(&u1, &d1, sizeof d1);
+    u_dbl_int_t um = std::midpoint(u0, u1);
+    double z;
+    std::memcpy(&z, &um, sizeof z);
+#if 0 // Temporarily useful for acceptance testing.
+        std::cout
+            << std::dec
+            << std::defaultfloat
+            << u0 << " u0\n"
+            << u1 << " u1\n"
+            << um << " um\n"
+            << std::hex
+            << u0 << " u0\n"
+            << u1 << " u1\n"
+            << um << " um\n"
+            << d0 << " d0\n"
+            << d1 << " d1\n"
+            << z  << " z\n"
+            << std::hexfloat
+            << d0 << " d0\n"
+            << d1 << " d1\n"
+            << z  << " z\n"
+            << std::dec
+            << std::defaultfloat
+            << std::endl
+            << std::endl
+            ;
+#endif // 0
+    return z;
+}
+
 namespace detail
 {
 using RoundT = std::function<double(double)>;
diff --git a/zero_test.cpp b/zero_test.cpp
index d7ca7fc..1350fed 100644
--- a/zero_test.cpp
+++ b/zero_test.cpp
@@ -60,8 +60,7 @@ double max_err(double zeta, double tol)
 /// static_cast<int> is exact for any number of iterations that
 /// can be counted by an 'int'.
 ///
-/// The greatest possible number of bisection steps (where [x] is
-/// the greatest integer function) is:
+/// The greatest possible number of bisection steps is:
 ///   log2(DBL_MAX - -DBL_MAX) / DBL_TRUE_MIN
 ///   = 1 + 1024 + 1074 = 2099
 /// Yet an IEEE 754 binary64 entity can have no more than 2^64
@@ -375,6 +374,95 @@ void test_fundamentals()
     LMI_TEST_EQUAL(2099, max_iter);
 }
 
+void test_binary64_midpoint()
+{
+    // Make sure double is binary64.
+    static_assert(std::numeric_limits<double>::is_iec559);
+
+    // Make doubly sure it has infinity...
+    static_assert(std::numeric_limits<double>::has_infinity);
+    constexpr double inf = std::numeric_limits<double>::infinity();
+
+    // ...and qNaN.
+    static_assert(std::numeric_limits<double>::has_quiet_NaN);
+    constexpr double qnan = std::numeric_limits<double>::quiet_NaN();
+
+    // Make sure the signs of non-finite values are detected correctly.
+
+    LMI_TEST_EQUAL( 0.0, signum( 0.0));
+    LMI_TEST_EQUAL( 0.0, signum(-0.0));
+
+    LMI_TEST_EQUAL( 1.0, signum( inf));
+    LMI_TEST_EQUAL(-1.0, signum(-inf));
+
+    LMI_TEST_EQUAL( 1.0, signum( qnan));
+    LMI_TEST_EQUAL(-1.0, signum(-qnan));
+
+    // Both zero: return positive zero, regardless of signbit.
+    // Thus, the midpoint of two zeros doesn't depend on the order
+    // in which they're given.
+
+    double const zpp = binary64_midpoint( 0.0,  0.0);
+    double const zpn = binary64_midpoint( 0.0, -0.0);
+    double const znp = binary64_midpoint(-0.0,  0.0);
+    double const znn = binary64_midpoint(-0.0, -0.0);
+
+    LMI_TEST_EQUAL(0.0, zpp);
+    LMI_TEST_EQUAL(0.0, zpn);
+    LMI_TEST_EQUAL(0.0, znp);
+    LMI_TEST_EQUAL(0.0, znn);
+
+    LMI_TEST_EQUAL(false, std::signbit(zpp));
+    LMI_TEST_EQUAL(false, std::signbit(zpn));
+    LMI_TEST_EQUAL(false, std::signbit(znp));
+    LMI_TEST_EQUAL(false, std::signbit(znn));
+
+    // One argument >0, the other <0: return zero.
+
+    LMI_TEST_EQUAL(0.0, binary64_midpoint( 3.1416, -2.718));
+    LMI_TEST_EQUAL(0.0, binary64_midpoint(-3.1416,  2.718));
+
+    // Do not return zero when one argument is zero and the other
+    // has an opposite signbit. Note the "UN" in "UNEQUAL" here.
+
+    LMI_TEST_UNEQUAL(0.0, binary64_midpoint( 3.1416, -0.0)); // "UN"!
+    LMI_TEST_UNEQUAL(0.0, binary64_midpoint(-3.1416,  0.0)); // "UN"!
+
+    // One argument zero, the other nonzero:  binary midpoint, i.e.,
+    //   std::midpoint(*(std::uint64_t)(&x), *(std::uint64_t)(&y))
+    // after forcing the zero to match the other argument's signbit.
+
+    // 0000000000000000 <-> 0.0
+    // 3ff0000000000000 <-> 1.0
+    // 1ff8000000000000 <-> 1.11875e-154 <-> 0x1.8p-512
+    LMI_TEST(materially_equal(1.11875e-154, binary64_midpoint(0.0,  1.00), 
1.0e-5));
+
+    LMI_TEST(materially_equal(5.59376e-155, binary64_midpoint(0.0,  0.25), 
1.0e-5));
+
+    LMI_TEST(materially_equal( 2.65703e-154, binary64_midpoint( 0.0,  6.25), 
1.0e-5));
+    LMI_TEST(materially_equal( 2.65703e-154, binary64_midpoint(-0.0,  6.25), 
1.0e-5));
+    LMI_TEST(materially_equal(-2.65703e-154, binary64_midpoint( 0.0, -6.25), 
1.0e-5));
+    LMI_TEST(materially_equal(-2.65703e-154, binary64_midpoint(-0.0, -6.25), 
1.0e-5));
+
+    // Both arguments nonzero and same sign: binary midpoint, i.e.,
+    //   std::midpoint((std::uint64_t)x, (std::uint64_t)y)
+
+    LMI_TEST(materially_equal( 3.75, binary64_midpoint( 3.0,  5.0)));
+    LMI_TEST(materially_equal(-3.75, binary64_midpoint(-3.0, -5.0)));
+
+    LMI_TEST(materially_equal( 1.00028e3  , binary64_midpoint( 1.0e0  ,  1.0e6 
 ), 1.0e-5));
+
+    LMI_TEST(materially_equal( 1.00223e50 , binary64_midpoint( 1.0e0  ,  
1.0e100), 1.0e-5));
+    LMI_TEST(materially_equal( 1.00894e200, binary64_midpoint( 1.0e100,  
1.0e300), 1.0e-5));
+
+    LMI_TEST(materially_equal( 0.973197   , binary64_midpoint( 1.0e-100, 
1.0e100), 1.0e-5));
+
+    // Identical arguments: return value equals both.
+
+    LMI_TEST_EQUAL( 1.0e100, binary64_midpoint( 1.0e100,  1.0e100));
+    LMI_TEST_EQUAL(-1.0e100, binary64_midpoint(-1.0e100, -1.0e100));
+}
+
 /// A function whose value almost everywhere in (-1.0e100, 1.0e100)
 /// is a "signed" NaN. It's dubious to think of NaNs as possessing
 /// signedness, yet they do have a sign bit.
@@ -906,6 +994,7 @@ void test_former_rounding_problem()
 int test_main(int, char*[])
 {
     test_fundamentals();
+    test_binary64_midpoint();
     test_NaNs();
     test_root_at_a_bound();
     test_biases();



reply via email to

[Prev in Thread] Current Thread [Next in Thread]