[lmi-commits] [lmi] master bf5d390: Add functions to deduce precision of decimal-formatted numbers

lmi-commits

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[lmi-commits] [lmi] master bf5d390: Add functions to deduce precision of decimal-formatted numbers

From:	Greg Chicares
Subject:	[lmi-commits] [lmi] master bf5d390: Add functions to deduce precision of decimal-formatted numbers
Date:	Thu, 15 Dec 2016 01:36:36 +0000 (UTC)

branch: master
commit bf5d39045ce68d2d518b1063e754a248c323e592
Author: Gregory W. Chicares <address@hidden>
Commit: Gregory W. Chicares <address@hidden>

    Add functions to deduce precision of decimal-formatted numbers
---
 rate_table.cpp      |   85 +++++++++++++++++++++++++++++++++++++++++++++++++--
 rate_table.hpp      |    8 +++++
 rate_table_test.cpp |   35 +++++++++++++++++++++
 3 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/rate_table.cpp b/rate_table.cpp
index bbc3a04..04fe6a0 100644
--- a/rate_table.cpp
+++ b/rate_table.cpp
@@ -46,7 +46,6 @@
 
 #include <algorithm>                    // std::count()
 #include <climits>                      // ULLONG_MAX
-#include <cstddef>                      // std::size_t
 #include <cstdint>
 #include <cstdlib>                      // std::strtoull()
 #include <cstring>                      // std::strncmp()
@@ -60,7 +59,6 @@
 #include <sstream>
 #include <stdexcept>
 #include <utility>                      // std::make_pair(), std::swap()
-#include <vector>
 
 using std::uint8_t;
 using std::uint16_t;
@@ -3154,3 +3152,86 @@ void database::save(std::ostream& index_os, std::ostream& data_os)
 }
 
 } // namespace soa_v3_format
+
+/// Infer the decimal precision of a rounded decimal-formatted number.
+
+std::size_t deduce_number_of_decimals(std::string const& arg)
+{
+    // Early exit: no decimal point means zero decimals.
+    if(std::string::npos == arg.find('.'))
+        {
+        return 0;
+        }
+
+    std::string s(arg);
+    std::size_t d = 0;
+
+    // Strip leading blanks and zeros.
+    std::string::size_type q = s.find_first_not_of(" 0");
+    if(std::string::npos != q)
+        {
+        s.erase(0, q);
+        }
+
+    // Strip trailing blanks.
+    std::string::size_type r = s.find_last_not_of(" ");
+    if(std::string::npos != r)
+        {
+        s.erase(1 + r);
+        }
+
+    // Preliminary result is number of characters after '.'.
+    // (Decrement for '.' unless nothing followed it.)
+    d = s.size() - s.find('.');
+    if(d) --d;
+
+    // Length of stripped string is number of significant digits
+    // (on both sides of the decimal point) plus one for the '.'.
+    // If this total exceeds 15--i.e., if there are more than 14
+    // significant digits--then there may be excess precision.
+    // In that case, keep only the first 15 digits (plus the '.',
+    // for a total of 16 characters), because those digits are
+    // guaranteed to be significant for IEEE754 double precision;
+    // drop the rest, which may include arbitrary digits. Then
+    // drop any trailing string that's all zeros or nines, and
+    // return the length of the remaining string. This wrongly
+    // truncates a number whose representation requires 15 or 16
+    // digits when the last one or more decimal digit is a nine,
+    // but that doesn't matter for the present use case: rate
+    // tables aren't expected to have more than about eight
+    // decimal places; and this function will be called for each
+    // number in a table and the maximum result used, so that
+    // such incorrect truncation can only occur if every number
+    // in the table is ill-conditioned in this way.
+    if(15 < s.size())
+        {
+        s.resize(16);
+        if('0' == s.back() || '9' == s.back())
+            {
+            d = s.find_last_not_of(s.back()) - s.find('.');
+            }
+        }
+
+    return d;
+}
+
+/// Infer the decimal precision of a decimally-rounded vector<double>.
+///
+/// Motivation: Some historical tables were stored only in the binary
+/// format. (Of course, no one wrote that by hand; text input surely
+/// was written first, but was not preserved.) The number of decimals
+/// implicit in the data values may defectively be inconsistent with
+/// the "Number of decimal places" header, and must be deduced. It is
+/// determined here as the greatest number of decimals required for
+/// any value datum, so that converting to text with that precision
+/// is lossless.
+
+std::size_t deduce_number_of_decimals(std::vector<double> const& values)
+{
+    std::size_t z = 0;
+    for(auto v: values)
+        {
+        z = std::max(z, deduce_number_of_decimals(value_cast<std::string>(v)));
+        }
+    return z;
+}
diff --git a/rate_table.hpp b/rate_table.hpp
index 400c890..68fe28d 100644
--- a/rate_table.hpp
+++ b/rate_table.hpp
@@ -29,9 +29,11 @@
 
 #include <boost/filesystem/path.hpp>
 
+#include <cstddef>                      // std::size_t
 #include <iosfwd>
 #include <memory>                       // std::shared_ptr
 #include <string>
+#include <vector>
 
 /// Namespace containing classes working with databases in version 3 of the SOA
 /// format.
@@ -195,4 +197,10 @@ inline std::ostream& operator<<(std::ostream& os, table::Number const& number)
 
 } // namespace soa_v3_format
 
+// These "deduce" functions do not clearly belong in any namespace.
+// They are declared here to facilitate unit testing.
+
+std::size_t deduce_number_of_decimals(std::string const&);
+std::size_t deduce_number_of_decimals(std::vector<double> const&);
+
 #endif // rate_table_hpp
diff --git a/rate_table_test.cpp b/rate_table_test.cpp
index 863f606..5b0d693 100644
--- a/rate_table_test.cpp
+++ b/rate_table_test.cpp
@@ -443,6 +443,40 @@ void test_copy()
     do_test_copy(qx_ins_path);
 }
 
+/// Test deduce_number_of_decimals(std::string const&).
+///
+/// The tested function's argument is a string for generality, though
+/// in its intended use that string is always a value returned by
+/// value_cast<std::string>(double). The extra generality makes it
+/// easier to write tests here. Some of the failing tests in comments
+/// indicate improvements needed if a more general version of the
+/// tested function is ever desired for other purposes.
+
+void test_decimal_deduction()
+{
+    //                                                 1 234567890123456
+    BOOST_TEST_EQUAL( 9, deduce_number_of_decimals("0002.123456789000001"));
+    BOOST_TEST_EQUAL( 8, deduce_number_of_decimals("0002.123456789999991"));
+
+    BOOST_TEST_EQUAL( 8, deduce_number_of_decimals("0002.12345678999999 "));
+    BOOST_TEST_EQUAL(13, deduce_number_of_decimals("0002.1234567899999  "));
+
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals("   0.000000000000000"));
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals("   0.000000000000000000000000"));
+    // Fails, but value_cast can't return this.
+//  BOOST_TEST_EQUAL( 0, deduce_number_of_decimals("   0.0              "));
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals("   0.               "));
+    // Fails, but value_cast can't return this.
+//  BOOST_TEST_EQUAL( 0, deduce_number_of_decimals("    .0              "));
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals("    .               "));
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals("   0                "));
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals("   1                "));
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals("   9                "));
+
+    //                                                123456789012345678
+    BOOST_TEST_EQUAL( 5, deduce_number_of_decimals("0.012830000000000001"));
+}
+
 int test_main(int, char*[])
 {
     test_database_open();
@@ -454,6 +488,7 @@ int test_main(int, char*[])
     test_add_table();
     test_delete();
     test_copy();
+    test_decimal_deduction();
 
     return EXIT_SUCCESS;
 }

[Prev in Thread]

Current Thread

[Next in Thread]

[lmi-commits] [lmi] master bf5d390: Add functions to deduce precision of decimal-formatted numbers, Greg Chicares <=

Prev by Date: [lmi-commits] [lmi] master 7031fc5 2/3: Revert "Revert "Allow empty comments in rate tables""
Next by Date: [lmi-commits] [lmi] master 82b8907: Detect and repair decimal-precision inconsistencies in rate tables
Previous by thread: [lmi-commits] [lmi] master updated (c89b086 -> bae3d4a)
Next by thread: [lmi-commits] [lmi] master 82b8907: Detect and repair decimal-precision inconsistencies in rate tables
Index(es):
- Date
- Thread