[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[lmi-commits] [lmi] master bf5d390: Add functions to deduce precision of decimal-formatted numbers
From: |
Greg Chicares |
Subject: |
[lmi-commits] [lmi] master bf5d390: Add functions to deduce precision of decimal-formatted numbers |
Date: |
Thu, 15 Dec 2016 01:36:36 +0000 (UTC) |
branch: master
commit bf5d39045ce68d2d518b1063e754a248c323e592
Author: Gregory W. Chicares <address@hidden>
Commit: Gregory W. Chicares <address@hidden>
Add functions to deduce precision of decimal-formatted numbers
---
rate_table.cpp | 85 +++++++++++++++++++++++++++++++++++++++++++++++++--
rate_table.hpp | 8 +++++
rate_table_test.cpp | 35 +++++++++++++++++++++
3 files changed, 126 insertions(+), 2 deletions(-)
diff --git a/rate_table.cpp b/rate_table.cpp
index bbc3a04..04fe6a0 100644
--- a/rate_table.cpp
+++ b/rate_table.cpp
@@ -46,7 +46,6 @@
#include <algorithm> // std::count()
#include <climits> // ULLONG_MAX
-#include <cstddef> // std::size_t
#include <cstdint>
#include <cstdlib> // std::strtoull()
#include <cstring> // std::strncmp()
@@ -60,7 +59,6 @@
#include <sstream>
#include <stdexcept>
#include <utility> // std::make_pair(), std::swap()
-#include <vector>
using std::uint8_t;
using std::uint16_t;
@@ -3154,3 +3152,86 @@ void database::save(std::ostream& index_os,
std::ostream& data_os)
}
} // namespace soa_v3_format
+
+/// Infer the decimal precision of a rounded decimal-formatted number.
+///
+/// Returns zero if 'arg' contains no '.'. Otherwise, leading blanks
+/// and zeros and trailing blanks are ignored, and the result is the
+/// number of characters after the '.', except that apparent excess
+/// precision beyond sixteen characters is discarded as explained in
+/// the body.
+
+std::size_t deduce_number_of_decimals(std::string const& arg)
+{
+    // Early exit: no decimal point means zero decimals.
+    if(std::string::npos == arg.find('.'))
+    {
+        return 0;
+    }
+
+    std::string s(arg);
+
+    // Strip leading blanks and zeros.
+    std::string::size_type const q = s.find_first_not_of(" 0");
+    if(std::string::npos != q)
+    {
+        s.erase(0, q);
+    }
+
+    // Strip trailing blanks.
+    std::string::size_type const r = s.find_last_not_of(' ');
+    if(std::string::npos != r)
+    {
+        s.erase(1 + r);
+    }
+
+    // Preliminary result is number of characters after '.'. The '.'
+    // is still present here: neither stripping step can remove it,
+    // so this subtraction cannot underflow.
+    std::size_t d = s.size() - s.find('.') - 1;
+
+    // Length of stripped string is number of significant digits
+    // (on both sides of the decimal point) plus one for the '.'.
+    // If this total exceeds 15--i.e., if there are more than 14
+    // significant digits--then there may be excess precision.
+    // In that case, keep only the first 15 digits (plus the '.',
+    // for a total of 16 characters), because those digits are
+    // guaranteed to be significant for IEEE754 double precision;
+    // drop the rest, which may include arbitrary digits. Then
+    // drop any trailing string that's all zeros or nines, and
+    // count the decimals that remain. This wrongly truncates a
+    // number whose representation requires 15 or 16 digits when
+    // its last one or more decimal digits are nines, but that
+    // doesn't matter for the present use case: rate tables aren't
+    // expected to have more than about eight decimal places; and
+    // this function will be called for each number in a table and
+    // the maximum result used, so that such incorrect truncation
+    // can only occur if every number in the table is
+    // ill-conditioned in this way.
+    if(15 < s.size())
+    {
+        s.resize(16);
+        if('0' == s.back() || '9' == s.back())
+        {
+            // With sixteen or more characters before the '.', the
+            // '.' itself has just been truncated away. Then find()
+            // returns npos, and subtracting it would wrap around
+            // to a meaningless positive value; no retained
+            // character is a decimal, so the answer is zero.
+            std::string::size_type const dot = s.find('.');
+            d = (std::string::npos == dot)
+                ? 0
+                : s.find_last_not_of(s.back()) - dot;
+        }
+    }
+
+    return d;
+}
+
+/// Deduce how many decimals a decimally-rounded vector<double> needs.
+///
+/// Rationale: certain historical tables were stored only in the
+/// binary format. (They were surely produced from text input, but
+/// that text was not preserved.) The "Number of decimal places"
+/// header of such a table may be inconsistent with the precision
+/// implicit in its data values, so that precision must be deduced.
+/// The result is the largest number of decimals required by any
+/// single value, so that writing every value as text with that
+/// precision is lossless.
+
+std::size_t deduce_number_of_decimals(std::vector<double> const& values)
+{
+    std::size_t most_decimals = 0;
+    for(double datum : values)
+    {
+        // Format each value as text, then measure that text.
+        std::size_t const needed =
+            deduce_number_of_decimals(value_cast<std::string>(datum));
+        if(most_decimals < needed)
+        {
+            most_decimals = needed;
+        }
+    }
+    return most_decimals;
+}
diff --git a/rate_table.hpp b/rate_table.hpp
index 400c890..68fe28d 100644
--- a/rate_table.hpp
+++ b/rate_table.hpp
@@ -29,9 +29,11 @@
#include <boost/filesystem/path.hpp>
+#include <cstddef> // std::size_t
#include <iosfwd>
#include <memory> // std::shared_ptr
#include <string>
+#include <vector>
/// Namespace containing classes working with databases in version 3 of the SOA
/// format.
@@ -195,4 +197,10 @@ inline std::ostream& operator<<(std::ostream& os,
table::Number const& number)
} // namespace soa_v3_format
+// These "deduce" functions do not clearly belong in any namespace.
+// They are declared here to facilitate unit testing.
+
+std::size_t deduce_number_of_decimals(std::string const&);
+std::size_t deduce_number_of_decimals(std::vector<double> const&);
+
#endif // rate_table_hpp
diff --git a/rate_table_test.cpp b/rate_table_test.cpp
index 863f606..5b0d693 100644
--- a/rate_table_test.cpp
+++ b/rate_table_test.cpp
@@ -443,6 +443,40 @@ void test_copy()
do_test_copy(qx_ins_path);
}
+/// Test deduce_number_of_decimals(std::string const&).
+///
+/// The tested function's argument is a string for generality, though
+/// in its intended use that string is always a value returned by
+/// value_cast<std::string>(double). The extra generality makes it
+/// easier to write tests here. Some of the failing tests in comments
+/// indicate improvements needed if a more general version of the
+/// tested function is ever desired for other purposes.
+
+void test_decimal_deduction()
+{
+    // More than sixteen characters after stripping: only the first
+    // sixteen are kept, and their trailing zeros or nines are dropped.
+    //                                                 1 234567890123456
+    BOOST_TEST_EQUAL( 9, deduce_number_of_decimals("0002.123456789000001"));
+    BOOST_TEST_EQUAL( 8, deduce_number_of_decimals("0002.123456789999991"));
+
+    // Sixteen characters after stripping: trailing nines are dropped.
+    // Fifteen characters: every decimal is counted as given.
+    BOOST_TEST_EQUAL( 8, deduce_number_of_decimals("0002.12345678999999 "));
+    BOOST_TEST_EQUAL(13, deduce_number_of_decimals("0002.1234567899999 "));
+
+    // Zero, written in various ways.
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals(" 0.000000000000000"));
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals(" 0.000000000000000000000000"));
+    // Fails, but value_cast can't return this.
+//  BOOST_TEST_EQUAL( 0, deduce_number_of_decimals(" 0.0 "));
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals(" 0. "));
+    // Fails, but value_cast can't return this.
+//  BOOST_TEST_EQUAL( 0, deduce_number_of_decimals(" .0 "));
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals(" . "));
+
+    // No decimal point at all.
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals(" 0 "));
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals(" 1 "));
+    BOOST_TEST_EQUAL( 0, deduce_number_of_decimals(" 9 "));
+
+    // Excess digits beyond the sixteenth character are ignored.
+    //                                                123456789012345678
+    BOOST_TEST_EQUAL( 5, deduce_number_of_decimals("0.012830000000000001"));
+}
+
int test_main(int, char*[])
{
test_database_open();
@@ -454,6 +488,7 @@ int test_main(int, char*[])
test_add_table();
test_delete();
test_copy();
+ test_decimal_deduction();
return EXIT_SUCCESS;
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [lmi-commits] [lmi] master bf5d390: Add functions to deduce precision of decimal-formatted numbers,
Greg Chicares <=