From 1393ef3d71fe1d6ebadebac1f22e928ed299170b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C3=ABl=20Champagne=20Gareau?= Date: Fri, 25 Apr 2025 18:19:50 -0400 Subject: [PATCH 1/5] Remove some redundancies and fix some typos --- benchmarks/algorithms.h | 5 +- benchmarks/benchmark.cpp | 7 ++- benchmarks/benchutil.h | 3 +- benchmarks/exhaustivefloat32.cpp | 70 ++++++---------------------- benchmarks/floatutils.h | 62 +++++++++++++++++++++++++ benchmarks/string_format.h | 1 + benchmarks/thoroughfloat64.cpp | 78 +++++++------------------------- 7 files changed, 98 insertions(+), 128 deletions(-) create mode 100644 benchmarks/floatutils.h diff --git a/benchmarks/algorithms.h b/benchmarks/algorithms.h index 7358aca..c1aac12 100644 --- a/benchmarks/algorithms.h +++ b/benchmarks/algorithms.h @@ -30,6 +30,7 @@ #include "grisu3.h" #include "grisu_exact.h" #include "ieeeToString.h" +#include "floatutils.h" #include "ryu/ryu.h" #include "schubfach_32.h" #include "schubfach_64.h" @@ -67,10 +68,6 @@ enum Algorithm { COUNT // Keep last }; -template -concept arithmetic_float - = std::is_same_v || std::is_same_v; - template struct BenchArgs { using Type = T; diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index 4c0f497..363176b 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -8,7 +8,6 @@ */ #include "algorithms.h" -#include #define IEEE_8087 #include "benchutil.h" #include "cxxopts.hpp" @@ -22,10 +21,10 @@ #include #include #include +#include #include #include -using Benchmarks::arithmetic_float; using Benchmarks::BenchArgs; bool is_matched(const std::string &str, const std::span filter) { @@ -42,8 +41,8 @@ bool is_matched(const std::string &str, const std::span filter) { template void evaluateProperties(const std::vector &lines, - const std::array, Benchmarks::COUNT> &args, const std::span filter = {}) { - constexpr auto precision = std::numeric_limits::digits10; + const std::array, Benchmarks::COUNT> &args, + const std::span filter = {}) { 
fmt::println("{:20} {:20}", "Algorithm", "Valid round-trip"); for (const auto &algo : args) { diff --git a/benchmarks/benchutil.h b/benchmarks/benchutil.h index 7ae5ddd..f5fa093 100644 --- a/benchmarks/benchutil.h +++ b/benchmarks/benchutil.h @@ -17,7 +17,7 @@ event_aggregate bench(const function_type &&function, size_t min_repeat = 10, N = 1; } volatile double dontoptimize = 0.0; - // We warmm up first. We warmup for at least 0.4s (by default). This makes + // We warm up first. We warmup for at least 0.4s (by default). This makes // sure that the processor is in a consistent state. event_aggregate warm_aggregate{}; for (size_t i = 0; i < N; i++) { @@ -89,4 +89,5 @@ void pretty_print(const std::vector &lines, const std::string &name, printf("\n"); } } + #endif //// BENCHUTIL_H diff --git a/benchmarks/exhaustivefloat32.cpp b/benchmarks/exhaustivefloat32.cpp index fefe656..b1b2461 100644 --- a/benchmarks/exhaustivefloat32.cpp +++ b/benchmarks/exhaustivefloat32.cpp @@ -12,56 +12,9 @@ #include "algorithms.h" #include "cxxopts.hpp" - -size_t count_significant_digits(std::string_view num_str) { - size_t count = 0; - size_t trailing_zeros = 0; - bool leading_zero = true; - - for (char c : num_str) { - if (c == '.') - continue; - if (c == 'e' || c == 'E') - break; // Stop counting at exponent - if (std::isdigit(static_cast(c))) { - if (c == '0') { - if (!leading_zero) - trailing_zeros++; - continue; - } - leading_zero = false; - count += trailing_zeros + 1; - trailing_zeros = 0; - } - } - - return count; -} - -std::string float_to_hex(float f) { - std::ostringstream oss; - oss << std::hexfloat << f; - return oss.str(); -} - -std::optional parse_float(std::string_view sv) { - float result; - const char* begin = sv.data(); - const char* end = sv.data() + sv.size(); - - auto [ptr, ec] = std::from_chars(begin, end, result); - - // Check if parsing succeeded and consumed the entire string - if (ec == std::errc{} && ptr == end) { - return result; - } - - // Return nullopt if 
parsing failed or didn't consume all input - return std::nullopt; -} +#include "floatutils.h" void run_exhaustive32(bool errol, const std::vector& algo_filter = {}) { - constexpr auto precision = std::numeric_limits::digits10; fmt::println("{:20} {:20}", "Algorithm", "Valid shortest serialization"); std::array, Benchmarks::COUNT> args; @@ -110,7 +63,7 @@ void run_exhaustive32(bool errol, const std::vector& algo_filter = continue; // Reference output, we cannot use std::to_chars here, because it produces // the shortest representation, which is not necessarily the same as the - // as the representation using the fewest significant digits. + // representation using the fewest significant digits. // So we use dragonbox, which serves as the reference implementation. const size_t vRef = Benchmarks::dragonbox(d, bufRef); const size_t vAlgo = algo.func(d, bufAlgo); @@ -120,12 +73,12 @@ void run_exhaustive32(bool errol, const std::vector& algo_filter = auto countRef = count_significant_digits(svRef); auto countAlgo = count_significant_digits(svAlgo); - auto backRef = parse_float(svRef); - auto backAlgo = parse_float(svAlgo); + auto backRef = parse_float(svRef); + auto backAlgo = parse_float(svAlgo); if(!backRef || !backAlgo) { incorrect = true; - fmt::print(" parse error: d = {}, bufRef = {}, bufAlgo = {}", float_to_hex(d), - svRef, svAlgo); + fmt::print(" parse error: d = {}, bufRef = {}, bufAlgo = {}", + float_to_hex(d), svRef, svAlgo); fflush(stdout); break; } @@ -134,20 +87,23 @@ void run_exhaustive32(bool errol, const std::vector& algo_filter = } if(*backRef != d) { incorrect = true; - fmt::print(" ref mismatch: d = {}, backRef = {}; svRef = {}, svAlgo = {}", float_to_hex(d), *backRef, svRef, svAlgo); + fmt::print(" ref mismatch: d = {}, backRef = {}; svRef = {}, svAlgo = {}", + float_to_hex(d), *backRef, svRef, svAlgo); fflush(stdout); break; } if(*backAlgo != d) { incorrect = true; - fmt::print(" algo mismatch: d = {}, backAlgo = {}; svRef = {}, svAlgo = {}, parsing 
the output with std::from_chars does not recover the original", float_to_hex(d), *backAlgo, svRef, svAlgo); + fmt::print(" algo mismatch: d = {}, backAlgo = {}; svRef = {}, svAlgo = {}, " + "parsing the output with std::from_chars does not recover the original", + float_to_hex(d), *backAlgo, svRef, svAlgo); fflush(stdout); break; } if (countRef != countAlgo) { incorrect = true; - fmt::print(" mismatch: d = {}, bufRef = {}, bufAlgo = {}", float_to_hex(d), - svRef, svAlgo); + fmt::print(" mismatch: d = {}, bufRef = {}, bufAlgo = {}", + float_to_hex(d), svRef, svAlgo); fflush(stdout); break; } diff --git a/benchmarks/floatutils.h b/benchmarks/floatutils.h new file mode 100644 index 0000000..a279594 --- /dev/null +++ b/benchmarks/floatutils.h @@ -0,0 +1,62 @@ +#ifndef FLOATUTILS_H +#define FLOATUTILS_H + +#include +#include +#include +#include + +template +concept arithmetic_float + = std::is_same_v || std::is_same_v; + +size_t count_significant_digits(std::string_view num_str) { + size_t count = 0; + size_t trailing_zeros = 0; + bool leading_zero = true; + + for (char c : num_str) { + if (c == '.') + continue; + if (c == 'e' || c == 'E') + break; // Stop counting at exponent + if (std::isdigit(static_cast(c))) { + if (c == '0') { + if (!leading_zero) + trailing_zeros++; + continue; + } + leading_zero = false; + count += trailing_zeros + 1; + trailing_zeros = 0; + } + } + + return count; +} + +template +std::string float_to_hex(const T f) { + std::ostringstream oss; + oss << std::hexfloat << f; + return oss.str(); +} + +template +std::optional parse_float(std::string_view sv) { + T result; + const char* begin = sv.data(); + const char* end = sv.data() + sv.size(); + + auto [ptr, ec] = std::from_chars(begin, end, result); + + // Check if parsing succeeded and consumed the entire string + if (ec == std::errc{} && ptr == end) { + return result; + } + + // Return nullopt if parsing failed or didn't consume all input + return std::nullopt; +} + +#endif diff --git 
a/benchmarks/string_format.h b/benchmarks/string_format.h index 30c133e..4aad000 100644 --- a/benchmarks/string_format.h +++ b/benchmarks/string_format.h @@ -22,6 +22,7 @@ template std::string accurate_to_string(T d) { answer.resize(written); return answer; } + template std::string integer_to_string(T d) { std::stringstream ss; ss << d; diff --git a/benchmarks/thoroughfloat64.cpp b/benchmarks/thoroughfloat64.cpp index 8f9fe0b..827acd0 100644 --- a/benchmarks/thoroughfloat64.cpp +++ b/benchmarks/thoroughfloat64.cpp @@ -14,53 +14,7 @@ #include "algorithms.h" #include "cxxopts.hpp" - -size_t count_significant_digits(std::string_view num_str) { - size_t count = 0; - size_t trailing_zeros = 0; - bool leading_zero = true; - - for (char c : num_str) { - if (c == '.') - continue; - if (c == 'e' || c == 'E') - break; // Stop counting at exponent - if (std::isdigit(static_cast(c))) { - if (c == '0') { - if (!leading_zero) - trailing_zeros++; - continue; - } - leading_zero = false; - count += trailing_zeros + 1; - trailing_zeros = 0; - } - } - - return count; -} - -std::string double_to_hex(double d) { - std::ostringstream oss; - oss << std::hexfloat << d; - return oss.str(); -} - -std::optional parse_double(std::string_view sv) { - double result; - const char* begin = sv.data(); - const char* end = sv.data() + sv.size(); - - auto [ptr, ec] = std::from_chars(begin, end, result); - - // Check if parsing succeeded and consumed the entire string - if (ec == std::errc{} && ptr == end) { - return result; - } - - // Return nullopt if parsing failed or didn't consume all input - return std::nullopt; -} +#include "floatutils.h" struct test_case { double value; @@ -71,19 +25,17 @@ struct test_case { std::vector load_doubles_from_file(const std::string& filename) { std::vector numbers; std::ifstream file(filename); - std::string line; if (!file.is_open()) { fmt::print("Error: Could not open file {}\n", filename); return numbers; } - while (std::getline(file, line)) { - if (auto num = 
parse_double(line)) { + for (std::string line; std::getline(file, line);) { + if (auto num = parse_float(line)) numbers.emplace_back(*num,line); - } else { + else fmt::print("Warning: Could not parse '{}' as double, skipping\n", line); - } } file.close(); @@ -91,7 +43,6 @@ std::vector load_doubles_from_file(const std::string& filename) { } void run_file_test(const std::string& filename, bool errol, const std::vector& algo_filter = {}) { - constexpr auto precision = std::numeric_limits::digits10; fmt::println("{:20} {:20}", "Algorithm", "Valid shortest serialization"); std::array, Benchmarks::COUNT> args; @@ -155,13 +106,13 @@ void run_file_test(const std::string& filename, bool errol, const std::vector(svRef); + auto backAlgo = parse_float(svAlgo); if(!backRef || !backAlgo) { incorrect = true; - fmt::print(" parse error: case: {}; d = {}, bufRef = {}, bufAlgo = {}", str_value, double_to_hex(d), - svRef, svAlgo); + fmt::print(" parse error: case: {}; d = {}, bufRef = {}, bufAlgo = {}", + str_value, float_to_hex(d), svRef, svAlgo); fflush(stdout); break; } @@ -170,20 +121,23 @@ void run_file_test(const std::string& filename, bool errol, const std::vector(d), *backRef, svRef, svAlgo); fflush(stdout); break; } if(*backAlgo != d) { incorrect = true; - fmt::print(" algo mismatch: case: {}; d = {}, backAlgo = {}; svRef = {}, svAlgo = {}, parsing the output with std::from_chars does not recover the original", str_value, double_to_hex(d), *backAlgo, svRef, svAlgo); + fmt::print(" algo mismatch: case: {}; d = {}, backAlgo = {}; svRef = {}, svAlgo = {}, " + "parsing the output with std::from_chars does not recover the original", + str_value, float_to_hex(d), *backAlgo, svRef, svAlgo); fflush(stdout); break; } if (countRef != countAlgo) { incorrect = true; - fmt::print(" mismatch: case: {}; d = {}, bufRef = {}, bufAlgo = {}", str_value, double_to_hex(d), - svRef, svAlgo); + fmt::print(" mismatch: case: {}; d = {}, bufRef = {}, bufAlgo = {}", + str_value, float_to_hex(d), 
svRef, svAlgo); fflush(stdout); break; } @@ -222,4 +176,4 @@ int main(int argc, char **argv) { fmt::print("error parsing options: {}\n", e.what()); return EXIT_FAILURE; } -} \ No newline at end of file +} From d832f8ebd5e5990704b94bbfc7a9592cf5b22a7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C3=ABl=20Champagne=20Gareau?= Date: Fri, 25 Apr 2025 19:56:54 -0400 Subject: [PATCH 2/5] Remove redundancies with algo filtering --- benchmarks/benchmark.cpp | 40 ++++++++++++-------------------- benchmarks/benchutil.h | 17 ++++++++++++-- benchmarks/exhaustivefloat32.cpp | 26 +++++++-------------- benchmarks/thoroughfloat64.cpp | 26 +++++++-------------- 4 files changed, 46 insertions(+), 63 deletions(-) diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index 363176b..0e3780c 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -27,22 +27,10 @@ using Benchmarks::BenchArgs; -bool is_matched(const std::string &str, const std::span filter) { - if (filter.empty()) { - return true; - } - for (const auto &f : filter) { - if (str.find(f) != std::string::npos) { - return true; - } - } - return false; -} - template void evaluateProperties(const std::vector &lines, const std::array, Benchmarks::COUNT> &args, - const std::span filter = {}) { + const std::vector &algo_filter) { fmt::println("{:20} {:20}", "Algorithm", "Valid round-trip"); for (const auto &algo : args) { @@ -50,11 +38,11 @@ void evaluateProperties(const std::vector &lines, fmt::println("# skipping {}", algo.name); continue; } - // Apply filter if provided - if (!is_matched(algo.name, filter)) { + if (algo_filtered_out(algo.name, algo_filter)) { fmt::println("# filtered out {}", algo.name); continue; } + char buf1[100], buf2[100]; std::span bufRef(buf1, sizeof(buf1)), bufAlgo(buf2, sizeof(buf2)); int incorrect = 0; @@ -94,10 +82,11 @@ struct diy_float_t { template void process(const std::vector &lines, - const std::array, Benchmarks::COUNT> &args, const std::span filter = {}) { + const 
std::array, Benchmarks::COUNT> &args, + const std::vector &algo_filter) { // We have a special algorithm for the string generation: - std::string just_string = "just_string"; - if (is_matched(just_string, filter)) { + if (const std::string just_string = "just_string"; + !algo_filtered_out(just_string, algo_filter)) { std::vector parsed; for(auto d : lines) { auto v = jkj::grisu_exact(d); @@ -114,16 +103,17 @@ void process(const std::vector &lines, } else { fmt::println("# skipping {}", just_string); } + for (const auto &algo : args) { if (!algo.used) { fmt::println("# skipping {}", algo.name); continue; } - // Apply filter if provided - if (!is_matched(algo.name, filter)) { + if (algo_filtered_out(algo.name, algo_filter)) { fmt::println("# filtered out {}", algo.name); continue; } + pretty_print(lines, algo.name, [&algo](const std::vector &lines) -> int { int volume = 0; char buf[100]; @@ -133,14 +123,13 @@ void process(const std::vector &lines, return volume; }, algo.testRepeat); } - } template std::vector fileload(const std::string &filename) { std::ifstream inputfile(filename); if (!inputfile) { - fmt::print(stderr, "can't open {}\n", filename); + fmt::println(stderr, "can't open {}", filename); return {}; } @@ -151,7 +140,7 @@ std::vector fileload(const std::string &filename) { lines.push_back(std::is_same_v ? std::stof(line) : std::stod(line)); } catch (...) { - fmt::print(stderr, "problem with {}\nWe expect floating-point numbers (one per line).\n", line); + fmt::println(stderr, "problem with {}\nWe expect floating-point numbers (one per line).", line); std::abort(); } } @@ -165,7 +154,7 @@ std::vector get_random_numbers(size_t howmany, fmt::println("# parsing random numbers"); std::vector lines; auto g = get_generator_by_name(random_model); - fmt::print("model: {}\nvolume: {} floats\n", g->describe(), howmany); + fmt::println("model: {}\nvolume: {} floats", g->describe(), howmany); lines.reserve(howmany); // let us reserve plenty of memory. 
for (size_t i = 0; i < howmany; i++) { const T line = g->new_float(); @@ -218,7 +207,8 @@ int main(int argc, char **argv) { numbers = get_random_numbers(volume, model); else numbers = get_random_numbers(volume, model); - fmt::println("# You can also provide a filename (with the -f flag): it should contain one string per line corresponding to a number"); + fmt::println("# You can also provide a filename (with the -f flag):" + "it should contain one string per line corresponding to a number"); } else { if (single) diff --git a/benchmarks/benchutil.h b/benchmarks/benchutil.h index f5fa093..014121f 100644 --- a/benchmarks/benchutil.h +++ b/benchmarks/benchutil.h @@ -1,13 +1,26 @@ #ifndef BENCHUTIL_H #define BENCHUTIL_H -#include "counters/event_counter.h" +#include #include #include -#include +#include "counters/event_counter.h" + event_collector collector; +bool algo_filtered_out(const std::string &algo_name, + const std::vector &algo_filter) { + if (algo_filter.empty()) + return false; + + for (const auto &f : algo_filter) + if (algo_name.find(f) != std::string::npos) + return false; + + return true; +} + template event_aggregate bench(const function_type &&function, size_t min_repeat = 10, size_t min_time_ns = 400'000'000, diff --git a/benchmarks/exhaustivefloat32.cpp b/benchmarks/exhaustivefloat32.cpp index b1b2461..fafb677 100644 --- a/benchmarks/exhaustivefloat32.cpp +++ b/benchmarks/exhaustivefloat32.cpp @@ -13,6 +13,7 @@ #include "algorithms.h" #include "cxxopts.hpp" #include "floatutils.h" +#include "benchutil.h" void run_exhaustive32(bool errol, const std::vector& algo_filter = {}) { fmt::println("{:20} {:20}", "Algorithm", "Valid shortest serialization"); @@ -22,27 +23,16 @@ void run_exhaustive32(bool errol, const std::vector& algo_filter = for (const auto &algo : args) { if (!algo.used) { - fmt::print("# skipping {}\n", algo.name); + fmt::println("# skipping {}", algo.name); continue; } if (algo.func == Benchmarks::dragonbox) { - fmt::print("# skipping {} 
because it is the reference.\n", algo.name); + fmt::println("# skipping {} because it is the reference.", algo.name); continue; } - - // Apply filter if provided - if (!algo_filter.empty()) { - bool matched = false; - for (const auto &f : algo_filter) { - if (algo.name.find(f) != std::string::npos) { - matched = true; - break; - } - } - if (!matched) { - fmt::print("# filtered out {}\n", algo.name); - continue; - } + if (algo_filtered_out(algo.name, algo_filter)) { + fmt::println("# filtered out {}", algo.name); + continue; } bool incorrect = false; @@ -131,14 +121,14 @@ int main(int argc, char **argv) { const auto result = options.parse(argc, argv); if (result["help"].as()) { - fmt::print("{}\n", options.help()); + fmt::println("{}", options.help()); return EXIT_SUCCESS; } auto algo_filter = result["algorithm"].as>(); run_exhaustive32(result["errol"].as(), algo_filter); } catch (const std::exception &e) { - fmt::print("error parsing options: {}\n", e.what()); + fmt::println("error parsing options: {}", e.what()); return EXIT_FAILURE; } } diff --git a/benchmarks/thoroughfloat64.cpp b/benchmarks/thoroughfloat64.cpp index 827acd0..a335b07 100644 --- a/benchmarks/thoroughfloat64.cpp +++ b/benchmarks/thoroughfloat64.cpp @@ -15,6 +15,7 @@ #include "algorithms.h" #include "cxxopts.hpp" #include "floatutils.h" +#include "benchutil.h" struct test_case { double value; @@ -57,27 +58,16 @@ void run_file_test(const std::string& filename, bool errol, const std::vector) { - fmt::print("# skipping {} because it is the reference.\n", algo.name); + fmt::println("# skipping {} because it is the reference.", algo.name); continue; } - - // Apply filter if provided - if (!algo_filter.empty()) { - bool matched = false; - for (const auto &f : algo_filter) { - if (algo.name.find(f) != std::string::npos) { - matched = true; - break; - } - } - if (!matched) { - fmt::print("# filtered out {}\n", algo.name); - continue; - } + if (algo_filtered_out(algo.name, algo_filter)) { + fmt::println("# 
filtered out {}", algo.name); + continue; } bool incorrect = false; @@ -168,12 +158,12 @@ int main(int argc, char **argv) { const auto result = options.parse(argc, argv); if (result["help"].as()) { - fmt::print("{}\n", options.help()); + fmt::println("{}", options.help()); return EXIT_SUCCESS; } run_file_test(result["file"].as(), result["errol"].as(), result["algorithm"].as>()); } catch (const std::exception &e) { - fmt::print("error parsing options: {}\n", e.what()); + fmt::println("error parsing options: {}", e.what()); return EXIT_FAILURE; } } From 20f9eb77f198cd208bf32f6321f2eed93e6fa1ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C3=ABl=20Champagne=20Gareau?= Date: Sat, 26 Apr 2025 18:09:50 -0400 Subject: [PATCH 3/5] Unify exhaustive32 and thoroughfloat64 evaluation --- benchmarks/benchutil.h | 111 ++++++++++++++++++++++++++++++ benchmarks/exhaustivefloat32.cpp | 104 ++++------------------------ benchmarks/thoroughfloat64.cpp | 112 +++---------------------------- 3 files changed, 131 insertions(+), 196 deletions(-) diff --git a/benchmarks/benchutil.h b/benchmarks/benchutil.h index 014121f..fdaa184 100644 --- a/benchmarks/benchutil.h +++ b/benchmarks/benchutil.h @@ -1,14 +1,23 @@ #ifndef BENCHUTIL_H #define BENCHUTIL_H +#include + #include #include #include +#include "algorithms.h" #include "counters/event_counter.h" event_collector collector; +template +struct TestCase { + T value; + std::optional str_value; +}; + bool algo_filtered_out(const std::string &algo_name, const std::vector &algo_filter) { if (algo_filter.empty()) @@ -21,6 +30,108 @@ bool algo_filtered_out(const std::string &algo_name, return true; } +template +void evaluate_properties_helper(bool errol, + const std::vector &algo_filter, + Range&& cases) { + fmt::println("{:20} {:20}", "Algorithm", "Valid shortest serialization"); + const auto args = Benchmarks::initArgs(errol); + + // Get number of cases for progress display + uint64_t total = 0; + if constexpr (std::ranges::sized_range) + total = 
static_cast(std::ranges::size(cases)); + else if constexpr (std::is_same_v) + total = (1ULL << 32); + const uint64_t progress_interval = (total > 0 ? total / 100 : 0); + + for (const auto &algo : args) { + if (!algo.used) { + fmt::println("# skipping {}", algo.name); + continue; + } + if (algo.func == Benchmarks::dragonbox) { + fmt::println("# skipping {} because it is the reference.", algo.name); + continue; + } + if (algo_filtered_out(algo.name, algo_filter)) { + fmt::println("# filtered out {}", algo.name); + continue; + } + + fmt::print("# processing {}", algo.name); + fflush(stdout); + + bool incorrect = false; + char buf1[100], buf2[100]; + std::span bufRef(buf1, sizeof buf1), bufAlgo(buf2, sizeof buf2); + + uint64_t count = 0; + for (const auto &tc : cases) { + if (progress_interval > 0 && (count++ % progress_interval) == 0) { + std::printf("."); + std::fflush(stdout); + } + + const T d = tc.value; + const std::string sv = tc.str_value ? std::format("case: {};", *tc.str_value) : ""; + + if (std::isnan(d) || std::isinf(d)) + continue; + + // Reference output, we cannot use std::to_chars here, because it produces + // the shortest representation, which is not necessarily the same as the + // representation using the fewest significant digits. + // So we use dragonbox, which serves as the reference implementation. 
+ const size_t vRef = Benchmarks::dragonbox(d, bufRef); + const size_t vAlgo = algo.func(d, bufAlgo); + + std::string_view svRef{bufRef.data(), vRef}, + svAlgo{bufAlgo.data(), vAlgo}; + + auto countRef = count_significant_digits(svRef); + auto countAlgo = count_significant_digits(svAlgo); + auto backRef = parse_float(svRef); + auto backAlgo = parse_float(svAlgo); + + if(!backRef || !backAlgo) { + incorrect = true; + fmt::print(" parse error: {} d = {}, ref={}, algo={}", + sv, float_to_hex(d), svRef, svAlgo); + fflush(stdout); + break; + } + if(*backRef != d || *backAlgo != d) + fmt::println("\n# Error: parsing the output with std::from_chars does not bring back the input."); + if(*backRef != d) { + incorrect = true; + fmt::print(" ref mismatch: {} d = {}, backRef = {}; svRef = {}, svAlgo = {}", + sv, float_to_hex(d), *backRef, svRef, svAlgo); + fflush(stdout); + break; + } + if(*backAlgo != d) { + incorrect = true; + fmt::print(" algo mismatch: {} d = {}, backAlgo = {}; svRef = {}, svAlgo = {}, " + "parsing the output with std::from_chars does not recover the original", + sv, float_to_hex(d), *backAlgo, svRef, svAlgo); + fflush(stdout); + break; + } + if (countRef != countAlgo) { + incorrect = true; + fmt::print(" mismatch: {} d = {}, bufRef = {}, bufAlgo = {}", + sv, float_to_hex(d), svRef, svAlgo); + fflush(stdout); + break; + } + } + + fmt::print("\n"); + fmt::println("{:20} {:20}", algo.name, incorrect ? 
"no" : "yes"); + } +} + template event_aggregate bench(const function_type &&function, size_t min_repeat = 10, size_t min_time_ns = 400'000'000, diff --git a/benchmarks/exhaustivefloat32.cpp b/benchmarks/exhaustivefloat32.cpp index fafb677..0aae7c7 100644 --- a/benchmarks/exhaustivefloat32.cpp +++ b/benchmarks/exhaustivefloat32.cpp @@ -1,13 +1,6 @@ #include -#include -#include #include -#include -#include -#include -#include -#include #include #include "algorithms.h" @@ -15,92 +8,19 @@ #include "floatutils.h" #include "benchutil.h" -void run_exhaustive32(bool errol, const std::vector& algo_filter = {}) { - fmt::println("{:20} {:20}", "Algorithm", "Valid shortest serialization"); - - std::array, Benchmarks::COUNT> args; - args = Benchmarks::initArgs(errol); - - for (const auto &algo : args) { - if (!algo.used) { - fmt::println("# skipping {}", algo.name); - continue; - } - if (algo.func == Benchmarks::dragonbox) { - fmt::println("# skipping {} because it is the reference.", algo.name); - continue; - } - if (algo_filtered_out(algo.name, algo_filter)) { - fmt::println("# filtered out {}", algo.name); - continue; - } +using Benchmarks::BenchArgs; - bool incorrect = false; - char buf1[100], buf2[100]; - std::span bufRef(buf1, sizeof(buf1)), bufAlgo(buf2, sizeof(buf2)); - fmt::print("# processing {}", algo.name); - fflush(stdout); - for (uint64_t i = 0; i < (1ULL << 32); ++i) { - if (i % 0x2000000 == 0) { - printf("."); - fflush(stdout); - } - static_assert(sizeof(float) == sizeof(uint32_t)); - uint32_t i32(i); - float d; - std::memcpy(&d, &i32, sizeof(float)); - if (std::isnan(d) || std::isinf(d)) - continue; - // Reference output, we cannot use std::to_chars here, because it produces - // the shortest representation, which is not necessarily the same as the - // representation using the fewest significant digits. - // So we use dragonbox, which serves as the reference implementation. 
- const size_t vRef = Benchmarks::dragonbox(d, bufRef); - const size_t vAlgo = algo.func(d, bufAlgo); - - std::string_view svRef{bufRef.data(), vRef}; - std::string_view svAlgo{bufAlgo.data(), vAlgo}; - - auto countRef = count_significant_digits(svRef); - auto countAlgo = count_significant_digits(svAlgo); - auto backRef = parse_float(svRef); - auto backAlgo = parse_float(svAlgo); - if(!backRef || !backAlgo) { - incorrect = true; - fmt::print(" parse error: d = {}, bufRef = {}, bufAlgo = {}", - float_to_hex(d), svRef, svAlgo); - fflush(stdout); - break; - } - if(*backRef != d || *backAlgo != d) { - fmt::println("\n# Error: parsing the output with std::from_chars does not bring back the input."); - } - if(*backRef != d) { - incorrect = true; - fmt::print(" ref mismatch: d = {}, backRef = {}; svRef = {}, svAlgo = {}", - float_to_hex(d), *backRef, svRef, svAlgo); - fflush(stdout); - break; - } - if(*backAlgo != d) { - incorrect = true; - fmt::print(" algo mismatch: d = {}, backAlgo = {}; svRef = {}, svAlgo = {}, " - "parsing the output with std::from_chars does not recover the original", - float_to_hex(d), *backAlgo, svRef, svAlgo); - fflush(stdout); - break; - } - if (countRef != countAlgo) { - incorrect = true; - fmt::print(" mismatch: d = {}, bufRef = {}, bufAlgo = {}", - float_to_hex(d), svRef, svAlgo); - fflush(stdout); - break; - } - } - fmt::print("\n"); - fmt::println("{:20} {:20}", algo.name, incorrect == 0 ? 
"yes" : "no"); - } +void run_exhaustive32(bool errol, const std::vector& algo_filter = {}) { + static_assert(sizeof(float) == sizeof(uint32_t)); + auto floats_view + = std::views::iota(uint32_t{0}) + | std::views::take(1ULL << 32) + | std::views::transform([](uint32_t i) { + const float d = std::bit_cast(i); + return TestCase{ d, std::nullopt }; + }); + + evaluate_properties_helper(errol, algo_filter, floats_view); } cxxopts::Options diff --git a/benchmarks/thoroughfloat64.cpp b/benchmarks/thoroughfloat64.cpp index a335b07..a417894 100644 --- a/benchmarks/thoroughfloat64.cpp +++ b/benchmarks/thoroughfloat64.cpp @@ -1,14 +1,7 @@ #include -#include -#include #include -#include -#include -#include #include -#include -#include #include #include @@ -17,26 +10,21 @@ #include "floatutils.h" #include "benchutil.h" -struct test_case { - double value; - std::string str_value; -}; - // Helper function to load doubles from a file -std::vector load_doubles_from_file(const std::string& filename) { - std::vector numbers; +std::vector> load_doubles_from_file(const std::string& filename) { + std::vector> numbers; std::ifstream file(filename); if (!file.is_open()) { - fmt::print("Error: Could not open file {}\n", filename); + fmt::println("Error: Could not open file {}", filename); return numbers; } for (std::string line; std::getline(file, line);) { if (auto num = parse_float(line)) - numbers.emplace_back(*num,line); + numbers.emplace_back(*num, line); else - fmt::print("Warning: Could not parse '{}' as double, skipping\n", line); + fmt::println("Warning: Could not parse '{}' as double, skipping", line); } file.close(); @@ -44,97 +32,13 @@ std::vector load_doubles_from_file(const std::string& filename) { } void run_file_test(const std::string& filename, bool errol, const std::vector& algo_filter = {}) { - fmt::println("{:20} {:20}", "Algorithm", "Valid shortest serialization"); - - std::array, Benchmarks::COUNT> args; - args = Benchmarks::initArgs(errol); - - // Load the doubles 
from file - auto test_values = load_doubles_from_file(filename); + const auto test_values = load_doubles_from_file(filename); if (test_values.empty()) { - fmt::print("No valid numbers to test\n"); + fmt::println("No valid numbers to test"); return; } - for (const auto &algo : args) { - if (!algo.used) { - fmt::println("# skipping {}", algo.name); - continue; - } - if (algo.func == Benchmarks::dragonbox) { - fmt::println("# skipping {} because it is the reference.", algo.name); - continue; - } - if (algo_filtered_out(algo.name, algo_filter)) { - fmt::println("# filtered out {}", algo.name); - continue; - } - - bool incorrect = false; - char buf1[100], buf2[100]; - std::span bufRef(buf1, sizeof(buf1)), bufAlgo(buf2, sizeof(buf2)); - fmt::print("# processing {}", algo.name); - fflush(stdout); - - size_t total = test_values.size(); - for (size_t i = 0; i < total; ++i) { - if (i % (total/10) == 0 && total > 10) { - printf("."); - fflush(stdout); - } - double d = test_values[i].value; - const std::string& str_value = test_values[i].str_value; - if (std::isnan(d) || std::isinf(d)) - continue; - - const size_t vRef = Benchmarks::dragonbox(d, bufRef); - const size_t vAlgo = algo.func(d, bufAlgo); - - std::string_view svRef{bufRef.data(), vRef}; - std::string_view svAlgo{bufAlgo.data(), vAlgo}; - //fmt::print(" RESULT {}: {} ", algo.name, svAlgo); - - auto countRef = count_significant_digits(svRef); - auto countAlgo = count_significant_digits(svAlgo); - auto backRef = parse_float(svRef); - auto backAlgo = parse_float(svAlgo); - - if(!backRef || !backAlgo) { - incorrect = true; - fmt::print(" parse error: case: {}; d = {}, bufRef = {}, bufAlgo = {}", - str_value, float_to_hex(d), svRef, svAlgo); - fflush(stdout); - break; - } - if(*backRef != d || *backAlgo != d) { - fmt::println("\n# Error: parsing the output with std::from_chars does not bring back the input."); - } - if(*backRef != d) { - incorrect = true; - fmt::print(" ref mismatch:case: {}; d = {}, backRef = {}; svRef = 
{}, svAlgo = {}", - str_value, float_to_hex(d), *backRef, svRef, svAlgo); - fflush(stdout); - break; - } - if(*backAlgo != d) { - incorrect = true; - fmt::print(" algo mismatch: case: {}; d = {}, backAlgo = {}; svRef = {}, svAlgo = {}, " - "parsing the output with std::from_chars does not recover the original", - str_value, float_to_hex(d), *backAlgo, svRef, svAlgo); - fflush(stdout); - break; - } - if (countRef != countAlgo) { - incorrect = true; - fmt::print(" mismatch: case: {}; d = {}, bufRef = {}, bufAlgo = {}", - str_value, float_to_hex(d), svRef, svAlgo); - fflush(stdout); - break; - } - } - fmt::print("\n"); - fmt::println("{:20} {:20}", algo.name, incorrect == 0 ? "yes" : "no"); - } + evaluate_properties_helper(errol, algo_filter, test_values); } cxxopts::Options From 48d28a6df92fd3df95cf9626a70a1975a3625113 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C3=ABl=20Champagne=20Gareau?= Date: Sun, 27 Apr 2025 17:36:08 -0400 Subject: [PATCH 4/5] concepts for Range and unify code further --- benchmarks/algorithms.h | 2 +- benchmarks/benchmark.cpp | 88 ++++++++++---------------------- benchmarks/benchutil.h | 37 ++++++++++---- benchmarks/exhaustivefloat32.cpp | 2 +- benchmarks/thoroughfloat64.cpp | 2 +- 5 files changed, 56 insertions(+), 75 deletions(-) diff --git a/benchmarks/algorithms.h b/benchmarks/algorithms.h index c1aac12..4d8fd8e 100644 --- a/benchmarks/algorithms.h +++ b/benchmarks/algorithms.h @@ -73,7 +73,7 @@ struct BenchArgs { using Type = T; BenchArgs(const std::string& name = {}, int (*func)(T, std::span&) = {}, - bool used = true, unsigned char testRepeat = 100) + bool used = true, size_t testRepeat = 100) : name(name), func(func), used(used), testRepeat(testRepeat) {} std::string name{}; diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index 0e3780c..6b9c98c 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -28,48 +28,10 @@ using Benchmarks::BenchArgs; template -void evaluateProperties(const std::vector &lines, 
+void evaluateProperties(const std::vector> &lines, const std::array, Benchmarks::COUNT> &args, const std::vector &algo_filter) { - fmt::println("{:20} {:20}", "Algorithm", "Valid round-trip"); - - for (const auto &algo : args) { - if (!algo.used) { - fmt::println("# skipping {}", algo.name); - continue; - } - if (algo_filtered_out(algo.name, algo_filter)) { - fmt::println("# filtered out {}", algo.name); - continue; - } - - char buf1[100], buf2[100]; - std::span bufRef(buf1, sizeof(buf1)), bufAlgo(buf2, sizeof(buf2)); - int incorrect = 0; - for (const auto d : lines) { - // Reference output - const int vRef = Benchmarks::std_to_chars(d, bufRef); - bufRef[vRef] = '\0'; - T dRef; - // We prefer fast_float::from_chars over std::from_chars because it is more - // likely to be available. - auto [ptr, ec] = fast_float::from_chars(bufRef.data(), bufRef.data() + vRef, dRef); - assert(ptr == bufRef.data() + vRef); - assert(ec == std::errc()); - assert(d == dRef); - // Tested algorithm output - const int vAlgo = algo.func(d, bufAlgo); - bufAlgo[vAlgo] = '\0'; - T dAlgo; - auto [ptrAlgo, ecAlgo] = fast_float::from_chars(bufAlgo.data(), bufAlgo.data() + vAlgo, dAlgo); - assert(ptrAlgo == bufAlgo.data() + vAlgo); - assert(ecAlgo == std::errc()); - if ((incorrect += (d != dAlgo)) == 1) - fmt::println("#\t{:20} mismatch: d = {:.17f}, bufRef = {}, bufAlgo = {}, dAlgo = {:.17f}", - algo.name, d, bufRef.data(), bufAlgo.data(), dAlgo); - } - fmt::println("{:20} {:20}", algo.name, incorrect == 0 ? 
"yes" : "no"); - } + evaluate_properties_helper(lines, algo_filter, args); } struct diy_float_t { @@ -81,15 +43,15 @@ struct diy_float_t { }; template -void process(const std::vector &lines, +void process(const std::vector> &lines, const std::array, Benchmarks::COUNT> &args, const std::vector &algo_filter) { // We have a special algorithm for the string generation: if (const std::string just_string = "just_string"; !algo_filtered_out(just_string, algo_filter)) { std::vector parsed; - for(auto d : lines) { - auto v = jkj::grisu_exact(d); + for(const auto d : lines) { + const auto v = jkj::grisu_exact(d.value); parsed.emplace_back(v.significand, v.exponent, v.is_negative); } pretty_print(parsed, just_string, [](const std::vector& parsed) -> int { @@ -97,7 +59,7 @@ void process(const std::vector &lines, char buf[100]; std::span bufspan(buf, sizeof(buf)); for (const auto v : parsed) - volume += to_chars(v.significand, v.exponent, v.is_negative, bufspan.data()); + volume += to_chars(v.significand, v.exponent, v.is_negative, bufspan.data()); return volume; }, 100); } else { @@ -114,31 +76,30 @@ void process(const std::vector &lines, continue; } - pretty_print(lines, algo.name, [&algo](const std::vector &lines) -> int { + pretty_print(lines, algo.name, [&algo](const std::vector> &lines) -> int { int volume = 0; char buf[100]; std::span bufspan(buf, sizeof(buf)); for (const auto d : lines) - volume += algo.func(d, bufspan); + volume += algo.func(d.value, bufspan); return volume; }, algo.testRepeat); } } -template -std::vector fileload(const std::string &filename) { +template +std::vector> fileload(const std::string &filename) { std::ifstream inputfile(filename); if (!inputfile) { fmt::println(stderr, "can't open {}", filename); return {}; } - std::vector lines; + std::vector> lines; lines.reserve(10000); // let us reserve plenty of memory. for (std::string line; getline(inputfile, line);) { try { - lines.push_back(std::is_same_v ? 
std::stof(line) - : std::stod(line)); + lines.emplace_back(std::is_same_v ? std::stof(line) : std::stod(line), line); } catch (...) { fmt::println(stderr, "problem with {}\nWe expect floating-point numbers (one per line).", line); std::abort(); @@ -148,17 +109,17 @@ std::vector fileload(const std::string &filename) { return lines; } -template -std::vector get_random_numbers(size_t howmany, - const std::string &random_model) { +template +std::vector> get_random_numbers(size_t howmany, + const std::string &random_model) { fmt::println("# parsing random numbers"); - std::vector lines; + std::vector> lines; auto g = get_generator_by_name(random_model); fmt::println("model: {}\nvolume: {} floats", g->describe(), howmany); lines.reserve(howmany); // let us reserve plenty of memory. for (size_t i = 0; i < howmany; i++) { const T line = g->new_float(); - lines.push_back(line); + lines.emplace_back(line, std::nullopt); } return lines; } @@ -183,7 +144,7 @@ int main(int argc, char **argv) { ("e,errol", "Enable errol3 (current impl. returns invalid values, e.g., for 0).", cxxopts::value()->default_value("false")) ("a,algo-filter", "Filter algorithms by name substring: you can use multiple filters separated by commas.", - cxxopts::value>()->default_value("")) + cxxopts::value>()) ("r,repeat", "Force a number of repetitions.", cxxopts::value()->default_value("0")) ("h,help", "Print usage."); @@ -195,10 +156,13 @@ int main(int argc, char **argv) { } const size_t repeat = result["repeat"].as(); const bool single = result["single"].as(); - std::vector filter = result["algo-filter"].as>(); + const auto filter = result.count("algo-filter") + ? result["algo-filter"].as>() + : std::vector{}; fmt::println("number type: binary{}", (single ? 
"32 (float)" : "64 (double)")); - std::variant, std::vector> numbers; + std::variant>, + std::vector>> numbers; const auto filename = result["file"].as(); if (filename.empty()) { const auto volume = result["volume"].as(); @@ -207,7 +171,7 @@ int main(int argc, char **argv) { numbers = get_random_numbers(volume, model); else numbers = get_random_numbers(volume, model); - fmt::println("# You can also provide a filename (with the -f flag):" + fmt::println("# You can also provide a filename (with the -f flag): " "it should contain one string per line corresponding to a number"); } else { @@ -234,8 +198,8 @@ int main(int argc, char **argv) { } const bool test = result["test"].as(); - std::visit([test,&filter](const auto &lines, const auto &args) { - using T1 = typename std::decay_t::value_type; + std::visit([test, &filter](const auto &lines, const auto &args) { + using T1 = typename std::decay_t::value_type::Type; using T2 = typename std::decay_t::value_type::Type; if constexpr (std::is_same_v) { if (test) diff --git a/benchmarks/benchutil.h b/benchmarks/benchutil.h index fdaa184..321d219 100644 --- a/benchmarks/benchutil.h +++ b/benchmarks/benchutil.h @@ -6,17 +6,14 @@ #include #include #include +#include #include "algorithms.h" #include "counters/event_counter.h" -event_collector collector; +using Benchmarks::BenchArgs; -template -struct TestCase { - T value; - std::optional str_value; -}; +event_collector collector; bool algo_filtered_out(const std::string &algo_name, const std::vector &algo_filter) { @@ -30,12 +27,32 @@ bool algo_filtered_out(const std::string &algo_name, return true; } -template -void evaluate_properties_helper(bool errol, +template +struct TestCase { + using Type = T; + T value; + std::optional str_value; +}; + +template +concept TestCaseConcept = arithmetic_float && requires(E e) { + { e.value } -> std::convertible_to; + { e.str_value } -> std::convertible_to>; +}; + +template +concept TestCaseRange + = std::ranges::input_range + && 
TestCaseConcept, T>; + +template requires TestCaseRange +void evaluate_properties_helper(Range&& cases, const std::vector &algo_filter, - Range&& cases) { + std::variant, Benchmarks::COUNT>, bool> argsOpt) { fmt::println("{:20} {:20}", "Algorithm", "Valid shortest serialization"); - const auto args = Benchmarks::initArgs(errol); + const auto args = std::holds_alternative(argsOpt) + ? Benchmarks::initArgs(std::get(argsOpt)) + : std::get, Benchmarks::COUNT>>(argsOpt); // Get number of cases for progress display uint64_t total = 0; diff --git a/benchmarks/exhaustivefloat32.cpp b/benchmarks/exhaustivefloat32.cpp index 0aae7c7..d284c1d 100644 --- a/benchmarks/exhaustivefloat32.cpp +++ b/benchmarks/exhaustivefloat32.cpp @@ -20,7 +20,7 @@ void run_exhaustive32(bool errol, const std::vector& algo_filter = return TestCase{ d, std::nullopt }; }); - evaluate_properties_helper(errol, algo_filter, floats_view); + evaluate_properties_helper(floats_view, algo_filter, errol); } cxxopts::Options diff --git a/benchmarks/thoroughfloat64.cpp b/benchmarks/thoroughfloat64.cpp index a417894..c5401c4 100644 --- a/benchmarks/thoroughfloat64.cpp +++ b/benchmarks/thoroughfloat64.cpp @@ -38,7 +38,7 @@ void run_file_test(const std::string& filename, bool errol, const std::vector(errol, algo_filter, test_values); + evaluate_properties_helper(test_values, algo_filter, errol); } cxxopts::Options From 0a99da8c77d1726ec94eaa0a757abf35319a1699 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C3=ABl=20Champagne=20Gareau?= Date: Sun, 27 Apr 2025 18:02:48 -0400 Subject: [PATCH 5/5] fix some compile errors on github --- benchmarks/benchutil.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/benchmarks/benchutil.h b/benchmarks/benchutil.h index 321d219..c76e6aa 100644 --- a/benchmarks/benchutil.h +++ b/benchmarks/benchutil.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include "algorithms.h" #include "counters/event_counter.h" @@ -91,7 +93,7 @@ void 
evaluate_properties_helper(Range&& cases, } const T d = tc.value; - const std::string sv = tc.str_value ? std::format("case: {};", *tc.str_value) : ""; + const std::string sv = tc.str_value ? fmt::format("case: {};", *tc.str_value) : ""; if (std::isnan(d) || std::isinf(d)) continue;