From 486cf331d74066101f7c634a89985eefb924138c Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 13 Jun 2025 15:39:21 -0400 Subject: [PATCH 1/5] the -D flags allow us to derive some interesting statistics --- benchmarks/benchmark.cpp | 108 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 1 deletion(-) diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index dd50715..c512f2f 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -141,6 +141,105 @@ cxxopts::Options options("benchmark", "Compute the parsing speed of different number parsers."); + +// Checks if a floating-point number is exactly representable as the specified integer type +template +bool is_exact_integer(float_type x) { + if (!std::isfinite(x)) { + return false; + } + int_type i = static_cast(x); + return static_cast(i) == x; +} + +// Nouvelle version template de describe +template +void describe(const std::variant>, std::vector>> &numbers, + const std::vector> &args, + const std::vector &algo_filter) { + std::visit([&args, &algo_filter](const auto &lines) { + size_t integers64 = 0; + size_t integers32 = 0; + for (const auto &d : lines) { + integers64 += is_exact_integer(d.value) ? 1 : 0; + integers32 += is_exact_integer(d.value) ? 1 : 0; + } + std::vector sizes(lines.size(), std::numeric_limits::max()); + std::vector shortest(lines.size()); + std::vector> results; + size_t min_size = std::numeric_limits::max(); + for (const auto &algo : args) { + if (!algo.used) continue; + if (algo_filtered_out(algo.name, algo_filter)) continue; + size_t total_size = 0; + std::vector buffer(100); + std::span bufspan(buffer); + bool precise = true; + for(size_t i = 0; i < lines.size(); ++i) { + const auto &d = lines[i]; + int len = algo.func(d.value, bufspan); + if(sizes[i] > len) { + sizes[i] = len; + shortest[i].assign(bufspan.data(), len); + } + total_size += len; + std::string_view sv(buffer.data(), len); + auto parsed = parse_float(sv); + if (!parsed.has_value() || parsed.value() != d.value) { + precise = false; + break; + } + } + double avg = total_size / double(lines.size()); + results.emplace_back(algo.name, total_size, avg, precise); + if (precise && total_size < min_size) min_size = total_size; + } + constexpr size_t warning_max = 1; + for (const auto &algo : args) { + if (!algo.used) continue; + if (algo_filtered_out(algo.name, algo_filter)) continue; + size_t howmany = 0; + std::vector buffer(100); + std::span bufspan(buffer); + size_t worse_than_shortest = 0; + for(size_t i = 0; i < lines.size(); ++i) { + const auto &d = lines[i]; + int len = algo.func(d.value, bufspan); + if(sizes[i] < len) { + howmany++; + bool new_record = (len > worse_than_shortest + sizes[i]); + worse_than_shortest = (std::max)(worse_than_shortest, len - sizes[i]); + if(new_record || howmany <= warning_max) { + fmt::print(stderr, "Warning: algorithm {} produced a longer string ({}) than the shortest ({}) for value {}\n", + algo.name, len, sizes[i], d.value); + fmt::print(stderr, " Shortest: '{}'\n", shortest[i]); + std::string_view this_answer(bufspan.data(), len); + fmt::print(stderr, " Produced: '{}'\n", this_answer); + auto parsed_ref = parse_float(shortest[i]); + auto parsed_this = parse_float(this_answer); + if(!parsed_ref.has_value() || !parsed_this.has_value()) { + fmt::print(stderr, " BUG! Parsing failed for one of the strings.\n"); + } else if (parsed_ref.value() != parsed_this.value()) { + fmt::print(stderr, " BUG! Parsed values differ: {} vs {}\n", + parsed_ref.value(), parsed_this.value()); + } + + } + } + } + if(howmany > warning_max) { + fmt::print(stderr, "Warning: algorithm {} produced longer strings than the shortest for {} values, worst gap is {} characters\n", + algo.name, howmany, worse_than_shortest); + } + } + for (const auto &[name, total_size, avg, precise] : results) { + bool is_min = (precise && total_size == min_size); + fmt::print("{:<18} {:>12} ({:>5.3f} chars/f){}{}\n", name, total_size, avg, is_min ? "[minimal]" : "", precise ? "[precise]" : " [imprecise]"); + } + fmt::println("count: {}, 32-bit ints: {}, 64-bit ints: {}", lines.size(), integers32, integers64); + }, numbers); +} + int main(int argc, char **argv) { try { options.add_options() @@ -148,6 +247,7 @@ int main(int argc, char **argv) { cxxopts::value()->default_value("")) ("F,fixed", "Fixed-point representation.", cxxopts::value()->default_value("0")) + ("D,data", "Description of the data.") ("v,volume", "Volume (number of floats generated).", cxxopts::value()->default_value("100000")) ("m,model", "Random Model.", @@ -205,7 +305,13 @@ int main(int argc, char **argv) { algorithms = initArgs(errol, repeat, fixed_size); else algorithms = initArgs(errol, repeat, fixed_size); - + if (result["data"].as()) { + if (single) + describe(numbers, std::get>>(algorithms), filter); + else + describe(numbers, std::get>>(algorithms), filter); + return EXIT_SUCCESS; + } const bool test = result["test"].as(); const bool string_eval = result["string-eval"].as(); std::visit([test, string_eval, &filter](const auto &lines, const auto &args) { From 10539ffbde9d3fe4618ed758d25f2f9fa93e4a22 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 13 Jun 2025 16:06:08 -0400 Subject: [PATCH 2/5] update --- benchmarks/benchmark.cpp | 49 +++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index c512f2f..85c3a90 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -152,7 +152,7 @@ bool is_exact_integer(float_type x) { return static_cast(i) == x; } -// Nouvelle version template de describe +// New template version of describe template void describe(const std::variant>, std::vector>> &numbers, const std::vector> &args, @@ -166,6 +166,8 @@ void describe(const std::variant>, std::vector sizes(lines.size(), std::numeric_limits::max()); std::vector shortest(lines.size()); + std::vector min_digits(lines.size(), std::numeric_limits::max()); + std::vector min_digits_str(lines.size()); std::vector> results; size_t min_size = std::numeric_limits::max(); for (const auto &algo : args) { @@ -182,6 +184,12 @@ void describe(const std::variant>, std::vector digits) { + min_digits[i] = digits; + min_digits_str[i].assign(bufspan.data(), len); + } total_size += len; std::string_view sv(buffer.data(), len); auto parsed = parse_float(sv); @@ -194,22 +202,25 @@ void describe(const std::variant>, std::vector> algo_results; for (const auto &algo : args) { if (!algo.used) continue; if (algo_filtered_out(algo.name, algo_filter)) continue; size_t howmany = 0; + size_t howmany_digits = 0; std::vector buffer(100); std::span bufspan(buffer); size_t worse_than_shortest = 0; + size_t digits_worse_than_min = 0; for(size_t i = 0; i < lines.size(); ++i) { const auto &d = lines[i]; int len = algo.func(d.value, bufspan); + // Case where the string is longer than the shortest if(sizes[i] < len) { howmany++; bool new_record = (len > worse_than_shortest + sizes[i]); worse_than_shortest = (std::max)(worse_than_shortest, len - sizes[i]); - if(new_record || howmany <= warning_max) { + if(new_record) { fmt::print(stderr, "Warning: algorithm {} produced a longer string ({}) than the shortest ({}) for value {}\n", algo.name, len, sizes[i], d.value); fmt::print(stderr, " Shortest: '{}'\n", shortest[i]); @@ -223,18 +234,44 @@ void describe(const std::variant>, std::vector digits_worse_than_min + sizes[i]); + digits_worse_than_min = (std::max)(digits_worse_than_min, digits - min_digits[i]); + if(new_record) { + fmt::print(stderr, "Warning: algorithm {} produced a string with more significant digits ({}) than the minimum ({}) for value {}\n", + algo.name, digits, min_digits[i], d.value); + fmt::print(stderr, " Min digits: '{}'\n", min_digits_str[i]); + std::string_view this_answer(bufspan.data(), len); + fmt::print(stderr, " Produced: '{}'\n", this_answer); } } } - if(howmany > warning_max) { + if(howmany > 0) { fmt::print(stderr, "Warning: algorithm {} produced longer strings than the shortest for {} values, worst gap is {} characters\n", algo.name, howmany, worse_than_shortest); } + if(howmany_digits > 0) { + fmt::print(stderr, "Warning: algorithm {} produced more significant digits than the minimum for {} values, worst gap is {} digits\n", + algo.name, howmany_digits, digits_worse_than_min); + } + if(howmany > 0 || howmany_digits > 0) { + fmt::println("---"); + } + algo_results[algo.name] = std::make_tuple(howmany == 0, howmany_digits == 0); + } for (const auto &[name, total_size, avg, precise] : results) { - bool is_min = (precise && total_size == min_size); - fmt::print("{:<18} {:>12} ({:>5.3f} chars/f){}{}\n", name, total_size, avg, is_min ? "[minimal]" : "", precise ? "[precise]" : " [imprecise]"); + auto [is_shortest, is_min_digits_algo] = algo_results[name]; + fmt::print("{:<18} {:>12} ({:>5.3f} chars/f) {:<18} {:<12} {:<15}\n", + name, total_size, avg, + is_shortest ? "[minimal string]" : "[non minimal]", + precise ? "[precise]" : "[imprecise]", + is_min_digits_algo ? "[min digits]" : "[non min digits]"); } fmt::println("count: {}, 32-bit ints: {}, 64-bit ints: {}", lines.size(), integers32, integers64); }, numbers); From 3f4ee4e3f31dcbfdf2dad61e3d5b894950d5cfbe Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 13 Jun 2025 20:43:24 -0400 Subject: [PATCH 3/5] saving. --- benchmarks/benchmark.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index 85c3a90..daab473 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -168,12 +168,14 @@ void describe(const std::variant>, std::vector shortest(lines.size()); std::vector min_digits(lines.size(), std::numeric_limits::max()); std::vector min_digits_str(lines.size()); - std::vector> results; + std::vector> results; size_t min_size = std::numeric_limits::max(); for (const auto &algo : args) { if (!algo.used) continue; if (algo_filtered_out(algo.name, algo_filter)) continue; size_t total_size = 0; + size_t total_digits = 0; + std::vector buffer(100); std::span bufspan(buffer); bool precise = true; @@ -191,6 +193,7 @@ void describe(const std::variant>, std::vector(sv); if (!parsed.has_value() || parsed.value() != d.value) { @@ -199,7 +202,9 @@ void describe(const std::variant>, std::vector> algo_results; @@ -265,10 +270,10 @@ void describe(const std::variant>, std::vector12} ({:>5.3f} chars/f) {:<18} {:<12} {:<15}\n", - name, total_size, avg, + fmt::print("{:<18} {:>12} ({:>5.3f} chars/f) {:>12} ({:>5.3f} d/f) {:<18} {:<12} {:<15}\n", + name, total_size, avg, total_digits, avg_digits, is_shortest ? "[minimal string]" : "[non minimal]", precise ? "[precise]" : "[imprecise]", is_min_digits_algo ? "[min digits]" : "[non min digits]"); From bfb321d65b71b3b67919fba79c97a568e1dcfc9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C3=ABl=20Champagne=20Gareau?= Date: Tue, 17 Jun 2025 18:18:24 -0400 Subject: [PATCH 4/5] uniformize cxxopts syntax with bool args --- .gitignore | 1 + benchmarks/benchmark.cpp | 50 ++++++++++++++++++---------------------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 7b86ac3..f08cad8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ build build_debug build_script +.gdb_history outputs tags compile_commands.json diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index daab473..fe366e5 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -25,6 +25,10 @@ #include #include +cxxopts::Options + options("benchmark", + "Compute the parsing speed of different number parsers."); + template void evaluateProperties(const std::vector> &lines, const std::vector> &args, @@ -137,11 +141,6 @@ std::vector> get_random_numbers(size_t howmany, return lines; } -cxxopts::Options - options("benchmark", - "Compute the parsing speed of different number parsers."); - - // Checks if a floating-point number is exactly representable as the specified integer type template bool is_exact_integer(float_type x) { @@ -154,7 +153,7 @@ bool is_exact_integer(float_type x) { // New template version of describe template -void describe(const std::variant>, std::vector>> &numbers, +void describe(const std::variant>, std::vector>> &numbers, const std::vector> &args, const std::vector &algo_filter) { std::visit([&args, &algo_filter](const auto &lines) { @@ -286,34 +285,30 @@ int main(int argc, char **argv) { try { options.add_options() ("f,file", "File name.", - cxxopts::value()->default_value("")) + cxxopts::value()->default_value("")) ("F,fixed", "Fixed-point representation.", - cxxopts::value()->default_value("0")) - ("D,data", "Description of the data.") + cxxopts::value()->default_value("0")) ("v,volume", "Volume (number of floats generated).", - cxxopts::value()->default_value("100000")) + cxxopts::value()->default_value("100000")) ("m,model", "Random Model.", - cxxopts::value()->default_value("uniform_01")) - ("s,single", "Use single precision instead of double.", - cxxopts::value()->default_value("false")) - ("S,string-eval", "Evaluate perf. of string generation from decimal mantissa/exponent", - cxxopts::value()->default_value("false")) - ("t,test", "Test the algorithms and find their properties.", - cxxopts::value()->default_value("false")) - ("e,errol", "Enable errol3 (current impl. returns invalid values, e.g., for 0).", - cxxopts::value()->default_value("false")) + cxxopts::value()->default_value("uniform_01")) ("a,algo-filter", "Filter algorithms by name substring: you can use multiple filters separated by commas.", - cxxopts::value>()) + cxxopts::value>()) ("r,repeat", "Force a number of repetitions.", - cxxopts::value()->default_value("0")) + cxxopts::value()->default_value("0")) + ("D,data", "Description of the data.") + ("s,single", "Use single precision instead of double.") + ("S,string-eval", "Evaluate perf. of string generation from decimal mantissa/exponent") + ("t,test", "Test the algorithms and find their properties.") + ("e,errol", "Enable errol3 (current impl. returns invalid values, e.g., for 0).") ("h,help", "Print usage."); const auto result = options.parse(argc, argv); - if (result["help"].as()) { + if (result.count("help") > 0) { fmt::print("{}\n", options.help()); return EXIT_SUCCESS; } - const bool single = result["single"].as(); + const bool single = result.count("single") > 0; const auto filter = result.count("algo-filter") ? result["algo-filter"].as>() : std::vector{}; @@ -340,22 +335,23 @@ int main(int argc, char **argv) { } std::variant>, std::vector>> algorithms; - const bool errol = result["errol"].as(); + const bool errol = result.count("errol") > 0; const size_t repeat = result["repeat"].as(); const size_t fixed_size = result["fixed"].as(); if (single) algorithms = initArgs(errol, repeat, fixed_size); else algorithms = initArgs(errol, repeat, fixed_size); - if (result["data"].as()) { + if (result.count("data") > 0) { if (single) describe(numbers, std::get>>(algorithms), filter); else describe(numbers, std::get>>(algorithms), filter); return EXIT_SUCCESS; } - const bool test = result["test"].as(); - const bool string_eval = result["string-eval"].as(); + + const bool test = result.count("test") > 0; + const bool string_eval = result.count("string-eval") > 0; std::visit([test, string_eval, &filter](const auto &lines, const auto &args) { using T1 = typename std::decay_t::value_type::Type; using T2 = typename std::decay_t::value_type::Type; From 8289a699449baf023ae21ffe553f59d1e8d0c467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C3=ABl=20Champagne=20Gareau?= Date: Tue, 17 Jun 2025 18:18:50 -0400 Subject: [PATCH 5/5] fix bug where a counter was not incremented --- benchmarks/benchmark.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index fe366e5..9319852 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -243,7 +243,7 @@ void describe(const std::variant>, std::vector digits_worse_than_min + sizes[i]); digits_worse_than_min = (std::max)(digits_worse_than_min, digits - min_digits[i]); if(new_record) {