diff --git a/.gitignore b/.gitignore index 7b86ac3..f08cad8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ build build_debug build_script +.gdb_history outputs tags compile_commands.json diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index dd50715..9319852 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -25,6 +25,10 @@ #include #include +cxxopts::Options + options("benchmark", + "Compute the parsing speed of different number parsers."); + template void evaluateProperties(const std::vector> &lines, const std::vector> &args, @@ -137,41 +141,174 @@ std::vector> get_random_numbers(size_t howmany, return lines; } -cxxopts::Options - options("benchmark", - "Compute the parsing speed of different number parsers."); +// Checks if a floating-point number is exactly representable as the specified integer type +template +bool is_exact_integer(float_type x) { + if (!std::isfinite(x)) { + return false; + } + int_type i = static_cast(x); + return static_cast(i) == x; +} + +// New template version of describe +template +void describe(const std::variant>, std::vector>> &numbers, + const std::vector> &args, + const std::vector &algo_filter) { + std::visit([&args, &algo_filter](const auto &lines) { + size_t integers64 = 0; + size_t integers32 = 0; + for (const auto &d : lines) { + integers64 += is_exact_integer(d.value) ? 1 : 0; + integers32 += is_exact_integer(d.value) ? 1 : 0; + } + std::vector sizes(lines.size(), std::numeric_limits::max()); + std::vector shortest(lines.size()); + std::vector min_digits(lines.size(), std::numeric_limits::max()); + std::vector min_digits_str(lines.size()); + std::vector> results; + size_t min_size = std::numeric_limits::max(); + for (const auto &algo : args) { + if (!algo.used) continue; + if (algo_filtered_out(algo.name, algo_filter)) continue; + size_t total_size = 0; + size_t total_digits = 0; + + std::vector buffer(100); + std::span bufspan(buffer); + bool precise = true; + for(size_t i = 0; i < lines.size(); ++i) { + const auto &d = lines[i]; + int len = algo.func(d.value, bufspan); + if(sizes[i] > len) { + sizes[i] = len; + shortest[i].assign(bufspan.data(), len); + } + // Check for minimal number of significant digits + size_t digits = count_significant_digits(std::string_view(bufspan.data(), len)); + if (min_digits[i] > digits) { + min_digits[i] = digits; + min_digits_str[i].assign(bufspan.data(), len); + } + total_size += len; + total_digits += digits; + std::string_view sv(buffer.data(), len); + auto parsed = parse_float(sv); + if (!parsed.has_value() || parsed.value() != d.value) { + precise = false; + break; + } + } + double avg = total_size / double(lines.size()); + double avg_digits = total_digits / double(lines.size()); + + results.emplace_back(algo.name, total_size, avg, total_digits, avg_digits, precise); + if (precise && total_size < min_size) min_size = total_size; + } + std::map> algo_results; + for (const auto &algo : args) { + if (!algo.used) continue; + if (algo_filtered_out(algo.name, algo_filter)) continue; + size_t howmany = 0; + size_t howmany_digits = 0; + std::vector buffer(100); + std::span bufspan(buffer); + size_t worse_than_shortest = 0; + size_t digits_worse_than_min = 0; + for(size_t i = 0; i < lines.size(); ++i) { + const auto &d = lines[i]; + int len = algo.func(d.value, bufspan); + // Case where the string is longer than the shortest + if(sizes[i] < len) { + howmany++; + bool new_record = (len > worse_than_shortest + sizes[i]); + worse_than_shortest = (std::max)(worse_than_shortest, len - sizes[i]); + if(new_record) { + fmt::print(stderr, "Warning: algorithm {} produced a longer string ({}) than the shortest ({}) for value {}\n", + algo.name, len, sizes[i], d.value); + fmt::print(stderr, " Shortest: '{}'\n", shortest[i]); + std::string_view this_answer(bufspan.data(), len); + fmt::print(stderr, " Produced: '{}'\n", this_answer); + auto parsed_ref = parse_float(shortest[i]); + auto parsed_this = parse_float(this_answer); + if(!parsed_ref.has_value() || !parsed_this.has_value()) { + fmt::print(stderr, " BUG! Parsing failed for one of the strings.\n"); + } else if (parsed_ref.value() != parsed_this.value()) { + fmt::print(stderr, " BUG! Parsed values differ: {} vs {}\n", + parsed_ref.value(), parsed_this.value()); + } + } + } + // Case where the string has more significant digits than the minimum + size_t digits = count_significant_digits(std::string_view(bufspan.data(), len)); + if(min_digits[i] < digits) { + howmany_digits++; + bool new_record = (len > digits_worse_than_min + sizes[i]); + digits_worse_than_min = (std::max)(digits_worse_than_min, digits - min_digits[i]); + if(new_record) { + fmt::print(stderr, "Warning: algorithm {} produced a string with more significant digits ({}) than the minimum ({}) for value {}\n", + algo.name, digits, min_digits[i], d.value); + fmt::print(stderr, " Min digits: '{}'\n", min_digits_str[i]); + std::string_view this_answer(bufspan.data(), len); + fmt::print(stderr, " Produced: '{}'\n", this_answer); + } + } + } + if(howmany > 0) { + fmt::print(stderr, "Warning: algorithm {} produced longer strings than the shortest for {} values, worst gap is {} characters\n", + algo.name, howmany, worse_than_shortest); + } + if(howmany_digits > 0) { + fmt::print(stderr, "Warning: algorithm {} produced more significant digits than the minimum for {} values, worst gap is {} digits\n", + algo.name, howmany_digits, digits_worse_than_min); + } + if(howmany > 0 || howmany_digits > 0) { + fmt::println("---"); + } + algo_results[algo.name] = std::make_tuple(howmany == 0, howmany_digits == 0); + + } + for (const auto &[name, total_size, avg, total_digits, avg_digits, precise] : results) { + auto [is_shortest, is_min_digits_algo] = algo_results[name]; + fmt::print("{:<18} {:>12} ({:>5.3f} chars/f) {:>12} ({:>5.3f} d/f) {:<18} {:<12} {:<15}\n", + name, total_size, avg, total_digits, avg_digits, + is_shortest ? "[minimal string]" : "[non minimal]", + precise ? "[precise]" : "[imprecise]", + is_min_digits_algo ? "[min digits]" : "[non min digits]"); + } + fmt::println("count: {}, 32-bit ints: {}, 64-bit ints: {}", lines.size(), integers32, integers64); + }, numbers); +} int main(int argc, char **argv) { try { options.add_options() ("f,file", "File name.", - cxxopts::value()->default_value("")) + cxxopts::value()->default_value("")) ("F,fixed", "Fixed-point representation.", - cxxopts::value()->default_value("0")) + cxxopts::value()->default_value("0")) ("v,volume", "Volume (number of floats generated).", - cxxopts::value()->default_value("100000")) + cxxopts::value()->default_value("100000")) ("m,model", "Random Model.", - cxxopts::value()->default_value("uniform_01")) - ("s,single", "Use single precision instead of double.", - cxxopts::value()->default_value("false")) - ("S,string-eval", "Evaluate perf. of string generation from decimal mantissa/exponent", - cxxopts::value()->default_value("false")) - ("t,test", "Test the algorithms and find their properties.", - cxxopts::value()->default_value("false")) - ("e,errol", "Enable errol3 (current impl. returns invalid values, e.g., for 0).", - cxxopts::value()->default_value("false")) + cxxopts::value()->default_value("uniform_01")) ("a,algo-filter", "Filter algorithms by name substring: you can use multiple filters separated by commas.", - cxxopts::value>()) + cxxopts::value>()) ("r,repeat", "Force a number of repetitions.", - cxxopts::value()->default_value("0")) + cxxopts::value()->default_value("0")) + ("D,data", "Description of the data.") + ("s,single", "Use single precision instead of double.") + ("S,string-eval", "Evaluate perf. of string generation from decimal mantissa/exponent") + ("t,test", "Test the algorithms and find their properties.") + ("e,errol", "Enable errol3 (current impl. returns invalid values, e.g., for 0).") ("h,help", "Print usage."); const auto result = options.parse(argc, argv); - if (result["help"].as()) { + if (result.count("help") > 0) { fmt::print("{}\n", options.help()); return EXIT_SUCCESS; } - const bool single = result["single"].as(); + const bool single = result.count("single") > 0; const auto filter = result.count("algo-filter") ? result["algo-filter"].as>() : std::vector{}; @@ -198,16 +335,23 @@ int main(int argc, char **argv) { } std::variant>, std::vector>> algorithms; - const bool errol = result["errol"].as(); + const bool errol = result.count("errol") > 0; const size_t repeat = result["repeat"].as(); const size_t fixed_size = result["fixed"].as(); if (single) algorithms = initArgs(errol, repeat, fixed_size); else algorithms = initArgs(errol, repeat, fixed_size); + if (result.count("data") > 0) { + if (single) + describe(numbers, std::get>>(algorithms), filter); + else + describe(numbers, std::get>>(algorithms), filter); + return EXIT_SUCCESS; + } - const bool test = result["test"].as(); - const bool string_eval = result["string-eval"].as(); + const bool test = result.count("test") > 0; + const bool string_eval = result.count("string-eval") > 0; std::visit([test, string_eval, &filter](const auto &lines, const auto &args) { using T1 = typename std::decay_t::value_type::Type; using T2 = typename std::decay_t::value_type::Type;