fastfloat · jaja360 · Jun 17, 2025 · Jun 13, 2025 · Jun 13, 2025 · Jun 14, 2025
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,7 @@
 build
 build_debug
 build_script
+.gdb_history
 outputs
 tags
 compile_commands.json

diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp
@@ -25,6 +25,10 @@
 #include <fast_float/fast_float.h>
 #include <fmt/core.h>
 
+// Command-line option set for the benchmark tool, defined at file scope.
+// main() registers the individual flags on it with add_options(), parses
+// argv through it, and prints options.help() when help is requested.
+cxxopts::Options
+    options("benchmark",
+            "Compute the parsing speed of different number parsers.");
+
 template <arithmetic_float T>
 void evaluateProperties(const std::vector<TestCase<T>> &lines,
                         const std::vector<BenchArgs<T>> &args,
@@ -137,41 +141,174 @@ std::vector<TestCase<T>> get_random_numbers(size_t howmany,
   return lines;
 }
 
-cxxopts::Options
-    options("benchmark",
-            "Compute the parsing speed of different number parsers.");
+// Checks if a floating-point number is exactly representable as the specified integer type.
+//
+// Returns false for NaN and infinities, for values with a fractional part,
+// and for values outside the range of int_type. The range test is done
+// before the cast because converting an out-of-range floating-point value
+// to an integer type is undefined behaviour.
+template <std::integral int_type, std::floating_point float_type>
+bool is_exact_integer(float_type x) {
+    if (!std::isfinite(x)) {
+        return false;
+    }
+    // 2^digits is one past int_type's maximum; being a power of two it is
+    // computed exactly in floating point, unlike the maximum itself.
+    const float_type upper =
+        std::ldexp(float_type(1), std::numeric_limits<int_type>::digits);
+    // Signed types reach down to -2^digits (inclusive); unsigned types to 0.
+    const float_type lower =
+        std::signed_integral<int_type> ? -upper : float_type(0);
+    if (x < lower || x >= upper) {
+        return false;
+    }
+    const int_type i = static_cast<int_type>(x);
+    return static_cast<float_type>(i) == x;
+}
+
+// Describes the data set held in `numbers` and how each selected algorithm
+// in `args` serializes it.
+//
+// Only algorithms whose `used` flag is set and that are not rejected by
+// algo_filtered_out(name, algo_filter) are considered. The work is done in
+// two passes over the values:
+//   1. Each algorithm writes every value into a scratch buffer. The output
+//      length and its count_significant_digits() result are accumulated, the
+//      per-value shortest string / smallest digit count seen so far are
+//      recorded, and the output is parsed back with parse_float<T>. An
+//      algorithm is marked imprecise on the first value that fails to parse
+//      or parses to a different value.
+//   2. Each algorithm is run again and compared with the per-value records
+//      from pass 1; warnings are printed to stderr whenever an algorithm sets
+//      a new worst gap, followed by one summary warning per algorithm.
+// A table with one row per algorithm and a final line with the value count
+// and the number of values that are exact 32-bit / 64-bit integers are
+// printed to stdout.
+//
+// T            floating-point type of the algorithms in `args`; also the type
+//              requested from parse_float.
+// numbers      test cases, either float or double.
+// args         candidate algorithms.
+// algo_filter  name filters forwarded to algo_filtered_out.
+template <typename T>
+void describe(const std::variant<std::vector<TestCase<float>>, std::vector<TestCase<double>>> &numbers,
+             const std::vector<BenchArgs<T>> &args,
+             const std::vector<std::string> &algo_filter) {
+  std::visit([&args, &algo_filter](const auto &lines) {
+    constexpr size_t unset = std::numeric_limits<size_t>::max();
+    size_t integers64 = 0;
+    size_t integers32 = 0;
+    for (const auto &d : lines) {
+      integers64 += is_exact_integer<int64_t>(d.value) ? 1 : 0;
+      integers32 += is_exact_integer<int32_t>(d.value) ? 1 : 0;
+    }
+    // Per-value records across all algorithms: shortest output and the
+    // output with the fewest significant digits.
+    std::vector<size_t> sizes(lines.size(), unset);
+    std::vector<std::string> shortest(lines.size());
+    std::vector<size_t> min_digits(lines.size(), unset);
+    std::vector<std::string> min_digits_str(lines.size());
+    std::vector<std::tuple<std::string, size_t, double, size_t, double, bool>> results;
+
+    // Pass 1: totals, per-value records and round-trip check.
+    for (const auto &algo : args) {
+      if (!algo.used) continue;
+      if (algo_filtered_out(algo.name, algo_filter)) continue;
+      size_t total_size = 0;
+      size_t total_digits = 0;
+
+      std::vector<char> buffer(100);
+      std::span<char> bufspan(buffer);
+      bool precise = true;
+      for (size_t i = 0; i < lines.size(); ++i) {
+        const auto &d = lines[i];
+        // Convert the returned length once so that every comparison below is
+        // unsigned-vs-unsigned.
+        const size_t len = static_cast<size_t>(algo.func(d.value, bufspan));
+        const std::string_view text(bufspan.data(), len);
+        if (sizes[i] > len) {
+          sizes[i] = len;
+          shortest[i].assign(text);
+        }
+        // Check for minimal number of significant digits
+        const size_t digits = count_significant_digits(text);
+        if (min_digits[i] > digits) {
+          min_digits[i] = digits;
+          min_digits_str[i].assign(text);
+        }
+        total_size += len;
+        total_digits += digits;
+        auto parsed = parse_float<T>(text);
+        if (!parsed.has_value() || parsed.value() != d.value) {
+          // NOTE: stopping here means the totals of an imprecise algorithm
+          // only cover the values processed up to the first mismatch.
+          precise = false;
+          break;
+        }
+      }
+      double avg = total_size / double(lines.size());
+      double avg_digits = total_digits / double(lines.size());
+
+      results.emplace_back(algo.name, total_size, avg, total_digits, avg_digits, precise);
+    }
+
+    // Pass 2: compare every algorithm against the per-value records.
+    // Maps algorithm name -> (always shortest string, always minimal digits).
+    std::map<std::string, std::tuple<bool, bool>> algo_results;
+    for (const auto &algo : args) {
+      if (!algo.used) continue;
+      if (algo_filtered_out(algo.name, algo_filter)) continue;
+      size_t howmany = 0;
+      size_t howmany_digits = 0;
+      std::vector<char> buffer(100);
+      std::span<char> bufspan(buffer);
+      size_t worse_than_shortest = 0;
+      size_t digits_worse_than_min = 0;
+      for (size_t i = 0; i < lines.size(); ++i) {
+        const auto &d = lines[i];
+        const size_t len = static_cast<size_t>(algo.func(d.value, bufspan));
+        const std::string_view this_answer(bufspan.data(), len);
+        // Case where the string is longer than the shortest
+        if (sizes[i] < len) {
+          howmany++;
+          const size_t gap = len - sizes[i];
+          // Only report in detail when this value sets a new worst gap.
+          const bool new_record = gap > worse_than_shortest;
+          worse_than_shortest = (std::max)(worse_than_shortest, gap);
+          if (new_record) {
+            fmt::print(stderr, "Warning: algorithm {} produced a longer string ({}) than the shortest ({}) for value {}\n",
+                       algo.name, len, sizes[i], d.value);
+            fmt::print(stderr, "  Shortest: '{}'\n", shortest[i]);
+            fmt::print(stderr, "  Produced: '{}'\n", this_answer);
+            auto parsed_ref = parse_float<T>(shortest[i]);
+            auto parsed_this = parse_float<T>(this_answer);
+            if (!parsed_ref.has_value() || !parsed_this.has_value()) {
+              fmt::print(stderr, "  BUG! Parsing failed for one of the strings.\n");
+            } else if (parsed_ref.value() != parsed_this.value()) {
+              fmt::print(stderr, "  BUG! Parsed values differ: {} vs {}\n",
+                         parsed_ref.value(), parsed_this.value());
+            }
+          }
+        }
+        // Case where the string has more significant digits than the minimum
+        const size_t digits = count_significant_digits(this_answer);
+        if (min_digits[i] < digits) {
+          howmany_digits++;
+          // The record is measured in digits, not in string length.
+          const size_t gap = digits - min_digits[i];
+          const bool new_record = gap > digits_worse_than_min;
+          digits_worse_than_min = (std::max)(digits_worse_than_min, gap);
+          if (new_record) {
+            fmt::print(stderr, "Warning: algorithm {} produced a string with more significant digits ({}) than the minimum ({}) for value {}\n",
+                       algo.name, digits, min_digits[i], d.value);
+            fmt::print(stderr, "  Min digits: '{}'\n", min_digits_str[i]);
+            fmt::print(stderr, "  Produced: '{}'\n", this_answer);
+          }
+        }
+      }
+      if (howmany > 0) {
+        fmt::print(stderr, "Warning: algorithm {} produced longer strings than the shortest for {} values, worst gap is {} characters\n",
+                   algo.name, howmany, worse_than_shortest);
+      }
+      if (howmany_digits > 0) {
+        fmt::print(stderr, "Warning: algorithm {} produced more significant digits than the minimum for {} values, worst gap is {} digits\n",
+                   algo.name, howmany_digits, digits_worse_than_min);
+      }
+      if (howmany > 0 || howmany_digits > 0) {
+        fmt::println("---");
+      }
+      algo_results[algo.name] = std::make_tuple(howmany == 0, howmany_digits == 0);
+
+    }
+
+    // Summary table: one row per algorithm, in the order they were evaluated.
+    for (const auto &[name, total_size, avg, total_digits, avg_digits, precise] : results) {
+      // Every name in `results` was inserted into algo_results by pass 2.
+      const auto [is_shortest, is_min_digits_algo] = algo_results.at(name);
+      fmt::print("{:<18} {:>12} ({:>5.3f} chars/f) {:>12} ({:>5.3f} d/f) {:<18} {:<12} {:<15}\n",
+        name, total_size, avg, total_digits, avg_digits,
+        is_shortest ? "[minimal string]" : "[non minimal]",
+        precise ? "[precise]" : "[imprecise]",
+        is_min_digits_algo ? "[min digits]" : "[non min digits]");
+    }
+    fmt::println("count: {}, 32-bit ints: {}, 64-bit ints: {}", lines.size(), integers32, integers64);
+  }, numbers);
+}
 
 int main(int argc, char **argv) {
   try {
     options.add_options()
         ("f,file", "File name.",
-        cxxopts::value<std::string>()->default_value(""))
+         cxxopts::value<std::string>()->default_value(""))
         ("F,fixed", "Fixed-point representation.",
-        cxxopts::value<size_t>()->default_value("0"))
+         cxxopts::value<size_t>()->default_value("0"))
         ("v,volume", "Volume (number of floats generated).",
-        cxxopts::value<size_t>()->default_value("100000"))
+         cxxopts::value<size_t>()->default_value("100000"))
         ("m,model", "Random Model.",
-        cxxopts::value<std::string>()->default_value("uniform_01"))
-        ("s,single", "Use single precision instead of double.",
-        cxxopts::value<bool>()->default_value("false"))
-        ("S,string-eval", "Evaluate perf. of string generation from decimal mantissa/exponent",
-        cxxopts::value<bool>()->default_value("false"))
-        ("t,test", "Test the algorithms and find their properties.",
-        cxxopts::value<bool>()->default_value("false"))
-        ("e,errol", "Enable errol3 (current impl. returns invalid values, e.g., for 0).",
-        cxxopts::value<bool>()->default_value("false"))
+         cxxopts::value<std::string>()->default_value("uniform_01"))
         ("a,algo-filter", "Filter algorithms by name substring: you can use multiple filters separated by commas.",
-        cxxopts::value<std::vector<std::string>>())
+         cxxopts::value<std::vector<std::string>>())
         ("r,repeat", "Force a number of repetitions.",
-        cxxopts::value<size_t>()->default_value("0"))
+         cxxopts::value<size_t>()->default_value("0"))
+        ("D,data", "Description of the data.")
+        ("s,single", "Use single precision instead of double.")
+        ("S,string-eval", "Evaluate perf. of string generation from decimal mantissa/exponent")
+        ("t,test", "Test the algorithms and find their properties.")
+        ("e,errol", "Enable errol3 (current impl. returns invalid values, e.g., for 0).")
         ("h,help", "Print usage.");
     const auto result = options.parse(argc, argv);
 
-    if (result["help"].as<bool>()) {
+    if (result.count("help") > 0) {
       fmt::print("{}\n", options.help());
       return EXIT_SUCCESS;
     }
-    const bool single = result["single"].as<bool>();
+    const bool single = result.count("single") > 0;
     const auto filter = result.count("algo-filter")
                       ? result["algo-filter"].as<std::vector<std::string>>()
                       : std::vector<std::string>{};
@@ -198,16 +335,23 @@ int main(int argc, char **argv) {
     }
 
     std::variant<std::vector<BenchArgs<float>>, std::vector<BenchArgs<double>>> algorithms;
-    const bool errol = result["errol"].as<bool>();
+    const bool errol = result.count("errol") > 0;
     const size_t repeat = result["repeat"].as<size_t>();
     const size_t fixed_size = result["fixed"].as<size_t>();
     if (single)
       algorithms = initArgs<float>(errol, repeat, fixed_size);
     else
       algorithms = initArgs<double>(errol, repeat, fixed_size);
+    if (result.count("data") > 0) {
+      if (single)
+        describe<float>(numbers, std::get<std::vector<BenchArgs<float>>>(algorithms), filter);
+      else
+        describe<double>(numbers, std::get<std::vector<BenchArgs<double>>>(algorithms), filter);
+      return EXIT_SUCCESS;
+    }
 
-    const bool test = result["test"].as<bool>();
-    const bool string_eval = result["string-eval"].as<bool>();
+    const bool test = result.count("test") > 0;
+    const bool string_eval = result.count("string-eval") > 0;
     std::visit([test, string_eval, &filter](const auto &lines, const auto &args) {
       using T1 = typename std::decay_t<decltype(lines)>::value_type::Type;
       using T2 = typename std::decay_t<decltype(args)>::value_type::Type;