Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
build
build_debug
build_script
.gdb_history
outputs
tags
compile_commands.json
Expand Down
188 changes: 166 additions & 22 deletions benchmarks/benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
#include <fast_float/fast_float.h>
#include <fmt/core.h>

// Global command-line option set for the benchmark executable.
// main() registers the individual flags on it through options.add_options()
// and prints options.help() when help is requested.
cxxopts::Options
options("benchmark",
"Compute the parsing speed of different number parsers.");

template <arithmetic_float T>
void evaluateProperties(const std::vector<TestCase<T>> &lines,
const std::vector<BenchArgs<T>> &args,
Expand Down Expand Up @@ -137,41 +141,174 @@ std::vector<TestCase<T>> get_random_numbers(size_t howmany,
return lines;
}

// Global command-line option set for the benchmark executable; main()
// registers the individual flags on it through options.add_options().
// NOTE(review): an identical definition of `options` also appears earlier in
// this view, before evaluateProperties — presumably the two are the old and
// new positions of one moved definition; confirm only one exists in the file.
cxxopts::Options
options("benchmark",
"Compute the parsing speed of different number parsers.");
// Checks whether a floating-point number is exactly representable as the
// specified integer type.
//
// Returns false for NaN, infinities, values with a fractional part, and values
// that lie outside the range of int_type. The range test happens *before* the
// float -> integer conversion because converting an out-of-range floating-point
// value to an integer type is undefined behaviour (and, on targets whose
// conversion instruction saturates, would make values such as 2^63 wrongly
// look like exact int64_t values).
template <std::integral int_type, std::floating_point float_type>
bool is_exact_integer(float_type x) {
  if (!std::isfinite(x)) {
    return false;
  }
  // int_type covers [-2^digits, 2^digits) when signed and [0, 2^digits) when
  // unsigned, where `digits` counts the value bits (sign bit excluded).
  // Powers of two are exactly representable in binary floating point, so the
  // bounds carry no rounding error. If 2^digits overflows float_type the bound
  // becomes +inf, which every finite x is below.
  constexpr int digits = std::numeric_limits<int_type>::digits;
  const float_type upper = std::ldexp(float_type(1), digits);
  const float_type lower =
      std::numeric_limits<int_type>::is_signed ? -upper : float_type(0);
  if (x < lower || x >= upper) {
    return false;
  }
  // In range: the conversion is well defined and truncates toward zero, so the
  // round trip is lossless only when x had no fractional part.
  const int_type i = static_cast<int_type>(x);
  return static_cast<float_type>(i) == x;
}

// Describes the data set and the output quality of each selected algorithm.
//
// For every entry of `args` that is marked `used` and is not rejected by
// algo_filtered_out(name, algo_filter), each value is formatted through
// algo.func and the function:
//   1. records, per value, the shortest string and the smallest number of
//      significant digits produced by any algorithm;
//   2. checks that the produced string parses back (parse_float<T>) to the
//      original value ("precise");
//   3. in a second pass, warns on stderr whenever an algorithm's output is
//      longer, or has more significant digits, than the per-value minimum;
//   4. prints one summary row per algorithm and, finally, the number of values
//      together with how many are exact 32-bit / 64-bit integers.
//
// numbers:     the test cases, either float or double.
// args:        the candidate algorithms.
// algo_filter: name filters forwarded to algo_filtered_out.
template <typename T>
void describe(const std::variant<std::vector<TestCase<float>>, std::vector<TestCase<double>>> &numbers,
              const std::vector<BenchArgs<T>> &args,
              const std::vector<std::string> &algo_filter) {
  std::visit([&args, &algo_filter](const auto &lines) {
    size_t integers64 = 0;
    size_t integers32 = 0;
    for (const auto &d : lines) {
      integers64 += is_exact_integer<int64_t>(d.value) ? 1 : 0;
      integers32 += is_exact_integer<int32_t>(d.value) ? 1 : 0;
    }

    // Per-value minima over all selected algorithms, filled by the first pass.
    // SIZE_MAX means "no algorithm has produced an output for this value yet".
    std::vector<size_t> sizes(lines.size(), std::numeric_limits<size_t>::max());
    std::vector<std::string> shortest(lines.size());
    std::vector<size_t> min_digits(lines.size(), std::numeric_limits<size_t>::max());
    std::vector<std::string> min_digits_str(lines.size());
    // (name, total chars, avg chars, total digits, avg digits, precise)
    std::vector<std::tuple<std::string, size_t, double, size_t, double, bool>> results;

    // First pass: collect totals and the per-value minima.
    for (const auto &algo : args) {
      if (!algo.used) continue;
      if (algo_filtered_out(algo.name, algo_filter)) continue;
      size_t total_size = 0;
      size_t total_digits = 0;

      // NOTE(review): assumes no algorithm writes more than 100 characters
      // for a single value — confirm against the algorithms' contracts.
      std::vector<char> buffer(100);
      std::span<char> bufspan(buffer);
      bool precise = true;
      for (size_t i = 0; i < lines.size(); ++i) {
        const auto &d = lines[i];
        // The return value is used as the length of the text in the buffer.
        const size_t len = static_cast<size_t>(algo.func(d.value, bufspan));
        if (sizes[i] > len) {
          sizes[i] = len;
          shortest[i].assign(bufspan.data(), len);
        }
        // Check for minimal number of significant digits
        const size_t digits = count_significant_digits(std::string_view(bufspan.data(), len));
        if (min_digits[i] > digits) {
          min_digits[i] = digits;
          min_digits_str[i].assign(bufspan.data(), len);
        }
        total_size += len;
        total_digits += digits;
        std::string_view sv(buffer.data(), len);
        auto parsed = parse_float<T>(sv);
        if (!parsed.has_value() || parsed.value() != d.value) {
          // Stop at the first value that does not round-trip; the totals of
          // an imprecise algorithm therefore only cover the values seen so far.
          precise = false;
          break;
        }
      }
      double avg = total_size / double(lines.size());
      double avg_digits = total_digits / double(lines.size());

      results.emplace_back(algo.name, total_size, avg, total_digits, avg_digits, precise);
    }

    // Second pass: compare every algorithm against the minima found above.
    // Maps algorithm name -> (always shortest string, always minimal digits).
    std::map<std::string, std::tuple<bool, bool>> algo_results;
    for (const auto &algo : args) {
      if (!algo.used) continue;
      if (algo_filtered_out(algo.name, algo_filter)) continue;
      size_t howmany = 0;
      size_t howmany_digits = 0;
      std::vector<char> buffer(100);
      std::span<char> bufspan(buffer);
      size_t worse_than_shortest = 0;
      size_t digits_worse_than_min = 0;
      for (size_t i = 0; i < lines.size(); ++i) {
        const auto &d = lines[i];
        const size_t len = static_cast<size_t>(algo.func(d.value, bufspan));
        // Case where the string is longer than the shortest
        if (sizes[i] < len) {
          howmany++;
          const size_t gap = len - sizes[i];
          // Only print the details when this is the worst gap seen so far,
          // to keep the amount of output bounded.
          if (gap > worse_than_shortest) {
            worse_than_shortest = gap;
            fmt::print(stderr, "Warning: algorithm {} produced a longer string ({}) than the shortest ({}) for value {}\n",
                       algo.name, len, sizes[i], d.value);
            fmt::print(stderr, " Shortest: '{}'\n", shortest[i]);
            std::string_view this_answer(bufspan.data(), len);
            fmt::print(stderr, " Produced: '{}'\n", this_answer);
            auto parsed_ref = parse_float<T>(shortest[i]);
            auto parsed_this = parse_float<T>(this_answer);
            if (!parsed_ref.has_value() || !parsed_this.has_value()) {
              fmt::print(stderr, " BUG! Parsing failed for one of the strings.\n");
            } else if (parsed_ref.value() != parsed_this.value()) {
              fmt::print(stderr, " BUG! Parsed values differ: {} vs {}\n",
                         parsed_ref.value(), parsed_this.value());
            }
          }
        }
        // Case where the string has more significant digits than the minimum
        const size_t digits = count_significant_digits(std::string_view(bufspan.data(), len));
        if (min_digits[i] < digits) {
          howmany_digits++;
          // The record is measured in digits (digits vs. min_digits), not in
          // string length: the two gaps are tracked independently.
          const size_t gap = digits - min_digits[i];
          if (gap > digits_worse_than_min) {
            digits_worse_than_min = gap;
            fmt::print(stderr, "Warning: algorithm {} produced a string with more significant digits ({}) than the minimum ({}) for value {}\n",
                       algo.name, digits, min_digits[i], d.value);
            fmt::print(stderr, " Min digits: '{}'\n", min_digits_str[i]);
            std::string_view this_answer(bufspan.data(), len);
            fmt::print(stderr, " Produced: '{}'\n", this_answer);
          }
        }
      }
      if (howmany > 0) {
        fmt::print(stderr, "Warning: algorithm {} produced longer strings than the shortest for {} values, worst gap is {} characters\n",
                   algo.name, howmany, worse_than_shortest);
      }
      if (howmany_digits > 0) {
        fmt::print(stderr, "Warning: algorithm {} produced more significant digits than the minimum for {} values, worst gap is {} digits\n",
                   algo.name, howmany_digits, digits_worse_than_min);
      }
      if (howmany > 0 || howmany_digits > 0) {
        fmt::println("---");
      }
      algo_results[algo.name] = std::make_tuple(howmany == 0, howmany_digits == 0);
    }

    // Summary table: one row per algorithm, in the order of the first pass.
    for (const auto &[name, total_size, avg, total_digits, avg_digits, precise] : results) {
      auto [is_shortest, is_min_digits_algo] = algo_results[name];
      fmt::print("{:<18} {:>12} ({:>5.3f} chars/f) {:>12} ({:>5.3f} d/f) {:<18} {:<12} {:<15}\n",
                 name, total_size, avg, total_digits, avg_digits,
                 is_shortest ? "[minimal string]" : "[non minimal]",
                 precise ? "[precise]" : "[imprecise]",
                 is_min_digits_algo ? "[min digits]" : "[non min digits]");
    }
    fmt::println("count: {}, 32-bit ints: {}, 64-bit ints: {}", lines.size(), integers32, integers64);
  }, numbers);
}

int main(int argc, char **argv) {
try {
options.add_options()
("f,file", "File name.",
cxxopts::value<std::string>()->default_value(""))
cxxopts::value<std::string>()->default_value(""))
("F,fixed", "Fixed-point representation.",
cxxopts::value<size_t>()->default_value("0"))
cxxopts::value<size_t>()->default_value("0"))
("v,volume", "Volume (number of floats generated).",
cxxopts::value<size_t>()->default_value("100000"))
cxxopts::value<size_t>()->default_value("100000"))
("m,model", "Random Model.",
cxxopts::value<std::string>()->default_value("uniform_01"))
("s,single", "Use single precision instead of double.",
cxxopts::value<bool>()->default_value("false"))
("S,string-eval", "Evaluate perf. of string generation from decimal mantissa/exponent",
cxxopts::value<bool>()->default_value("false"))
("t,test", "Test the algorithms and find their properties.",
cxxopts::value<bool>()->default_value("false"))
("e,errol", "Enable errol3 (current impl. returns invalid values, e.g., for 0).",
cxxopts::value<bool>()->default_value("false"))
cxxopts::value<std::string>()->default_value("uniform_01"))
("a,algo-filter", "Filter algorithms by name substring: you can use multiple filters separated by commas.",
cxxopts::value<std::vector<std::string>>())
cxxopts::value<std::vector<std::string>>())
("r,repeat", "Force a number of repetitions.",
cxxopts::value<size_t>()->default_value("0"))
cxxopts::value<size_t>()->default_value("0"))
("D,data", "Description of the data.")
("s,single", "Use single precision instead of double.")
("S,string-eval", "Evaluate perf. of string generation from decimal mantissa/exponent")
("t,test", "Test the algorithms and find their properties.")
("e,errol", "Enable errol3 (current impl. returns invalid values, e.g., for 0).")
("h,help", "Print usage.");
const auto result = options.parse(argc, argv);

if (result["help"].as<bool>()) {
if (result.count("help") > 0) {
fmt::print("{}\n", options.help());
return EXIT_SUCCESS;
}
const bool single = result["single"].as<bool>();
const bool single = result.count("single") > 0;
const auto filter = result.count("algo-filter")
? result["algo-filter"].as<std::vector<std::string>>()
: std::vector<std::string>{};
Expand All @@ -198,16 +335,23 @@ int main(int argc, char **argv) {
}

std::variant<std::vector<BenchArgs<float>>, std::vector<BenchArgs<double>>> algorithms;
const bool errol = result["errol"].as<bool>();
const bool errol = result.count("errol") > 0;
const size_t repeat = result["repeat"].as<size_t>();
const size_t fixed_size = result["fixed"].as<size_t>();
if (single)
algorithms = initArgs<float>(errol, repeat, fixed_size);
else
algorithms = initArgs<double>(errol, repeat, fixed_size);
if (result.count("data") > 0) {
if (single)
describe<float>(numbers, std::get<std::vector<BenchArgs<float>>>(algorithms), filter);
else
describe<double>(numbers, std::get<std::vector<BenchArgs<double>>>(algorithms), filter);
return EXIT_SUCCESS;
}

const bool test = result["test"].as<bool>();
const bool string_eval = result["string-eval"].as<bool>();
const bool test = result.count("test") > 0;
const bool string_eval = result.count("string-eval") > 0;
std::visit([test, string_eval, &filter](const auto &lines, const auto &args) {
using T1 = typename std::decay_t<decltype(lines)>::value_type::Type;
using T2 = typename std::decay_t<decltype(args)>::value_type::Type;
Expand Down