From 4badb067f33f19bbb0e6157a17b735521154aca4 Mon Sep 17 00:00:00 2001
From: Daniel Lemire
Date: Thu, 24 Apr 2025 11:00:25 -0400
Subject: [PATCH 1/3] script to generate LaTeX tables from our benchmark outputs

---
 scripts/README.md      |  19 ++++++
 scripts/latex_table.py | 128 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 147 insertions(+)
 create mode 100644 scripts/README.md
 create mode 100755 scripts/latex_table.py

diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 0000000..8762f8a
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,19 @@
+## Creating LaTeX tables
+
+Prerequisite: You should be able to build and run the C++ benchmark. You need Python 3 on your system.
+
+Run your benchmark:
+
+```
+cmake -B build && cmake --build build
+./build/benchmarks/benchmark -f data/canada.txt > myresults.txt
+```
+
+Process the raw output:
+
+```
+ ./scripts/latex_table.py myresults.txt
+```
+
+This prints the table to standard output. The numbers are already rounded to two significant digits,
+ready to be included in a scientific manuscript.
\ No newline at end of file
diff --git a/scripts/latex_table.py b/scripts/latex_table.py
new file mode 100755
index 0000000..2b5e9d0
--- /dev/null
+++ b/scripts/latex_table.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+
+import sys
+import re
+import argparse
+
+# Function to format a number to two significant digits
+def format_to_two_sig_digits(value):
+    if not isinstance(value, (int, float)) or value == 0:
+        return "N/A"
+
+    # Handle negative numbers
+    is_negative = value < 0
+    abs_value = abs(value)
+
+    exponent = 0
+
+    while abs_value >= 100:
+        abs_value /= 10
+        exponent += 1
+    while abs_value < 10:
+        abs_value *= 10
+        exponent -= 1
+
+    # Round to two significant digits
+    abs_value = round(abs_value, 0)
+
+    # Format the number
+    if exponent >= 0 and exponent <= 4:
+        text = f"{'-' if is_negative else ''}{abs_value*10**exponent}"
+        return text.replace(".0", "")
+    elif exponent < 0 and exponent >= -4:
+        # -exponent decimals keep both significant digits (0.25 -> "0.25", not "0.2")
+        return f"{'-' if is_negative else ''}{abs_value*10**exponent:.{-exponent}f}"
+    else:
+        return f"{'-' if is_negative else ''}{abs_value:.1f}e{exponent}"
+
+# Function to parse the raw input data
+def parse_input(data):
+    lines = data.splitlines()
+    parsed_data = []
+    current_entry = None
+
+    for line in lines:
+        line = line.strip()
+        print(f"Processing line: {line}")  # Debugging output
+        # Skip empty lines or comments
+        if not line or line.startswith("#"):
+            continue
+
+        # Match lines that start a new entry (e.g., "just_string : 1365.92 MB/s ...")
+        match_entry = re.match(r"(\S+)\s*:\s*[\d.]+\s*MB/s", line)
+        if match_entry:
+            print(f"Found new entry: {match_entry.group(1)}")  # Debugging output
+            current_entry = {"name": match_entry.group(1)}
+            parsed_data.append(current_entry)
+        if not current_entry:
+            continue
+        print(f"reviewing line {line}")  # Debugging output
+
+        # Match lines with ns/f
+        match_ns = re.search(r"([\d.]+)\s*ns/f", line)
+        if match_ns and current_entry:
+            print(f"Found ns/f: {match_ns.group(1)}")
+            current_entry["ns_per_float"] = float(match_ns.group(1))
+
+        # Match lines with instructions/float (i/f)
+        match_inst_float = re.search(r"([\d.]+)\s*i/f", line)
+        if match_inst_float and current_entry:
+            print(f"Found i/f: {match_inst_float.group(1)}")
+            current_entry["inst_per_float"] = float(match_inst_float.group(1))
+
+        # Match lines with instructions/cycle (i/c)
+        match_inst_cycle = re.search(r"([\d.]+)\s*i/c", line)
+        if match_inst_cycle and current_entry:
+            print(f"Found i/c: {match_inst_cycle.group(1)}")
+            current_entry["inst_per_cycle"] = float(match_inst_cycle.group(1))
+
+    # Entries missing a metric are kept; the table prints N/A for them
+    return parsed_data
+
+# Function to generate LaTeX table
+def generate_latex_table(data):
+    latex_table = r"""
+\begin{tabular}{lccc}
+\toprule
+\textbf{Name} & \textbf{ns/f} & \textbf{instructions/float} & \textbf{instructions/cycle} \\
+\midrule
+"""
+
+    for entry in data:
+        name = entry["name"].replace("_", "\\_")  # Escape underscores for LaTeX
+        ns_per_float = format_to_two_sig_digits(entry['ns_per_float']) if 'ns_per_float' in entry else 'N/A'
+        inst_per_float = format_to_two_sig_digits(entry['inst_per_float']) if 'inst_per_float' in entry else 'N/A'
+        inst_per_cycle = format_to_two_sig_digits(entry['inst_per_cycle']) if 'inst_per_cycle' in entry else 'N/A'
+        latex_table += f"{name} & {ns_per_float} & {inst_per_float} & {inst_per_cycle} \\\\ \n"
+
+    latex_table += r"""\bottomrule
+\end{tabular}
+"""
+    return latex_table
+
+if __name__ == "f__main__":
+    print(format_to_two_sig_digits(336.0))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Generate LaTeX table from performance data")
+    parser.add_argument("file", nargs="?", help="Optional input file name (if not provided, reads from stdin)")
+    args = parser.parse_args()
+
+    # Read input data
+    if args.file:
+        try:
+            with open(args.file, "r") as f:
+                raw_input = f.read()
+        except FileNotFoundError:
+            print(f"Error: File '{args.file}' not found.", file=sys.stderr)
+            sys.exit(1)
+        except IOError as e:
+            print(f"Error reading file '{args.file}': {e}", file=sys.stderr)
+            sys.exit(1)
+    else:
+        raw_input = sys.stdin.read()
+    parsed_data = parse_input(raw_input)
+    print(f"Parsed data: {parsed_data}")  # Debugging output
+    latex_output = generate_latex_table(parsed_data)
+    print(latex_output)
\ No newline at end of file

From db7a4298a78befb40652fb6c199387b8239f9f4e Mon Sep 17 00:00:00 2001
From: Daniel Lemire
Date: Thu, 24 Apr 2025 11:34:22 -0400
Subject: [PATCH 2/3] cleaning

---
 scripts/latex_table.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/scripts/latex_table.py b/scripts/latex_table.py
index 2b5e9d0..d935927 100755
--- a/scripts/latex_table.py
+++ b/scripts/latex_table.py
@@ -43,7 +43,6 @@ def parse_input(data):
 
     for line in lines:
         line = line.strip()
-        print(f"Processing line: {line}")  # Debugging output
         # Skip empty lines or comments
         if not line or line.startswith("#"):
             continue
@@ -100,10 +99,6 @@ def generate_latex_table(data):
 """
     return latex_table
 
-if __name__ == "f__main__":
-    print(format_to_two_sig_digits(336.0))
-
-
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Generate LaTeX table from performance data")
     parser.add_argument("file", nargs="?", help="Optional input file name (if not provided, reads from stdin)")

From 96fefa17d336dc4ddde55309510d7b9d22555493 Mon Sep 17 00:00:00 2001
From: Daniel Lemire
Date: Thu, 24 Apr 2025 13:44:53 -0400
Subject: [PATCH 3/3] cleaning

---
 scripts/latex_table.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/scripts/latex_table.py b/scripts/latex_table.py
index d935927..ec16017 100755
--- a/scripts/latex_table.py
+++ b/scripts/latex_table.py
@@ -50,29 +50,23 @@ def parse_input(data):
         # Match lines that start a new entry (e.g., "just_string : 1365.92 MB/s ...")
         match_entry = re.match(r"(\S+)\s*:\s*[\d.]+\s*MB/s", line)
         if match_entry:
-            print(f"Found new entry: {match_entry.group(1)}")  # Debugging output
             current_entry = {"name": match_entry.group(1)}
             parsed_data.append(current_entry)
         if not current_entry:
             continue
-        print(f"reviewing line {line}")  # Debugging output
-
         # Match lines with ns/f
         match_ns = re.search(r"([\d.]+)\s*ns/f", line)
         if match_ns and current_entry:
-            print(f"Found ns/f: {match_ns.group(1)}")
             current_entry["ns_per_float"] = float(match_ns.group(1))
 
         # Match lines with instructions/float (i/f)
         match_inst_float = re.search(r"([\d.]+)\s*i/f", line)
         if match_inst_float and current_entry:
-            print(f"Found i/f: {match_inst_float.group(1)}")
             current_entry["inst_per_float"] = float(match_inst_float.group(1))
 
         # Match lines with instructions/cycle (i/c)
         match_inst_cycle = re.search(r"([\d.]+)\s*i/c", line)
         if match_inst_cycle and current_entry:
-            print(f"Found i/c: {match_inst_cycle.group(1)}")
             current_entry["inst_per_cycle"] = float(match_inst_cycle.group(1))
 
     # Entries missing a metric are kept; the table prints N/A for them
@@ -118,6 +112,5 @@ def generate_latex_table(data):
     else:
         raw_input = sys.stdin.read()
     parsed_data = parse_input(raw_input)
-    print(f"Parsed data: {parsed_data}")  # Debugging output
     latex_output = generate_latex_table(parsed_data)
     print(latex_output)
\ No newline at end of file