fastfloat · jaja360 · Jun 26, 2025 · Jun 17, 2025 · Jun 23, 2025 · Jun 23, 2025
diff --git a/.gitignore b/.gitignore
@@ -6,4 +6,5 @@ outputs
 tags
 compile_commands.json
 .cache
+*.log
 **/__pycache__
diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp
@@ -375,7 +375,7 @@ int main(int argc, char **argv) {
     fmt::println("  ./benchmark --file=data/canada.txt      # Run benchmark using numbers from a file");
     fmt::println("  ./benchmark --fixed=10                  # Test fixed-point representation instead of shortest length");
     fmt::println("  ./benchmark --test                      # Test correctness instead of performance");
-    fmt::println("  ./benchmark --volume=1000 --model=uniform # Generate 1000 uniform random numbers");
+    fmt::println("  ./benchmark --volume=1000 --model=uniform_01 # Generate 1000 uniform random numbers in [0, 1]");
     fmt::println("  ./benchmark --algo-filter=ryu,grisu     # Only test algorithms containing 'ryu' or 'grisu'");
     fmt::println("\nFor full options list, run: ./benchmark --help");
     return EXIT_FAILURE;

diff --git a/benchmarks/random_generators.h b/benchmarks/random_generators.h
@@ -30,6 +30,24 @@ struct uniform_generator : float_number_generator<T> {
   T new_float() override { return dis(gen); }
 };
 
+// Generator producing finite values spread over the exponent range of T.
+//
+// Each sample is s * 2^e, with the exponent e drawn uniformly from
+// [min_exponent + 1, max_exponent] and the signed significand s drawn
+// uniformly from [-1, 1). Both signs are produced.
+template <typename T>
+struct logspace_generator : float_number_generator<T> {
+  std::random_device rd;
+  std::mt19937_64 gen;
+  std::uniform_int_distribution<int> exp;
+  std::uniform_real_distribution<T> significand;
+  explicit logspace_generator()
+      : rd(), gen(rd()),
+        // Starting above min_exponent keeps most samples in the normal range;
+        // a significand close to zero can still yield a subnormal or zero.
+        exp(std::numeric_limits<T>::min_exponent + 1,
+            std::numeric_limits<T>::max_exponent),
+        significand(-1, 1) {}
+  std::string describe() override {
+    return "Generate random numbers uniformly in log2 space, i.e. "
+           "magnitudes uniformly distributed in the interval [-2^max_exponent, 2^max_exponent]";
+  }
+  // Returns the next sample; the result is always finite.
+  T new_float() override {
+    T value;
+    do {
+      // Draw in a fixed order so the sequence does not depend on the
+      // compiler's operand evaluation order.
+      const T s = significand(gen);
+      const int e = exp(gen);
+      // 2^max_exponent is not representable (std::pow(2.0, 1024) is +inf for
+      // double), so the power must never be materialised on its own.
+      // std::ldexp scales s directly and stays finite for every |s| < 1.
+      value = std::ldexp(s, e);
+      // Only s == -1 at the top exponent can still overflow; redraw then.
+    } while (!std::isfinite(value));
+    return value;
+  }
+};
+
 enum centering { centered, non_centered };
 template <std::floating_point T, centering C>
 struct centered_generator : float_number_generator<T> {
@@ -112,29 +130,26 @@ struct one_over_rand : float_number_generator<T> {
 };
 
 constexpr std::array<const char*, 8> model_names = {
-  "uniform_01"     , "uniform_all"  , "integer_uniform" ,
-  "centered"       , "non_centered" ,
-  "simple_uniform" , "simple_int"   ,
-  "one_over_rand"
+  "uniform_01"     , "logspace_all"    ,
+  "centered"       , "non_centered"    ,
+  "simple_uniform" , "simple_int"      ,
+  "one_over_rand"  , "integer_uniform" ,
 };
 
 template <typename T>
 inline std::unique_ptr<float_number_generator<T>>
-get_generator_by_name(std::string name) {
+get_generator_by_name(const std::string name) {
   std::cout << "available models (-m): ";
-  for (std::string name : model_names) {
-    std::cout << name << " ";
+  for (const auto& model : model_names) {
+    std::cout << model << " ";
   }
   std::cout << std::endl;
 
   // This is naive, but also not very important.
   if (name == "uniform_01")
     return std::unique_ptr<float_number_generator<T>>(new uniform_generator<T>());
-  if (name == "uniform_all") {
-    return std::unique_ptr<float_number_generator<T>>(
-        new uniform_generator<T>(std::numeric_limits<T>::lowest(),
-                                 std::numeric_limits<T>::max())
-    );
+  if (name == "logspace_all") {
+    return std::unique_ptr<float_number_generator<T>>(new logspace_generator<T>());
   }
   if (name == "centered")
     return std::unique_ptr<float_number_generator<T>>(new centered_generator<T, centered>());

diff --git a/scripts/aws_tests.bash b/scripts/aws_tests.bash
@@ -0,0 +1,234 @@
+#!/bin/bash
+
+# This script launches EC2 instances to benchmark your project.
+#
+# Requirements:
+# - The programs `git`, `ssh`, `rsync`, and `aws` must be installed.
+# - AWS CLI v2 installed and configured (`aws configure`)
+# - An EC2-compatible SSH key must exist in AWS, or the script will generate one (and save locally).
+#
+# Required AWS IAM permissions:
+# - ec2:RunInstances
+# - ec2:TerminateInstances
+# - ec2:DescribeInstances
+# - ec2:DescribeVpcs
+# - ec2:CreateSecurityGroup
+# - ec2:DeleteSecurityGroup
+# - ec2:AuthorizeSecurityGroupIngress
+#
+# Optional environment variables:
+#   AWS_KEY_NAME         use an existing key pair instead of creating one
+#   AWS_SECURITY_GROUP   use an existing SG instead of creating one
+
+set -euo pipefail
+
+# --------------------
+# User-configurable variables
+# --------------------
+
+# Ubuntu 24.04 AMI IDs for x86_64 and aarch64 architectures
+declare -A AMI_MAP=(
+  ["x86_64"]="ami-020cba7c55df1f615"
+  ["aarch64"]="ami-07041441b708acbd6"
+)
+
+# We need the biggest (metal) instances to access perf events on x86
+INSTANCES_x86_64=(
+  "c5n.metal"        # Skylake
+  "c6i.metal"        # Ice Lake
+  "c7i.metal-24xl"   # Sapphire Rapids
+  "c5a.24xlarge"     # EPYC Zen 2
+  "c6a.metal"        # EPYC Zen 3
+  "c7a.metal-48xl"   # EPYC Zen 4
+)
+INSTANCES_aarch64=(
+  "c6g.medium"  # Graviton 2 - Neoverse N1
+  "c7g.medium"  # Graviton 3 - Neoverse V1
+  "c8g.medium"  # Graviton 4 - Neoverse V2
+)
+
+VOLUME_SIZE=10 # in GB
+
+KEY_NAME="${AWS_KEY_NAME:-aws_auto}" # Key path is assumed to be ~/.ssh/${KEY_NAME}.pem
+SECURITY_GROUP="${AWS_SECURITY_GROUP:-}"
+
+# --------------------
+# Internal variables (do not modify)
+# --------------------
+
+KEY_PATH="$HOME/.ssh/${KEY_NAME}.pem"
+SSH_COMMAND="ssh -i ${KEY_PATH} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
+
+PROJECT_DIR=$(basename "$(git rev-parse --show-toplevel)")
+CREATED_SECURITY_GROUP=""
+
+# Cleanup function to delete created security group on exit
+cleanup() {
+  if [ -n "${CREATED_SECURITY_GROUP}" ]; then
+    echo "Cleaning up security group: ${CREATED_SECURITY_GROUP}"
+    aws ec2 delete-security-group --group-id "${CREATED_SECURITY_GROUP}" || true
+  fi
+}
+
+check_prerequisites() {
+  if ((BASH_VERSINFO[0] < 4)); then
+    echo "Error: This script requires Bash version 4 or higher." >&2
+    exit 1
+  fi
+
+  for cmd in git ssh rsync aws; do
+    if ! command -v "$cmd" >/dev/null 2>&1; then
+      echo "Error: Required command '$cmd' is not installed." >&2
+      exit 1
+    fi
+  done
+
+  if ! git rev-parse --show-toplevel >/dev/null 2>&1; then
+    echo "Error: This script must be run from within a Git repository." >&2
+    exit 1
+  fi
+
+  if ! aws sts get-caller-identity >/dev/null 2>&1; then
+    echo "Error: AWS credentials not configured. Run 'aws configure'." >&2
+    exit 1
+  fi
+}
+
+# Makes sure an EC2 key pair named KEY_NAME exists in AWS and that its private
+# key is available locally at KEY_PATH.
+#
+# - Pair already in AWS: the local private key must be readable, otherwise the
+#   script stops here instead of failing at the first ssh after instances
+#   have already been launched.
+# - Pair not in AWS: a new pair is created and its private key saved to
+#   KEY_PATH with mode 400. An existing file at KEY_PATH is never overwritten.
+create_key_pair() {
+  if aws ec2 describe-key-pairs --key-names "${KEY_NAME}" >/dev/null 2>&1; then
+    if [ ! -r "${KEY_PATH}" ]; then
+      echo "Error: key pair '${KEY_NAME}' exists in AWS but its private key is not readable at ${KEY_PATH}." >&2
+      exit 1
+    fi
+    echo "Using existing AWS key pair: ${KEY_NAME}"
+    return
+  fi
+
+  if [ -e "${KEY_PATH}" ]; then
+    echo "Error: ${KEY_PATH} already exists but AWS has no key pair named '${KEY_NAME}'; refusing to overwrite it." >&2
+    exit 1
+  fi
+
+  echo "Creating new AWS key pair named ${KEY_NAME}"
+  mkdir -p ~/.ssh
+  # umask in a subshell: the key file is private from the moment it is created.
+  if ! (umask 077 && aws ec2 create-key-pair --key-name "$KEY_NAME" \
+          --query 'KeyMaterial' --output text > "$KEY_PATH"); then
+    # Safe to remove: the file did not exist before this call (checked above).
+    rm -f "$KEY_PATH"
+    echo "Error: failed to create AWS key pair '${KEY_NAME}'." >&2
+    exit 1
+  fi
+  chmod 400 "$KEY_PATH"
+  echo "Created and saved key pair private key to $KEY_PATH"
+}
+
+create_security_group() {
+  if [ -n "${SECURITY_GROUP}" ]; then
+    echo "Using existing security group: ${SECURITY_GROUP}"
+    return
+  fi
+
+  echo "Creating a new security group for SSH access..."
+  VPC_ID=$(aws ec2 describe-vpcs \
+          --filters Name=isDefault,Values=true \
+          --query "Vpcs[0].VpcId" \
+          --output text)
+
+  CREATED_SECURITY_GROUP=$(aws ec2 create-security-group \
+          --group-name ssh-public-access \
+          --description "Allow SSH access from anywhere (0.0.0.0/0)" \
+          --vpc-id "${VPC_ID}" \
+          --query "GroupId" \
+          --output text)
+
+  aws ec2 authorize-security-group-ingress \
+    --group-id "${CREATED_SECURITY_GROUP}" \
+    --protocol tcp \
+    --port 22 \
+    --cidr 0.0.0.0/0
+
+  SECURITY_GROUP="${CREATED_SECURITY_GROUP}"
+  echo "Created security group: ${SECURITY_GROUP}"
+}
+
+get_arch() {
+  local instance_name="$1"
+  if printf '%s\n' "${INSTANCES_aarch64[@]}" | grep -qx "$instance_name"; then
+    echo "aarch64"
+  else
+    echo "x86_64"
+  fi
+}
+
+process_instance() {
+  INSTANCE_NAME=$1
+  AMI_ID=$2
+  echo "Running instance for ${INSTANCE_NAME} with AMI ${AMI_ID}"
+
+  INSTANCE_ID=$(aws ec2 run-instances \
+    --image-id ${AMI_ID} \
+    --instance-type ${INSTANCE_NAME} \
+    --key-name ${KEY_NAME} \
+    --block-device-mappings "DeviceName=/dev/sda1,Ebs={VolumeSize=${VOLUME_SIZE}}" \
+    --associate-public-ip-address \
+    --security-group-ids ${SECURITY_GROUP} \
+    --count "1" --query 'Instances[0].InstanceId' --output text)
+
+  echo "Waiting for instance ${INSTANCE_ID} to be ready..."
+  aws ec2 wait instance-status-ok --instance-ids ${INSTANCE_ID}
+  echo "Started instance: ${INSTANCE_ID}"
+
+  PUBLIC_IP=$(aws ec2 describe-instances \
+    --instance-ids ${INSTANCE_ID} \
+    --query "Reservations[0].Instances[0].PublicIpAddress" --output text)
+  echo "Instance ${INSTANCE_ID} public IP: ${PUBLIC_IP}"
+
+  git ls-files -z | rsync -avz --partial --progress --from0 --files-from=- -e "${SSH_COMMAND}" \
+    ./ ubuntu@${PUBLIC_IP}:~/${PROJECT_DIR}
+  ${SSH_COMMAND} ubuntu@${PUBLIC_IP} << EOF
+    set -e # Exit on error
+    cd ~/${PROJECT_DIR}
+
+    echo "Updating and installing dependencies on ${INSTANCE_NAME}..."
+    sudo apt update
+    sudo DEBIAN_FRONTEND=noninteractive apt install -y \
+      linux-tools-common linux-tools-generic g++ clang cmake python3
+
+    # Enable access to perf events for benchmarking
+    # Must use `sudo tee` since shell redirection (`>`) is not affected by sudo
+    echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid > /dev/null
+
+    echo "Saving some info about the environment..."
+    mkdir -p outputs
+    lscpu > outputs/lscpu.txt
+    g++ --version > outputs/g++.txt
+    clang++ --version > outputs/clang++.txt
+
+    echo "Building project with g++ and running the benchmarks..."
+    CXX=g++ cmake -B build . && cmake --build build
+    ./scripts/generate_multiple_tables.py g++
+
+    rm -rf build
+
+    echo "Building project with clang++ and running the benchmarks..."
+    CXX=clang++ cmake -B build . && cmake --build build
+    ./scripts/generate_multiple_tables.py clang++
+EOF
+
+  echo "Script executed successfully on ${INSTANCE_NAME}"
+  mkdir -p "./outputs/${INSTANCE_NAME}"
+  rsync -avz --partial --progress -e "${SSH_COMMAND}" \
+    ubuntu@${PUBLIC_IP}:~/${PROJECT_DIR}/outputs/ ./outputs/${INSTANCE_NAME}/
+
+  aws ec2 terminate-instances --instance-ids ${INSTANCE_ID}
+  echo "Terminated instance: ${INSTANCE_ID}"
+}
+
+main () {
+  trap cleanup EXIT
+  check_prerequisites
+  create_key_pair
+  create_security_group
+
+  echo "Launching ${#INSTANCES_aarch64[@]} aarch64 instances and ${#INSTANCES_x86_64[@]} x86_64 instances in parallel..."
+  for INSTANCE_NAME in "${INSTANCES_x86_64[@]}" "${INSTANCES_aarch64[@]}"; do
+    ARCH=$(get_arch "$INSTANCE_NAME")
+    AMI_ID="${AMI_MAP[$ARCH]}"
+
+    process_instance "${INSTANCE_NAME}" "${AMI_ID}" 2>&1 | tee "${INSTANCE_NAME}.log" &
+  done
+
+  # Wait for all background jobs to finish
+  wait
+  echo "All instances completed."
+}
+
+if [ "$0" = "$BASH_SOURCE" ] ; then
+  main
+fi
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,4 +6,5 @@ outputs @@
     tags
     compile_commands.json
     .cache
+    *.log
     **/__pycache__