From c107b6bb84f68d4d9bf8dca604f86fbdc7a8e88c Mon Sep 17 00:00:00 2001
From: Santiago Palladino <santiago@aztecprotocol.com>
Date: Tue, 14 Jan 2025 14:09:23 -0300
Subject: [PATCH] chore: Block building benchmark via github-action-benchmark
 (#11202)

Deletes old benchmarks, along with the benchmark-related scripts and
types.

Adds a single benchmark for block building, with a stubbed
`TelemetryClient` that collects all datapoints in memory, and then
flushes a set of specified metrics into the custom format expected by
[github-action-benchmark](https://github.com/benchmark-action/github-action-benchmark),
which we're already using for the barretenberg (bb) benchmarks.

Benchmarks get published to
https://aztecprotocol.github.io/aztec-packages/dev/e2e-bench/

Fixes #11154
---
 .github/workflows/ci.yml                      |  72 ++--
 scripts/logs/Earthfile                        |  20 --
 scripts/logs/check_logs_for_benchmark.sh      |  25 --
 .../logs/download_base_benchmark_from_s3.sh   |  35 --
 scripts/logs/download_logs_from_s3.sh         |  36 --
 .../upload_aggregated_benchmarks_to_s3.sh     |  37 --
 scripts/logs/upload_logs_to_s3.sh             |  33 --
 .../circuit-types/src/stats/benchmarks.ts     |  12 -
 yarn-project/circuit-types/src/stats/index.ts |  17 -
 .../circuit-types/src/stats/metrics.ts        | 322 -----------------
 yarn-project/end-to-end/scripts/e2e_test.sh   |   4 +
 .../end-to-end/scripts/e2e_test_config.yml    |  18 +-
 .../src/bench/bench_build_block.test.ts       |  42 +++
 .../src/{benchmarks => bench}/utils.ts        | 101 +++++-
 .../end-to-end/src/benchmarks/README.md       |  23 --
 .../benchmarks/bench_process_history.test.ts  |  94 -----
 .../src/benchmarks/bench_prover.test.ts       | 259 --------------
 .../benchmarks/bench_publish_rollup.test.ts   |  61 ----
 .../src/benchmarks/bench_tx_size_fees.test.ts | 128 -------
 yarn-project/end-to-end/src/fixtures/utils.ts |  37 +-
 .../foundation/src/collection/array.test.ts   |  58 +++-
 .../foundation/src/collection/array.ts        |  31 ++
 yarn-project/scripts/Earthfile                |  59 ----
 yarn-project/scripts/package.json             |   3 -
 .../scripts/src/benchmarks/aggregate.ts       | 323 ------------------
 .../scripts/src/benchmarks/markdown.ts        | 302 ----------------
 yarn-project/scripts/src/benchmarks/paths.ts  |  13 -
 .../scripts/src/bin/bench-aggregate.ts        |   7 -
 yarn-project/scripts/src/bin/bench-comment.ts |   9 -
 .../scripts/src/bin/bench-markdown.ts         |  10 -
 yarn-project/scripts/src/types.ts             |   1 -
 yarn-project/scripts/src/utils/comment.ts     |   6 +-
 .../sequencer-client/src/sequencer/metrics.ts |  12 +-
 .../src/sequencer/sequencer.ts                |  15 +-
 yarn-project/telemetry-client/package.json    |   1 +
 yarn-project/telemetry-client/src/bench.ts    | 153 +++++++++
 yarn-project/telemetry-client/src/metrics.ts  |   1 +
 yarn-project/telemetry-client/src/noop.ts     |   2 +-
 yarn-project/yarn.lock                        |   3 -
 39 files changed, 452 insertions(+), 1933 deletions(-)
 delete mode 100644 scripts/logs/Earthfile
 delete mode 100755 scripts/logs/check_logs_for_benchmark.sh
 delete mode 100755 scripts/logs/download_base_benchmark_from_s3.sh
 delete mode 100755 scripts/logs/download_logs_from_s3.sh
 delete mode 100755 scripts/logs/upload_aggregated_benchmarks_to_s3.sh
 delete mode 100755 scripts/logs/upload_logs_to_s3.sh
 delete mode 100644 yarn-project/circuit-types/src/stats/benchmarks.ts
 delete mode 100644 yarn-project/circuit-types/src/stats/metrics.ts
 create mode 100644 yarn-project/end-to-end/src/bench/bench_build_block.test.ts
 rename yarn-project/end-to-end/src/{benchmarks => bench}/utils.ts (51%)
 delete mode 100644 yarn-project/end-to-end/src/benchmarks/README.md
 delete mode 100644 yarn-project/end-to-end/src/benchmarks/bench_process_history.test.ts
 delete mode 100644 yarn-project/end-to-end/src/benchmarks/bench_prover.test.ts
 delete mode 100644 yarn-project/end-to-end/src/benchmarks/bench_publish_rollup.test.ts
 delete mode 100644 yarn-project/end-to-end/src/benchmarks/bench_tx_size_fees.test.ts
 delete mode 100644 yarn-project/scripts/Earthfile
 delete mode 100644 yarn-project/scripts/src/benchmarks/aggregate.ts
 delete mode 100644 yarn-project/scripts/src/benchmarks/markdown.ts
 delete mode 100644 yarn-project/scripts/src/benchmarks/paths.ts
 delete mode 100644 yarn-project/scripts/src/bin/bench-aggregate.ts
 delete mode 100644 yarn-project/scripts/src/bin/bench-comment.ts
 delete mode 100644 yarn-project/scripts/src/bin/bench-markdown.ts
 create mode 100644 yarn-project/telemetry-client/src/bench.ts

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 746dcf7fbc7..78de49afe55 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -239,7 +239,7 @@ jobs:
             ci3/cache_upload_flag "$artifact"
           fi
 
-  # all the benchmarking end-to-end integration tests for aztec (not required to merge)
+  # All the benchmarking end-to-end integration tests for aztec (not required to merge)
   bench-e2e:
     needs: [images-e2e, configure]
     if: needs.configure.outputs.e2e-all == 'true' || needs.configure.outputs.bench-list != '[]'
@@ -260,15 +260,28 @@ jobs:
           if ci3/test_should_run "$artifact"; then
             docker pull aztecprotocol/aztec:${{ env.GIT_COMMIT }}
             docker pull aztecprotocol/end-to-end:${{ env.GIT_COMMIT }}
-            export FORCE_COLOR=1
-            export EARTHLY_BUILD_ARGS="${{ env.EARTHLY_BUILD_ARGS }}"
-            ./yarn-project/end-to-end/scripts/e2e_test.sh ${{ matrix.test }}
-            export COMMIT_HASH=${{ env.GIT_COMMIT }}
-            export PULL_REQUEST=${{ github.event.pull_request.number }}
-            export BRANCH=${{ github.ref_name }}
-            scripts/logs/upload_logs_to_s3.sh /usr/var/log
+            FORCE_COLOR=1 ./yarn-project/end-to-end/scripts/e2e_test.sh ${{ matrix.test }}
             ci3/cache_upload_flag "$artifact"
           fi
+      - name: Inspect data folder
+        continue-on-error: true
+        run: tree ./yarn-project/end-to-end/out
+      - name: Store benchmark result
+        if: github.ref == 'refs/heads/master'
+        continue-on-error: true
+        uses: benchmark-action/github-action-benchmark@4de1bed97a47495fc4c5404952da0499e31f5c29
+        with:
+          name: "End-to-end Benchmark"
+          benchmark-data-dir-path: "dev/e2e-bench"
+          tool: "customSmallerIsBetter"
+          output-file-path: ./yarn-project/end-to-end/out/bench.json
+          github-token: ${{ secrets.AZTEC_BOT_GITHUB_TOKEN }}
+          auto-push: true
+          alert-threshold: "150%"
+          comment-on-alert: true
+          fail-on-alert: false
+          alert-comment-cc-users: "@philwindle @spalladino"
+          max-items-in-chart: 50
 
   # Only e2e test that can't run on standard github runner
   e2e-prover-full:
@@ -320,49 +333,6 @@ jobs:
               ci3/cache_upload_flag acir-bench-$(./barretenberg/acir_tests/bootstrap.sh hash)
             fi
 
-  # TODO(ci3) fix and reinstate
-  # bench-summary:
-  #   if: needs.configure.outputs.e2e-all == 'true'
-  #   needs:
-  #     - acir-bench
-  #     - bench-e2e
-  #     - configure
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - uses: actions/checkout@v4
-  #       with:
-  #         fetch-depth: 100 # Downloading base benchmark from master requires access to history
-  #         ref: "${{ github.event.pull_request.head.sha }}"
-  #     - uses: ./.github/ci-setup-action
-  #     - name: "Build and upload bench aggregate file"
-  #       uses: ./.github/ensure-builder
-  #       with:
-  #         runner_type: builder-x86
-  #         username: ${{ needs.configure.outputs.username }}
-  #         run: scripts/earthly-ci ./yarn-project/scripts/+bench-aggregate
-  #     - name: "Download base benchmark and package into earthly"
-  #       if: github.event_name == 'pull_request'
-  #       uses: ./.github/run-on-builder
-  #       env:
-  #         AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-  #         AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-  #         BENCH_FOLDER: "./scripts/logs/tmp/bench"
-  #         PULL_REQUEST: "${{ github.event.pull_request.number }}"
-  #       with:
-  #         run: |
-  #           # Download the base benchmark locally (requires AWS creds and .git history)
-  #           mkdir -p $BENCH_FOLDER
-  #           ./scripts/logs/download_base_benchmark_from_s3.sh
-  #           # Package it into an earthly artifact to read from bench-comment
-  #           scripts/earthly-ci ./scripts/logs+pack-base-benchmark
-  #     - name: "Generate summary comment if pull request"
-  #       if: github.event_name == 'pull_request'
-  #       uses: ./.github/run-on-builder
-  #       env:
-  #         AZTEC_BOT_GITHUB_TOKEN: ${{ secrets.AZTEC_BOT_GITHUB_TOKEN }}
-  #       with:
-  #         run: scripts/earthly-ci ./yarn-project/scripts/+bench-comment
-
   # barretenberg (prover) native, AVM (public VM) and Merkle tree (world state) tests
   # ran on own runner for resource reasons (memory x cpu intensive)
   bb-native-tests:
diff --git a/scripts/logs/Earthfile b/scripts/logs/Earthfile
deleted file mode 100644
index 7b2b030ea69..00000000000
--- a/scripts/logs/Earthfile
+++ /dev/null
@@ -1,20 +0,0 @@
-VERSION 0.8
-FROM ../../build-images/+base-slim-node
-
-pack-base-benchmark:
-  # TODO(ci3): revisit/redo this
-  # Copies the base benchmark (ie the master run) into a container and packs it as an artifact,
-  # so it can be consumed from bench-comment. Note that we need to download base-benchmark
-  # outside of this target beforehand. We cannot run it within an Earthly container because it needs
-  # access to the .git folder, and we cannot run it with a LOCALLY statement because Earthly does
-  # not support secrets when running locally (and we need) the AWS access keys to access S3.
-  # We also cannot COPY the local file directly from bench-comment, since the file must be present
-  # within the build context so we can copy it (ie within yarn-project/scripts), and that invalidates
-  # the cache of yarn-project+build since it does a `COPY . .`, and we cannot add the bench file to
-  # earthlyignore or we would not be able to copy it from anywhere. So we need to place this target
-  # outside yarn-project altogether, since that folder should not be modified.
-  FROM scratch
-  LET LOCAL_BENCH_FOLDER=./tmp/bench
-  LET BENCH_FOLDER=/usr/var/bench
-  COPY $LOCAL_BENCH_FOLDER $BENCH_FOLDER
-  SAVE ARTIFACT $BENCH_FOLDER bench
diff --git a/scripts/logs/check_logs_for_benchmark.sh b/scripts/logs/check_logs_for_benchmark.sh
deleted file mode 100755
index 69075d4160d..00000000000
--- a/scripts/logs/check_logs_for_benchmark.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env bash
-# Checks that all logs needed for assembling aggregate benchmarks have been retrieved.
-
-[ -n "${BUILD_SYSTEM_DEBUG:-}" ] && set -x # conditionally trace
-set -eu
-
-LOG_FOLDER="${LOG_FOLDER:-log}"
-E2E_SRC_FOLDER=/usr/src/yarn-project/end-to-end/src
-
-echo "Checking log files in $LOG_FOLDER"
-
-# Only generate the aggregated benchmark if we've managed to retrieve all the needed log files
-# If these runs were skipped due to no changes in their rebuild-patterns, then there's no need
-# to recompute the aggregated benchmark. Note that if one benchmark did run but others didn't,
-# this skips the whole aggregation. For now, that's fine because all benchmark files have the
-# same rebuild pattern rules. But if that changes, then we'd need to go up in the commit history
-# to find the latest log files for the unchanged benchmarks.
-EXPECTED_LOGS_COUNT=$(find $E2E_SRC_FOLDER -type f -name "bench*.test.ts" | wc -l)
-DOWNLOADED_LOGS_COUNT=$(find $LOG_FOLDER -type f -name "*.jsonl" | wc -l)
-if [ "$DOWNLOADED_LOGS_COUNT" -lt "$EXPECTED_LOGS_COUNT" ]; then
-  echo Found only $DOWNLOADED_LOGS_COUNT out of $EXPECTED_LOGS_COUNT benchmark log files in S3.
-  echo Files found: $(find $LOG_FOLDER -type f -name "*.jsonl")
-  exit 1
-fi
-
diff --git a/scripts/logs/download_base_benchmark_from_s3.sh b/scripts/logs/download_base_benchmark_from_s3.sh
deleted file mode 100755
index 6da6eb2a6bb..00000000000
--- a/scripts/logs/download_base_benchmark_from_s3.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/env bash
-# Downloads base benchmarks from S3 to compare with the current benchmarks via bench-comment
-
-[ -n "${BUILD_SYSTEM_DEBUG:-}" ] && set -x # conditionally trace
-set -eu
-
-BUCKET_NAME="aztec-ci-artifacts"
-BENCH_FOLDER="${BENCH_FOLDER:-bench}"
-COMMIT_HASH="${COMMIT_HASH:-$(git rev-parse HEAD)}"
-BASE_BENCHMARK_FILE_JSON="${BENCH_FOLDER}/base-benchmark.json"
-
-# If on a pull request, get the data from the most recent commit on master where it's available to generate a comment comparing them
-if [ -n "${PULL_REQUEST:-}" ]; then
-  MASTER_COMMIT_HASH=$(curl -s "https://api.github.com/repos/AztecProtocol/aztec-packages/pulls/${PULL_REQUEST##*/}" | jq -r '.base.sha')
-  # master could have diverged since starting this job, refresh history
-  git fetch --depth 50 origin master
-  MASTER_COMMIT_HASHES=($(git log $MASTER_COMMIT_HASH --format="%H" -n 50))
-
-  mkdir -p $BENCH_FOLDER
-
-  set +e
-  echo "Searching for base benchmark data starting from commit $MASTER_COMMIT_HASH"
-  for commit_hash in "${MASTER_COMMIT_HASHES[@]}"; do
-    aws s3 cp "s3://${BUCKET_NAME}/benchmarks-v1/master/$commit_hash.json" $BASE_BENCHMARK_FILE_JSON
-    if [ $? -eq 0 ]; then
-      echo "Downloaded base data from commit $commit_hash"
-      exit 0
-    fi
-  done
-  set -e
-
-  echo "No base commit data found"
-else
-  echo "Not on a pull request, skipping download of base benchmark data"
-fi
diff --git a/scripts/logs/download_logs_from_s3.sh b/scripts/logs/download_logs_from_s3.sh
deleted file mode 100755
index a31979e3968..00000000000
--- a/scripts/logs/download_logs_from_s3.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env bash
-# Downloads the log files uploaded in upload_logs_to_s3
-
-set -eu
-
-BUCKET_NAME="aztec-ci-artifacts"
-LOG_FOLDER="${LOG_FOLDER:-log}"
-COMMIT_HASH="${COMMIT_HASH:-$(git rev-parse HEAD)}"
-
-echo "Downloading logs from S3 for commit $COMMIT_HASH in branch ${BRANCH:-} at pull request ${PULL_REQUEST:-none}"
-
-# Paths from upload_logs_to_s3
-if [ "${BRANCH:-}" = "master" ]; then
-  LOG_SOURCE_FOLDER="logs-v1/master/$COMMIT_HASH"
-  BARRETENBERG_BENCH_SOURCE_FOLDER="barretenberg-bench-v1/master/$COMMIT_HASH"
-  BENCHMARK_TARGET_FILE="benchmarks-v1/master/$COMMIT_HASH.json"
-  BENCHMARK_LATEST_FILE="benchmarks-v1/latest.json"
-elif [ -n "${PULL_REQUEST:-}" ]; then
-  LOG_SOURCE_FOLDER="logs-v1/pulls/${PULL_REQUEST##*/}"
-  BARRETENBERG_BENCH_SOURCE_FOLDER="barretenberg-bench-v1/pulls/${PULL_REQUEST##*/}"
-  BENCHMARK_TARGET_FILE="benchmarks-v1/pulls/${PULL_REQUEST##*/}.json"
-else
-  echo "Skipping benchmark run on branch ${BRANCH:-unknown}."
-  exit 0
-fi
-
-mkdir -p $LOG_FOLDER
-
-# Download benchmark log files from S3 LOG_SOURCE_FOLDER into local LOG_FOLDER
-echo "Downloading benchmark log files from $BUCKET_NAME/$LOG_SOURCE_FOLDER to $LOG_FOLDER"
-aws s3 cp "s3://${BUCKET_NAME}/${LOG_SOURCE_FOLDER}/" $LOG_FOLDER --exclude '*' --include 'bench*.jsonl' --recursive
-
-# Download barretenberg log files, these are direct benchmarks and separate from the above
-aws s3 cp "s3://${BUCKET_NAME}/${BARRETENBERG_BENCH_SOURCE_FOLDER}/" $LOG_FOLDER --exclude '*' --include '*_bench.json' --recursive
-
-echo "Downloaded log files $(ls $LOG_FOLDER)"
\ No newline at end of file
diff --git a/scripts/logs/upload_aggregated_benchmarks_to_s3.sh b/scripts/logs/upload_aggregated_benchmarks_to_s3.sh
deleted file mode 100755
index 76b00122c89..00000000000
--- a/scripts/logs/upload_aggregated_benchmarks_to_s3.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env bash
-# Uploads aggregated benchmark logs to S3
-
-[ -n "${BUILD_SYSTEM_DEBUG:-}" ] && set -x # conditionally trace
-set -eu
-
-BUCKET_NAME="aztec-ci-artifacts"
-LOG_FOLDER="${LOG_FOLDER:-log}"
-BENCH_FOLDER="${BENCH_FOLDER:-bench}"
-COMMIT_HASH="${COMMIT_HASH:-$(git rev-parse HEAD)}"
-BENCHMARK_FILE_JSON="${BENCH_FOLDER}/benchmark.json"
-
-# Paths from upload_logs_to_s3
-if [ "${BRANCH:-}" = "master" ]; then
-  LOG_SOURCE_FOLDER="logs-v1/master/$COMMIT_HASH"
-  BARRETENBERG_BENCH_SOURCE_FOLDER="barretenberg-bench-v1/master/$COMMIT_HASH"
-  BENCHMARK_TARGET_FILE="benchmarks-v1/master/$COMMIT_HASH.json"
-  BENCHMARK_LATEST_FILE="benchmarks-v1/latest.json"
-elif [ -n "${PULL_REQUEST:-}" ]; then
-  LOG_SOURCE_FOLDER="logs-v1/pulls/${PULL_REQUEST##*/}"
-  BARRETENBERG_BENCH_SOURCE_FOLDER="barretenberg-bench-v1/pulls/${PULL_REQUEST##*/}"
-  BENCHMARK_TARGET_FILE="benchmarks-v1/pulls/${PULL_REQUEST##*/}.json"
-elif [ -n "${CIRCLE_TAG:-}" ]; then
-  echo "Skipping benchmark run for ${CIRCLE_TAG} tagged release."
-  exit 0
-else
-  echo "Skipping benchmark run on branch ${BRANCH:-unknown}."
-  exit 0
-fi
-
-# Upload it to master or pulls
-aws s3 cp $BENCHMARK_FILE_JSON "s3://${BUCKET_NAME}/${BENCHMARK_TARGET_FILE}"
-
-# If on master, also update the "latest" benchmark file
-if [ -n "${BENCHMARK_LATEST_FILE:-}" ]; then
-  aws s3 cp $BENCHMARK_FILE_JSON "s3://${BUCKET_NAME}/${BENCHMARK_LATEST_FILE}"
-fi
diff --git a/scripts/logs/upload_logs_to_s3.sh b/scripts/logs/upload_logs_to_s3.sh
deleted file mode 100755
index 4a2c77c22d8..00000000000
--- a/scripts/logs/upload_logs_to_s3.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env bash
-
-# Uploads to S3 the contents of the log file mounted on the end-to-end container,
-# which contains log entries with an associated event and metrics for it.
-# Logs are uploaded to aztec-ci-artifacts/logs-v1/master/$COMMIT/$JOB.jsonl
-# or to aztec-ci-artifacts/logs-v1/pulls/$PRNUMBER/$JOB.jsonl if on a PR
-
-[ -n "${BUILD_SYSTEM_DEBUG:-}" ] && set -x # conditionally trace
-set -eu
-
-LOG_FOLDER=$1
-BUCKET_NAME="aztec-ci-artifacts"
-COMMIT_HASH="${COMMIT_HASH:-$(git rev-parse HEAD)}"
-
-echo "Uploading logs to S3 for commit $COMMIT_HASH in branch ${BRANCH:-} at pull request ${PULL_REQUEST:-none}"
-
-if [ ! -d "$LOG_FOLDER" ] || [ -z "$(ls -A "$LOG_FOLDER")" ]; then
-  echo "No logs in folder $LOG_FOLDER to upload"
-  exit 0
-fi
-
-# Paths used in scripts/ci/assemble_e2e_benchmark.sh
-if [ "${BRANCH:-}" = "master" ]; then
-  TARGET_FOLDER="logs-v1/master/$COMMIT_HASH/"
-elif [ -n "${PULL_REQUEST:-}" ]; then
-  TARGET_FOLDER="logs-v1/pulls/${PULL_REQUEST##*/}"
-fi
-
-if [ -n "${TARGET_FOLDER:-}" ]; then
-  aws s3 cp $LOG_FOLDER "s3://${BUCKET_NAME}/${TARGET_FOLDER}"  --include "*.jsonl" --recursive
-else
-  echo Skipping upload since no target folder was defined
-fi
\ No newline at end of file
diff --git a/yarn-project/circuit-types/src/stats/benchmarks.ts b/yarn-project/circuit-types/src/stats/benchmarks.ts
deleted file mode 100644
index 37c09f40727..00000000000
--- a/yarn-project/circuit-types/src/stats/benchmarks.ts
+++ /dev/null
@@ -1,12 +0,0 @@
-import { type MetricName } from './metrics.js';
-
-/** Aggregated benchmark results. */
-export type BenchmarkResults = Partial<Record<MetricName, BenchmarkMetricResults>>;
-
-/** Aggregated benchmark result for a given metric (values aggregated by bucket such as chain size). */
-export type BenchmarkMetricResults = Record<string, number>;
-
-/** Aggregated benchmark results with a timestamp. */
-export type BenchmarkResultsWithTimestamp = BenchmarkResults & {
-  /** When did this benchmark happen. */ timestamp: string;
-};
diff --git a/yarn-project/circuit-types/src/stats/index.ts b/yarn-project/circuit-types/src/stats/index.ts
index 2a983dd9dd1..0502f09568b 100644
--- a/yarn-project/circuit-types/src/stats/index.ts
+++ b/yarn-project/circuit-types/src/stats/index.ts
@@ -1,18 +1 @@
 export * from './stats.js';
-export * from './metrics.js';
-export * from './benchmarks.js';
-
-/** Block sizes to use for benchmark tests on multiple block sizes. */
-export const BENCHMARK_BLOCK_SIZES = process.env.BENCHMARK_BLOCK_SIZES
-  ? process.env.BENCHMARK_BLOCK_SIZES.split(',').map(Number)
-  : [4, 8, 16];
-
-/** Block size to use for building chains of multiple blocks. */
-export const BENCHMARK_HISTORY_BLOCK_SIZE = process.env.BENCHMARK_HISTORY_BLOCK_SIZE
-  ? +process.env.BENCHMARK_HISTORY_BLOCK_SIZE
-  : 8;
-
-/** Chain lengths to test for history processing benchmarks. */
-export const BENCHMARK_HISTORY_CHAIN_LENGTHS = process.env.BENCHMARK_HISTORY_CHAIN_LENGTHS
-  ? process.env.BENCHMARK_HISTORY_CHAIN_LENGTHS.split(',').map(x => Number(x))
-  : [3, 5];
diff --git a/yarn-project/circuit-types/src/stats/metrics.ts b/yarn-project/circuit-types/src/stats/metrics.ts
deleted file mode 100644
index c1af812a2b9..00000000000
--- a/yarn-project/circuit-types/src/stats/metrics.ts
+++ /dev/null
@@ -1,322 +0,0 @@
-import { type StatsEventName } from './stats.js';
-
-/** How a metric is grouped in benchmarks: by block size, by length of chain processed, or by circuit name. */
-export type MetricGroupBy =
-  | 'threads'
-  | 'block-size'
-  | 'chain-length'
-  | 'protocol-circuit-name'
-  | 'app-circuit-name'
-  | 'classes-registered'
-  | 'leaf-count'
-  | 'fee-payment-method';
-
-/** Definition of a metric to track in benchmarks. */
-export interface Metric {
-  /** Identifier. */
-  name: string;
-  /** What dimension this metric is grouped by. */
-  groupBy: MetricGroupBy;
-  /** Description */
-  description: string;
-  /** Events used for generating this metric. */
-  events: readonly StatsEventName[];
-}
-
-/** Metric definitions to track from benchmarks. */
-export const Metrics = [
-  {
-    name: 'public_db_access_time_ms',
-    groupBy: 'chain-length',
-    description: 'Time to access a database.',
-    events: ['public-db-access'],
-  },
-  {
-    name: 'avm_simulation_time_ms',
-    groupBy: 'app-circuit-name',
-    description: 'Time to simulate an AVM program.',
-    events: ['avm-simulation'],
-  },
-  {
-    name: 'proof_construction_time_sha256_ms',
-    groupBy: 'threads',
-    description: 'Time needed to generate a proof of an ACIR program.',
-    events: ['proof_construction_time'],
-  },
-  {
-    name: 'proof_construction_time_sha256_30_ms',
-    groupBy: 'threads',
-    description: 'Time needed to generate a proof of an ACIR program.',
-    events: ['proof_construction_time'],
-  },
-  {
-    name: 'proof_construction_time_sha256_100_ms',
-    groupBy: 'threads',
-    description: 'Time needed to generate a proof of an ACIR program.',
-    events: ['proof_construction_time'],
-  },
-  {
-    name: 'proof_construction_time_poseidon_hash_ms',
-    groupBy: 'threads',
-    description: 'Time needed to generate a proof of an ACIR program.',
-    events: ['proof_construction_time'],
-  },
-  {
-    name: 'proof_construction_time_poseidon_hash_30_ms',
-    groupBy: 'threads',
-    description: 'Time needed to generate a proof of an ACIR program.',
-    events: ['proof_construction_time'],
-  },
-  {
-    name: 'proof_construction_time_poseidon_hash_100_ms',
-    groupBy: 'threads',
-    description: 'Time needed to generate a proof of an ACIR program.',
-    events: ['proof_construction_time'],
-  },
-  {
-    name: 'proof_construction_time_eddsa_poseidon_ms',
-    groupBy: 'threads',
-    description: 'Time needed to generate a proof of an ACIR program.',
-    events: ['proof_construction_time'],
-  },
-  {
-    name: 'l1_rollup_calldata_size_in_bytes',
-    groupBy: 'block-size',
-    description: 'Size in bytes of the tx calldata posted to L1 when submitting a block.',
-    events: ['rollup-published-to-l1'],
-  },
-  {
-    name: 'l1_rollup_calldata_gas',
-    groupBy: 'block-size',
-    description: 'Estimated gas cost of the tx calldata when posting a block to L1.',
-    events: ['rollup-published-to-l1'],
-  },
-  {
-    name: 'l1_rollup_execution_gas',
-    groupBy: 'block-size',
-    description: 'Total gas used in a tx when submitting a block to L1.',
-    events: ['rollup-published-to-l1'],
-  },
-  {
-    name: 'l2_block_processing_time_in_ms',
-    groupBy: 'block-size',
-    description: 'Time for the state synchronizer to process an L2 block that was not built by its own sequencer.',
-    events: ['l2-block-handled'],
-  },
-  {
-    name: 'l2_block_building_time_in_ms',
-    groupBy: 'block-size',
-    description: 'Total time for the sequencer to build an L2 block from a set of txs.',
-    events: ['l2-block-built'],
-  },
-  {
-    name: 'l2_block_rollup_simulation_time_in_ms',
-    groupBy: 'block-size',
-    description: 'Time for the sequencer to run the rollup circuit simulation when assembling a block.',
-    events: ['l2-block-built'],
-  },
-  {
-    name: 'l2_block_public_tx_process_time_in_ms',
-    groupBy: 'block-size',
-    description: 'Time for the sequencer to execute public function calls for txs in a block.',
-    events: ['l2-block-built'],
-  },
-  {
-    name: 'node_history_sync_time_in_ms',
-    groupBy: 'chain-length',
-    description: 'Time for a node without a sequencer to sync chain history',
-    events: ['node-synced-chain-history'],
-  },
-  {
-    name: 'node_database_size_in_bytes',
-    groupBy: 'chain-length',
-    description: 'Size on disk of the leveldown database of a node after syncing all chain history.',
-    events: ['node-synced-chain-history'],
-  },
-  {
-    name: 'protocol_circuit_simulation_time_in_ms',
-    groupBy: 'protocol-circuit-name',
-    description: 'Time to run a circuit simulation.',
-    events: ['circuit-simulation'],
-  },
-  {
-    name: 'protocol_circuit_witness_generation_time_in_ms',
-    groupBy: 'protocol-circuit-name',
-    description: 'Time to generate the partial witness for a circuit',
-    events: ['circuit-simulation'],
-  },
-  {
-    name: 'protocol_circuit_proving_time_in_ms',
-    groupBy: 'protocol-circuit-name',
-    description: 'Time to prove circuit execution.',
-    events: ['circuit-proving'],
-  },
-  {
-    name: 'protocol_circuit_input_size_in_bytes',
-    groupBy: 'protocol-circuit-name',
-    description: 'Size of the inputs to a circuit simulation.',
-    events: ['circuit-simulation'],
-  },
-  {
-    name: 'protocol_circuit_output_size_in_bytes',
-    groupBy: 'protocol-circuit-name',
-    description: 'Size of the outputs (ie public inputs) from a circuit simulation.',
-    events: ['circuit-simulation'],
-  },
-  {
-    name: 'protocol_circuit_proof_size_in_bytes',
-    groupBy: 'protocol-circuit-name',
-    description: 'Size of the proof produced by a circuit.',
-    events: ['circuit-proving'],
-  },
-  {
-    name: 'protocol_circuit_num_public_inputs',
-    groupBy: 'protocol-circuit-name',
-    description: 'Number of public inputs.',
-    events: ['circuit-proving'],
-  },
-  {
-    name: 'protocol_circuit_size_in_gates',
-    groupBy: 'protocol-circuit-name',
-    description: 'Size of the proof produced by a circuit.',
-    events: ['circuit-proving'],
-  },
-  {
-    name: 'app_circuit_simulation_time_in_ms',
-    groupBy: 'app-circuit-name',
-    description: 'Time to run a circuit simulation.',
-    events: ['circuit-simulation'],
-  },
-  {
-    name: 'app_circuit_input_size_in_bytes',
-    groupBy: 'app-circuit-name',
-    description: 'Size of the inputs to a circuit simulation.',
-    events: ['circuit-simulation'],
-  },
-  {
-    name: 'app_circuit_output_size_in_bytes',
-    groupBy: 'app-circuit-name',
-    description: 'Size of the outputs (ie public inputs) from a circuit simulation.',
-    events: ['circuit-simulation'],
-  },
-  {
-    name: 'app_circuit_proof_size_in_bytes',
-    groupBy: 'app-circuit-name',
-    description: 'Size of the proof produced by a circuit.',
-    events: ['circuit-proving'],
-  },
-  {
-    name: 'app_circuit_witness_generation_time_in_ms',
-    groupBy: 'app-circuit-name',
-    description: 'Time to generate the partial witness for a circuit',
-    events: ['circuit-simulation'],
-  },
-  {
-    name: 'app_circuit_proving_time_in_ms',
-    groupBy: 'app-circuit-name',
-    description: 'Duration of proving an app circuit.',
-    events: ['circuit-proving'],
-  },
-  {
-    name: 'app_circuit_size_in_gates',
-    groupBy: 'app-circuit-name',
-    description: 'Size of an app circuit.',
-    events: ['circuit-proving'],
-  },
-  {
-    name: 'app_circuit_num_public_inputs',
-    groupBy: 'app-circuit-name',
-    description: 'Number of public inputs.',
-    events: ['circuit-proving'],
-  },
-  {
-    name: 'tx_size_in_bytes',
-    groupBy: 'classes-registered',
-    description: 'Size of txs received in the mempool.',
-    events: ['tx-added-to-pool'],
-  },
-  {
-    name: 'tx_with_fee_size_in_bytes',
-    groupBy: 'fee-payment-method',
-    description: 'Size of txs after fully processing them (including fee payment).',
-    events: ['tx-added-to-pool'],
-  },
-  {
-    name: 'batch_insert_into_append_only_tree_16_depth_ms',
-    groupBy: 'leaf-count',
-    description: 'Time to insert a batch of leaves into an append-only tree',
-    events: ['tree-insertion'],
-  },
-  {
-    name: 'batch_insert_into_append_only_tree_16_depth_hash_count',
-    groupBy: 'leaf-count',
-    description: 'The number of hashes necessary to insert a batch of leaves into',
-    events: ['tree-insertion'],
-  },
-  {
-    name: 'batch_insert_into_append_only_tree_16_depth_hash_ms',
-    groupBy: 'leaf-count',
-    description: 'Average duration for a hash operation',
-    events: ['tree-insertion'],
-  },
-  {
-    name: 'batch_insert_into_append_only_tree_32_depth_ms',
-    groupBy: 'leaf-count',
-    description: 'Time to insert a batch of leaves into an append-only tree',
-    events: ['tree-insertion'],
-  },
-  {
-    name: 'batch_insert_into_append_only_tree_32_depth_hash_count',
-    groupBy: 'leaf-count',
-    description: 'The number of hashes necessary to insert a batch of leaves into',
-    events: ['tree-insertion'],
-  },
-  {
-    name: 'batch_insert_into_append_only_tree_32_depth_hash_ms',
-    groupBy: 'leaf-count',
-    description: 'Average duration for a hash operation',
-    events: ['tree-insertion'],
-  },
-  {
-    name: 'batch_insert_into_indexed_tree_20_depth_ms',
-    groupBy: 'leaf-count',
-    description: 'Time to insert a batch of leaves into an indexed tree',
-    events: ['tree-insertion'],
-  },
-  {
-    name: 'batch_insert_into_indexed_tree_20_depth_hash_count',
-    groupBy: 'leaf-count',
-    description: 'The number of hashes necessary to insert a batch of leaves into',
-    events: ['tree-insertion'],
-  },
-  {
-    name: 'batch_insert_into_indexed_tree_20_depth_hash_ms',
-    groupBy: 'leaf-count',
-    description: 'Average duration for a hash operation',
-    events: ['tree-insertion'],
-  },
-  {
-    name: 'batch_insert_into_indexed_tree_40_depth_ms',
-    groupBy: 'leaf-count',
-    description: 'Time to insert a batch of leaves into an indexed tree',
-    events: ['tree-insertion'],
-  },
-  {
-    name: 'batch_insert_into_indexed_tree_40_depth_hash_count',
-    groupBy: 'leaf-count',
-    description: 'The number of hashes necessary to insert a batch of leaves into',
-    events: ['tree-insertion'],
-  },
-  {
-    name: 'batch_insert_into_indexed_tree_40_depth_hash_ms',
-    groupBy: 'leaf-count',
-    description: 'Average duration for a hash operation',
-    events: ['tree-insertion'],
-  },
-] as const satisfies readonly Metric[];
-
-/** Metric definitions to track from benchmarks. */
-export type Metrics = typeof Metrics;
-
-/** Type of valid metric names. */
-export type MetricName = Metrics[number]['name'];
diff --git a/yarn-project/end-to-end/scripts/e2e_test.sh b/yarn-project/end-to-end/scripts/e2e_test.sh
index 54afed4fd8d..a2c8ddc5ac8 100755
--- a/yarn-project/end-to-end/scripts/e2e_test.sh
+++ b/yarn-project/end-to-end/scripts/e2e_test.sh
@@ -48,6 +48,9 @@ if [ "$ignore_failures" = "true" ]; then
   echo "Ignoring failures for test $TEST"
 fi
 
+# Init output folder
+mkdir -p ./out
+
 # Check if the test uses docker compose
 if [ "$(echo "$test_config" | yq e '.use_compose // false' -)" = "true" ]; then
   "$e2e_root/scripts/e2e_compose_test.sh" "$test_path" "$@" || [ "$ignore_failures" = "true" ]
@@ -71,6 +74,7 @@ else
       -e HARDWARE_CONCURRENCY="$HARDWARE_CONCURRENCY" \
       -e FAKE_PROOFS="$FAKE_PROOFS" \
       $env_args \
+      --volume ./out:/out \
       --rm aztecprotocol/end-to-end:$AZTEC_DOCKER_TAG \
       "$test_path" "$@" || [ "$ignore_failures" = "true" ]
   fi
diff --git a/yarn-project/end-to-end/scripts/e2e_test_config.yml b/yarn-project/end-to-end/scripts/e2e_test_config.yml
index 36ebb2e9ed3..5b3a9efd3dd 100644
--- a/yarn-project/end-to-end/scripts/e2e_test_config.yml
+++ b/yarn-project/end-to-end/scripts/e2e_test_config.yml
@@ -1,22 +1,8 @@
 tests:
   base: {}
-  bench_prover:
+  bench_build_block:
     env:
-      HARDWARE_CONCURRENCY: '32'
-      LOG_LEVEL: 'verbose; debug: aztec:benchmarks,aztec:sequencer,aztec:world_state,aztec:merkle_trees'
-    command: './scripts/e2e_compose_test.sh bench_prover'
-  bench_publish_rollup:
-    env:
-      HARDWARE_CONCURRENCY: '32'
-      COMPOSE_FILE: 'scripts/docker-compose-no-sandbox.yml'
-      LOG_LEVEL: 'verbose; debug: aztec:benchmarks,aztec:sequencer,aztec:world_state,aztec:merkle_trees'
-    command: './scripts/e2e_compose_test.sh bench_publish_rollup'
-  bench_tx_size:
-    env:
-      HARDWARE_CONCURRENCY: '32'
-      COMPOSE_FILE: 'scripts/docker-compose-no-sandbox.yml'
-      LOG_LEVEL: 'verbose; debug: aztec:benchmarks,aztec:sequencer,aztec:world_state,aztec:merkle_trees'
-    command: './scripts/e2e_compose_test.sh bench_tx_size'
+      BENCH_OUTPUT: '/out/bench.json'
   e2e_2_pxes: {}
   e2e_account_contracts: {}
   e2e_amm: {}
diff --git a/yarn-project/end-to-end/src/bench/bench_build_block.test.ts b/yarn-project/end-to-end/src/bench/bench_build_block.test.ts
new file mode 100644
index 00000000000..2955d2956ee
--- /dev/null
+++ b/yarn-project/end-to-end/src/bench/bench_build_block.test.ts
@@ -0,0 +1,42 @@
+import { type BenchmarkingContract } from '@aztec/noir-contracts.js/Benchmarking';
+import { type SequencerClient } from '@aztec/sequencer-client';
+import { Metrics } from '@aztec/telemetry-client';
+
+import { type EndToEndContext } from '../fixtures/utils.js';
+import { benchmarkSetup, sendTxs, waitTxs } from './utils.js';
+
+describe('benchmarks/build_block', () => {
+  const TX_COUNT = 32;
+  let context: EndToEndContext;
+  let contract: BenchmarkingContract;
+  let sequencer: SequencerClient;
+
+  beforeEach(async () => {
+    const benchmark = await benchmarkSetup({
+      maxTxsPerBlock: 1024,
+      enforceTimeTable: false, // No deadlines: the sequencer may take as long as it needs
+      metrics: [
+        Metrics.SEQUENCER_BLOCK_BUILD_DURATION,
+        Metrics.SEQUENCER_BLOCK_BUILD_INSERTION_TIME,
+        {
+          // The benchmark action needs all metrics to point the same way (all "bigger-is-better" or all
+          // "smaller-is-better"), so mana-per-second is reported inverted, as microseconds per mana.
+          name: 'aztec.sequencer.block.time_per_mana',
+          source: Metrics.SEQUENCER_BLOCK_BUILD_MANA_PER_SECOND,
+          unit: 'us/mana',
+          transform: (value: number) => 1e6 / value,
+        },
+      ],
+    });
+    ({ context, contract, sequencer } = benchmark);
+  });
+
+  // The teardown installed by benchmarkSetup also writes out the collected benchmark metrics
+  afterEach(() => context.teardown());
+
+  it(`builds a block with ${TX_COUNT} txs`, async () => {
+    sequencer.updateSequencerConfig({ minTxsPerBlock: TX_COUNT });
+    const txs = await sendTxs(TX_COUNT, context, contract);
+    await waitTxs(txs, context);
+  });
+});
diff --git a/yarn-project/end-to-end/src/benchmarks/utils.ts b/yarn-project/end-to-end/src/bench/utils.ts
similarity index 51%
rename from yarn-project/end-to-end/src/benchmarks/utils.ts
rename to yarn-project/end-to-end/src/bench/utils.ts
index 05c90892ae7..4fb6bb568b8 100644
--- a/yarn-project/end-to-end/src/benchmarks/utils.ts
+++ b/yarn-project/end-to-end/src/bench/utils.ts
@@ -1,25 +1,98 @@
-import { type AztecNodeConfig, type AztecNodeService } from '@aztec/aztec-node';
-import { type AztecNode, BatchCall, INITIAL_L2_BLOCK_NUM, type SentTx, sleep } from '@aztec/aztec.js';
-import { times } from '@aztec/foundation/collection';
+import { type AztecNodeService } from '@aztec/aztec-node';
+import { type AztecNode, BatchCall, INITIAL_L2_BLOCK_NUM, type SentTx, type WaitOpts } from '@aztec/aztec.js';
+import { mean, stdDev, times } from '@aztec/foundation/collection';
 import { randomInt } from '@aztec/foundation/crypto';
 import { BenchmarkingContract } from '@aztec/noir-contracts.js/Benchmarking';
 import { type PXEService, type PXEServiceConfig, createPXEService } from '@aztec/pxe';
+import { type Metrics } from '@aztec/telemetry-client';
+import {
+  type BenchmarkDataPoint,
+  type BenchmarkMetrics,
+  type BenchmarkTelemetryClient,
+} from '@aztec/telemetry-client/bench';
 
+import { writeFileSync } from 'fs';
 import { mkdirpSync } from 'fs-extra';
 import { globSync } from 'glob';
 import { join } from 'path';
 
-import { type EndToEndContext, setup } from '../fixtures/utils.js';
+import { type EndToEndContext, type SetupOptions, setup } from '../fixtures/utils.js';
 
 /**
  * Setup for benchmarks. Initializes a remote node with a single account and deploys a benchmark contract.
  */
-export async function benchmarkSetup(opts: Partial<AztecNodeConfig>) {
-  const context = await setup(1, { ...opts });
+export async function benchmarkSetup(
+  opts: Partial<SetupOptions> & {
+    /** What metrics to export */ metrics: (Metrics | MetricFilter)[];
+    /** Where to output the benchmark data (defaults to BENCH_OUTPUT or bench.json) */
+    benchOutput?: string;
+  },
+) {
+  const context = await setup(1, { ...opts, telemetryConfig: { benchmark: true } });
   const contract = await BenchmarkingContract.deploy(context.wallet).send().deployed();
   context.logger.info(`Deployed benchmarking contract at ${contract.address}`);
   const sequencer = (context.aztecNode as AztecNodeService).getSequencer()!;
-  return { context, contract, sequencer };
+  const telemetry = context.telemetryClient! as BenchmarkTelemetryClient;
+  telemetry.clear();
+  context.logger.warn(`Cleared benchmark data points from setup`);
+  const origTeardown = context.teardown.bind(context);
+  context.teardown = async () => {
+    // Export the selected metrics first, then hand over to the teardown that setup() installed
+    await telemetry.flush();
+    const formatted = formatMetricsForGithubBenchmarkAction(telemetry.getMeters(), opts.metrics);
+    const benchOutput = opts.benchOutput ?? process.env.BENCH_OUTPUT ?? 'bench.json';
+    writeFileSync(benchOutput, JSON.stringify(formatted));
+    context.logger.info(`Wrote ${formatted.length} metrics to ${benchOutput}`);
+    await origTeardown();
+  };
+  return { telemetry, context, contract, sequencer };
+}
+
+type MetricFilter = {
+  /** Telemetry metric whose data points are read */ source: Metrics;
+  /** Applied to every data point value before aggregation */ transform: (value: number) => number;
+  /** Name reported for the resulting benchmark entry (prefixed with the meter name) */ name: string;
+  /** Unit to report; when omitted, the unit recorded with the source metric is used */ unit?: string;
+};
+
+// See https://github.com/benchmark-action/github-action-benchmark/blob/e3c661617bc6aa55f26ae4457c737a55545a86a4/src/extract.ts#L659-L670
+type GithubActionBenchmarkResult = {
+  /** Built in this file as `<meter name>/<filter name>` */ name: string;
+  /** The single recorded data point, or the mean when there are several */ value: number;
+  /** `± <standard deviation>`; only set when there are several data points */ range?: string;
+  unit: string;
+  /** Never populated by this file */ extra?: string;
+};
+
+function formatMetricsForGithubBenchmarkAction(
+  data: BenchmarkMetrics,
+  filter: (Metrics | MetricFilter)[],
+): GithubActionBenchmarkResult[] {
+  // Plain metric names become pass-through filters; one metric may feed several filters (e.g. raw + derived)
+  const allFilters: MetricFilter[] = filter.map(f =>
+    typeof f === 'string' ? { name: f, source: f, transform: (x: number) => x } : f,
+  );
+  return data.flatMap(meter =>
+    meter.metrics
+      .flatMap(metric => allFilters.filter(f => f.source === metric.name).map(f => [metric, f] as const))
+      .map(([metric, metricFilter]) => ({
+        name: `${meter.name}/${metricFilter.name}`,
+        unit: metricFilter.unit ?? metric.unit ?? 'unknown',
+        ...getMetricValues(metric.points.map(p => ({ ...p, value: metricFilter.transform(p.value) }))),
+      }))
+      .filter((result): result is GithubActionBenchmarkResult => result.value !== undefined),
+  );
+}
+
+function getMetricValues(points: BenchmarkDataPoint[]) {
+  const samples = points.map(({ value }) => value);
+  if (samples.length === 0) {
+    return {}; // nothing recorded, so no value to report
+  }
+  if (samples.length === 1) {
+    return { value: samples[0] }; // a single sample has no spread to report
+  }
+  return { value: mean(samples), range: `± ${stdDev(samples)}` };
 }
 
 /**
@@ -79,14 +152,16 @@ export async function sendTxs(
   contract: BenchmarkingContract,
 ): Promise<SentTx[]> {
   const calls = times(txCount, index => makeCall(index, context, contract));
+  context.logger.info(`Creating ${txCount} txs`);
   const provenTxs = await Promise.all(calls.map(call => call.prove({ skipPublicSimulation: true })));
-  const sentTxs = provenTxs.map(tx => tx.send());
-
-  // Awaiting txHash waits until the aztec node has received the tx into its p2p pool
-  await Promise.all(sentTxs.map(tx => tx.getTxHash()));
-  await sleep(100);
+  context.logger.info(`Sending ${txCount} txs`);
+  return provenTxs.map(tx => tx.send());
+}
 
-  return sentTxs;
+export async function waitTxs(txs: SentTx[], context: EndToEndContext, txWaitOpts?: WaitOpts) {
+  context.logger.info(`Awaiting ${txs.length} txs to be mined`);
+  await Promise.all(txs.map(tx => tx.wait(txWaitOpts))); // waits concurrently; rejects on the first failed wait
+  context.logger.info(`All ${txs.length} txs have been mined`);
 }
 
 /**
diff --git a/yarn-project/end-to-end/src/benchmarks/README.md b/yarn-project/end-to-end/src/benchmarks/README.md
deleted file mode 100644
index c737a87b6ab..00000000000
--- a/yarn-project/end-to-end/src/benchmarks/README.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# Benchmarks
-
-Tests in this folder are meant to used for benchmarking. Stats are collected by instrumenting relevant sections of the code and emitting stats via structured logging. All stats are strongly-typed and defined in `yarn-project/types/src/stats/stats.ts`.
-
-These stats are emitted to jsonl files named after the test being run if the `BENCHMARK` flag is enabled or if running on `CI`. This setup happens when calling the `setup` helper of e2e tests in `yarn-project/end-to-end/src/fixtures/logging.ts`. Note that by default stats from all e2e tests are collected on the CI, and are uploaded to S3 using the `upload_logs_to_s3.sh` script called at the end of `run_tests_local`. All jsonl files are uploaded to the `aztec-ci-artifacts` bucket under the `logs` folder. Tests run in master are uploaded to `logs/master/COMMIT_HASH`, while tests from a PR are uploaded to `logs/pulls/PULL_REQUEST_NUMBER`.
-
-## Benchmark summaries
-
-After all benchmark tests are executed, a `bench-summary` CI job takes care of aggregating them, using the scripts in `yarn-project/scripts/benchmarks` orchestrated by `scripts/ci/assemble_e2e_benchmark.sh`. This script downloads all jsonl files, extracts metrics grouped by block size or chain length, and outputs an aggregated benchmark json file which is uploaded to S3. This file is uploaded to the same `aztec-ci-artifacts` bucket but under the `benchmarks` folder.
-
-Metrics are strongly typed as well and defined in `yarn-project/types/src/stats/metrics.ts`, while the `yarn-project/scripts/src/benchmarks/aggregate.ts` script takes care of generating them out of the collected stats from the jsonl files.
-
-Once the summary is generated, if the benchmark run is on a PR, then the summary job will also download the latest benchmark from master, compare it against the current run, generate a markdown summary, and post it to the pull request on github. This uses the `AZTEC_BOT_COMMENTER_GITHUB_TOKEN`, which is a fine-grained personal access token from the `AztecBot` github user with rw permissions on issues and pull requests.
-
-## Local development
-
-To test locally, first run one or more benchmark e2e tests locally from the `yarn-project/end-to-end` folder. It may be a good idea to shorten them so they run faster, for example:
-
-```
-$ BENCHMARK_BLOCK_SIZES=4 BENCHMARK=1 yarn test bench_publish
-```
-
-This should've generated one or more jsonl files in `yarn-project/end-to-end/log`
diff --git a/yarn-project/end-to-end/src/benchmarks/bench_process_history.test.ts b/yarn-project/end-to-end/src/benchmarks/bench_process_history.test.ts
deleted file mode 100644
index 167548937a7..00000000000
--- a/yarn-project/end-to-end/src/benchmarks/bench_process_history.test.ts
+++ /dev/null
@@ -1,94 +0,0 @@
-import { type AztecNodeConfig, AztecNodeService } from '@aztec/aztec-node';
-import { AztecAddress, Fr, INITIAL_L2_BLOCK_NUM, elapsed, sleep } from '@aztec/aztec.js';
-import {
-  BENCHMARK_HISTORY_BLOCK_SIZE,
-  BENCHMARK_HISTORY_CHAIN_LENGTHS,
-  type NodeSyncedChainHistoryStats,
-} from '@aztec/circuit-types/stats';
-import { type BenchmarkingContract } from '@aztec/noir-contracts.js/Benchmarking';
-import { type SequencerClient } from '@aztec/sequencer-client';
-
-import { type EndToEndContext } from '../fixtures/utils.js';
-import { benchmarkSetup, createNewPXE, getFolderSize, makeDataDirectory, sendTxs } from './utils.js';
-
-const BLOCK_SIZE = BENCHMARK_HISTORY_BLOCK_SIZE;
-const CHAIN_LENGTHS = BENCHMARK_HISTORY_CHAIN_LENGTHS;
-const MAX_CHAIN_LENGTH = CHAIN_LENGTHS[CHAIN_LENGTHS.length - 1];
-
-let setupBlockCount: number;
-
-describe('benchmarks/process_history', () => {
-  let context: EndToEndContext;
-  let contract: BenchmarkingContract;
-  let sequencer: SequencerClient;
-
-  beforeEach(async () => {
-    ({ context, contract, sequencer } = await benchmarkSetup({ maxTxsPerBlock: BLOCK_SIZE }));
-    setupBlockCount = await context.aztecNode.getBlockNumber();
-  });
-
-  it(
-    `processes chain history of ${MAX_CHAIN_LENGTH} with ${BLOCK_SIZE}-tx blocks`,
-    async () => {
-      // Ensure each block has exactly BLOCK_SIZE txs
-      sequencer.updateSequencerConfig({ minTxsPerBlock: BLOCK_SIZE });
-      let lastBlock = 0;
-
-      for (const chainLength of CHAIN_LENGTHS) {
-        // Send enough txs to move the chain to the next block number checkpoint
-        const txCount = (chainLength - lastBlock) * BLOCK_SIZE;
-        const sentTxs = await sendTxs(txCount, context, contract);
-        await Promise.all(sentTxs.map(tx => tx.wait({ timeout: 5 * 60_000 })));
-        await sleep(100);
-
-        // Create a new node and measure how much time it takes it to sync
-        const dataDirectory = makeDataDirectory(chainLength);
-        context.logger.info(`Set up data directory at ${dataDirectory}`);
-        const nodeConfig: AztecNodeConfig = { ...context.config, disableValidator: true, dataDirectory };
-        const [nodeSyncTime, node] = await elapsed(async () => {
-          const node = await AztecNodeService.createAndSync(nodeConfig);
-          // call getPublicStorageAt (which calls #getWorldState, which calls #syncWorldState) to force a sync with
-          // world state to ensure the node has caught up
-          await node.getPublicStorageAt(AztecAddress.random(), Fr.random(), 'latest');
-          return node;
-        });
-
-        const blockNumber = await node.getBlockNumber();
-        expect(blockNumber).toEqual(chainLength + setupBlockCount);
-
-        context.logger.info(`Node synced chain up to block ${chainLength}`, {
-          eventName: 'node-synced-chain-history',
-          txCount: BLOCK_SIZE * chainLength,
-          txsPerBlock: BLOCK_SIZE,
-          duration: nodeSyncTime,
-          blockNumber,
-          blockCount: chainLength,
-          dbSize: getFolderSize(dataDirectory),
-        } satisfies NodeSyncedChainHistoryStats);
-
-        // Create a new pxe and measure how much time it takes it to sync with failed and successful decryption
-        // Skip the first two blocks used for setup (create account contract and deploy benchmarking contract)
-        context.logger.info(`Starting new pxe`);
-        const pxe = await createNewPXE(node, contract, INITIAL_L2_BLOCK_NUM + setupBlockCount);
-
-        // Register the owner account and wait until it's synced so we measure how much time it took
-        context.logger.info(`Registering owner account on new pxe`);
-        const partialAddress = context.wallet.getCompleteAddress().partialAddress;
-        const secretKey = context.wallet.getSecretKey();
-        await pxe.registerAccount(secretKey, partialAddress);
-
-        // Repeat for another account that didn't receive any notes for them, so we measure trial-decrypts
-        context.logger.info(`Registering fresh account on new pxe`);
-        await pxe.registerAccount(Fr.random(), Fr.random());
-
-        // Stop the external node
-        await node.stop();
-
-        lastBlock = chainLength;
-      }
-
-      await context.teardown();
-    },
-    60 * 60_000,
-  );
-});
diff --git a/yarn-project/end-to-end/src/benchmarks/bench_prover.test.ts b/yarn-project/end-to-end/src/benchmarks/bench_prover.test.ts
deleted file mode 100644
index b4151bd06fa..00000000000
--- a/yarn-project/end-to-end/src/benchmarks/bench_prover.test.ts
+++ /dev/null
@@ -1,259 +0,0 @@
-import { getSchnorrAccount, getSchnorrWallet } from '@aztec/accounts/schnorr';
-import { PublicFeePaymentMethod, TxStatus, sleep } from '@aztec/aztec.js';
-import { type AccountWallet } from '@aztec/aztec.js/wallet';
-import { BBCircuitVerifier } from '@aztec/bb-prover';
-import { CompleteAddress, FEE_FUNDING_FOR_TESTER_ACCOUNT, Fq, Fr, GasSettings } from '@aztec/circuits.js';
-import { FPCContract } from '@aztec/noir-contracts.js/FPC';
-import { FeeJuiceContract } from '@aztec/noir-contracts.js/FeeJuice';
-import { TestContract } from '@aztec/noir-contracts.js/Test';
-import { TokenContract } from '@aztec/noir-contracts.js/Token';
-import { ProtocolContractAddress } from '@aztec/protocol-contracts';
-import { type PXEService, type PXEServiceConfig, createPXEService } from '@aztec/pxe';
-
-import { jest } from '@jest/globals';
-
-import { getACVMConfig } from '../fixtures/get_acvm_config.js';
-import { getBBConfig } from '../fixtures/get_bb_config.js';
-import { type EndToEndContext, setup } from '../fixtures/utils.js';
-import { FeeJuicePortalTestingHarnessFactory } from '../shared/gas_portal_test_harness.js';
-
-// TODO(@PhilWindle): Some part of this test are commented out until we speed up proving.
-
-jest.setTimeout(1_800_000);
-
-const txTimeoutSec = 3600;
-
-// How many times we'll run bb verify on each tx for benchmarking purposes
-const txVerifyIterations = process.env.BENCH_TX_VERIFY_ITERATIONS
-  ? parseInt(process.env.BENCH_TX_VERIFY_ITERATIONS)
-  : 10;
-
-// This makes AVM proving throw if there's a failure.
-process.env.AVM_PROVING_STRICT = '1';
-
-describe('benchmarks/proving', () => {
-  let ctx: EndToEndContext;
-
-  let schnorrWalletSalt: Fr;
-  let schnorrWalletEncKey: Fr;
-  let schnorrWalletSigningKey: Fq;
-  let schnorrWalletAddress: CompleteAddress;
-
-  let recipient: CompleteAddress;
-  let feeRecipient: CompleteAddress; // The address that receives the fees from the fee refund flow.
-
-  let initialGasContract: FeeJuiceContract;
-  let initialTestContract: TestContract;
-  let initialTokenContract: TokenContract;
-  let initialFpContract: FPCContract;
-
-  let provingPxes: PXEService[];
-
-  let acvmCleanup: () => Promise<void>;
-  let bbCleanup: () => Promise<void>;
-
-  // setup the environment quickly using fake proofs
-  beforeAll(async () => {
-    ctx = await setup(
-      1,
-      {
-        // do setup with fake proofs
-        realProofs: false,
-        proverAgentCount: 4,
-        proverAgentPollIntervalMs: 10,
-        minTxsPerBlock: 1,
-      },
-      {},
-    );
-
-    schnorrWalletSalt = Fr.random();
-    schnorrWalletEncKey = Fr.random();
-    schnorrWalletSigningKey = Fq.random();
-
-    feeRecipient = CompleteAddress.random();
-    recipient = CompleteAddress.random();
-
-    const initialSchnorrWallet = await getSchnorrAccount(
-      ctx.pxe,
-      schnorrWalletEncKey,
-      schnorrWalletSigningKey,
-      schnorrWalletSalt,
-    )
-      .deploy({
-        skipClassRegistration: false,
-        skipPublicDeployment: false,
-      })
-      .getWallet();
-    schnorrWalletAddress = initialSchnorrWallet.getCompleteAddress();
-
-    initialTestContract = await TestContract.deploy(initialSchnorrWallet).send().deployed();
-    initialTokenContract = await TokenContract.deploy(
-      initialSchnorrWallet,
-      initialSchnorrWallet.getAddress(),
-      'test',
-      't',
-      18,
-    )
-      .send()
-      .deployed();
-    initialGasContract = await FeeJuiceContract.at(ProtocolContractAddress.FeeJuice, initialSchnorrWallet);
-    initialFpContract = await FPCContract.deploy(initialSchnorrWallet, initialTokenContract.address, feeRecipient)
-      .send()
-      .deployed();
-
-    const feeJuiceBridgeTestHarness = await FeeJuicePortalTestingHarnessFactory.create({
-      aztecNode: ctx.aztecNode,
-      pxeService: ctx.pxe,
-      publicClient: ctx.deployL1ContractsValues.publicClient,
-      walletClient: ctx.deployL1ContractsValues.walletClient,
-      wallet: ctx.wallets[0],
-      logger: ctx.logger,
-    });
-
-    const { claimSecret, messageLeafIndex } = await feeJuiceBridgeTestHarness.prepareTokensOnL1(
-      FEE_FUNDING_FOR_TESTER_ACCOUNT,
-      initialFpContract.address,
-    );
-
-    const from = initialSchnorrWallet.getAddress(); // we are setting from to initial schnorr wallet here because of TODO(#9887)
-    await Promise.all([
-      initialGasContract.methods
-        .claim(initialFpContract.address, FEE_FUNDING_FOR_TESTER_ACCOUNT, claimSecret, messageLeafIndex)
-        .send()
-        .wait(),
-      initialTokenContract.methods
-        .mint_to_public(initialSchnorrWallet.getAddress(), FEE_FUNDING_FOR_TESTER_ACCOUNT)
-        .send()
-        .wait(),
-      initialTokenContract.methods
-        .mint_to_private(from, initialSchnorrWallet.getAddress(), FEE_FUNDING_FOR_TESTER_ACCOUNT)
-        .send()
-        .wait(),
-    ]);
-  });
-
-  // remove the fake prover and setup the real one
-  beforeAll(async () => {
-    const [acvmConfig, bbConfig] = await Promise.all([getACVMConfig(ctx.logger), getBBConfig(ctx.logger)]);
-    if (!acvmConfig || !bbConfig) {
-      throw new Error('Missing ACVM or BB config');
-    }
-
-    acvmCleanup = acvmConfig.cleanup;
-    bbCleanup = bbConfig.cleanup;
-
-    ctx.logger.info('Stopping fake provers');
-    await ctx.aztecNode.setConfig({
-      proverAgentCount: 1,
-      realProofs: true,
-      minTxsPerBlock: 2,
-    });
-
-    ctx.logger.info('Starting real provers');
-
-    ctx.logger.info('Starting PXEs configured with real proofs');
-    provingPxes = [];
-    for (let i = 0; i < 4; i++) {
-      const l1Contracts = await ctx.aztecNode.getL1ContractAddresses();
-      const pxeConfig = {
-        proverEnabled: true,
-        bbBinaryPath: bbConfig.bbBinaryPath,
-        bbWorkingDirectory: bbConfig.bbWorkingDirectory,
-        l2BlockPollingIntervalMS: 1000,
-        l2StartingBlock: 1,
-        dataDirectory: undefined,
-        dataStoreMapSizeKB: 1024 * 1024,
-        l1Contracts,
-      } as PXEServiceConfig;
-      const pxe = await createPXEService(ctx.aztecNode, pxeConfig, `proving-pxe-${i}`);
-
-      await getSchnorrAccount(pxe, schnorrWalletEncKey, schnorrWalletSigningKey, schnorrWalletSalt).register();
-      await pxe.registerContract(initialTokenContract);
-      await pxe.registerContract(initialTestContract);
-      await pxe.registerContract(initialFpContract);
-
-      provingPxes.push(pxe);
-    }
-    /*TODO(post-honk): We wait 5 seconds for a race condition in setting up 4 nodes.
-     What is a more robust solution? */
-    await sleep(5000);
-  });
-
-  afterAll(async () => {
-    await ctx.teardown();
-    await acvmCleanup();
-    await bbCleanup();
-  });
-
-  it('builds a full block', async () => {
-    ctx.logger.info('+----------------------+');
-    ctx.logger.info('|                      |');
-    ctx.logger.info('|  STARTING BENCHMARK  |');
-    ctx.logger.info('|                      |');
-    ctx.logger.info('+----------------------+');
-
-    const fnCalls = [
-      (await getTokenContract(0)).methods.transfer_in_public(schnorrWalletAddress.address, recipient.address, 1000, 0),
-      (await getTokenContract(1)).methods.transfer(recipient.address, 1000),
-      // (await getTestContractOnPXE(2)).methods.emit_unencrypted(43),
-      // (await getTestContractOnPXE(3)).methods.create_l2_to_l1_message_public(45, 46, EthAddress.random()),
-    ];
-
-    const wallet = await getWalletOnPxe(0);
-    const gasSettings = GasSettings.default({ maxFeesPerGas: await wallet.getCurrentBaseFees() });
-
-    const feeFnCall0 = {
-      gasSettings,
-      paymentMethod: new PublicFeePaymentMethod(initialFpContract.address, wallet),
-    };
-
-    // const feeFnCall1 = {
-    //   gasSettings,
-    //   paymentMethod: new PrivateFeePaymentMethod(
-    //     initialFpContract.address,
-    //     await getWalletOnPxe(1),
-    //   ),
-    // };
-
-    ctx.logger.info('Proving transactions');
-    const provenTxs = await Promise.all([
-      fnCalls[0].prove({ fee: feeFnCall0 }),
-      fnCalls[1].prove(),
-      // fnCalls[2].prove(),
-      // fnCalls[3].prove(),
-    ]);
-
-    ctx.logger.info('Verifying transactions client proofs');
-    const verifier = await BBCircuitVerifier.new((await getBBConfig(ctx.logger))!);
-    for (let i = 0; i < txVerifyIterations; i++) {
-      for (const tx of provenTxs) {
-        expect(await verifier.verifyProof(tx)).toBe(true);
-      }
-    }
-
-    ctx.logger.info('Sending transactions');
-    const txs = [
-      fnCalls[0].send({ fee: feeFnCall0 }),
-      fnCalls[1].send(),
-      // fnCalls[2].send(),
-      // fnCalls[3].send(),
-    ];
-
-    const receipts = await Promise.all(txs.map(tx => tx.wait({ timeout: txTimeoutSec })));
-    expect(receipts.every(r => r.status === TxStatus.SUCCESS)).toBe(true);
-  });
-
-  function getWalletOnPxe(idx: number): Promise<AccountWallet> {
-    return getSchnorrWallet(provingPxes[idx], schnorrWalletAddress.address, schnorrWalletSigningKey);
-  }
-
-  // async function getTestContractOnPXE(idx: number): Promise<TestContract> {
-  //   const wallet = await getWalletOnPxe(idx);
-  //   return TestContract.at(initialTestContract.address, wallet);
-  // }
-
-  async function getTokenContract(idx: number): Promise<TokenContract> {
-    const wallet = await getWalletOnPxe(idx);
-    return TokenContract.at(initialTokenContract.address, wallet);
-  }
-});
diff --git a/yarn-project/end-to-end/src/benchmarks/bench_publish_rollup.test.ts b/yarn-project/end-to-end/src/benchmarks/bench_publish_rollup.test.ts
deleted file mode 100644
index 97d7fb4b17b..00000000000
--- a/yarn-project/end-to-end/src/benchmarks/bench_publish_rollup.test.ts
+++ /dev/null
@@ -1,61 +0,0 @@
-import { AztecNodeService } from '@aztec/aztec-node';
-import { AztecAddress, Fr } from '@aztec/aztec.js';
-import { BENCHMARK_BLOCK_SIZES } from '@aztec/circuit-types/stats';
-import { type BenchmarkingContract } from '@aztec/noir-contracts.js/Benchmarking';
-import { type SequencerClient } from '@aztec/sequencer-client';
-
-import { type EndToEndContext } from '../fixtures/utils.js';
-import { benchmarkSetup, createNewPXE, sendTxs } from './utils.js';
-
-describe('benchmarks/publish_rollup', () => {
-  let context: EndToEndContext;
-  let contract: BenchmarkingContract;
-  let sequencer: SequencerClient;
-
-  beforeEach(async () => {
-    ({ context, contract, sequencer } = await benchmarkSetup({ maxTxsPerBlock: 1024 }));
-  });
-
-  it.each(BENCHMARK_BLOCK_SIZES)(
-    `publishes a rollup with %d txs`,
-    async (txCount: number) => {
-      await sequencer.stop();
-
-      // Simulate and simultaneously send ROLLUP_SIZE txs. These should not yet be processed since sequencer is stopped.
-      context.logger.info(`Assembling rollup with ${txCount} txs`);
-      const sentTxs = await sendTxs(txCount, context, contract);
-      context.logger.info(`Sent ${txCount} txs`);
-      // Restart sequencer to process all txs together
-      sequencer.restart();
-
-      // Wait for the last tx to be processed and stop the current node
-      const { blockNumber } = await sentTxs[sentTxs.length - 1].wait({ timeout: 5 * 60_000 });
-      await context.teardown();
-
-      // Create a new aztec node to measure sync time of the block
-      // and call getPublicStorageAt (which calls #getWorldState, which calls #syncWorldState) to force a sync with
-      // world state to ensure the node has caught up
-      context.logger.info(`Starting new aztec node`);
-      const node = await AztecNodeService.createAndSync({ ...context.config, disableValidator: true });
-      await node.getPublicStorageAt(AztecAddress.random(), Fr.random(), 'latest');
-
-      // Spin up a new pxe and sync it, we'll use it to test sync times of new accounts for the last block
-      context.logger.info(`Starting new pxe`);
-      const pxe = await createNewPXE(node, contract, blockNumber! - 1);
-
-      // Register the owner account and wait until it's synced so we measure how much time it took
-      context.logger.info(`Registering owner account on new pxe`);
-      const partialAddress = context.wallet.getCompleteAddress().partialAddress;
-      const secretKey = context.wallet.getSecretKey();
-      await pxe.registerAccount(secretKey, partialAddress);
-
-      // Repeat for another account that didn't receive any notes for them, so we measure trial-decrypts
-      context.logger.info(`Registering fresh account on new pxe`);
-      await pxe.registerAccount(Fr.random(), Fr.random());
-
-      // Stop the external node
-      await node.stop();
-    },
-    20 * 60_000,
-  );
-});
diff --git a/yarn-project/end-to-end/src/benchmarks/bench_tx_size_fees.test.ts b/yarn-project/end-to-end/src/benchmarks/bench_tx_size_fees.test.ts
deleted file mode 100644
index 02b8f858f30..00000000000
--- a/yarn-project/end-to-end/src/benchmarks/bench_tx_size_fees.test.ts
+++ /dev/null
@@ -1,128 +0,0 @@
-import {
-  type AccountWalletWithSecretKey,
-  type AztecAddress,
-  FeeJuicePaymentMethod,
-  type FeePaymentMethod,
-  PrivateFeePaymentMethod,
-  PublicFeePaymentMethod,
-  TxStatus,
-} from '@aztec/aztec.js';
-import { FEE_FUNDING_FOR_TESTER_ACCOUNT, GasSettings } from '@aztec/circuits.js';
-import { FPCContract } from '@aztec/noir-contracts.js/FPC';
-import { FeeJuiceContract } from '@aztec/noir-contracts.js/FeeJuice';
-import { TokenContract } from '@aztec/noir-contracts.js/Token';
-import { ProtocolContractAddress } from '@aztec/protocol-contracts';
-
-import { jest } from '@jest/globals';
-
-import { type EndToEndContext, ensureAccountsPubliclyDeployed, setup } from '../fixtures/utils.js';
-import { FeeJuicePortalTestingHarnessFactory } from '../shared/gas_portal_test_harness.js';
-
-jest.setTimeout(100_000);
-
-describe('benchmarks/tx_size_fees', () => {
-  let ctx: EndToEndContext;
-
-  let aliceWallet: AccountWalletWithSecretKey;
-  let bobAddress: AztecAddress;
-  let sequencerAddress: AztecAddress;
-  let feeJuice: FeeJuiceContract;
-  let fpc: FPCContract;
-  let token: TokenContract;
-
-  // setup the environment
-  beforeAll(async () => {
-    ctx = await setup(3, {}, {});
-
-    aliceWallet = ctx.wallets[0];
-    bobAddress = ctx.wallets[1].getAddress();
-    sequencerAddress = ctx.wallets[2].getAddress();
-
-    await ctx.aztecNode.setConfig({
-      feeRecipient: sequencerAddress,
-    });
-
-    await ensureAccountsPubliclyDeployed(aliceWallet, ctx.wallets);
-  });
-
-  // deploy the contracts
-  beforeAll(async () => {
-    feeJuice = await FeeJuiceContract.at(ProtocolContractAddress.FeeJuice, aliceWallet);
-    token = await TokenContract.deploy(aliceWallet, aliceWallet.getAddress(), 'test', 'test', 18).send().deployed();
-
-    // We set Alice as the FPC admin to avoid the need for deployment of another account.
-    const fpcAdmin = aliceWallet.getAddress();
-    fpc = await FPCContract.deploy(aliceWallet, token.address, fpcAdmin).send().deployed();
-  });
-
-  // mint tokens
-  beforeAll(async () => {
-    const feeJuiceBridgeTestHarness = await FeeJuicePortalTestingHarnessFactory.create({
-      aztecNode: ctx.aztecNode,
-      pxeService: ctx.pxe,
-      publicClient: ctx.deployL1ContractsValues.publicClient,
-      walletClient: ctx.deployL1ContractsValues.walletClient,
-      wallet: ctx.wallets[0],
-      logger: ctx.logger,
-    });
-
-    const { claimSecret: fpcSecret, messageLeafIndex: fpcLeafIndex } =
-      await feeJuiceBridgeTestHarness.prepareTokensOnL1(FEE_FUNDING_FOR_TESTER_ACCOUNT, fpc.address);
-
-    const { claimSecret: aliceSecret, messageLeafIndex: aliceLeafIndex } =
-      await feeJuiceBridgeTestHarness.prepareTokensOnL1(FEE_FUNDING_FOR_TESTER_ACCOUNT, aliceWallet.getAddress());
-
-    await Promise.all([
-      feeJuice.methods.claim(fpc.address, FEE_FUNDING_FOR_TESTER_ACCOUNT, fpcSecret, fpcLeafIndex).send().wait(),
-      feeJuice.methods
-        .claim(aliceWallet.getAddress(), FEE_FUNDING_FOR_TESTER_ACCOUNT, aliceSecret, aliceLeafIndex)
-        .send()
-        .wait(),
-    ]);
-    const from = aliceWallet.getAddress(); // we are setting from to Alice here because of TODO(#9887)
-    await token.methods.mint_to_private(from, aliceWallet.getAddress(), FEE_FUNDING_FOR_TESTER_ACCOUNT).send().wait();
-    await token.methods.mint_to_public(aliceWallet.getAddress(), FEE_FUNDING_FOR_TESTER_ACCOUNT).send().wait();
-  });
-
-  it.each<[string, () => FeePaymentMethod | undefined /*bigint*/]>([
-    ['no', () => undefined /*200021120n*/],
-    [
-      'fee_juice',
-      () => new FeeJuicePaymentMethod(aliceWallet.getAddress()),
-      // Same cost as no fee payment, since payment is done natively
-      // 200021120n,
-    ],
-    [
-      'public fee',
-      () => new PublicFeePaymentMethod(fpc.address, aliceWallet),
-      // DA:
-      // non-rev: 1 nullifiers, overhead; rev: 2 note hashes, 1 nullifier, 1168 B enc note logs, 0 B enc logs,0 B unenc logs, teardown
-      // L2:
-      // non-rev: 0; rev: 0
-      // 200062330n,
-    ],
-    [
-      'private fee',
-      () => new PrivateFeePaymentMethod(fpc.address, aliceWallet),
-      // DA:
-      // non-rev: 3 nullifiers, overhead; rev: 2 note hashes, 1168 B enc note logs, 0 B enc logs, 0 B unenc logs, teardown
-      // L2:
-      // non-rev: 0; rev: 0
-      // 200032492n,
-    ],
-  ] as const)(
-    'sends a tx with a fee with %s payment method',
-    async (_name, createPaymentMethod /*expectedTransactionFee*/) => {
-      const paymentMethod = createPaymentMethod();
-      const gasSettings = GasSettings.default({ maxFeesPerGas: await aliceWallet.getCurrentBaseFees() });
-      const tx = await token.methods
-        .transfer(bobAddress, 1n)
-        .send({ fee: paymentMethod ? { gasSettings, paymentMethod } : undefined })
-        .wait();
-
-      expect(tx.status).toEqual(TxStatus.SUCCESS);
-      // TODO: reinstante this check when ossified
-      // expect(tx.transactionFee).toEqual(expectedTransactionFee);
-    },
-  );
-});
diff --git a/yarn-project/end-to-end/src/fixtures/utils.ts b/yarn-project/end-to-end/src/fixtures/utils.ts
index 14aaf2f7d91..6389f1bb1c3 100644
--- a/yarn-project/end-to-end/src/fixtures/utils.ts
+++ b/yarn-project/end-to-end/src/fixtures/utils.ts
@@ -48,8 +48,14 @@ import { type ProverNode, type ProverNodeConfig, createProverNode } from '@aztec
 import { type PXEService, type PXEServiceConfig, createPXEService, getPXEServiceConfig } from '@aztec/pxe';
 import { type SequencerClient } from '@aztec/sequencer-client';
 import { TestL1Publisher } from '@aztec/sequencer-client/test';
+import { type TelemetryClient } from '@aztec/telemetry-client';
+import { BenchmarkTelemetryClient } from '@aztec/telemetry-client/bench';
 import { NoopTelemetryClient } from '@aztec/telemetry-client/noop';
-import { createAndStartTelemetryClient, getConfigEnvVars as getTelemetryConfig } from '@aztec/telemetry-client/start';
+import {
+  type TelemetryClientConfig,
+  createAndStartTelemetryClient,
+  getConfigEnvVars as getTelemetryConfig,
+} from '@aztec/telemetry-client/start';
 
 import { type Anvil } from '@viem/anvil';
 import fs from 'fs/promises';
@@ -81,19 +87,24 @@ export { deployAndInitializeTokenAndBridgeContracts } from '../shared/cross_chai
 export { startAnvil };
 
 const { PXE_URL = '' } = process.env;
-
-const telemetryPromise = createAndStartTelemetryClient(getTelemetryConfig());
+const getAztecUrl = () => PXE_URL;
+
+let telemetryPromise: Promise<TelemetryClient> | undefined = undefined; // created lazily by getTelemetryClient()
+function getTelemetryClient(partialConfig: Partial<TelemetryClientConfig> & { benchmark?: boolean } = {}) {
+  if (!telemetryPromise) {
+    const config = { ...getTelemetryConfig(), ...partialConfig }; // partialConfig entries take precedence
+    telemetryPromise = config.benchmark
+      ? Promise.resolve(new BenchmarkTelemetryClient())
+      : createAndStartTelemetryClient(config);
+  }
+  return telemetryPromise; // cached: later calls get this same promise and their partialConfig is not read
+}
 if (typeof afterAll === 'function') {
   afterAll(async () => {
-    const client = await telemetryPromise;
-    await client.stop();
+    await (await telemetryPromise)?.stop();
   });
 }
 
-const getAztecUrl = () => {
-  return PXE_URL;
-};
-
 export const getPrivateKeyFromIndex = (index: number): Buffer | null => {
   const hdAccount = mnemonicToAccount(MNEMONIC, { addressIndex: index });
   const privKeyRaw = hdAccount.getHdKey().privateKey;
@@ -246,6 +257,7 @@ async function setupWithRemoteEnvironment(
     watcher: undefined,
     dateProvider: undefined,
     blobSink: undefined,
+    telemetryClient: undefined,
     teardown,
   };
 }
@@ -274,6 +286,8 @@ export type SetupOptions = {
   startProverNode?: boolean;
   /** Whether to fund the rewardDistributor */
   fundRewardDistributor?: boolean;
+  /** Manual config for the telemetry client */
+  telemetryConfig?: Partial<TelemetryClientConfig> & { benchmark?: boolean };
 } & Partial<AztecNodeConfig>;
 
 /** Context for an end-to-end test as returned by the `setup` function */
@@ -304,6 +318,8 @@ export type EndToEndContext = {
   dateProvider: TestDateProvider | undefined;
   /** The blob sink (undefined if connected to remote environment) */
   blobSink: BlobSinkServer | undefined;
+  /** Telemetry client */
+  telemetryClient: TelemetryClient | undefined;
   /** Function to stop the started services. */
   teardown: () => Promise<void>;
 };
@@ -456,7 +472,7 @@ export async function setup(
   }
   config.l1PublishRetryIntervalMS = 100;
 
-  const telemetry = await telemetryPromise;
+  const telemetry = await getTelemetryClient(opts.telemetryConfig);
 
   const blobSinkClient = createBlobSinkClient(config.blobSinkUrl);
   const publisher = new TestL1Publisher(config, { telemetry, blobSinkClient });
@@ -537,6 +553,7 @@ export async function setup(
     watcher,
     dateProvider,
     blobSink,
+    telemetryClient: telemetry,
     teardown,
   };
 }
diff --git a/yarn-project/foundation/src/collection/array.test.ts b/yarn-project/foundation/src/collection/array.test.ts
index e3be69ff586..fcce746bc76 100644
--- a/yarn-project/foundation/src/collection/array.test.ts
+++ b/yarn-project/foundation/src/collection/array.test.ts
@@ -1,4 +1,4 @@
-import { compactArray, maxBy, removeArrayPaddingEnd, times, unique } from './array.js';
+import { compactArray, maxBy, mean, median, removeArrayPaddingEnd, stdDev, times, unique, variance } from './array.js';
 
 describe('times', () => {
   it('should return an array with the result from all executions', () => {
@@ -79,3 +79,59 @@ describe('maxBy', () => {
     expect(maxBy([1, 2, 3], x => -x)).toEqual(1);
   });
 });
+
+describe('mean', () => {
+  it('calculates the mean of an array of numbers', () => {
+    expect(mean([1, 2, 3, 4, 5])).toBe(3);
+    expect(mean([10, 20, 30, 40, 50])).toBe(30);
+    expect(mean([-1, 0, 1])).toBe(0); // negative and positive values cancel out
+  });
+
+  it('handles empty array', () => {
+    expect(mean([])).toBeUndefined(); // no values to average: undefined, not NaN
+  });
+});
+
+describe('median', () => {
+  it('calculates the median of an array of numbers', () => {
+    expect(median([1, 2, 3, 4, 5])).toBe(3);
+    expect(median([40, 10, 30, 20])).toBe(25); // unsorted, even length: average of the two middle values
+    expect(median([-1, 0, 1])).toBe(0);
+  });
+
+  it('handles empty array', () => {
+    expect(median([])).toBeUndefined();
+  });
+});
+
+describe('variance', () => {
+  it('calculates the variance of an array of numbers', () => {
+    expect(variance([1, 2, 3, 4, 5])).toBe(2.5); // sample variance: squared deviations sum to 10, divided by n - 1 = 4
+    expect(variance([10, 20, 30, 40, 50])).toBe(250);
+    expect(variance([-1, 0, 1])).toBe(1);
+  });
+
+  it('handles empty array', () => {
+    expect(variance([])).toBeUndefined();
+  });
+
+  it('handles single element', () => {
+    expect(variance([1])).toBeUndefined(); // fewer than 2 points: the n - 1 divisor would be 0
+  });
+});
+
+describe('stdDev', () => {
+  it('calculates the standard deviation of an array of numbers', () => {
+    expect(stdDev([1, 2, 3, 4, 5])).toBeCloseTo(1.5811, 4); // sqrt of the sample variance 2.5
+    expect(stdDev([10, 20, 30, 40, 50])).toBeCloseTo(15.8114, 4); // sqrt of the sample variance 250
+    expect(stdDev([-1, 0, 1])).toBeCloseTo(1, 4);
+  });
+
+  it('handles empty array', () => {
+    expect(stdDev([])).toBeUndefined();
+  });
+
+  it('handles single element', () => {
+    expect(stdDev([1])).toBeUndefined(); // fewer than 2 points, same rule as variance
+  });
+});
diff --git a/yarn-project/foundation/src/collection/array.ts b/yarn-project/foundation/src/collection/array.ts
index b2ac6bec873..b430ceefb36 100644
--- a/yarn-project/foundation/src/collection/array.ts
+++ b/yarn-project/foundation/src/collection/array.ts
@@ -146,6 +146,11 @@ export function maxBy<T>(arr: T[], fn: (x: T) => number): T | undefined {
   return arr.reduce((max, x) => (fn(x) > fn(max) ? x : max), arr[0]);
 }
 
+/** Computes the sum of a numeric array. Returns 0 if the array is empty. */
+export function sum(arr: number[]): number {
+  return arr.reduce((a, b) => a + b, 0);
+}
+
 /** Computes the median of a numeric array. Returns undefined if array is empty. */
 export function median(arr: number[]) {
   if (arr.length === 0) {
@@ -155,3 +160,29 @@ export function median(arr: number[]) {
   const mid = Math.floor(sorted.length / 2);
   return sorted.length % 2 !== 0 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
 }
+
+/** Computes the arithmetic mean of a numeric array. Returns undefined (rather than NaN) if the array is empty. */
+export function mean(values: number[]) {
+  if (values.length === 0) {
+    return undefined;
+  }
+  return sum(values) / values.length;
+}
+
+/** Computes the sample variance (n - 1 divisor) of a numeric array. Returns undefined for fewer than 2 points. */
+export function variance(values: number[]) {
+  if (values.length < 2) {
+    return undefined;
+  }
+  const avg = mean(values)!;
+  // Square each deviation directly: the expanded x*x + avg*avg - 2*x*avg form loses precision for large values.
+  return sum(values.map(value => (value - avg) ** 2)) / (values.length - 1);
+}
+
+/** Computes the sample standard deviation of a numeric array. Returns undefined for fewer than 2 points. */
+export function stdDev(values: number[]) {
+  // variance() already returns undefined for fewer than 2 points, so its result doubles as the
+  // length guard and no non-null assertion is needed.
+  const sampleVariance = variance(values);
+  return sampleVariance === undefined ? undefined : Math.sqrt(sampleVariance);
+}
diff --git a/yarn-project/scripts/Earthfile b/yarn-project/scripts/Earthfile
deleted file mode 100644
index 05c45b7948a..00000000000
--- a/yarn-project/scripts/Earthfile
+++ /dev/null
@@ -1,59 +0,0 @@
-VERSION 0.8
-FROM ../../build-images/+base-slim-node
-
-
-download-logs:
-  # Downloads logs for the current PR and saves them as an artifact, to be consumed by bench-aggregate.
-  ARG COMMIT_HASH
-  ARG PULL_REQUEST
-  ARG BRANCH
-  FROM ../+bootstrap
-  LET LOG_FOLDER=/usr/var/logs
-  ENV LOG_FOLDER=$LOG_FOLDER
-  ENV COMMIT_HASH=$COMMIT_HASH
-  ENV PULL_REQUEST=$PULL_REQUEST
-  ENV BRANCH=$BRANCH
-  RUN --secret AWS_ACCESS_KEY_ID --secret AWS_SECRET_ACCESS_KEY ./scripts/logs/download_logs_from_s3.sh
-  SAVE ARTIFACT $LOG_FOLDER logs
-
-bench-aggregate:
-  # Copies logs downloaded from download-logs and aggregates them using bench-aggregate.
-  # No aggregation is done if there is a log missing from the benchmark jobs.
-  ARG COMMIT_HASH
-  ARG PULL_REQUEST
-  ARG BRANCH
-  FROM ../+bootstrap
-  LET LOG_FOLDER=/usr/var/logs
-  LET BENCH_FOLDER=/usr/var/bench
-  ENV LOG_FOLDER=$LOG_FOLDER
-  ENV BENCH_FOLDER=$BENCH_FOLDER
-  ENV COMMIT_HASH=$COMMIT_HASH
-  ENV PULL_REQUEST=$PULL_REQUEST
-  ENV BRANCH=$BRANCH
-  COPY +download-logs/logs $LOG_FOLDER
-  RUN --secret AWS_ACCESS_KEY_ID --secret AWS_SECRET_ACCESS_KEY \
-    ./scripts/logs/check_logs_for_benchmark.sh \
-    && ((cd /usr/src/yarn-project/scripts && yarn bench-aggregate) && ./scripts/logs/upload_aggregated_benchmarks_to_s3.sh) \
-    || (echo "Not all log files from benchmark jobs found"; mkdir -p $BENCH_FOLDER)
-  SAVE ARTIFACT $BENCH_FOLDER bench
-
-bench-comment:
-  # Use the scripts image to run bench comment after loading the benchmark from bench-aggregate
-  # and the base benchmark (ie the benches from master to compare to) from pack-base-benchmark.
-  ARG COMMIT_HASH
-  ARG PULL_REQUEST
-  ARG BRANCH
-  FROM ../+bootstrap
-  LET BENCH_FOLDER=/usr/var/bench
-  ENV BENCH_FOLDER=$BENCH_FOLDER
-  ENV COMMIT_HASH=$COMMIT_HASH
-  ENV PULL_REQUEST=$PULL_REQUEST
-  ENV PR_NUMBER=$PULL_REQUEST
-  ENV BRANCH=$BRANCH
-  COPY ../../scripts/logs+pack-base-benchmark/bench $BENCH_FOLDER/
-  COPY +bench-aggregate/bench $BENCH_FOLDER/
-  RUN echo "Bench folder contents $(ls $BENCH_FOLDER)"
-  RUN --secret AZTEC_BOT_COMMENTER_GITHUB_TOKEN \
-    [ -f $BENCH_FOLDER/benchmark.json ] \
-      && (cd /usr/src/yarn-project/scripts && AZTEC_BOT_COMMENTER_GITHUB_TOKEN=$AZTEC_BOT_COMMENTER_GITHUB_TOKEN yarn bench-comment) \
-      || echo "No benchmark file found in $BENCH_FOLDER"
\ No newline at end of file
diff --git a/yarn-project/scripts/package.json b/yarn-project/scripts/package.json
index be14fa8b141..90da9bc751a 100644
--- a/yarn-project/scripts/package.json
+++ b/yarn-project/scripts/package.json
@@ -5,9 +5,6 @@
   "type": "module",
   "exports": "./dest/index.js",
   "bin": {
-    "bench-aggregate": "./dest/bin/bench-aggregate.js",
-    "bench-comment": "./dest/bin/bench-comment.js",
-    "bench-markdown": "./dest/bin/bench-markdown.js",
     "docs-preview-comment": "./dest/bin/docs-preview.js"
   },
   "scripts": {
diff --git a/yarn-project/scripts/src/benchmarks/aggregate.ts b/yarn-project/scripts/src/benchmarks/aggregate.ts
deleted file mode 100644
index 6ad9da18c81..00000000000
--- a/yarn-project/scripts/src/benchmarks/aggregate.ts
+++ /dev/null
@@ -1,323 +0,0 @@
-// Given a local folder with the e2e benchmark files, generates a single file
-// output with the grouped metrics to be published. This script can probably
-// be replaced by a single call to jq, but I found this easier to write,
-// and pretty much every CI comes with a working version of node.
-//
-// To test this locally, first run the benchmark tests from the yarn-project/end-to-end folder
-// BENCHMARK=1 yarn test bench
-//
-// And then run this script from the yarn-project/scripts folder
-// LOG_FOLDER=../end-to-end/log yarn bench-aggregate
-import {
-  type AvmSimulationStats,
-  BENCHMARK_BLOCK_SIZES,
-  BENCHMARK_HISTORY_BLOCK_SIZE,
-  BENCHMARK_HISTORY_CHAIN_LENGTHS,
-  type BenchmarkMetricResults,
-  type BenchmarkResults,
-  type BenchmarkResultsWithTimestamp,
-  type CircuitProvingStats,
-  type CircuitSimulationStats,
-  type CircuitWitnessGenerationStats,
-  type L1PublishBlockStats,
-  type L2BlockBuiltStats,
-  type L2BlockHandledStats,
-  type MetricName,
-  type NodeSyncedChainHistoryStats,
-  type ProofConstructed,
-  type PublicDBAccessStats,
-  type Stats,
-  type TreeInsertionStats,
-  type TxAddedToPoolStats,
-} from '@aztec/circuit-types/stats';
-import { createConsoleLogger } from '@aztec/foundation/log';
-
-import * as fs from 'fs';
-import { mkdirpSync } from 'fs-extra';
-import * as path from 'path';
-import * as readline from 'readline';
-
-import { BenchDir, BenchFile, LogsDir } from './paths.js';
-
-const log = createConsoleLogger();
-
-/** Appends a data point to the final results for the given metric in the given bucket */
-function append(
-  results: BenchmarkCollectedResults,
-  metric: MetricName,
-  bucket: number | string,
-  value: number | bigint,
-) {
-  if (value === undefined) {
-    log(`Undefined value for ${metric} in bucket ${bucket}`);
-    return;
-  }
-  const numeric = Number(value);
-  if (Number.isNaN(numeric)) {
-    log(`Value ${value} for ${metric} in ${bucket} is not a number`);
-    return;
-  }
-  if (!results[metric]) {
-    results[metric] = {};
-  }
-  if (!results[metric]![bucket]) {
-    results[metric]![bucket] = [];
-  }
-  results[metric]![bucket].push(numeric);
-}
-
-/** Processes an entry with event name 'acir-proof-generated' and updates results */
-function processAcirProofGenerated(entry: ProofConstructed, results: BenchmarkCollectedResults) {
-  if (entry.acir_test === 'bench_sha256') {
-    append(results, 'proof_construction_time_sha256_ms', entry.threads, entry.value);
-  } else if (entry.acir_test === 'bench_sha256_30') {
-    append(results, 'proof_construction_time_sha256_30_ms', entry.threads, entry.value);
-  } else if (entry.acir_test === 'bench_sha256_100') {
-    append(results, 'proof_construction_time_sha256_100_ms', entry.threads, entry.value);
-  } else if (entry.acir_test === 'bench_poseidon_hash') {
-    append(results, 'proof_construction_time_poseidon_hash_ms', entry.threads, entry.value);
-  } else if (entry.acir_test === 'bench_poseidon_hash_30') {
-    append(results, 'proof_construction_time_poseidon_hash_30_ms', entry.threads, entry.value);
-  } else if (entry.acir_test === 'bench_poseidon_hash_100') {
-    append(results, 'proof_construction_time_poseidon_hash_100_ms', entry.threads, entry.value);
-  } else if (entry.acir_test === 'bench_eddsa') {
-    append(results, 'proof_construction_time_eddsa_poseidon_ms', entry.threads, entry.value);
-  }
-}
-
-/** Processes an entry with event name 'rollup-published-to-l1' and updates results */
-function processRollupPublished(entry: L1PublishBlockStats, results: BenchmarkCollectedResults) {
-  const bucket = entry.txCount;
-  if (!BENCHMARK_BLOCK_SIZES.includes(bucket)) {
-    return;
-  }
-  append(results, 'l1_rollup_calldata_gas', bucket, entry.calldataGas);
-  append(results, 'l1_rollup_calldata_size_in_bytes', bucket, entry.calldataSize);
-  append(results, 'l1_rollup_execution_gas', bucket, entry.gasUsed);
-}
-
-/**
- * Processes an entry with event name 'l2-block-handled' and updates results
- * Skips instances where the block was emitted by the same node where the processing is skipped
- */
-function processRollupBlockSynced(entry: L2BlockHandledStats, results: BenchmarkCollectedResults) {
-  const bucket = entry.txCount;
-  if (!BENCHMARK_BLOCK_SIZES.includes(bucket)) {
-    return;
-  }
-  append(results, 'l2_block_processing_time_in_ms', bucket, entry.duration);
-}
-
-/**
- * Processes an entry with event name 'circuit-simulated' and updates results
- * Buckets are circuit names
- */
-function processCircuitSimulation(entry: CircuitSimulationStats, results: BenchmarkCollectedResults) {
-  if (entry.circuitName === 'app-circuit') {
-    // app circuits aren't simulated
-    return;
-  } else {
-    const bucket = entry.circuitName;
-    append(results, 'protocol_circuit_simulation_time_in_ms', bucket, entry.duration);
-    append(results, 'protocol_circuit_input_size_in_bytes', bucket, entry.inputSize);
-    append(results, 'protocol_circuit_output_size_in_bytes', bucket, entry.outputSize);
-  }
-}
-
-/**
- * Processes an entry with event name 'circuit-proving' and updates results
- * Buckets are circuit names
- */
-function processCircuitProving(entry: CircuitProvingStats, results: BenchmarkCollectedResults) {
-  if (entry.circuitName === 'app-circuit') {
-    if (!entry.appCircuitName) {
-      return;
-    }
-    const bucket = entry.appCircuitName;
-    append(results, 'app_circuit_proving_time_in_ms', bucket, entry.duration);
-    append(results, 'app_circuit_proof_size_in_bytes', bucket, entry.proofSize);
-    append(results, 'app_circuit_size_in_gates', bucket, entry.circuitSize);
-    append(results, 'app_circuit_num_public_inputs', bucket, entry.numPublicInputs);
-  } else if (entry.circuitName === 'avm-circuit') {
-    if (!entry.appCircuitName) {
-      return;
-    }
-    const bucket = `${entry.appCircuitName} (avm)`;
-    append(results, 'app_circuit_proving_time_in_ms', bucket, entry.duration);
-    append(results, 'app_circuit_proof_size_in_bytes', bucket, entry.proofSize);
-    append(results, 'app_circuit_input_size_in_bytes', bucket, entry.inputSize);
-    // These are not yet correctly passed in bb_prover.ts.
-    // append(results, 'app_circuit_size_in_gates', bucket, entry.circuitSize);
-    // append(results, 'app_circuit_num_public_inputs', bucket, entry.numPublicInputs);
-  } else {
-    const bucket = entry.circuitName;
-    append(results, 'protocol_circuit_proving_time_in_ms', bucket, entry.duration);
-    append(results, 'protocol_circuit_proof_size_in_bytes', bucket, entry.proofSize);
-    append(results, 'protocol_circuit_size_in_gates', bucket, entry.circuitSize);
-    append(results, 'protocol_circuit_num_public_inputs', bucket, entry.numPublicInputs);
-  }
-}
-
-function processAvmSimulation(entry: AvmSimulationStats, results: BenchmarkCollectedResults) {
-  append(results, 'avm_simulation_time_ms', entry.appCircuitName, entry.duration);
-}
-
-function processDbAccess(entry: PublicDBAccessStats, results: BenchmarkCollectedResults) {
-  append(results, 'public_db_access_time_ms', entry.operation, entry.duration);
-}
-
-/**
- * Processes an entry with event name 'circuit-proving' and updates results
- * Buckets are circuit names
- */
-function processCircuitWitnessGeneration(entry: CircuitWitnessGenerationStats, results: BenchmarkCollectedResults) {
-  if (entry.circuitName === 'app-circuit') {
-    const bucket = entry.appCircuitName;
-    if (!bucket) {
-      return;
-    }
-    append(results, 'app_circuit_witness_generation_time_in_ms', bucket, entry.duration);
-    append(results, 'app_circuit_input_size_in_bytes', bucket, entry.inputSize);
-    append(results, 'app_circuit_output_size_in_bytes', bucket, entry.outputSize);
-  } else {
-    const bucket = entry.circuitName;
-    append(results, 'protocol_circuit_witness_generation_time_in_ms', bucket, entry.duration);
-  }
-}
-
-/** Processes an entry with event name 'l2-block-built' and updates results where buckets are rollup sizes */
-function processL2BlockBuilt(entry: L2BlockBuiltStats, results: BenchmarkCollectedResults) {
-  const bucket = entry.txCount;
-  if (!BENCHMARK_BLOCK_SIZES.includes(bucket)) {
-    return;
-  }
-  append(results, 'l2_block_building_time_in_ms', bucket, entry.duration);
-  append(results, 'l2_block_rollup_simulation_time_in_ms', bucket, entry.rollupCircuitsDuration);
-  append(results, 'l2_block_public_tx_process_time_in_ms', bucket, entry.publicProcessDuration);
-}
-
-/** Processes entries with event name node-synced-chain-history emitted by benchmark tests where buckets are chain lengths */
-function processNodeSyncedChain(entry: NodeSyncedChainHistoryStats, results: BenchmarkCollectedResults) {
-  const bucket = entry.blockCount;
-  if (!BENCHMARK_HISTORY_CHAIN_LENGTHS.includes(bucket)) {
-    return;
-  }
-  if (entry.txsPerBlock !== BENCHMARK_HISTORY_BLOCK_SIZE) {
-    return;
-  }
-  append(results, 'node_history_sync_time_in_ms', bucket, entry.duration);
-  append(results, 'node_database_size_in_bytes', bucket, entry.dbSize);
-}
-
-/** Processes entries for events tx-added-to-pool, with grouping by deployed contract count. */
-function processTxAddedToPool(entry: TxAddedToPoolStats, results: BenchmarkCollectedResults) {
-  append(results, 'tx_size_in_bytes', entry.classRegisteredCount, entry.size);
-}
-
-/** Process a tree insertion event and updates results */
-function processTreeInsertion(entry: TreeInsertionStats, results: BenchmarkCollectedResults) {
-  const bucket = entry.batchSize;
-  const depth = entry.treeDepth;
-  if (entry.treeType === 'append-only') {
-    if (depth === 16) {
-      append(results, 'batch_insert_into_append_only_tree_16_depth_ms', bucket, entry.duration);
-      append(results, 'batch_insert_into_append_only_tree_16_depth_hash_count', bucket, entry.hashCount);
-      append(results, 'batch_insert_into_append_only_tree_16_depth_hash_ms', bucket, entry.hashDuration);
-    } else if (depth === 32) {
-      append(results, 'batch_insert_into_append_only_tree_32_depth_ms', bucket, entry.duration);
-      append(results, 'batch_insert_into_append_only_tree_32_depth_hash_count', bucket, entry.hashCount);
-      append(results, 'batch_insert_into_append_only_tree_32_depth_hash_ms', bucket, entry.hashDuration);
-    }
-  } else if (entry.treeType === 'indexed') {
-    if (depth === 20) {
-      append(results, 'batch_insert_into_indexed_tree_20_depth_ms', bucket, entry.duration);
-      append(results, 'batch_insert_into_indexed_tree_20_depth_hash_count', bucket, entry.hashCount);
-      append(results, 'batch_insert_into_indexed_tree_20_depth_hash_ms', bucket, entry.hashDuration);
-    } else if (depth === 40) {
-      append(results, 'batch_insert_into_indexed_tree_40_depth_ms', bucket, entry.duration);
-      append(results, 'batch_insert_into_indexed_tree_40_depth_hash_count', bucket, entry.hashCount);
-      append(results, 'batch_insert_into_indexed_tree_40_depth_hash_ms', bucket, entry.hashDuration);
-    }
-  }
-}
-
-/** Processes a parsed entry from a log-file and updates results */
-function processEntry(entry: Stats, results: BenchmarkCollectedResults) {
-  switch (entry.eventName) {
-    case 'proof_construction_time':
-      return processAcirProofGenerated(entry, results);
-    case 'rollup-published-to-l1':
-      return processRollupPublished(entry, results);
-    case 'l2-block-handled':
-      return processRollupBlockSynced(entry, results);
-    case 'circuit-simulation':
-      return processCircuitSimulation(entry, results);
-    case 'circuit-witness-generation':
-      return processCircuitWitnessGeneration(entry, results);
-    case 'circuit-proving':
-      return processCircuitProving(entry, results);
-    case 'l2-block-built':
-      return processL2BlockBuilt(entry, results);
-    case 'node-synced-chain-history':
-      return processNodeSyncedChain(entry, results);
-    case 'tx-added-to-pool':
-      return processTxAddedToPool(entry, results);
-    case 'tree-insertion':
-      return processTreeInsertion(entry, results);
-    case 'avm-simulation':
-      return processAvmSimulation(entry, results);
-    case 'public-db-access':
-      return processDbAccess(entry, results);
-    default:
-      return;
-  }
-}
-
-/** Array of collected raw results for a given metric. */
-type BenchmarkCollectedMetricResults = Record<string, number[]>;
-
-/** Collected raw results pending averaging each bucket within each metric. */
-type BenchmarkCollectedResults = Partial<Record<MetricName, BenchmarkCollectedMetricResults>>;
-
-/** Parses all jsonl files downloaded and aggregates them into a single results object. */
-export async function main() {
-  const collected: BenchmarkCollectedResults = {};
-
-  // Get all jsonl files in the logs dir
-  const files = fs.readdirSync(LogsDir).filter(f => f.endsWith('.jsonl'));
-
-  // Iterate over each .jsonl file
-  for (const file of files) {
-    const filePath = path.join(LogsDir, file);
-    const fileStream = fs.createReadStream(filePath);
-    const rl = readline.createInterface({ input: fileStream });
-
-    for await (const line of rl) {
-      const entry = JSON.parse(line);
-      processEntry(entry, collected);
-    }
-  }
-
-  log(`Collected entries: ${JSON.stringify(collected)}`);
-
-  // For each bucket of each metric compute the average all collected data points
-  const results: BenchmarkResults = {};
-  for (const [metricName, metric] of Object.entries(collected)) {
-    const resultMetric: BenchmarkMetricResults = {};
-    results[metricName as MetricName] = resultMetric;
-    for (const [bucketName, bucket] of Object.entries(metric)) {
-      let avg = bucket.reduce((acc, val) => acc + val, 0) / bucket.length;
-      if (avg > 100) {
-        avg = Math.floor(avg);
-      }
-      resultMetric[bucketName] = avg;
-    }
-  }
-
-  const timestampedResults: BenchmarkResultsWithTimestamp = { ...results, timestamp: new Date().toISOString() };
-
-  // Write results to disk
-  log(`Aggregated results: ${JSON.stringify(timestampedResults, null, 2)}`);
-  mkdirpSync(BenchDir);
-  fs.writeFileSync(BenchFile, JSON.stringify(timestampedResults, null, 2));
-}
diff --git a/yarn-project/scripts/src/benchmarks/markdown.ts b/yarn-project/scripts/src/benchmarks/markdown.ts
deleted file mode 100644
index 5c7d55390d2..00000000000
--- a/yarn-project/scripts/src/benchmarks/markdown.ts
+++ /dev/null
@@ -1,302 +0,0 @@
-// Generate a markdown file with a table summary of the aggregated benchmarks.
-// If a benchmark-base file is available, shows the comparison against base (ie master in a PR).
-import { BENCHMARK_HISTORY_BLOCK_SIZE, Metrics } from '@aztec/circuit-types/stats';
-import { createConsoleLogger } from '@aztec/foundation/log';
-
-import * as fs from 'fs';
-import pick from 'lodash.pick';
-
-import { BaseBenchFile, BenchFile } from './paths.js';
-
-// Input file paths
-const inputFile = BenchFile;
-const baseFile = BaseBenchFile;
-
-const COMMENT_MARK = '<!-- AUTOGENERATED BENCHMARK COMMENT -->';
-const S3_URL = 'https://aztec-ci-artifacts.s3.us-east-2.amazonaws.com';
-
-// What % diff should be considered as a warning
-const WARNING_DIFF_THRESHOLD = 15;
-// When a measurement in ms should be considered "small"
-const SMALL_MS_THRESHOLD = 200;
-// What % diff should be considered as a warning for "small" ms measurements
-const WARNING_DIFF_THRESHOLD_SMALL_MS = 30;
-// What % diff should be considered as a warning for trial_decryption in particular
-const WARNING_DIFF_THRESHOLD_TRIAL_DECRYPTION = 75;
-
-const log = createConsoleLogger();
-
-/** Returns whether the value should be a warning, based on the % difference and absolute value. */
-function isWarning(row: string, col: string, value: number, base: number | undefined) {
-  if (base === undefined) {
-    return false;
-  }
-  const absPercentDiff = Math.abs(Math.round(((value - base) / base) * 100));
-  if (row.includes('trial_decrypt') || col.includes('trial_decrypt')) {
-    return absPercentDiff > WARNING_DIFF_THRESHOLD_TRIAL_DECRYPTION;
-  } else if ((row.endsWith('_ms') || col.endsWith('_ms')) && value < SMALL_MS_THRESHOLD) {
-    return absPercentDiff >= WARNING_DIFF_THRESHOLD_SMALL_MS;
-  } else {
-    return absPercentDiff > WARNING_DIFF_THRESHOLD;
-  }
-}
-
-/** Returns summary text for warnings */
-function getWarningsSummary(
-  data: Record<string, Record<string, number>>,
-  base: Record<string, Record<string, number>> | undefined,
-) {
-  const warnings = getWarnings(data, base);
-  if (!base) {
-    return 'No base data found for comparison.';
-  } else if (warnings.length) {
-    return `Metrics with a significant change: \n${warnings.join('\n')}`;
-  } else {
-    return `No metrics with a significant change found.`;
-  }
-}
-
-/** Returns a string with the % diff between value and base. */
-function formatDiff(value: number, baseValue: number) {
-  const percentDiff = Math.round(((value - baseValue) / baseValue) * 100);
-  const percentSign = percentDiff > 0 ? '+' : '';
-  return `<span title="${formatValue(baseValue)}">${percentSign}${percentDiff}%</span>`;
-}
-
-/** Gets a list of warnings. */
-function getWarnings(
-  data: Record<string, Record<string, number>>,
-  base: Record<string, Record<string, number>> | undefined,
-) {
-  if (!base) {
-    return [];
-  }
-  const warnings: string[] = [];
-  for (const row in data) {
-    if (row === 'timestamp') {
-      continue;
-    }
-    for (const col in data[row]) {
-      const value = data[row][col];
-      const baseValue = (base[row] ?? {})[col];
-      if (baseValue && isWarning(row, col, value, baseValue)) {
-        const diffText = formatDiff(value, baseValue);
-        warnings.push(`- **${withDesc(row)}** (${withDesc(col)}): ${formatValue(value)} (${diffText})`);
-      }
-    }
-  }
-  return warnings;
-}
-
-/** Returns a cell content formatted as string */
-function getCell(
-  data: Record<string, Record<string, number>>,
-  base: Record<string, Record<string, number>> | undefined,
-  row: string,
-  col: string,
-) {
-  const value: number | undefined = data[row][col];
-  const formattedValue = formatValue(value);
-  const baseValue: number | undefined = base?.[row]?.[col];
-  const percentDiff =
-    typeof baseValue === 'number' && baseValue > 0 && typeof value === 'number'
-      ? Math.round(((value - baseValue) / baseValue) * 100)
-      : undefined;
-  if (!percentDiff || Math.abs(percentDiff) < 1) {
-    return formattedValue;
-  }
-  if (!isWarning(row, col, value, baseValue)) {
-    return `${formattedValue} (${formatDiff(value, baseValue!)})`;
-  }
-  return `:warning: ${formattedValue} (**${formatDiff(value, baseValue!)}**)`;
-}
-
-/** Wraps the metric name in a span with a title with the description, if found. */
-function withDesc(name: string) {
-  const description = Metrics.find(m => m.name === name)?.description;
-  if (!description) {
-    return name;
-  }
-  return `<span title="${description}">${name}</span>`;
-}
-
-/** Formats a numeric value for display. */
-function formatValue(value: number | undefined): string {
-  if (typeof value === 'undefined') {
-    return 'N/A';
-  }
-
-  if (value < 100) {
-    return value.toPrecision(3);
-  }
-  return value.toLocaleString();
-}
-
-/** Transposes an object topmost and nested keys. */
-function transpose(obj: any) {
-  const transposed: any = {};
-  for (const outerKey in obj) {
-    const innerObj = obj[outerKey];
-    for (const innerKey in innerObj) {
-      if (!transposed[innerKey]) {
-        transposed[innerKey] = {};
-      }
-      transposed[innerKey][outerKey] = innerObj[innerKey];
-    }
-  }
-  return transposed;
-}
-
-/** Returns the base benchmark for comparison, if exists */
-function getBaseBenchmark(): Record<string, Record<string, number>> | undefined {
-  try {
-    return JSON.parse(fs.readFileSync(baseFile, 'utf-8'));
-  } catch {
-    return undefined;
-  }
-}
-
-/** Creates a table in md out of the data (rows and cols). */
-function getTableContent(
-  data: Record<string, Record<string, number>>,
-  baseBenchmark: Record<string, Record<string, number>> | undefined,
-  groupUnit = '',
-  col1Title = 'Metric',
-  colPrefixToRemove = '',
-) {
-  const rowKeys = Object.keys(data);
-  const groups = [...new Set(rowKeys.flatMap(key => Object.keys(data[key])))];
-  const makeHeader = (colTitle: string) => `${withDesc(colTitle.replace(colPrefixToRemove, ''))} ${groupUnit}`;
-  const header = `| ${col1Title} | ${groups.map(makeHeader).join(' | ')} |`;
-  const separator = `| - | ${groups.map(() => '-').join(' | ')} |`;
-  const makeCell = (row: string, col: string) => getCell(data, baseBenchmark, row, col);
-  const rows = rowKeys.map(key => `${withDesc(key)} | ${groups.map(g => makeCell(key, g)).join(' | ')} |`);
-
-  return `
-${header}
-${separator}
-${rows.join('\n')}
-  `;
-}
-
-/** Creates a md with the benchmark contents. */
-export function getMarkdown(prNumber: number) {
-  const benchmark = JSON.parse(fs.readFileSync(inputFile, 'utf-8'));
-  const baseBenchmark = getBaseBenchmark();
-
-  const metricsByThreads = Metrics.filter(m => m.groupBy === 'threads').map(m => m.name);
-  const metricsByBlockSize = Metrics.filter(m => m.groupBy === 'block-size').map(m => m.name);
-  const metricsByChainLength = Metrics.filter(m => m.groupBy === 'chain-length')
-    .filter(m => m.name !== 'public_db_access_time_ms')
-    .map(m => m.name);
-  const kernelCircuitMetrics = Metrics.filter(m => m.groupBy === 'protocol-circuit-name').map(m => m.name);
-  const appCircuitMetrics = Metrics.filter(m => m.groupBy === 'app-circuit-name')
-    .filter(m => m.name !== 'avm_simulation_time_ms')
-    .map(m => m.name);
-  const metricsByClassesRegistered = Metrics.filter(m => m.groupBy === 'classes-registered').map(m => m.name);
-  const metricsByFeePaymentMethod = Metrics.filter(m => m.groupBy === 'fee-payment-method').map(m => m.name);
-  const metricsByLeafCount = Metrics.filter(m => m.groupBy === 'leaf-count').map(m => m.name);
-
-  const baseHash = process.env.BASE_COMMIT_HASH;
-  const baseUrl = baseHash && `[\`${baseHash.slice(0, 8)}\`](${S3_URL}/benchmarks-v1/master/${baseHash}.json)`;
-  const baseCommitText = baseUrl
-    ? `\nValues are compared against data from master at commit ${baseUrl} and shown if the difference exceeds 1%.`
-    : '';
-
-  const prSourceDataUrl = prNumber && `${S3_URL}/benchmarks-v1/pulls/${prNumber}.json`;
-  const prSourceDataText = prSourceDataUrl
-    ? `\nThis benchmark source data is available in JSON format on S3 [here](${prSourceDataUrl}).`
-    : '';
-
-  return `
-## Benchmark results
-
-${getWarningsSummary(benchmark, baseBenchmark)}
-
-<details>
-
-<summary>Detailed results</summary>
-
-All benchmarks are run on txs on the \`Benchmarking\` contract on the repository. Each tx consists of a batch call  to \`create_note\` and \`increment_balance\`, which guarantees that each tx has a private call, a nested private call, a public call, and a nested public call, as well as an emitted private note, an unencrypted log, and public storage read and write.
-${prSourceDataText}
-${baseCommitText}
-
-### Proof generation
-
-Each column represents the number of threads used in proof generation.
-${getTableContent(pick(benchmark, metricsByThreads), baseBenchmark, 'threads')}
-
-### L2 block published to L1
-
-Each column represents the number of txs on an L2 block published to L1.
-${getTableContent(pick(benchmark, metricsByBlockSize), baseBenchmark, 'txs')}
-
-### L2 chain processing
-
-Each column represents the number of blocks on the L2 chain where each block has ${BENCHMARK_HISTORY_BLOCK_SIZE} txs.
-${getTableContent(pick(benchmark, metricsByChainLength), baseBenchmark, 'blocks')}
-
-### Circuits stats
-
-Stats on running time and I/O sizes collected for every kernel circuit run across all benchmarks.
-${getTableContent(
-  transpose(pick(benchmark, kernelCircuitMetrics)),
-  transpose(baseBenchmark),
-  '',
-  'Circuit',
-  'protocol_circuit_',
-)}
-
-Stats on running time collected for app circuits
-${getTableContent(
-  transpose(pick(benchmark, appCircuitMetrics)),
-  transpose(baseBenchmark),
-  '',
-  'Function',
-  'app_circuit_',
-)}
-
-### AVM Simulation
-
-Time to simulate various public functions in the AVM.
-${getTableContent(
-  transpose(pick(benchmark, ['avm_simulation_time_ms'])),
-  transpose(baseBenchmark),
-  '',
-  'Function',
-  'avm_simulation_',
-)}
-
-### Public DB Access
-
-Time to access various public DBs.
-${getTableContent(
-  transpose(pick(benchmark, ['public_db_access_time_ms'])),
-  transpose(baseBenchmark),
-  '',
-  'Function',
-  'public_db_access_',
-)}
-
-### Tree insertion stats
-
-The duration to insert a fixed batch of leaves into each tree type.
-${getTableContent(pick(benchmark, metricsByLeafCount), baseBenchmark, 'leaves')}
-
-### Miscellaneous
-
-Transaction sizes based on how many contract classes are registered in the tx.
-${getTableContent(pick(benchmark, metricsByClassesRegistered), baseBenchmark, 'registered classes')}
-
-Transaction size based on fee payment method
-${getTableContent(pick(benchmark, metricsByFeePaymentMethod), baseBenchmark, 'fee payment method')}
-
-</details>
-${COMMENT_MARK}
-`;
-}
-
-/** Entrypoint */
-export function main(prNumber: number) {
-  log(getMarkdown(prNumber));
-}
diff --git a/yarn-project/scripts/src/benchmarks/paths.ts b/yarn-project/scripts/src/benchmarks/paths.ts
deleted file mode 100644
index 8b82f42b19a..00000000000
--- a/yarn-project/scripts/src/benchmarks/paths.ts
+++ /dev/null
@@ -1,13 +0,0 @@
-import * as path from 'path';
-
-/** Folder where to load raw logs from */
-export const LogsDir = process.env.LOG_FOLDER ?? `log`;
-
-/** Folder with the aggregated benchmark results */
-export const BenchDir = process.env.BENCH_FOLDER ?? `bench`;
-
-/** Benchmark file path */
-export const BenchFile = path.join(BenchDir, 'benchmark.json');
-
-/** Base benchmark file path */
-export const BaseBenchFile = path.join(BenchDir, 'base-benchmark.json');
diff --git a/yarn-project/scripts/src/bin/bench-aggregate.ts b/yarn-project/scripts/src/bin/bench-aggregate.ts
deleted file mode 100644
index 44ee68f3694..00000000000
--- a/yarn-project/scripts/src/bin/bench-aggregate.ts
+++ /dev/null
@@ -1,7 +0,0 @@
-import { main } from '../benchmarks/aggregate.js';
-
-void main().catch(err => {
-  // eslint-disable-next-line no-console
-  console.error(err.message);
-  process.exit(1);
-});
diff --git a/yarn-project/scripts/src/bin/bench-comment.ts b/yarn-project/scripts/src/bin/bench-comment.ts
deleted file mode 100644
index 41440175dab..00000000000
--- a/yarn-project/scripts/src/bin/bench-comment.ts
+++ /dev/null
@@ -1,9 +0,0 @@
-import { COMMENT_TYPES } from '../types.js';
-import main from '../utils/comment.js';
-import { getPrNumber } from '../utils/pr-number.js';
-
-void main(getPrNumber(), COMMENT_TYPES.BENCH).catch(err => {
-  // eslint-disable-next-line no-console
-  console.error(err.message);
-  process.exit(1);
-});
diff --git a/yarn-project/scripts/src/bin/bench-markdown.ts b/yarn-project/scripts/src/bin/bench-markdown.ts
deleted file mode 100644
index b87762c26e0..00000000000
--- a/yarn-project/scripts/src/bin/bench-markdown.ts
+++ /dev/null
@@ -1,10 +0,0 @@
-import { main } from '../benchmarks/markdown.js';
-import { getPrNumber } from '../utils/pr-number.js';
-
-try {
-  void main(getPrNumber());
-} catch (err: any) {
-  // eslint-disable-next-line no-console
-  console.error(err);
-  process.exit(1);
-}
diff --git a/yarn-project/scripts/src/types.ts b/yarn-project/scripts/src/types.ts
index b6f971a94fe..859ef5d2830 100644
--- a/yarn-project/scripts/src/types.ts
+++ b/yarn-project/scripts/src/types.ts
@@ -1,4 +1,3 @@
 export enum COMMENT_TYPES {
-  BENCH = '<!-- AUTOGENERATED BENCHMARK COMMENT -->',
   DOCS = '<!-- AUTOGENERATED DOCS COMMENT -->',
 }
diff --git a/yarn-project/scripts/src/utils/comment.ts b/yarn-project/scripts/src/utils/comment.ts
index 7e7cc00fbb5..f541086ef84 100644
--- a/yarn-project/scripts/src/utils/comment.ts
+++ b/yarn-project/scripts/src/utils/comment.ts
@@ -16,9 +16,7 @@ const REPO = 'aztec-packages';
 const log = createConsoleLogger();
 
 async function getMarkdown(prNumber: number, commentType: COMMENT_TYPES) {
-  if (commentType === COMMENT_TYPES.BENCH) {
-    return (await import('../benchmarks/markdown.js')).getMarkdown(prNumber);
-  } else if (commentType === COMMENT_TYPES.DOCS) {
+  if (commentType === COMMENT_TYPES.DOCS) {
     if (!DOCS_PREVIEW_URL) {
       throw new Error('DOCS_PREVIEW_URL is not set');
     } else {
@@ -110,7 +108,7 @@ function sendGitHubRequest(url: string, method = 'GET', data?: object): Promise<
 }
 
 /** Entrypoint */
-export default async function main(prNumber: number, commentType: COMMENT_TYPES = COMMENT_TYPES.BENCH) {
+export default async function main(prNumber: number, commentType: COMMENT_TYPES) {
   const existingComment = await getExistingComment(prNumber, commentType);
   await upsertComment(prNumber, existingComment?.id, commentType);
 }
diff --git a/yarn-project/sequencer-client/src/sequencer/metrics.ts b/yarn-project/sequencer-client/src/sequencer/metrics.ts
index 806d35534cc..bc6caca4568 100644
--- a/yarn-project/sequencer-client/src/sequencer/metrics.ts
+++ b/yarn-project/sequencer-client/src/sequencer/metrics.ts
@@ -16,6 +16,7 @@ export class SequencerMetrics {
 
   private blockCounter: UpDownCounter;
   private blockBuildDuration: Histogram;
+  private blockBuildManaPerSecond: Gauge;
   private stateTransitionBufferDuration: Histogram;
   private currentBlockNumber: Gauge;
   private currentBlockSize: Gauge;
@@ -28,11 +29,19 @@ export class SequencerMetrics {
     this.tracer = client.getTracer(name);
 
     this.blockCounter = meter.createUpDownCounter(Metrics.SEQUENCER_BLOCK_COUNT);
+
     this.blockBuildDuration = meter.createHistogram(Metrics.SEQUENCER_BLOCK_BUILD_DURATION, {
       unit: 'ms',
       description: 'Duration to build a block',
       valueType: ValueType.INT,
     });
+
+    this.blockBuildManaPerSecond = meter.createGauge(Metrics.SEQUENCER_BLOCK_BUILD_MANA_PER_SECOND, {
+      unit: 'mana/s',
+      description: 'Mana per second when building a block',
+      valueType: ValueType.INT,
+    });
+
     this.stateTransitionBufferDuration = meter.createHistogram(Metrics.SEQUENCER_STATE_TRANSITION_BUFFER_DURATION, {
       unit: 'ms',
       description:
@@ -96,11 +105,16 @@ export class SequencerMetrics {
     this.setCurrentBlock(0, 0);
   }
 
-  recordPublishedBlock(buildDurationMs: number) {
+  /** Records a published block: bumps the block counter, the build duration and, when measurable, mana/s. */
+  recordPublishedBlock(buildDurationMs: number, totalMana: number) {
     this.blockCounter.add(1, {
       [Attributes.STATUS]: 'published',
     });
     this.blockBuildDuration.record(Math.ceil(buildDurationMs));
+    // Skip the throughput sample for a non-positive duration: the division would yield Infinity or NaN.
+    if (buildDurationMs > 0) {
+      this.blockBuildManaPerSecond.record(Math.ceil((totalMana * 1000) / buildDurationMs));
+    }
     this.setCurrentBlock(0, 0);
   }
 
 
diff --git a/yarn-project/sequencer-client/src/sequencer/sequencer.ts b/yarn-project/sequencer-client/src/sequencer/sequencer.ts
index 69336ddf2f1..8a31b3825c5 100644
--- a/yarn-project/sequencer-client/src/sequencer/sequencer.ts
+++ b/yarn-project/sequencer-client/src/sequencer/sequencer.ts
@@ -585,8 +585,12 @@ export class Sequencer {
       // All real transactions have been added, set the block as full and pad if needed
       const block = await blockBuilder.setBlockCompleted();
 
+      // How much public gas was processed
+      const publicGas = processedTxs.reduce((acc, tx) => acc.add(tx.gasUsed.publicGas), Gas.empty());
+
       return {
         block,
+        publicGas,
         publicProcessorDuration,
         numMsgs: l1ToL2Messages.length,
         numTxs: processedTxs.length,
@@ -641,7 +645,7 @@ export class Sequencer {
 
     try {
       const buildBlockRes = await this.buildBlock(pendingTxs, newGlobalVariables, historicalHeader);
-      const { block, publicProcessorDuration, numTxs, numMsgs, blockBuildingTimer } = buildBlockRes;
+      const { publicGas, block, publicProcessorDuration, numTxs, numMsgs, blockBuildingTimer } = buildBlockRes;
 
       // TODO(@PhilWindle) We should probably periodically check for things like another
       // block being published before ours instead of just waiting on our block
@@ -683,16 +687,19 @@ export class Sequencer {
       const proofQuote = await proofQuotePromise;
 
       await this.publishL2Block(block, attestations, txHashes, proofQuote);
-      this.metrics.recordPublishedBlock(workDuration);
+      this.metrics.recordPublishedBlock(workDuration, publicGas.l2Gas);
+      const duration = Math.ceil(workDuration);
+      const manaPerSecond = Math.ceil((publicGas.l2Gas * 1000) / duration);
       this.log.info(
-        `Published block ${block.number} with ${numTxs} txs and ${numMsgs} messages in ${Math.ceil(workDuration)}ms`,
+        `Published block ${block.number} with ${numTxs} txs and ${numMsgs} messages in ${duration} ms at ${manaPerSecond} mana/s`,
         {
+          publicGas,
           blockNumber: block.number,
           blockHash: blockHash,
           slot,
           txCount: txHashes.length,
           msgCount: numMsgs,
-          duration: Math.ceil(workDuration),
+          duration,
           submitter: this.publisher.getSenderAddress().toString(),
         },
       );
diff --git a/yarn-project/telemetry-client/package.json b/yarn-project/telemetry-client/package.json
index 7face42b10c..3d75ee4bf9a 100644
--- a/yarn-project/telemetry-client/package.json
+++ b/yarn-project/telemetry-client/package.json
@@ -8,6 +8,7 @@
     ".": "./dest/index.js",
     "./start": "./dest/start.js",
     "./noop": "./dest/noop.js",
+    "./bench": "./dest/bench.js",
     "./otel-pino-stream": "./dest/vendor/otel-pino-stream.js"
   },
   "scripts": {
diff --git a/yarn-project/telemetry-client/src/bench.ts b/yarn-project/telemetry-client/src/bench.ts
new file mode 100644
index 00000000000..bb19639041d
--- /dev/null
+++ b/yarn-project/telemetry-client/src/bench.ts
@@ -0,0 +1,183 @@
+import {
+  type BatchObservableCallback,
+  type Context,
+  type MetricOptions,
+  type Observable,
+  type ValueType,
+} from '@opentelemetry/api';
+
+import { NoopTracer } from './noop.js';
+import {
+  type Attributes,
+  type Gauge,
+  type Histogram,
+  type Meter,
+  type Metrics,
+  type ObservableGauge,
+  type ObservableUpDownCounter,
+  type TelemetryClient,
+  type Tracer,
+  type UpDownCounter,
+} from './telemetry.js';
+
+/** Kinds of instruments that the benchmark telemetry client keeps track of. */
+export type BenchmarkMetricType = 'gauge' | 'counter' | 'histogram';
+
+/** A single value reported to a metric, with the attributes and context it was reported with. */
+export type BenchmarkDataPoint = { value: number; attributes?: Attributes; context?: Context };
+
+/** All metrics collected by a benchmark telemetry client, grouped by the meter that created them. */
+export type BenchmarkMetrics = {
+  name: string;
+  metrics: {
+    name: string;
+    type: BenchmarkMetricType;
+    description?: string;
+    unit?: string;
+    valueType?: ValueType;
+    points: BenchmarkDataPoint[];
+  }[];
+}[];
+
+/**
+ * Telemetry client that keeps every reported data point in memory rather than exporting it,
+ * so that a benchmark can inspect the collected values once it finishes. Tracing is a no-op.
+ */
+export class BenchmarkTelemetryClient implements TelemetryClient {
+  private meters: InMemoryPlainMeter[] = [];
+
+  /** Creates a new in-memory meter and registers it. Every call creates a meter, even for a repeated name. */
+  getMeter(name: string): Meter {
+    const meter = new InMemoryPlainMeter(name);
+    this.meters.push(meter);
+    return meter;
+  }
+
+  /** Returns a no-op tracer. */
+  getTracer(): Tracer {
+    return new NoopTracer();
+  }
+
+  /** No-op: resolves immediately. */
+  stop(): Promise<void> {
+    return Promise.resolve();
+  }
+
+  /** No-op: resolves immediately, as data points are only kept in memory. */
+  flush(): Promise<void> {
+    return Promise.resolve();
+  }
+
+  /** Always reports telemetry as enabled. */
+  isEnabled(): boolean {
+    return true;
+  }
+
+  /** Returns the registered meters along with their metrics and data points. The result is not a copy. */
+  getMeters(): BenchmarkMetrics {
+    return this.meters;
+  }
+
+  /** Drops the data points collected so far, while keeping all meters and metrics registered. */
+  clear(): void {
+    this.meters.forEach(meter => meter.clear());
+  }
+}
+
+/** Meter that creates in-memory metrics and remembers them so their data points can be read back. */
+class InMemoryPlainMeter implements Meter {
+  public readonly metrics: InMemoryPlainMetric[] = [];
+
+  constructor(public readonly name: string) {}
+
+  /** Drops the data points of every metric created by this meter. */
+  clear(): void {
+    this.metrics.forEach(metric => metric.clear());
+  }
+
+  createGauge(name: Metrics, options?: MetricOptions | undefined): Gauge {
+    return this.createMetric('gauge', name, options);
+  }
+
+  /** Observable gauges are collected as plain gauges; their callbacks are never invoked. */
+  createObservableGauge(name: Metrics, options?: MetricOptions | undefined): ObservableGauge {
+    return this.createMetric('gauge', name, options);
+  }
+
+  createHistogram(name: Metrics, options?: MetricOptions | undefined): Histogram {
+    return this.createMetric('histogram', name, options);
+  }
+
+  createUpDownCounter(name: Metrics, options?: MetricOptions | undefined): UpDownCounter {
+    return this.createMetric('counter', name, options);
+  }
+
+  /** Observable counters are collected as plain counters; their callbacks are never invoked. */
+  createObservableUpDownCounter(name: Metrics, options?: MetricOptions | undefined): ObservableUpDownCounter {
+    return this.createMetric('counter', name, options);
+  }
+
+  /** Creates a metric of the given type and registers it with this meter. */
+  private createMetric(type: BenchmarkMetricType, name: string, options?: MetricOptions) {
+    const metric = new InMemoryPlainMetric(type, name, options);
+    this.metrics.push(metric);
+    return metric;
+  }
+
+  /** No-op: batch observable callbacks are ignored. */
+  addBatchObservableCallback(
+    _callback: BatchObservableCallback<Attributes>,
+    _observables: Observable<Attributes>[],
+  ): void {}
+
+  /** No-op: batch observable callbacks are ignored. */
+  removeBatchObservableCallback(
+    _callback: BatchObservableCallback<Attributes>,
+    _observables: Observable<Attributes>[],
+  ): void {}
+}
+
+/** In-memory metric that stands in for every instrument type and stores each reported value as a data point. */
+class InMemoryPlainMetric {
+  public readonly points: BenchmarkDataPoint[] = [];
+
+  public readonly description?: string;
+  public readonly unit?: string;
+  public readonly valueType?: ValueType;
+
+  constructor(
+    public readonly type: BenchmarkMetricType,
+    public readonly name: string,
+    options?: MetricOptions,
+  ) {
+    this.description = options?.description;
+    this.unit = options?.unit;
+    this.valueType = options?.valueType;
+  }
+
+  /** Stores the value as a new data point exactly as reported; values are not accumulated. */
+  add(value: number, attributes?: Attributes, context?: Context): void {
+    this.points.push({ value, attributes, context });
+  }
+
+  /** Stores the value as a new data point exactly as reported. */
+  record(value: number, attributes?: Attributes, context?: Context): void {
+    this.points.push({ value, attributes, context });
+  }
+
+  /** No-op: observable callbacks are ignored. */
+  addCallback() {}
+
+  /** No-op: observable callbacks are ignored. */
+  removeCallback() {}
+
+  /** Returns the data points collected so far. The result is not a copy. */
+  getPoints(): BenchmarkDataPoint[] {
+    return this.points;
+  }
+
+  /** Removes all data points in place, so arrays handed out earlier are emptied as well. */
+  clear(): void {
+    this.points.length = 0;
+  }
+}
diff --git a/yarn-project/telemetry-client/src/metrics.ts b/yarn-project/telemetry-client/src/metrics.ts
index 1561d4a9b60..51a59aa4a3f 100644
--- a/yarn-project/telemetry-client/src/metrics.ts
+++ b/yarn-project/telemetry-client/src/metrics.ts
@@ -51,6 +51,7 @@ export const NODE_RECEIVE_TX_COUNT = 'aztec.node.receive_tx.count';
 
 export const SEQUENCER_STATE_TRANSITION_BUFFER_DURATION = 'aztec.sequencer.state_transition_buffer.duration';
 export const SEQUENCER_BLOCK_BUILD_DURATION = 'aztec.sequencer.block.build_duration';
+export const SEQUENCER_BLOCK_BUILD_MANA_PER_SECOND = 'aztec.sequencer.block.build_mana_per_second';
 export const SEQUENCER_BLOCK_COUNT = 'aztec.sequencer.block.count';
 export const SEQUENCER_CURRENT_STATE = 'aztec.sequencer.current.state';
 export const SEQUENCER_CURRENT_BLOCK_NUMBER = 'aztec.sequencer.current.block_number';
diff --git a/yarn-project/telemetry-client/src/noop.ts b/yarn-project/telemetry-client/src/noop.ts
index 120c8a6e3c5..5dc7f4f4046 100644
--- a/yarn-project/telemetry-client/src/noop.ts
+++ b/yarn-project/telemetry-client/src/noop.ts
@@ -27,7 +27,7 @@ export class NoopTelemetryClient implements TelemetryClient {
 // @opentelemetry/api internally uses NoopTracer and NoopSpan but they're not exported
 // make our own versions
 // https://github.com/open-telemetry/opentelemetry-js/issues/4518#issuecomment-2179405444
-class NoopTracer implements Tracer {
+export class NoopTracer implements Tracer {
   startSpan(): Span {
     return new NoopSpan();
   }
diff --git a/yarn-project/yarn.lock b/yarn-project/yarn.lock
index 74d6e1cc21e..f77d86357ef 100644
--- a/yarn-project/yarn.lock
+++ b/yarn-project/yarn.lock
@@ -1177,9 +1177,6 @@ __metadata:
     tslib: "npm:^2.4.0"
     typescript: "npm:^5.0.4"
   bin:
-    bench-aggregate: ./dest/bin/bench-aggregate.js
-    bench-comment: ./dest/bin/bench-comment.js
-    bench-markdown: ./dest/bin/bench-markdown.js
     docs-preview-comment: ./dest/bin/docs-preview.js
   languageName: unknown
   linkType: soft