tests: parametrize benchmark tests #4974

Open · wants to merge 1 commit into base: main
135 changes: 86 additions & 49 deletions tests/integration_tests/performance/test_benchmarks.py
@@ -5,6 +5,7 @@
import json
import logging
import platform
import re
import shutil
from pathlib import Path

@@ -17,78 +18,114 @@
LOGGER = logging.getLogger(__name__)


def get_executables():
"""
Get a list of binaries for benchmarking
"""

# Passing --message-format json to cargo tells it to print its log in a json format. At the end, instead of the
# usual "placed executable <...> at <...>" we'll get a json object with an 'executable' key, from which we
# extract the path to the compiled benchmark binary.
_, stdout, _ = cargo(
"bench",
f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
)

executables = []
for line in stdout.split("\n"):
if line:
msg = json.loads(line)
executable = msg.get("executable")
if executable:
executables.append(executable)

return executables
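
For context, a minimal sketch of how the `--message-format json` stream is consumed: each stdout line is one JSON object, and only messages whose `executable` field is non-null point at a runnable benchmark binary. The field names match what the function above relies on; the sample values and paths below are made up.

# Illustrative only: made-up cargo --message-format json lines; real messages carry more fields.
import json

sample_stdout = "\n".join([
    '{"reason": "compiler-artifact", "target": {"name": "vmm"}, "executable": null}',
    '{"reason": "compiler-artifact", "target": {"name": "benchmarks"},'
    ' "executable": "build/cargo_target/release/deps/benchmarks-0123abcd"}',
    '{"reason": "build-finished", "success": true}',
])

executables = [
    msg["executable"]
    for line in sample_stdout.split("\n")
    if line
    for msg in [json.loads(line)]
    if msg.get("executable")
]
assert executables == ["build/cargo_target/release/deps/benchmarks-0123abcd"]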


@pytest.mark.no_block_pr
@pytest.mark.timeout(900)
def test_no_regression_relative_to_target_branch():
@pytest.mark.timeout(600)
@pytest.mark.parametrize("executable", get_executables())
def test_no_regression_relative_to_target_branch(executable):
"""
Run the microbenchmarks in this repository, comparing results from the pull
request's target branch against what is achieved on HEAD
"""
run_criterion = get_run_criterion(executable)
compare_results = get_compare_results(executable)
git_ab_test(run_criterion, compare_results)
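
For orientation, a conceptual sketch of the A/B shape that `git_ab_test` expects from its two arguments; this is not the framework's actual implementation, only the calling convention the closures in this file are written against.

# Conceptual illustration only; the real git_ab_test lives in the test framework
# and also handles checking out and building the two revisions.
from pathlib import Path
from typing import Callable

def ab_test_sketch(
    run: Callable[[Path, bool], Path],
    compare: Callable[[Path, Path], None],
    checkout_a: Path,
    checkout_b: Path,
) -> None:
    baselines_a = run(checkout_a, True)   # is_a=True  -> saves "a_baseline"
    baselines_b = run(checkout_b, False)  # is_a=False -> saves "b_baseline"
    compare(baselines_a, baselines_b)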


def run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
def get_run_criterion(executable):
"""
Executes all benchmarks by running "cargo bench --no-run", finding the executables, and running them pinned to some CPU
Get a function that executes the given benchmark binary, pinned to some CPU
"""
baseline_name = "a_baseline" if is_a else "b_baseline"

with contextlib.chdir(firecracker_checkout):
# Passing --message-format json to cargo tells it to print its log in a json format. At the end, instead of the
# usual "placed executable <...> at <...>" we'll get a json object with an 'executable' key, from which we
# extract the path to the compiled benchmark binary.
_, stdout, _ = cargo(
"bench",
f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
)

executables = []
for line in stdout.split("\n"):
if line:
msg = json.loads(line)
executable = msg.get("executable")
if executable:
executables.append(executable)
def _run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
baseline_name = "a_baseline" if is_a else "b_baseline"

for executable in executables:
with contextlib.chdir(firecracker_checkout):
utils.check_output(
f"CARGO_TARGET_DIR=build/cargo_target taskset -c 1 {executable} --bench --save-baseline {baseline_name}"
)

return firecracker_checkout / "build" / "cargo_target" / "criterion"
return firecracker_checkout / "build" / "cargo_target" / "criterion"

return _run_criterion
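
As a hedged aside, `--save-baseline NAME` makes criterion write its results under `<CARGO_TARGET_DIR>/criterion/<benchmark>/<NAME>/estimates.json`, which is the layout the comparison code below depends on. A minimal sketch of reading one saved mean, with an assumed benchmark directory name:

# Illustrative only: the benchmark directory name and baseline name are assumptions;
# the layout follows what the comparison code in this file reads.
import json
from pathlib import Path

criterion_dir = Path("build/cargo_target/criterion")
estimates = criterion_dir / "page_fault_2" / "b_baseline" / "estimates.json"
if estimates.exists():
    mean_ns = json.loads(estimates.read_text("utf-8"))["mean"]["point_estimate"]
    print(f"page_fault_2 mean: {mean_ns / 1000:.0f}µs")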


def get_compare_results(executable):
"""
Get a function that compares the two recorded criterion baselines for regressions, assuming that "A" is the baseline from main
"""

def compare_results(location_a_baselines: Path, location_b_baselines: Path):
"""Compares the two recorded criterion baselines for regressions, assuming that "A" is the baseline from main"""
for benchmark in location_b_baselines.glob("*"):
data = json.loads(
(benchmark / "b_baseline" / "estimates.json").read_text("utf-8")
def _compare_results(location_a_baselines: Path, location_b_baselines: Path):

list_result = utils.check_output(
f"CARGO_TARGET_DIR=build/cargo_target {executable} --bench --list"
)

average_ns = data["mean"]["point_estimate"]
# Convert a string like `page_fault #2: benchmark` into `page_fault_2`, because
# under `cargo_target/criterion/` a directory named `page_fault_2` is created.
bench_marks = [
re.sub(r"\s#(?P<sub_id>[1-9]+)", r"_\g<sub_id>", i.split(":")[0])
for i in list_result.stdout.split("\n")
if i.endswith(": benchmark")
]

for benchmark in bench_marks:
data = json.loads(
(
location_b_baselines / benchmark / "b_baseline" / "estimates.json"
).read_text("utf-8")
)

LOGGER.info("%s mean: %iµs", benchmark.name, average_ns / 1000)
average_ns = data["mean"]["point_estimate"]

# Assumption: location_b_baseline = cargo_target of current working directory. So just copy the a_baselines here
# to do the comparison
for benchmark in location_a_baselines.glob("*"):
shutil.copytree(
benchmark / "a_baseline",
location_b_baselines / benchmark.name / "a_baseline",
LOGGER.info("%s mean: %iµs", benchmark, average_ns / 1000)

# Assumption: location_b_baseline = cargo_target of current working directory. So just copy the a_baselines here
# to do the comparison

for benchmark in bench_marks:
shutil.copytree(
location_a_baselines / benchmark / "a_baseline",
location_b_baselines / benchmark / "a_baseline",
)

bench_result = utils.check_output(
f"CARGO_TARGET_DIR=build/cargo_target {executable} --bench --baseline a_baseline --load-baseline b_baseline",
True,
Path.cwd().parent,
)

_, stdout, _ = cargo(
"bench",
f"--all --target {platform.machine()}-unknown-linux-musl",
"--baseline a_baseline --load-baseline b_baseline",
)
regressions_only = "\n\n".join(
result
for result in bench_result.stdout.split("\n\n")
if "Performance has regressed." in result
)

regressions_only = "\n\n".join(
result
for result in stdout.split("\n\n")
if "Performance has regressed." in result
)
# If this string is anywhere in stdout, then at least one of our benchmarks
# is now performing worse with the PR changes.
assert not regressions_only, "\n" + regressions_only

# If this string is anywhere in stdout, then at least one of our benchmarks
# is now performing worse with the PR changes.
assert not regressions_only, "\n" + regressions_only
return _compare_results
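
A worked example of the name mapping used in `_compare_results`: criterion's `--list` output is assumed to contain entries like `page_fault: benchmark` and `page_fault #2: benchmark`, and the ` #N` suffix is rewritten to `_N` to match the directory criterion creates.

# Illustrative check of the `--list` name -> criterion directory mapping; the sample
# lines are assumptions, the regex is the one used above.
import re

lines = ["page_fault: benchmark", "page_fault #2: benchmark", "block_io: bench setup"]
names = [
    re.sub(r"\s#(?P<sub_id>[1-9]+)", r"_\g<sub_id>", line.split(":")[0])
    for line in lines
    if line.endswith(": benchmark")
]
assert names == ["page_fault", "page_fault_2"]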