Commit

Laaber + Schultz's method of estimating slowdown
ehigham committed Nov 15, 2024
1 parent 8b3c84a commit 39eb23a
Showing 17 changed files with 712 additions and 4,653 deletions.
2 changes: 2 additions & 0 deletions hail/notebooks/benchmark/.gitignore
@@ -0,0 +1,2 @@
data/
out/
238 changes: 238 additions & 0 deletions hail/notebooks/benchmark/minimal-detectable-slowdown.ipynb
@@ -0,0 +1,238 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook explores variability in hail's python (macro)-benchmarks when\n",
"said benchmarks are executed on the hail batch service. The analyses within \n",
"are based off the methods proposed in [1], albeit slightly modified for long\n",
"running benchmarks. The goals of these analyses are\n",
"\n",
"- to determine if we can detect slowdowns of 5% or less reliably when running\n",
" benchmarks on hail batch.\n",
"- to identify configurations (number of batch jobs x iterations) that allow us\n",
" to detect slowdowns efficiently (ie without excesssive time and money).\n",
"\n",
"[1] Laaber et al., Software Microbenchmarking in the Cloud.How Bad is it Really?\n",
" https://dl.acm.org/doi/10.1007/s10664-019-09681-1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"from benchmark.tools.impex import import_timings\n",
"from benchmark.tools.plotting import plot_mean_time_per_instance, plot_trial_against_time\n",
"from benchmark.tools.statistics import (\n",
" analyze_benchmarks,\n",
" bootstrap_mean_confidence_interval,\n",
" laaber_mds,\n",
" schultz_mds,\n",
" variability,\n",
")\n",
"from plotly.io import renderers\n",
"\n",
"import hail as hl\n",
"\n",
"renderers.default = 'notebook_connected'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hl.init(backend='spark', idempotent=True)\n",
"hl._set_flags(use_new_shuffle='1', lower='1')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Import benchmark data\n",
"# ---------------------\n",
"#\n",
"# benchmarks under `hail/python/benchmarks` are executed with a custom pytest\n",
"# plugin and their results are output as json lines (.jsonl).\n",
"# Unscrupulously, we use hail to analyse itself.\n",
"\n",
"# Plotting the time vs iteration for all instances provides a visual way of\n",
"# identifying the number of burn-in iteration required to reach a steady-state.\n",
"# Note that a steady state is never reached in some cases.\n",
"\n",
"ht = import_timings(Path('data/100x120.jsonl'))\n",
"ht = ht.checkpoint('out/imported.ht', overwrite=True)\n",
"benchmarks = ht.aggregate(hl.agg.collect_as_set(ht.name))\n",
"print(*benchmarks, sep='\\n')\n",
"plot_trial_against_time(ht)"
]
},
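{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch: peek at the raw records before importing with hail. This\n",
"# assumes only that the file is standard json-lines (one object per line);\n",
"# the exact field names are whatever the pytest plugin emits.\n",
"\n",
"import json\n",
"from itertools import islice\n",
"\n",
"with Path('data/100x120.jsonl').open() as f:\n",
"    for line in islice(f, 3):\n",
"        print(sorted(json.loads(line)))  # top-level field names only"
]
},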
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# This is an iterative process. Select the minimum number of burn-in iterations\n",
"# required for each benchmark. Replot and verify that the graph is more-or-less\n",
"# flat. This may not be possible in all cases.\n",
"\n",
"\n",
"def filter_burn_in_iterations(ht: hl.Table) -> hl.Table:\n",
" ht = ht.annotate_globals(\n",
" first_stable_index={\n",
" 'benchmark_export_range_matrix_table_row_p100': 20,\n",
" 'benchmark_import_gvcf_force_count': 10,\n",
" 'benchmark_matrix_table_take_col': 30,\n",
" 'benchmark_ndarray_matmul_int64': 23,\n",
" 'benchmark_sample_qc': 14,\n",
" 'benchmark_shuffle_key_rows_by_mt': 10,\n",
" 'benchmark_union_partitions_table[100-100]': 40,\n",
" },\n",
" )\n",
"\n",
" return ht.select(\n",
" instances=ht.instances.map(\n",
" lambda instance: instance.annotate(\n",
" trials=(instance.trials.filter(lambda t: t.iteration >= ht.first_stable_index[ht.name]))\n",
" )\n",
" ),\n",
" )\n",
"\n",
"\n",
"ht = filter_burn_in_iterations(ht)\n",
"plot_trial_against_time(ht)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# As a final step of cleaning, we'll filter out trials that differ by some\n",
"# multiplier of the median for each instance\n",
"\n",
"\n",
"def filter_outliers(ht: hl.Table, factor: hl.Float64Expression) -> hl.Table:\n",
" # Filter out failures and\n",
" return ht.select(\n",
" instances=ht.instances.map(\n",
" lambda instance: instance.annotate(\n",
" trials=hl.bind(\n",
" lambda median: instance.trials.filter(\n",
" lambda t: hl.max([t.time, median]) / hl.min([t.time, median]) < factor\n",
" ),\n",
" hl.median(instance.trials.map(lambda t: t.time)),\n",
" )\n",
" ),\n",
" ),\n",
" )\n",
"\n",
"\n",
"ht = filter_outliers(ht, hl.float64(10))\n",
"plot_trial_against_time(ht)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# These plots show the mean time per instance. This provides a visual way of\n",
"# identifying differences in instance type if there are multiple distinct layers\n",
"\n",
"plot_mean_time_per_instance(ht)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ht = ht.select(instances=ht.instances.trials.time).checkpoint('out/pruned.ht', overwrite=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# laaber et al. section 4\n",
"\n",
"variability(ht).show()"
]
},
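{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch: one common variability measure is the coefficient of\n",
"# variation (stddev / mean) over all trial times for a benchmark. This is\n",
"# an illustration of the idea only; `variability` may report other measures.\n",
"\n",
"ht.select(\n",
"    cv=hl.bind(\n",
"        lambda times: hl.bind(\n",
"            lambda mean: hl.sqrt(hl.mean(times.map(lambda t: (t - mean) ** 2))) / mean,\n",
"            hl.mean(times),\n",
"        ),\n",
"        hl.flatten(ht.instances),\n",
"    )\n",
").show()"
]
},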
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# laaber et al. section 5 - boostrapping confidence intervals of the mean\n",
"\n",
"bootstrap_mean_confidence_interval(ht, 1000, 0.95).show()"
]
},
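{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch: a plain percentile bootstrap of the mean for a single,\n",
"# flat sample, illustrating the idea behind\n",
"# `bootstrap_mean_confidence_interval`. The library version may resample\n",
"# hierarchically (instances, then trials), as in Laaber et al.; this\n",
"# version is for intuition only.\n",
"\n",
"import numpy as np\n",
"\n",
"\n",
"def bootstrap_ci(times, n_boot=1000, confidence=0.95, seed=0):\n",
"    rng = np.random.default_rng(seed)\n",
"    times = np.asarray(times)\n",
"    means = np.array([rng.choice(times, size=times.size, replace=True).mean() for _ in range(n_boot)])\n",
"    alpha = 1 - confidence\n",
"    return np.quantile(means, [alpha / 2, 1 - alpha / 2])\n",
"\n",
"\n",
"bootstrap_ci([1.0, 1.1, 0.9, 1.05, 0.95])"
]
},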
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Laaber et al - Minimal-Detectable Slowdown\n",
"\n",
"laaber = laaber_mds(ht).checkpoint('out/laaber-mds.ht', overwrite=True)\n",
"schultz = schultz_mds(ht).checkpoint('out/schultz-mds.ht', overwrite=True)"
]
},
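{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch of the minimal-detectable-slowdown idea: inject an\n",
"# artificial slowdown by scaling a copy of the sample by (1 + s), and find\n",
"# the smallest s whose bootstrap CI no longer overlaps the original's\n",
"# (reusing `bootstrap_ci` from the cell above). Illustrative only;\n",
"# `laaber_mds` and `schultz_mds` may differ in detail.\n",
"\n",
"\n",
"def minimal_detectable_slowdown(times, slowdowns=(0.01, 0.02, 0.05, 0.10, 0.25)):\n",
"    _, hi = bootstrap_ci(times)\n",
"    for s in slowdowns:\n",
"        slow_lo, _ = bootstrap_ci([t * (1 + s) for t in times])\n",
"        if slow_lo > hi:  # intervals disjoint => slowdown detectable\n",
"            return s\n",
"    return None  # not detectable within the tested range\n",
"\n",
"\n",
"minimal_detectable_slowdown([1.0, 1.1, 0.9, 1.05, 0.95])"
]
},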
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [],
"source": [
"\n",
"mds = laaber.select(laaber=laaber.row_value, schultz=schultz[laaber.key])\n",
"mds.show(100_000)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 4
}