Commit
Laaber + Schultz's method of estimating slowdown
Showing 20 changed files with 960 additions and 4,679 deletions.
@@ -0,0 +1,2 @@
data/
out/
281 changes: 281 additions & 0 deletions
hail/notebooks/benchmark/minimal-detectable-slowdown.ipynb
@@ -0,0 +1,281 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook explores variability in hail's python (macro-)benchmarks when\n",
"said benchmarks are executed on the hail batch service. The analyses within\n",
"are based on the methods proposed in [1], albeit slightly modified for\n",
"long-running benchmarks. The goals of these analyses are\n",
"\n",
"- to determine if we can reliably detect slowdowns of 5% or less when running\n",
"  benchmarks on hail batch;\n",
"- to identify configurations (number of batch jobs × iterations) that let us\n",
"  detect slowdowns efficiently (i.e. without excessive time and money).\n",
"\n",
"[1] Laaber et al., Software Microbenchmarking in the Cloud. How Bad Is It Really?\n",
"    https://dl.acm.org/doi/10.1007/s10664-019-09681-1"
]
},
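{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before touching the data, the next cell sketches the core idea behind the\n",
"minimal-detectable-slowdown (MDS) analyses at the end of this notebook:\n",
"bootstrap a confidence interval of the mean for a baseline and for a\n",
"slowed-down variant, and call the slowdown detectable when the intervals do\n",
"not overlap. The sketch runs on simulated timings and assumes only `numpy`;\n",
"none of its names belong to the benchmark tooling."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only; see the markdown cell above.\n",
"import numpy as np\n",
"\n",
"rng = np.random.default_rng(0)\n",
"\n",
"\n",
"def bootstrap_ci(samples, n_resamples=1000, level=0.95):\n",
"    # percentile-bootstrap confidence interval of the mean\n",
"    means = np.array([rng.choice(samples, size=len(samples), replace=True).mean() for _ in range(n_resamples)])\n",
"    return np.quantile(means, [(1 - level) / 2, (1 + level) / 2])\n",
"\n",
"\n",
"# simulated timings: a noisy baseline and a variant slowed down by 5%\n",
"baseline = rng.normal(loc=10.0, scale=0.3, size=30)\n",
"slowed = baseline * 1.05\n",
"\n",
"# the slowdown is 'detected' when the two intervals do not overlap\n",
"lo_a, hi_a = bootstrap_ci(baseline)\n",
"lo_b, hi_b = bootstrap_ci(slowed)\n",
"print('detected:', hi_a < lo_b or hi_b < lo_a)"
]
},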
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"from benchmark.tools.impex import dump_tsv, import_timings\n",
"from benchmark.tools.plotting import plot_mean_time_per_instance, plot_trial_against_time\n",
"from benchmark.tools.statistics import (\n",
"    bootstrap_mean_confidence_interval,\n",
"    laaber_mds,\n",
"    schultz_mds,\n",
"    variability,\n",
")\n",
"from IPython.display import clear_output\n",
"from plotly.io import renderers\n",
"\n",
"import hail as hl\n",
"\n",
"renderers.default = 'notebook_connected'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hl.init(backend='spark', idempotent=True, local_tmpdir='/tmp/mds')\n",
"hl._set_flags(use_new_shuffle='1', lower='1')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Import benchmark data\n",
"# ---------------------\n",
"#\n",
"# Benchmarks under `hail/python/benchmarks` are executed with a custom pytest\n",
"# plugin and their results are output as JSON Lines (.jsonl).\n",
"# Unscrupulously, we use hail to analyse itself.\n",
"\n",
"with hl.TemporaryFilename(dir='/tmp') as tsvfile:\n",
"    timings = Path(tsvfile)\n",
"    dump_tsv(Path('data/1k.jsonl'), timings)\n",
"    ht = import_timings(timings)\n",
"    ht = ht.checkpoint('out/imported.ht', overwrite=True)\n",
"\n",
"benchmarks = ht.aggregate(hl.agg.collect_as_set(ht.name))\n",
"print(*benchmarks, sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep benchmarks for which all 60 instances reported results.\n",
"t = ht\n",
"t = t.filter(hl.len(t.instances) == 60)\n",
"names = t.aggregate(hl.array(hl.agg.collect_as_set(t.name)))\n",
"print(*names, sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plotting time against iteration for all instances provides a visual way of\n",
"# identifying the number of burn-in iterations required to reach a steady state.\n",
"# Note that in some cases a steady state is never reached.\n",
"\n",
"for fig in plot_trial_against_time(ht, names=names):\n",
"    clear_output(wait=True)\n",
"    print(fig.labels.title)\n",
"    fig.show()\n",
"    input()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# This is an iterative process. Select the minimum number of burn-in iterations\n",
"# required for each benchmark. Replot and verify that the graph is more-or-less\n",
"# flat. This may not be possible in all cases.\n",
"\n",
"\n",
"def filter_burn_in_iterations(ht: hl.Table) -> hl.Table:\n",
"    ht = ht.annotate_globals(\n",
"        first_stable_index={\n",
"            'benchmark_join_partitions_table[100-10]': 15,\n",
"            'benchmark_union_partitions_table[10-10]': 4,\n",
"            'benchmark_join_partitions_table[1000-1000]': 15,\n",
"            'benchmark_write_range_table[10000000-1000]': 5,\n",
"            'benchmark_matrix_table_array_arithmetic': 15,\n",
"            'benchmark_table_aggregate_array_sum': 5,\n",
"            'benchmark_matrix_table_cols_show': 10,\n",
"            'benchmark_pc_relate': hl.missing(hl.tint),\n",
"            'benchmark_write_profile_mt': 20,\n",
"            'benchmark_table_aggregate_approx_cdf': 28,\n",
"            'benchmark_table_aggregate_counter': 12,\n",
"            'benchmark_table_show': 10,\n",
"            'benchmark_export_range_matrix_table_entry_field_p100': 5,\n",
"            'benchmark_group_by_collect_per_row': 8,\n",
"            'benchmark_export_range_matrix_table_row_p100': 20,\n",
"            'benchmark_import_gvcf_force_count': 10,\n",
"            'benchmark_matrix_table_take_col': 30,\n",
"            'benchmark_ndarray_matmul_int64': 23,\n",
"            'benchmark_sample_qc': 14,\n",
"            'benchmark_shuffle_key_rows_by_mt': 10,\n",
"            'benchmark_union_partitions_table[100-100]': 40,\n",
"        },\n",
"    )\n",
"\n",
"    return ht.select(\n",
"        instances=ht.instances.map(\n",
"            lambda instance: instance.annotate(\n",
"                trials=instance.trials.filter(lambda t: t.iteration >= ht.first_stable_index[ht.name])\n",
"            )\n",
"        ),\n",
"    )\n",
"\n",
"\n",
"ht = filter_burn_in_iterations(ht)\n",
"plot_trial_against_time(ht)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# As a final cleaning step, filter out trials whose times differ from each\n",
"# instance's median by more than some multiplicative factor.\n",
"\n",
"\n",
"def filter_outliers(ht: hl.Table, factor: hl.Float64Expression) -> hl.Table:\n",
"    # Drop trials where max(time, median) / min(time, median) >= factor\n",
"    return ht.select(\n",
"        instances=ht.instances.map(\n",
"            lambda instance: instance.annotate(\n",
"                trials=hl.bind(\n",
"                    lambda median: instance.trials.filter(\n",
"                        lambda t: hl.max([t.time, median]) / hl.min([t.time, median]) < factor\n",
"                    ),\n",
"                    hl.median(instance.trials.map(lambda t: t.time)),\n",
"                )\n",
"            ),\n",
"        ),\n",
"    )\n",
"\n",
"\n",
"ht = filter_outliers(ht, hl.float64(10))\n",
"plot_trial_against_time(ht)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# These plots show the mean time per instance. They provide a visual way of\n",
"# spotting differences in underlying machine type: multiple distinct bands\n",
"# suggest the benchmarks ran on heterogeneous hardware.\n",
"\n",
"plot_mean_time_per_instance(ht)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the per-trial times, then checkpoint the pruned table.\n",
"ht = ht.select(instances=ht.instances.trials.time).checkpoint('out/pruned.ht', overwrite=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Laaber et al., section 4: variability of the measurements\n",
"\n",
"variability(ht).show()"
]
},
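{
"cell_type": "markdown",
"metadata": {},
"source": [
"For intuition, the next cell sketches one common variability measure, the\n",
"coefficient of variation (standard deviation over mean), on toy data. It is\n",
"a hypothetical stand-in: `variability`'s actual output may include more (or\n",
"different) statistics."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical sketch of a variability measure; not the real `variability`.\n",
"import numpy as np\n",
"\n",
"# toy data: per-trial times for two instances of one benchmark\n",
"instances = [[10.1, 10.3, 9.9], [10.6, 10.4, 10.5]]\n",
"pooled = np.concatenate([np.asarray(ts) for ts in instances])\n",
"\n",
"# coefficient of variation: relative spread of the pooled timings\n",
"print(f'cv = {pooled.std(ddof=1) / pooled.mean():.3f}')"
]
},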
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Laaber et al., section 5: bootstrapping confidence intervals of the mean\n",
"\n",
"bootstrap_mean_confidence_interval(ht, 1000, 0.95).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Laaber et al.: minimal detectable slowdown (MDS)\n",
"\n",
"laaber = laaber_mds(ht).checkpoint('out/laaber-mds.ht', overwrite=True)\n",
"schultz = schultz_mds(ht).checkpoint('out/schultz-mds.ht', overwrite=True)"
]
},
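{
"cell_type": "markdown",
"metadata": {},
"source": [
"To make the MDS notion concrete: the sketch below reuses `bootstrap_ci` and\n",
"`baseline` from the simulated example near the top of the notebook, scanning\n",
"slowdown factors for the smallest one that the interval-overlap test flags.\n",
"`laaber_mds` and `schultz_mds` differ in how they resample and compare, but\n",
"they answer the same question. Illustrative only."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative only: the smallest injected slowdown the overlap test detects.\n",
"def minimal_detectable_slowdown(samples, factors):\n",
"    for f in sorted(factors):\n",
"        lo_a, hi_a = bootstrap_ci(samples)\n",
"        lo_b, hi_b = bootstrap_ci(samples * f)\n",
"        if hi_a < lo_b or hi_b < lo_a:\n",
"            return f\n",
"    return None\n",
"\n",
"\n",
"print('mds factor:', minimal_detectable_slowdown(baseline, [1.01, 1.02, 1.05, 1.10, 1.25]))"
]
},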
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [],
"source": [
"mds = laaber.select(laaber=laaber.row_value, schultz=schultz[laaber.key])\n",
"mds.show(100_000)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 4
}