Skip to content

Commit fa2e947

Browse files
committed
[query] Benchmark configurations
1 parent 54cd48a commit fa2e947

File tree

8 files changed

+76
-76
lines changed

8 files changed

+76
-76
lines changed

hail/python/benchmark/hail/benchmark_benchmark_analysis.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from benchmark.tools.statistics import analyze_benchmarks
88

99

10-
@pytest.mark.benchmark()
10+
@pytest.mark.benchmark(mds=1.2, instances=5, iterations=10, burn_in_iterations=10)
1111
def benchmark_analyze_benchmarks(local_tmpdir, onethreetwo, onethreethree):
1212
inputs = (onethreetwo, onethreethree)
1313
inputs = ((v, Path(tempfile.mktemp(dir=local_tmpdir))) for v in inputs)

hail/python/benchmark/hail/benchmark_combiner.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -25,30 +25,30 @@ def benchmark_compile_2k_merge(empty_gvcf, tmp_path):
2525
hl.vds.write_variant_datasets(combined, str(tmp_path / 'combiner-multi-write'), overwrite=True)
2626

2727

28-
@pytest.mark.benchmark()
28+
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=10, burn_in_iterations=10)
2929
@pytest.mark.xfail(raises=TimeoutError, reason=XFail.Timeout)
3030
def benchmark_python_only_10k_transform(empty_gvcf):
3131
for vcf in [import_vcf(empty_gvcf)] * 10_000:
3232
transform_gvcf(vcf, [])
3333

3434

35-
@pytest.mark.benchmark()
35+
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=20)
3636
def benchmark_python_only_10k_combine(empty_gvcf):
3737
vcf = import_vcf(empty_gvcf)
3838
mt = transform_gvcf(vcf, [])
3939
for mts in chunk(COMBINE_GVCF_MAX, [mt] * 10_000):
4040
combine_variant_datasets(mts)
4141

4242

43-
@pytest.mark.benchmark()
43+
@pytest.mark.benchmark(mds=1.2, instances=10, iterations=10, burn_in_iterations=10)
4444
def benchmark_import_and_transform_gvcf(single_gvcf):
4545
mt = import_vcf(single_gvcf)
4646
vds = transform_gvcf(mt, [])
4747
vds.reference_data._force_count_rows()
4848
vds.variant_data._force_count_rows()
4949

5050

51-
@pytest.mark.benchmark()
51+
@pytest.mark.benchmark(mds=1.2, instances=10, iterations=15, burn_in_iterations=8)
5252
def benchmark_import_gvcf_force_count(single_gvcf):
5353
mt = import_vcf(single_gvcf)
5454
mt._force_count_rows()
@@ -63,7 +63,7 @@ def tmp_and_output_paths(tmp_path):
6363
return (tmp, output)
6464

6565

66-
@pytest.mark.benchmark()
66+
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=10)
6767
@pytest.mark.xfail(raises=TimeoutError, reason=XFail.Timeout)
6868
def benchmark_vds_combiner_chr22(chr22_gvcfs, tmp_and_output_paths):
6969
parts = hl.eval([hl.parse_locus_interval('chr22:start-end', reference_genome='GRCh38')])

hail/python/benchmark/hail/benchmark_linalg.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,32 +13,32 @@ def benchmark_block_matrix_nested_multiply(tmp_path):
1313
bm.write(str(tmp_path / 'result.mt'), overwrite=True)
1414

1515

16-
@pytest.mark.benchmark()
16+
@pytest.mark.benchmark(mds=1.2, instances=10, iterations=5, burn_in_iterations=5)
1717
def benchmark_make_ndarray():
1818
ht = hl.utils.range_table(200_000)
1919
ht = ht.annotate(x=hl.nd.array(hl.range(ht.idx)))
2020
ht._force_count()
2121

2222

23-
@pytest.mark.benchmark()
23+
@pytest.mark.benchmark(mds=1.2, instances=10, iterations=20, burn_in_iterations=10)
2424
def benchmark_ndarray_addition():
2525
arr = hl.nd.ones((1024, 1024))
2626
hl.eval(arr + arr)
2727

2828

29-
@pytest.mark.benchmark()
29+
@pytest.mark.benchmark(mds=1.2, instances=20, iterations=5, burn_in_iterations=10)
3030
def benchmark_ndarray_matmul_int64():
3131
arr = hl.nd.arange(1024 * 1024).map(hl.int64).reshape((1024, 1024))
3232
hl.eval(arr @ arr)
3333

3434

35-
@pytest.mark.benchmark()
35+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=6)
3636
def benchmark_ndarray_matmul_float64():
3737
arr = hl.nd.arange(1024 * 1024).map(hl.float64).reshape((1024, 1024))
3838
hl.eval(arr @ arr)
3939

4040

41-
@pytest.mark.benchmark()
41+
@pytest.mark.benchmark(mds=1.2, instances=10, iterations=5, burn_in_iterations=10)
4242
@pytest.mark.xfail(raises=TimeoutError, reason=XFail.Timeout)
4343
def benchmark_blockmatrix_write_from_entry_expr_range_mt(tmp_path):
4444
mt = hl.utils.range_matrix_table(40_000, 40_000, n_partitions=4)
@@ -56,7 +56,7 @@ def benchmark_blockmatrix_write_from_entry_expr_range_mt_standardize(tmp_path):
5656
)
5757

5858

59-
@pytest.mark.benchmark()
59+
@pytest.mark.benchmark(mds=1.1, instances=5, iterations=5, burn_in_iterations=10)
6060
def benchmark_sum_table_of_ndarrays():
6161
ht = hl.utils.range_table(400).annotate(nd=hl.nd.ones((4096, 4096)))
6262
ht.aggregate(hl.agg.ndarray_sum(ht.nd))

hail/python/benchmark/hail/benchmark_matrix_table.py

Lines changed: 36 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -4,110 +4,110 @@
44
from benchmark.hail.utils import XFail
55

66

7-
@pytest.mark.benchmark()
7+
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=15, burn_in_iterations=8)
88
def benchmark_matrix_table_decode_and_count(profile25_mt):
99
mt = hl.read_matrix_table(str(profile25_mt))
1010
mt._force_count_rows()
1111

1212

13-
@pytest.mark.benchmark()
13+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=20, burn_in_iterations=5)
1414
def benchmark_matrix_table_decode_and_count_just_gt(profile25_mt):
1515
mt = hl.read_matrix_table(str(profile25_mt)).select_entries('GT')
1616
mt._force_count_rows()
1717

1818

19-
@pytest.mark.benchmark()
19+
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=10, burn_in_iterations=20)
2020
def benchmark_matrix_table_array_arithmetic(profile25_mt):
2121
mt = hl.read_matrix_table(str(profile25_mt))
2222
mt = mt.filter_rows(mt.alleles.length() == 2)
2323
mt.select_entries(dosage=hl.pl_dosage(mt.PL)).select_rows()._force_count_rows()
2424

2525

26-
@pytest.mark.benchmark()
26+
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=5, burn_in_iterations=10)
2727
def benchmark_matrix_table_entries_table(profile25_mt):
2828
mt = hl.read_matrix_table(str(profile25_mt))
2929
mt.entries()._force_count()
3030

3131

32-
@pytest.mark.benchmark()
32+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=10)
3333
def benchmark_matrix_table_entries_table_no_key(profile25_mt):
3434
mt = hl.read_matrix_table(str(profile25_mt)).key_rows_by().key_cols_by()
3535
mt.entries()._force_count()
3636

3737

38-
@pytest.mark.benchmark()
38+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=30)
3939
def benchmark_matrix_table_rows_force_count(profile25_mt):
4040
ht = hl.read_matrix_table(str(profile25_mt)).rows().key_by()
4141
ht._force_count()
4242

4343

44-
@pytest.mark.benchmark()
44+
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=10, burn_in_iterations=15)
4545
def benchmark_matrix_table_show(profile25_mt):
4646
mt = hl.read_matrix_table(str(profile25_mt))
4747
mt.show(100)
4848

4949

50-
@pytest.mark.benchmark()
50+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=15)
5151
def benchmark_matrix_table_rows_show(profile25_mt):
5252
mt = hl.read_matrix_table(str(profile25_mt))
5353
mt.rows().show(100)
5454

5555

56-
@pytest.mark.benchmark()
56+
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=15, burn_in_iterations=16)
5757
def benchmark_matrix_table_cols_show(profile25_mt):
5858
mt = hl.read_matrix_table(str(profile25_mt))
5959
mt.cols().show(100)
6060

6161

62-
@pytest.mark.benchmark()
62+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=10)
6363
def benchmark_matrix_table_take_entry(profile25_mt):
6464
mt = hl.read_matrix_table(str(profile25_mt))
6565
mt.GT.take(100)
6666

6767

68-
@pytest.mark.benchmark()
68+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=15)
6969
def benchmark_matrix_table_entries_show(profile25_mt):
7070
mt = hl.read_matrix_table(str(profile25_mt))
7171
mt.entries().show()
7272

7373

74-
@pytest.mark.benchmark()
74+
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=20, burn_in_iterations=10)
7575
def benchmark_matrix_table_take_row(profile25_mt):
7676
mt = hl.read_matrix_table(str(profile25_mt))
7777
mt.info.AF.take(100)
7878

7979

80-
@pytest.mark.benchmark()
80+
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=20, burn_in_iterations=10)
8181
def benchmark_matrix_table_take_col(profile25_mt):
8282
mt = hl.read_matrix_table(str(profile25_mt))
8383
mt.s.take(100)
8484

8585

86-
@pytest.mark.benchmark()
86+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=8)
8787
def benchmark_write_range_matrix_table_p100(tmp_path):
8888
mt = hl.utils.range_matrix_table(n_rows=1_000_000, n_cols=10, n_partitions=100)
8989
mt = mt.annotate_entries(x=mt.col_idx + mt.row_idx)
9090
mt.write(str(tmp_path / 'tmp.mt'))
9191

9292

93-
@pytest.mark.benchmark()
93+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=15)
9494
def benchmark_write_profile_mt(profile25_mt, tmp_path):
9595
hl.read_matrix_table(str(profile25_mt)).write(str(tmp_path / 'tmp.mt'))
9696

9797

98-
@pytest.mark.benchmark()
98+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=20, burn_in_iterations=9)
9999
def benchmark_matrix_table_rows_is_transition(profile25_mt):
100100
ht = hl.read_matrix_table(str(profile25_mt)).rows().key_by()
101101
ht.select(is_snp=hl.is_snp(ht.alleles[0], ht.alleles[1]))._force_count()
102102

103103

104-
@pytest.mark.benchmark()
104+
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=20, burn_in_iterations=6)
105105
def benchmark_matrix_table_filter_entries(profile25_mt):
106106
mt = hl.read_matrix_table(str(profile25_mt))
107107
mt.filter_entries((mt.GQ > 8) & (mt.DP > 2))._force_count_rows()
108108

109109

110-
@pytest.mark.benchmark()
110+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=3)
111111
def benchmark_matrix_table_filter_entries_unfilter(profile25_mt):
112112
mt = hl.read_matrix_table(str(profile25_mt))
113113
mt.filter_entries((mt.GQ > 8) & (mt.DP > 2)).unfilter_entries()._force_count_rows()
@@ -164,27 +164,27 @@ def many_aggs(mt):
164164
return {f'x{i}': expr for i, expr in enumerate(aggs)}
165165

166166

167-
@pytest.mark.benchmark()
167+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=5, burn_in_iterations=4)
168168
def benchmark_matrix_table_many_aggs_row_wise(profile25_mt):
169169
mt = hl.read_matrix_table(str(profile25_mt))
170170
mt = mt.annotate_rows(**many_aggs(mt))
171171
mt.rows()._force_count()
172172

173173

174-
@pytest.mark.benchmark()
174+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=5, burn_in_iterations=10)
175175
def benchmark_matrix_table_many_aggs_col_wise(profile25_mt):
176176
mt = hl.read_matrix_table(str(profile25_mt))
177177
mt = mt.annotate_cols(**many_aggs(mt))
178178
mt.cols()._force_count()
179179

180180

181-
@pytest.mark.benchmark()
181+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=8)
182182
def benchmark_matrix_table_aggregate_entries(profile25_mt):
183183
mt = hl.read_matrix_table(str(profile25_mt))
184184
mt.aggregate_entries(hl.agg.stats(mt.GQ))
185185

186186

187-
@pytest.mark.benchmark()
187+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=8)
188188
def benchmark_matrix_table_call_stats_star_star(profile25_mt):
189189
mt = hl.read_matrix_table(str(profile25_mt))
190190
mt.annotate_rows(**hl.agg.call_stats(mt.GT, mt.alleles))._force_count_rows()
@@ -242,60 +242,60 @@ def benchmark_gnomad_coverage_stats_optimized(gnomad_dp_sim):
242242
mt.rows()._force_count()
243243

244244

245-
@pytest.mark.benchmark()
245+
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=20, burn_in_iterations=10)
246246
def benchmark_per_row_stats_star_star(gnomad_dp_sim):
247247
mt = hl.read_matrix_table(str(gnomad_dp_sim))
248248
mt.annotate_rows(**hl.agg.stats(mt.x))._force_count_rows()
249249

250250

251-
@pytest.mark.benchmark()
251+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=10)
252252
def benchmark_read_decode_gnomad_coverage(gnomad_dp_sim):
253253
hl.read_matrix_table(str(gnomad_dp_sim))._force_count_rows()
254254

255255

256-
@pytest.mark.benchmark()
256+
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=10)
257257
def benchmark_import_bgen_force_count_just_gp(sim_ukb_bgen, sim_ukb_sample):
258258
mt = hl.import_bgen(str(sim_ukb_bgen), sample_file=str(sim_ukb_sample), entry_fields=['GP'], n_partitions=8)
259259
mt._force_count_rows()
260260

261261

262-
@pytest.mark.benchmark()
262+
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=20)
263263
def benchmark_import_bgen_force_count_all(sim_ukb_bgen, sim_ukb_sample):
264264
mt = hl.import_bgen(
265265
str(sim_ukb_bgen), sample_file=str(sim_ukb_sample), entry_fields=['GT', 'GP', 'dosage'], n_partitions=8
266266
)
267267
mt._force_count_rows()
268268

269269

270-
@pytest.mark.benchmark()
270+
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=12)
271271
@pytest.mark.xfail(raises=TimeoutError, reason=XFail.Timeout)
272272
def benchmark_import_bgen_info_score(sim_ukb_bgen, sim_ukb_sample):
273273
mt = hl.import_bgen(str(sim_ukb_bgen), sample_file=str(sim_ukb_sample), entry_fields=['GP'], n_partitions=8)
274274
mt = mt.annotate_rows(info_score=hl.agg.info_score(mt.GP))
275275
mt.rows().select('info_score')._force_count()
276276

277277

278-
@pytest.mark.benchmark()
278+
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=18)
279279
def benchmark_import_bgen_filter_count(sim_ukb_bgen, sim_ukb_sample):
280280
mt = hl.import_bgen(str(sim_ukb_bgen), sample_file=str(sim_ukb_sample), entry_fields=['GT', 'GP'], n_partitions=8)
281281
mt = mt.filter_rows(mt.alleles == ['A', 'T'])
282282
mt._force_count_rows()
283283

284284

285-
@pytest.mark.benchmark()
285+
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=20, burn_in_iterations=3)
286286
def benchmark_export_range_matrix_table_entry_field_p100(tmp_path):
287287
mt = hl.utils.range_matrix_table(n_rows=1_000_000, n_cols=10, n_partitions=100)
288288
mt = mt.annotate_entries(x=mt.col_idx + mt.row_idx)
289289
mt.x.export(str(tmp_path / 'result.txt'))
290290

291291

292-
@pytest.mark.benchmark()
292+
@pytest.mark.benchmark(mds=1.2, instances=10, iterations=10, burn_in_iterations=8)
293293
def benchmark_export_range_matrix_table_row_p100(tmp_path):
294294
mt = hl.utils.range_matrix_table(n_rows=1_000_000, n_cols=10, n_partitions=100)
295295
mt.row.export(str(tmp_path / 'result.txt'))
296296

297297

298-
@pytest.mark.benchmark()
298+
@pytest.mark.benchmark(mds=1.2, instances=15, iterations=25, burn_in_iterations=15)
299299
def benchmark_export_range_matrix_table_col_p100(tmp_path):
300300
mt = hl.utils.range_matrix_table(n_rows=1_000_000, n_cols=10, n_partitions=100)
301301
mt.col.export(str(tmp_path / 'result.txt'))
@@ -309,7 +309,7 @@ def benchmark_large_range_matrix_table_sum():
309309
mt.annotate_cols(foo=hl.agg.sum(mt.x))._force_count_cols()
310310

311311

312-
@pytest.mark.benchmark()
312+
@pytest.mark.benchmark(mds=1.2, instances=10, iterations=5, burn_in_iterations=7)
313313
def benchmark_kyle_sex_specific_qc(profile25_mt):
314314
mt = hl.read_matrix_table(str(profile25_mt))
315315
mt = mt.annotate_cols(sex=hl.if_else(hl.rand_bool(0.5), 'Male', 'Female'))
@@ -350,14 +350,14 @@ def benchmark_kyle_sex_specific_qc(profile25_mt):
350350
mt.rows()._force_count()
351351

352352

353-
@pytest.mark.benchmark()
353+
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=10, burn_in_iterations=5)
354354
def benchmark_matrix_table_scan_count_rows_2():
355355
mt = hl.utils.range_matrix_table(n_rows=200_000_000, n_cols=10, n_partitions=16)
356356
mt = mt.annotate_rows(x=hl.scan.count())
357357
mt._force_count_rows()
358358

359359

360-
@pytest.mark.benchmark()
360+
@pytest.mark.benchmark(mds=1.3, instances=20, iterations=10, burn_in_iterations=20)
361361
def benchmark_matrix_table_scan_count_cols_2():
362362
mt = hl.utils.range_matrix_table(n_cols=10_000_000, n_rows=10)
363363
mt = mt.annotate_cols(x=hl.scan.count())
@@ -372,14 +372,14 @@ def benchmark_matrix_multi_write_nothing(tmp_path):
372372
hl.experimental.write_matrix_tables(mts, str(tmp_path / 'multi-write'))
373373

374374

375-
@pytest.mark.benchmark()
375+
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=10, burn_in_iterations=5)
376376
def benchmark_mt_localize_and_collect(profile25_mt):
377377
mt = hl.read_matrix_table(str(profile25_mt))
378378
ht = mt.localize_entries("ent")
379379
ht.head(150).collect()
380380

381381

382-
@pytest.mark.benchmark()
382+
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=5)
383383
def benchmark_mt_group_by_memory_usage(random_doubles_mt):
384384
mt = hl.read_matrix_table(str(random_doubles_mt))
385385
mt = mt.group_rows_by(new_idx=mt.row_idx % 3).aggregate(x=hl.agg.mean(mt.x))

0 commit comments

Comments
 (0)