4
4
from benchmark .hail .utils import XFail
5
5
6
6
7
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=15, burn_in_iterations=8)
def benchmark_matrix_table_decode_and_count(profile25_mt):
    """Read the matrix table at ``profile25_mt`` and call ``_force_count_rows()`` on it."""
    mt = hl.read_matrix_table(str(profile25_mt))
    mt._force_count_rows()
11
11
12
12
13
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=20, burn_in_iterations=5)
def benchmark_matrix_table_decode_and_count_just_gt(profile25_mt):
    """Read ``profile25_mt``, select only the ``GT`` entry field, and call ``_force_count_rows()``."""
    mt = hl.read_matrix_table(str(profile25_mt))
    gt_only = mt.select_entries('GT')
    gt_only._force_count_rows()
17
17
18
18
19
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=10, burn_in_iterations=20)
def benchmark_matrix_table_array_arithmetic(profile25_mt):
    """Compute ``hl.pl_dosage(mt.PL)`` per entry on two-allele rows and force a row count.

    Rows are first filtered to those whose ``alleles`` array has length 2.
    The entries are then replaced by a single ``dosage`` field,
    ``select_rows()`` is called with no arguments, and
    ``_force_count_rows()`` is invoked on the result.
    """
    mt = hl.read_matrix_table(str(profile25_mt))
    mt = mt.filter_rows(mt.alleles.length() == 2)
    dosage_mt = mt.select_entries(dosage=hl.pl_dosage(mt.PL)).select_rows()
    dosage_mt._force_count_rows()
24
24
25
25
26
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=5, burn_in_iterations=10)
def benchmark_matrix_table_entries_table(profile25_mt):
    """Read ``profile25_mt``, take its ``entries()`` table, and call ``_force_count()``."""
    mt = hl.read_matrix_table(str(profile25_mt))
    entries = mt.entries()
    entries._force_count()
30
30
31
31
32
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=10)
def benchmark_matrix_table_entries_table_no_key(profile25_mt):
    """Same as the keyed entries-table benchmark, but on an unkeyed matrix table.

    ``key_rows_by()`` and ``key_cols_by()`` are both called with no arguments
    before ``entries()._force_count()`` is evaluated.
    """
    mt = hl.read_matrix_table(str(profile25_mt))
    mt = mt.key_rows_by().key_cols_by()
    mt.entries()._force_count()
36
36
37
37
38
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=30)
def benchmark_matrix_table_rows_force_count(profile25_mt):
    """Take the rows table of ``profile25_mt``, drop its key with ``key_by()``, and ``_force_count()`` it."""
    mt = hl.read_matrix_table(str(profile25_mt))
    ht = mt.rows().key_by()
    ht._force_count()
42
42
43
43
44
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=10, burn_in_iterations=15)
def benchmark_matrix_table_show(profile25_mt):
    """Read ``profile25_mt`` and call ``show(100)`` on the matrix table."""
    mt = hl.read_matrix_table(str(profile25_mt))
    mt.show(100)
48
48
49
49
50
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=15)
def benchmark_matrix_table_rows_show(profile25_mt):
    """Read ``profile25_mt`` and call ``show(100)`` on its rows table."""
    mt = hl.read_matrix_table(str(profile25_mt))
    rows = mt.rows()
    rows.show(100)
54
54
55
55
56
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=15, burn_in_iterations=16)
def benchmark_matrix_table_cols_show(profile25_mt):
    """Read ``profile25_mt`` and call ``show(100)`` on its columns table."""
    mt = hl.read_matrix_table(str(profile25_mt))
    cols = mt.cols()
    cols.show(100)
60
60
61
61
62
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=10)
def benchmark_matrix_table_take_entry(profile25_mt):
    """Read ``profile25_mt`` and call ``take(100)`` on the ``GT`` entry field."""
    mt = hl.read_matrix_table(str(profile25_mt))
    mt.GT.take(100)
66
66
67
67
68
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=15)
def benchmark_matrix_table_entries_show(profile25_mt):
    """Read ``profile25_mt`` and call ``show()`` (default arguments) on its entries table."""
    mt = hl.read_matrix_table(str(profile25_mt))
    entries = mt.entries()
    entries.show()
72
72
73
73
74
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=20, burn_in_iterations=10)
def benchmark_matrix_table_take_row(profile25_mt):
    """Read ``profile25_mt`` and call ``take(100)`` on the ``info.AF`` field."""
    mt = hl.read_matrix_table(str(profile25_mt))
    mt.info.AF.take(100)
78
78
79
79
80
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=20, burn_in_iterations=10)
def benchmark_matrix_table_take_col(profile25_mt):
    """Read ``profile25_mt`` and call ``take(100)`` on the ``s`` field."""
    mt = hl.read_matrix_table(str(profile25_mt))
    mt.s.take(100)
84
84
85
85
86
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=8)
def benchmark_write_range_matrix_table_p100(tmp_path):
    """Write a 1,000,000 x 10 range matrix table (100 partitions) to ``tmp_path / 'tmp.mt'``.

    Each entry is annotated with ``x = col_idx + row_idx`` before writing.
    """
    mt = hl.utils.range_matrix_table(n_rows=1_000_000, n_cols=10, n_partitions=100)
    mt = mt.annotate_entries(x=mt.col_idx + mt.row_idx)
    destination = str(tmp_path / 'tmp.mt')
    mt.write(destination)
91
91
92
92
93
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=15)
def benchmark_write_profile_mt(profile25_mt, tmp_path):
    """Read the matrix table at ``profile25_mt`` and write it to ``tmp_path / 'tmp.mt'``."""
    mt = hl.read_matrix_table(str(profile25_mt))
    mt.write(str(tmp_path / 'tmp.mt'))
96
96
97
97
98
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=20, burn_in_iterations=9)
def benchmark_matrix_table_rows_is_transition(profile25_mt):
    """Evaluate ``hl.is_snp`` on the first two alleles of every row and force a count.

    The rows table is unkeyed with ``key_by()`` before the ``is_snp`` field
    is selected.
    """
    # NOTE(review): the benchmark name says "is_transition" but the expression
    # evaluated is `hl.is_snp` -- confirm which one is intended.
    ht = hl.read_matrix_table(str(profile25_mt)).rows().key_by()
    snp_flags = ht.select(is_snp=hl.is_snp(ht.alleles[0], ht.alleles[1]))
    snp_flags._force_count()
102
102
103
103
104
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=20, burn_in_iterations=6)
def benchmark_matrix_table_filter_entries(profile25_mt):
    """Filter entries with ``(GQ > 8) & (DP > 2)`` and call ``_force_count_rows()``."""
    mt = hl.read_matrix_table(str(profile25_mt))
    keep = (mt.GQ > 8) & (mt.DP > 2)
    mt.filter_entries(keep)._force_count_rows()
108
108
109
109
110
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=3)
def benchmark_matrix_table_filter_entries_unfilter(profile25_mt):
    """Filter entries with ``(GQ > 8) & (DP > 2)``, then ``unfilter_entries()`` and force a row count."""
    mt = hl.read_matrix_table(str(profile25_mt))
    keep = (mt.GQ > 8) & (mt.DP > 2)
    mt.filter_entries(keep).unfilter_entries()._force_count_rows()
@@ -164,27 +164,27 @@ def many_aggs(mt):
164
164
return {f'x{ i } ' : expr for i , expr in enumerate (aggs )}
165
165
166
166
167
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=5, burn_in_iterations=4)
def benchmark_matrix_table_many_aggs_row_wise(profile25_mt):
    """Annotate rows with every expression returned by ``many_aggs(mt)`` and force a count of the rows table."""
    mt = hl.read_matrix_table(str(profile25_mt))
    row_aggs = many_aggs(mt)
    mt = mt.annotate_rows(**row_aggs)
    mt.rows()._force_count()
172
172
173
173
174
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=5, burn_in_iterations=10)
def benchmark_matrix_table_many_aggs_col_wise(profile25_mt):
    """Annotate columns with every expression returned by ``many_aggs(mt)`` and force a count of the columns table."""
    mt = hl.read_matrix_table(str(profile25_mt))
    col_aggs = many_aggs(mt)
    mt = mt.annotate_cols(**col_aggs)
    mt.cols()._force_count()
179
179
180
180
181
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=8)
def benchmark_matrix_table_aggregate_entries(profile25_mt):
    """Read ``profile25_mt`` and aggregate ``hl.agg.stats(mt.GQ)`` over all entries."""
    mt = hl.read_matrix_table(str(profile25_mt))
    gq_stats = hl.agg.stats(mt.GQ)
    mt.aggregate_entries(gq_stats)
185
185
186
186
187
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=8)
def benchmark_matrix_table_call_stats_star_star(profile25_mt):
    """Splat ``hl.agg.call_stats(mt.GT, mt.alleles)`` into row annotations and force a row count."""
    mt = hl.read_matrix_table(str(profile25_mt))
    call_stats = hl.agg.call_stats(mt.GT, mt.alleles)
    # `**` unpacks the aggregation result into individual row fields.
    mt.annotate_rows(**call_stats)._force_count_rows()
@@ -242,60 +242,60 @@ def benchmark_gnomad_coverage_stats_optimized(gnomad_dp_sim):
242
242
mt .rows ()._force_count ()
243
243
244
244
245
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=20, burn_in_iterations=10)
def benchmark_per_row_stats_star_star(gnomad_dp_sim):
    """Splat ``hl.agg.stats(mt.x)`` into row annotations of ``gnomad_dp_sim`` and force a row count."""
    mt = hl.read_matrix_table(str(gnomad_dp_sim))
    x_stats = hl.agg.stats(mt.x)
    # `**` unpacks the aggregation result into individual row fields.
    mt.annotate_rows(**x_stats)._force_count_rows()
249
249
250
250
251
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=10)
def benchmark_read_decode_gnomad_coverage(gnomad_dp_sim):
    """Read the matrix table at ``gnomad_dp_sim`` and call ``_force_count_rows()`` on it."""
    mt = hl.read_matrix_table(str(gnomad_dp_sim))
    mt._force_count_rows()
254
254
255
255
256
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=10)
def benchmark_import_bgen_force_count_just_gp(sim_ukb_bgen, sim_ukb_sample):
    """Import the BGEN file with only the ``GP`` entry field (8 partitions) and force a row count."""
    mt = hl.import_bgen(
        str(sim_ukb_bgen),
        sample_file=str(sim_ukb_sample),
        entry_fields=['GP'],
        n_partitions=8,
    )
    mt._force_count_rows()
260
260
261
261
262
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=20)
def benchmark_import_bgen_force_count_all(sim_ukb_bgen, sim_ukb_sample):
    """Import the BGEN file with ``GT``, ``GP`` and ``dosage`` entry fields (8 partitions) and force a row count."""
    mt = hl.import_bgen(
        str(sim_ukb_bgen),
        sample_file=str(sim_ukb_sample),
        entry_fields=['GT', 'GP', 'dosage'],
        n_partitions=8,
    )
    mt._force_count_rows()
268
268
269
269
270
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=12)
@pytest.mark.xfail(raises=TimeoutError, reason=XFail.Timeout)
def benchmark_import_bgen_info_score(sim_ukb_bgen, sim_ukb_sample):
    """Compute ``hl.agg.info_score(mt.GP)`` per row of the imported BGEN and force a count.

    Marked ``xfail`` for ``TimeoutError`` with reason ``XFail.Timeout``.
    """
    mt = hl.import_bgen(
        str(sim_ukb_bgen),
        sample_file=str(sim_ukb_sample),
        entry_fields=['GP'],
        n_partitions=8,
    )
    mt = mt.annotate_rows(info_score=hl.agg.info_score(mt.GP))
    mt.rows().select('info_score')._force_count()
276
276
277
277
278
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=18)
def benchmark_import_bgen_filter_count(sim_ukb_bgen, sim_ukb_sample):
    """Import the BGEN with ``GT`` and ``GP``, keep rows whose alleles equal ``['A', 'T']``, and force a row count."""
    mt = hl.import_bgen(
        str(sim_ukb_bgen),
        sample_file=str(sim_ukb_sample),
        entry_fields=['GT', 'GP'],
        n_partitions=8,
    )
    mt = mt.filter_rows(mt.alleles == ['A', 'T'])
    mt._force_count_rows()
283
283
284
284
285
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=20, burn_in_iterations=3)
def benchmark_export_range_matrix_table_entry_field_p100(tmp_path):
    """Export entry field ``x`` of a 1,000,000 x 10 range matrix table to ``tmp_path / 'result.txt'``.

    ``x`` is defined as ``col_idx + row_idx``; the table is built with
    100 partitions.
    """
    mt = hl.utils.range_matrix_table(n_rows=1_000_000, n_cols=10, n_partitions=100)
    mt = mt.annotate_entries(x=mt.col_idx + mt.row_idx)
    output = str(tmp_path / 'result.txt')
    mt.x.export(output)
290
290
291
291
292
@pytest.mark.benchmark(mds=1.2, instances=10, iterations=10, burn_in_iterations=8)
def benchmark_export_range_matrix_table_row_p100(tmp_path):
    """Export ``mt.row`` of a 1,000,000 x 10 range matrix table (100 partitions) to ``tmp_path / 'result.txt'``."""
    mt = hl.utils.range_matrix_table(n_rows=1_000_000, n_cols=10, n_partitions=100)
    output = str(tmp_path / 'result.txt')
    mt.row.export(output)
296
296
297
297
298
@pytest.mark.benchmark(mds=1.2, instances=15, iterations=25, burn_in_iterations=15)
def benchmark_export_range_matrix_table_col_p100(tmp_path):
    """Export ``mt.col`` of a 1,000,000 x 10 range matrix table (100 partitions) to ``tmp_path / 'result.txt'``."""
    mt = hl.utils.range_matrix_table(n_rows=1_000_000, n_cols=10, n_partitions=100)
    output = str(tmp_path / 'result.txt')
    mt.col.export(output)
@@ -309,7 +309,7 @@ def benchmark_large_range_matrix_table_sum():
309
309
mt .annotate_cols (foo = hl .agg .sum (mt .x ))._force_count_cols ()
310
310
311
311
312
- @pytest .mark .benchmark ()
312
+ @pytest .mark .benchmark (mds = 1.2 , instances = 10 , iterations = 5 , burn_in_iterations = 7 )
313
313
def benchmark_kyle_sex_specific_qc (profile25_mt ):
314
314
mt = hl .read_matrix_table (str (profile25_mt ))
315
315
mt = mt .annotate_cols (sex = hl .if_else (hl .rand_bool (0.5 ), 'Male' , 'Female' ))
@@ -350,14 +350,14 @@ def benchmark_kyle_sex_specific_qc(profile25_mt):
350
350
mt .rows ()._force_count ()
351
351
352
352
353
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=10, burn_in_iterations=5)
def benchmark_matrix_table_scan_count_rows_2():
    """Annotate rows of a 200,000,000 x 10 range matrix table (16 partitions) with ``hl.scan.count()`` and force a row count."""
    mt = hl.utils.range_matrix_table(n_rows=200_000_000, n_cols=10, n_partitions=16)
    mt = mt.annotate_rows(x=hl.scan.count())
    mt._force_count_rows()
358
358
359
359
360
- @pytest .mark .benchmark ()
360
+ @pytest .mark .benchmark (mds = 1.3 , instances = 20 , iterations = 10 , burn_in_iterations = 20 )
361
361
def benchmark_matrix_table_scan_count_cols_2 ():
362
362
mt = hl .utils .range_matrix_table (n_cols = 10_000_000 , n_rows = 10 )
363
363
mt = mt .annotate_cols (x = hl .scan .count ())
@@ -372,14 +372,14 @@ def benchmark_matrix_multi_write_nothing(tmp_path):
372
372
hl .experimental .write_matrix_tables (mts , str (tmp_path / 'multi-write' ))
373
373
374
374
375
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=10, burn_in_iterations=5)
def benchmark_mt_localize_and_collect(profile25_mt):
    """Localize the entries of ``profile25_mt`` under the name ``"ent"`` and collect the first 150 rows."""
    mt = hl.read_matrix_table(str(profile25_mt))
    ht = mt.localize_entries("ent")
    first_rows = ht.head(150)
    first_rows.collect()
380
380
381
381
382
- @pytest .mark .benchmark ()
382
+ @pytest .mark .benchmark (mds = 1.1 , instances = 20 , iterations = 15 , burn_in_iterations = 5 )
383
383
def benchmark_mt_group_by_memory_usage (random_doubles_mt ):
384
384
mt = hl .read_matrix_table (str (random_doubles_mt ))
385
385
mt = mt .group_rows_by (new_idx = mt .row_idx % 3 ).aggregate (x = hl .agg .mean (mt .x ))
0 commit comments