Skip to content

Commit de25154

Browse files
authored
Merge pull request #226 from uclahs-cds/jarbet-mark-dup-spark-mem-retry
add retry method for run_MarkDuplicatesSpark_GATK and run_sort_SAMtools
2 parents acf77d9 + fa481bd commit de25154

File tree

8 files changed

+87
-1
lines changed

8 files changed

+87
-1
lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@
22
path = external/nextflow-modules
33
url = [email protected]:uclahs-cds/pipeline-Nextflow-module.git
44
branch = main
5+
[submodule "external/nextflow-config"]
6+
path = external/nextflow-config
7+
url = [email protected]:uclahs-cds/pipeline-Nextflow-config.git

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
2525
- Use SAMtools `sort` instead of Picard `SortSam`
2626

2727
### Added
28+
- Add `retry` method to `run_sort_SAMtools` and `run_MarkDuplicatesSpark_GATK` (if a process runs out of RAM, it is retried with more memory)
2829
- Add process `run_merge_SAMtools`: use when `params.mark_duplicates=false` to ensure multiple BAM outputs are merged
2930
- `.github/CODEOWNERS`
3031
- Add config file for F16 node

config/F32.config

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ process {
1616
withName: run_sort_SAMtools {
1717
cpus = 12
1818
memory = 15.GB
19+
retry_strategy {
20+
memory {
21+
strategy = 'exponential'
22+
operand = 2
23+
}
24+
}
1925
}
2026
withName: run_merge_SAMtools {
2127
cpus = 12
@@ -28,5 +34,11 @@ process {
2834
withName: run_MarkDuplicatesSpark_GATK {
2935
cpus = 16
3036
memory = 35.GB
37+
retry_strategy {
38+
memory {
39+
strategy = 'add'
40+
operand = 29.GB
41+
}
42+
}
3143
}
3244
}

config/F72.config

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ process {
1616
withName: run_sort_SAMtools {
1717
cpus = 12
1818
memory = 15.GB
19+
retry_strategy {
20+
memory {
21+
strategy = 'exponential'
22+
operand = 2
23+
}
24+
}
1925
}
2026
withName: run_merge_SAMtools {
2127
cpus = 12
@@ -28,5 +34,11 @@ process {
2834
withName: run_MarkDuplicatesSpark_GATK {
2935
cpus = 16
3036
memory = 35.GB
37+
retry_strategy {
38+
memory {
39+
strategy = 'exponential'
40+
operand = 2
41+
}
42+
}
3143
}
3244
}

config/M64.config

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ process {
1616
withName: run_sort_SAMtools {
1717
cpus = 12
1818
memory = 15.GB
19+
retry_strategy {
20+
memory {
21+
strategy = 'exponential'
22+
operand = 2
23+
}
24+
}
1925
}
2026
withName: run_merge_SAMtools {
2127
cpus = 12
@@ -28,6 +34,12 @@ process {
2834
withName: run_MarkDuplicatesSpark_GATK {
2935
cpus = 16
3036
memory = 35.GB
37+
retry_strategy {
38+
memory {
39+
strategy = 'exponential'
40+
operand = 2
41+
}
42+
}
3143
}
3244
}
3345

config/base.config

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@
55
process {
66
cpus = { methods.check_max( 1 * task.attempt, 'cpus' ) }
77

8-
errorStrategy = { task.exitStatus in [143, 137, 104, 134, 139] ? 'retry' : 'finish' }
8+
commonRetryCodes = [104, 134, 137, 139, 143, 247] // Common out-of-memory error codes
9+
withName: 'run_MarkDuplicatesSpark_GATK' {
10+
ext.retry_codes = [52] // Spark OutOfMemory error codes
11+
}
12+
13+
errorStrategy = { task.exitStatus in (((task?.ext?.retry_codes) ? task.ext.retry_codes : []) + process.commonRetryCodes) ? 'retry' : 'terminate' }
914
maxRetries = 1
1015

1116
withLabel:process_low {

config/methods.config

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
includeConfig "../external/nextflow-config/config/retry/retry.config"
12

23
methods {
34

@@ -186,6 +187,44 @@ methods {
186187
}
187188
}
188189

190+
// Function to ensure that resource requirements don't go beyond
191+
// a maximum limit or below a minimum limit
192+
// Required by the retry configuration in ../external/nextflow-config/config/retry/retry.config
193+
check_limits = { obj, type ->
194+
if (type == 'memory') {
195+
try {
196+
if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
197+
return params.max_memory as nextflow.util.MemoryUnit
198+
else if (obj.compareTo(params.min_memory as nextflow.util.MemoryUnit) == -1)
199+
return params.min_memory as nextflow.util.MemoryUnit
200+
else
201+
return obj
202+
} catch (all) {
203+
println " ### WARNING ### Max memory '${params.max_memory}' or min memory '${params.min_memory}' is not valid! Using default value: $obj"
204+
return obj
205+
}
206+
} else if (type == 'time') {
207+
try {
208+
if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
209+
return params.max_time as nextflow.util.Duration
210+
else if (obj.compareTo(params.min_time as nextflow.util.Duration) == -1)
211+
return params.min_time as nextflow.util.Duration
212+
else
213+
return obj
214+
} catch (all) {
215+
println " ### WARNING ### Max time '${params.max_time}' or min time '${params.min_time}' is not valid! Using default value: $obj"
216+
return obj
217+
}
218+
} else if (type == 'cpus') {
219+
try {
220+
return Math.max( Math.min( obj, params.max_cpus as int ), params.min_cpus as int )
221+
} catch (all) {
222+
println " ### WARNING ### Max cpus '${params.max_cpus}' or min cpus '${params.min_cpus}' is not valid! Using default value: $obj"
223+
return obj
224+
}
225+
}
226+
}
227+
189228
// Resource allocation here. Static node-based allocations included here
190229
set_resources_allocation = {
191230
// Function to ensure that resource requirements don't go beyond
@@ -263,5 +302,6 @@ methods {
263302
methods.set_pipeline_logs()
264303
methods.check_aligner()
265304
methods.set_resources_allocation()
305+
retry.setup_retry()
266306
}
267307
}

external/nextflow-config

Submodule nextflow-config added at a3cf253

0 commit comments

Comments
 (0)