Skip to content

Commit

Permalink
Merge pull request #2 from TRON-Bioinformatics/migrate-dsl2
Browse files Browse the repository at this point in the history
Migrate to DSL 2
  • Loading branch information
priesgo authored Nov 22, 2021
2 parents eab507a + 1b08a47 commit 9cade27
Show file tree
Hide file tree
Showing 16 changed files with 478 additions and 150 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/automated_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,19 @@ jobs:
distribution: 'zulu' # See 'Supported distributions' for available options
java-version: '11'
- uses: conda-incubator/setup-miniconda@v2
with:
auto-update-conda: true
channels: defaults,conda-forge,bioconda
- name: Install dependencies
run: |
apt-get update && apt-get --assume-yes install wget make procps software-properties-common
wget -qO- https://get.nextflow.io | bash && cp nextflow /usr/local/bin/nextflow
- name: Cache conda environments
uses: actions/cache@v2
with:
path: |
/home/runner/work/tronflow-mutect2/tronflow-mutect2/work/conda
key: ${{ runner.os }}-tronflow-mutect2
- name: Run tests
run: |
make
21 changes: 6 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

all : clean test check
all : clean test

clean:
rm -rf output
Expand All @@ -13,17 +13,8 @@ clean:


test:
echo "sample_name\t"`pwd`"/test_data/TESTX_S1_L001.bam\t"`pwd`"/test_data/TESTX_S1_L002.bam" > test_data/test_input.txt
nextflow main.nf -profile test,conda --output output/test1 --input_files test_data/test_input.txt
nextflow main.nf -profile test,conda --disable_common_germline_filter --output output/test2 --input_files test_data/test_input.txt
echo "sample_name_with_replicates\t"`pwd`"/test_data/TESTX_S1_L001.bam,"`pwd`"/test_data/TESTX_S1_L001.bam\t"`pwd`"/test_data/TESTX_S1_L002.bam,"`pwd`"/test_data/TESTX_S1_L002.bam" > test_data/test_input_with_replicates.txt
nextflow main.nf -profile test,conda --input_files test_data/test_input_with_replicates.txt --output output/test3
nextflow main.nf -profile test,conda --output output/test4 --input_files test_data/test_input.txt --intervals false


check:
test -s output/test1/sample_name/sample_name.mutect2.vcf || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test2/sample_name/sample_name.mutect2.vcf || { echo "Missing test 2 output file!"; exit 1; }
test -s output/test3/sample_name_with_replicates/sample_name_with_replicates.mutect2.vcf || { echo "Missing test 3 output file!"; exit 1; }
test -s output/test4/sample_name/sample_name.mutect2.vcf || { echo "Missing test 4 output file!"; exit 1; }

bash tests/test_00.sh
bash tests/test_01.sh
bash tests/test_02.sh
bash tests/test_03.sh
bash tests/test_04.sh
9 changes: 0 additions & 9 deletions environment.yml

This file was deleted.

140 changes: 16 additions & 124 deletions main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { MUTECT2 } from './modules/01_mutect2'
include { LEARN_READ_ORIENTATION_MODEL } from './modules/02_learn_read_orientation'
include { PILEUP_SUMMARIES } from './modules/03_pileup_summary'
include { CALCULATE_CONTAMINATION } from './modules/04_calculate_contamination'
include { FILTER_CALLS } from './modules/05_filter_calls'

params.help= false
params.input_files = false
Expand Down Expand Up @@ -44,133 +51,18 @@ if (params.input_files) {
.splitCsv(header: ['name', 'tumor_bam', 'normal_bam'], sep: "\t")
.map{ row-> tuple(row.name, row.tumor_bam, row.normal_bam) }
.set { input_files }

Channel
.fromPath(params.input_files)
.splitCsv(header: ['name', 'tumor_bam', 'normal_bam'], sep: "\t")
.map{ row-> tuple(row.name, row.tumor_bam) }
.set { tumor_bams }
} else {
exit 1, "Input file not specified!"
}

// DSL1 process: runs GATK Mutect2 for one entry of the `input_files` channel.
// Emits the unfiltered VCF together with its stats file on `unfiltered_vcfs`
// and the F1R2 counts archive on `f1r2_stats`.
process mutect2 {
cpus params.cpus_mutect2
memory params.memory_mutect2
tag "${name}"
publishDir "${params.output}/${name}", mode: "copy"

input:
set name, tumor_bam, normal_bam from input_files

output:
set val("${name}"), file("${name}.mutect2.unfiltered.vcf"), file("${name}.mutect2.unfiltered.vcf.stats") into unfiltered_vcfs
set val("${name}"), file("${name}.f1r2.tar.gz") into f1r2_stats

script:
// Optional arguments collapse to an empty string when their parameter is unset.
normal_panel_option = params.pon ? "--panel-of-normals ${params.pon}" : ""
germline_filter = params.disable_common_germline_filter ? "" : "--germline-resource ${params.gnomad}"
// Each BAM value may be a comma-separated list; every entry becomes its own --input.
normal_inputs = normal_bam.split(",").collect({v -> "--input $v"}).join(" ")
tumor_inputs = tumor_bam.split(",").collect({v -> "--input $v"}).join(" ")
intervals_option = params.intervals ? "--intervals ${params.intervals}" : ""
"""
gatk --java-options '-Xmx${params.memory_mutect2}' Mutect2 \
--reference ${params.reference} \
${intervals_option} \
${germline_filter} \
${normal_panel_option} \
${normal_inputs} --normal-sample normal \
${tumor_inputs} --tumor-sample tumor \
--output ${name}.mutect2.unfiltered.vcf \
--f1r2-tar-gz ${name}.f1r2.tar.gz
"""
}

// DSL1 process: runs GATK LearnReadOrientationModel on the F1R2 archive read
// from the `f1r2_stats` channel and emits the resulting model archive, keyed
// by sample name, on `read_orientation_model`.
process learnReadOrientationModel {
cpus params.cpus_read_orientation
memory params.memory_read_orientation
tag "${name}"
publishDir "${params.output}/${name}", mode: "copy"

input:
set name, file(f1r2_stats) from f1r2_stats

output:
set name, file("${name}.read-orientation-model.tar.gz") into read_orientation_model

"""
gatk --java-options '-Xmx${params.memory_read_orientation}' LearnReadOrientationModel \
--input ${f1r2_stats} \
--output ${name}.read-orientation-model.tar.gz
"""
}

process pileUpSummaries {
cpus params.cpus_pileup
memory params.memory_pileup
tag "${name}"
publishDir "${params.output}/${name}", mode: "copy"
workflow {
MUTECT2(input_files)
PILEUP_SUMMARIES(input_files)
LEARN_READ_ORIENTATION_MODEL(MUTECT2.out.f1r2_stats)
CALCULATE_CONTAMINATION(PILEUP_SUMMARIES.out.pileupsummaries)
FILTER_CALLS(
CALCULATE_CONTAMINATION.out.contaminationTables.join(
LEARN_READ_ORIENTATION_MODEL.out.read_orientation_model).join(MUTECT2.out.unfiltered_vcfs))

input:
set name, tumor_bam from tumor_bams

output:
set val("${name}"), file("${name}.pileupsummaries.table") into pileupsummaries

script:
tumor_inputs = tumor_bam.split(",").collect({v -> "--input $v"}).join(" ")
"""
gatk --java-options '-Xmx${params.memory_pileup}' GetPileupSummaries \
--intervals ${params.gnomad} \
--variant ${params.gnomad} \
${tumor_inputs} \
--output ${name}.pileupsummaries.table
"""
}

process calculateContamination {
cpus params.cpus_contamination
memory params.memory_contamination
tag "${name}"
publishDir "${params.output}/${name}", mode: "copy"

input:
set name, file(table) from pileupsummaries

output:
set name, file("${name}.segments.table"), file("${name}.calculatecontamination.table") into contaminationTables

"""
gatk --java-options '-Xmx${params.memory_contamination}' CalculateContamination \
--input ${table} \
-tumor-segmentation ${name}.segments.table \
--output ${name}.calculatecontamination.table
"""
FILTER_CALLS.out.final_vcfs.map {it.join("\t")}.collectFile(name: "${params.output}/mutect2_output_files.txt", newLine: true)
}

// DSL1 process: runs GATK FilterMutectCalls on the unfiltered VCF, using the
// contamination tables and the read orientation model joined by sample name.
// Emits (name, path of the published VCF) on `final_vcfs` and also declares
// the filtered VCF itself as an output file.
process filterCalls {
cpus params.cpus_filter
memory params.memory_filter
tag "${name}"
publishDir "${params.output}/${name}", mode: "copy"

input:
// The three channels are joined on their first element (the sample name).
set name, file(segments_table), file(contamination_table), file(model),
file(unfiltered_vcf), file(vcf_stats) from contaminationTables.join(read_orientation_model).join(unfiltered_vcfs)

output:
// The second element is a string built from params.output, not a staged file.
set name, val("${params.output}/${name}/${name}.mutect2.vcf") into final_vcfs
file "${name}.mutect2.vcf"

"""
gatk --java-options '-Xmx${params.memory_filter}' FilterMutectCalls \
-V ${unfiltered_vcf} \
--reference ${params.reference} \
--tumor-segmentation ${segments_table} \
--contamination-table ${contamination_table} \
--ob-priors ${model} \
--output ${name}.mutect2.vcf
"""
}

final_vcfs.map {it.join("\t")}.collectFile(name: "${params.output}/mutect2_output_files.txt", newLine: true)
43 changes: 43 additions & 0 deletions modules/01_mutect2.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Module-level defaults. A value already provided by the pipeline
// configuration or the command line is kept instead of these.
params.memory_mutect2 = "16g"
params.cpus_mutect2 = 2
params.output = 'output'
params.gnomad = false
params.pon = false
params.disable_common_germline_filter = false
params.reference = false
params.intervals = false
// Declared so the `conda` directive below never reads an undefined parameter
// when the `conda` profile is not selected.
params.enable_conda = false


/*
 * Runs GATK Mutect2 on one tumor/normal pair.
 *
 * Input:
 *   tuple (name, tumor_bam, normal_bam) - the BAM values are strings that may
 *   hold several comma-separated paths; each path is passed as its own --input.
 *
 * Output:
 *   unfiltered_vcfs - tuple (name, unfiltered VCF, Mutect2 stats file)
 *   f1r2_stats      - tuple (name, F1R2 counts archive)
 */
process MUTECT2 {
    cpus params.cpus_mutect2
    memory params.memory_mutect2
    tag "${name}"
    publishDir "${params.output}/${name}", mode: "copy"

    // The GATK conda environment is requested only when conda is enabled.
    conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)

    input:
    tuple val(name), val(tumor_bam), val(normal_bam)

    output:
    tuple val("${name}"), file("${name}.mutect2.unfiltered.vcf"), file("${name}.mutect2.unfiltered.vcf.stats"), emit: unfiltered_vcfs
    tuple val("${name}"), file("${name}.f1r2.tar.gz"), emit: f1r2_stats

    script:
    // Optional arguments collapse to an empty string when their parameter is unset.
    normal_panel_option = params.pon ? "--panel-of-normals ${params.pon}" : ""
    germline_filter = params.disable_common_germline_filter ? "" : "--germline-resource ${params.gnomad}"
    // One --input per comma-separated BAM path.
    normal_inputs = normal_bam.split(",").collect({v -> "--input $v"}).join(" ")
    tumor_inputs = tumor_bam.split(",").collect({v -> "--input $v"}).join(" ")
    intervals_option = params.intervals ? "--intervals ${params.intervals}" : ""
    // NOTE(review): the sample names "normal" and "tumor" are hard-coded;
    // presumably the BAM read groups must carry these names - confirm.
    """
    gatk --java-options '-Xmx${params.memory_mutect2}' Mutect2 \
    --reference ${params.reference} \
    ${intervals_option} \
    ${germline_filter} \
    ${normal_panel_option} \
    ${normal_inputs} --normal-sample normal \
    ${tumor_inputs} --tumor-sample tumor \
    --output ${name}.mutect2.unfiltered.vcf \
    --f1r2-tar-gz ${name}.f1r2.tar.gz
    """
}
25 changes: 25 additions & 0 deletions modules/02_learn_read_orientation.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Module-level defaults. A value already provided by the pipeline
// configuration or the command line is kept instead of these.
params.memory_read_orientation = "16g"
params.cpus_read_orientation = 2
params.output = 'output'
// Declared so the `conda` directive below never reads an undefined parameter
// when the `conda` profile is not selected.
params.enable_conda = false


/*
 * Runs GATK LearnReadOrientationModel on an F1R2 counts archive.
 *
 * Input:
 *   tuple (name, f1r2_stats) - sample name and the F1R2 archive file.
 *
 * Output:
 *   read_orientation_model - tuple (name, read orientation model archive)
 */
process LEARN_READ_ORIENTATION_MODEL {
    cpus params.cpus_read_orientation
    memory params.memory_read_orientation
    tag "${name}"
    publishDir "${params.output}/${name}", mode: "copy"

    // The GATK conda environment is requested only when conda is enabled.
    conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)

    input:
    tuple val(name), file(f1r2_stats)

    output:
    tuple val(name), file("${name}.read-orientation-model.tar.gz"), emit: read_orientation_model

    """
    gatk --java-options '-Xmx${params.memory_read_orientation}' LearnReadOrientationModel \
    --input ${f1r2_stats} \
    --output ${name}.read-orientation-model.tar.gz
    """
}
30 changes: 30 additions & 0 deletions modules/03_pileup_summary.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Module-level defaults. A value already provided by the pipeline
// configuration or the command line is kept instead of these.
params.memory_pileup = "32g"
params.cpus_pileup = 2
params.output = 'output'
params.gnomad = false
// Declared so the `conda` directive below never reads an undefined parameter
// when the `conda` profile is not selected.
params.enable_conda = false


/*
 * Runs GATK GetPileupSummaries on the tumor BAM(s) of one sample.
 *
 * Input:
 *   tuple (name, tumor_bam, normal_bam) - the same tuple shape MUTECT2 takes;
 *   only tumor_bam is used here. It may hold several comma-separated paths,
 *   each passed as its own --input.
 *
 * Output:
 *   pileupsummaries - tuple (name, pileup summaries table)
 */
process PILEUP_SUMMARIES {
    cpus params.cpus_pileup
    memory params.memory_pileup
    tag "${name}"
    publishDir "${params.output}/${name}", mode: "copy"

    // The GATK conda environment is requested only when conda is enabled.
    conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)

    input:
    tuple val(name), val(tumor_bam), val(normal_bam)

    output:
    tuple val("${name}"), file("${name}.pileupsummaries.table"), emit: pileupsummaries

    script:
    // One --input per comma-separated BAM path.
    tumor_inputs = tumor_bam.split(",").collect({v -> "--input $v"}).join(" ")
    // params.gnomad is passed both as the variant resource and as the intervals.
    """
    gatk --java-options '-Xmx${params.memory_pileup}' GetPileupSummaries \
    --intervals ${params.gnomad} \
    --variant ${params.gnomad} \
    ${tumor_inputs} \
    --output ${name}.pileupsummaries.table
    """
}
26 changes: 26 additions & 0 deletions modules/04_calculate_contamination.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Module-level defaults. A value already provided by the pipeline
// configuration or the command line is kept instead of these.
params.memory_contamination = "16g"
params.cpus_contamination = 2
params.output = 'output'
// Declared so the `conda` directive below never reads an undefined parameter
// when the `conda` profile is not selected.
params.enable_conda = false


/*
 * Runs GATK CalculateContamination on a pileup summaries table.
 *
 * Input:
 *   tuple (name, table) - sample name and the pileup summaries table file.
 *
 * Output:
 *   contaminationTables - tuple (name, segments table, contamination table)
 */
process CALCULATE_CONTAMINATION {
    cpus params.cpus_contamination
    memory params.memory_contamination
    tag "${name}"
    publishDir "${params.output}/${name}", mode: "copy"

    // The GATK conda environment is requested only when conda is enabled.
    conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)

    input:
    tuple val(name), file(table)

    output:
    tuple val(name), file("${name}.segments.table"), file("${name}.calculatecontamination.table"), emit: contaminationTables

    """
    gatk --java-options '-Xmx${params.memory_contamination}' CalculateContamination \
    --input ${table} \
    -tumor-segmentation ${name}.segments.table \
    --output ${name}.calculatecontamination.table
    """
}
31 changes: 31 additions & 0 deletions modules/05_filter_calls.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Module-level defaults. A value already provided by the pipeline
// configuration or the command line is kept instead of these.
params.memory_filter = "16g"
params.cpus_filter = 2
params.output = 'output'
params.reference = false
// Declared so the `conda` directive below never reads an undefined parameter
// when the `conda` profile is not selected.
params.enable_conda = false


/*
 * Runs GATK FilterMutectCalls on an unfiltered Mutect2 VCF.
 *
 * Input:
 *   tuple (name, segments_table, contamination_table, model, unfiltered_vcf,
 *   vcf_stats) - vcf_stats is staged next to the VCF but is not named on the
 *   command line.
 *
 * Output:
 *   final_vcfs - tuple (name, path string of the published VCF); the path is
 *                built from params.output and is not a staged file.
 *   (unnamed)  - the filtered VCF file itself, so that it gets published.
 */
process FILTER_CALLS {
    cpus params.cpus_filter
    memory params.memory_filter
    tag "${name}"
    publishDir "${params.output}/${name}", mode: "copy"

    // The GATK conda environment is requested only when conda is enabled.
    conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)

    input:
    tuple val(name), file(segments_table), file(contamination_table), file(model), file(unfiltered_vcf), file(vcf_stats)

    output:
    tuple val(name), val("${params.output}/${name}/${name}.mutect2.vcf"), emit: final_vcfs
    file "${name}.mutect2.vcf"

    """
    gatk --java-options '-Xmx${params.memory_filter}' FilterMutectCalls \
    -V ${unfiltered_vcf} \
    --reference ${params.reference} \
    --tumor-segmentation ${segments_table} \
    --contamination-table ${contamination_table} \
    --ob-priors ${model} \
    --output ${name}.mutect2.vcf
    """
}
4 changes: 2 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
*/

profiles {
conda { process.conda = "$baseDir/environment.yml" }
conda { params.enable_conda = true }
debug { process.beforeScript = 'echo $HOSTNAME' }
test {
params.reference = "$baseDir/test_data/ucsc.hg19.minimal.fasta"
Expand Down Expand Up @@ -37,7 +37,7 @@ env {
// Capture exit codes from upstream processes when piping
process.shell = ['/bin/bash', '-euo', 'pipefail']

VERSION = '1.2.0'
VERSION = '1.3.0'
DOI = 'https://zenodo.org/badge/latestdoi/355860788'

manifest {
Expand Down
Loading

0 comments on commit 9cade27

Please sign in to comment.