Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect variant #142

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
54 changes: 54 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,57 @@ workflows:
- name: Gue Su Chang #guesu, gschang
- name: Irenaeus Chan #IrenaeusChan
- name: Chad Tomlinson #chad388
# Dockstore workflow entry for somatic_wgs (descriptor language: WDL).
- subclass: WDL
# Primary WDL descriptor for this workflow.
primaryDescriptorPath: /definitions/somatic_wgs.wdl
name: somatic_wgs
# Contributors are listed by ORCID where one is given, otherwise by name.
# The trailing comment on each line is a short handle for that person
# (presumably a GitHub username -- confirm).
authors:
- orcid: 0000-0003-4266-6700 #chrisamiller
- orcid: 0000-0002-6388-446X #malachig
- orcid: 0000-0003-4378-7328 #susannasiebert
- orcid: 0000-0002-0577-4705 #tmooney
- orcid: 0000-0001-6293-4632 #sridhar0605
- orcid: 0009-0002-2565-0774 #johnegarza
- orcid: 0000-0001-7547-5789 #jasonwalker80
- orcid: 0000-0003-1928-7139 #zlskidmore
- orcid: 0000-0002-7648-1360 #bryanfisk
- orcid: 0000-0003-0944-3126 #acoffman
- orcid: 0000-0002-2584-5320 #jhundal
- orcid: 0000-0002-9704-408X #Matthew-Mosior
- orcid: 0000-0002-3024-6891 #apaul7
- name: John Maruska #johnmaruska
- name: Layth Aljorani #Layth17
- name: Sai Mukund Ramakrishnan #saimukund20
- name: Feiyu Du #dufeiyu
- name: Saad Khan #smk5g5
- name: Samuel Peters #sam16711
- name: Agata Gruszczynska #GruA
- name: Gue Su Chang #guesu, gschang
- name: Irenaeus Chan #IrenaeusChan
- name: Chad Tomlinson #chad388
# Dockstore workflow entry for detect_variants_wgs (descriptor language: WDL).
- subclass: WDL
# Primary WDL descriptor for this workflow.
primaryDescriptorPath: /definitions/detect_variants_wgs.wdl
name: detect_variants_wgs
# Contributors are listed by ORCID where one is given, otherwise by name.
# The trailing comment on each line is a short handle for that person
# (presumably a GitHub username -- confirm).
authors:
- orcid: 0000-0003-4266-6700 #chrisamiller
- orcid: 0000-0002-6388-446X #malachig
- orcid: 0000-0003-4378-7328 #susannasiebert
- orcid: 0000-0002-0577-4705 #tmooney
- orcid: 0000-0001-6293-4632 #sridhar0605
- orcid: 0009-0002-2565-0774 #johnegarza
- orcid: 0000-0001-7547-5789 #jasonwalker80
- orcid: 0000-0003-1928-7139 #zlskidmore
- orcid: 0000-0002-7648-1360 #bryanfisk
- orcid: 0000-0003-0944-3126 #acoffman
- orcid: 0000-0002-2584-5320 #jhundal
- orcid: 0000-0002-9704-408X #Matthew-Mosior
- orcid: 0000-0002-3024-6891 #apaul7
- name: John Maruska #johnmaruska
- name: Layth Aljorani #Layth17
- name: Sai Mukund Ramakrishnan #saimukund20
- name: Feiyu Du #dufeiyu
- name: Saad Khan #smk5g5
- name: Samuel Peters #sam16711
- name: Agata Gruszczynska #GruA
- name: Gue Su Chang #guesu, gschang
- name: Irenaeus Chan #IrenaeusChan
- name: Chad Tomlinson #chad388
81 changes: 52 additions & 29 deletions definitions/detect_variants_wgs.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import "subworkflows/mutect.wdl" as m
import "subworkflows/strelka_and_post_processing.wdl" as sapp
import "subworkflows/varscan_pre_and_post_processing.wdl" as vpapp
import "subworkflows/vcf_readcount_annotator.wdl" as vra
import "subworkflows/cram_to_bam_and_index.wdl" as cb
import "tools/add_vep_fields_to_table.wdl" as avftt
import "tools/bam_readcount.wdl" as br
import "tools/bgzip.wdl" as b
Expand All @@ -22,10 +23,10 @@ workflow detectVariantsWgs {
File reference
File reference_fai
File reference_dict
File tumor_bam
File tumor_bam_bai
File normal_bam
File normal_bam_bai
File tumor_cram
File tumor_cram_index
File normal_cram
File normal_cram_index
File roi_intervals
Boolean strelka_exome_mode
Int strelka_cpu_reserved = 8
Expand Down Expand Up @@ -54,31 +55,52 @@ workflow detectVariantsWgs {

String gnomad_field_name = "gnomADe_AF" # only change with gnomad_filter_annotation
Float filter_gnomADe_maximum_population_allele_frequency = 0.001
Float filter_mapq0_threshold = 0.15
Float filter_mapq0_threshold = 0.10
Int filter_minimum_depth = 1
Float? filter_somatic_llr_threshold
Float? filter_somatic_llr_tumor_purity
Float? filter_somatic_llr_normal_contamination_rate
Boolean cle_vcf_filter = false
Array[String] variants_to_table_fields = ["CHROM", "POS", "ID", "REF", "ALT", "set", "AC", "AF"]
Array[String] variants_to_table_fields = ["CHROM", "POS", "ID", "REF", "ALT", "set", "AC", "AF","LLR"]
Array[String] variants_to_table_genotype_fields = ["GT", "AD"]
Array[String] vep_to_table_fields = ["HGVSc", "HGVSp"]
String tumor_sample_name
String normal_sample_name
Array[VepCustomAnnotation] vep_custom_annotations
File? validated_variants
File? validated_variants_tbi
String prefix # workflow final output prefix
}

# Run the CRAM-to-BAM-and-index subworkflow on the tumor sample; later calls
# read tumorConvert.indexed_bam and tumorConvert.indexed_bam_bai.
call cb.cramTobamAndIndex as tumorConvert {
  input:
    # sample alignment and its index
    cram=tumor_cram,
    cram_index=tumor_cram_index,
    # reference bundle (the subworkflow names the .fai input "reference_index")
    reference=reference,
    reference_index=reference_fai,
    reference_dict=reference_dict
}

# Run the CRAM-to-BAM-and-index subworkflow on the normal sample; later calls
# read normalConvert.indexed_bam and normalConvert.indexed_bam_bai.
call cb.cramTobamAndIndex as normalConvert {
  input:
    # sample alignment and its index
    cram=normal_cram,
    cram_index=normal_cram_index,
    # reference bundle (the subworkflow names the .fai input "reference_index")
    reference=reference,
    reference_index=reference_fai,
    reference_dict=reference_dict
}

call m.mutect {
input:
reference=reference,
reference_fai=reference_fai,
reference_dict=reference_dict,
tumor_bam=tumor_bam,
tumor_bam_bai=tumor_bam_bai,
normal_bam=normal_bam,
normal_bam_bai=normal_bam_bai,
tumor_bam=tumorConvert.indexed_bam,
tumor_bam_bai=tumorConvert.indexed_bam_bai,
normal_bam=normalConvert.indexed_bam,
normal_bam_bai=normalConvert.indexed_bam_bai,
interval_list=roi_intervals,
scatter_count=scatter_count,
tumor_sample_name=tumor_sample_name,
Expand All @@ -92,12 +114,12 @@ workflow detectVariantsWgs {
reference_dict=reference_dict,

tumor_sample_name=tumor_sample_name,
tumor_bam=tumor_bam,
tumor_bam_bai=tumor_bam_bai,
tumor_bam=tumorConvert.indexed_bam,
tumor_bam_bai=tumorConvert.indexed_bam_bai,

normal_sample_name=normal_sample_name,
normal_bam=normal_bam,
normal_bam_bai=normal_bam_bai,
normal_bam=normalConvert.indexed_bam,
normal_bam_bai=normalConvert.indexed_bam_bai,

interval_list=roi_intervals,
exome_mode=strelka_exome_mode,
Expand All @@ -114,12 +136,12 @@ workflow detectVariantsWgs {
reference_dict=reference_dict,

tumor_sample_name=tumor_sample_name,
tumor_bam=tumor_bam,
tumor_bam_bai=tumor_bam_bai,
tumor_bam=tumorConvert.indexed_bam,
tumor_bam_bai=tumorConvert.indexed_bam_bai,

normal_sample_name=normal_sample_name,
normal_bam=normal_bam,
normal_bam_bai=normal_bam_bai,
normal_bam=normalConvert.indexed_bam,
normal_bam_bai=normalConvert.indexed_bam_bai,

interval_list=roi_intervals,
scatter_count=scatter_count,
Expand All @@ -136,11 +158,11 @@ workflow detectVariantsWgs {
reference_fai=reference_fai,
reference_dict=reference_dict,

tumor_bam=tumor_bam,
tumor_bam_bai=tumor_bam_bai,
tumor_bam=tumorConvert.indexed_bam,
tumor_bam_bai=tumorConvert.indexed_bam_bai,

normal_bam=normal_bam,
normal_bam_bai=normal_bam_bai,
normal_bam=normalConvert.indexed_bam,
normal_bam_bai=normalConvert.indexed_bam_bai,

interval_list=roi_intervals,
docm_vcf=docm_vcf,
Expand Down Expand Up @@ -207,8 +229,8 @@ workflow detectVariantsWgs {
reference_fai=reference_fai,
reference_dict=reference_dict,
sample=tumor_sample_name,
bam=tumor_bam,
bam_bai=tumor_bam_bai,
bam=tumorConvert.indexed_bam,
bam_bai=tumorConvert.indexed_bam_bai,
min_base_quality=readcount_minimum_base_quality,
min_mapping_quality=readcount_minimum_mapping_quality
}
Expand All @@ -220,8 +242,8 @@ workflow detectVariantsWgs {
reference_fai=reference_fai,
reference_dict=reference_dict,
sample=normal_sample_name,
bam=normal_bam,
bam_bai=normal_bam_bai,
bam=normalConvert.indexed_bam,
bam_bai=normalConvert.indexed_bam_bai,
min_base_quality=readcount_minimum_base_quality,
min_mapping_quality=readcount_minimum_mapping_quality
}
Expand Down Expand Up @@ -258,8 +280,8 @@ workflow detectVariantsWgs {
filter_somatic_llr_tumor_purity=filter_somatic_llr_tumor_purity,
filter_somatic_llr_normal_contamination_rate=filter_somatic_llr_normal_contamination_rate,
filter_minimum_depth=filter_minimum_depth,
tumor_bam=tumor_bam,
tumor_bam_bai=tumor_bam_bai,
tumor_bam=tumorConvert.indexed_bam,
tumor_bam_bai=tumorConvert.indexed_bam_bai,
do_cle_vcf_filter=cle_vcf_filter,
reference=reference,
reference_fai=reference_fai,
Expand Down Expand Up @@ -294,7 +316,8 @@ workflow detectVariantsWgs {
input:
vcf=annotatedFilterIndex.indexed_vcf,
tsv=variantsToTable.variants_tsv,
vep_fields=vep_to_table_fields
vep_fields=vep_to_table_fields,
prefix=prefix
}

output {
Expand Down
8 changes: 4 additions & 4 deletions definitions/somatic_wgs.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,10 @@ workflow somaticWgs {
reference=reference,
reference_fai=reference_fai,
reference_dict=reference_dict,
tumor_bam=tumorAlignment.final_bam,
tumor_bam_bai=tumorAlignment.final_bam_bai,
normal_bam=normalAlignment.final_bam,
normal_bam_bai=normalAlignment.final_bam_bai,
tumor_cram=tumorAlignment.final_bam,
tumor_cram_index=tumorAlignment.final_bam_bai,
normal_cram=normalAlignment.final_bam,
normal_cram_index=normalAlignment.final_bam_bai,
roi_intervals=target_intervals,
strelka_exome_mode=false,
strelka_cpu_reserved=strelka_cpu_reserved,
Expand Down
33 changes: 33 additions & 0 deletions definitions/subworkflows/cram_to_bam_and_index.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
version 1.0

import "../tools/cram_to_bam.wdl" as cb
import "../tools/index_bam.wdl" as i

# Subworkflow: convert a CRAM to a BAM with cramToBam, then index that BAM
# with indexBam. The outputs are forwarded unchanged from indexBam.
workflow cramTobamAndIndex {
  input {
    # Alignment to convert, plus its index.
    File cram
    File cram_index
    # Reference files handed to the cramToBam task.
    File reference
    File reference_index
    File reference_dict
  }

  # Step 1: CRAM -> BAM.
  call cb.cramToBam {
    input:
      reference=reference,
      reference_index=reference_index,
      reference_dict=reference_dict,
      cram=cram,
      cram_index=cram_index
  }

  # Step 2: index the BAM produced by step 1.
  call i.indexBam {
    input:
      bam=cramToBam.bam
  }

  output {
    # The BAM as emitted by indexBam.
    File indexed_bam = indexBam.indexed_bam
    # Two index outputs exposed by indexBam (presumably the ".bam.bai" and
    # ".bai" naming variants -- confirm against tools/index_bam.wdl).
    File indexed_bam_bai = indexBam.indexed_bam_bai
    File indexed_bai = indexBam.indexed_bai
  }
}
4 changes: 2 additions & 2 deletions definitions/tools/cram_to_bam.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ task cramToBam {
File reference_dict
}

Int space_needed_gb = 10 + round(size([cram, cram_index, reference, reference_index, reference_dict], "GB") * 3)
Int space_needed_gb = 10 + round(size([cram, cram_index, reference, reference_index, reference_dict], "GB") * 5)
runtime {
memory: "4GB"
memory: "8GB"
docker: "quay.io/biocontainers/samtools:1.11--h6270b1f_0"
disks: "local-disk ~{space_needed_gb} HDD"
}
Expand Down
2 changes: 1 addition & 1 deletion definitions/tools/docm_gatk_haplotype_caller.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ task docmGatkHaplotypeCaller {
preemptible: 1
maxRetries: 2
memory: "9GB"
docker: "broadinstitute/gatk:4.1.2.0"
docker: "broadinstitute/gatk:4.1.3.0"
disks: "local-disk ~{space_needed_gb} HDD"
}

Expand Down
2 changes: 1 addition & 1 deletion definitions/tools/mutect.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ task mutect {
preemptible: 1
maxRetries: 2
docker: "broadinstitute/gatk:4.2.3.0"
memory: "2GB"
memory: "8GB"
bootDiskSizeGb: space_needed_gb
disks: "local-disk ~{space_needed_gb} HDD"
}
Expand Down
Loading