diff --git a/.dockstore.yml b/.dockstore.yml index 8e313cc..910feb0 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -27,3 +27,57 @@ workflows: - name: Gue Su Chang #guesu, gschang - name: Irenaeus Chan #IrenaeusChan - name: Chad Tomlinson #chad388 + - subclass: WDL + primaryDescriptorPath: /definitions/somatic_wgs.wdl + name: somatic_wgs + authors: + - orcid: 0000-0003-4266-6700 #chrisamiller + - orcid: 0000-0002-6388-446X #malachig + - orcid: 0000-0003-4378-7328 #susannasiebert + - orcid: 0000-0002-0577-4705 #tmooney + - orcid: 0000-0001-6293-4632 #sridhar0605 + - orcid: 0009-0002-2565-0774 #johnegarza + - orcid: 0000-0001-7547-5789 #jasonwalker80 + - orcid: 0000-0003-1928-7139 #zlskidmore + - orcid: 0000-0002-7648-1360 #bryanfisk + - orcid: 0000-0003-0944-3126 #acoffman + - orcid: 0000-0002-2584-5320 #jhundal + - orcid: 0000-0002-9704-408X #Matthew-Mosior + - orcid: 0000-0002-3024-6891 #apaul7 + - name: John Maruska #johnmaruska + - name: Layth Aljorani #Layth17 + - name: Sai Mukund Ramakrishnan #saimukund20 + - name: Feiyu Du #dufeiyu + - name: Saad Khan #smk5g5 + - name: Samuel Peters #sam16711 + - name: Agata Gruszczynska #GruA + - name: Gue Su Chang #guesu, gschang + - name: Irenaeus Chan #IrenaeusChan + - name: Chad Tomlinson #chad388 + - subclass: WDL + primaryDescriptorPath: /definitions/detect_variants_wgs.wdl + name: detect_variants_wgs + authors: + - orcid: 0000-0003-4266-6700 #chrisamiller + - orcid: 0000-0002-6388-446X #malachig + - orcid: 0000-0003-4378-7328 #susannasiebert + - orcid: 0000-0002-0577-4705 #tmooney + - orcid: 0000-0001-6293-4632 #sridhar0605 + - orcid: 0009-0002-2565-0774 #johnegarza + - orcid: 0000-0001-7547-5789 #jasonwalker80 + - orcid: 0000-0003-1928-7139 #zlskidmore + - orcid: 0000-0002-7648-1360 #bryanfisk + - orcid: 0000-0003-0944-3126 #acoffman + - orcid: 0000-0002-2584-5320 #jhundal + - orcid: 0000-0002-9704-408X #Matthew-Mosior + - orcid: 0000-0002-3024-6891 #apaul7 + - name: John Maruska #johnmaruska + - name: Layth 
Aljorani #Layth17 + - name: Sai Mukund Ramakrishnan #saimukund20 + - name: Feiyu Du #dufeiyu + - name: Saad Khan #smk5g5 + - name: Samuel Peters #sam16711 + - name: Agata Gruszczynska #GruA + - name: Gue Su Chang #guesu, gschang + - name: Irenaeus Chan #IrenaeusChan + - name: Chad Tomlinson #chad388 diff --git a/definitions/detect_variants_wgs.wdl b/definitions/detect_variants_wgs.wdl index b73dba8..00055bb 100644 --- a/definitions/detect_variants_wgs.wdl +++ b/definitions/detect_variants_wgs.wdl @@ -6,6 +6,7 @@ import "subworkflows/mutect.wdl" as m import "subworkflows/strelka_and_post_processing.wdl" as sapp import "subworkflows/varscan_pre_and_post_processing.wdl" as vpapp import "subworkflows/vcf_readcount_annotator.wdl" as vra +import "subworkflows/cram_to_bam_and_index.wdl" as cb import "tools/add_vep_fields_to_table.wdl" as avftt import "tools/bam_readcount.wdl" as br import "tools/bgzip.wdl" as b @@ -22,10 +23,10 @@ workflow detectVariantsWgs { File reference File reference_fai File reference_dict - File tumor_bam - File tumor_bam_bai - File normal_bam - File normal_bam_bai + File tumor_cram + File tumor_cram_index + File normal_cram + File normal_cram_index File roi_intervals Boolean strelka_exome_mode Int strelka_cpu_reserved = 8 @@ -54,13 +55,13 @@ workflow detectVariantsWgs { String gnomad_field_name = "gnomADe_AF" # only change with gnomad_filter_annotation Float filter_gnomADe_maximum_population_allele_frequency = 0.001 - Float filter_mapq0_threshold = 0.15 + Float filter_mapq0_threshold = 0.10 Int filter_minimum_depth = 1 Float? filter_somatic_llr_threshold Float? filter_somatic_llr_tumor_purity Float? 
filter_somatic_llr_normal_contamination_rate Boolean cle_vcf_filter = false - Array[String] variants_to_table_fields = ["CHROM", "POS", "ID", "REF", "ALT", "set", "AC", "AF"] + Array[String] variants_to_table_fields = ["CHROM", "POS", "ID", "REF", "ALT", "set", "AC", "AF","LLR"] Array[String] variants_to_table_genotype_fields = ["GT", "AD"] Array[String] vep_to_table_fields = ["HGVSc", "HGVSp"] String tumor_sample_name @@ -68,6 +69,27 @@ workflow detectVariantsWgs { Array[VepCustomAnnotation] vep_custom_annotations File? validated_variants File? validated_variants_tbi + String prefix # workflow final output prefix + } + + call cb.cramTobamAndIndex as tumorConvert { + input: + reference=reference, + reference_index=reference_fai, + reference_dict=reference_dict, + cram=tumor_cram, + cram_index=tumor_cram_index + + } + + call cb.cramTobamAndIndex as normalConvert { + input: + reference=reference, + reference_index=reference_fai, + reference_dict=reference_dict, + cram=normal_cram, + cram_index=normal_cram_index + } call m.mutect { @@ -75,10 +97,10 @@ workflow detectVariantsWgs { reference=reference, reference_fai=reference_fai, reference_dict=reference_dict, - tumor_bam=tumor_bam, - tumor_bam_bai=tumor_bam_bai, - normal_bam=normal_bam, - normal_bam_bai=normal_bam_bai, + tumor_bam=tumorConvert.indexed_bam, + tumor_bam_bai=tumorConvert.indexed_bam_bai, + normal_bam=normalConvert.indexed_bam, + normal_bam_bai=normalConvert.indexed_bam_bai, interval_list=roi_intervals, scatter_count=scatter_count, tumor_sample_name=tumor_sample_name, @@ -92,12 +114,12 @@ workflow detectVariantsWgs { reference_dict=reference_dict, tumor_sample_name=tumor_sample_name, - tumor_bam=tumor_bam, - tumor_bam_bai=tumor_bam_bai, + tumor_bam=tumorConvert.indexed_bam, + tumor_bam_bai=tumorConvert.indexed_bam_bai, normal_sample_name=normal_sample_name, - normal_bam=normal_bam, - normal_bam_bai=normal_bam_bai, + normal_bam=normalConvert.indexed_bam, + normal_bam_bai=normalConvert.indexed_bam_bai, 
interval_list=roi_intervals, exome_mode=strelka_exome_mode, @@ -114,12 +136,12 @@ workflow detectVariantsWgs { reference_dict=reference_dict, tumor_sample_name=tumor_sample_name, - tumor_bam=tumor_bam, - tumor_bam_bai=tumor_bam_bai, + tumor_bam=tumorConvert.indexed_bam, + tumor_bam_bai=tumorConvert.indexed_bam_bai, normal_sample_name=normal_sample_name, - normal_bam=normal_bam, - normal_bam_bai=normal_bam_bai, + normal_bam=normalConvert.indexed_bam, + normal_bam_bai=normalConvert.indexed_bam_bai, interval_list=roi_intervals, scatter_count=scatter_count, @@ -136,11 +158,11 @@ workflow detectVariantsWgs { reference_fai=reference_fai, reference_dict=reference_dict, - tumor_bam=tumor_bam, - tumor_bam_bai=tumor_bam_bai, + tumor_bam=tumorConvert.indexed_bam, + tumor_bam_bai=tumorConvert.indexed_bam_bai, - normal_bam=normal_bam, - normal_bam_bai=normal_bam_bai, + normal_bam=normalConvert.indexed_bam, + normal_bam_bai=normalConvert.indexed_bam_bai, interval_list=roi_intervals, docm_vcf=docm_vcf, @@ -207,8 +229,8 @@ workflow detectVariantsWgs { reference_fai=reference_fai, reference_dict=reference_dict, sample=tumor_sample_name, - bam=tumor_bam, - bam_bai=tumor_bam_bai, + bam=tumorConvert.indexed_bam, + bam_bai=tumorConvert.indexed_bam_bai, min_base_quality=readcount_minimum_base_quality, min_mapping_quality=readcount_minimum_mapping_quality } @@ -220,8 +242,8 @@ workflow detectVariantsWgs { reference_fai=reference_fai, reference_dict=reference_dict, sample=normal_sample_name, - bam=normal_bam, - bam_bai=normal_bam_bai, + bam=normalConvert.indexed_bam, + bam_bai=normalConvert.indexed_bam_bai, min_base_quality=readcount_minimum_base_quality, min_mapping_quality=readcount_minimum_mapping_quality } @@ -258,8 +280,8 @@ workflow detectVariantsWgs { filter_somatic_llr_tumor_purity=filter_somatic_llr_tumor_purity, filter_somatic_llr_normal_contamination_rate=filter_somatic_llr_normal_contamination_rate, filter_minimum_depth=filter_minimum_depth, - tumor_bam=tumor_bam, - 
tumor_bam_bai=tumor_bam_bai, + tumor_bam=tumorConvert.indexed_bam, + tumor_bam_bai=tumorConvert.indexed_bam_bai, do_cle_vcf_filter=cle_vcf_filter, reference=reference, reference_fai=reference_fai, @@ -294,7 +316,8 @@ workflow detectVariantsWgs { input: vcf=annotatedFilterIndex.indexed_vcf, tsv=variantsToTable.variants_tsv, - vep_fields=vep_to_table_fields + vep_fields=vep_to_table_fields, + prefix=prefix } output { diff --git a/definitions/somatic_wgs.wdl b/definitions/somatic_wgs.wdl index d68159f..9972c79 100644 --- a/definitions/somatic_wgs.wdl +++ b/definitions/somatic_wgs.wdl @@ -177,10 +177,10 @@ workflow somaticWgs { reference=reference, reference_fai=reference_fai, reference_dict=reference_dict, - tumor_bam=tumorAlignment.final_bam, - tumor_bam_bai=tumorAlignment.final_bam_bai, - normal_bam=normalAlignment.final_bam, - normal_bam_bai=normalAlignment.final_bam_bai, + tumor_cram=tumorAlignment.final_bam, + tumor_cram_index=tumorAlignment.final_bam_bai, + normal_cram=normalAlignment.final_bam, + normal_cram_index=normalAlignment.final_bam_bai, roi_intervals=target_intervals, strelka_exome_mode=false, strelka_cpu_reserved=strelka_cpu_reserved, diff --git a/definitions/subworkflows/cram_to_bam_and_index.wdl b/definitions/subworkflows/cram_to_bam_and_index.wdl new file mode 100644 index 0000000..0e341b8 --- /dev/null +++ b/definitions/subworkflows/cram_to_bam_and_index.wdl @@ -0,0 +1,38 @@ +version 1.0 + +import "../tools/cram_to_bam.wdl" as cb +import "../tools/index_bam.wdl" as i + +# Runs cb.cramToBam on the given CRAM, then i.indexBam on the BAM it produces. +# All five inputs are passed straight through to cramToBam; the three outputs +# are indexBam's outputs re-exported under the same names. +# NOTE(review): the name is spelled "cramTobamAndIndex" (lowercase b); callers +# reference it by this exact spelling, so it is left unchanged here. +workflow cramTobamAndIndex { + input { + File cram + File cram_index + File reference + File reference_index + File reference_dict + } + + call cb.cramToBam { + input: + cram=cram, + cram_index=cram_index, + reference=reference, + reference_index=reference_index, + reference_dict=reference_dict + } + + call i.indexBam { + input: bam=cramToBam.bam + } + + output { + File indexed_bam = indexBam.indexed_bam + File indexed_bam_bai = indexBam.indexed_bam_bai + File indexed_bai = 
indexBam.indexed_bai + } +} diff --git a/definitions/tools/cram_to_bam.wdl b/definitions/tools/cram_to_bam.wdl index b4666fb..3f1f6a4 100644 --- a/definitions/tools/cram_to_bam.wdl +++ b/definitions/tools/cram_to_bam.wdl @@ -9,9 +9,9 @@ task cramToBam { File reference_dict } - Int space_needed_gb = 10 + round(size([cram, cram_index, reference, reference_index, reference_dict], "GB") * 3) + Int space_needed_gb = 10 + round(size([cram, cram_index, reference, reference_index, reference_dict], "GB") * 5) runtime { - memory: "4GB" + memory: "8GB" docker: "quay.io/biocontainers/samtools:1.11--h6270b1f_0" disks: "local-disk ~{space_needed_gb} HDD" } diff --git a/definitions/tools/docm_gatk_haplotype_caller.wdl b/definitions/tools/docm_gatk_haplotype_caller.wdl index 16d9e0d..d9e1564 100644 --- a/definitions/tools/docm_gatk_haplotype_caller.wdl +++ b/definitions/tools/docm_gatk_haplotype_caller.wdl @@ -27,7 +27,7 @@ task docmGatkHaplotypeCaller { preemptible: 1 maxRetries: 2 memory: "9GB" - docker: "broadinstitute/gatk:4.1.2.0" + docker: "broadinstitute/gatk:4.1.3.0" disks: "local-disk ~{space_needed_gb} HDD" } diff --git a/definitions/tools/mutect.wdl b/definitions/tools/mutect.wdl index a165b8f..f7cf0fb 100644 --- a/definitions/tools/mutect.wdl +++ b/definitions/tools/mutect.wdl @@ -29,7 +29,7 @@ task mutect { preemptible: 1 maxRetries: 2 docker: "broadinstitute/gatk:4.2.3.0" - memory: "2GB" + memory: "8GB" bootDiskSizeGb: space_needed_gb disks: "local-disk ~{space_needed_gb} HDD" }