nf-cmgg · nvnieuwk · Aug 29, 2024 · Aug 29, 2024 · Aug 29, 2024 · Aug 29, 2024
diff --git a/conf/modules.config b/conf/modules.config
diff --git a/main.nf b/main.nf
@@ -7,6 +7,9 @@
 ----------------------------------------------------------------------------------------
 */
 
+// Enables the workflow output definition: https://www.nextflow.io/docs/latest/workflow.html#workflow-output-def
+nextflow.preview.output = true
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
@@ -47,6 +50,7 @@ params.vcfanno_config       = getGenomeAttribute('vcfanno_config', params.genome
 include { GERMLINE                } from './workflows/germline'
 include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_cmgg_germline_pipeline'
 include { PIPELINE_COMPLETION     } from './subworkflows/local/utils_cmgg_germline_pipeline'
+include { samplesheetToList       } from 'plugin/nf-schema'
 
 //
 // WORKFLOW: Run main analysis pipeline depending on type of input
@@ -132,7 +136,18 @@ workflow NFCMGG_GERMLINE {
     )
 
     emit:
-    multiqc_report = GERMLINE.out.multiqc_report // channel: /path/to/multiqc_report.html
+    vcf_tbi             = GERMLINE.out.vcf_tbi        // channel: [ val(meta), path(vcf), path(tbi) ]
+    multiqc_report      = GERMLINE.out.multiqc_report // channel: /path/to/multiqc/report.html
+    validation          = GERMLINE.out.validation
+    individual_reports  = GERMLINE.out.individual_reports
+    family_reports      = GERMLINE.out.family_reports
+    individuals_bed     = GERMLINE.out.individuals_bed
+    family_bed          = GERMLINE.out.family_bed
+    gvcf_tbi            = GERMLINE.out.gvcf_tbi
+    updio               = GERMLINE.out.updio
+    automap             = GERMLINE.out.automap
+    db                  = GERMLINE.out.db
+    ped                 = GERMLINE.out.ped
 
 }
 
@@ -194,7 +209,6 @@ workflow {
 
     def multiqc_logo = params.multiqc_logo   ?: "$projectDir/assets/CMGG_logo.png"
 
-
     //
     // SUBWORKFLOW: Run initialisation tasks
     //
@@ -232,6 +246,98 @@ workflow {
         params.hook_url,
         NFCMGG_GERMLINE.out.multiqc_report
     )
+
+    // TODO: remove this onComplete handler once dynamic publish paths have been added to Nextflow
+    workflow.onComplete = {
+        // Move everything published under <outdir>/temp/ into its final per-ID directory, then delete temp/
+        def date = params.skip_date_project ? "" : "${new Date().format("yyyy-MM-dd")}_"
+        def final_output = "${params.outdir}/${params.project ? "${date}${params.project}" : "${date}${workflow.runName}"}"
+        // The schema is resolved against projectDir so the lookup does not depend on the launch directory
+        // IDs are sorted longest first so that an ID that is a substring of another one cannot shadow it below
+        def ids = samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")
+            .collect { entry -> [ entry[0].id, entry[0].family ] }
+            .flatten()
+            .findAll { id -> id instanceof String && id.length() > 0 }
+            .unique()
+            .sort { a, b -> b.length() <=> a.length() }
+        // The closure parameter is not called `file` to avoid shadowing Nextflow's file() function
+        file(params.outdir).eachFileRecurse { out_file ->
+            if (out_file.isDirectory()) { return }
+            def file_name = out_file.name
+            def file_full_name = out_file.toString()
+            def caller = file_full_name.contains("haplotypecaller") ? "haplotypecaller" :
+                file_full_name.contains("vardict") ? "vardict" : ""
+            def dot_caller = caller ? ".${caller}" : ""
+            def id = ids.find { id_ss -> file_name.contains(id_ss) } ?: "" // longest samplesheet ID found in the file name, "" if none
+            def custom_suffix = "${params.output_suffix ?: dot_caller}" // --output_suffix overrides the ".<caller>" suffix
+            if (file_full_name.contains("/temp/vcfs/")) {
+                def extension = file_name.endsWith(".tbi") ? "vcf.gz.tbi" : "vcf.gz"
+                out_file.moveTo("${final_output}/${id}/${id}${custom_suffix}.${extension}")
+            }
+            else if (file_full_name.contains("/temp/validation/")) {
+                def validation_file = file_name.replace("${dot_caller}", "")
+                out_file.moveTo("${params.outdir}/${id}/validation/${caller}/${validation_file}")
+            }
+            else if (file_full_name.contains("/temp/individuals_reports/")) {
+                def report_extension = file_name.replace(dot_caller, "").replace(id, "")
+                out_file.moveTo("${params.outdir}/${id}/reports/${id}${custom_suffix}${report_extension}")
+            }
+            else if (file_full_name.contains("/temp/family_reports/")) {
+                def report_extension = file_name.replace(dot_caller, "").replace(id, "")
+                out_file.moveTo("${final_output}/${id}/reports/${id}${custom_suffix}${report_extension}")
+            }
+            else if (file_full_name.contains("/temp/individuals_beds/")) {
+                out_file.moveTo("${params.outdir}/${id}/${id}.bed")
+            }
+            else if (file_full_name.contains("/temp/family_beds/")) {
+                out_file.moveTo("${final_output}/${id}/${id}.bed")
+            }
+            else if (file_full_name.contains("/temp/gvcfs/")) {
+                def extension = file_name.endsWith(".tbi") ? "g.vcf.gz.tbi" : "g.vcf.gz"
+                out_file.moveTo("${params.outdir}/${id}/${id}.${caller}.${extension}")
+            }
+            else if (file_full_name.contains("/temp/updio/")) {
+                def sample = id
+                id = file_full_name.split("/temp/updio/")[-1].split("/")[0].replace("updio_${caller}_", "") // ID from the first directory below temp/updio/ (presumably the family - confirm)
+                out_file.moveTo("${final_output}/${id}/updio_${caller}/${sample}/${file_name}")
+            }
+            else if (file_full_name.contains("/temp/automap/")) {
+                def sample = id
+                id = file_full_name.split("/temp/automap/")[-1].split("/")[0].replace("automap_${caller}_", "") // ID from the first directory below temp/automap/ (presumably the family - confirm)
+                out_file.moveTo("${final_output}/${id}/automap_${caller}/${sample}/${file_name}")
+            }
+            else if (file_full_name.contains("/temp/ped/")) {
+                out_file.moveTo("${final_output}/${id}/${id}${custom_suffix}.ped")
+            }
+            else if (file_full_name.contains("/temp/db/")) {
+                out_file.moveTo("${final_output}/${id}/${id}${custom_suffix}.db")
+            }
+        }
+        file("${params.outdir}/temp").deleteDir() // also removes anything under temp/ that no branch above moved
+    }
+
+    publish:
+    NFCMGG_GERMLINE.out.vcf_tbi             >> 'temp/vcfs/'
+    NFCMGG_GERMLINE.out.validation          >> 'temp/validation/'
+    NFCMGG_GERMLINE.out.individual_reports  >> 'temp/individuals_reports/'
+    NFCMGG_GERMLINE.out.family_reports      >> 'temp/family_reports/'
+    NFCMGG_GERMLINE.out.individuals_bed     >> 'temp/individuals_beds/'
+    NFCMGG_GERMLINE.out.family_bed          >> 'temp/family_beds/'
+    NFCMGG_GERMLINE.out.gvcf_tbi            >> 'temp/gvcfs/'
+    NFCMGG_GERMLINE.out.multiqc_report      >> 'multiqc/'
+    NFCMGG_GERMLINE.out.updio               >> 'temp/updio/'
+    NFCMGG_GERMLINE.out.automap             >> 'temp/automap/'
+    NFCMGG_GERMLINE.out.db                  >> 'temp/db/'
+    NFCMGG_GERMLINE.out.ped                 >> 'temp/ped/'
+}
+
+output {
+    directory "${params.outdir}"
+    // TODO: add an index once dynamic publish paths have been added to Nextflow
+    // index {
+    //     path 'index.csv'
+    // }
+
 }
 
 /*

diff --git a/subworkflows/local/cram_call_genotype_gatk4/main.nf b/subworkflows/local/cram_call_genotype_gatk4/main.nf
@@ -48,6 +48,7 @@ workflow CRAM_CALL_GENOTYPE_GATK4 {
         }
         .mix(CRAM_CALL_GATK4.out.gvcfs)
 
+    ch_joint_beds = Channel.empty()
     if(!only_call) {
 
         GVCF_JOINT_GENOTYPE_GATK4(
@@ -61,7 +62,7 @@ workflow CRAM_CALL_GENOTYPE_GATK4 {
             scatter_count
         )
         ch_versions = ch_versions.mix(GVCF_JOINT_GENOTYPE_GATK4.out.versions)
-
+        ch_joint_beds = GVCF_JOINT_GENOTYPE_GATK4.out.joint_beds
     }
 
     if(!only_call && !only_merge) {
@@ -83,9 +84,11 @@ workflow CRAM_CALL_GENOTYPE_GATK4 {
     }
 
     emit:
-    vcfs = ch_vcfs         // channel: [ val(meta), path(vcf), path(tbi) ]
+    gvcfs = CRAM_CALL_GATK4.out.gvcfs
+    vcfs = ch_vcfs              // channel: [ val(meta), path(vcf), path(tbi) ]
+    joint_beds = ch_joint_beds  // channel: [ val(meta), path(bed) ]
 
-    reports = ch_reports   // channel: [ path(reports) ]
-    versions = ch_versions // channel: [ versions.yml ]
+    reports = ch_reports        // channel: [ path(reports) ]
+    versions = ch_versions      // channel: [ versions.yml ]
 
 }
diff --git a/subworkflows/local/cram_prepare_samtools_bedtools/main.nf b/subworkflows/local/cram_prepare_samtools_bedtools/main.nf
@@ -150,6 +150,7 @@ workflow CRAM_PREPARE_SAMTOOLS_BEDTOOLS {
         ch_fasta
     )
     ch_versions = ch_versions.mix(MOSDEPTH.out.versions.first())
+    ch_reports = ch_reports.mix(MOSDEPTH.out.global_txt, MOSDEPTH.out.summary_txt)
 
     ch_ready_rois
         .join(MOSDEPTH.out.quantized_bed, failOnDuplicate:true, failOnMismatch:true)

diff --git a/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf b/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf
@@ -74,6 +74,7 @@ workflow GVCF_JOINT_GENOTYPE_GATK4 {
     )
     ch_versions = ch_versions.mix(GATK4_GENOMICSDBIMPORT.out.versions.first())
 
+    ch_joint_beds = Channel.empty()
     if(!only_merge) {
 
         BCFTOOLS_QUERY(
@@ -108,13 +109,14 @@ workflow GVCF_JOINT_GENOTYPE_GATK4 {
             ch_fai
         )
         ch_versions = ch_versions.mix(MERGE_BEDS.out.versions.first())
+        ch_joint_beds = MERGE_BEDS.out.bed
 
         //
         // Split BED file into multiple BEDs specified by --scatter_count
         //
 
         INPUT_SPLIT_BEDTOOLS(
-            MERGE_BEDS.out.bed.map { meta, bed ->
+            ch_joint_beds.map { meta, bed ->
                 // Multiply the scatter count by the family size to better scatter big families
                 [meta, bed, (scatter_count * meta.family_count)]
             },
@@ -162,7 +164,8 @@ workflow GVCF_JOINT_GENOTYPE_GATK4 {
     }
 
     emit:
-    vcfs = ch_vcfs         // [ val(meta), path(vcf) ]
-    versions = ch_versions // [ path(versions) ]
+    vcfs = ch_vcfs              // [ val(meta), path(vcf) ]
+    joint_beds = ch_joint_beds  // [ val(meta), path(bed) ]
+    versions = ch_versions      // [ path(versions) ]
 
 }
diff --git a/tests/nextflow.config b/tests/nextflow.config
@@ -58,9 +58,39 @@ params {
     igenomes_ignore = true
     genomes_ignore = true
 
-    validationSchemaIgnoreParams     = 'genomes,igenomes_base,test_data,cram1,cram2,cram3,crai1,crai2,crai3,vcf1,vcf2,vcf3,tbi1,tbi2,tbi3,gvcf1,gvcf2,gvcf3,gtbi1,gtbi2,gtbi3,famvcf,famtbi,ped,bed,split1,split2,split3'
 }
 
+validation.ignoreParams = [
+    'genomes',
+    'igenomes_base',
+    'test_data',
+    'cram1',
+    'cram2',
+    'cram3',
+    'crai1',
+    'crai2',
+    'crai3',
+    'vcf1',
+    'vcf2',
+    'vcf3',
+    'tbi1',
+    'tbi2',
+    'tbi3',
+    'gvcf1',
+    'gvcf2',
+    'gvcf3',
+    'gtbi1',
+    'gtbi2',
+    'gtbi3',
+    'famvcf',
+    'famtbi',
+    'ped',
+    'bed',
+    'split1',
+    'split2',
+    'split3'
+]
+
 process {
     // Limit resources so that this can run on GitHub Actions
     resourceLimits = [

diff --git a/tests/pipeline/callers/main.nf.test.snap b/tests/pipeline/callers/main.nf.test.snap
@@ -23,7 +23,6 @@
                 "NA24385/reports/NA24385.global.dist.txt",
                 "NA24385/reports/NA24385.haplotypecaller.bcftools_stats.txt",
                 "NA24385/reports/NA24385.summary.txt",
-                "multiqc/multiqc_plots",
                 "multiqc/multiqc_report.html",
                 "samplesheet.csv",
                 "test/Ashkenazim/Ashkenazim.bed",
@@ -64,37 +63,36 @@
                 "NA24385/reports/NA24385.global.dist.txt",
                 "NA24385/reports/NA24385.haplotypecaller.bcftools_stats.txt",
                 "NA24385/reports/NA24385.summary.txt",
-                "multiqc/multiqc_plots",
                 "multiqc/multiqc_report.html",
                 "samplesheet.csv",
                 "test/Ashkenazim/Ashkenazim.bed",
                 "test/Ashkenazim/Ashkenazim.haplotypecaller.ped",
                 "test/Ashkenazim/Ashkenazim.haplotypecaller.vcf.gz",
                 "test/Ashkenazim/Ashkenazim.haplotypecaller.vcf.gz.tbi",
-                "test/Ashkenazim/NA24143.vardict.ped",
-                "test/Ashkenazim/NA24143.vardict.vcf.gz",
-                "test/Ashkenazim/NA24143.vardict.vcf.gz.tbi",
-                "test/Ashkenazim/NA24149.vardict.ped",
-                "test/Ashkenazim/NA24149.vardict.vcf.gz",
-                "test/Ashkenazim/NA24149.vardict.vcf.gz.tbi",
-                "test/Ashkenazim/NA24385.vardict.ped",
-                "test/Ashkenazim/NA24385.vardict.vcf.gz",
-                "test/Ashkenazim/NA24385.vardict.vcf.gz.tbi",
                 "test/Ashkenazim/reports/Ashkenazim.haplotypecaller.bcftools_stats.txt",
                 "test/Ashkenazim/reports/Ashkenazim.haplotypecaller.somalier.html",
-                "test/Ashkenazim/reports/NA24143.vardict.bcftools_stats.txt",
-                "test/Ashkenazim/reports/NA24143.vardict.somalier.html",
-                "test/Ashkenazim/reports/NA24149.vardict.bcftools_stats.txt",
-                "test/Ashkenazim/reports/NA24149.vardict.somalier.html",
-                "test/Ashkenazim/reports/NA24385.vardict.bcftools_stats.txt",
-                "test/Ashkenazim/reports/NA24385.vardict.somalier.html"
+                "test/NA24143/NA24143.vardict.ped",
+                "test/NA24143/NA24143.vardict.vcf.gz",
+                "test/NA24143/NA24143.vardict.vcf.gz.tbi",
+                "test/NA24143/reports/NA24143.vardict.bcftools_stats.txt",
+                "test/NA24143/reports/NA24143.vardict.somalier.html",
+                "test/NA24149/NA24149.vardict.ped",
+                "test/NA24149/NA24149.vardict.vcf.gz",
+                "test/NA24149/NA24149.vardict.vcf.gz.tbi",
+                "test/NA24149/reports/NA24149.vardict.bcftools_stats.txt",
+                "test/NA24149/reports/NA24149.vardict.somalier.html",
+                "test/NA24385/NA24385.vardict.ped",
+                "test/NA24385/NA24385.vardict.vcf.gz",
+                "test/NA24385/NA24385.vardict.vcf.gz.tbi",
+                "test/NA24385/reports/NA24385.vardict.bcftools_stats.txt",
+                "test/NA24385/reports/NA24385.vardict.somalier.html"
             ]
         ],
         "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.02.0"
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
         },
-        "timestamp": "2024-03-27T15:41:56.768447097"
+        "timestamp": "2024-08-30T12:15:27.996022227"
     },
     "vardict": {
         "content": [
@@ -111,30 +109,29 @@
                 "NA24385/NA24385.bed",
                 "NA24385/reports/NA24385.global.dist.txt",
                 "NA24385/reports/NA24385.summary.txt",
-                "multiqc/multiqc_plots",
                 "multiqc/multiqc_report.html",
                 "samplesheet.csv",
-                "test/Ashkenazim/NA24143.vardict.ped",
-                "test/Ashkenazim/NA24143.vardict.vcf.gz",
-                "test/Ashkenazim/NA24143.vardict.vcf.gz.tbi",
-                "test/Ashkenazim/NA24149.vardict.ped",
-                "test/Ashkenazim/NA24149.vardict.vcf.gz",
-                "test/Ashkenazim/NA24149.vardict.vcf.gz.tbi",
-                "test/Ashkenazim/NA24385.vardict.ped",
-                "test/Ashkenazim/NA24385.vardict.vcf.gz",
-                "test/Ashkenazim/NA24385.vardict.vcf.gz.tbi",
-                "test/Ashkenazim/reports/NA24143.vardict.bcftools_stats.txt",
-                "test/Ashkenazim/reports/NA24143.vardict.somalier.html",
-                "test/Ashkenazim/reports/NA24149.vardict.bcftools_stats.txt",
-                "test/Ashkenazim/reports/NA24149.vardict.somalier.html",
-                "test/Ashkenazim/reports/NA24385.vardict.bcftools_stats.txt",
-                "test/Ashkenazim/reports/NA24385.vardict.somalier.html"
+                "test/NA24143/NA24143.vardict.ped",
+                "test/NA24143/NA24143.vardict.vcf.gz",
+                "test/NA24143/NA24143.vardict.vcf.gz.tbi",
+                "test/NA24143/reports/NA24143.vardict.bcftools_stats.txt",
+                "test/NA24143/reports/NA24143.vardict.somalier.html",
+                "test/NA24149/NA24149.vardict.ped",
+                "test/NA24149/NA24149.vardict.vcf.gz",
+                "test/NA24149/NA24149.vardict.vcf.gz.tbi",
+                "test/NA24149/reports/NA24149.vardict.bcftools_stats.txt",
+                "test/NA24149/reports/NA24149.vardict.somalier.html",
+                "test/NA24385/NA24385.vardict.ped",
+                "test/NA24385/NA24385.vardict.vcf.gz",
+                "test/NA24385/NA24385.vardict.vcf.gz.tbi",
+                "test/NA24385/reports/NA24385.vardict.bcftools_stats.txt",
+                "test/NA24385/reports/NA24385.vardict.somalier.html"
             ]
         ],
         "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.02.0"
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
         },
-        "timestamp": "2024-03-27T15:39:37.704366119"
+        "timestamp": "2024-08-30T12:07:42.137754836"
     }
 }
diff --git a/tests/pipeline/default/main.nf.test.snap b/tests/pipeline/default/main.nf.test.snap
@@ -23,7 +23,6 @@
                 "NA24385/reports/NA24385.global.dist.txt",
                 "NA24385/reports/NA24385.haplotypecaller.bcftools_stats.txt",
                 "NA24385/reports/NA24385.summary.txt",
-                "multiqc/multiqc_plots",
                 "multiqc/multiqc_report.html",
                 "samplesheet.csv",
                 "test/Ashkenazim/Ashkenazim.bed",

diff --git a/tests/pipeline/gvcfs/main.nf.test.snap b/tests/pipeline/gvcfs/main.nf.test.snap
@@ -5,7 +5,6 @@
 
             ],
             [
-                "multiqc/multiqc_plots",
                 "multiqc/multiqc_report.html",
                 "samplesheet.csv",
                 "test/Ashkenazim/Ashkenazim.bed",