From 566ce0c99017479e05cb6dd43e8b04ca549566c8 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 5 Aug 2024 10:32:48 +0200 Subject: [PATCH 1/5] edit automap outdir --- modules/local/automap/automap/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/automap/automap/main.nf b/modules/local/automap/automap/main.nf index 807edf97..f3090927 100644 --- a/modules/local/automap/automap/main.nf +++ b/modules/local/automap/automap/main.nf @@ -29,7 +29,7 @@ process AUTOMAP_AUTOMAP { automap \\ --vcf $vcf \\ --genome $hg_genome \\ - --out $prefix \\ + --out $prefix/ \\ --repeats $repeats \\ $panel_file \\ $args From 13c1ea4b2a790d3fffb39142903b2682d3ba6ab3 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 5 Aug 2024 14:00:53 +0200 Subject: [PATCH 2/5] use sample id for automap individuals --- conf/modules.config | 2 +- subworkflows/local/cram_call_vardictjava/main.nf | 4 ++++ .../local/gvcf_joint_genotype_gatk4/main.nf | 16 ++++++++++++---- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 6ae71597..5607eaff 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -564,7 +564,7 @@ process { withName: "^.*VCF_ROH_AUTOMAP:AUTOMAP_AUTOMAP\$" { ext.prefix = {"automap_${meta.caller}"} ext.args = {[ - meta.family_count > 1 ? "--multivcf" : "", + meta.family_count > 1 ? "--multivcf" : "--id ${meta.samples}", params.automap_panel_name ? "--panelname ${params.automap_panel_name}" : "" ].findAll { it != "" }.join(" ")} publishDir = [ diff --git a/subworkflows/local/cram_call_vardictjava/main.nf b/subworkflows/local/cram_call_vardictjava/main.nf index 5a74f501..c75a4495 100644 --- a/subworkflows/local/cram_call_vardictjava/main.nf +++ b/subworkflows/local/cram_call_vardictjava/main.nf @@ -125,6 +125,10 @@ workflow CRAM_CALL_VARDICTJAVA { ch_filter_output .join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true) + .map { meta, vcf, tbi -> + def new_meta = meta + [samples: meta.sample] + [ new_meta, vcf, tbi ] + } .set { ch_vcfs } emit: diff --git a/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf b/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf index 5b8df647..9cc0432c 100644 --- a/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf +++ b/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf @@ -53,9 +53,13 @@ workflow GVCF_JOINT_GENOTYPE_GATK4 { family_count: meta.family_count, caller: meta.caller ] - [ groupKey(new_meta, meta.family_count.toInteger()), gvcf, tbi ] + [ groupKey(new_meta, meta.family_count.toInteger()), gvcf, tbi, meta.sample ] } .groupTuple() + .map { meta, gvcf, tbi, samples -> + def new_meta = meta + [samples: "${samples.join(',')}"] // Having a comma-separated string ensures that joins don't fail + [ new_meta, gvcf, tbi ] + } .combine(GAWK.out.output.map { it[1] }) .map { meta, gvcfs, tbis, bed -> [ meta, gvcfs, tbis, bed, [], [] ] @@ -89,9 +93,13 @@ workflow GVCF_JOINT_GENOTYPE_GATK4 { family_count: meta.family_count, caller: meta.caller ] - [ groupKey(new_meta, meta.family_count.toInteger()), bed ] + [ groupKey(new_meta, meta.family_count.toInteger()), bed, meta.sample ] } .groupTuple() + .map { meta, bed, samples -> + def new_meta = meta + [samples: "${samples.join(',')}"] // Having a comma-separated string ensures that joins don't fail + [ new_meta, bed ] + } .dump(tag:'merge_beds_input', pretty: true) .set { ch_merge_beds_input } @@ -110,11 +118,11 @@ workflow GVCF_JOINT_GENOTYPE_GATK4 { // Multiply the scatter count by the family size to better scatter big families [meta, bed, (params.scatter_count * meta.family_count)] }, - GATK4_GENOMICSDBIMPORT.out.genomicsdb.map { meta, genomicsdb -> [ meta, genomicsdb, [] ]} + GATK4_GENOMICSDBIMPORT.out.genomicsdb.map { meta, genomicsdb -> [ meta, genomicsdb, [] ]}.view() ) ch_versions = ch_versions.mix(INPUT_SPLIT_BEDTOOLS.out.versions) - INPUT_SPLIT_BEDTOOLS.out.split + INPUT_SPLIT_BEDTOOLS.out.split.view() .map { meta, genomicsdb, extra, bed -> [ meta, genomicsdb, [], bed, [] ] } From ed11b257a52af87717d2e6bb86533face227adea Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 6 Aug 2024 10:55:19 +0200 Subject: [PATCH 3/5] remove view statements --- subworkflows/local/gvcf_joint_genotype_gatk4/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf b/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf index 9cc0432c..ee6f01c3 100644 --- a/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf +++ b/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf @@ -118,11 +118,11 @@ workflow GVCF_JOINT_GENOTYPE_GATK4 { // Multiply the scatter count by the family size to better scatter big families [meta, bed, (params.scatter_count * meta.family_count)] }, - GATK4_GENOMICSDBIMPORT.out.genomicsdb.map { meta, genomicsdb -> [ meta, genomicsdb, [] ]}.view() + GATK4_GENOMICSDBIMPORT.out.genomicsdb.map { meta, genomicsdb -> [ meta, genomicsdb, [] ]} ) ch_versions = ch_versions.mix(INPUT_SPLIT_BEDTOOLS.out.versions) - INPUT_SPLIT_BEDTOOLS.out.split.view() + INPUT_SPLIT_BEDTOOLS.out.split .map { meta, genomicsdb, extra, bed -> [ meta, genomicsdb, [], bed, [] ] } From a2224a206cb8b3bd6dc9c039af3614f121eec57e Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 6 Aug 2024 13:08:58 +0200 Subject: [PATCH 4/5] fix tests --- .../local/gvcf_joint_genotype_gatk4/main.nf | 4 +- .../main.nf.test.snap | 45 ++++++++++--------- .../cram_call_vardictjava/main.nf.test.snap | 30 +++++++------ .../local/vcf_roh_automap/main.nf.test | 4 +- .../local/vcf_roh_automap/main.nf.test.snap | 18 ++++---- 5 files changed, 56 insertions(+), 45 deletions(-) diff --git a/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf b/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf index ee6f01c3..d2aa1d96 100644 --- a/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf +++ b/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf @@ -57,7 +57,7 @@ workflow GVCF_JOINT_GENOTYPE_GATK4 { } .groupTuple() .map { meta, gvcf, tbi, samples -> - def new_meta = meta + [samples: "${samples.join(',')}"] // Having a comma-separated string ensures that joins don't fail + def new_meta = meta + [samples: "${samples.sort(false).join(',')}"] // Having a comma-separated string ensures that joins don't fail [ new_meta, gvcf, tbi ] } .combine(GAWK.out.output.map { it[1] }) @@ -97,7 +97,7 @@ workflow GVCF_JOINT_GENOTYPE_GATK4 { } .groupTuple() .map { meta, bed, samples -> - def new_meta = meta + [samples: "${samples.join(',')}"] // Having a comma-separated string ensures that joins don't fail + def new_meta = meta + [samples: "${samples.sort(false).join(',')}"] // Having a comma-separated string ensures that joins don't fail [ new_meta, bed ] } .dump(tag:'merge_beds_input', pretty: true) diff --git a/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.snap b/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.snap index 53983a99..64e23b12 100644 --- a/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.snap +++ b/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.snap @@ -24,7 +24,8 @@ "family": "Ashkenazim", "id": "Ashkenazim", "family_count": 3, - "caller": "haplotypecaller" + "caller": "haplotypecaller", + "samples": "NA24143,NA24149,NA24835" }, "Ashkenazim.haplotypecaller.vcf.gz", "Ashkenazim.haplotypecaller.vcf.gz.tbi" @@ -37,10 +38,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-27T10:07:15.389498873" + "timestamp": "2024-08-06T13:01:36.711373912" }, "filter - family": { "content": [ @@ -50,7 +51,8 @@ "family": "Ashkenazim", "id": "Ashkenazim", "family_count": 3, - "caller": "haplotypecaller" + "caller": "haplotypecaller", + "samples": "NA24143,NA24149,NA24835" }, "Ashkenazim.haplotypecaller.vcf.gz", "Ashkenazim.haplotypecaller.vcf.gz.tbi" @@ -63,10 +65,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-27T10:11:00.537979751" + "timestamp": "2024-08-06T13:02:26.651064532" }, "default - gvcfs": { "content": [ @@ -76,7 +78,8 @@ "family": "Ashkenazim", "id": "Ashkenazim", "family_count": 1, - "caller": "haplotypecaller" + "caller": "haplotypecaller", + "samples": "NA24143" }, "Ashkenazim.haplotypecaller.vcf.gz", "Ashkenazim.haplotypecaller.vcf.gz.tbi" @@ -87,10 +90,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-27T10:09:15.858225835" + "timestamp": "2024-08-06T12:56:23.435795439" }, "default - crams": { "content": [ @@ -100,7 +103,8 @@ "family": "Ashkenazim", "id": "Ashkenazim", "family_count": 1, - "caller": "haplotypecaller" + "caller": "haplotypecaller", + "samples": "NA24143" }, "Ashkenazim.haplotypecaller.vcf.gz", "Ashkenazim.haplotypecaller.vcf.gz.tbi" @@ -113,10 +117,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-27T10:06:11.00007181" + "timestamp": "2024-08-06T12:55:51.608812907" }, "only_call - family": { "content": [ @@ -143,7 +147,8 @@ "family": "Ashkenazim", "id": "Ashkenazim", "family_count": 2, - "caller": "haplotypecaller" + "caller": "haplotypecaller", + "samples": "NA24143,NA24835" }, "Ashkenazim.haplotypecaller.vcf.gz", "Ashkenazim.haplotypecaller.vcf.gz.tbi" @@ -156,9 +161,9 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-27T10:22:47.471505664" + "timestamp": "2024-08-06T13:04:20.473178165" } } \ No newline at end of file diff --git a/tests/subworkflows/local/cram_call_vardictjava/main.nf.test.snap b/tests/subworkflows/local/cram_call_vardictjava/main.nf.test.snap index d080f3f7..8cbbdb2e 100644 --- a/tests/subworkflows/local/cram_call_vardictjava/main.nf.test.snap +++ b/tests/subworkflows/local/cram_call_vardictjava/main.nf.test.snap @@ -8,7 +8,8 @@ "sample": "NA24143", "family": "Ashkenazim", "family_count": 1, - "caller": "vardict" + "caller": "vardict", + "samples": "NA24143" }, "NA24143.vardict.vcf.gz", "NA24143.vardict.vcf.gz.tbi" @@ -16,10 +17,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-27T10:30:53.553985963" + "timestamp": "2024-08-06T13:05:31.562298546" }, "default": { "content": [ @@ -30,7 +31,8 @@ "sample": "NA24143", "family": "Ashkenazim", "family_count": 1, - "caller": "vardict" + "caller": "vardict", + "samples": "NA24143" }, "NA24143.vardict.vcf.gz", "NA24143.vardict.vcf.gz.tbi" @@ -38,10 +40,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-27T10:31:27.5084132" + "timestamp": "2024-08-06T13:05:04.322416692" }, "family": { "content": [ @@ -52,7 +54,8 @@ "sample": "NA24143", "family": "Ashkenazim", "family_count": 2, - "caller": "vardict" + "caller": "vardict", + "samples": "NA24143" }, "NA24143.vardict.vcf.gz", "NA24143.vardict.vcf.gz.tbi" @@ -63,7 +66,8 @@ "sample": "NA24149", "family": "Ashkenazim", "family_count": 2, - "caller": "vardict" + "caller": "vardict", + "samples": "NA24149" }, "NA24149.vardict.vcf.gz", "NA24149.vardict.vcf.gz.tbi" @@ -71,9 +75,9 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-27T10:33:57.915748864" + "timestamp": "2024-08-06T13:06:05.012625054" } } \ No newline at end of file diff --git a/tests/subworkflows/local/vcf_roh_automap/main.nf.test b/tests/subworkflows/local/vcf_roh_automap/main.nf.test index 9d1efd67..b73cbbfd 100644 --- a/tests/subworkflows/local/vcf_roh_automap/main.nf.test +++ b/tests/subworkflows/local/vcf_roh_automap/main.nf.test @@ -18,7 +18,7 @@ nextflow_workflow { workflow { """ input[0] = Channel.of([ - [id:"Ashkenazim", family:"Ashkenazim", family_count:3, caller:"haplotypecaller"], + [id:"Ashkenazim", family:"Ashkenazim", family_count:3, caller:"haplotypecaller", samples: "NA24143,NA24835,NA24149"], file(params.famvcf, checkIfExists:true), file(params.famtbi, checkIfExists:true) ]) @@ -58,7 +58,7 @@ nextflow_workflow { workflow { """ input[0] = Channel.of([ - [id:"NA24143", family:"NA24143", family_count:1, caller:"haplotypecaller"], + [id:"NA24143", family:"NA24143", family_count:1, caller:"haplotypecaller", samples: "NA24143,NA24835,NA24149"], file(params.vcf1, checkIfExists:true), file(params.tbi1, checkIfExists:true) ]) diff --git a/tests/subworkflows/local/vcf_roh_automap/main.nf.test.snap b/tests/subworkflows/local/vcf_roh_automap/main.nf.test.snap index a0a3f316..60f01c03 100644 --- a/tests/subworkflows/local/vcf_roh_automap/main.nf.test.snap +++ b/tests/subworkflows/local/vcf_roh_automap/main.nf.test.snap @@ -7,7 +7,8 @@ "id": "Ashkenazim", "family": "Ashkenazim", "family_count": 3, - "caller": "haplotypecaller" + "caller": "haplotypecaller", + "samples": "NA24143,NA24835,NA24149" }, [ [ @@ -24,10 +25,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-04-04T16:13:43.925198036" + "timestamp": "2024-08-06T13:06:37.322068108" }, "default - sample": { "content": [ @@ -37,7 +38,8 @@ "id": "NA24143", "family": "NA24143", "family_count": 1, - "caller": "haplotypecaller" + "caller": "haplotypecaller", + "samples": "NA24143,NA24835,NA24149" }, [ "NA24143.HomRegions.cmgg_bio.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -49,9 +51,9 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-04-04T16:03:23.276826691" + "timestamp": "2024-08-06T13:06:47.205904762" } } \ No newline at end of file From 7f36cda921aa049c92697c8b23ad45480044b344 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 6 Aug 2024 13:22:55 +0200 Subject: [PATCH 5/5] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8941e98e..45846378 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v1.7.0dev +## Fixes + +1. Automap analysis should now give the correct output files for individuals. + ## v1.6.0 - Offbeat Ostend - [April 29 2024] ### New features