From 3b50902113f5e6abadb58f1e7c850c6ace51f54e Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 13 Nov 2023 12:38:33 +0100 Subject: [PATCH 001/104] bump version to 2.8.0dev --- assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- tests/pipeline/doubleprimers.nf.test.snap | 2 +- tests/pipeline/fasta.nf.test.snap | 2 +- tests/pipeline/iontorrent.nf.test.snap | 2 +- tests/pipeline/multi.nf.test.snap | 2 +- tests/pipeline/novaseq.nf.test.snap | 2 +- tests/pipeline/pacbio_its.nf.test.snap | 2 +- tests/pipeline/pplace.nf.test.snap | 2 +- tests/pipeline/reftaxcustom.nf.test.snap | 2 +- tests/pipeline/single.nf.test.snap | 2 +- tests/pipeline/sintax.nf.test.snap | 2 +- tests/pipeline/test.nf.test.snap | 2 +- 13 files changed, 14 insertions(+), 14 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 4fd6b6ea9..64df13dbf 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/ampliseq + This report has been generated by the nf-core/ampliseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-ampliseq-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index 93e19e86a..5139e0ec1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -329,7 +329,7 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.7.1' + version = '2.8.0dev' doi = '10.5281/zenodo.1493841' } diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap index 26ffdc7ad..d7cc9dce8 100644 --- a/tests/pipeline/doubleprimers.nf.test.snap +++ b/tests/pipeline/doubleprimers.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-07-27T13:49:03+0000" }, diff --git a/tests/pipeline/fasta.nf.test.snap b/tests/pipeline/fasta.nf.test.snap index bde435d9a..6350712f6 100644 --- a/tests/pipeline/fasta.nf.test.snap +++ 
b/tests/pipeline/fasta.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:06:17+0000" }, diff --git a/tests/pipeline/iontorrent.nf.test.snap b/tests/pipeline/iontorrent.nf.test.snap index 70bdf6153..420b3dd67 100644 --- a/tests/pipeline/iontorrent.nf.test.snap +++ b/tests/pipeline/iontorrent.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T01:42:35+0000" }, diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index 10a5f78eb..2c0382f0f 100644 --- a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap 
@@ -14,7 +14,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/novaseq.nf.test.snap b/tests/pipeline/novaseq.nf.test.snap index 89bf199bf..427cd40b2 100644 --- a/tests/pipeline/novaseq.nf.test.snap +++ b/tests/pipeline/novaseq.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T00:10:02+0000" }, diff --git 
a/tests/pipeline/pacbio_its.nf.test.snap b/tests/pipeline/pacbio_its.nf.test.snap index 0adfdad85..c211e2b61 100644 --- a/tests/pipeline/pacbio_its.nf.test.snap +++ b/tests/pipeline/pacbio_its.nf.test.snap @@ -35,7 +35,7 @@ }, "software_versions": { "content": [ - "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T02:07:02+0000" }, diff --git a/tests/pipeline/pplace.nf.test.snap b/tests/pipeline/pplace.nf.test.snap index abd94f98f..4f64efa81 100644 --- a/tests/pipeline/pplace.nf.test.snap +++ b/tests/pipeline/pplace.nf.test.snap @@ -8,7 +8,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, 
ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T17:24:03+0000" }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 875389718..7dca4e3e9 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, 
dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/single.nf.test.snap b/tests/pipeline/single.nf.test.snap index 751d9a832..a31b986cf 100644 --- a/tests/pipeline/single.nf.test.snap +++ b/tests/pipeline/single.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, 
Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T20:35:33+0000" }, diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index aae3466cd..069c7fa88 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, diff --git a/tests/pipeline/test.nf.test.snap b/tests/pipeline/test.nf.test.snap index 8fc51fa05..967f13691 100644 --- a/tests/pipeline/test.nf.test.snap +++ b/tests/pipeline/test.nf.test.snap @@ -22,7 +22,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, 
dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T20:55:32+0000" }, From 77bb7ec1b7b80e7a43ab3fd6faee8b37e2cf41ff Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 13 Nov 2023 12:39:28 +0100 Subject: [PATCH 002/104] update changelog --- CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf0e5ce0e..620c97165 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## nf-core/ampliseq version 2.8.0dev + +### `Added` + +### `Changed` + +### `Fixed` + +### `Dependencies` + +### `Removed` + ## nf-core/ampliseq version 2.7.1 - 2023-11-14 ### `Added` From 2992fb7c6a103c756e2250e23fb13af08ce16a66 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 13 Nov 2023 12:41:07 +0100 Subject: [PATCH 003/104] fix indentation --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 5139e0ec1..8ca827203 100644 --- a/nextflow.config +++ b/nextflow.config @@ -329,7 +329,7 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.8.0dev' + version = '2.8.0dev' doi = '10.5281/zenodo.1493841' } From d86c5696b08fe2ab0dea8f04219e01c7ada55c55 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 12:04:12 -0500 Subject: [PATCH 004/104] Add params.qiime_ref_tax_custom in preparation of allowing custom qiime database. --- nextflow.config | 1 + nextflow_schema.json | 5 +++++ workflows/ampliseq.nf | 15 +++++++++++---- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 93e19e86a..1e5a567f0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -108,6 +108,7 @@ params { cut_dada_ref_taxonomy = false sintax_ref_taxonomy = null qiime_ref_taxonomy = null + qiime_ref_tax_custom = null kraken2_ref_taxonomy = null kraken2_assign_taxlevels = null kraken2_ref_tax_custom = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 1d3098da5..69820e8c6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -432,6 +432,11 @@ "greengenes85" ] }, + "qiime_ref_tax_custom": { + "type": "string", + "help_text": "Is preferred over `--qiime_ref_taxonomy`. 
Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database.", + "description": "Path to a custom QIIME2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)" + }, "classifier": { "type": "string", "description": "Path to QIIME2 trained classifier file (typically *-classifier.qza)", diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 05ddfee76..eee62b971 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -59,9 +59,16 @@ if (params.dada_ref_tax_custom) { val_dada_ref_taxonomy = "none" } -if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { +if (params.qiime_ref_tax_custom) { + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + val_qiime_ref_taxonomy = "user" +} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {. ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } -} else { ch_qiime_ref_taxonomy = Channel.empty() } + val_qiime_ref_taxonomy = params.qiime_ref_taxonomy.replace('=','_').replace('.','_') +} else { + ch_qiime_ref_taxonomy = Channel.empty() + val_qiime_ref_taxonomy = "none" +} if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { ch_sintax_ref_taxonomy = Channel.fromList(params.sintax_ref_databases[params.sintax_ref_taxonomy]["file"]).map { file(it) } @@ -131,7 +138,7 @@ if ( params.dada_ref_taxonomy && !params.skip_dada_addspecies && !params.skip_da } //only run QIIME2 when taxonomy is actually calculated and all required data is available -if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { +if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && 
!params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { run_qiime2 = true } else { run_qiime2 = false @@ -552,7 +559,7 @@ workflow AMPLISEQ { //QIIME2 if ( run_qiime2 ) { - if (params.qiime_ref_taxonomy && !params.classifier) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( ch_qiime_ref_taxonomy.collect(), params.FW_primer, From 439097c93751277109c643359abb11f7ab158a14 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 12:05:14 -0500 Subject: [PATCH 005/104] Implementation of logic to handle a custom qiime2 reference database stored in either a directory or a tarball. --- subworkflows/local/qiime2_preptax.nf | 29 ++++++++++++++++++++++++++-- workflows/ampliseq.nf | 1 + 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 7f3cb80b2..429aac713 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -2,6 +2,7 @@ * Training of a classifier with QIIME2 */ +include { UNTAR } from '../../modules/nf-core/untar/main' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' @@ -9,13 +10,37 @@ include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' workflow QIIME2_PREPTAX { take: ch_qiime_ref_taxonomy //channel, list of files + val_qiime_ref_taxonomy //val FW_primer //val RV_primer //val main: - FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + if (params.qiime_ref_tax_custom) { + ch_qiime_ref_taxonomy + .branch { + tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) + dir: 
it.isDirectory() + failed: true + }.set { ch_qiime_ref_taxonomy } + ch_qiime_ref_taxonomy.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." } + + UNTAR ( + ch_qiime_ref_taxonomy.tar + .map { + db -> + def meta = [:] + meta.id = val_qiime_ref_taxonomy + [ meta, db ] } ) + ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } + ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_taxonomy.dir) + + ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) } + } else { + FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + + ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) + } - ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) ch_ref_database .map { db -> diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index eee62b971..03d1f6c43 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -562,6 +562,7 @@ workflow AMPLISEQ { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( ch_qiime_ref_taxonomy.collect(), + val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer ) From d924110622ce991ef6075520118717b87c816a34 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 12:28:57 -0500 Subject: [PATCH 006/104] Add greengenes2 2022.10 to qiime ref databases. --- bin/taxref_reformat_qiime_greengenes2022.sh | 8 ++++++++ conf/ref_databases.config | 6 ++++++ 2 files changed, 14 insertions(+) create mode 100755 bin/taxref_reformat_qiime_greengenes2022.sh diff --git a/bin/taxref_reformat_qiime_greengenes2022.sh b/bin/taxref_reformat_qiime_greengenes2022.sh new file mode 100755 index 000000000..1d84e7024 --- /dev/null +++ b/bin/taxref_reformat_qiime_greengenes2022.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +# Decompress files. 
+gzip -d *.gz + +# Select and rename files +mv *.fna greengenes2022.fna +mv *.tsv greengenes2022.tax diff --git a/conf/ref_databases.config b/conf/ref_databases.config index c80820ecc..c7d9f2515 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -306,6 +306,12 @@ params { citation = "McDonald, D., Price, M., Goodrich, J. et al. An improved Greengenes taxonomy with explicit ranks for ecological and evolutionary analyses of bacteria and archaea. ISME J 6, 610–618 (2012). https://doi.org/10.1038/ismej.2011.139" fmtscript = "taxref_reformat_qiime_greengenes85.sh" } + 'greengenes2022' { + title = "Greengenes2 16S - Version 2022.10" + file = [ "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.seqs.fna.gz", "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.taxonomy.md5.tsv.gz" ] + citation = "McDonald, D., Jiang, Y., Balaban, M. et al. Greengenes2 unifies microbial data in a single reference tree. Nat Biotechnol (2023). https://doi.org/10.1038/s41587-023-01845-1" + fmtscript = "taxref_reformat_qiime_greengenes2022.sh" + } } //Sintax taxonomic reference databases sintax_ref_databases { From 10828417bcbc7ff3fb62a7bef96bbf49eb3c630a Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 12:41:45 -0500 Subject: [PATCH 007/104] Add greengenes2022 to acceptable qiime_ref_taxonomy values. --- nextflow_schema.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1d3098da5..089731331 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -429,7 +429,8 @@ "unite-alleuk=8.3", "unite-alleuk=8.2", "unite-alleuk", - "greengenes85" + "greengenes85", + "greengenes2022" ] }, "classifier": { From 4612cd11bb16d4f27132bc9b1370dc1c8419e58a Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 12:50:31 -0500 Subject: [PATCH 008/104] gzip does not operate on symbolic links unless forced or, as now, told to emit result to stdout. 
--- bin/taxref_reformat_qiime_greengenes2022.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/taxref_reformat_qiime_greengenes2022.sh b/bin/taxref_reformat_qiime_greengenes2022.sh index 1d84e7024..aa4678a89 100755 --- a/bin/taxref_reformat_qiime_greengenes2022.sh +++ b/bin/taxref_reformat_qiime_greengenes2022.sh @@ -1,7 +1,8 @@ #!/bin/sh # Decompress files. -gzip -d *.gz +gzip -c -d 2022.10.seqs.fna.gz > 2022.10.seqs.fna +gzip -c -d 2022.10.taxonomy.md5.tsv.gz > 2022.10.taxonomy.md5.tsv # Select and rename files mv *.fna greengenes2022.fna From 14c89b9c22faec6544a1ffa97e28afe989e6336f Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 14:53:05 -0500 Subject: [PATCH 009/104] Some params checking logic. --- lib/WorkflowAmpliseq.groovy | 6 +++--- lib/WorkflowMain.groovy | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index 5e1039115..0868866a7 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -77,12 +77,12 @@ class WorkflowAmpliseq { } if (params.skip_dada_taxonomy && params.sbdiexport) { - if (!params.sintax_ref_taxonomy && (params.skip_qiime || !params.qiime_ref_taxonomy)) { + if (!params.sintax_ref_taxonomy && (params.skip_qiime || (!params.qiime_ref_taxonomy && !params.qiime_ref_tax_custom))) { Nextflow.error("Incompatible parameters: `--sbdiexport` expects taxa annotation and therefore annotation with either DADA2, SINTAX, or QIIME2 is needed.") } } - if ( (!params.FW_primer || !params.RV_primer) && params.qiime_ref_taxonomy && !params.skip_qiime && !params.skip_taxonomy ) { + if ( (!params.FW_primer || !params.RV_primer) && (params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_qiime && !params.skip_taxonomy ) { Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the QIIME2 reference database to the amplicon sequences. 
Please specify primers or do not use `--qiime_ref_taxonomy`.") } @@ -90,7 +90,7 @@ class WorkflowAmpliseq { Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the DADA2 reference database to the amplicon sequences. Please specify primers or do not use `--cut_dada_ref_taxonomy`.") } - if (params.qiime_ref_taxonomy && params.classifier) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && params.classifier) { Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.") } diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 7f49735e4..4b7ec2afc 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -34,7 +34,7 @@ class WorkflowMain { if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { sintaxreftaxonomyExistsError(params, log) } - if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_taxonomy && !params.classifier) { qiimereftaxonomyExistsError(params, log) } From d214ec0252321261d56b315d0b34797432011442 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 14:55:09 -0500 Subject: [PATCH 010/104] Loose . lying around. --- workflows/ampliseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 03d1f6c43..3d4ad07d9 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -62,7 +62,7 @@ if (params.dada_ref_tax_custom) { if (params.qiime_ref_tax_custom) { ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) val_qiime_ref_taxonomy = "user" -} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {. 
+} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } val_qiime_ref_taxonomy = params.qiime_ref_taxonomy.replace('=','_').replace('.','_') } else { From 9346d7aea37c365e31ad37ae8ffa08a51a2ab8a4 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 14:57:54 -0500 Subject: [PATCH 011/104] Only perform collect if going to FORMAT_TAXONOMY_QIIME. --- subworkflows/local/qiime2_preptax.nf | 2 +- workflows/ampliseq.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 429aac713..0d66308a1 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -36,7 +36,7 @@ workflow QIIME2_PREPTAX { ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) } } else { - FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 3d4ad07d9..b45fd9a57 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -561,7 +561,7 @@ workflow AMPLISEQ { if ( run_qiime2 ) { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( - ch_qiime_ref_taxonomy.collect(), + ch_qiime_ref_taxonomy, val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer From ef053b1369e5f75aad794772e387257b06e99d35 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 15:13:23 -0500 Subject: [PATCH 012/104] =?UTF-8?q?Set=20into=20new=20channel=20when=20bra?= =?UTF-8?q?nching=20on=20ch=5Fqiime=5Fref=5Ftaxonomy.=C2=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- subworkflows/local/qiime2_preptax.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 0d66308a1..38a9faf89 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -21,18 +21,18 @@ workflow QIIME2_PREPTAX { tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) dir: it.isDirectory() failed: true - }.set { ch_qiime_ref_taxonomy } - ch_qiime_ref_taxonomy.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." } + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." } UNTAR ( - ch_qiime_ref_taxonomy.tar + ch_qiime_ref_tax_branched.tar .map { db -> def meta = [:] meta.id = val_qiime_ref_taxonomy [ meta, db ] } ) ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } - ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_taxonomy.dir) + ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) } } else { From a48a09fe3f7398ce15fc8b0b1bc449b92c880ae8 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 15:48:14 -0500 Subject: [PATCH 013/104] Try to unpack the database dir into component files using a module. 
--- modules/local/qiime2_unpack.nf | 31 ++++++++++++++++++++++++++++ subworkflows/local/qiime2_preptax.nf | 5 ++++- 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 modules/local/qiime2_unpack.nf diff --git a/modules/local/qiime2_unpack.nf b/modules/local/qiime2_unpack.nf new file mode 100644 index 000000000..e77286da7 --- /dev/null +++ b/modules/local/qiime2_unpack.nf @@ -0,0 +1,31 @@ +process QIIME2_UNPACK { + label 'process_low' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : + 'docker.io/biocontainers/biocontainers:v1.2.0_cv1' }" + + input: + path(database) + + output: + path("*.fna"), emit: fasta + path("*.tax"), emit: tax + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + // TODO: need to not have this be a copy. + script: + """ + cp $database/*.fna . + cp $database/*.tax . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bash: \$(bash --version | sed -n 1p | sed 's/GNU bash, version //g') + END_VERSIONS + """ +} diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 38a9faf89..3ad8365f2 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -6,6 +6,7 @@ include { UNTAR } from '../../modules/nf-core/untar/main' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' +include { QIIME2_UNPACK } from '../../modules/local/qiime2_unpack' workflow QIIME2_PREPTAX { take: @@ -34,7 +35,9 @@ workflow QIIME2_PREPTAX { ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) - ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) } + QIIME2_UNPACK(ch_qiime_db_dir) + + ch_ref_database = ch_qiime_db_dir.map{ QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) } } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) From a9971b68ede12d0431d248362d1c279b7ac0cc07 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 15:59:03 -0500 Subject: [PATCH 014/104] Remove map wrapping the combine. 
--- subworkflows/local/qiime2_preptax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 3ad8365f2..8229190ac 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -37,7 +37,7 @@ workflow QIIME2_PREPTAX { QIIME2_UNPACK(ch_qiime_db_dir) - ch_ref_database = ch_qiime_db_dir.map{ QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) } + ch_ref_database = QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) From aac51bd4b162e17ed0f9ca5864527651abf4e23d Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 16:14:05 -0500 Subject: [PATCH 015/104] Remove unpack in favour of map and filter. --- modules/local/qiime2_unpack.nf | 31 ---------------------------- subworkflows/local/qiime2_preptax.nf | 16 +++++++++++--- 2 files changed, 13 insertions(+), 34 deletions(-) delete mode 100644 modules/local/qiime2_unpack.nf diff --git a/modules/local/qiime2_unpack.nf b/modules/local/qiime2_unpack.nf deleted file mode 100644 index e77286da7..000000000 --- a/modules/local/qiime2_unpack.nf +++ /dev/null @@ -1,31 +0,0 @@ -process QIIME2_UNPACK { - label 'process_low' - - conda "conda-forge::sed=4.7" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'docker.io/biocontainers/biocontainers:v1.2.0_cv1' }" - - input: - path(database) - - output: - path("*.fna"), emit: fasta - path("*.tax"), emit: tax - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - // TODO: need to not have this be a copy. - script: - """ - cp $database/*.fna . - cp $database/*.tax . 
- - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bash: \$(bash --version | sed -n 1p | sed 's/GNU bash, version //g') - END_VERSIONS - """ -} diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 8229190ac..2969d3709 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -6,7 +6,6 @@ include { UNTAR } from '../../modules/nf-core/untar/main' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' -include { QIIME2_UNPACK } from '../../modules/local/qiime2_unpack' workflow QIIME2_PREPTAX { take: @@ -35,9 +34,20 @@ workflow QIIME2_PREPTAX { ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) - QIIME2_UNPACK(ch_qiime_db_dir) + ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.fna"), checkIfExists: true) + } | filter { + if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database." + ! it instanceof List + } + ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.tax"), checkIfExists: true) + } | filter { + if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database." + ! it instanceof List + } - ch_ref_database = QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) + ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) From 1b2825ec952b9d2d4d8edb8cbd348fd843d42223 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 16:17:38 -0500 Subject: [PATCH 016/104] Glob results in list in all circumstances, check length instead. 
--- subworkflows/local/qiime2_preptax.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 2969d3709..f040f2077 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -37,14 +37,14 @@ workflow QIIME2_PREPTAX { ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> files = file(dir.resolve("*.fna"), checkIfExists: true) } | filter { - if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database." - ! it instanceof List + if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." + it.size() == 1 } ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> files = file(dir.resolve("*.tax"), checkIfExists: true) } | filter { - if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database." - ! it instanceof List + if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." + it.size() == 1 } ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) From c230c2261a7a55e6070c649b905d9cbd2a7d8982 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 16:42:02 -0500 Subject: [PATCH 017/104] Update usage.md. 
--- docs/usage.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/usage.md b/docs/usage.md index 38c2cc23f..721523fcf 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -225,6 +225,7 @@ Pre-configured reference taxonomy databases are: | sbdi-gtdb | + | - | - | - | 16S rRNA | | rdp | + | - | + | - | 16S rRNA | | greengenes | - | - | + | (+)¹ | 16S rRNA | +| greengenes2 | - | - | - | + | 16S rRNA | | pr2 | + | - | - | - | 18S rRNA | | unite-fungi | + | + | - | + | eukaryotic nuclear ribosomal ITS region | | unite-alleuk | + | + | - | + | eukaryotic nuclear ribosomal ITS region | From 1dd0d8ed5376cd76b6e440ffa3bec9a1da7bc9c6 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 16:49:45 -0500 Subject: [PATCH 018/104] Update CHANGELOG.md. --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 620c97165..c44c74f3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- []() - Added Greengenes2 database, version 2022.10, support for QIIME2 taxonomic classification. + ### `Changed` ### `Fixed` From 850a603a445dcec32739439b9049ac38f5446e2f Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 16:53:03 -0500 Subject: [PATCH 019/104] Add pull request link to changelog. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c44c74f3e..10456d802 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- []() - Added Greengenes2 database, version 2022.10, support for QIIME2 taxonomic classification. +- [#666](https://github.com/nf-core/ampliseq/pull/666) - Added Greengenes2 database, version 2022.10, support for QIIME2 taxonomic classification. 
### `Changed` From a4219a0baa0e4b256ee97c5c1a54a566fb801f07 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 17:11:59 -0500 Subject: [PATCH 020/104] Update CHANGELOG.md. --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 620c97165..b884a4087 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +[]() - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification. + ### `Changed` ### `Fixed` From 0ccf6e6daf7b46be84a81fd9498d2e55fc5db795 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 10:12:47 -0500 Subject: [PATCH 021/104] Update error message when passing both one of --qiime_ref_taxonomy or --qiime_ref_tax_custom and --classifier. --- lib/WorkflowAmpliseq.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index 0868866a7..25db3ed68 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -91,7 +91,7 @@ class WorkflowAmpliseq { } if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && params.classifier) { - Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.") + Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` and `--qiime_ref_tax_custom` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.") } if (params.kraken2_ref_tax_custom && !params.kraken2_assign_taxlevels ) { From 590f415952b249d83ae5fcd7d128b109310d0983 Mon Sep 17 00:00:00 2001 From: Matthew Date: Wed, 29 Nov 2023 11:13:35 -0500 Subject: [PATCH 022/104] Update CHANGELOG.md with pull request number. 
Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b884a4087..6c2030cc4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -[]() - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification. +[#667](https://github.com/nf-core/ampliseq/pull/667) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification. ### `Changed` From f5d80f572e008693d3f83dd5a8f169784da546c7 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 11:58:33 -0500 Subject: [PATCH 023/104] Add support for specifying two (possibly gzipped) files as --qiime_ref_tax_custom. --- modules/local/gzip_decompress.nf | 32 ++++++++++++ nextflow_schema.json | 2 +- subworkflows/local/qiime2_preptax.nf | 75 ++++++++++++++++++---------- workflows/ampliseq.nf | 6 ++- 4 files changed, 86 insertions(+), 29 deletions(-) create mode 100644 modules/local/gzip_decompress.nf diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf new file mode 100644 index 000000000..fa8fa82cf --- /dev/null +++ b/modules/local/gzip_decompress.nf @@ -0,0 +1,32 @@ +process GZIP_DECOMPRESS { + tag "$file" + label 'process_single' + + conda "conda-forge::sed=4.7 conda-forge::gzip=1.13" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + path(file) + + output: + path("$outfile"), emit: ungzip + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + outfile = task.ext.outfile ?: ( meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.gz$/, "")) + + """ + gzip $args -c -d $file > $outfile + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gzip: \$(echo \$(gzip --version 2>&1) | sed 's/gzip //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 69820e8c6..c6c6f8b09 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -434,7 +434,7 @@ }, "qiime_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--qiime_ref_taxonomy`. Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database.", + "help_text": "Is preferred over `--qiime_ref_taxonomy`. Can be compressed tar archive (.tar.gz|.tgz), a pair of (possibly gzipped) filepaths, or folder containing the database.", "description": "Path to a custom QIIME2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)" }, "classifier": { diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index f040f2077..7cc4817d0 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -3,6 +3,7 @@ */ include { UNTAR } from '../../modules/nf-core/untar/main' +include { GZIP_DECOMPRESS } from '../../modules/local/gzip_decompress.nf' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' @@ -15,36 +16,56 @@ workflow QIIME2_PREPTAX { RV_primer //val main: + ch_qiime2_preptax_versions = Channel.empty() + if (params.qiime_ref_tax_custom) { - ch_qiime_ref_taxonomy - .branch { - tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) - dir: it.isDirectory() - failed: true - }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. 
Please review input." } + if (ch_qiime_ref_taxonomy.size() == 2) { + ch_qiime_ref_taxonomy + .branch { + gzip: it.isFile() && ( it.getName().endsWith(".gz") ) + decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") ) + failed: true + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." } - UNTAR ( - ch_qiime_ref_tax_branched.tar - .map { - db -> - def meta = [:] - meta.id = val_qiime_ref_taxonomy - [ meta, db ] } ) - ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } - ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) + GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) - ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> - files = file(dir.resolve("*.fna"), checkIfExists: true) - } | filter { - if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." - it.size() == 1 - } - ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> - files = file(dir.resolve("*.tax"), checkIfExists: true) - } | filter { - if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." - it.size() == 1 + ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip + ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) + + ch_ref_database = ch_qiime_db_files.collate(2) + } else { + ch_qiime_ref_taxonomy + .branch { + tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) + dir: it.isDirectory() + failed: true + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." 
} + + UNTAR ( + ch_qiime_ref_tax_branched.tar + .map { + db -> + def meta = [:] + meta.id = val_qiime_ref_taxonomy + [ meta, db ] } ) + ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } + ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) + + ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.fna"), checkIfExists: true) + } | filter { + if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." + it.size() == 1 + } + ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.tax"), checkIfExists: true) + } | filter { + if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." + it.size() == 1 + } } ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index b45fd9a57..9bd1cf5c4 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -60,7 +60,11 @@ if (params.dada_ref_tax_custom) { } if (params.qiime_ref_tax_custom) { - ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + if ("${params.qiime_ref_tax_custom}".contains(",")) { + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true) + } else { + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + } val_qiime_ref_taxonomy = "user" } else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } From 7016682fd8375525b2e23e3fdebaa01cbdd8f082 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 14:07:43 -0500 Subject: [PATCH 024/104] Only support providing two files separated by a comma. 
--- nextflow_schema.json | 4 +- subworkflows/local/qiime2_preptax.nf | 59 ++++++---------------------- workflows/ampliseq.nf | 10 ++--- 3 files changed, 20 insertions(+), 53 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index c6c6f8b09..79a4cebb0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -434,8 +434,8 @@ }, "qiime_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--qiime_ref_taxonomy`. Can be compressed tar archive (.tar.gz|.tgz), a pair of (possibly gzipped) filepaths, or folder containing the database.", - "description": "Path to a custom QIIME2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)" + "help_text": "Is preferred over `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths.", + "description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)" }, "classifier": { "type": "string", diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 7cc4817d0..a4e1d7768 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -19,56 +19,23 @@ workflow QIIME2_PREPTAX { ch_qiime2_preptax_versions = Channel.empty() if (params.qiime_ref_tax_custom) { - if (ch_qiime_ref_taxonomy.size() == 2) { - ch_qiime_ref_taxonomy - .branch { - gzip: it.isFile() && ( it.getName().endsWith(".gz") ) - decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") ) - failed: true - }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." 
} + ch_qiime_ref_taxonomy.view() - GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) - ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) + // ch_qiime_ref_taxonomy + // .branch { + // gzip: it.isFile() && ( it.getName().endsWith(".gz") ) + // decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") ) + // failed: true + // }.set { ch_qiime_ref_tax_branched } + // ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." } - ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip - ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) + // GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) + // ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) - ch_ref_database = ch_qiime_db_files.collate(2) - } else { - ch_qiime_ref_taxonomy - .branch { - tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) - dir: it.isDirectory() - failed: true - }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." } + // ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip + // ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) - UNTAR ( - ch_qiime_ref_tax_branched.tar - .map { - db -> - def meta = [:] - meta.id = val_qiime_ref_taxonomy - [ meta, db ] } ) - ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } - ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) - - ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> - files = file(dir.resolve("*.fna"), checkIfExists: true) - } | filter { - if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." 
- it.size() == 1 - } - ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> - files = file(dir.resolve("*.tax"), checkIfExists: true) - } | filter { - if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." - it.size() == 1 - } - } - - ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) + // ch_ref_database = ch_qiime_db_files.collate(2) } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9bd1cf5c4..07df8960a 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -60,11 +60,11 @@ if (params.dada_ref_tax_custom) { } if (params.qiime_ref_tax_custom) { - if ("${params.qiime_ref_tax_custom}".contains(",")) { - ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true) - } else { - ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + if (!"${params.qiime_ref_tax_custom}".contains(",")) { + error "--qiime_ref_tax_custom takes two filepaths separated by a comma. Please review input." } + + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true) val_qiime_ref_taxonomy = "user" } else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } @@ -565,7 +565,7 @@ workflow AMPLISEQ { if ( run_qiime2 ) { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( - ch_qiime_ref_taxonomy, + ch_qiime_ref_taxonomy.collect(), val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer From 79cbfe8fb0bf50035529deb5fe24d18693784b75 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 14:41:49 -0500 Subject: [PATCH 025/104] Fix split returns a String[] and we actually need an ArrayList. 
--- subworkflows/local/qiime2_preptax.nf | 2 +- workflows/ampliseq.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 7cc4817d0..f36dda424 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -19,7 +19,7 @@ workflow QIIME2_PREPTAX { ch_qiime2_preptax_versions = Channel.empty() if (params.qiime_ref_tax_custom) { - if (ch_qiime_ref_taxonomy.size() == 2) { + if ("${params.qiime_ref_tax_custom}".contains(",")) { ch_qiime_ref_taxonomy .branch { gzip: it.isFile() && ( it.getName().endsWith(".gz") ) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9bd1cf5c4..3a3323269 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -61,7 +61,7 @@ if (params.dada_ref_tax_custom) { if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { - ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true) + ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList("${params.qiime_ref_tax_custom}".split(",")), checkIfExists: true) } else { ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) } @@ -565,7 +565,7 @@ workflow AMPLISEQ { if ( run_qiime2 ) { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( - ch_qiime_ref_taxonomy, + ch_qiime_ref_taxonomy.collect(), val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer From 6d767bc1ea80aa464d80b4aae48944759a44f3a6 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 14:46:30 -0500 Subject: [PATCH 026/104] Move ch_ref_database set into correct scope. 
--- subworkflows/local/qiime2_preptax.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index f36dda424..19a9bc4b3 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -66,9 +66,9 @@ workflow QIIME2_PREPTAX { if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." it.size() == 1 } - } - ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) + ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) + } } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) From f76b49bbcd56315189603e0e292dba2e108f68e5 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 14:53:35 -0500 Subject: [PATCH 027/104] Try using map to work through list of files. --- subworkflows/local/qiime2_preptax.nf | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 19a9bc4b3..96a0db961 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -21,18 +21,19 @@ workflow QIIME2_PREPTAX { if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { ch_qiime_ref_taxonomy - .branch { - gzip: it.isFile() && ( it.getName().endsWith(".gz") ) - decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") ) - failed: true - }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." 
} - - GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) - ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) + .map { filepath -> + candidate = file(filepath, checkIfExists: true) + if (filepath.endsWith(".gz")) { + GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) - ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip - ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) + return GZIP_DECOMPRESS.out.ungzip + } else if (filepath.endsWith(".fna") || filepath.endsWith(".tax")) { + return candidate + } else { + error "$filepath is neither a compressed or decompressed sequence or taxonomy file. Please review input." + } + }.set { ch_qiime_db_files } ch_ref_database = ch_qiime_db_files.collate(2) } else { From 0890a0e64beb641422698f61c2adfee3f0db46a7 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:17:56 -0500 Subject: [PATCH 028/104] Can't call processes from inside maps. 
--- subworkflows/local/qiime2_preptax.nf | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 96a0db961..78a4ab27f 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -20,24 +20,23 @@ workflow QIIME2_PREPTAX { if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { - ch_qiime_ref_taxonomy - .map { filepath -> - candidate = file(filepath, checkIfExists: true) - if (filepath.endsWith(".gz")) { - GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) - ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) + ch_qiime_ref_taxonomy.flatten() + .branch { + compressed: it.isFile() && it.getName().endsWith(".gz") + decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith(".tax") ) + failed: true + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." } + + GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.compressed) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) - return GZIP_DECOMPRESS.out.ungzip - } else if (filepath.endsWith(".fna") || filepath.endsWith(".tax")) { - return candidate - } else { - error "$filepath is neither a compressed or decompressed sequence or taxonomy file. Please review input." 
- } - }.set { ch_qiime_db_files } + ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip + ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) ch_ref_database = ch_qiime_db_files.collate(2) } else { - ch_qiime_ref_taxonomy + ch_qiime_ref_taxonomy.flatten() .branch { tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) dir: it.isDirectory() From 7276a8d1e36eb176fa4171c46d34aed24e4c8fad Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:20:25 -0500 Subject: [PATCH 029/104] Fix outfile definition in GZIP_DECOMPRESS. --- modules/local/gzip_decompress.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf index fa8fa82cf..effd256c9 100644 --- a/modules/local/gzip_decompress.nf +++ b/modules/local/gzip_decompress.nf @@ -19,7 +19,7 @@ process GZIP_DECOMPRESS { script: def args = task.ext.args ?: '' - outfile = task.ext.outfile ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.gz$/, "")) + outfile = task.ext.outfile ?: archive.baseName.toString().replaceFirst(/\.gz$/, "") """ gzip $args -c -d $file > $outfile From f0a8715a9cc33c520e2047f0f45c35093c4a28c5 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:21:45 -0500 Subject: [PATCH 030/104] Fix outfile definition in GZIP_DECOMPRESS. 
--- modules/local/gzip_decompress.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf index effd256c9..c6ea37a5a 100644 --- a/modules/local/gzip_decompress.nf +++ b/modules/local/gzip_decompress.nf @@ -19,7 +19,7 @@ process GZIP_DECOMPRESS { script: def args = task.ext.args ?: '' - outfile = task.ext.outfile ?: archive.baseName.toString().replaceFirst(/\.gz$/, "") + outfile = task.ext.outfile ?: file.baseName.toString().replaceFirst(/\.gz$/, "") """ gzip $args -c -d $file > $outfile From 56f241b05ecbbf1f068c3ba7ee9c11b749c92e16 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:48:34 -0500 Subject: [PATCH 031/104] Add some comments. --- subworkflows/local/qiime2_preptax.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 78a4ab27f..561a75dc8 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -19,6 +19,7 @@ workflow QIIME2_PREPTAX { ch_qiime2_preptax_versions = Channel.empty() if (params.qiime_ref_tax_custom) { + // Handle case where we have been provided a pair of filepaths. if ("${params.qiime_ref_tax_custom}".contains(",")) { ch_qiime_ref_taxonomy.flatten() .branch { @@ -35,6 +36,7 @@ workflow QIIME2_PREPTAX { ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) ch_ref_database = ch_qiime_db_files.collate(2) + // Handle case we have been provided a single filepath (tarball or directory). } else { ch_qiime_ref_taxonomy.flatten() .branch { From 7907df5bfb4843a80352e8de18265c7331c4ba8a Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:49:02 -0500 Subject: [PATCH 032/104] Add an early check that two paths are provided when providing a comma-separated list. 
--- workflows/ampliseq.nf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 3a3323269..a99e71f9e 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -61,7 +61,12 @@ if (params.dada_ref_tax_custom) { if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { - ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList("${params.qiime_ref_tax_custom}".split(",")), checkIfExists: true) + qiime_ref_paths = "${params.qiime_ref_tax_custom}".split(",") + if (qiime_ref_paths.length != 2) { + error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two files paths separated by a comma. Please review input." + } + + ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList(qiime_ref_paths), checkIfExists: true) } else { ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) } From 1d6ce32e4911f86ebaf02af447db6fdcb3cadb47 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:53:08 -0500 Subject: [PATCH 033/104] Make sure downstream is aware of new means of pointing to a qiime ref db. 
--- workflows/ampliseq.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index a99e71f9e..9195038dd 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -624,7 +624,7 @@ workflow AMPLISEQ { log.info "Use Kraken2 taxonomy classification" val_used_taxonomy = "Kraken2" ch_tax = QIIME2_INTAX ( ch_kraken2_tax, "" ).qza - } else if ( params.qiime_ref_taxonomy || params.classifier ) { + } else if ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) { log.info "Use QIIME2 taxonomy classification" val_used_taxonomy = "QIIME2" ch_tax = QIIME2_TAXONOMY.out.qza @@ -724,7 +724,7 @@ workflow AMPLISEQ { // MODULE: Predict functional potential of a bacterial community from marker genes with Picrust2 // if ( params.picrust ) { - if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) { + if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) { PICRUST ( QIIME2_EXPORT.out.abs_fasta, QIIME2_EXPORT.out.abs_tsv, "QIIME2", "This Picrust2 analysis is based on filtered reads from QIIME2" ) } else { PICRUST ( ch_fasta, ch_dada2_asv, "DADA2", "This Picrust2 analysis is based on unfiltered reads from DADA2" ) @@ -856,7 +856,7 @@ workflow AMPLISEQ { !params.skip_taxonomy && ( params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) ? KRAKEN2_TAXONOMY_WF.out.tax_tsv.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? 
FASTA_NEWICK_EPANG_GAPPA.out.heattree.ifEmpty( [[],[]] ) : [[],[]], - !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], + !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], run_qiime2, run_qiime2 ? val_used_taxonomy : "", run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "", From 913d284b4c6dd54a20c9dbe273656c30df2888f3 Mon Sep 17 00:00:00 2001 From: Matthew Date: Fri, 1 Dec 2023 14:13:39 -0500 Subject: [PATCH 034/104] Improve error message clarity for ill-formed file. Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- subworkflows/local/qiime2_preptax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 561a75dc8..7a5960afe 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -27,7 +27,7 @@ workflow QIIME2_PREPTAX { decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith(".tax") ) failed: true }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed (ends with `.gz`) or decompressed sequence (ends with `.fna`) or taxonomy file (ends with `.tax`). Please review input." 
} GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.compressed) ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) From 330bf43e19cf42f87f9a9213928724fe42cbc98c Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 1 Dec 2023 14:34:05 -0500 Subject: [PATCH 035/104] Fix typo in error on --qiime_ref_paths form. --- workflows/ampliseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9195038dd..faeffec62 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -63,7 +63,7 @@ if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { qiime_ref_paths = "${params.qiime_ref_tax_custom}".split(",") if (qiime_ref_paths.length != 2) { - error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two files paths separated by a comma. Please review input." + error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two filepaths separated by a comma. Please review input." } ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList(qiime_ref_paths), checkIfExists: true) From b8c595cc7a647593656945cc34c55861986c2038 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 1 Dec 2023 14:41:10 -0500 Subject: [PATCH 036/104] Remove unneeded collect from FORMAT_TAXONOMY_QIIME invocation. 
--- subworkflows/local/qiime2_preptax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 561a75dc8..d1ed888a6 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -72,7 +72,7 @@ workflow QIIME2_PREPTAX { ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) } } else { - FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) + FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) } From 3e05fe507674b72cc1757063fb4621f47defd4d9 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 1 Dec 2023 14:47:32 -0500 Subject: [PATCH 037/104] Improve version tracking in PREPTAX. --- subworkflows/local/qiime2_preptax.nf | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index d1ed888a6..050ad78c7 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -53,6 +53,8 @@ workflow QIIME2_PREPTAX { def meta = [:] meta.id = val_qiime_ref_taxonomy [ meta, db ] } ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(UNTAR.out.versions) + ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) @@ -73,6 +75,7 @@ workflow QIIME2_PREPTAX { } } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + ch_qiime2_preptax_versions(FORMAT_TAXONOMY_QIIME.out.versions) ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) } @@ -85,10 +88,14 @@ workflow QIIME2_PREPTAX { meta.RV_primer = RV_primer [ meta, db ] } .set { ch_ref_database } + QIIME2_EXTRACT ( ch_ref_database ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_EXTRACT.out.versions) + QIIME2_TRAIN ( QIIME2_EXTRACT.out.qza ) + 
ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_TRAIN.out.versions) emit: - classifier = QIIME2_TRAIN.out.qza - versions = QIIME2_TRAIN.out.versions + classifier = QIIME2_TRAIN.out.qza + versions = ch_qiime2_preptax_versions } From b012aeb0792d86f5edcd1dc81b34049fc7b7b16d Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 1 Dec 2023 15:11:18 -0500 Subject: [PATCH 038/104] Add qiime_ref_tax_custom to testing in reftaxcustom nf-test. --- conf/test_reftaxcustom.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index 4233d1ea0..c2d7c4eed 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -30,6 +30,7 @@ params { dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus" kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz" kraken2_assign_taxlevels = "D,P,C,O" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 skip_qiime = true From 7ce4fa815d297986b9837a3349c9451b49b74073 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 5 Dec 2023 12:02:21 +0000 Subject: [PATCH 039/104] Patch summary_report as it expects qiime_ref_taxonomy is set even when it can be null due to classifier being passed. --- modules/local/summary_report.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index d886f19bb..a8e082b01 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -118,7 +118,7 @@ process SUMMARY_REPORT { kraken2_tax ? "kraken2_taxonomy='$kraken2_tax',kraken2_confidence='$params.kraken2_confidence'" : "", kraken2_tax && !params.kraken2_ref_tax_custom ? 
"kraken2_ref_tax_title='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["title"]}',kraken2_ref_tax_file='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["file"]}',kraken2_ref_tax_citation='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["citation"]}'" : "", pplace_tax ? "pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "", - qiime2_tax ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : "", + qiime2_tax && params.qiime_ref_taxonomy ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "", run_qiime2 ? "val_used_taxonomy='$val_used_taxonomy'" : "", filter_stats_tsv ? "filter_stats_tsv='$filter_stats_tsv',qiime2_filtertaxa='$qiime2_filtertaxa',exclude_taxa='$params.exclude_taxa',min_frequency='$params.min_frequency',min_samples='$params.min_samples'" : "", barplot ? "barplot=TRUE" : "", From d83deb097a671bef2f947b1f8e73e6a17ae4471f Mon Sep 17 00:00:00 2001 From: Dan Clayton Date: Wed, 6 Dec 2023 15:23:11 +0000 Subject: [PATCH 040/104] update output docs for collapsed abundance tables --- docs/output.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index f12fc41fa..62433ceac 100644 --- a/docs/output.md +++ b/docs/output.md @@ -407,7 +407,7 @@ All following analysis is based on these filtered tables. - `seven_number_summary.tsv`: Length of ASV sequences in different quantiles. 
- `filtered-sequences.qza`: QIIME2 fragment. - `qiime2/abundance_tables/` - - `abs-abund-table-*.tsv`: Tab-separated absolute abundance table at taxa level `*`, where `*` ranges by default from 2 to 6 or 7, depending on the used reference taxonomy database. + - `abs-abund-table-*.tsv`: Tab-separated absolute abundance table at taxa level `*`, where `*` ranges by default from 2 to 6, specified by the `--tax_agglom_min` and `--tax_agglom_max` parameters. - `count_table_filter_stats.tsv`: Tab-separated table with information on how much counts were filtered for each sample. - `feature-table.biom`: Abundance table in biom format for importing into downstream analysis tools. - `feature-table.tsv`: Tab-separated abundance table for each ASV and each sample. @@ -423,7 +423,7 @@ Absolute abundance tables produced by the previous steps contain count data, but Output files - `qiime2/rel_abundance_tables/` - - `rel-table-*.tsv`: Tab-separated absolute abundance table at taxa level `*`, where `*` ranges by default from 2 to 6 or 7, depending on the used reference taxonomy database. + - `rel-table-*.tsv`: Tab-separated absolute abundance table at taxa level `*`, where `*` ranges by default from 2 to 6, specified by the `--tax_agglom_min` and `--tax_agglom_max` parameters. - `rel-table-ASV.tsv`: Tab-separated relative abundance table for all ASVs. - `rel-table-ASV_with-DADA2-tax.tsv`: Tab-separated table for all ASVs with DADA2 taxonomic classification, sequence and relative abundance. - `rel-table-ASV_with-QIIME2-tax.tsv`: Tab-separated table for all ASVs with QIIME2 taxonomic classification, sequence and relative abundance. 
From d8ea1f03225d237161a95f420416056407f558aa Mon Sep 17 00:00:00 2001 From: Dan Clayton Date: Thu, 7 Dec 2023 09:20:58 +0000 Subject: [PATCH 041/104] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 620c97165..e51badb3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` ### `Fixed` +- [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables ### `Dependencies` From feacdd59242c8af026a4ded19447c6c3b724d19e Mon Sep 17 00:00:00 2001 From: Dan Clayton Date: Thu, 7 Dec 2023 11:42:31 +0000 Subject: [PATCH 042/104] run prettier --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e51badb3f..b08795266 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` ### `Fixed` + - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables ### `Dependencies` From a8ec74a2f10ca3d79f5384f5078838fcde1d74b5 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 15:40:10 +0000 Subject: [PATCH 043/104] Update changelog. 
--- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b08795266..7639cf7cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables +- []() - Fix logic relating to generation of qiime2 taxonomy part of summary report ### `Dependencies` From ba71667c714ff22258bc7de7f0126dda8e8bcfe0 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 15:41:55 +0000 Subject: [PATCH 044/104] Don't skip qiime while testing. --- conf/test_reftaxcustom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index c2d7c4eed..870a59e27 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -33,5 +33,5 @@ params { qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 - skip_qiime = true + skip_qiime = false } From 1360415a39ae3aaf659a8201ef1958a1fb53414e Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:00:27 +0000 Subject: [PATCH 045/104] Add a skip option for just downstream qiime analysis, but still perform qiime taxonomic classification. 
--- nextflow.config | 1 + nextflow_schema.json | 4 ++++ workflows/ampliseq.nf | 13 ++++++++++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/nextflow.config b/nextflow.config index c4e94bfb1..c76b40582 100644 --- a/nextflow.config +++ b/nextflow.config @@ -87,6 +87,7 @@ params { skip_dada_quality = false skip_barrnap = false skip_qiime = false + skip_qiime_downstream = false skip_fastqc = false skip_alpha_rarefaction = false skip_abundance_tables = false diff --git a/nextflow_schema.json b/nextflow_schema.json index c6c6f8b09..2b4a8dca6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -656,6 +656,10 @@ "type": "boolean", "description": "Skip all steps that are executed by QIIME2, including QIIME2 software download, taxonomy assignment by QIIME2, barplots, relative abundance tables, diversity analysis, differential abundance testing." }, + "skip_qiime_downstream": { + "type": "boolean", + "description": "Skip steps that are executed by QIIME2 except for taxonomic classification, including barplots, relative abundance tables, diversity analysis, differential abundance testing." + }, "skip_taxonomy": { "type": "boolean", "description": "Skip taxonomic classification. 
Incompatible with `--sbdiexport`" diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index faeffec62..9e85bf6af 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -146,8 +146,15 @@ if ( params.dada_ref_taxonomy && !params.skip_dada_addspecies && !params.skip_da } } -//only run QIIME2 when taxonomy is actually calculated and all required data is available -if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { +// Only run QIIME2 taxonomy classification if needed parameters are passed and we are not skipping taxonomy or qiime steps. +if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier) ) { + run_qiime2_taxonomy = true +} else { + run_qiime2_taxonomy = false +} + +//only run QIIME2 downstream analysis when taxonomy is actually calculated and all required data is available +if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && !params.skip_qiime_downstream && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { run_qiime2 = true } else { run_qiime2 = false @@ -567,7 +574,7 @@ workflow AMPLISEQ { } //QIIME2 - if ( run_qiime2 ) { + if ( run_qiime2_taxonomy ) { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( ch_qiime_ref_taxonomy.collect(), From f4f5cda41b32a83c133b6678f8c8f4537d9b65ed Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:02:06 +0000 
Subject: [PATCH 046/104] Skip qiime downstream in reftaxcustom. --- conf/test_reftaxcustom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index 870a59e27..ea8a7c6d0 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -33,5 +33,5 @@ params { qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 - skip_qiime = false + skip_qiime_downstream = true } From 549c166365bf68f4edecff6121597cbcb01c8b99 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:21:06 +0000 Subject: [PATCH 047/104] Fix path for testing tarball passed to --qiime_ref_tax_custom. --- conf/test_reftaxcustom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index ea8a7c6d0..1afe1c2df 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -30,7 +30,7 @@ params { dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus" kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz" kraken2_assign_taxlevels = "D,P,C,O" - qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tar.gz" // Skip downstream analysis with QIIME2 skip_qiime_downstream = true From 851653404641128a88899ef1e3ceb88e79c945a9 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:38:04 +0000 Subject: [PATCH 048/104] Add snapshot of files coming from qiime2 taxonomy. 
--- tests/pipeline/reftaxcustom.nf.test.snap | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 7dca4e3e9..842b18de0 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -43,12 +43,19 @@ "timestamp": "2023-05-28T21:18:54+0000" }, "kraken2": { + "content": [ + "taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b", + "taxonomy/taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645" + ], + "timestamp": "2023-09-15T21:16:26+0000" + }, + "qiime2": { "content": [ "ASV_tax.user.kraken2.classifiedreads.txt:md5,8a4693c37d5c24b342ef161b92567764", "ASV_tax.user.kraken2.complete.tsv:md5,3613dac9ce1bf03f87b57d1523e705f1", "ASV_tax.user.kraken2.tsv:md5,95c3f9daa5da8fe00159fb07d394c3ce" ], - "timestamp": "2023-09-15T21:16:26+0000" + "timestamp": "2023-12-07T21:28:32+0000" }, "multiqc": { "content": [ From 745cab7de07628c06f5356dcdb9f8e64321bd074 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:39:18 +0000 Subject: [PATCH 049/104] Work towards a qiime_ref_tax_custom specific test. 
--- .github/workflows/ci.yml | 1 + conf/test_qiimecustom.config | 32 ++++++++++++++ nextflow.config | 1 + tests/pipeline/qiimecustom.nf.test | 55 +++++++++++++++++++++++++ tests/pipeline/qiimecustom.nf.test.snap | 43 +++++++++++++++++++ 5 files changed, 132 insertions(+) create mode 100644 conf/test_qiimecustom.config create mode 100644 tests/pipeline/qiimecustom.nf.test create mode 100644 tests/pipeline/qiimecustom.nf.test.snap diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 788582d92..e4b532bef 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,6 +50,7 @@ jobs: - "test_failed" - "test_multi" - "test_reftaxcustom" + - "test_qiimecustom" - "test_doubleprimers" - "test_iontorrent" - "test_novaseq" diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config new file mode 100644 index 000000000..ea6b97d81 --- /dev/null +++ b/conf/test_qiimecustom.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/ampliseq -profile test_qiimecustom,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test custom QIIME2 reference taxonomy database profile' + config_profile_description = 'Minimal test dataset to check --qiime_ref_tax_custom' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + FW_primer = "GTGYCAGCMGCCGCGGTAA" + RV_primer = "GGACTACNVGGGTWTCTAAT" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" + + // Custom reference taxonomy + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz" + + // Skip downstream analysis with QIIME2 + skip_qiime_downstream = true +} diff --git a/nextflow.config b/nextflow.config index c76b40582..831a43a22 100644 --- a/nextflow.config +++ b/nextflow.config @@ -274,6 +274,7 @@ profiles { test_failed { includeConfig 'conf/test_failed.config' } test_full { includeConfig 'conf/test_full.config' } test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' } + test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' } test_novaseq { includeConfig 'conf/test_novaseq.config' } test_pplace { includeConfig 'conf/test_pplace.config' } test_sintax { includeConfig 'conf/test_sintax.config' } diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test new file mode 100644 index 000000000..abd2a38a2 --- /dev/null +++ b/tests/pipeline/qiimecustom.nf.test @@ -0,0 +1,55 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + tag "test_reftaxcustom" + tag "dada2" + tag "pipeline" + + test("Custom DADA2 Reference Taxonomy Database") { + + when { + params 
{ + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, + { assert snapshot(path("$outputDir/overall_summary.tsv")).match("overall_summary_tsv") }, + { assert snapshot(path("$outputDir/barrnap/rrna.arc.gff"), + path("$outputDir/barrnap/rrna.bac.gff"), + path("$outputDir/barrnap/rrna.euk.gff"), + path("$outputDir/barrnap/rrna.mito.gff")).match("barrnap") }, + { assert new File("$outputDir/barrnap/summary.tsv").exists() }, + { assert snapshot(path("$outputDir/cutadapt/cutadapt_summary.tsv")).match("cutadapt") }, + { assert snapshot(path("$outputDir/dada2/ASV_seqs.fasta"), + path("$outputDir/dada2/ASV_table.tsv"), + path("$outputDir/dada2/DADA2_stats.tsv"), + path("$outputDir/dada2/DADA2_table.rds"), + path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") }, + { assert new File("$outputDir/dada2/ASV_tax.user.tsv").exists() }, + { assert new File("$outputDir/dada2/ASV_tax_species.user.tsv").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1a_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1a_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() }, + { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, + { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), + path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), + path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, + { assert 
snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + ) + } + } +} diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap new file mode 100644 index 000000000..680ca37ac --- /dev/null +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -0,0 +1,43 @@ +{ + "input": { + "content": [ + "Samplesheet.tsv:md5,dbf8d1a2b7933dab9e5a139f33c2b1f4" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "cutadapt": { + "content": [ + "cutadapt_summary.tsv:md5,5d02749984a811479e7d534fda75163f" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "software_versions": { + "content": [ + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "overall_summary_tsv": { + "content": [ + "overall_summary.tsv:md5,3231d6ee72b9a1e7742e5605caaff05a" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "barrnap": { + "content": [ + "rrna.arc.gff:md5,6dae470aace9293d5eb8c318584852dd", + "rrna.bac.gff:md5,439a9084f089120f700f938dfb58fa41", + "rrna.euk.gff:md5,c9bc1d9d8fb77dc19c95dee2d53840eb", + "rrna.mito.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "multiqc": { + "content": [ + 
"multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", + "multiqc_general_stats.txt:md5,88c2b9e6d02b83afe4f9551e6c9a91a7", + "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + ], + "timestamp": "2023-05-28T21:18:54+0000" + } +} From a1dfb5b1b6943fc244a58ea701ba50cd085ff2dc Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:45:15 +0000 Subject: [PATCH 050/104] Skip dada tax. --- conf/test_qiimecustom.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config index ea6b97d81..2233070cc 100644 --- a/conf/test_qiimecustom.config +++ b/conf/test_qiimecustom.config @@ -29,4 +29,5 @@ params { // Skip downstream analysis with QIIME2 skip_qiime_downstream = true + skip_dada_taxonomy = true } From 51dc97e82770417a5179abff1f50ae09c00ca71a Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:45:54 +0000 Subject: [PATCH 051/104] Sequence then taxonomy file for file pair to --qiime_ref_tax_custom. 
--- conf/test_qiimecustom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config index 2233070cc..2fc9cb736 100644 --- a/conf/test_qiimecustom.config +++ b/conf/test_qiimecustom.config @@ -25,7 +25,7 @@ params { input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" // Custom reference taxonomy - qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 skip_qiime_downstream = true From a33f17f7937769b43e9a3e9fb5c480cc115b67a3 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:49:21 +0000 Subject: [PATCH 052/104] Clarify in help text of --qiime_ref_tax_custom the ordering of a file pair. --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 62c54f79f..6ccfc3ad1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -434,7 +434,7 @@ }, "qiime_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths.", + "help_text": "Is preferred over `--qiime_ref_taxonomy`. 
A comma separated pair of (possibly gzipped) filepaths (sequence, taxonomy).", "description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)" }, "classifier": { From 8f57faec61a65a422c93c4cb6526ff3d6abcb65c Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:00:26 +0000 Subject: [PATCH 053/104] Update snapshots to include qiime2 in both correctly and add assertions for qiime2. --- tests/pipeline/qiimecustom.nf.test | 11 +++++------ tests/pipeline/qiimecustom.nf.test.snap | 7 +++++++ tests/pipeline/reftaxcustom.nf.test | 2 ++ tests/pipeline/reftaxcustom.nf.test.snap | 10 +++++----- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test index abd2a38a2..8ec67571b 100644 --- a/tests/pipeline/qiimecustom.nf.test +++ b/tests/pipeline/qiimecustom.nf.test @@ -2,11 +2,11 @@ nextflow_pipeline { name "Test Workflow main.nf" script "main.nf" - tag "test_reftaxcustom" - tag "dada2" + tag "test_qiimecustom" + tag "qiime2" tag "pipeline" - test("Custom DADA2 Reference Taxonomy Database") { + test("Custom QIIME2 Reference Taxonomy Database") { when { params { @@ -41,9 +41,8 @@ nextflow_pipeline { { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() }, { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, - { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), - path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), - path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, + { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"), + path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), 
path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap index 680ca37ac..616e1de0f 100644 --- a/tests/pipeline/qiimecustom.nf.test.snap +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -32,6 +32,13 @@ ], "timestamp": "2023-05-28T21:18:54+0000" }, + "qiime2": { + "content": [ + "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,b744a656dbd4e710697bf9ee47f26c87", + "taxonomy.tsv:md5,44585412583f0cf5f2b82a1337f16756" + ], + "timestamp": "2023-12-07T21:28:32+0000" + }, "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test index abd2a38a2..3f72ec5f0 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -44,6 +44,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, + { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"), + path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 842b18de0..90b157ec2 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -44,16 +44,16 @@ }, "kraken2": { "content": [ - 
"taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b", - "taxonomy/taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645" + "ASV_tax.user.kraken2.classifiedreads.txt:md5,8a4693c37d5c24b342ef161b92567764", + "ASV_tax.user.kraken2.complete.tsv:md5,3613dac9ce1bf03f87b57d1523e705f1", + "ASV_tax.user.kraken2.tsv:md5,95c3f9daa5da8fe00159fb07d394c3ce" ], "timestamp": "2023-09-15T21:16:26+0000" }, "qiime2": { "content": [ - "ASV_tax.user.kraken2.classifiedreads.txt:md5,8a4693c37d5c24b342ef161b92567764", - "ASV_tax.user.kraken2.complete.tsv:md5,3613dac9ce1bf03f87b57d1523e705f1", - "ASV_tax.user.kraken2.tsv:md5,95c3f9daa5da8fe00159fb07d394c3ce" + "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b", + "taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645" ], "timestamp": "2023-12-07T21:28:32+0000" }, From 74e05b2a26208befa00fd2bad63cc9fd5f6d97de Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:02:21 +0000 Subject: [PATCH 054/104] Make ordering of sequence and taxonomy files deterministic in case of file pair. --- subworkflows/local/qiime2_preptax.nf | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 9d5c68983..97ccba63c 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -35,7 +35,14 @@ workflow QIIME2_PREPTAX { ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) - ch_ref_database = ch_qiime_db_files.collate(2) + ch_ref_database_fna = ch_qiime_db_dir.filter { + it.getName().endsWith(".fna") + } + ch_ref_database_tax = ch_qiime_db_dir.filter { + it.getName().endsWith(".tax") + } + + ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) // Handle case we have been provided a single filepath (tarball or directory). 
} else { ch_qiime_ref_taxonomy.flatten() From b65df44c9ee053896666e9ccbee9bdc7ac2c41f8 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:03:51 +0000 Subject: [PATCH 055/104] Fix filtering in file pair case. --- subworkflows/local/qiime2_preptax.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 97ccba63c..7d0be52d7 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -35,10 +35,10 @@ workflow QIIME2_PREPTAX { ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) - ch_ref_database_fna = ch_qiime_db_dir.filter { + ch_ref_database_fna = ch_qiime_db_files.filter { it.getName().endsWith(".fna") } - ch_ref_database_tax = ch_qiime_db_dir.filter { + ch_ref_database_tax = ch_qiime_db_files.filter { it.getName().endsWith(".tax") } From 45bee719af1aba754a9bfbea274d5116204b0df7 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:31:07 +0000 Subject: [PATCH 056/104] Fix version mixing in --qiime_ref_taxonomy case. 
--- subworkflows/local/qiime2_preptax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index d514f0860..dfa287253 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -82,7 +82,7 @@ workflow QIIME2_PREPTAX { } } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) - ch_qiime2_preptax_versions(FORMAT_TAXONOMY_QIIME.out.versions) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(FORMAT_TAXONOMY_QIIME.out.versions) ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) } From 3c9eaf129c0dd311b4e8bbdbc8e047eb2519cefb Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:45:45 +0000 Subject: [PATCH 057/104] Update software version expectations for tests that no longer run QIIME_PREPTAX. --- tests/pipeline/doubleprimers.nf.test.snap | 2 +- tests/pipeline/multi.nf.test.snap | 2 +- tests/pipeline/qiimecustom.nf.test.snap | 2 +- tests/pipeline/reftaxcustom.nf.test.snap | 2 +- tests/pipeline/sintax.nf.test.snap | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap index d7cc9dce8..b5e9cb2bb 100644 --- a/tests/pipeline/doubleprimers.nf.test.snap +++ b/tests/pipeline/doubleprimers.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + 
"{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-07-27T13:49:03+0000" }, diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index 2c0382f0f..daba2601d 100644 --- a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap @@ -14,7 +14,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap index 616e1de0f..6c39df372 100644 --- a/tests/pipeline/qiimecustom.nf.test.snap +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, 
CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 90b157ec2..8fca1c5b0 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, 
dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index 069c7fa88..b19bf8feb 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, From 07f4407a4dcfc62be99f61e8a4ebaf6543caaf47 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:47:55 +0000 Subject: [PATCH 058/104] Remove assertions on dada2 tax and phyloseq files existing in test_qiimecustom. 
--- tests/pipeline/qiimecustom.nf.test | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test index 8ec67571b..2cdc080a7 100644 --- a/tests/pipeline/qiimecustom.nf.test +++ b/tests/pipeline/qiimecustom.nf.test @@ -30,8 +30,6 @@ nextflow_pipeline { path("$outputDir/dada2/DADA2_stats.tsv"), path("$outputDir/dada2/DADA2_table.rds"), path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") }, - { assert new File("$outputDir/dada2/ASV_tax.user.tsv").exists() }, - { assert new File("$outputDir/dada2/ASV_tax_species.user.tsv").exists() }, { assert new File("$outputDir/fastqc/sampleID_1_1_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/sampleID_1_2_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/sampleID_1a_1_fastqc.html").exists() }, @@ -46,8 +44,7 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } From 1c129e568cb2cbba9c11af01584763e3bca96dfe Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 23:08:40 +0000 Subject: [PATCH 059/104] Looks like qiime2 tax alignment is non-deterministic, just verify the files it emits are emitted. 
--- tests/pipeline/qiimecustom.nf.test | 4 ++-- tests/pipeline/qiimecustom.nf.test.snap | 7 ------- tests/pipeline/reftaxcustom.nf.test | 4 ++-- tests/pipeline/reftaxcustom.nf.test.snap | 7 ------- 4 files changed, 4 insertions(+), 18 deletions(-) diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test index 2cdc080a7..493968153 100644 --- a/tests/pipeline/qiimecustom.nf.test +++ b/tests/pipeline/qiimecustom.nf.test @@ -39,8 +39,8 @@ nextflow_pipeline { { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() }, { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, - { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"), - path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") }, + { assert new File("$outputDir/qiime2/taxonomy/taxonomy.tsv").exists() }, + { assert new File("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap index 6c39df372..594688a92 100644 --- a/tests/pipeline/qiimecustom.nf.test.snap +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -32,13 +32,6 @@ ], "timestamp": "2023-05-28T21:18:54+0000" }, - "qiime2": { - "content": [ - "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,b744a656dbd4e710697bf9ee47f26c87", - "taxonomy.tsv:md5,44585412583f0cf5f2b82a1337f16756" - ], - "timestamp": "2023-12-07T21:28:32+0000" - }, "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", diff --git a/tests/pipeline/reftaxcustom.nf.test 
b/tests/pipeline/reftaxcustom.nf.test index 3f72ec5f0..4e70861b6 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -44,8 +44,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, - { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"), - path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") }, + { assert new File("$outputDir/qiime2/taxonomy/taxonomy.tsv").exists() }, + { assert new File("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 8fca1c5b0..b5aa10f14 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -50,13 +50,6 @@ ], "timestamp": "2023-09-15T21:16:26+0000" }, - "qiime2": { - "content": [ - "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b", - "taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645" - ], - "timestamp": "2023-12-07T21:28:32+0000" - }, "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", From 04af15445a4aaae6f53ac17790c3be49b0e7e1b6 Mon Sep 17 00:00:00 2001 From: Matthew Date: Fri, 8 Dec 2023 14:18:19 +0000 Subject: [PATCH 060/104] Update CHANGELOG.md Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
7639cf7cc..14502e4f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables -- []() - Fix logic relating to generation of qiime2 taxonomy part of summary report +- [#673](https://github.com/nf-core/ampliseq/pull/673) - Fix logic relating to generation of qiime2 taxonomy part of summary report ### `Dependencies` From 2ace59599e1c0c5ed19ffabf03ce2adb5a34c428 Mon Sep 17 00:00:00 2001 From: Matthew Date: Fri, 8 Dec 2023 14:26:03 +0000 Subject: [PATCH 061/104] Make --skip_qiime_downstream help text clearer. Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 6ccfc3ad1..14eef4b45 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -658,7 +658,7 @@ }, "skip_qiime_downstream": { "type": "boolean", - "description": "Skip steps that are executed by QIIME2 except for taxonomic classification, including barplots, relative abundance tables, diversity analysis, differential abundance testing." + "description": "Skip steps that are executed by QIIME2 except for taxonomic classification. Skip steps including barplots, relative abundance tables, diversity analysis, differential abundance testing." }, "skip_taxonomy": { "type": "boolean", From 4464c38cef7be3e9309c3d036fda7172aba130a4 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 8 Dec 2023 14:48:12 +0000 Subject: [PATCH 062/104] Remove assertion on qiime phyloseq file no longer produced. 
--- tests/pipeline/pplace.nf.test | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index 564cf2b9b..b0507df75 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -56,8 +56,7 @@ nextflow_pipeline { { assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } From f1a86851942218bd72c33c0358b784c1e8207f4f Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 16:37:09 +0100 Subject: [PATCH 063/104] Move midori to get dbs in alphabetical order --- conf/ref_databases.config | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/conf/ref_databases.config b/conf/ref_databases.config index c80820ecc..4ac7d02c0 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -25,22 +25,6 @@ params { fmtscript = "taxref_reformat_coidb.sh" dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)" } - 'midori2-co1' { - title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" - file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] - citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." 
- fmtscript = "taxref_reformat_midori2.sh" - dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" - taxlevels = "Phylum,Class,Order,Family,Genus,Species" - } - 'midori2-co1=gb250' { - title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" - file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] - citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." - fmtscript = "taxref_reformat_midori2.sh" - dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" - taxlevels = "Phylum,Class,Order,Family,Genus,Species" - } 'gtdb' { title = "GTDB - Genome Taxonomy Database - Release R08-RS214.1" file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/bac120_ssu_reps_r214.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/ar53_ssu_reps_r214.tar.gz" ] @@ -76,6 +60,22 @@ params { fmtscript = "taxref_reformat_gtdb.sh" dbversion = "GTDB R05-RS95 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release95/95.0/)" } + 'midori2-co1' { + title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" + file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] + citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." 
+ fmtscript = "taxref_reformat_midori2.sh" + dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" + taxlevels = "Phylum,Class,Order,Family,Genus,Species" + } + 'midori2-co1=gb250' { + title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" + file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] + citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." + fmtscript = "taxref_reformat_midori2.sh" + dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" + taxlevels = "Phylum,Class,Order,Family,Genus,Species" + } 'pr2' { title = "PR2 - Protist Reference Ribosomal Database - Version 5.0.0" file = [ "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_dada2.fasta.gz", "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_UTAX.fasta.gz" ] From 54debbf2d49da6900c7a8e2302520c6007a40670 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 17:13:56 +0100 Subject: [PATCH 064/104] Sort dbs in alphabetical order in schema --- nextflow_schema.json | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1d3098da5..27e524c31 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -335,36 +335,36 @@ "description": "Name of supported database, and optionally also version number", "default": "silva=138", "enum": [ - "midori2-co1=gb250", - "midori2-co1", + "coidb", + "coidb=221216", + "gtdb", "gtdb=R05-RS95", "gtdb=R06-RS202", 
"gtdb=R07-RS207", "gtdb=R08-RS214", - "gtdb", - "coidb", - "coidb=221216", - "pr2=5.0.0", - "pr2=4.14.0", - "pr2=4.13.0", + "midori2-co1", + "midori2-co1=gb250", "pr2", - "rdp=18", + "pr2=4.13.0", + "pr2=4.14.0", + "pr2=5.0.0", "rdp", + "rdp=18", "sbdi-gtdb", - "sbdi-gtdb=R07-RS207-1", - "sbdi-gtdb=R06-RS202-3", "sbdi-gtdb=R06-RS202-1", + "sbdi-gtdb=R06-RS202-3", + "sbdi-gtdb=R07-RS207-1", + "silva", "silva=132", "silva=138", - "silva", - "unite-fungi=9.0", - "unite-fungi=8.3", - "unite-fungi=8.2", - "unite-fungi", - "unite-alleuk=9.0", - "unite-alleuk=8.3", + "unite-alleuk", "unite-alleuk=8.2", - "unite-alleuk" + "unite-alleuk=8.3", + "unite-alleuk=9.0", + "unite-fungi", + "unite-fungi=8.2", + "unite-fungi=8.3", + "unite-fungi=9.0" ] }, "dada_ref_tax_custom": { From fac3d731df0efc94d64323e6f7d171d1d60cd4f4 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 17:22:51 +0100 Subject: [PATCH 065/104] Spelling mistake --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 62433ceac..c2b935373 100644 --- a/docs/output.md +++ b/docs/output.md @@ -140,7 +140,7 @@ DADA2 reduces sequence errors and dereplicates sequences by quality filtering, d - `ASV_table.tsv`: Counts for each ASV sequence. - `DADA2_stats.tsv`: Tracking read numbers through DADA2 processing steps, for each sample. - `DADA2_table.rds`: DADA2 ASV table as R object. - - `DADA2_tables.tsv`: DADA2 ASV table. + - `DADA2_table.tsv`: DADA2 ASV table. - `dada2/args/`: Directory containing files with all parameters for DADA2 steps. - `dada2/log/`: Directory containing log files for DADA2 steps. 
- `dada2/QC/` From f61d94547539f20c6ac26fb6bbba70022ea2fd32 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 17:23:08 +0100 Subject: [PATCH 066/104] Add PhytoRef taxonomy database for chloroplast 16S --- bin/taxref_reformat_phytoref.sh | 7 +++++++ conf/ref_databases.config | 8 ++++++++ nextflow_schema.json | 1 + 3 files changed, 16 insertions(+) create mode 100755 bin/taxref_reformat_phytoref.sh diff --git a/bin/taxref_reformat_phytoref.sh b/bin/taxref_reformat_phytoref.sh new file mode 100755 index 000000000..c61c081ed --- /dev/null +++ b/bin/taxref_reformat_phytoref.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Write the assignTaxonomy() fasta file: assignTaxonomy.fna +cat PhytoRef_with_taxonomy.fasta | sed '/>/s/>[^|]*|/>/' | sed '/>/s/|/;/g' > assignTaxonomy.fna + +# Write the addSpecies() fasta file: addSpecies.fna +cat PhytoRef_with_taxonomy.fasta | sed '/^>/s/>\([^|]\+\)|.*|\([^|]\+\)/>\1 \2/' > addSpecies.fna diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 4ac7d02c0..65e0b9b11 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -76,6 +76,14 @@ params { dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" taxlevels = "Phylum,Class,Order,Family,Genus,Species" } + 'phytoref' { + title = "PhytoRef plastid 16S rRNA database for photosynthetic eukaryotes" + file = [ "http://phytoref.sb-roscoff.fr/static/downloads/PhytoRef_with_taxonomy.fasta" ] + citation = "Decelle, Johan, Sarah Romac, Rowena F. Stern, El Mahdi Bendif, Adriana Zingone, Stéphane Audic, Michael D. Guiry, et al. 2015. PhytoREF: A Reference Database of the Plastidial 16S rRNA Gene of Photosynthetic Eukaryotes with Curated Taxonomy. Molecular Ecology Resources 15 (6): 1435–45. https://doi.org/10.1111/1755-0998.12401." 
+ fmtscript = "taxref_reformat_phytoref.sh" + dbversion = "unknown" + taxlevels = "Domain,Supergroup,Subphylum,Class,Subclass,Order,Suborder,Family,Genus,Species" + } 'pr2' { title = "PR2 - Protist Reference Ribosomal Database - Version 5.0.0" file = [ "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_dada2.fasta.gz", "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_UTAX.fasta.gz" ] diff --git a/nextflow_schema.json b/nextflow_schema.json index 27e524c31..9494e8f49 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -344,6 +344,7 @@ "gtdb=R08-RS214", "midori2-co1", "midori2-co1=gb250", + "phytoref", "pr2", "pr2=4.13.0", "pr2=4.14.0", From 58b0f7222e6409fdc1af4b085018de303376a3db Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 17:26:50 +0100 Subject: [PATCH 067/104] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b08795266..3f31da024 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` + ### `Changed` ### `Fixed` From 836bb451d02842b3b6f435161e61707223e983af Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 19:19:24 +0100 Subject: [PATCH 068/104] Add Zehr lab nifH database --- bin/taxref_reformat_zehr-nifh.sh | 7 +++++++ conf/ref_databases.config | 16 ++++++++++++++++ nextflow_schema.json | 4 +++- 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100755 bin/taxref_reformat_zehr-nifh.sh diff --git a/bin/taxref_reformat_zehr-nifh.sh b/bin/taxref_reformat_zehr-nifh.sh new file mode 100755 index 000000000..86a8eb264 --- /dev/null +++ b/bin/taxref_reformat_zehr-nifh.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Write the 
assignTaxonomy() fasta file: assignTaxonomy.fna +cp *.fasta assignTaxonomy.fna + +# Write the addSpecies() fasta file: addSpecies.fna +cut -d, -f 2,6,7 *.csv | grep -v '^sequence,' | sed 's/\(.*\),\(.*\),\(.*\)/>\3 \2\n\1/' > addSpecies.fna diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 65e0b9b11..6d0d7d3b2 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -247,6 +247,22 @@ params { dbversion = "UNITE-alleuk v8.2 (https://doi.org/10.15156/BIO/786370)" shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34994569", "https://scilifelab.figshare.com/ndownloader/files/34994572"] } + 'zehr-nifh' { + title = "Zehr lab nifH database - version 2.5.0" + file = [ "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_v2.0.5.fasta", "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_phylum_v2.0.5.csv" ] + citation = "M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213" + fmtscript = "taxref_reformat_zehr-nifh.sh" + dbversion = "Zehr-nifH v. 2.5.0" + taxlevels = "Domain,Phylum,Class,Order,Family,Genus" + } + 'zehr-nifh=2.5.0' { + title = "Zehr lab nifH database - version 2.5.0" + file = [ "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_v2.0.5.fasta", "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_phylum_v2.0.5.csv" ] + citation = "M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213" + fmtscript = "taxref_reformat_zehr-nifh.sh" + dbversion = "Zehr-nifH v. 
2.5.0" + taxlevels = "Domain,Phylum,Class,Order,Family,Genus" + } } //QIIME2 taxonomic reference databases qiime_ref_databases { diff --git a/nextflow_schema.json b/nextflow_schema.json index 9494e8f49..d2e5faa92 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -365,7 +365,9 @@ "unite-fungi", "unite-fungi=8.2", "unite-fungi=8.3", - "unite-fungi=9.0" + "unite-fungi=9.0", + "zehr-nifh", + "zehr-nifh=2.5.0" ] }, "dada_ref_tax_custom": { From e4a09ce0d243a1016e18986d6ac3d094ad3c2235 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 19:23:52 +0100 Subject: [PATCH 069/104] CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f31da024..5a1a239ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` +- [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh` ### `Changed` From 02a25293d77cc501d01ed87de01013909037cea1 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 19:39:53 +0100 Subject: [PATCH 070/104] Fix formatting mistake --- bin/taxref_reformat_zehr-nifh.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/taxref_reformat_zehr-nifh.sh b/bin/taxref_reformat_zehr-nifh.sh index 86a8eb264..54171f51b 100755 --- a/bin/taxref_reformat_zehr-nifh.sh +++ b/bin/taxref_reformat_zehr-nifh.sh @@ -4,4 +4,4 @@ cp *.fasta assignTaxonomy.fna # Write the addSpecies() fasta file: addSpecies.fna -cut -d, -f 2,6,7 *.csv | grep -v '^sequence,' | sed 's/\(.*\),\(.*\),\(.*\)/>\3 \2\n\1/' > addSpecies.fna +cut -d, -f 2,6,7 *.csv | grep -v '^sequence,' | sed 's/\(.*\),[0-9]* \(.*\),\(.*\)/>\3 \2\n\1/' > addSpecies.fna From 
09ae4b7f9e0032043358cccc35c22e4b15845eee Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 12 Dec 2023 10:27:38 +0100 Subject: [PATCH 071/104] Change unknown to unversioned for PhytoRef --- conf/ref_databases.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 6d0d7d3b2..263f43907 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -81,7 +81,7 @@ params { file = [ "http://phytoref.sb-roscoff.fr/static/downloads/PhytoRef_with_taxonomy.fasta" ] citation = "Decelle, Johan, Sarah Romac, Rowena F. Stern, El Mahdi Bendif, Adriana Zingone, Stéphane Audic, Michael D. Guiry, et al. 2015. PhytoREF: A Reference Database of the Plastidial 16S rRNA Gene of Photosynthetic Eukaryotes with Curated Taxonomy. Molecular Ecology Resources 15 (6): 1435–45. https://doi.org/10.1111/1755-0998.12401." fmtscript = "taxref_reformat_phytoref.sh" - dbversion = "unknown" + dbversion = "unversioned" taxlevels = "Domain,Supergroup,Subphylum,Class,Subclass,Order,Suborder,Family,Genus,Species" } 'pr2' { From 9a646392966ff594af828c746d99ecffb2361a20 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 12 Dec 2023 10:46:36 +0100 Subject: [PATCH 072/104] fix phyloseq --- CHANGELOG.md | 1 + subworkflows/local/phyloseq_workflow.nf | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b08795266..ee5150f5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables +- [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files ### `Dependencies` diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf index adf208b70..214656e39 100644 ---
a/subworkflows/local/phyloseq_workflow.nf +++ b/subworkflows/local/phyloseq_workflow.nf @@ -36,7 +36,7 @@ workflow PHYLOSEQ_WORKFLOW { ch_phyloseq_inasv = ch_tsv } - PHYLOSEQ ( ch_tax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree ) + PHYLOSEQ ( ch_tax, ch_phyloseq_inasv.first(), ch_phyloseq_inmeta, ch_phyloseq_intree ) emit: rds = PHYLOSEQ.out.rds From c3977cb13f8b8ce8f8605847aebbfec001b46290 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 12 Dec 2023 12:26:44 +0100 Subject: [PATCH 073/104] use .collect() to avoid warning and adjust tests --- subworkflows/local/phyloseq_workflow.nf | 2 +- tests/pipeline/pplace.nf.test | 3 ++- tests/pipeline/reftaxcustom.nf.test | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf index 214656e39..2401cf44b 100644 --- a/subworkflows/local/phyloseq_workflow.nf +++ b/subworkflows/local/phyloseq_workflow.nf @@ -36,7 +36,7 @@ workflow PHYLOSEQ_WORKFLOW { ch_phyloseq_inasv = ch_tsv } - PHYLOSEQ ( ch_tax, ch_phyloseq_inasv.first(), ch_phyloseq_inmeta, ch_phyloseq_intree ) + PHYLOSEQ ( ch_tax, ch_phyloseq_inasv.collect(), ch_phyloseq_inmeta, ch_phyloseq_intree ) emit: rds = PHYLOSEQ.out.rds diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index 564cf2b9b..781d3dcd9 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -57,7 +57,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/pplace_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/reftaxcustom.nf.test 
b/tests/pipeline/reftaxcustom.nf.test index abd2a38a2..67c4d5468 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -48,7 +48,8 @@ nextflow_pipeline { path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/kraken2_phyloseq.rds").exists() } ) } } From a32b58251d7bf7edce7103b1314b03eb029a10d2 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 12 Dec 2023 12:34:21 +0100 Subject: [PATCH 074/104] combine channels instead of using .collect() --- modules/local/phyloseq.nf | 3 +-- subworkflows/local/phyloseq_workflow.nf | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/local/phyloseq.nf b/modules/local/phyloseq.nf index bbc6218b3..946c91fa0 100644 --- a/modules/local/phyloseq.nf +++ b/modules/local/phyloseq.nf @@ -8,8 +8,7 @@ process PHYLOSEQ { 'biocontainers/bioconductor-phyloseq:1.44.0--r43hdfd78af_0' }" input: - tuple val(prefix), path(tax_tsv) - path otu_tsv + tuple val(prefix), path(tax_tsv), path(otu_tsv) path sam_tsv path tree diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf index 2401cf44b..3b6d9dd46 100644 --- a/subworkflows/local/phyloseq_workflow.nf +++ b/subworkflows/local/phyloseq_workflow.nf @@ -36,7 +36,7 @@ workflow PHYLOSEQ_WORKFLOW { ch_phyloseq_inasv = ch_tsv } - PHYLOSEQ ( ch_tax, ch_phyloseq_inasv.collect(), ch_phyloseq_inmeta, ch_phyloseq_intree ) + PHYLOSEQ ( ch_tax.combine(ch_phyloseq_inasv), ch_phyloseq_inmeta, ch_phyloseq_intree ) emit: rds = PHYLOSEQ.out.rds From 9550aecd328d61feaf0fb3efdbce52f1a6d9d74c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jeanette=20T=C3=A5ngrot?= Date: Tue, 
12 Dec 2023 15:06:44 +0100 Subject: [PATCH 075/104] Add cut_its to SBDI export --- bin/sbdiexportreannotate.R | 13 ++++++++----- modules/local/sbdiexportreannotate.nf | 3 ++- workflows/ampliseq.nf | 4 ++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/bin/sbdiexportreannotate.R b/bin/sbdiexportreannotate.R index 19d5e3ae6..68a2b3928 100755 --- a/bin/sbdiexportreannotate.R +++ b/bin/sbdiexportreannotate.R @@ -18,7 +18,10 @@ dbversion <- args[1] taxfile <- args[2] taxmethod <- args[3] wfversion <- args[4] -predfile <- args[5] +cut_its <- args[5] +predfile <- args[6] + +cut_its = ifelse(cut_its == 'none', '', paste(' cut_its:', cut_its, sep='')) # Read taxonomy table taxonomy <- read.delim(taxfile, sep = '\t', stringsAsFactors = FALSE) @@ -108,10 +111,10 @@ taxtable <- taxonomy %>% date_identified = as.character(lubridate::today()), reference_db = dbversion, annotation_algorithm = case_when( - (taxmethod == 'sintax') ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) VSEARCH:sintax', sep=' '), - (!(is.na(otu) | otu == '')) ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) addsh', sep=' '), - (!(is.na(species_exact) | species_exact == '')) ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) DADA2:assignTaxonomy:addSpecies', sep=' '), - TRUE ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) DADA2:assignTaxonomy', sep=' ') + (taxmethod == 'sintax') ~ paste('Ampliseq ',wfversion,' (https://nf-co.re/ampliseq) VSEARCH:sintax',cut_its, sep=' '), + (!(is.na(otu) | otu == '')) ~ paste('Ampliseq ',wfversion,' (https://nf-co.re/ampliseq) addsh',cut_its, sep=' '), + (!(is.na(species_exact) | species_exact == '')) ~ paste('Ampliseq ',wfversion,' (https://nf-co.re/ampliseq) DADA2:assignTaxonomy:addSpecies',cut_its, sep=' '), + TRUE ~ paste('Ampliseq ',wfversion,' (https://nf-co.re/ampliseq) DADA2:assignTaxonomy',cut_its, sep='') ), identification_references = 
'https://docs.biodiversitydata.se/analyse-data/molecular-tools/#taxonomy-annotation', taxon_remarks = ifelse(!(is.na(domain) | domain == ''), paste('Domain = \'',domain,'\'',sep=''),''), diff --git a/modules/local/sbdiexportreannotate.nf b/modules/local/sbdiexportreannotate.nf index f06fae364..8ebe870ce 100644 --- a/modules/local/sbdiexportreannotate.nf +++ b/modules/local/sbdiexportreannotate.nf @@ -11,6 +11,7 @@ process SBDIEXPORTREANNOTATE { path taxonomytable val taxonomymethod val dbversion + val cut_its path predictions output: @@ -28,7 +29,7 @@ process SBDIEXPORTREANNOTATE { ampliseq_version="v$workflow.manifest.version" fi - sbdiexportreannotate.R \"$dbversion\" $taxonomytable $taxonomymethod \"\$ampliseq_version\" $predictions + sbdiexportreannotate.R \"$dbversion\" $taxonomytable $taxonomymethod \"\$ampliseq_version\" $cut_its $predictions cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 05ddfee76..76a07726b 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -722,11 +722,11 @@ workflow AMPLISEQ { if ( params.sintax_ref_taxonomy ) { SBDIEXPORT ( ch_dada2_asv, ch_sintax_tax, ch_metadata ) db_version = params.sintax_ref_databases[params.sintax_ref_taxonomy]["dbversion"] - SBDIEXPORTREANNOTATE ( ch_sintax_tax, "sintax", db_version, ch_barrnapsummary.ifEmpty([]) ) + SBDIEXPORTREANNOTATE ( ch_sintax_tax, "sintax", db_version, params.cut_its, ch_barrnapsummary.ifEmpty([]) ) } else { SBDIEXPORT ( ch_dada2_asv, ch_dada2_tax, ch_metadata ) db_version = params.dada_ref_databases[params.dada_ref_taxonomy]["dbversion"] - SBDIEXPORTREANNOTATE ( ch_dada2_tax, "dada2", db_version, ch_barrnapsummary.ifEmpty([]) ) + SBDIEXPORTREANNOTATE ( ch_dada2_tax, "dada2", db_version, params.cut_its, ch_barrnapsummary.ifEmpty([]) ) } ch_versions = ch_versions.mix(SBDIEXPORT.out.versions.first()) } From cb517396405c2217968f6047e51040f0a8e65430 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Jeanette=20T=C3=A5ngrot?= Date: Tue, 12 Dec 2023 15:32:49 +0100 Subject: [PATCH 076/104] Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b08795266..9da4a49bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [#](https://github.com/nf-core/ampliseq/pull/) - Added cut_its information to SBDI export + ### `Fixed` - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables From 667189cda1217b5e3fd9ad0b149ef306f0e52b59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jeanette=20T=C3=A5ngrot?= Date: Tue, 12 Dec 2023 15:38:43 +0100 Subject: [PATCH 077/104] Update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bdb1b62b..205f686d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` -- [#](https://github.com/nf-core/ampliseq/pull/) - Added cut_its information to SBDI export +- [#677](https://github.com/nf-core/ampliseq/pull/677) - Added cut_its information to SBDI export ### `Fixed` From 6b71e4d2500e72ac3eda29d80f0654ed7e5fa481 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 10:19:46 +0100 Subject: [PATCH 078/104] Fix reporting --- assets/report_template.Rmd | 12 +++++++++--- modules/local/summary_report.nf | 3 ++- workflows/ampliseq.nf | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 8c8fc21e7..264a71493 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -980,9 +980,15 @@ cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in # Header cat("## QIIME2\n") -cat("The taxonomic classification was performed by
[QIIME2](https://www.nature.com/articles/s41587-019-0209-9) - using the database: `", params$qiime2_ref_tax_title, "`. - More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "") +# indicate reference taxonomy +if ( !isFALSE(params$qiime2_ref_tax_title) ) { + cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) + using the database: `", params$qiime2_ref_tax_title, "`. + More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "") +} else { + cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) using a custom database ", + "provided by the user.\n\n", sep = "") +} # Read file and prepare table asv_tax <- read.table(params$qiime2_taxonomy, header = TRUE, sep = "\t") diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index a8e082b01..1a288a0fb 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -118,7 +118,8 @@ process SUMMARY_REPORT { kraken2_tax ? "kraken2_taxonomy='$kraken2_tax',kraken2_confidence='$params.kraken2_confidence'" : "", kraken2_tax && !params.kraken2_ref_tax_custom ? "kraken2_ref_tax_title='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["title"]}',kraken2_ref_tax_file='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["file"]}',kraken2_ref_tax_citation='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["citation"]}'" : "", pplace_tax ? "pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "", - qiime2_tax && params.qiime_ref_taxonomy ? 
"qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "", + qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "", + qiime2_tax && params.qiime_ref_taxonomy ? "qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : "", run_qiime2 ? "val_used_taxonomy='$val_used_taxonomy'" : "", filter_stats_tsv ? "filter_stats_tsv='$filter_stats_tsv',qiime2_filtertaxa='$qiime2_filtertaxa',exclude_taxa='$params.exclude_taxa',min_frequency='$params.min_frequency',min_samples='$params.min_samples'" : "", barplot ? "barplot=TRUE" : "", diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9e85bf6af..6dcc370f1 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -863,7 +863,7 @@ workflow AMPLISEQ { !params.skip_taxonomy && ( params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) ? KRAKEN2_TAXONOMY_WF.out.tax_tsv.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? FASTA_NEWICK_EPANG_GAPPA.out.heattree.ifEmpty( [[],[]] ) : [[],[]], - !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], + !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2_taxonomy ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], run_qiime2, run_qiime2 ? 
val_used_taxonomy : "", run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "", From 67da9335ceb325b15939ad34d02e27cf4b599a9a Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 11:00:07 +0100 Subject: [PATCH 079/104] Prevent masking low complexity regions by VSEARCH --- CHANGELOG.md | 1 + conf/modules.config | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ecf13f131..77c3a6bc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables - [#673](https://github.com/nf-core/ampliseq/pull/673) - Fix logic relating to generation of qiime2 taxonomy part of summary report - [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files +- [#679](https://github.com/nf-core/ampliseq/pull/679) - Prevent masking low complexity regions by VSEARCH with lower case letters ### `Dependencies` diff --git a/conf/modules.config b/conf/modules.config index 68794ab70..69b7967c7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -451,7 +451,7 @@ process { } withName: VSEARCH_CLUSTER { - ext.args = "--id ${params.vsearch_cluster_id} --usersort" + ext.args = '--id ${params.vsearch_cluster_id} --usersort --qmask "none"' ext.args2 = '--cluster_smallmem' ext.args3 = '--clusters' } From a1992a484a883b7bcc2531b112c2cc0f45743a3a Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 11:08:55 +0100 Subject: [PATCH 080/104] fix error message for --input_folder --- subworkflows/local/parse_input.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/parse_input.nf b/subworkflows/local/parse_input.nf index 
ba8aa4846..ae134ae9d 100644 --- a/subworkflows/local/parse_input.nf +++ b/subworkflows/local/parse_input.nf @@ -11,7 +11,7 @@ workflow PARSE_INPUT { //Check folders in folder when multiple_sequencing_runs folders = multiple_sequencing_runs ? "/*" : "" error_message = "\nCannot find any reads matching: \"${input}${folders}${extension}\"\n" - error_message += "Please revise the input folder (\"--input\"): \"${input}\"\n" + error_message += "Please revise the input folder (\"--input_folder\"): \"${input}\"\n" error_message += "and the input file pattern (\"--extension\"): \"${extension}\"\n" error_message += "*Please note: Path needs to be enclosed in quotes!*\n" error_message += multiple_sequencing_runs ? "If you do not have multiple sequencing runs, please do not use \"--multiple_sequencing_runs\"!\n" : "If you have multiple sequencing runs, please add \"--multiple_sequencing_runs\"!\n" From 55116e04d53aae932153c2f67416211b25464ae1 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 13:08:17 +0100 Subject: [PATCH 081/104] Fix hyphens --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 69b7967c7..cbc04cd07 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -451,7 +451,7 @@ process { } withName: VSEARCH_CLUSTER { - ext.args = '--id ${params.vsearch_cluster_id} --usersort --qmask "none"' + ext.args = "--id ${params.vsearch_cluster_id} --usersort --qmask 'none'" ext.args2 = '--cluster_smallmem' ext.args3 = '--clusters' } From 51ee751e0bdacf590aab29fdaf15ad4220329e06 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 13:30:47 +0100 Subject: [PATCH 082/104] only report input folder when it is created --- assets/report_template.Rmd | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 264a71493..cf1fc5480 100644 --- a/assets/report_template.Rmd +++ 
b/assets/report_template.Rmd @@ -181,17 +181,13 @@ supporting denoising of any amplicon and supports a variety of taxonomic databas ```{r, results='asis'} if ( !isFALSE(params$metadata) ) { - cat(paste0(" -# Data input and Metadata - -Pipeline input was saved to the [input](../input) directory. - ")) + cat("# Data input and Metadata\n\n") } else { - cat(paste0(" -# Data input + cat("# Data input\n\n") +} -Pipeline input was saved in folder [input](../input). - ")) +if ( !isFALSE(params$metadata) || !isFALSE(params$input_samplesheet) ) { + cat("Pipeline input was saved in folder [input](../input).\n\n") } if ( !isFALSE(params$input_samplesheet) ) { From f572fafeceb7949351bd27c90bea468d2b9a9f01 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 13:42:50 +0100 Subject: [PATCH 083/104] report correct cutadapt percentages for all possible number ranges --- assets/report_template.Rmd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index cf1fc5480..752a6b17a 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -258,8 +258,7 @@ the denoising tool or sequences might be lost due to being labelled as PCR chime # import tsv cutadapt_summary <- read.table(file = params$cutadapt_summary, header = TRUE, sep = "\t") -cutadapt_passed_col <- as.numeric(substr( - cutadapt_summary$cutadapt_passing_filters_percent, 1, 4)) +cutadapt_passed_col <- as.numeric( gsub("%","",cutadapt_summary$cutadapt_passing_filters_percent) ) cutadapt_max_discarded <- round( 100 - min(cutadapt_passed_col), 1 ) cutadapt_avg_passed <- round(mean(cutadapt_passed_col),1) From 13a6e5ec988505faf21306bd2b33646a5787b43c Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 13:48:53 +0100 Subject: [PATCH 084/104] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ecf13f131..01ccf88ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 
+20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables - [#673](https://github.com/nf-core/ampliseq/pull/673) - Fix logic relating to generation of qiime2 taxonomy part of summary report - [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files +- [#680](https://github.com/nf-core/ampliseq/pull/680) - Improved pipeline summary report & error messages ### `Dependencies` From d0786b7a057e4478b67ffcba60d7dda3f038a9b1 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 14:54:40 +0100 Subject: [PATCH 085/104] add --dada_addspecies_allowmultiple and --dada_taxonomy_rc --- CHANGELOG.md | 1 + conf/modules.config | 9 ++++----- docs/usage.md | 8 ++++---- nextflow.config | 4 +++- nextflow_schema.json | 10 ++++++++++ 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77c3a6bc0..22dd3c526 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#667](https://github.com/nf-core/ampliseq/pull/667) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification - [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` - [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh` +- [#681](https://github.com/nf-core/ampliseq/pull/681) - For DADA2, with `--dada_addspecies_allowmultiple` multiple exact species matches are reported and with `--dada_taxonomy_rc` reverse-complement matches are also considered in taxonomic classification ### `Changed` diff --git a/conf/modules.config b/conf/modules.config index 
cbc04cd07..e02e93423 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -355,8 +355,7 @@ process { ext.seed = "${params.seed}" ext.args = [ 'minBoot = 50', - params.pacbio ? "tryRC = TRUE" : - params.iontorrent ? "tryRC = TRUE" : "" + params.dada_taxonomy_rc || params.pacbio || params.iontorrent ? "tryRC = TRUE" : "tryRC = FALSE" ].join(',').replaceAll('(,)*$', "") publishDir = [ [ @@ -375,9 +374,9 @@ process { withName: DADA2_ADDSPECIES { ext.seed = "${params.seed}" ext.args = [ - 'allowMultiple = FALSE, n = 1e5', - params.pacbio ? "tryRC = TRUE" : - params.iontorrent ? "tryRC = TRUE" : "" + 'n = 1e5', + params.dada_addspecies_allowmultiple ? "allowMultiple = TRUE" : "", + params.dada_taxonomy_rc || params.pacbio || params.iontorrent ? "tryRC = TRUE" : "tryRC = FALSE" ].join(',').replaceAll('(,)*$', "") publishDir = [ [ diff --git a/docs/usage.md b/docs/usage.md index 38c2cc23f..f8625b2f7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -221,18 +221,18 @@ Pre-configured reference taxonomy databases are: | Database key | DADA2 | SINTAX | Kraken2 | QIIME2 | Target genes | | ------------ | ----- | ------ | ------- | ------ | --------------------------------------------- | | silva | + | - | + | + | 16S rRNA | -| gtdb | + | - | - | - | 16S rRNA | +| gtdb | +¹ | - | - | - | 16S rRNA | | sbdi-gtdb | + | - | - | - | 16S rRNA | | rdp | + | - | + | - | 16S rRNA | -| greengenes | - | - | + | (+)¹ | 16S rRNA | +| greengenes | - | - | + | (+)² | 16S rRNA | | pr2 | + | - | - | - | 18S rRNA | | unite-fungi | + | + | - | + | eukaryotic nuclear ribosomal ITS region | | unite-alleuk | + | + | - | + | eukaryotic nuclear ribosomal ITS region | | coidb | + | + | - | - | eukaryotic Cytochrome Oxidase I (COI) | | midori2-co1 | + | - | - | - | eukaryotic Cytochrome Oxidase I (COI) | -| standard | - | - | + | - | any in genomes of archaea, bacteria, viruses² | +| standard | - | - | + | - | any in genomes of archaea, bacteria, viruses³ | -¹: de-replicated at 85%, only for 
testing purposes; ²: quality of results might vary +¹: [`--dada_taxonomy_rc`](https://nf-co.re/ampliseq/parameters#dada_taxonomy_rc) is recommended; ²: de-replicated at 85%, only for testing purposes; ³: quality of results might vary Special features of taxonomic classification tools: diff --git a/nextflow.config b/nextflow.config index 831a43a22..7f35f9e45 100644 --- a/nextflow.config +++ b/nextflow.config @@ -107,13 +107,15 @@ params { dada_ref_tax_custom = null dada_ref_tax_custom_sp = null cut_dada_ref_taxonomy = false + dada_addspecies_allowmultiple = false + dada_taxonomy_rc = false sintax_ref_taxonomy = null qiime_ref_taxonomy = null qiime_ref_tax_custom = null kraken2_ref_taxonomy = null kraken2_assign_taxlevels = null kraken2_ref_tax_custom = null - kraken2_confidence = 0 + kraken2_confidence = 0.0 // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index b8afed35a..37d528fd1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -390,6 +390,16 @@ "help_text": "Expected amplified sequences are extracted from the DADA2 reference taxonomy using the primer sequences, that might improve classification. This is not applied to species classification (assignSpecies) but only for lower taxonomic levels (assignTaxonomy).", "description": "If the expected amplified sequences are extracted from the DADA2 reference taxonomy database" }, + "dada_addspecies_allowmultiple": { + "type": "boolean", + "help_text": "Defines the behavior when multiple exact matches against different species are returned. By default only unambiguous identifications are returned. 
If TRUE, a concatenated string of all exactly matched species is returned.", + "description": "If multiple exact matches against different species are returned" + }, + "dada_taxonomy_rc": { + "type": "boolean", + "help_text": "Reverse-complement of each sequence will be used for classification if it is a better match to the reference sequences than the forward sequence.", + "description": "If reverse-complement of each sequence will also be tested for classification" + }, "pplace_tree": { "type": "string", "description": "Newick file with reference phylogenetic tree. Requires also `--pplace_aln` and `--pplace_model`." From 2edaa5a08de446166a1d7f1e367029a6fc4c1b26 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 19 Dec 2023 18:30:49 +0000 Subject: [PATCH 086/104] Template update for nf-core/tools version 2.11 --- .github/CONTRIBUTING.md | 3 + .github/PULL_REQUEST_TEMPLATE.md | 1 + .github/workflows/ci.yml | 2 +- .github/workflows/fix-linting.yml | 4 +- .github/workflows/linting.yml | 12 +- .gitpod.yml | 4 +- CHANGELOG.md | 2 +- README.md | 17 +-- assets/multiqc_config.yml | 2 +- assets/slackreport.json | 2 +- conf/modules.config | 2 +- docs/usage.md | 4 +- lib/NfcoreTemplate.groovy | 32 ++--- modules.json | 6 +- .../dumpsoftwareversions/environment.yml | 7 ++ .../custom/dumpsoftwareversions/main.nf | 6 +- .../custom/dumpsoftwareversions/meta.yml | 7 +- .../dumpsoftwareversions/tests/main.nf.test | 38 ++++++ .../tests/main.nf.test.snap | 27 +++++ .../dumpsoftwareversions/tests/tags.yml | 2 + modules/nf-core/fastqc/environment.yml | 7 ++ modules/nf-core/fastqc/main.nf | 10 +- modules/nf-core/fastqc/meta.yml | 5 + modules/nf-core/fastqc/tests/main.nf.test | 109 ++++++++++++++++++ .../nf-core/fastqc/tests/main.nf.test.snap | 10 ++ modules/nf-core/fastqc/tests/tags.yml | 2 + modules/nf-core/multiqc/environment.yml | 7 ++ modules/nf-core/multiqc/main.nf | 8 +- modules/nf-core/multiqc/meta.yml | 11 +- modules/nf-core/multiqc/tests/main.nf.test | 63 ++++++++++ 
modules/nf-core/multiqc/tests/tags.yml | 2 + nextflow.config | 12 +- 32 files changed, 360 insertions(+), 66 deletions(-) create mode 100644 modules/nf-core/custom/dumpsoftwareversions/environment.yml create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml create mode 100644 modules/nf-core/fastqc/environment.yml create mode 100644 modules/nf-core/fastqc/tests/main.nf.test create mode 100644 modules/nf-core/fastqc/tests/main.nf.test.snap create mode 100644 modules/nf-core/fastqc/tests/tags.yml create mode 100644 modules/nf-core/multiqc/environment.yml create mode 100644 modules/nf-core/multiqc/tests/main.nf.test create mode 100644 modules/nf-core/multiqc/tests/tags.yml diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index c1642f76b..e5e7a3d59 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -27,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to +receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir `. + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 636e0714b..ada206f79 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/ampl - [ ] If necessary, also make a PR on the nf-core/ampliseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 53759d75f..435741ca9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index ec23bef21..9781ad7c0 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd2143..905c58e44 100644 --- a/.github/workflows/linting.yml +++ 
b/.github/workflows/linting.yml @@ -14,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -27,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @@ -40,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check code lints with Black uses: psf/black@stable @@ -71,7 +71,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc0..acf726953 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,7 +4,9 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update - + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - codezombiech.gitignore # Language support for .gitignore files diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a8be928e..85cef0e2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.7.0dev - [date] +## v2.8.0dev - [date] Initial release of nf-core/ampliseq, created with the [nf-core](https://nf-co.re/) template. 
diff --git a/README.md b/README.md index ccec01abd..d60f868d3 100644 --- a/README.md +++ b/README.md @@ -30,11 +30,8 @@ ## Usage -:::note -If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -with `-profile test` before running the workflow on actual data. -::: +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - + diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 64df13dbf..eaf9f190d 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,5 +1,5 @@ report_comment: > - This report has been generated by the nf-core/ampliseq + This report has been generated by the nf-core/ampliseq analysis pipeline. For information about how to interpret these results, please see the documentation. 
report_section_order: diff --git a/assets/slackreport.json b/assets/slackreport.json index b170caabe..6eab3738f 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/ampliseq v${version} - ${runName}", + "author_name": "nf-core/ampliseq ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/conf/modules.config b/conf/modules.config index 39e813865..d91c6aba0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -39,7 +39,7 @@ process { } withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, diff --git a/docs/usage.md b/docs/usage.md index c129febac..569ebf71a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -20,7 +20,7 @@ You will need to create a samplesheet with information about the samples you wou The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. 
Below is an example for the same sample sequenced across 3 lanes: -```console +```csv title="samplesheet.csv" sample,fastq_1,fastq_2 CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz @@ -33,7 +33,7 @@ The pipeline will auto-detect whether a sample is single- or paired-end using th A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. -```console +```csv title="samplesheet.csv" sample,fastq_1,fastq_2 CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 01b8653d0..e248e4c3f 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -4,6 +4,7 @@ import org.yaml.snakeyaml.Yaml import groovy.json.JsonOutput +import nextflow.extension.FilesEx class NfcoreTemplate { @@ -141,12 +142,14 @@ class NfcoreTemplate { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html @@ -155,14 +158,16 @@ class NfcoreTemplate { } // Write 
summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") + FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() } // @@ -227,15 +232,14 @@ class NfcoreTemplate { // Dump pipeline parameters in a json file // public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() } // diff --git a/modules.json b/modules.json index dca112895..d32b29575 100644 --- a/modules.json +++ b/modules.json @@ -7,17 +7,17 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", + 
"git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "4ab13872435962dadc239979554d13709e20bf29", "installed_by": ["modules"] } } diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 000000000..f0c63f698 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.17 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index ebc872733..7685b33cd 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : + 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index c32657de7..5f15a5fde 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: @@ -16,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -30,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 000000000..eec1db10a --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + 
+ input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 000000000..4274ed57a --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,27 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ], + "1": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "2": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "mqc_yml": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "versions": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "yml": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ] + } + ], + "timestamp": "2023-11-03T14:43:22.157011" + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 000000000..405aa24ae --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 000000000..1787b38a9 --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 249f90644..9e19a74c5 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" 
label 'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -37,7 +37,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5a0..ee5507e06 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 000000000..b9e8f926e --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,109 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("Single-Read") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means 
that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, + { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + ) + } + } +// TODO +// // +// // Test with paired-end data +// // +// workflow test_fastqc_paired_end { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with interleaved data +// // +// workflow test_fastqc_interleaved { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with bam data +// // +// workflow test_fastqc_bam { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with multiple samples +// // +// workflow test_fastqc_multiple { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with custom prefix +// // +// workflow test_fastqc_custom_prefix { +// 
input = [ +// [ id:'mysample', single_end:true ], // meta map +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 000000000..636a32cea --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-10-09T23:40:54+0000" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 000000000..7834294ba --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 000000000..bc0bdb5b6 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.18 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387bee..00cc48d27 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' : + 'biocontainers/multiqc:1.18--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee51..f1aa660eb 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,5 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +13,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +30,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +52,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 000000000..c2dad217c --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,63 @@ +nextflow_process { + + name "Test Process MULTIQC" + 
script "../main.nf" + process "MULTIQC" + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("MULTIQC: FASTQC") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } + + test("MULTIQC: FASTQC and a config file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 000000000..bea6c0d37 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/nextflow.config b/nextflow.config index a982e809f..027837bfd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,7 @@ params { input = null // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false @@ -82,6 +82,7 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + 
nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -104,13 +105,13 @@ profiles { } docker { docker.enabled = true - docker.userEmulation = true conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + runOptions = '-u $(id -u):$(id -g)' } arm { docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' @@ -181,7 +182,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -204,6 +205,9 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -229,7 +233,7 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.7.0dev' + version = '2.8.0dev' doi = '' } From b8648b2f422766f999f8d62cfdfd9f2059345803 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 20 Dec 2023 09:09:08 +0100 Subject: [PATCH 087/104] update README.md to github style syntax --- README.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 78c4f1932..2e04e6cc8 100644 --- a/README.md +++ b/README.md @@ -65,13 +65,11 @@ nextflow run nf-core/ampliseq \ --outdir ``` -:::note -Adding metadata will considerably increase the output, see [metadata documentation](https://nf-co.re/ampliseq/usage#metadata). 
-::: +> [!NOTE] +> Adding metadata will considerably increase the output, see [metadata documentation](https://nf-co.re/ampliseq/usage#metadata). -:::note -By default the taxonomic assignment will be performed with DADA2 on SILVA database, but there are various tools and databases readily available, see [taxonomic classification documentation](https://nf-co.re/ampliseq/usage#taxonomic-classification). -::: +> [!TIP] +> By default the taxonomic assignment will be performed with DADA2 on SILVA database, but there are various tools and databases readily available, see [taxonomic classification documentation](https://nf-co.re/ampliseq/usage#taxonomic-classification). > [!WARNING] > Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; From 9ebe892d0f20deb666eeb59b30b725ee32e74a0c Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 20 Dec 2023 09:16:57 +0100 Subject: [PATCH 088/104] update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d4b19e2f..8a0107dd7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files - [#679](https://github.com/nf-core/ampliseq/pull/679) - Prevent masking low complexity regions by VSEARCH with lower case letters - [#680](https://github.com/nf-core/ampliseq/pull/680) - Improved pipeline summary report & error messages +- [#683](https://github.com/nf-core/ampliseq/pull/683) - Template update for nf-core/tools version 2.11 ### `Dependencies` From cbe6bf9a7a4af62cdb64d81d7b343583f726b3ed Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 20 Dec 2023 10:38:23 +0100 Subject: [PATCH 089/104] adjust some assertions in 
.nf.test.snap --- tests/pipeline/doubleprimers.nf.test.snap | 6 +++--- tests/pipeline/fasta.nf.test.snap | 2 +- tests/pipeline/iontorrent.nf.test.snap | 2 +- tests/pipeline/multi.nf.test.snap | 2 +- tests/pipeline/novaseq.nf.test.snap | 2 +- tests/pipeline/pacbio_its.nf.test.snap | 2 +- tests/pipeline/pplace.nf.test.snap | 4 ++-- tests/pipeline/qiimecustom.nf.test.snap | 6 +++--- tests/pipeline/reftaxcustom.nf.test.snap | 6 +++--- tests/pipeline/single.nf.test.snap | 2 +- tests/pipeline/sintax.nf.test.snap | 2 +- tests/pipeline/test.nf.test.snap | 6 +++--- 12 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap index b5e9cb2bb..bfa9d45a4 100644 --- a/tests/pipeline/doubleprimers.nf.test.snap +++ b/tests/pipeline/doubleprimers.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-07-27T13:49:03+0000" }, @@ -52,8 +52,8 @@ }, "multiqc": { "content": [ - 
"multiqc_general_stats.txt:md5,8429be0a16adf09b6634bf31b430bfac", - "multiqc_cutadapt.txt:md5,e89359b4478ef5d10620709f651f26a2" + "multiqc_general_stats.txt:md5,bb1d98b03d4cd5091acfbef93cb38fc4", + "multiqc_cutadapt.txt:md5,0ef33b6eb4d202c34fcfa51a0dffadda" ], "timestamp": "2023-05-28T21:08:54+0000" } diff --git a/tests/pipeline/fasta.nf.test.snap b/tests/pipeline/fasta.nf.test.snap index 6350712f6..db862e0f2 100644 --- a/tests/pipeline/fasta.nf.test.snap +++ b/tests/pipeline/fasta.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:06:17+0000" }, diff --git a/tests/pipeline/iontorrent.nf.test.snap b/tests/pipeline/iontorrent.nf.test.snap index 420b3dd67..61d7a6160 100644 --- a/tests/pipeline/iontorrent.nf.test.snap +++ b/tests/pipeline/iontorrent.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, 
DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T01:42:35+0000" }, diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index daba2601d..730ba6a45 100644 --- a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap @@ -14,7 +14,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/novaseq.nf.test.snap b/tests/pipeline/novaseq.nf.test.snap index 427cd40b2..0b4abc88d 100644 --- a/tests/pipeline/novaseq.nf.test.snap +++ b/tests/pipeline/novaseq.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + 
"{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T00:10:02+0000" }, diff --git a/tests/pipeline/pacbio_its.nf.test.snap b/tests/pipeline/pacbio_its.nf.test.snap index c211e2b61..c6975e2c1 100644 --- a/tests/pipeline/pacbio_its.nf.test.snap +++ b/tests/pipeline/pacbio_its.nf.test.snap @@ -35,7 +35,7 @@ }, "software_versions": { "content": [ - "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T02:07:02+0000" }, diff --git 
a/tests/pipeline/pplace.nf.test.snap b/tests/pipeline/pplace.nf.test.snap index 4f64efa81..724b70f35 100644 --- a/tests/pipeline/pplace.nf.test.snap +++ b/tests/pipeline/pplace.nf.test.snap @@ -8,7 +8,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], 
"timestamp": "2023-06-20T17:24:03+0000" }, @@ -53,7 +53,7 @@ "multiqc": { "content": [ "multiqc_general_stats.txt:md5,9e8ff06d7285ab8748a80e639d3dd54a", - "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" ], "timestamp": "2023-06-20T17:24:03+0000" } diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap index 594688a92..e76286477 100644 --- a/tests/pipeline/qiimecustom.nf.test.snap +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, @@ -35,8 +35,8 @@ "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", - "multiqc_general_stats.txt:md5,88c2b9e6d02b83afe4f9551e6c9a91a7", - "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + "multiqc_general_stats.txt:md5,c6456e36c17e592f12f9a2f9069f24f8", + "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" ], "timestamp": "2023-05-28T21:18:54+0000" } diff --git 
a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index b5aa10f14..2e5918544 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, @@ -53,8 +53,8 @@ "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", - "multiqc_general_stats.txt:md5,88c2b9e6d02b83afe4f9551e6c9a91a7", - "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + "multiqc_general_stats.txt:md5,c6456e36c17e592f12f9a2f9069f24f8", + "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" ], "timestamp": "2023-05-28T21:18:54+0000" } diff --git a/tests/pipeline/single.nf.test.snap b/tests/pipeline/single.nf.test.snap index a31b986cf..7e0bf6191 100644 --- a/tests/pipeline/single.nf.test.snap +++ 
b/tests/pipeline/single.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T20:35:33+0000" }, diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index b19bf8feb..f2d01ce05 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, 
ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, diff --git a/tests/pipeline/test.nf.test.snap b/tests/pipeline/test.nf.test.snap index 967f13691..2d29c03f1 100644 --- a/tests/pipeline/test.nf.test.snap +++ b/tests/pipeline/test.nf.test.snap @@ -22,7 +22,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": 
"2023-05-28T20:55:32+0000" }, @@ -58,8 +58,8 @@ "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", - "multiqc_general_stats.txt:md5,88c2b9e6d02b83afe4f9551e6c9a91a7", - "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + "multiqc_general_stats.txt:md5,c6456e36c17e592f12f9a2f9069f24f8", + "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" ], "timestamp": "2023-05-28T20:55:32+0000" }, From 1e14c0ec8a34ff93ad8f1babb9227cb5828ca506 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 20 Dec 2023 14:22:12 +0100 Subject: [PATCH 090/104] update docker runOptions --- nextflow.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 21483d47c..cde6082a7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -210,10 +210,10 @@ profiles { shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false - runOptions = '-u $(id -u):$(id -g)' + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true From 9742624b19082a54fd481119406785af6bc802d6 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 20 Dec 2023 14:44:48 +0100 Subject: [PATCH 091/104] adjust pplace.nf.test.snap --- tests/pipeline/pplace.nf.test.snap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipeline/pplace.nf.test.snap b/tests/pipeline/pplace.nf.test.snap index 724b70f35..3e23ab638 100644 --- a/tests/pipeline/pplace.nf.test.snap +++ b/tests/pipeline/pplace.nf.test.snap @@ -52,7 +52,7 @@ }, "multiqc": { "content": [ - "multiqc_general_stats.txt:md5,9e8ff06d7285ab8748a80e639d3dd54a", + "multiqc_general_stats.txt:md5,cbe0b448f630111ee18976891354701a", "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" ], "timestamp": "2023-06-20T17:24:03+0000" From 127b88a10216204010fb514aeb6bd1682cdeb96c Mon 
Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 20 Dec 2023 15:54:53 +0000 Subject: [PATCH 092/104] Template update for nf-core/tools version 2.11.1 --- .../{release-announcments.yml => release-announcements.yml} | 0 nextflow.config | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename .github/workflows/{release-announcments.yml => release-announcements.yml} (100%) diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml similarity index 100% rename from .github/workflows/release-announcments.yml rename to .github/workflows/release-announcements.yml diff --git a/nextflow.config b/nextflow.config index 027837bfd..05555e3bf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -111,10 +111,10 @@ profiles { shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false - runOptions = '-u $(id -u):$(id -g)' + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true From 815e62b2af143a1a8545e900a86cae6ee4e3b4a6 Mon Sep 17 00:00:00 2001 From: Daniel Straub <42973691+d4straub@users.noreply.github.com> Date: Thu, 21 Dec 2023 14:05:30 +0100 Subject: [PATCH 093/104] Apply suggestions from code review --- bin/taxref_reformat_qiime_greengenes2022.sh | 8 ++------ conf/ref_databases.config | 8 +++++++- nextflow_schema.json | 3 ++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/bin/taxref_reformat_qiime_greengenes2022.sh b/bin/taxref_reformat_qiime_greengenes2022.sh index aa4678a89..69c75faed 100755 --- a/bin/taxref_reformat_qiime_greengenes2022.sh +++ b/bin/taxref_reformat_qiime_greengenes2022.sh @@ -1,9 +1,5 @@ #!/bin/sh # Decompress files. 
-gzip -c -d 2022.10.seqs.fna.gz > 2022.10.seqs.fna -gzip -c -d 2022.10.taxonomy.md5.tsv.gz > 2022.10.taxonomy.md5.tsv - -# Select and rename files -mv *.fna greengenes2022.fna -mv *.tsv greengenes2022.tax +gzip -c -d *.seqs.fna.gz > greengenes2.fna +gzip -c -d *.taxonomy.md5.tsv.gz > greengenes2.tax diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 157e835e6..e89df3383 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -330,7 +330,13 @@ params { citation = "McDonald, D., Price, M., Goodrich, J. et al. An improved Greengenes taxonomy with explicit ranks for ecological and evolutionary analyses of bacteria and archaea. ISME J 6, 610–618 (2012). https://doi.org/10.1038/ismej.2011.139" fmtscript = "taxref_reformat_qiime_greengenes85.sh" } - 'greengenes2022' { + 'greengenes2' { + title = "Greengenes2 16S - Version 2022.10" + file = [ "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.seqs.fna.gz", "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.taxonomy.md5.tsv.gz" ] + citation = "McDonald, D., Jiang, Y., Balaban, M. et al. Greengenes2 unifies microbial data in a single reference tree. Nat Biotechnol (2023). https://doi.org/10.1038/s41587-023-01845-1" + fmtscript = "taxref_reformat_qiime_greengenes2022.sh" + } + 'greengenes2=2022.10' { title = "Greengenes2 16S - Version 2022.10" file = [ "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.seqs.fna.gz", "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.taxonomy.md5.tsv.gz" ] citation = "McDonald, D., Jiang, Y., Balaban, M. et al. Greengenes2 unifies microbial data in a single reference tree. Nat Biotechnol (2023). 
https://doi.org/10.1038/s41587-023-01845-1" diff --git a/nextflow_schema.json b/nextflow_schema.json index 0f00790f0..29aee21ed 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -443,7 +443,8 @@ "unite-alleuk=8.2", "unite-alleuk", "greengenes85", - "greengenes2022" + "greengenes2", + "greengenes2=2022.10" ] }, "qiime_ref_tax_custom": { From 70b2e01d100e6d015c4cf52236200a3a7c90aff9 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 11 Jan 2024 16:20:06 +0100 Subject: [PATCH 094/104] activate multi-cpu use in QIIME2_EXTRACT --- conf/base.config | 5 +++++ modules/local/qiime2_extract.nf | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/conf/base.config b/conf/base.config index c628a10c3..c16be532b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -63,4 +63,9 @@ process { withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } + withName:QIIME2_EXTRACT { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 24.h * task.attempt, 'time' ) } + } } diff --git a/modules/local/qiime2_extract.nf b/modules/local/qiime2_extract.nf index f3a61b6e5..7ff383fd8 100644 --- a/modules/local/qiime2_extract.nf +++ b/modules/local/qiime2_extract.nf @@ -1,7 +1,5 @@ process QIIME2_EXTRACT { tag "${meta.FW_primer}-${meta.RV_primer}" - label 'process_low' - label 'single_cpu' container "qiime2/core:2023.7" @@ -20,6 +18,7 @@ process QIIME2_EXTRACT { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { error "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead." 
} + def args = task.ext.args ?: '' """ export XDG_CONFIG_HOME="./xdgconfig" export MPLCONFIGDIR="./mplconfigdir" @@ -37,9 +36,11 @@ process QIIME2_EXTRACT { --output-path ref-taxonomy.qza #Extract sequences based on primers qiime feature-classifier extract-reads \\ + --p-n-jobs ${task.cpus} \\ --i-sequences ref-seq.qza \\ --p-f-primer ${meta.FW_primer} \\ --p-r-primer ${meta.RV_primer} \\ + $args \\ --o-reads ${meta.FW_primer}-${meta.RV_primer}-ref-seq.qza \\ --quiet From 54e3d602af89c7304045e047d562e071c4bb5e13 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 12 Jan 2024 09:24:55 +0100 Subject: [PATCH 095/104] fix conda package in FILTER_SSU --- CHANGELOG.md | 1 + modules/local/filter_ssu.nf | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a565e81a..c45a1b96c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#679](https://github.com/nf-core/ampliseq/pull/679) - Prevent masking low complexity regions by VSEARCH with lower case letters - [#680](https://github.com/nf-core/ampliseq/pull/680) - Improved pipeline summary report & error messages - [#683](https://github.com/nf-core/ampliseq/pull/683) - Template update for nf-core/tools version 2.11 +- [#687](https://github.com/nf-core/ampliseq/pull/687) - Correct conda package for ASV SSU filtering ### `Dependencies` diff --git a/modules/local/filter_ssu.nf b/modules/local/filter_ssu.nf index 5b3c623c6..314a63c70 100644 --- a/modules/local/filter_ssu.nf +++ b/modules/local/filter_ssu.nf @@ -2,7 +2,7 @@ process FILTER_SSU { tag "${fasta}" label 'process_low' - conda "bioconductor::biostrings=2.58.0" + conda "bioconda::bioconductor-biostrings=2.58.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bioconductor-biostrings:2.58.0--r40h037d062_0' : 'biocontainers/bioconductor-biostrings:2.58.0--r40h037d062_0' }" From 06dcda77d883da693c0ce3dd916a3428d0b13158 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 12 Jan 2024 10:02:53 +0100 Subject: [PATCH 096/104] update docs --- CHANGELOG.md | 5 ++--- CITATIONS.md | 20 ++++++++++++++++---- docs/usage.md | 4 +++- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a565e81a..3fb8d139e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,11 +19,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -- [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables -- [#673](https://github.com/nf-core/ampliseq/pull/673) - Fix logic relating to generation of qiime2 taxonomy part of summary report +- [#672](https://github.com/nf-core/ampliseq/pull/672),[#688](https://github.com/nf-core/ampliseq/pull/688) - Updated documentation - [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files - [#679](https://github.com/nf-core/ampliseq/pull/679) - Prevent masking low complexity regions by VSEARCH with lower case letters -- [#680](https://github.com/nf-core/ampliseq/pull/680) - Improved pipeline summary report & error messages +- [#680](https://github.com/nf-core/ampliseq/pull/680),[#673](https://github.com/nf-core/ampliseq/pull/673) - Improved pipeline summary report & error messages - [#683](https://github.com/nf-core/ampliseq/pull/683) - Template update for nf-core/tools version 2.11 ### `Dependencies` diff --git a/CITATIONS.md b/CITATIONS.md index ee03b01c8..7c80b906f 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -41,6 +41,10 @@ > Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO. 
The SILVA ribosomal RNA gene database project: improved data processing and web-based tools. Nucleic Acids Res. 2013 Jan;41(Database issue):D590-6. doi: 10.1093/nar/gks1219. Epub 2012 Nov 28. PMID: 23193283; PMCID: PMC3531112. +- [Greengenes2](https://doi.org/10.1038/s41587-023-01845-1) + + > McDonald, D., Jiang, Y., Balaban, M. et al. Greengenes2 unifies microbial data in a single reference tree. Nat Biotechnol (2023). https://doi.org/10.1038/s41587-023-01845-1 + - [PR2 - Protist Reference Ribosomal Database](https://pubmed.ncbi.nlm.nih.gov/23193267/) > Guillou L, Bachar D, Audic S, Bass D, Berney C, Bittner L, Boutte C, Burgaud G, de Vargas C, Decelle J, Del Campo J, Dolan JR, Dunthorn M, Edvardsen B, Holzmann M, Kooistra WH, Lara E, Le Bescot N, Logares R, Mahé F, Massana R, Montresor M, Morard R, Not F, Pawlowski J, Probert I, Sauvadet AL, Siano R, Stoeck T, Vaulot D, Zimmermann P, Christen R. The Protist Ribosomal Reference database (PR2): a catalog of unicellular eukaryote small sub-unit rRNA sequences with curated taxonomy. Nucleic Acids Res. 2013 Jan;41(Database issue):D597-604. doi: 10.1093/nar/gks1160. Epub 2012 Nov 27. PMID: 23193267; PMCID: PMC3531120. @@ -61,13 +65,21 @@ > Kõljalg U, Larsson KH, Abarenkov K, Nilsson RH, Alexander IJ, Eberhardt U, Erland S, Høiland K, Kjøller R, Larsson E, Pennanen T, Sen R, Taylor AF, Tedersoo L, Vrålstad T, Ursing BM. UNITE: a database providing web-based methods for the molecular identification of ectomycorrhizal fungi. New Phytol. 2005 Jun;166(3):1063-8. doi: 10.1111/j.1469-8137.2005.01376.x. PMID: 15869663. - - [MIDORI2 - a collection of reference databases](https://doi.org/10.1002/edn3.303/) +- [MIDORI2 - a collection of reference databases](https://doi.org/10.1002/edn3.303/) + + > Leray, M., Knowlton, N., & Machida, R. J. (2022). MIDORI2: A collection of quality controlled, preformatted, and regularly updated reference databases for taxonomic assignment of eukaryotic mitochondrial sequences. 
Environmental DNA, 4, 894– 907. https://doi.org/10.1002/edn3.303. + +- [COIDB - CO1 Taxonomy Database](https://doi.org/10.17044/scilifelab.20514192.v2) + + > Sundh J, Manoharan L, Iwaszkiewicz-Eggebrecht E, Miraldo A, Andersson A, Ronquist F. COI reference sequences from BOLD DB. doi: https://doi.org/10.17044/scilifelab.20514192.v2. + +- [PhytoRef plastid 16S rRNA database for photosynthetic eukaryotes](https://pubmed.ncbi.nlm.nih.gov/25740460/) - > Leray, M., Knowlton, N., & Machida, R. J. (2022). MIDORI2: A collection of quality controlled, preformatted, and regularly updated reference databases for taxonomic assignment of eukaryotic mitochondrial sequences. Environmental DNA, 4, 894– 907. https://doi.org/10.1002/edn3.303. + > Decelle J, Romac S, Stern RF, Bendif el M, Zingone A, Audic S, Guiry MD, Guillou L, Tessier D, Le Gall F, Gourvil P, Dos Santos AL, Probert I, Vaulot D, de Vargas C, Christen R. PhytoREF: a reference database of the plastidial 16S rRNA gene of photosynthetic eukaryotes with curated taxonomy. Mol Ecol Resour. 2015 Nov;15(6):1435-45. doi: 10.1111/1755-0998.12401. Epub 2015 Apr 6. PMID: 25740460. - - [COIDB - CO1 Taxonomy Database](https://doi.org/10.17044/scilifelab.20514192.v2) +- [Zehr lab nifH database](http://doi.org/10.5281/zenodo.7996213) - > Sundh J, Manoharan L, Iwaszkiewicz-Eggebrecht E, Miraldo A, Andersson A, Ronquist F. COI reference sequences from BOLD DB. doi: https://doi.org/10.17044/scilifelab.20514192.v2. + > M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. 
http://doi.org/10.5281/zenodo.7996213 ### Phylogenetic placement diff --git a/docs/usage.md b/docs/usage.md index 779aa846c..acf62a379 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -231,6 +231,8 @@ Pre-configured reference taxonomy databases are: | unite-alleuk | + | + | - | + | eukaryotic nuclear ribosomal ITS region | | coidb | + | + | - | - | eukaryotic Cytochrome Oxidase I (COI) | | midori2-co1 | + | - | - | - | eukaryotic Cytochrome Oxidase I (COI) | +| phytoref | + | - | - | - | eukaryotic plastid 16S rRNA | +| zehr-nifh | + | - | - | - | Nitrogenase iron protein NifH | | standard | - | - | + | - | any in genomes of archaea, bacteria, viruses³ | ¹[`--dada_taxonomy_rc`](https://nf-co.re/ampliseq/parameters#dada_taxonomy_rc) is recommended; ²: de-replicated at 85%, only for testing purposes; ³: quality of results might vary @@ -242,7 +244,7 @@ Special features of taxonomic classification tools: - QIIME2's reference taxonomy databases will have regions matching the amplicon extracted with primer sequences. - DADA2, Kraken2, and QIIME2 have specific parameters to accept custom databases (but theoretically possible with all classifiers) -Parameter guidance is given in [nf-core/ampliseq website parameter documentation](https://nf-co.re/ampliseq/parameters/#taxonomic-database). +Parameter guidance is given in [nf-core/ampliseq website parameter documentation](https://nf-co.re/ampliseq/parameters/#taxonomic-database). Citations are listed in [`CITATIONS.md`](https://github.com/nf-core/ampliseq/blob/master/CITATIONS.md). ### Metadata From 9dbb3531423128282ed9732a812153a633ac0ba8 Mon Sep 17 00:00:00 2001 From: Daniel Straub <42973691+d4straub@users.noreply.github.com> Date: Fri, 12 Jan 2024 10:19:25 +0100 Subject: [PATCH 097/104] Apply suggestions from code review Co-authored-by: Till E.
<64961761+tillenglert@users.noreply.github.com> --- CITATIONS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index 7c80b906f..73e92bc00 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -67,7 +67,7 @@ - [MIDORI2 - a collection of reference databases](https://doi.org/10.1002/edn3.303/) - > Leray, M., Knowlton, N., & Machida, R. J. (2022). MIDORI2: A collection of quality controlled, preformatted, and regularly updated reference databases for taxonomic assignment of eukaryotic mitochondrial sequences. Environmental DNA, 4, 894– 907. https://doi.org/10.1002/edn3.303. + > Leray, M., Knowlton, N., & Machida, R. J. (2022). MIDORI2: A collection of quality controlled, preformatted, and regularly updated reference databases for taxonomic assignment of eukaryotic mitochondrial sequences. Environmental DNA, 4, 894– 907. doi: https://doi.org/10.1002/edn3.303. - [COIDB - CO1 Taxonomy Database](https://doi.org/10.17044/scilifelab.20514192.v2) @@ -79,7 +79,7 @@ - [Zehr lab nifH database](http://doi.org/10.5281/zenodo.7996213) - > M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213 + > M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. 
doi: http://doi.org/10.5281/zenodo.7996213 ### Phylogenetic placement From 08cb7f06ed557c12ede3163c33cb467eed357037 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 12 Jan 2024 10:37:53 +0100 Subject: [PATCH 098/104] bump version to 2.8.0 --- CHANGELOG.md | 2 +- assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- tests/pipeline/doubleprimers.nf.test.snap | 2 +- tests/pipeline/fasta.nf.test.snap | 2 +- tests/pipeline/iontorrent.nf.test.snap | 2 +- tests/pipeline/multi.nf.test.snap | 2 +- tests/pipeline/novaseq.nf.test.snap | 2 +- tests/pipeline/pacbio_its.nf.test.snap | 2 +- tests/pipeline/pplace.nf.test.snap | 2 +- tests/pipeline/qiimecustom.nf.test.snap | 2 +- tests/pipeline/reftaxcustom.nf.test.snap | 2 +- tests/pipeline/single.nf.test.snap | 2 +- tests/pipeline/sintax.nf.test.snap | 2 +- tests/pipeline/test.nf.test.snap | 2 +- 15 files changed, 16 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e9e7584f..f58c1c96b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## nf-core/ampliseq version 2.8.0dev +## nf-core/ampliseq version 2.8.0 - 2024-01-16 ### `Added` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index eaf9f190d..af96c9d1a 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/ampliseq + This report has been generated by the nf-core/ampliseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-ampliseq-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index cde6082a7..903eda546 100644 --- a/nextflow.config +++ b/nextflow.config @@ -338,7 +338,7 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.8.0dev' + version = '2.8.0' doi = '10.5281/zenodo.1493841' } diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap index bfa9d45a4..7cb5f68a6 100644 --- a/tests/pipeline/doubleprimers.nf.test.snap +++ b/tests/pipeline/doubleprimers.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-07-27T13:49:03+0000" }, diff --git a/tests/pipeline/fasta.nf.test.snap b/tests/pipeline/fasta.nf.test.snap index db862e0f2..c049085ce 100644 --- a/tests/pipeline/fasta.nf.test.snap +++ b/tests/pipeline/fasta.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": 
[ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T21:06:17+0000" }, diff --git a/tests/pipeline/iontorrent.nf.test.snap b/tests/pipeline/iontorrent.nf.test.snap index 61d7a6160..989cbd593 100644 --- a/tests/pipeline/iontorrent.nf.test.snap +++ b/tests/pipeline/iontorrent.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T01:42:35+0000" }, diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index 730ba6a45..913b38188 100644 --- a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap @@ -14,7 +14,7 @@ }, "software_versions": { "content": [ - 
"{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/novaseq.nf.test.snap b/tests/pipeline/novaseq.nf.test.snap index 0b4abc88d..e6c17e7fb 100644 --- a/tests/pipeline/novaseq.nf.test.snap +++ b/tests/pipeline/novaseq.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T00:10:02+0000" }, diff --git a/tests/pipeline/pacbio_its.nf.test.snap b/tests/pipeline/pacbio_its.nf.test.snap index c6975e2c1..3cbdef01a 100644 --- 
a/tests/pipeline/pacbio_its.nf.test.snap +++ b/tests/pipeline/pacbio_its.nf.test.snap @@ -35,7 +35,7 @@ }, "software_versions": { "content": [ - "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T02:07:02+0000" }, diff --git a/tests/pipeline/pplace.nf.test.snap b/tests/pipeline/pplace.nf.test.snap index 3e23ab638..c8c3a9f1a 100644 --- a/tests/pipeline/pplace.nf.test.snap +++ b/tests/pipeline/pplace.nf.test.snap @@ -8,7 +8,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, 
GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T17:24:03+0000" }, diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap index e76286477..5f758fd48 100644 --- a/tests/pipeline/qiimecustom.nf.test.snap +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, 
dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 2e5918544..4bfd8c2d7 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, 
RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/single.nf.test.snap b/tests/pipeline/single.nf.test.snap index 7e0bf6191..1b6e33c07 100644 --- a/tests/pipeline/single.nf.test.snap +++ b/tests/pipeline/single.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T20:35:33+0000" }, diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index f2d01ce05..fed045a40 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, 
SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, diff --git a/tests/pipeline/test.nf.test.snap b/tests/pipeline/test.nf.test.snap index 2d29c03f1..8441f2a56 100644 --- a/tests/pipeline/test.nf.test.snap +++ b/tests/pipeline/test.nf.test.snap @@ -22,7 +22,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, 
FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T20:55:32+0000" }, From 24ab70982bfb6118f0f6251c453756de18c00f72 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 12 Jan 2024 10:40:59 +0100 Subject: [PATCH 099/104] fix prettier --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f58c1c96b..6835818d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## nf-core/ampliseq version 2.8.0 - 2024-01-16 +## nf-core/ampliseq version 2.8.0 - 2024-01-16 ### `Added` From 0be678e6671585dd0a82a410c460eef40014891e Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 15 Jan 2024 10:10:29 +0100 Subject: [PATCH 100/104] update parameter help --- nextflow_schema.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 29aee21ed..84859d190 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,7 +16,7 @@ "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to tab-separated sample sheet", - "help_text": "Path to sample sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml), that points to compressed fastq files.\n\nThe sample sheet must have two to four tab-separated columns/entries with the following headers: \n- `sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads 
zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)", + "help_text": "Path to sample sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml), that points to compressed fastq files.\n\nThe sample sheet must have two to four tab-separated columns/entries with the following headers: \n- `sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- Choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)", "schema": "assets/schema_input.json" }, "input_fasta": { @@ -24,14 +24,14 @@ "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to ASV/OTU fasta file", - "help_text": "Path to fasta format file with sequences that will be taxonomically 
classified. The fasta file input option can be used to taxonomically classify previously produced ASV/OTU sequences.\n\nThe fasta sequence header line may contain a description, that will be kept as part of the sequence name. However, tabs will be changed into spaces.\n\nRelated parameters are:\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" + "help_text": "Path to fasta format file with sequences that will be taxonomically classified. The fasta file input option can be used to taxonomically classify previously produced ASV/OTU sequences.\n\nThe fasta sequence header line may contain a description, that will be kept as part of the sequence name. However, tabs will be changed into spaces.\n\nRelated parameters are:\n- Choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" }, "input_folder": { "type": "string", "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to folder containing zipped FastQ files", - "help_text": "Path to folder containing compressed fastq files.\n\nExample for input data organization from one sequencing run with two samples, paired-end data:\n\n```bash\ndata\n \u251c\u2500sample1_1_L001_R1_001.fastq.gz\n \u251c\u2500sample1_1_L001_R2_001.fastq.gz\n \u251c\u2500sample2_1_L001_R1_001.fastq.gz\n \u2514\u2500sample2_1_L001_R2_001.fastq.gz\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The folder must contain gzip compressed demultiplexed fastq files. If the file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`), please check `--extension`.\n3. Sample identifiers are extracted from file names, i.e. the string before the first underscore `_`, these must be unique\n4. 
If your data is scattered, produce a sample sheet\n5. All sequencing data should originate from one sequencing run, because processing relies on run-specific error models that are unreliable when data from several sequencing runs are mixed. Sequencing data originating from multiple sequencing runs requires additionally the parameter `--multiple_sequencing_runs` and a specific folder structure.\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--multiple_sequencing_runs` if the sequencing data originates from multiple sequencing runs\n- `--extension` if the sequencing file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" + "help_text": "Path to folder containing compressed fastq files.\n\nExample for input data organization from one sequencing run with two samples, paired-end data:\n\n```bash\ndata\n \u251c\u2500sample1_1_L001_R1_001.fastq.gz\n \u251c\u2500sample1_1_L001_R2_001.fastq.gz\n \u251c\u2500sample2_1_L001_R1_001.fastq.gz\n \u2514\u2500sample2_1_L001_R2_001.fastq.gz\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The folder must contain gzip compressed demultiplexed fastq files. If the file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`), please check `--extension`.\n3. Sample identifiers are extracted from file names, i.e. the string before the first underscore `_`, these must be unique\n4. If your data is scattered, produce a sample sheet\n5. 
All sequencing data should originate from one sequencing run, because processing relies on run-specific error models that are unreliable when data from several sequencing runs are mixed. Sequencing data originating from multiple sequencing runs requires additionally the parameter `--multiple_sequencing_runs` and a specific folder structure.\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--multiple_sequencing_runs` if the sequencing data originates from multiple sequencing runs\n- `--extension` if the sequencing file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`)\n- Choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" }, "FW_primer": { "type": "string", @@ -372,7 +372,7 @@ }, "dada_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--dada_ref_taxonomy`. Either `--skip_dada_addspecies` (no species annotation) or `--dada_ref_tax_custom_sp` (species annotation) is additionally required. Consider also setting `--dada_assign_taxlevels`.\n\nMust be compatible to DADA2's assignTaxonomy function: 'Can be compressed. This reference fasta file should be formatted so that the id lines correspond to the taxonomy (or classification) of the associated sequence, and each taxonomic level is separated by a semicolon.' See also https://rdrr.io/bioc/dada2/man/assignTaxonomy.html", + "help_text": "Overwrites `--dada_ref_taxonomy`. Either `--skip_dada_addspecies` (no species annotation) or `--dada_ref_tax_custom_sp` (species annotation) is additionally required. Consider also setting `--dada_assign_taxlevels`.\n\nMust be compatible to DADA2's assignTaxonomy function: 'Can be compressed. 
This reference fasta file should be formatted so that the id lines correspond to the taxonomy (or classification) of the associated sequence, and each taxonomic level is separated by a semicolon.' See also https://rdrr.io/bioc/dada2/man/assignTaxonomy.html", "description": "Path to a custom DADA2 reference taxonomy database" }, "dada_ref_tax_custom_sp": { @@ -449,7 +449,7 @@ }, "qiime_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths (sequence, taxonomy).", + "help_text": "Overwrites `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths (sequence, taxonomy).", "description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)" }, "classifier": { @@ -475,7 +475,7 @@ }, "kraken2_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--kraken2_ref_taxonomy`. Consider also setting `--kraken2_assign_taxlevels`. Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database. See also https://benlangmead.github.io/aws-indexes/k2.", + "help_text": "Overwrites `--kraken2_ref_taxonomy`. Consider also setting `--kraken2_assign_taxlevels`. Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database. 
See also https://benlangmead.github.io/aws-indexes/k2.", "description": "Path to a custom Kraken2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)" }, "kraken2_assign_taxlevels": { From 678ce1aa94fabe66aff6a0c0189282780c0fe864 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 15 Jan 2024 10:32:05 +0100 Subject: [PATCH 101/104] update qiime_ref_tax_custom help text --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 84859d190..938690d47 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -449,8 +449,8 @@ }, "qiime_ref_tax_custom": { "type": "string", - "help_text": "Overwrites `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths (sequence, taxonomy).", - "description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)" + "help_text": "Overwrites `--qiime_ref_taxonomy`. Either path to tarball (`*.tar.gz` or `*.tgz`) that contains sequence (`*.fna`) and taxonomy (`*.tax`) data, or alternatively a comma separated pair of filepaths to sequence (`*.fna`) and taxonomy (`*.tax`) data (possibly gzipped `*.gz`).", + "description": "Path to files of a custom QIIME2 reference taxonomy database (tarball, or two comma-separated files)" }, "classifier": { "type": "string", From 5ad866f15c31ee3b3d56ad7a2c57a28aff8121d1 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 15 Jan 2024 10:46:46 +0100 Subject: [PATCH 102/104] change qiime_ref_tax_custom test files to nf-core --- CHANGELOG.md | 4 ++-- conf/test_qiimecustom.config | 2 +- conf/test_reftaxcustom.config | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6835818d7..402d8f727 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#666](https://github.com/nf-core/ampliseq/pull/666) - Added Greengenes2 
database, version 2022.10, support for QIIME2 taxonomic classification. -- [#667](https://github.com/nf-core/ampliseq/pull/667) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification +- [#667](https://github.com/nf-core/ampliseq/pull/667),[#691](https://github.com/nf-core/ampliseq/pull/691) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification - [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` - [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh` - [#681](https://github.com/nf-core/ampliseq/pull/681) - For DADA2, with `--dada_addspecies_allowmultiple` multiple exact species matches are reported and with `--dada_taxonomy_rc` reverse-complement matches are also considered in taxonomic classification @@ -19,7 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -- [#672](https://github.com/nf-core/ampliseq/pull/672),[#688](https://github.com/nf-core/ampliseq/pull/688) - Updated documentation +- [#672](https://github.com/nf-core/ampliseq/pull/672),[#688](https://github.com/nf-core/ampliseq/pull/6),[#691](https://github.com/nf-core/ampliseq/pull/691) - Updated documentation - [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files - [#679](https://github.com/nf-core/ampliseq/pull/679) - Prevent masking low complexity regions by VSEARCH with lower case letters - [#680](https://github.com/nf-core/ampliseq/pull/680),[#673](https://github.com/nf-core/ampliseq/pull/673) - Improved pipeline summary report & error messages diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config index 2fc9cb736..dd02eb4e9 100644 --- a/conf/test_qiimecustom.config +++ 
b/conf/test_qiimecustom.config @@ -25,7 +25,7 @@ params { input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" // Custom reference taxonomy - qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/85_greengenes.fna.gz,https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 skip_qiime_downstream = true diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index 1afe1c2df..40408bfb1 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -30,7 +30,7 @@ params { dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus" kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz" kraken2_assign_taxlevels = "D,P,C,O" - qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tar.gz" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/85_greengenes.tar.gz" // Skip downstream analysis with QIIME2 skip_qiime_downstream = true From 4694510a8f9d0846217f34d5cb27c161a7f916d4 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 15 Jan 2024 12:51:23 +0100 Subject: [PATCH 103/104] replace local gzip with nf-core module pigz/uncompress --- modules.json | 5 ++ modules/local/gzip_decompress.nf | 32 ------------- modules/nf-core/pigz/uncompress/main.nf | 48 +++++++++++++++++++ modules/nf-core/pigz/uncompress/meta.yml | 32 +++++++++++++ .../pigz/uncompress/tests/main.nf.test | 33 +++++++++++++ .../pigz/uncompress/tests/main.nf.test.snap | 21 ++++++++ 
.../nf-core/pigz/uncompress/tests/tags.yml | 2 + subworkflows/local/qiime2_preptax.nf | 8 ++-- 8 files changed, 145 insertions(+), 36 deletions(-) delete mode 100644 modules/local/gzip_decompress.nf create mode 100644 modules/nf-core/pigz/uncompress/main.nf create mode 100644 modules/nf-core/pigz/uncompress/meta.yml create mode 100644 modules/nf-core/pigz/uncompress/tests/main.nf.test create mode 100644 modules/nf-core/pigz/uncompress/tests/main.nf.test.snap create mode 100644 modules/nf-core/pigz/uncompress/tests/tags.yml diff --git a/modules.json b/modules.json index 6969dc1d6..595c024f8 100644 --- a/modules.json +++ b/modules.json @@ -81,6 +81,11 @@ "git_sha": "4ab13872435962dadc239979554d13709e20bf29", "installed_by": ["modules"] }, + "pigz/uncompress": { + "branch": "master", + "git_sha": "4ef7becf6a2bbc8df466885d10b4051d1f318a6a", + "installed_by": ["modules"] + }, "untar": { "branch": "master", "git_sha": "d0b4fc03af52a1cc8c6fb4493b921b57352b1dd8", diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf deleted file mode 100644 index c6ea37a5a..000000000 --- a/modules/local/gzip_decompress.nf +++ /dev/null @@ -1,32 +0,0 @@ -process GZIP_DECOMPRESS { - tag "$file" - label 'process_single' - - conda "conda-forge::sed=4.7 conda-forge::gzip=1.13" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" - - input: - path(file) - - output: - path("$outfile"), emit: ungzip - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - outfile = task.ext.outfile ?: file.baseName.toString().replaceFirst(/\.gz$/, "") - - """ - gzip $args -c -d $file > $outfile - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gzip: \$(echo \$(gzip --version 2>&1) | sed 's/gzip //; s/ Copyright.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/pigz/uncompress/main.nf b/modules/nf-core/pigz/uncompress/main.nf new file mode 100644 index 000000000..9383c1464 --- /dev/null +++ b/modules/nf-core/pigz/uncompress/main.nf @@ -0,0 +1,48 @@ +process PIGZ_UNCOMPRESS { + label 'process_low' + //stageInMode 'copy' // this directive can be set in case the original input should be kept + + conda "conda-forge::pigz" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pigz:2.8': + 'biocontainers/pigz:2.8' }" + + input: + path zip + + output: + path "${uncompressed_filename}" , emit: file + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + uncompressed_filename = zip.toString() - '.gz' + // calling pigz -f to make it follow symlinks + """ + unpigz \\ + -p $task.cpus \\ + -fk \\ + $args \\ + ${zip} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + uncompressed_filename = zip.toString() - '.gz' + """ + touch ${zip.dropRight(3)} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/pigz/uncompress/meta.yml b/modules/nf-core/pigz/uncompress/meta.yml new file mode 100644 index 000000000..574a004be --- /dev/null +++ b/modules/nf-core/pigz/uncompress/meta.yml @@ -0,0 +1,32 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "pigz_uncompress" +description: write your description here +keywords: + - uncompress + - gzip + - parallelized +tools: + - "pigz": + description: "Parallel implementation of the gzip algorithm." 
+ homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + +input: + - zip: + type: file + description: Gzipped file + pattern: "*.{gzip}" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - file: + type: file + description: File to compress + pattern: "*" + +authors: + - "@lrauschning" diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test b/modules/nf-core/pigz/uncompress/tests/main.nf.test new file mode 100644 index 000000000..579556586 --- /dev/null +++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process PIGZ_UNCOMPRESS" + script "modules/nf-core/pigz/uncompress/main.nf" + process "PIGZ_UNCOMPRESS" + tag "modules" + tag "modules_nfcore" + tag "pigz" + tag "pigz/uncompress" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap b/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap new file mode 100644 index 000000000..038cf2d72 --- /dev/null +++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap @@ -0,0 +1,21 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ], + "1": [ + "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683" + ], + "file": [ + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ], + "versions": [ + "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683" + ] + } + ], + "timestamp": "2023-10-18T12:37:21.987858" + } +} \ No newline at end of file diff --git a/modules/nf-core/pigz/uncompress/tests/tags.yml b/modules/nf-core/pigz/uncompress/tests/tags.yml new file mode 100644 
index 000000000..6719a90ad --- /dev/null +++ b/modules/nf-core/pigz/uncompress/tests/tags.yml @@ -0,0 +1,2 @@ +pigz/uncompress: + - modules/nf-core/pigz/uncompress/** diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index dfa287253..ce7bac788 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -3,7 +3,7 @@ */ include { UNTAR } from '../../modules/nf-core/untar/main' -include { GZIP_DECOMPRESS } from '../../modules/local/gzip_decompress.nf' +include { PIGZ_UNCOMPRESS } from '../../modules/nf-core/pigz/uncompress/main' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' @@ -29,10 +29,10 @@ workflow QIIME2_PREPTAX { }.set { ch_qiime_ref_tax_branched } ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed (ends with `.gz`) or decompressed sequence (ends with `.fna`) or taxonomy file (ends with `.tax`). Please review input." 
} - GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.compressed) - ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) + PIGZ_UNCOMPRESS(ch_qiime_ref_tax_branched.compressed) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(PIGZ_UNCOMPRESS.out.versions) - ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip + ch_qiime_db_files = PIGZ_UNCOMPRESS.out.file ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) ch_ref_database_fna = ch_qiime_db_files.filter { From 08353dd6d648fc489b324fa753d91dfd81cf848e Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 15 Jan 2024 12:54:16 +0100 Subject: [PATCH 104/104] fix typo --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 402d8f727..723d4a835 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -- [#672](https://github.com/nf-core/ampliseq/pull/672),[#688](https://github.com/nf-core/ampliseq/pull/6),[#691](https://github.com/nf-core/ampliseq/pull/691) - Updated documentation +- [#672](https://github.com/nf-core/ampliseq/pull/672),[#688](https://github.com/nf-core/ampliseq/pull/688),[#691](https://github.com/nf-core/ampliseq/pull/691) - Updated documentation - [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files - [#679](https://github.com/nf-core/ampliseq/pull/679) - Prevent masking low complexity regions by VSEARCH with lower case letters - [#680](https://github.com/nf-core/ampliseq/pull/680),[#673](https://github.com/nf-core/ampliseq/pull/673) - Improved pipeline summary report & error messages