diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 788582d92..e4b532bef 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,6 +50,7 @@ jobs: - "test_failed" - "test_multi" - "test_reftaxcustom" + - "test_qiimecustom" - "test_doubleprimers" - "test_iontorrent" - "test_novaseq" diff --git a/CHANGELOG.md b/CHANGELOG.md index 9af07dcf3..7ab5dc5bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#667](https://github.com/nf-core/ampliseq/pull/667) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification - [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` - [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh` diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 8c8fc21e7..264a71493 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -980,9 +980,15 @@ cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in # Header cat("## QIIME2\n") -cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) - using the database: `", params$qiime2_ref_tax_title, "`. - More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "") +# indicate reference taxonomy +if ( !isFALSE(params$qiime2_ref_tax_title) ) { + cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) + using the database: `", params$qiime2_ref_tax_title, "`. 
+ More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "") +} else { + cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) using a custom database ", + "provided by the user.\n\n", sep = "") +} # Read file and prepare table asv_tax <- read.table(params$qiime2_taxonomy, header = TRUE, sep = "\t") diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config new file mode 100644 index 000000000..2fc9cb736 --- /dev/null +++ b/conf/test_qiimecustom.config @@ -0,0 +1,33 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/ampliseq -profile test_qiimecustom,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test custom QIIME2 reference taxonomy database profile' + config_profile_description = 'Minimal test dataset to check --qiime_ref_tax_custom' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + FW_primer = "GTGYCAGCMGCCGCGGTAA" + RV_primer = "GGACTACNVGGGTWTCTAAT" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" + + // Custom reference taxonomy + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" + + // Skip downstream analysis with QIIME2 + skip_qiime_downstream = true + skip_dada_taxonomy = true +} diff --git
a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index 4233d1ea0..1afe1c2df 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -30,7 +30,8 @@ params { dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus" kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz" kraken2_assign_taxlevels = "D,P,C,O" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tar.gz" // Skip downstream analysis with QIIME2 - skip_qiime = true + skip_qiime_downstream = true } diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index 5e1039115..25db3ed68 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -77,12 +77,12 @@ class WorkflowAmpliseq { } if (params.skip_dada_taxonomy && params.sbdiexport) { - if (!params.sintax_ref_taxonomy && (params.skip_qiime || !params.qiime_ref_taxonomy)) { + if (!params.sintax_ref_taxonomy && (params.skip_qiime || (!params.qiime_ref_taxonomy && !params.qiime_ref_tax_custom))) { Nextflow.error("Incompatible parameters: `--sbdiexport` expects taxa annotation and therefore annotation with either DADA2, SINTAX, or QIIME2 is needed.") } } - if ( (!params.FW_primer || !params.RV_primer) && params.qiime_ref_taxonomy && !params.skip_qiime && !params.skip_taxonomy ) { + if ( (!params.FW_primer || !params.RV_primer) && (params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_qiime && !params.skip_taxonomy ) { Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the QIIME2 reference database to the amplicon sequences. Please specify primers or do not use `--qiime_ref_taxonomy`.") } @@ -90,8 +90,8 @@ class WorkflowAmpliseq { Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the DADA2 reference database to the amplicon sequences. 
Please specify primers or do not use `--cut_dada_ref_taxonomy`.") } - if (params.qiime_ref_taxonomy && params.classifier) { - Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.") + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && params.classifier) { + Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` and `--qiime_ref_tax_custom` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.") } if (params.kraken2_ref_tax_custom && !params.kraken2_assign_taxlevels ) { diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 7f49735e4..4b7ec2afc 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -34,7 +34,7 @@ class WorkflowMain { if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { sintaxreftaxonomyExistsError(params, log) } - if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_taxonomy && !params.classifier) { qiimereftaxonomyExistsError(params, log) } diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf new file mode 100644 index 000000000..c6ea37a5a --- /dev/null +++ b/modules/local/gzip_decompress.nf @@ -0,0 +1,32 @@ +process GZIP_DECOMPRESS { + tag "$file" + label 'process_single' + + conda "conda-forge::sed=4.7 conda-forge::gzip=1.13" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + path(file) + + output: + path("$outfile"), emit: ungzip + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + outfile = task.ext.outfile ?: file.baseName.toString().replaceFirst(/\.gz$/, "") + + """ + gzip $args -c -d $file > $outfile + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gzip: \$(echo \$(gzip --version 2>&1) | sed 's/gzip //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index a8e082b01..1a288a0fb 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -118,7 +118,8 @@ process SUMMARY_REPORT { kraken2_tax ? "kraken2_taxonomy='$kraken2_tax',kraken2_confidence='$params.kraken2_confidence'" : "", kraken2_tax && !params.kraken2_ref_tax_custom ? "kraken2_ref_tax_title='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["title"]}',kraken2_ref_tax_file='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["file"]}',kraken2_ref_tax_citation='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["citation"]}'" : "", pplace_tax ? "pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "", - qiime2_tax && params.qiime_ref_taxonomy ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "", + qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "", + qiime2_tax && params.qiime_ref_taxonomy ? 
"qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : "", run_qiime2 ? "val_used_taxonomy='$val_used_taxonomy'" : "", filter_stats_tsv ? "filter_stats_tsv='$filter_stats_tsv',qiime2_filtertaxa='$qiime2_filtertaxa',exclude_taxa='$params.exclude_taxa',min_frequency='$params.min_frequency',min_samples='$params.min_samples'" : "", barplot ? "barplot=TRUE" : "", diff --git a/nextflow.config b/nextflow.config index 8ca827203..831a43a22 100644 --- a/nextflow.config +++ b/nextflow.config @@ -87,6 +87,7 @@ params { skip_dada_quality = false skip_barrnap = false skip_qiime = false + skip_qiime_downstream = false skip_fastqc = false skip_alpha_rarefaction = false skip_abundance_tables = false @@ -108,6 +109,7 @@ params { cut_dada_ref_taxonomy = false sintax_ref_taxonomy = null qiime_ref_taxonomy = null + qiime_ref_tax_custom = null kraken2_ref_taxonomy = null kraken2_assign_taxlevels = null kraken2_ref_tax_custom = null @@ -272,6 +274,7 @@ profiles { test_failed { includeConfig 'conf/test_failed.config' } test_full { includeConfig 'conf/test_full.config' } test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' } + test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' } test_novaseq { includeConfig 'conf/test_novaseq.config' } test_pplace { includeConfig 'conf/test_pplace.config' } test_sintax { includeConfig 'conf/test_sintax.config' } diff --git a/nextflow_schema.json b/nextflow_schema.json index d2e5faa92..b8afed35a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -435,6 +435,11 @@ "greengenes85" ] }, + "qiime_ref_tax_custom": { + "type": "string", + "help_text": "Is preferred over `--qiime_ref_taxonomy`. 
Either a comma separated pair of (possibly gzipped) filepaths (sequence `*.fna[.gz]`, taxonomy `*.tax[.gz]`), or a single path to a directory or tarball (`.tar.gz` or `.tgz`) containing one `.fna` and one `.tax` file.", + "description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)" + }, "classifier": { "type": "string", "description": "Path to QIIME2 trained classifier file (typically *-classifier.qza)", @@ -654,6 +659,10 @@ "type": "boolean", "description": "Skip all steps that are executed by QIIME2, including QIIME2 software download, taxonomy assignment by QIIME2, barplots, relative abundance tables, diversity analysis, differential abundance testing." }, + "skip_qiime_downstream": { + "type": "boolean", + "description": "Skip steps that are executed by QIIME2 except for taxonomic classification. Skip steps including barplots, relative abundance tables, diversity analysis, differential abundance testing." + }, "skip_taxonomy": { "type": "boolean", "description": "Skip taxonomic classification. Incompatible with `--sbdiexport`" diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 7f3cb80b2..dfa287253 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -2,6 +2,8 @@ * Training of a classifier with QIIME2 */ +include { UNTAR } from '../../modules/nf-core/untar/main' +include { GZIP_DECOMPRESS } from '../../modules/local/gzip_decompress.nf' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' @@ -9,13 +11,82 @@ include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' workflow QIIME2_PREPTAX { take: ch_qiime_ref_taxonomy //channel, list of files + val_qiime_ref_taxonomy //val FW_primer //val RV_primer //val main: - FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + ch_qiime2_preptax_versions = Channel.empty() + + if (params.qiime_ref_tax_custom) { + // Handle case where we have been provided a pair of filepaths.
+ if ("${params.qiime_ref_tax_custom}".contains(",")) { + ch_qiime_ref_taxonomy.flatten() + .branch { + compressed: it.isFile() && it.getName().endsWith(".gz") + decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith(".tax") ) + failed: true + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed (ends with `.gz`) or decompressed sequence (ends with `.fna`) or taxonomy file (ends with `.tax`). Please review input." } + + GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.compressed) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) + + ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip + ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) + + ch_ref_database_fna = ch_qiime_db_files.filter { + it.getName().endsWith(".fna") + } + ch_ref_database_tax = ch_qiime_db_files.filter { + it.getName().endsWith(".tax") + } + + ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) + // Handle case we have been provided a single filepath (tarball or directory). + } else { + ch_qiime_ref_taxonomy.flatten() + .branch { + tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) + dir: it.isDirectory() + failed: true + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." 
} + + UNTAR ( + ch_qiime_ref_tax_branched.tar + .map { + db -> + def meta = [:] + meta.id = val_qiime_ref_taxonomy + [ meta, db ] } ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(UNTAR.out.versions) + + ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } + ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) + + ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.fna"), checkIfExists: true) + } | filter { + if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." + it.size() == 1 + } + ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.tax"), checkIfExists: true) + } | filter { + if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." + it.size() == 1 + } + + ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) + } + } else { + FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(FORMAT_TAXONOMY_QIIME.out.versions) + + ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) + } - ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) ch_ref_database .map { db -> @@ -24,10 +95,15 @@ workflow QIIME2_PREPTAX { meta.RV_primer = RV_primer [ meta, db ] } .set { ch_ref_database } + QIIME2_EXTRACT ( ch_ref_database ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_EXTRACT.out.versions) + QIIME2_TRAIN ( QIIME2_EXTRACT.out.qza ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_TRAIN.out.versions) emit: - classifier = QIIME2_TRAIN.out.qza - versions = QIIME2_TRAIN.out.versions + classifier = QIIME2_TRAIN.out.qza + versions = ch_qiime2_preptax_versions } + diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap index d7cc9dce8..b5e9cb2bb 100644 --- a/tests/pipeline/doubleprimers.nf.test.snap +++ 
b/tests/pipeline/doubleprimers.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-07-27T13:49:03+0000" }, diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index 2c0382f0f..daba2601d 100644 --- a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap @@ -14,7 +14,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, 
DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test new file mode 100644 index 000000000..493968153 --- /dev/null +++ b/tests/pipeline/qiimecustom.nf.test @@ -0,0 +1,51 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + tag "test_qiimecustom" + tag "qiime2" + tag "pipeline" + + test("Custom QIIME2 Reference Taxonomy Database") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, + { assert snapshot(path("$outputDir/overall_summary.tsv")).match("overall_summary_tsv") }, + { assert snapshot(path("$outputDir/barrnap/rrna.arc.gff"), + path("$outputDir/barrnap/rrna.bac.gff"), + path("$outputDir/barrnap/rrna.euk.gff"), + path("$outputDir/barrnap/rrna.mito.gff")).match("barrnap") }, + { assert new File("$outputDir/barrnap/summary.tsv").exists() }, + { assert snapshot(path("$outputDir/cutadapt/cutadapt_summary.tsv")).match("cutadapt") }, + { assert snapshot(path("$outputDir/dada2/ASV_seqs.fasta"), + path("$outputDir/dada2/ASV_table.tsv"), + path("$outputDir/dada2/DADA2_stats.tsv"), + path("$outputDir/dada2/DADA2_table.rds"), + path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") }, + { assert new File("$outputDir/fastqc/sampleID_1_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1a_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1a_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2_1_fastqc.html").exists() }, + { assert new 
File("$outputDir/fastqc/sampleID_2_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() }, + { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, + { assert new File("$outputDir/qiime2/taxonomy/taxonomy.tsv").exists() }, + { assert new File("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza").exists() }, + { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() } + ) + } + } +} diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap new file mode 100644 index 000000000..594688a92 --- /dev/null +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -0,0 +1,43 @@ +{ + "input": { + "content": [ + "Samplesheet.tsv:md5,dbf8d1a2b7933dab9e5a139f33c2b1f4" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "cutadapt": { + "content": [ + "cutadapt_summary.tsv:md5,5d02749984a811479e7d534fda75163f" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "software_versions": { + "content": [ + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "overall_summary_tsv": { + "content": [ + "overall_summary.tsv:md5,3231d6ee72b9a1e7742e5605caaff05a" + ], + "timestamp": 
"2023-05-28T21:18:54+0000" + }, + "barrnap": { + "content": [ + "rrna.arc.gff:md5,6dae470aace9293d5eb8c318584852dd", + "rrna.bac.gff:md5,439a9084f089120f700f938dfb58fa41", + "rrna.euk.gff:md5,c9bc1d9d8fb77dc19c95dee2d53840eb", + "rrna.mito.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "multiqc": { + "content": [ + "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", + "multiqc_general_stats.txt:md5,88c2b9e6d02b83afe4f9551e6c9a91a7", + "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + ], + "timestamp": "2023-05-28T21:18:54+0000" + } +} diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test index 67c4d5468..dba78c126 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -44,6 +44,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, + { assert new File("$outputDir/qiime2/taxonomy/taxonomy.tsv").exists() }, + { assert new File("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 7dca4e3e9..b5aa10f14 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, 
ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index 069c7fa88..b19bf8feb 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, 
ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 05ddfee76..6dcc370f1 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -59,9 +59,25 @@ if (params.dada_ref_tax_custom) { val_dada_ref_taxonomy = "none" } -if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { +if (params.qiime_ref_tax_custom) { + if ("${params.qiime_ref_tax_custom}".contains(",")) { + qiime_ref_paths = "${params.qiime_ref_tax_custom}".split(",") + if (qiime_ref_paths.length != 2) { + error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two filepaths separated by a comma. Please review input." + } + + ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList(qiime_ref_paths), checkIfExists: true) + } else { + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + } + val_qiime_ref_taxonomy = "user" +} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } -} else { ch_qiime_ref_taxonomy = Channel.empty() } + val_qiime_ref_taxonomy = params.qiime_ref_taxonomy.replace('=','_').replace('.','_') +} else { + ch_qiime_ref_taxonomy = Channel.empty() + val_qiime_ref_taxonomy = "none" +} if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { ch_sintax_ref_taxonomy = Channel.fromList(params.sintax_ref_databases[params.sintax_ref_taxonomy]["file"]).map { file(it) } @@ -130,8 +146,15 @@ if ( params.dada_ref_taxonomy && !params.skip_dada_addspecies && !params.skip_da } } -//only run QIIME2 when taxonomy is actually calculated and all required data is available -if ( 
!(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { +// Only run QIIME2 taxonomy classification if needed parameters are passed and we are not skipping taxonomy or qiime steps. +if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier) ) { + run_qiime2_taxonomy = true +} else { + run_qiime2_taxonomy = false +} + +//only run QIIME2 downstream analysis when taxonomy is actually calculated and all required data is available +if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && !params.skip_qiime_downstream && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { run_qiime2 = true } else { run_qiime2 = false @@ -551,10 +574,11 @@ workflow AMPLISEQ { } //QIIME2 - if ( run_qiime2 ) { - if (params.qiime_ref_taxonomy && !params.classifier) { + if ( run_qiime2_taxonomy ) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( ch_qiime_ref_taxonomy.collect(), + val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer ) @@ -607,7 +631,7 @@ workflow AMPLISEQ { log.info "Use Kraken2 taxonomy classification" val_used_taxonomy = "Kraken2" ch_tax = QIIME2_INTAX ( ch_kraken2_tax, "" ).qza - } else if ( params.qiime_ref_taxonomy || params.classifier ) { + } else if ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) { log.info "Use QIIME2 taxonomy classification" val_used_taxonomy = "QIIME2" ch_tax = 
QIIME2_TAXONOMY.out.qza @@ -707,7 +731,7 @@ workflow AMPLISEQ { // MODULE: Predict functional potential of a bacterial community from marker genes with Picrust2 // if ( params.picrust ) { - if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) { + if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) { PICRUST ( QIIME2_EXPORT.out.abs_fasta, QIIME2_EXPORT.out.abs_tsv, "QIIME2", "This Picrust2 analysis is based on filtered reads from QIIME2" ) } else { PICRUST ( ch_fasta, ch_dada2_asv, "DADA2", "This Picrust2 analysis is based on unfiltered reads from DADA2" ) @@ -839,7 +863,7 @@ workflow AMPLISEQ { !params.skip_taxonomy && ( params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) ? KRAKEN2_TAXONOMY_WF.out.tax_tsv.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? FASTA_NEWICK_EPANG_GAPPA.out.heattree.ifEmpty( [[],[]] ) : [[],[]], - !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], + !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2_taxonomy ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], run_qiime2, run_qiime2 ? val_used_taxonomy : "", run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "",