From d86c5696b08fe2ab0dea8f04219e01c7ada55c55 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 12:04:12 -0500 Subject: [PATCH 01/48] Add params.qiime_ref_tax_custom in preparation of allowing custom qiime database. --- nextflow.config | 1 + nextflow_schema.json | 5 +++++ workflows/ampliseq.nf | 15 +++++++++++---- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 93e19e86a..1e5a567f0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -108,6 +108,7 @@ params { cut_dada_ref_taxonomy = false sintax_ref_taxonomy = null qiime_ref_taxonomy = null + qiime_ref_tax_custom = null kraken2_ref_taxonomy = null kraken2_assign_taxlevels = null kraken2_ref_tax_custom = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 1d3098da5..69820e8c6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -432,6 +432,11 @@ "greengenes85" ] }, + "qiime_ref_tax_custom": { + "type": "string", + "help_text": "Is preferred over `--qiime_ref_taxonomy`. Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database.", + "description": "Path to a custom QIIME2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)" + }, "classifier": { "type": "string", "description": "Path to QIIME2 trained classifier file (typically *-classifier.qza)", diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 05ddfee76..eee62b971 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -59,9 +59,16 @@ if (params.dada_ref_tax_custom) { val_dada_ref_taxonomy = "none" } -if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { +if (params.qiime_ref_tax_custom) { + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + val_qiime_ref_taxonomy = "user" +} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {. 
ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } -} else { ch_qiime_ref_taxonomy = Channel.empty() } + val_qiime_ref_taxonomy = params.qiime_ref_taxonomy.replace('=','_').replace('.','_') +} else { + ch_qiime_ref_taxonomy = Channel.empty() + val_qiime_ref_taxonomy = "none" +} if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { ch_sintax_ref_taxonomy = Channel.fromList(params.sintax_ref_databases[params.sintax_ref_taxonomy]["file"]).map { file(it) } @@ -131,7 +138,7 @@ if ( params.dada_ref_taxonomy && !params.skip_dada_addspecies && !params.skip_da } //only run QIIME2 when taxonomy is actually calculated and all required data is available -if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { +if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { run_qiime2 = true } else { run_qiime2 = false @@ -552,7 +559,7 @@ workflow AMPLISEQ { //QIIME2 if ( run_qiime2 ) { - if (params.qiime_ref_taxonomy && !params.classifier) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( ch_qiime_ref_taxonomy.collect(), params.FW_primer, From 439097c93751277109c643359abb11f7ab158a14 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 12:05:14 -0500 Subject: [PATCH 02/48] Implementation of logic to handle a custom qiime2 reference database stored in either a directory or a tarball. 
--- subworkflows/local/qiime2_preptax.nf | 29 ++++++++++++++++++++++++++-- workflows/ampliseq.nf | 1 + 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 7f3cb80b2..429aac713 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -2,6 +2,7 @@ * Training of a classifier with QIIME2 */ +include { UNTAR } from '../../modules/nf-core/untar/main' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' @@ -9,13 +10,37 @@ include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' workflow QIIME2_PREPTAX { take: ch_qiime_ref_taxonomy //channel, list of files + val_qiime_ref_taxonomy //val FW_primer //val RV_primer //val main: - FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + if (params.qiime_ref_tax_custom) { + ch_qiime_ref_taxonomy + .branch { + tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) + dir: it.isDirectory() + failed: true + }.set { ch_qiime_ref_taxonomy } + ch_qiime_ref_taxonomy.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." 
} + + UNTAR ( + ch_qiime_ref_taxonomy.tar + .map { + db -> + def meta = [:] + meta.id = val_qiime_ref_taxonomy + [ meta, db ] } ) + ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } + ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_taxonomy.dir) + + ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) } + } else { + FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + + ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) + } - ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) ch_ref_database .map { db -> diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index eee62b971..03d1f6c43 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -562,6 +562,7 @@ workflow AMPLISEQ { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( ch_qiime_ref_taxonomy.collect(), + val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer ) From 14c89b9c22faec6544a1ffa97e28afe989e6336f Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 14:53:05 -0500 Subject: [PATCH 03/48] Some params checking logic. 
--- lib/WorkflowAmpliseq.groovy | 6 +++--- lib/WorkflowMain.groovy | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index 5e1039115..0868866a7 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -77,12 +77,12 @@ class WorkflowAmpliseq { } if (params.skip_dada_taxonomy && params.sbdiexport) { - if (!params.sintax_ref_taxonomy && (params.skip_qiime || !params.qiime_ref_taxonomy)) { + if (!params.sintax_ref_taxonomy && (params.skip_qiime || (!params.qiime_ref_taxonomy && !params.qiime_ref_tax_custom))) { Nextflow.error("Incompatible parameters: `--sbdiexport` expects taxa annotation and therefore annotation with either DADA2, SINTAX, or QIIME2 is needed.") } } - if ( (!params.FW_primer || !params.RV_primer) && params.qiime_ref_taxonomy && !params.skip_qiime && !params.skip_taxonomy ) { + if ( (!params.FW_primer || !params.RV_primer) && (params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_qiime && !params.skip_taxonomy ) { Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the QIIME2 reference database to the amplicon sequences. Please specify primers or do not use `--qiime_ref_taxonomy`.") } @@ -90,7 +90,7 @@ class WorkflowAmpliseq { Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the DADA2 reference database to the amplicon sequences. 
Please specify primers or do not use `--cut_dada_ref_taxonomy`.") } - if (params.qiime_ref_taxonomy && params.classifier) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && params.classifier) { Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.") } diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 7f49735e4..4b7ec2afc 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -34,7 +34,7 @@ class WorkflowMain { if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { sintaxreftaxonomyExistsError(params, log) } - if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_taxonomy && !params.classifier) { qiimereftaxonomyExistsError(params, log) } From d214ec0252321261d56b315d0b34797432011442 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 14:55:09 -0500 Subject: [PATCH 04/48] Loose . lying around. --- workflows/ampliseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 03d1f6c43..3d4ad07d9 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -62,7 +62,7 @@ if (params.dada_ref_tax_custom) { if (params.qiime_ref_tax_custom) { ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) val_qiime_ref_taxonomy = "user" -} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {. 
+} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } val_qiime_ref_taxonomy = params.qiime_ref_taxonomy.replace('=','_').replace('.','_') } else { From 9346d7aea37c365e31ad37ae8ffa08a51a2ab8a4 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 14:57:54 -0500 Subject: [PATCH 05/48] Only perform collect if going to FORMAT_TAXONOMY_QIIME. --- subworkflows/local/qiime2_preptax.nf | 2 +- workflows/ampliseq.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 429aac713..0d66308a1 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -36,7 +36,7 @@ workflow QIIME2_PREPTAX { ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) } } else { - FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 3d4ad07d9..b45fd9a57 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -561,7 +561,7 @@ workflow AMPLISEQ { if ( run_qiime2 ) { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( - ch_qiime_ref_taxonomy.collect(), + ch_qiime_ref_taxonomy, val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer From ef053b1369e5f75aad794772e387257b06e99d35 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 15:13:23 -0500 Subject: [PATCH 06/48] =?UTF-8?q?Set=20into=20new=20channel=20when=20branc?= =?UTF-8?q?hing=20on=20ch=5Fqiime=5Fref=5Ftaxonomy.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- subworkflows/local/qiime2_preptax.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 0d66308a1..38a9faf89 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -21,18 +21,18 @@ workflow QIIME2_PREPTAX { tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) dir: it.isDirectory() failed: true - }.set { ch_qiime_ref_taxonomy } - ch_qiime_ref_taxonomy.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." } + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." } UNTAR ( - ch_qiime_ref_taxonomy.tar + ch_qiime_ref_tax_branched.tar .map { db -> def meta = [:] meta.id = val_qiime_ref_taxonomy [ meta, db ] } ) ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } - ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_taxonomy.dir) + ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) } } else { From a48a09fe3f7398ce15fc8b0b1bc449b92c880ae8 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 15:48:14 -0500 Subject: [PATCH 07/48] Try to unpack the database dir into component files using a module. 
--- modules/local/qiime2_unpack.nf | 31 ++++++++++++++++++++++++++++ subworkflows/local/qiime2_preptax.nf | 5 ++++- 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 modules/local/qiime2_unpack.nf diff --git a/modules/local/qiime2_unpack.nf b/modules/local/qiime2_unpack.nf new file mode 100644 index 000000000..e77286da7 --- /dev/null +++ b/modules/local/qiime2_unpack.nf @@ -0,0 +1,31 @@ +process QIIME2_UNPACK { + label 'process_low' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : + 'docker.io/biocontainers/biocontainers:v1.2.0_cv1' }" + + input: + path(database) + + output: + path("*.fna"), emit: fasta + path("*.tax"), emit: tax + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + // TODO: need to not have this be a copy. + script: + """ + cp $database/*.fna . + cp $database/*.tax . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bash: \$(bash --version | sed -n 1p | sed 's/GNU bash, version //g') + END_VERSIONS + """ +} diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 38a9faf89..3ad8365f2 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -6,6 +6,7 @@ include { UNTAR } from '../../modules/nf-core/untar/main' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' +include { QIIME2_UNPACK } from '../../modules/local/qiime2_unpack' workflow QIIME2_PREPTAX { take: @@ -34,7 +35,9 @@ workflow QIIME2_PREPTAX { ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) - ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) } + QIIME2_UNPACK(ch_qiime_db_dir) + + ch_ref_database = ch_qiime_db_dir.map{ QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) } } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) From a9971b68ede12d0431d248362d1c279b7ac0cc07 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 15:59:03 -0500 Subject: [PATCH 08/48] Remove map wrapping the combine. 
--- subworkflows/local/qiime2_preptax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 3ad8365f2..8229190ac 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -37,7 +37,7 @@ workflow QIIME2_PREPTAX { QIIME2_UNPACK(ch_qiime_db_dir) - ch_ref_database = ch_qiime_db_dir.map{ QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) } + ch_ref_database = QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) From aac51bd4b162e17ed0f9ca5864527651abf4e23d Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 16:14:05 -0500 Subject: [PATCH 09/48] Remove unpack in favour of map and filter. --- modules/local/qiime2_unpack.nf | 31 ---------------------------- subworkflows/local/qiime2_preptax.nf | 16 +++++++++++--- 2 files changed, 13 insertions(+), 34 deletions(-) delete mode 100644 modules/local/qiime2_unpack.nf diff --git a/modules/local/qiime2_unpack.nf b/modules/local/qiime2_unpack.nf deleted file mode 100644 index e77286da7..000000000 --- a/modules/local/qiime2_unpack.nf +++ /dev/null @@ -1,31 +0,0 @@ -process QIIME2_UNPACK { - label 'process_low' - - conda "conda-forge::sed=4.7" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'docker.io/biocontainers/biocontainers:v1.2.0_cv1' }" - - input: - path(database) - - output: - path("*.fna"), emit: fasta - path("*.tax"), emit: tax - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - // TODO: need to not have this be a copy. - script: - """ - cp $database/*.fna . - cp $database/*.tax . 
- - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bash: \$(bash --version | sed -n 1p | sed 's/GNU bash, version //g') - END_VERSIONS - """ -} diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 8229190ac..2969d3709 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -6,7 +6,6 @@ include { UNTAR } from '../../modules/nf-core/untar/main' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' -include { QIIME2_UNPACK } from '../../modules/local/qiime2_unpack' workflow QIIME2_PREPTAX { take: @@ -35,9 +34,20 @@ workflow QIIME2_PREPTAX { ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) - QIIME2_UNPACK(ch_qiime_db_dir) + ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.fna"), checkIfExists: true) + } | filter { + if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database." + ! it instanceof List + } + ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.tax"), checkIfExists: true) + } | filter { + if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database." + ! it instanceof List + } - ch_ref_database = QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) + ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) From 1b2825ec952b9d2d4d8edb8cbd348fd843d42223 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 16:17:38 -0500 Subject: [PATCH 10/48] Glob results in list in all circumstances, check length instead. 
--- subworkflows/local/qiime2_preptax.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 2969d3709..f040f2077 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -37,14 +37,14 @@ workflow QIIME2_PREPTAX { ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> files = file(dir.resolve("*.fna"), checkIfExists: true) } | filter { - if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database." - ! it instanceof List + if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." + it.size() == 1 } ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> files = file(dir.resolve("*.tax"), checkIfExists: true) } | filter { - if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database." - ! it instanceof List + if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." + it.size() == 1 } ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) From a4219a0baa0e4b256ee97c5c1a54a566fb801f07 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 17:11:59 -0500 Subject: [PATCH 11/48] Update CHANGELOG.md. --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 620c97165..b884a4087 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +[]() - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification. + ### `Changed` ### `Fixed` From 0ccf6e6daf7b46be84a81fd9498d2e55fc5db795 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 10:12:47 -0500 Subject: [PATCH 12/48] Update error message when passing both one of --qiime_ref_taxonomy or --qiime_ref_tax_custom and --classifier. 
--- lib/WorkflowAmpliseq.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index 0868866a7..25db3ed68 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -91,7 +91,7 @@ class WorkflowAmpliseq { } if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && params.classifier) { - Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.") + Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` and `--qiime_ref_tax_custom` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.") } if (params.kraken2_ref_tax_custom && !params.kraken2_assign_taxlevels ) { From 590f415952b249d83ae5fcd7d128b109310d0983 Mon Sep 17 00:00:00 2001 From: Matthew Date: Wed, 29 Nov 2023 11:13:35 -0500 Subject: [PATCH 13/48] Update CHANGELOG.md with pull request number. Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b884a4087..6c2030cc4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -[]() - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification. +[#667](https://github.com/nf-core/ampliseq/pull/667) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification. ### `Changed` From f5d80f572e008693d3f83dd5a8f169784da546c7 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 11:58:33 -0500 Subject: [PATCH 14/48] Add support for specifying two (possibly gzipped) files as --qiime_ref_tax_custom. 
--- modules/local/gzip_decompress.nf | 32 ++++++++++++ nextflow_schema.json | 2 +- subworkflows/local/qiime2_preptax.nf | 75 ++++++++++++++++++---------- workflows/ampliseq.nf | 6 ++- 4 files changed, 86 insertions(+), 29 deletions(-) create mode 100644 modules/local/gzip_decompress.nf diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf new file mode 100644 index 000000000..fa8fa82cf --- /dev/null +++ b/modules/local/gzip_decompress.nf @@ -0,0 +1,32 @@ +process GZIP_DECOMPRESS { + tag "$file" + label 'process_single' + + conda "conda-forge::sed=4.7 conda-forge::gzip=1.13" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + path(file) + + output: + path("$outfile"), emit: ungzip + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + outfile = task.ext.outfile ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.gz$/, "")) + + """ + gzip $args -c -d $file > $outfile + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gzip: \$(echo \$(gzip --version 2>&1) | sed 's/gzip //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 69820e8c6..c6c6f8b09 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -434,7 +434,7 @@ }, "qiime_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--qiime_ref_taxonomy`. Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database.", + "help_text": "Is preferred over `--qiime_ref_taxonomy`. 
Can be compressed tar archive (.tar.gz|.tgz), a pair of (possibly gzipped) filepaths, or folder containing the database.", "description": "Path to a custom QIIME2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)" }, "classifier": { diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index f040f2077..7cc4817d0 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -3,6 +3,7 @@ */ include { UNTAR } from '../../modules/nf-core/untar/main' +include { GZIP_DECOMPRESS } from '../../modules/local/gzip_decompress.nf' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' @@ -15,36 +16,56 @@ workflow QIIME2_PREPTAX { RV_primer //val main: + ch_qiime2_preptax_versions = Channel.empty() + if (params.qiime_ref_tax_custom) { - ch_qiime_ref_taxonomy - .branch { - tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) - dir: it.isDirectory() - failed: true - }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." } + if (ch_qiime_ref_taxonomy.size() == 2) { + ch_qiime_ref_taxonomy + .branch { + gzip: it.isFile() && ( it.getName().endsWith(".gz") ) + decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") ) + failed: true + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." 
} - UNTAR ( - ch_qiime_ref_tax_branched.tar - .map { - db -> - def meta = [:] - meta.id = val_qiime_ref_taxonomy - [ meta, db ] } ) - ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } - ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) + GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) - ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> - files = file(dir.resolve("*.fna"), checkIfExists: true) - } | filter { - if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." - it.size() == 1 - } - ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> - files = file(dir.resolve("*.tax"), checkIfExists: true) - } | filter { - if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." - it.size() == 1 + ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip + ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) + + ch_ref_database = ch_qiime_db_files.collate(2) + } else { + ch_qiime_ref_taxonomy + .branch { + tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) + dir: it.isDirectory() + failed: true + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." } + + UNTAR ( + ch_qiime_ref_tax_branched.tar + .map { + db -> + def meta = [:] + meta.id = val_qiime_ref_taxonomy + [ meta, db ] } ) + ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } + ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) + + ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.fna"), checkIfExists: true) + } | filter { + if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." 
+ it.size() == 1 + } + ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.tax"), checkIfExists: true) + } | filter { + if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." + it.size() == 1 + } } ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index b45fd9a57..9bd1cf5c4 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -60,7 +60,11 @@ if (params.dada_ref_tax_custom) { } if (params.qiime_ref_tax_custom) { - ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + if ("${params.qiime_ref_tax_custom}".contains(",")) { + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true) + } else { + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + } val_qiime_ref_taxonomy = "user" } else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } From 7016682fd8375525b2e23e3fdebaa01cbdd8f082 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 14:07:43 -0500 Subject: [PATCH 15/48] Only support providing two files separated by a comma. --- nextflow_schema.json | 4 +- subworkflows/local/qiime2_preptax.nf | 59 ++++++---------------------- workflows/ampliseq.nf | 10 ++--- 3 files changed, 20 insertions(+), 53 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index c6c6f8b09..79a4cebb0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -434,8 +434,8 @@ }, "qiime_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--qiime_ref_taxonomy`. 
Can be compressed tar archive (.tar.gz|.tgz), a pair of (possibly gzipped) filepaths, or folder containing the database.", - "description": "Path to a custom QIIME2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)" + "help_text": "Is preferred over `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths.", + "description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)" }, "classifier": { "type": "string", diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 7cc4817d0..a4e1d7768 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -19,56 +19,23 @@ workflow QIIME2_PREPTAX { ch_qiime2_preptax_versions = Channel.empty() if (params.qiime_ref_tax_custom) { - if (ch_qiime_ref_taxonomy.size() == 2) { - ch_qiime_ref_taxonomy - .branch { - gzip: it.isFile() && ( it.getName().endsWith(".gz") ) - decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") ) - failed: true - }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." } + ch_qiime_ref_taxonomy.view() - GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) - ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) + // ch_qiime_ref_taxonomy + // .branch { + // gzip: it.isFile() && ( it.getName().endsWith(".gz") ) + // decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") ) + // failed: true + // }.set { ch_qiime_ref_tax_branched } + // ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." 
} - ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip - ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) + // GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) + // ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) - ch_ref_database = ch_qiime_db_files.collate(2) - } else { - ch_qiime_ref_taxonomy - .branch { - tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) - dir: it.isDirectory() - failed: true - }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." } + // ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip + // ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) - UNTAR ( - ch_qiime_ref_tax_branched.tar - .map { - db -> - def meta = [:] - meta.id = val_qiime_ref_taxonomy - [ meta, db ] } ) - ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } - ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) - - ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> - files = file(dir.resolve("*.fna"), checkIfExists: true) - } | filter { - if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." - it.size() == 1 - } - ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> - files = file(dir.resolve("*.tax"), checkIfExists: true) - } | filter { - if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." 
- it.size() == 1 - } - } - - ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) + // ch_ref_database = ch_qiime_db_files.collate(2) } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9bd1cf5c4..07df8960a 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -60,11 +60,11 @@ if (params.dada_ref_tax_custom) { } if (params.qiime_ref_tax_custom) { - if ("${params.qiime_ref_tax_custom}".contains(",")) { - ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true) - } else { - ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + if (!"${params.qiime_ref_tax_custom}".contains(",")) { + error "--qiime_ref_tax_custom takes two filepaths separated by a comma. Please review input." } + + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true) val_qiime_ref_taxonomy = "user" } else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } @@ -565,7 +565,7 @@ workflow AMPLISEQ { if ( run_qiime2 ) { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( - ch_qiime_ref_taxonomy, + ch_qiime_ref_taxonomy.collect(), val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer From 79cbfe8fb0bf50035529deb5fe24d18693784b75 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 14:41:49 -0500 Subject: [PATCH 16/48] Fix split returns a String[] and we actually need an ArrayList. 
--- subworkflows/local/qiime2_preptax.nf | 2 +- workflows/ampliseq.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 7cc4817d0..f36dda424 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -19,7 +19,7 @@ workflow QIIME2_PREPTAX { ch_qiime2_preptax_versions = Channel.empty() if (params.qiime_ref_tax_custom) { - if (ch_qiime_ref_taxonomy.size() == 2) { + if ("${params.qiime_ref_tax_custom}".contains(",")) { ch_qiime_ref_taxonomy .branch { gzip: it.isFile() && ( it.getName().endsWith(".gz") ) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9bd1cf5c4..3a3323269 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -61,7 +61,7 @@ if (params.dada_ref_tax_custom) { if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { - ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true) + ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList("${params.qiime_ref_tax_custom}".split(",")), checkIfExists: true) } else { ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) } @@ -565,7 +565,7 @@ workflow AMPLISEQ { if ( run_qiime2 ) { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( - ch_qiime_ref_taxonomy, + ch_qiime_ref_taxonomy.collect(), val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer From 6d767bc1ea80aa464d80b4aae48944759a44f3a6 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 14:46:30 -0500 Subject: [PATCH 17/48] Move ch_ref_database set into correct scope. 
--- subworkflows/local/qiime2_preptax.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index f36dda424..19a9bc4b3 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -66,9 +66,9 @@ workflow QIIME2_PREPTAX { if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." it.size() == 1 } - } - ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) + ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) + } } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) From f76b49bbcd56315189603e0e292dba2e108f68e5 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 14:53:35 -0500 Subject: [PATCH 18/48] Try using map to work through list of files. --- subworkflows/local/qiime2_preptax.nf | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 19a9bc4b3..96a0db961 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -21,18 +21,19 @@ workflow QIIME2_PREPTAX { if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { ch_qiime_ref_taxonomy - .branch { - gzip: it.isFile() && ( it.getName().endsWith(".gz") ) - decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") ) - failed: true - }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." 
} - - GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) - ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) + .map { filepath -> + candidate = file(filepath, checkIfExists: true) + if (filepath.endsWith(".gz")) { + GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) - ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip - ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) + return GZIP_DECOMPRESS.out.ungzip + } else if (filepath.endsWith(".fna") || filepath.endsWith(".tax")) { + return candidate + } else { + error "$filepath is neither a compressed or decompressed sequence or taxonomy file. Please review input." + } + }.set { ch_qiime_db_files } ch_ref_database = ch_qiime_db_files.collate(2) } else { From 0890a0e64beb641422698f61c2adfee3f0db46a7 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:17:56 -0500 Subject: [PATCH 19/48] Can't call processes from inside maps. 
--- subworkflows/local/qiime2_preptax.nf | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 96a0db961..78a4ab27f 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -20,24 +20,23 @@ workflow QIIME2_PREPTAX { if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { - ch_qiime_ref_taxonomy - .map { filepath -> - candidate = file(filepath, checkIfExists: true) - if (filepath.endsWith(".gz")) { - GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) - ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) + ch_qiime_ref_taxonomy.flatten() + .branch { + compressed: it.isFile() && it.getName().endsWith(".gz") + decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith(".tax") ) + failed: true + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." } + + GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.compressed) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) - return GZIP_DECOMPRESS.out.ungzip - } else if (filepath.endsWith(".fna") || filepath.endsWith(".tax")) { - return candidate - } else { - error "$filepath is neither a compressed or decompressed sequence or taxonomy file. Please review input." 
- } - }.set { ch_qiime_db_files } + ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip + ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) ch_ref_database = ch_qiime_db_files.collate(2) } else { - ch_qiime_ref_taxonomy + ch_qiime_ref_taxonomy.flatten() .branch { tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) dir: it.isDirectory() From 7276a8d1e36eb176fa4171c46d34aed24e4c8fad Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:20:25 -0500 Subject: [PATCH 20/48] Fix outfile definition in GZIP_DECOMPRESS. --- modules/local/gzip_decompress.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf index fa8fa82cf..effd256c9 100644 --- a/modules/local/gzip_decompress.nf +++ b/modules/local/gzip_decompress.nf @@ -19,7 +19,7 @@ process GZIP_DECOMPRESS { script: def args = task.ext.args ?: '' - outfile = task.ext.outfile ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.gz$/, "")) + outfile = task.ext.outfile ?: archive.baseName.toString().replaceFirst(/\.gz$/, "") """ gzip $args -c -d $file > $outfile From f0a8715a9cc33c520e2047f0f45c35093c4a28c5 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:21:45 -0500 Subject: [PATCH 21/48] Fix outfile definition in GZIP_DECOMPRESS. 
--- modules/local/gzip_decompress.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf index effd256c9..c6ea37a5a 100644 --- a/modules/local/gzip_decompress.nf +++ b/modules/local/gzip_decompress.nf @@ -19,7 +19,7 @@ process GZIP_DECOMPRESS { script: def args = task.ext.args ?: '' - outfile = task.ext.outfile ?: archive.baseName.toString().replaceFirst(/\.gz$/, "") + outfile = task.ext.outfile ?: file.baseName.toString().replaceFirst(/\.gz$/, "") """ gzip $args -c -d $file > $outfile From 56f241b05ecbbf1f068c3ba7ee9c11b749c92e16 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:48:34 -0500 Subject: [PATCH 22/48] Add some comments. --- subworkflows/local/qiime2_preptax.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 78a4ab27f..561a75dc8 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -19,6 +19,7 @@ workflow QIIME2_PREPTAX { ch_qiime2_preptax_versions = Channel.empty() if (params.qiime_ref_tax_custom) { + // Handle case where we have been provided a pair of filepaths. if ("${params.qiime_ref_tax_custom}".contains(",")) { ch_qiime_ref_taxonomy.flatten() .branch { @@ -35,6 +36,7 @@ workflow QIIME2_PREPTAX { ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) ch_ref_database = ch_qiime_db_files.collate(2) + // Handle case we have been provided a single filepath (tarball or directory). } else { ch_qiime_ref_taxonomy.flatten() .branch { From 7907df5bfb4843a80352e8de18265c7331c4ba8a Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:49:02 -0500 Subject: [PATCH 23/48] Add an early check that two paths are provided when providing a comma-separated list. 
--- workflows/ampliseq.nf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 3a3323269..a99e71f9e 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -61,7 +61,12 @@ if (params.dada_ref_tax_custom) { if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { - ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList("${params.qiime_ref_tax_custom}".split(",")), checkIfExists: true) + qiime_ref_paths = "${params.qiime_ref_tax_custom}".split(",") + if (qiime_ref_paths.length != 2) { + error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two files paths separated by a comma. Please review input." + } + + ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList(qiime_ref_paths), checkIfExists: true) } else { ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) } From 1d6ce32e4911f86ebaf02af447db6fdcb3cadb47 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:53:08 -0500 Subject: [PATCH 24/48] Make sure downstream is aware of new means of pointing to a qiime ref db. 
--- workflows/ampliseq.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index a99e71f9e..9195038dd 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -624,7 +624,7 @@ workflow AMPLISEQ { log.info "Use Kraken2 taxonomy classification" val_used_taxonomy = "Kraken2" ch_tax = QIIME2_INTAX ( ch_kraken2_tax, "" ).qza - } else if ( params.qiime_ref_taxonomy || params.classifier ) { + } else if ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) { log.info "Use QIIME2 taxonomy classification" val_used_taxonomy = "QIIME2" ch_tax = QIIME2_TAXONOMY.out.qza @@ -724,7 +724,7 @@ workflow AMPLISEQ { // MODULE: Predict functional potential of a bacterial community from marker genes with Picrust2 // if ( params.picrust ) { - if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) { + if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) { PICRUST ( QIIME2_EXPORT.out.abs_fasta, QIIME2_EXPORT.out.abs_tsv, "QIIME2", "This Picrust2 analysis is based on filtered reads from QIIME2" ) } else { PICRUST ( ch_fasta, ch_dada2_asv, "DADA2", "This Picrust2 analysis is based on unfiltered reads from DADA2" ) @@ -856,7 +856,7 @@ workflow AMPLISEQ { !params.skip_taxonomy && ( params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) ? KRAKEN2_TAXONOMY_WF.out.tax_tsv.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? 
FASTA_NEWICK_EPANG_GAPPA.out.heattree.ifEmpty( [[],[]] ) : [[],[]], - !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], + !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], run_qiime2, run_qiime2 ? val_used_taxonomy : "", run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "", From 913d284b4c6dd54a20c9dbe273656c30df2888f3 Mon Sep 17 00:00:00 2001 From: Matthew Date: Fri, 1 Dec 2023 14:13:39 -0500 Subject: [PATCH 25/48] Improve error message clarity for ill-formed file. Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- subworkflows/local/qiime2_preptax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 561a75dc8..7a5960afe 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -27,7 +27,7 @@ workflow QIIME2_PREPTAX { decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith(".tax") ) failed: true }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed (ends with `.gz`) or decompressed sequence (ends with `.fna`) or taxonomy file (ends with `.tax`). Please review input." 
} GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.compressed) ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) From 330bf43e19cf42f87f9a9213928724fe42cbc98c Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 1 Dec 2023 14:34:05 -0500 Subject: [PATCH 26/48] Fix typo in error on --qiime_ref_paths form. --- workflows/ampliseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9195038dd..faeffec62 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -63,7 +63,7 @@ if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { qiime_ref_paths = "${params.qiime_ref_tax_custom}".split(",") if (qiime_ref_paths.length != 2) { - error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two files paths separated by a comma. Please review input." + error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two filepaths separated by a comma. Please review input." } ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList(qiime_ref_paths), checkIfExists: true) From b8c595cc7a647593656945cc34c55861986c2038 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 1 Dec 2023 14:41:10 -0500 Subject: [PATCH 27/48] Remove unneeded collect from FORMAT_TAXONOMY_QIIME invocation. 
--- subworkflows/local/qiime2_preptax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 561a75dc8..d1ed888a6 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -72,7 +72,7 @@ workflow QIIME2_PREPTAX { ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) } } else { - FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) + FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) } From 3e05fe507674b72cc1757063fb4621f47defd4d9 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 1 Dec 2023 14:47:32 -0500 Subject: [PATCH 28/48] Improve version tracking in PREPTAX. --- subworkflows/local/qiime2_preptax.nf | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index d1ed888a6..050ad78c7 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -53,6 +53,8 @@ workflow QIIME2_PREPTAX { def meta = [:] meta.id = val_qiime_ref_taxonomy [ meta, db ] } ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(UNTAR.out.versions) + ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) @@ -73,6 +75,7 @@ workflow QIIME2_PREPTAX { } } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + ch_qiime2_preptax_versions(FORMAT_TAXONOMY_QIIME.out.versions) ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) } @@ -85,10 +88,14 @@ workflow QIIME2_PREPTAX { meta.RV_primer = RV_primer [ meta, db ] } .set { ch_ref_database } + QIIME2_EXTRACT ( ch_ref_database ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_EXTRACT.out.versions) + QIIME2_TRAIN ( QIIME2_EXTRACT.out.qza ) + 
ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_TRAIN.out.versions) emit: - classifier = QIIME2_TRAIN.out.qza - versions = QIIME2_TRAIN.out.versions + classifier = QIIME2_TRAIN.out.qza + versions = ch_qiime2_preptax_versions } From b012aeb0792d86f5edcd1dc81b34049fc7b7b16d Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 1 Dec 2023 15:11:18 -0500 Subject: [PATCH 29/48] Add qiime_ref_tax_custom to testing in reftaxcustom nf-test. --- conf/test_reftaxcustom.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index 4233d1ea0..c2d7c4eed 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -30,6 +30,7 @@ params { dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus" kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz" kraken2_assign_taxlevels = "D,P,C,O" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 skip_qiime = true From ba71667c714ff22258bc7de7f0126dda8e8bcfe0 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 15:41:55 +0000 Subject: [PATCH 30/48] Don't skip qiime while testing. 
--- conf/test_reftaxcustom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index c2d7c4eed..870a59e27 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -33,5 +33,5 @@ params { qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 - skip_qiime = true + skip_qiime = false } From 1360415a39ae3aaf659a8201ef1958a1fb53414e Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:00:27 +0000 Subject: [PATCH 31/48] Add a skip option for just downstream qiime analysis, but still perform qiime taxonomic classification. --- nextflow.config | 1 + nextflow_schema.json | 4 ++++ workflows/ampliseq.nf | 13 ++++++++++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/nextflow.config b/nextflow.config index c4e94bfb1..c76b40582 100644 --- a/nextflow.config +++ b/nextflow.config @@ -87,6 +87,7 @@ params { skip_dada_quality = false skip_barrnap = false skip_qiime = false + skip_qiime_downstream = false skip_fastqc = false skip_alpha_rarefaction = false skip_abundance_tables = false diff --git a/nextflow_schema.json b/nextflow_schema.json index c6c6f8b09..2b4a8dca6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -656,6 +656,10 @@ "type": "boolean", "description": "Skip all steps that are executed by QIIME2, including QIIME2 software download, taxonomy assignment by QIIME2, barplots, relative abundance tables, diversity analysis, differential abundance testing." }, + "skip_qiime_downstream": { + "type": "boolean", + "description": "Skip steps that are executed by QIIME2 except for taxonomic classification, including barplots, relative abundance tables, diversity analysis, differential abundance testing." + }, "skip_taxonomy": { "type": "boolean", "description": "Skip taxonomic classification. 
Incompatible with `--sbdiexport`" diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index faeffec62..9e85bf6af 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -146,8 +146,15 @@ if ( params.dada_ref_taxonomy && !params.skip_dada_addspecies && !params.skip_da } } -//only run QIIME2 when taxonomy is actually calculated and all required data is available -if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { +// Only run QIIME2 taxonomy classification if needed parameters are passed and we are not skipping taxonomy or qiime steps. +if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier) ) { + run_qiime2_taxonomy = true +} else { + run_qiime2_taxonomy = false +} + +//only run QIIME2 downstream analysis when taxonomy is actually calculated and all required data is available +if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && !params.skip_qiime_downstream && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { run_qiime2 = true } else { run_qiime2 = false @@ -567,7 +574,7 @@ workflow AMPLISEQ { } //QIIME2 - if ( run_qiime2 ) { + if ( run_qiime2_taxonomy ) { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( ch_qiime_ref_taxonomy.collect(), From f4f5cda41b32a83c133b6678f8c8f4537d9b65ed Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:02:06 +0000 
Subject: [PATCH 32/48] Skip qiime downstream in reftaxcustom. --- conf/test_reftaxcustom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index 870a59e27..ea8a7c6d0 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -33,5 +33,5 @@ params { qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 - skip_qiime = false + skip_qiime_downstream = true } From 549c166365bf68f4edecff6121597cbcb01c8b99 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:21:06 +0000 Subject: [PATCH 33/48] Fix path for testing tarball passed to --qiime_ref_tax_custom. --- conf/test_reftaxcustom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index ea8a7c6d0..1afe1c2df 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -30,7 +30,7 @@ params { dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus" kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz" kraken2_assign_taxlevels = "D,P,C,O" - qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tar.gz" // Skip downstream analysis with QIIME2 skip_qiime_downstream = true From 851653404641128a88899ef1e3ceb88e79c945a9 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:38:04 +0000 Subject: [PATCH 34/48] Add snapshot of files coming from qiime2 taxonomy. 
--- tests/pipeline/reftaxcustom.nf.test.snap | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 7dca4e3e9..842b18de0 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -43,12 +43,19 @@ "timestamp": "2023-05-28T21:18:54+0000" }, "kraken2": { + "content": [ + "taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b", + "taxonomy/taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645" + ], + "timestamp": "2023-09-15T21:16:26+0000" + }, + "qiime2": { "content": [ "ASV_tax.user.kraken2.classifiedreads.txt:md5,8a4693c37d5c24b342ef161b92567764", "ASV_tax.user.kraken2.complete.tsv:md5,3613dac9ce1bf03f87b57d1523e705f1", "ASV_tax.user.kraken2.tsv:md5,95c3f9daa5da8fe00159fb07d394c3ce" ], - "timestamp": "2023-09-15T21:16:26+0000" + "timestamp": "2023-12-07T21:28:32+0000" }, "multiqc": { "content": [ From 745cab7de07628c06f5356dcdb9f8e64321bd074 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:39:18 +0000 Subject: [PATCH 35/48] Work towards a qiime_ref_tax_custom specific test. 
--- .github/workflows/ci.yml | 1 + conf/test_qiimecustom.config | 32 ++++++++++++++ nextflow.config | 1 + tests/pipeline/qiimecustom.nf.test | 55 +++++++++++++++++++++++++ tests/pipeline/qiimecustom.nf.test.snap | 43 +++++++++++++++++++ 5 files changed, 132 insertions(+) create mode 100644 conf/test_qiimecustom.config create mode 100644 tests/pipeline/qiimecustom.nf.test create mode 100644 tests/pipeline/qiimecustom.nf.test.snap diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 788582d92..e4b532bef 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,6 +50,7 @@ jobs: - "test_failed" - "test_multi" - "test_reftaxcustom" + - "test_qiimecustom" - "test_doubleprimers" - "test_iontorrent" - "test_novaseq" diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config new file mode 100644 index 000000000..ea6b97d81 --- /dev/null +++ b/conf/test_qiimecustom.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/ampliseq -profile test_qiimecustom, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test custom QIIME2 reference taxonomy database profile' + config_profile_description = 'Minimal test dataset to check --qiime_ref_tax_custom' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + FW_primer = "GTGYCAGCMGCCGCGGTAA" + RV_primer = "GGACTACNVGGGTWTCTAAT" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" + + // Custom reference taxonomy + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz" + + // Skip downstream analysis with QIIME2 + skip_qiime_downstream = true +} diff --git a/nextflow.config b/nextflow.config index c76b40582..831a43a22 100644 --- a/nextflow.config +++ b/nextflow.config @@ -274,6 +274,7 @@ profiles { test_failed { includeConfig 'conf/test_failed.config' } test_full { includeConfig 'conf/test_full.config' } test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' } + test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' } test_novaseq { includeConfig 'conf/test_novaseq.config' } test_pplace { includeConfig 'conf/test_pplace.config' } test_sintax { includeConfig 'conf/test_sintax.config' } diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test new file mode 100644 index 000000000..abd2a38a2 --- /dev/null +++ b/tests/pipeline/qiimecustom.nf.test @@ -0,0 +1,55 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + tag "test_reftaxcustom" + tag "dada2" + tag "pipeline" + + test("Custom DADA2 Reference Taxonomy Database") { + + when { + params 
{ + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, + { assert snapshot(path("$outputDir/overall_summary.tsv")).match("overall_summary_tsv") }, + { assert snapshot(path("$outputDir/barrnap/rrna.arc.gff"), + path("$outputDir/barrnap/rrna.bac.gff"), + path("$outputDir/barrnap/rrna.euk.gff"), + path("$outputDir/barrnap/rrna.mito.gff")).match("barrnap") }, + { assert new File("$outputDir/barrnap/summary.tsv").exists() }, + { assert snapshot(path("$outputDir/cutadapt/cutadapt_summary.tsv")).match("cutadapt") }, + { assert snapshot(path("$outputDir/dada2/ASV_seqs.fasta"), + path("$outputDir/dada2/ASV_table.tsv"), + path("$outputDir/dada2/DADA2_stats.tsv"), + path("$outputDir/dada2/DADA2_table.rds"), + path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") }, + { assert new File("$outputDir/dada2/ASV_tax.user.tsv").exists() }, + { assert new File("$outputDir/dada2/ASV_tax_species.user.tsv").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1a_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1a_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() }, + { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, + { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), + path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), + path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, + { assert 
snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + ) + } + } +} diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap new file mode 100644 index 000000000..680ca37ac --- /dev/null +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -0,0 +1,43 @@ +{ + "input": { + "content": [ + "Samplesheet.tsv:md5,dbf8d1a2b7933dab9e5a139f33c2b1f4" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "cutadapt": { + "content": [ + "cutadapt_summary.tsv:md5,5d02749984a811479e7d534fda75163f" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "software_versions": { + "content": [ + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "overall_summary_tsv": { + "content": [ + "overall_summary.tsv:md5,3231d6ee72b9a1e7742e5605caaff05a" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "barrnap": { + "content": [ + "rrna.arc.gff:md5,6dae470aace9293d5eb8c318584852dd", + "rrna.bac.gff:md5,439a9084f089120f700f938dfb58fa41", + "rrna.euk.gff:md5,c9bc1d9d8fb77dc19c95dee2d53840eb", + "rrna.mito.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "multiqc": { + "content": [ + 
"multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", + "multiqc_general_stats.txt:md5,88c2b9e6d02b83afe4f9551e6c9a91a7", + "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + ], + "timestamp": "2023-05-28T21:18:54+0000" + } +} From a1dfb5b1b6943fc244a58ea701ba50cd085ff2dc Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:45:15 +0000 Subject: [PATCH 36/48] Skip dada tax. --- conf/test_qiimecustom.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config index ea6b97d81..2233070cc 100644 --- a/conf/test_qiimecustom.config +++ b/conf/test_qiimecustom.config @@ -29,4 +29,5 @@ params { // Skip downstream analysis with QIIME2 skip_qiime_downstream = true + skip_dada_taxonomy = true } From 51dc97e82770417a5179abff1f50ae09c00ca71a Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:45:54 +0000 Subject: [PATCH 37/48] Sequence then taxonomy file for file pair to --qiime_ref_tax_custom. 
--- conf/test_qiimecustom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config index 2233070cc..2fc9cb736 100644 --- a/conf/test_qiimecustom.config +++ b/conf/test_qiimecustom.config @@ -25,7 +25,7 @@ params { input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" // Custom reference taxonomy - qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 skip_qiime_downstream = true From a33f17f7937769b43e9a3e9fb5c480cc115b67a3 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:49:21 +0000 Subject: [PATCH 38/48] Clarify in help text of --qiime_ref_tax_custom the ordering of a file pair. --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 62c54f79f..6ccfc3ad1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -434,7 +434,7 @@ }, "qiime_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths.", + "help_text": "Is preferred over `--qiime_ref_taxonomy`. 
A comma separated pair of (possibly gzipped) filepaths (sequence, taxonomy).", "description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)" }, "classifier": { From 8f57faec61a65a422c93c4cb6526ff3d6abcb65c Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:00:26 +0000 Subject: [PATCH 39/48] Update snapshots to include qiime2 in both correctly and add assertions for qiime2. --- tests/pipeline/qiimecustom.nf.test | 11 +++++------ tests/pipeline/qiimecustom.nf.test.snap | 7 +++++++ tests/pipeline/reftaxcustom.nf.test | 2 ++ tests/pipeline/reftaxcustom.nf.test.snap | 10 +++++----- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test index abd2a38a2..8ec67571b 100644 --- a/tests/pipeline/qiimecustom.nf.test +++ b/tests/pipeline/qiimecustom.nf.test @@ -2,11 +2,11 @@ nextflow_pipeline { name "Test Workflow main.nf" script "main.nf" - tag "test_reftaxcustom" - tag "dada2" + tag "test_qiimecustom" + tag "qiime2" tag "pipeline" - test("Custom DADA2 Reference Taxonomy Database") { + test("Custom QIIME2 Reference Taxonomy Database") { when { params { @@ -41,9 +41,8 @@ nextflow_pipeline { { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() }, { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, - { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), - path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), - path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, + { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"), + path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), 
path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap index 680ca37ac..616e1de0f 100644 --- a/tests/pipeline/qiimecustom.nf.test.snap +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -32,6 +32,13 @@ ], "timestamp": "2023-05-28T21:18:54+0000" }, + "qiime2": { + "content": [ + "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,b744a656dbd4e710697bf9ee47f26c87", + "taxonomy.tsv:md5,44585412583f0cf5f2b82a1337f16756" + ], + "timestamp": "2023-12-07T21:28:32+0000" + }, "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test index abd2a38a2..3f72ec5f0 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -44,6 +44,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, + { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"), + path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 842b18de0..90b157ec2 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -44,16 +44,16 @@ }, "kraken2": { "content": [ - 
"taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b", - "taxonomy/taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645" + "ASV_tax.user.kraken2.classifiedreads.txt:md5,8a4693c37d5c24b342ef161b92567764", + "ASV_tax.user.kraken2.complete.tsv:md5,3613dac9ce1bf03f87b57d1523e705f1", + "ASV_tax.user.kraken2.tsv:md5,95c3f9daa5da8fe00159fb07d394c3ce" ], "timestamp": "2023-09-15T21:16:26+0000" }, "qiime2": { "content": [ - "ASV_tax.user.kraken2.classifiedreads.txt:md5,8a4693c37d5c24b342ef161b92567764", - "ASV_tax.user.kraken2.complete.tsv:md5,3613dac9ce1bf03f87b57d1523e705f1", - "ASV_tax.user.kraken2.tsv:md5,95c3f9daa5da8fe00159fb07d394c3ce" + "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b", + "taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645" ], "timestamp": "2023-12-07T21:28:32+0000" }, From 74e05b2a26208befa00fd2bad63cc9fd5f6d97de Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:02:21 +0000 Subject: [PATCH 40/48] Make ordering of sequence and taxonomy files deterministic in case of file pair. --- subworkflows/local/qiime2_preptax.nf | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 9d5c68983..97ccba63c 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -35,7 +35,14 @@ workflow QIIME2_PREPTAX { ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) - ch_ref_database = ch_qiime_db_files.collate(2) + ch_ref_database_fna = ch_qiime_db_dir.filter { + it.getName().endsWith(".fna") + } + ch_ref_database_tax = ch_qiime_db_dir.filter { + it.getName().endsWith(".tax") + } + + ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) // Handle case we have been provided a single filepath (tarball or directory). 
} else { ch_qiime_ref_taxonomy.flatten() From b65df44c9ee053896666e9ccbee9bdc7ac2c41f8 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:03:51 +0000 Subject: [PATCH 41/48] Fix filtering in file pair case. --- subworkflows/local/qiime2_preptax.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 97ccba63c..7d0be52d7 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -35,10 +35,10 @@ workflow QIIME2_PREPTAX { ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) - ch_ref_database_fna = ch_qiime_db_dir.filter { + ch_ref_database_fna = ch_qiime_db_files.filter { it.getName().endsWith(".fna") } - ch_ref_database_tax = ch_qiime_db_dir.filter { + ch_ref_database_tax = ch_qiime_db_files.filter { it.getName().endsWith(".tax") } From 45bee719af1aba754a9bfbea274d5116204b0df7 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:31:07 +0000 Subject: [PATCH 42/48] Fix version mixing in --qiime_ref_taxonomy case. 
--- subworkflows/local/qiime2_preptax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index d514f0860..dfa287253 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -82,7 +82,7 @@ workflow QIIME2_PREPTAX { } } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) - ch_qiime2_preptax_versions(FORMAT_TAXONOMY_QIIME.out.versions) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(FORMAT_TAXONOMY_QIIME.out.versions) ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) } From 3c9eaf129c0dd311b4e8bbdbc8e047eb2519cefb Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:45:45 +0000 Subject: [PATCH 43/48] Update software version expectations for tests that no longer run QIIME_PREPTAX. --- tests/pipeline/doubleprimers.nf.test.snap | 2 +- tests/pipeline/multi.nf.test.snap | 2 +- tests/pipeline/qiimecustom.nf.test.snap | 2 +- tests/pipeline/reftaxcustom.nf.test.snap | 2 +- tests/pipeline/sintax.nf.test.snap | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap index d7cc9dce8..b5e9cb2bb 100644 --- a/tests/pipeline/doubleprimers.nf.test.snap +++ b/tests/pipeline/doubleprimers.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + 
"{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-07-27T13:49:03+0000" }, diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index 2c0382f0f..daba2601d 100644 --- a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap @@ -14,7 +14,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap index 616e1de0f..6c39df372 100644 --- a/tests/pipeline/qiimecustom.nf.test.snap +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, 
CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 90b157ec2..8fca1c5b0 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, 
dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index 069c7fa88..b19bf8feb 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, From 07f4407a4dcfc62be99f61e8a4ebaf6543caaf47 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:47:55 +0000 Subject: [PATCH 44/48] Remove assertions on dada2 tax and phyloseq files existing in test_qiimecustom. 
--- tests/pipeline/qiimecustom.nf.test | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test index 8ec67571b..2cdc080a7 100644 --- a/tests/pipeline/qiimecustom.nf.test +++ b/tests/pipeline/qiimecustom.nf.test @@ -30,8 +30,6 @@ nextflow_pipeline { path("$outputDir/dada2/DADA2_stats.tsv"), path("$outputDir/dada2/DADA2_table.rds"), path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") }, - { assert new File("$outputDir/dada2/ASV_tax.user.tsv").exists() }, - { assert new File("$outputDir/dada2/ASV_tax_species.user.tsv").exists() }, { assert new File("$outputDir/fastqc/sampleID_1_1_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/sampleID_1_2_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/sampleID_1a_1_fastqc.html").exists() }, @@ -46,8 +44,7 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } From 1c129e568cb2cbba9c11af01584763e3bca96dfe Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 23:08:40 +0000 Subject: [PATCH 45/48] Looks like qiime2 tax alignment is non-deterministic, just verify the files it emits are emitted. 
--- tests/pipeline/qiimecustom.nf.test | 4 ++-- tests/pipeline/qiimecustom.nf.test.snap | 7 ------- tests/pipeline/reftaxcustom.nf.test | 4 ++-- tests/pipeline/reftaxcustom.nf.test.snap | 7 ------- 4 files changed, 4 insertions(+), 18 deletions(-) diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test index 2cdc080a7..493968153 100644 --- a/tests/pipeline/qiimecustom.nf.test +++ b/tests/pipeline/qiimecustom.nf.test @@ -39,8 +39,8 @@ nextflow_pipeline { { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() }, { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, - { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"), - path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") }, + { assert new File("$outputDir/qiime2/taxonomy/taxonomy.tsv").exists() }, + { assert new File("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap index 6c39df372..594688a92 100644 --- a/tests/pipeline/qiimecustom.nf.test.snap +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -32,13 +32,6 @@ ], "timestamp": "2023-05-28T21:18:54+0000" }, - "qiime2": { - "content": [ - "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,b744a656dbd4e710697bf9ee47f26c87", - "taxonomy.tsv:md5,44585412583f0cf5f2b82a1337f16756" - ], - "timestamp": "2023-12-07T21:28:32+0000" - }, "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", diff --git a/tests/pipeline/reftaxcustom.nf.test 
b/tests/pipeline/reftaxcustom.nf.test index 3f72ec5f0..4e70861b6 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -44,8 +44,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, - { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"), - path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") }, + { assert new File("$outputDir/qiime2/taxonomy/taxonomy.tsv").exists() }, + { assert new File("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 8fca1c5b0..b5aa10f14 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -50,13 +50,6 @@ ], "timestamp": "2023-09-15T21:16:26+0000" }, - "qiime2": { - "content": [ - "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b", - "taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645" - ], - "timestamp": "2023-12-07T21:28:32+0000" - }, "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", From 2ace59599e1c0c5ed19ffabf03ce2adb5a34c428 Mon Sep 17 00:00:00 2001 From: Matthew Date: Fri, 8 Dec 2023 14:26:03 +0000 Subject: [PATCH 46/48] Make --skip_qiime_downstream help text clearer. 
Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 6ccfc3ad1..14eef4b45 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -658,7 +658,7 @@ }, "skip_qiime_downstream": { "type": "boolean", - "description": "Skip steps that are executed by QIIME2 except for taxonomic classification, including barplots, relative abundance tables, diversity analysis, differential abundance testing." + "description": "Skip steps that are executed by QIIME2 except for taxonomic classification. Skip steps including barplots, relative abundance tables, diversity analysis, differential abundance testing." }, "skip_taxonomy": { "type": "boolean", From 4464c38cef7be3e9309c3d036fda7172aba130a4 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 8 Dec 2023 14:48:12 +0000 Subject: [PATCH 47/48] Remove assertion on qiime phyloseq file no longer produced. 
--- tests/pipeline/pplace.nf.test | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index 564cf2b9b..b0507df75 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -56,8 +56,7 @@ nextflow_pipeline { { assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } From 6b71e4d2500e72ac3eda29d80f0654ed7e5fa481 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 10:19:46 +0100 Subject: [PATCH 48/48] Fix reporting --- assets/report_template.Rmd | 12 +++++++++--- modules/local/summary_report.nf | 3 ++- workflows/ampliseq.nf | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 8c8fc21e7..264a71493 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -980,9 +980,15 @@ cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in # Header cat("## QIIME2\n") -cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) - using the database: `", params$qiime2_ref_tax_title, "`. - More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "") +# indicate reference taxonomy +if ( !isFALSE(params$qiime2_ref_tax_title) ) { + cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) + using the database: `", params$qiime2_ref_tax_title, "`. 
+ More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "") +} else { + cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) using a custom database ", + "provided by the user.\n\n", sep = "") +} # Read file and prepare table asv_tax <- read.table(params$qiime2_taxonomy, header = TRUE, sep = "\t") diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index a8e082b01..1a288a0fb 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -118,7 +118,8 @@ process SUMMARY_REPORT { kraken2_tax ? "kraken2_taxonomy='$kraken2_tax',kraken2_confidence='$params.kraken2_confidence'" : "", kraken2_tax && !params.kraken2_ref_tax_custom ? "kraken2_ref_tax_title='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["title"]}',kraken2_ref_tax_file='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["file"]}',kraken2_ref_tax_citation='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["citation"]}'" : "", pplace_tax ? "pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "", - qiime2_tax && params.qiime_ref_taxonomy ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "", + qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "", + qiime2_tax && params.qiime_ref_taxonomy ? "qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : "", run_qiime2 ? 
"val_used_taxonomy='$val_used_taxonomy'" : "", filter_stats_tsv ? "filter_stats_tsv='$filter_stats_tsv',qiime2_filtertaxa='$qiime2_filtertaxa',exclude_taxa='$params.exclude_taxa',min_frequency='$params.min_frequency',min_samples='$params.min_samples'" : "", barplot ? "barplot=TRUE" : "", diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9e85bf6af..6dcc370f1 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -863,7 +863,7 @@ workflow AMPLISEQ { !params.skip_taxonomy && ( params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) ? KRAKEN2_TAXONOMY_WF.out.tax_tsv.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? FASTA_NEWICK_EPANG_GAPPA.out.heattree.ifEmpty( [[],[]] ) : [[],[]], - !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], + !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2_taxonomy ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], run_qiime2, run_qiime2 ? val_used_taxonomy : "", run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "",