From d86c5696b08fe2ab0dea8f04219e01c7ada55c55 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Tue, 28 Nov 2023 12:04:12 -0500
Subject: [PATCH 01/48] Add params.qiime_ref_tax_custom in preparation for
 allowing a custom QIIME2 reference database.

---
 nextflow.config       |  1 +
 nextflow_schema.json  |  5 +++++
 workflows/ampliseq.nf | 15 +++++++++++----
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 93e19e86a..1e5a567f0 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -108,6 +108,7 @@ params {
     cut_dada_ref_taxonomy    = false
     sintax_ref_taxonomy      = null
     qiime_ref_taxonomy       = null
+    qiime_ref_tax_custom     = null
     kraken2_ref_taxonomy     = null
     kraken2_assign_taxlevels = null
     kraken2_ref_tax_custom   = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 1d3098da5..69820e8c6 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -432,6 +432,11 @@
                         "greengenes85"
                     ]
                 },
+                "qiime_ref_tax_custom": {
+                    "type": "string",
+                    "help_text": "Is preferred over `--qiime_ref_taxonomy`. Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database.",
+                    "description": "Path to a custom QIIME2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)"
+                },
                 "classifier": {
                     "type": "string",
                     "description": "Path to QIIME2 trained classifier file (typically *-classifier.qza)",
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index 05ddfee76..eee62b971 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -59,9 +59,16 @@ if (params.dada_ref_tax_custom) {
     val_dada_ref_taxonomy = "none"
 }
 
-if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {
+if (params.qiime_ref_tax_custom) {
+    ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true)
+    val_qiime_ref_taxonomy = "user"
+} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {.
     ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) }
-} else { ch_qiime_ref_taxonomy = Channel.empty() }
+    val_qiime_ref_taxonomy = params.qiime_ref_taxonomy.replace('=','_').replace('.','_')
+} else {
+    ch_qiime_ref_taxonomy = Channel.empty()
+    val_qiime_ref_taxonomy = "none"
+}
 
 if (params.sintax_ref_taxonomy && !params.skip_taxonomy) {
     ch_sintax_ref_taxonomy = Channel.fromList(params.sintax_ref_databases[params.sintax_ref_taxonomy]["file"]).map { file(it) }
@@ -131,7 +138,7 @@ if ( params.dada_ref_taxonomy && !params.skip_dada_addspecies && !params.skip_da
 }
 
 //only run QIIME2 when taxonomy is actually calculated and all required data is available
-if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) {
+if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) {
     run_qiime2 = true
 } else {
     run_qiime2 = false
@@ -552,7 +559,7 @@ workflow AMPLISEQ {
 
     //QIIME2
     if ( run_qiime2 ) {
-        if (params.qiime_ref_taxonomy && !params.classifier) {
+        if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) {
             QIIME2_PREPTAX (
                 ch_qiime_ref_taxonomy.collect(),
                 params.FW_primer,

From 439097c93751277109c643359abb11f7ab158a14 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Tue, 28 Nov 2023 12:05:14 -0500
Subject: [PATCH 02/48] Implementation of logic to handle a custom qiime2
 reference database stored in either a directory or a tarball.

---
 subworkflows/local/qiime2_preptax.nf | 29 ++++++++++++++++++++++++++--
 workflows/ampliseq.nf                |  1 +
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 7f3cb80b2..429aac713 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -2,6 +2,7 @@
  * Training of a classifier with QIIME2
  */
 
+include { UNTAR                 } from '../../modules/nf-core/untar/main'
 include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime'
 include { QIIME2_EXTRACT        } from '../../modules/local/qiime2_extract'
 include { QIIME2_TRAIN          } from '../../modules/local/qiime2_train'
@@ -9,13 +10,37 @@ include { QIIME2_TRAIN          } from '../../modules/local/qiime2_train'
 workflow QIIME2_PREPTAX {
     take:
     ch_qiime_ref_taxonomy //channel, list of files
+    val_qiime_ref_taxonomy //val
     FW_primer //val
     RV_primer //val
 
     main:
-    FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy )
+    if (params.qiime_ref_tax_custom) {
+        ch_qiime_ref_taxonomy
+            .branch {
+                tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") )
+                dir: it.isDirectory()
+                failed: true
+            }.set { ch_qiime_ref_taxonomy }
+        ch_qiime_ref_taxonomy.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." }
+
+        UNTAR (
+            ch_qiime_ref_taxonomy.tar
+                .map {
+                    db ->
+                        def meta = [:]
+                        meta.id = val_qiime_ref_taxonomy
+                        [ meta, db ] } )
+        ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] }
+        ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_taxonomy.dir)
+
+        ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) }
+    } else {
+        FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy )
+
+        ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax)
+    }
 
-    ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax)
     ch_ref_database
         .map {
             db ->
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index eee62b971..03d1f6c43 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -562,6 +562,7 @@ workflow AMPLISEQ {
         if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) {
             QIIME2_PREPTAX (
                 ch_qiime_ref_taxonomy.collect(),
+                val_qiime_ref_taxonomy,
                 params.FW_primer,
                 params.RV_primer
             )

From 14c89b9c22faec6544a1ffa97e28afe989e6336f Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Tue, 28 Nov 2023 14:53:05 -0500
Subject: [PATCH 03/48] Some params checking logic.

---
 lib/WorkflowAmpliseq.groovy | 6 +++---
 lib/WorkflowMain.groovy     | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy
index 5e1039115..0868866a7 100755
--- a/lib/WorkflowAmpliseq.groovy
+++ b/lib/WorkflowAmpliseq.groovy
@@ -77,12 +77,12 @@ class WorkflowAmpliseq {
         }
 
         if (params.skip_dada_taxonomy && params.sbdiexport) {
-            if (!params.sintax_ref_taxonomy && (params.skip_qiime || !params.qiime_ref_taxonomy)) {
+            if (!params.sintax_ref_taxonomy && (params.skip_qiime || (!params.qiime_ref_taxonomy && !params.qiime_ref_tax_custom))) {
                 Nextflow.error("Incompatible parameters: `--sbdiexport` expects taxa annotation and therefore annotation with either DADA2, SINTAX, or QIIME2 is needed.")
             }
         }
 
-        if ( (!params.FW_primer || !params.RV_primer) && params.qiime_ref_taxonomy && !params.skip_qiime && !params.skip_taxonomy ) {
+        if ( (!params.FW_primer || !params.RV_primer) && (params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_qiime && !params.skip_taxonomy ) {
             Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the QIIME2 reference database to the amplicon sequences. Please specify primers or do not use `--qiime_ref_taxonomy`.")
         }
 
@@ -90,7 +90,7 @@ class WorkflowAmpliseq {
             Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the DADA2 reference database to the amplicon sequences. Please specify primers or do not use `--cut_dada_ref_taxonomy`.")
         }
 
-        if (params.qiime_ref_taxonomy && params.classifier) {
+        if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && params.classifier) {
             Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.")
         }
 
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index 7f49735e4..4b7ec2afc 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -34,7 +34,7 @@ class WorkflowMain {
         if (params.sintax_ref_taxonomy && !params.skip_taxonomy) {
             sintaxreftaxonomyExistsError(params, log)
         }
-        if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {
+        if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_taxonomy && !params.classifier) {
             qiimereftaxonomyExistsError(params, log)
         }
 

From d214ec0252321261d56b315d0b34797432011442 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Tue, 28 Nov 2023 14:55:09 -0500
Subject: [PATCH 04/48] Remove stray '.' after opening brace in ampliseq.nf.

---
 workflows/ampliseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index 03d1f6c43..3d4ad07d9 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -62,7 +62,7 @@ if (params.dada_ref_tax_custom) {
 if (params.qiime_ref_tax_custom) {
     ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true)
     val_qiime_ref_taxonomy = "user"
-} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {.
+} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {
     ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) }
     val_qiime_ref_taxonomy = params.qiime_ref_taxonomy.replace('=','_').replace('.','_')
 } else {

From 9346d7aea37c365e31ad37ae8ffa08a51a2ab8a4 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Tue, 28 Nov 2023 14:57:54 -0500
Subject: [PATCH 05/48] Only perform collect if going to FORMAT_TAXONOMY_QIIME.

---
 subworkflows/local/qiime2_preptax.nf | 2 +-
 workflows/ampliseq.nf                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 429aac713..0d66308a1 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -36,7 +36,7 @@ workflow QIIME2_PREPTAX {
 
         ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) }
     } else {
-        FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy )
+        FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() )
 
         ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax)
     }
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index 3d4ad07d9..b45fd9a57 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -561,7 +561,7 @@ workflow AMPLISEQ {
     if ( run_qiime2 ) {
         if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) {
             QIIME2_PREPTAX (
-                ch_qiime_ref_taxonomy.collect(),
+                ch_qiime_ref_taxonomy,
                 val_qiime_ref_taxonomy,
                 params.FW_primer,
                 params.RV_primer

From ef053b1369e5f75aad794772e387257b06e99d35 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Tue, 28 Nov 2023 15:13:23 -0500
Subject: [PATCH 06/48] Set into new channel when branching on
 ch_qiime_ref_taxonomy.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 subworkflows/local/qiime2_preptax.nf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 0d66308a1..38a9faf89 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -21,18 +21,18 @@ workflow QIIME2_PREPTAX {
                 tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") )
                 dir: it.isDirectory()
                 failed: true
-            }.set { ch_qiime_ref_taxonomy }
-        ch_qiime_ref_taxonomy.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." }
+            }.set { ch_qiime_ref_tax_branched }
+        ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." }
 
         UNTAR (
-            ch_qiime_ref_taxonomy.tar
+            ch_qiime_ref_tax_branched.tar
                 .map {
                     db ->
                         def meta = [:]
                         meta.id = val_qiime_ref_taxonomy
                         [ meta, db ] } )
         ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] }
-        ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_taxonomy.dir)
+        ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir)
 
         ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) }
     } else {

From a48a09fe3f7398ce15fc8b0b1bc449b92c880ae8 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Tue, 28 Nov 2023 15:48:14 -0500
Subject: [PATCH 07/48] Try to unpack the database dir into component files
 using a module.

---
 modules/local/qiime2_unpack.nf       | 31 ++++++++++++++++++++++++++++
 subworkflows/local/qiime2_preptax.nf |  5 ++++-
 2 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 modules/local/qiime2_unpack.nf

diff --git a/modules/local/qiime2_unpack.nf b/modules/local/qiime2_unpack.nf
new file mode 100644
index 000000000..e77286da7
--- /dev/null
+++ b/modules/local/qiime2_unpack.nf
@@ -0,0 +1,31 @@
+process QIIME2_UNPACK {
+    label 'process_low'
+
+    conda "conda-forge::sed=4.7"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
+        'docker.io/biocontainers/biocontainers:v1.2.0_cv1' }"
+
+    input:
+    path(database)
+
+    output:
+    path("*.fna"), emit: fasta
+    path("*.tax"), emit: tax
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    // TODO: need to not have this be a copy.
+    script:
+    """
+    cp $database/*.fna .
+    cp $database/*.tax .
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bash: \$(bash --version | sed -n 1p | sed 's/GNU bash, version //g')
+    END_VERSIONS
+    """
+}
diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 38a9faf89..3ad8365f2 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -6,6 +6,7 @@ include { UNTAR                 } from '../../modules/nf-core/untar/main'
 include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime'
 include { QIIME2_EXTRACT        } from '../../modules/local/qiime2_extract'
 include { QIIME2_TRAIN          } from '../../modules/local/qiime2_train'
+include { QIIME2_UNPACK         } from '../../modules/local/qiime2_unpack'
 
 workflow QIIME2_PREPTAX {
     take:
@@ -34,7 +35,9 @@ workflow QIIME2_PREPTAX {
         ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] }
         ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir)
 
-        ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) }
+        QIIME2_UNPACK(ch_qiime_db_dir)
+
+        ch_ref_database = ch_qiime_db_dir.map{ QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) }
     } else {
         FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() )
 

From a9971b68ede12d0431d248362d1c279b7ac0cc07 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Tue, 28 Nov 2023 15:59:03 -0500
Subject: [PATCH 08/48] Remove map wrapping the combine.

---
 subworkflows/local/qiime2_preptax.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 3ad8365f2..8229190ac 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -37,7 +37,7 @@ workflow QIIME2_PREPTAX {
 
         QIIME2_UNPACK(ch_qiime_db_dir)
 
-        ch_ref_database = ch_qiime_db_dir.map{ QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) }
+        ch_ref_database = QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax)
     } else {
         FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() )
 

From aac51bd4b162e17ed0f9ca5864527651abf4e23d Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Tue, 28 Nov 2023 16:14:05 -0500
Subject: [PATCH 09/48] Remove unpack in favour of map and filter.

---
 modules/local/qiime2_unpack.nf       | 31 ----------------------------
 subworkflows/local/qiime2_preptax.nf | 16 +++++++++++---
 2 files changed, 13 insertions(+), 34 deletions(-)
 delete mode 100644 modules/local/qiime2_unpack.nf

diff --git a/modules/local/qiime2_unpack.nf b/modules/local/qiime2_unpack.nf
deleted file mode 100644
index e77286da7..000000000
--- a/modules/local/qiime2_unpack.nf
+++ /dev/null
@@ -1,31 +0,0 @@
-process QIIME2_UNPACK {
-    label 'process_low'
-
-    conda "conda-forge::sed=4.7"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
-        'docker.io/biocontainers/biocontainers:v1.2.0_cv1' }"
-
-    input:
-    path(database)
-
-    output:
-    path("*.fna"), emit: fasta
-    path("*.tax"), emit: tax
-    path "versions.yml" , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    // TODO: need to not have this be a copy.
-    script:
-    """
-    cp $database/*.fna .
-    cp $database/*.tax .
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        bash: \$(bash --version | sed -n 1p | sed 's/GNU bash, version //g')
-    END_VERSIONS
-    """
-}
diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 8229190ac..2969d3709 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -6,7 +6,6 @@ include { UNTAR                 } from '../../modules/nf-core/untar/main'
 include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime'
 include { QIIME2_EXTRACT        } from '../../modules/local/qiime2_extract'
 include { QIIME2_TRAIN          } from '../../modules/local/qiime2_train'
-include { QIIME2_UNPACK         } from '../../modules/local/qiime2_unpack'
 
 workflow QIIME2_PREPTAX {
     take:
@@ -35,9 +34,20 @@ workflow QIIME2_PREPTAX {
         ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] }
         ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir)
 
-        QIIME2_UNPACK(ch_qiime_db_dir)
+        ch_ref_database_fna = ch_qiime_db_dir.map{ dir ->
+            files = file(dir.resolve("*.fna"), checkIfExists: true)
+        } | filter {
+            if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database."
+            ! it instanceof List
+        }
+        ch_ref_database_tax = ch_qiime_db_dir.map{ dir ->
+            files = file(dir.resolve("*.tax"), checkIfExists: true)
+        } | filter {
+            if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database."
+            ! it instanceof List
+        }
 
-        ch_ref_database = QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax)
+        ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax)
     } else {
         FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() )
 

From 1b2825ec952b9d2d4d8edb8cbd348fd843d42223 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Tue, 28 Nov 2023 16:17:38 -0500
Subject: [PATCH 10/48] Glob results in list in all circumstances, check length
 instead.

---
 subworkflows/local/qiime2_preptax.nf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 2969d3709..f040f2077 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -37,14 +37,14 @@ workflow QIIME2_PREPTAX {
         ch_ref_database_fna = ch_qiime_db_dir.map{ dir ->
             files = file(dir.resolve("*.fna"), checkIfExists: true)
         } | filter {
-            if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database."
-            ! it instanceof List
+            if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database."
+            it.size() == 1
         }
         ch_ref_database_tax = ch_qiime_db_dir.map{ dir ->
             files = file(dir.resolve("*.tax"), checkIfExists: true)
         } | filter {
-            if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database."
-            ! it instanceof List
+            if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database."
+            it.size() == 1
         }
 
         ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax)

From a4219a0baa0e4b256ee97c5c1a54a566fb801f07 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Tue, 28 Nov 2023 17:11:59 -0500
Subject: [PATCH 11/48] Update CHANGELOG.md.

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 620c97165..b884a4087 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Added`
 
+[]() - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification.
+
 ### `Changed`
 
 ### `Fixed`

From 0ccf6e6daf7b46be84a81fd9498d2e55fc5db795 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Wed, 29 Nov 2023 10:12:47 -0500
Subject: [PATCH 12/48] Update error message when passing --classifier together
 with --qiime_ref_taxonomy or --qiime_ref_tax_custom.

---
 lib/WorkflowAmpliseq.groovy | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy
index 0868866a7..25db3ed68 100755
--- a/lib/WorkflowAmpliseq.groovy
+++ b/lib/WorkflowAmpliseq.groovy
@@ -91,7 +91,7 @@ class WorkflowAmpliseq {
         }
 
         if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && params.classifier) {
-            Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.")
+            Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` and `--qiime_ref_tax_custom` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.")
         }
 
         if (params.kraken2_ref_tax_custom && !params.kraken2_assign_taxlevels ) {

From 590f415952b249d83ae5fcd7d128b109310d0983 Mon Sep 17 00:00:00 2001
From: Matthew <matthew.marshall96@yahoo.co.uk>
Date: Wed, 29 Nov 2023 11:13:35 -0500
Subject: [PATCH 13/48] Update CHANGELOG.md with pull request number.

Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com>
---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b884a4087..6c2030cc4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Added`
 
-[]() - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification.
+[#667](https://github.com/nf-core/ampliseq/pull/667) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification.
 
 ### `Changed`
 

From f5d80f572e008693d3f83dd5a8f169784da546c7 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Wed, 29 Nov 2023 11:58:33 -0500
Subject: [PATCH 14/48] Add support for specifying two (possibly gzipped) files
 as --qiime_ref_tax_custom.

---
 modules/local/gzip_decompress.nf     | 32 ++++++++++++
 nextflow_schema.json                 |  2 +-
 subworkflows/local/qiime2_preptax.nf | 75 ++++++++++++++++++----------
 workflows/ampliseq.nf                |  6 ++-
 4 files changed, 86 insertions(+), 29 deletions(-)
 create mode 100644 modules/local/gzip_decompress.nf

diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf
new file mode 100644
index 000000000..fa8fa82cf
--- /dev/null
+++ b/modules/local/gzip_decompress.nf
@@ -0,0 +1,32 @@
+process GZIP_DECOMPRESS {
+    tag "$file"
+    label 'process_single'
+
+    conda "conda-forge::sed=4.7 conda-forge::gzip=1.13"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+        'nf-core/ubuntu:20.04' }"
+
+    input:
+    path(file)
+
+    output:
+    path("$outfile"), emit: ungzip
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args  = task.ext.args ?: ''
+    outfile = task.ext.outfile ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.gz$/, ""))
+
+    """
+    gzip $args -c -d $file > $outfile
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gzip: \$(echo \$(gzip --version 2>&1) | sed 's/gzip //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 69820e8c6..c6c6f8b09 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -434,7 +434,7 @@
                 },
                 "qiime_ref_tax_custom": {
                     "type": "string",
-                    "help_text": "Is preferred over `--qiime_ref_taxonomy`. Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database.",
+                    "help_text": "Is preferred over `--qiime_ref_taxonomy`. Can be compressed tar archive (.tar.gz|.tgz), a pair of (possibly gzipped) filepaths, or folder containing the database.",
                     "description": "Path to a custom QIIME2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)"
                 },
                 "classifier": {
diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index f040f2077..7cc4817d0 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -3,6 +3,7 @@
  */
 
 include { UNTAR                 } from '../../modules/nf-core/untar/main'
+include { GZIP_DECOMPRESS       } from '../../modules/local/gzip_decompress.nf'
 include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime'
 include { QIIME2_EXTRACT        } from '../../modules/local/qiime2_extract'
 include { QIIME2_TRAIN          } from '../../modules/local/qiime2_train'
@@ -15,36 +16,56 @@ workflow QIIME2_PREPTAX {
     RV_primer //val
 
     main:
+    ch_qiime2_preptax_versions = Channel.empty()
+
     if (params.qiime_ref_tax_custom) {
-        ch_qiime_ref_taxonomy
-            .branch {
-                tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") )
-                dir: it.isDirectory()
-                failed: true
-            }.set { ch_qiime_ref_tax_branched }
-        ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." }
+        if (ch_qiime_ref_taxonomy.size() == 2) {
+            ch_qiime_ref_taxonomy
+                .branch {
+                    gzip: it.isFile() && ( it.getName().endsWith(".gz") )
+                    decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") )
+                    failed: true
+                }.set { ch_qiime_ref_tax_branched }
+            ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." }
 
-        UNTAR (
-            ch_qiime_ref_tax_branched.tar
-                .map {
-                    db ->
-                        def meta = [:]
-                        meta.id = val_qiime_ref_taxonomy
-                        [ meta, db ] } )
-        ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] }
-        ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir)
+            GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip)
+            ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions)
 
-        ch_ref_database_fna = ch_qiime_db_dir.map{ dir ->
-            files = file(dir.resolve("*.fna"), checkIfExists: true)
-        } | filter {
-            if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database."
-            it.size() == 1
-        }
-        ch_ref_database_tax = ch_qiime_db_dir.map{ dir ->
-            files = file(dir.resolve("*.tax"), checkIfExists: true)
-        } | filter {
-            if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database."
-            it.size() == 1
+            ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip
+            ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed)
+
+            ch_ref_database = ch_qiime_db_files.collate(2)
+        } else {
+            ch_qiime_ref_taxonomy
+                .branch {
+                    tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") )
+                    dir: it.isDirectory()
+                    failed: true
+                }.set { ch_qiime_ref_tax_branched }
+            ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." }
+
+            UNTAR (
+                ch_qiime_ref_tax_branched.tar
+                    .map {
+                        db ->
+                            def meta = [:]
+                            meta.id = val_qiime_ref_taxonomy
+                            [ meta, db ] } )
+            ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] }
+            ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir)
+
+            ch_ref_database_fna = ch_qiime_db_dir.map{ dir ->
+                files = file(dir.resolve("*.fna"), checkIfExists: true)
+            } | filter {
+                if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database."
+                it.size() == 1
+            }
+            ch_ref_database_tax = ch_qiime_db_dir.map{ dir ->
+                files = file(dir.resolve("*.tax"), checkIfExists: true)
+            } | filter {
+                if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database."
+                it.size() == 1
+            }
         }
 
         ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax)
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index b45fd9a57..9bd1cf5c4 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -60,7 +60,11 @@ if (params.dada_ref_tax_custom) {
 }
 
 if (params.qiime_ref_tax_custom) {
-    ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true)
+    if ("${params.qiime_ref_tax_custom}".contains(",")) {
+        ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true)
+    } else {
+        ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true)
+    }
     val_qiime_ref_taxonomy = "user"
 } else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {
     ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) }

From 7016682fd8375525b2e23e3fdebaa01cbdd8f082 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Wed, 29 Nov 2023 14:07:43 -0500
Subject: [PATCH 15/48] Only support providing two files separated by a comma.

---
 nextflow_schema.json                 |  4 +-
 subworkflows/local/qiime2_preptax.nf | 59 ++++++----------------------
 workflows/ampliseq.nf                | 10 ++---
 3 files changed, 20 insertions(+), 53 deletions(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index c6c6f8b09..79a4cebb0 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -434,8 +434,8 @@
                 },
                 "qiime_ref_tax_custom": {
                     "type": "string",
-                    "help_text": "Is preferred over `--qiime_ref_taxonomy`. Can be compressed tar archive (.tar.gz|.tgz), a pair of (possibly gzipped) filepaths, or folder containing the database.",
-                    "description": "Path to a custom QIIME2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)"
+                    "help_text": "Is preferred over `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths.",
+                    "description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)"
                 },
                 "classifier": {
                     "type": "string",
diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 7cc4817d0..a4e1d7768 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -19,56 +19,23 @@ workflow QIIME2_PREPTAX {
     ch_qiime2_preptax_versions = Channel.empty()
 
     if (params.qiime_ref_tax_custom) {
-        if (ch_qiime_ref_taxonomy.size() == 2) {
-            ch_qiime_ref_taxonomy
-                .branch {
-                    gzip: it.isFile() && ( it.getName().endsWith(".gz") )
-                    decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") )
-                    failed: true
-                }.set { ch_qiime_ref_tax_branched }
-            ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." }
+        ch_qiime_ref_taxonomy.view()
 
-            GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip)
-            ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions)
+        // ch_qiime_ref_taxonomy
+        //     .branch {
+        //         gzip: it.isFile() && ( it.getName().endsWith(".gz") )
+        //         decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") )
+        //         failed: true
+        //     }.set { ch_qiime_ref_tax_branched }
+        // ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." }
 
-            ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip
-            ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed)
+        // GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip)
+        // ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions)
 
-            ch_ref_database = ch_qiime_db_files.collate(2)
-        } else {
-            ch_qiime_ref_taxonomy
-                .branch {
-                    tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") )
-                    dir: it.isDirectory()
-                    failed: true
-                }.set { ch_qiime_ref_tax_branched }
-            ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." }
+        // ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip
+        // ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed)
 
-            UNTAR (
-                ch_qiime_ref_tax_branched.tar
-                    .map {
-                        db ->
-                            def meta = [:]
-                            meta.id = val_qiime_ref_taxonomy
-                            [ meta, db ] } )
-            ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] }
-            ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir)
-
-            ch_ref_database_fna = ch_qiime_db_dir.map{ dir ->
-                files = file(dir.resolve("*.fna"), checkIfExists: true)
-            } | filter {
-                if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database."
-                it.size() == 1
-            }
-            ch_ref_database_tax = ch_qiime_db_dir.map{ dir ->
-                files = file(dir.resolve("*.tax"), checkIfExists: true)
-            } | filter {
-                if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database."
-                it.size() == 1
-            }
-        }
-
-        ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax)
+        // ch_ref_database = ch_qiime_db_files.collate(2)
     } else {
         FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() )
 
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index 9bd1cf5c4..07df8960a 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -60,11 +60,11 @@ if (params.dada_ref_tax_custom) {
 }
 
 if (params.qiime_ref_tax_custom) {
-    if ("${params.qiime_ref_tax_custom}".contains(",")) {
-        ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true)
-    } else {
-        ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true)
+    if (!"${params.qiime_ref_tax_custom}".contains(",")) {
+        error "--qiime_ref_tax_custom takes two filepaths separated by a comma. Please review input."
     }
+
+    ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true)
     val_qiime_ref_taxonomy = "user"
 } else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {
     ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) }
@@ -565,7 +565,7 @@ workflow AMPLISEQ {
     if ( run_qiime2 ) {
         if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) {
             QIIME2_PREPTAX (
-                ch_qiime_ref_taxonomy,
+                ch_qiime_ref_taxonomy.collect(),
                 val_qiime_ref_taxonomy,
                 params.FW_primer,
                 params.RV_primer

From 79cbfe8fb0bf50035529deb5fe24d18693784b75 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Wed, 29 Nov 2023 14:41:49 -0500
Subject: [PATCH 16/48] Fix: split() returns a String[], but we actually need a
 List.

---
 subworkflows/local/qiime2_preptax.nf | 2 +-
 workflows/ampliseq.nf                | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 7cc4817d0..f36dda424 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -19,7 +19,7 @@ workflow QIIME2_PREPTAX {
     ch_qiime2_preptax_versions = Channel.empty()
 
     if (params.qiime_ref_tax_custom) {
-        if (ch_qiime_ref_taxonomy.size() == 2) {
+        if ("${params.qiime_ref_tax_custom}".contains(",")) {
             ch_qiime_ref_taxonomy
                 .branch {
                     gzip: it.isFile() && ( it.getName().endsWith(".gz") )
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index 9bd1cf5c4..3a3323269 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -61,7 +61,7 @@ if (params.dada_ref_tax_custom) {
 
 if (params.qiime_ref_tax_custom) {
     if ("${params.qiime_ref_tax_custom}".contains(",")) {
-        ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true)
+        ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList("${params.qiime_ref_tax_custom}".split(",")), checkIfExists: true)
     } else {
         ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true)
     }
@@ -565,7 +565,7 @@ workflow AMPLISEQ {
     if ( run_qiime2 ) {
         if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) {
             QIIME2_PREPTAX (
-                ch_qiime_ref_taxonomy,
+                ch_qiime_ref_taxonomy.collect(),
                 val_qiime_ref_taxonomy,
                 params.FW_primer,
                 params.RV_primer

From 6d767bc1ea80aa464d80b4aae48944759a44f3a6 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Wed, 29 Nov 2023 14:46:30 -0500
Subject: [PATCH 17/48] Move ch_ref_database set into correct scope.

---
 subworkflows/local/qiime2_preptax.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index f36dda424..19a9bc4b3 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -66,9 +66,9 @@ workflow QIIME2_PREPTAX {
                 if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database."
                 it.size() == 1
             }
-        }
 
-        ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax)
+            ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax)
+        }
     } else {
         FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() )
 

From f76b49bbcd56315189603e0e292dba2e108f68e5 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Wed, 29 Nov 2023 14:53:35 -0500
Subject: [PATCH 18/48] Try using map to work through list of files.

---
 subworkflows/local/qiime2_preptax.nf | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 19a9bc4b3..96a0db961 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -21,18 +21,19 @@ workflow QIIME2_PREPTAX {
     if (params.qiime_ref_tax_custom) {
         if ("${params.qiime_ref_tax_custom}".contains(",")) {
             ch_qiime_ref_taxonomy
-                .branch {
-                    gzip: it.isFile() && ( it.getName().endsWith(".gz") )
-                    decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") )
-                    failed: true
-                }.set { ch_qiime_ref_tax_branched }
-            ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." }
-
-            GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip)
-            ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions)
+                .map { filepath ->
+                    candidate = file(filepath, checkIfExists: true)
+                    if (filepath.endsWith(".gz")) {
+                        GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip)
+                        ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions)
 
-            ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip
-            ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed)
+                        return GZIP_DECOMPRESS.out.ungzip
+                    } else if (filepath.endsWith(".fna") || filepath.endsWith(".tax")) {
+                        return candidate
+                    } else {
+                        error "$filepath is neither a compressed or decompressed sequence or taxonomy file. Please review input."
+                    }
+                }.set { ch_qiime_db_files }
 
             ch_ref_database = ch_qiime_db_files.collate(2)
         } else {

From 0890a0e64beb641422698f61c2adfee3f0db46a7 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Wed, 29 Nov 2023 15:17:56 -0500
Subject: [PATCH 19/48] Can't call processes from inside maps.

---
 subworkflows/local/qiime2_preptax.nf | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 96a0db961..78a4ab27f 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -20,24 +20,23 @@ workflow QIIME2_PREPTAX {
 
     if (params.qiime_ref_tax_custom) {
         if ("${params.qiime_ref_tax_custom}".contains(",")) {
-            ch_qiime_ref_taxonomy
-                .map { filepath ->
-                    candidate = file(filepath, checkIfExists: true)
-                    if (filepath.endsWith(".gz")) {
-                        GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip)
-                        ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions)
+            ch_qiime_ref_taxonomy.flatten()
+                .branch {
+                    compressed: it.isFile() && it.getName().endsWith(".gz")
+                    decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith(".tax") )
+                    failed: true
+                }.set { ch_qiime_ref_tax_branched }
+            ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." }
+
+            GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.compressed)
+            ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions)
 
-                        return GZIP_DECOMPRESS.out.ungzip
-                    } else if (filepath.endsWith(".fna") || filepath.endsWith(".tax")) {
-                        return candidate
-                    } else {
-                        error "$filepath is neither a compressed or decompressed sequence or taxonomy file. Please review input."
-                    }
-                }.set { ch_qiime_db_files }
+            ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip
+            ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed)
 
             ch_ref_database = ch_qiime_db_files.collate(2)
         } else {
-            ch_qiime_ref_taxonomy
+            ch_qiime_ref_taxonomy.flatten()
                 .branch {
                     tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") )
                     dir: it.isDirectory()

From 7276a8d1e36eb176fa4171c46d34aed24e4c8fad Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Wed, 29 Nov 2023 15:20:25 -0500
Subject: [PATCH 20/48] Fix outfile definition in GZIP_DECOMPRESS.

---
 modules/local/gzip_decompress.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf
index fa8fa82cf..effd256c9 100644
--- a/modules/local/gzip_decompress.nf
+++ b/modules/local/gzip_decompress.nf
@@ -19,7 +19,7 @@ process GZIP_DECOMPRESS {
 
     script:
     def args  = task.ext.args ?: ''
-    outfile = task.ext.outfile ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.gz$/, ""))
+    outfile = task.ext.outfile ?: archive.baseName.toString().replaceFirst(/\.gz$/, "")
 
     """
     gzip $args -c -d $file > $outfile

From f0a8715a9cc33c520e2047f0f45c35093c4a28c5 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Wed, 29 Nov 2023 15:21:45 -0500
Subject: [PATCH 21/48] Fix outfile definition in GZIP_DECOMPRESS: use file, not archive.

---
 modules/local/gzip_decompress.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf
index effd256c9..c6ea37a5a 100644
--- a/modules/local/gzip_decompress.nf
+++ b/modules/local/gzip_decompress.nf
@@ -19,7 +19,7 @@ process GZIP_DECOMPRESS {
 
     script:
     def args  = task.ext.args ?: ''
-    outfile = task.ext.outfile ?: archive.baseName.toString().replaceFirst(/\.gz$/, "")
+    outfile = task.ext.outfile ?: file.baseName.toString().replaceFirst(/\.gz$/, "")
 
     """
     gzip $args -c -d $file > $outfile

From 56f241b05ecbbf1f068c3ba7ee9c11b749c92e16 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Wed, 29 Nov 2023 15:48:34 -0500
Subject: [PATCH 22/48] Add some comments.

---
 subworkflows/local/qiime2_preptax.nf | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 78a4ab27f..561a75dc8 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -19,6 +19,7 @@ workflow QIIME2_PREPTAX {
     ch_qiime2_preptax_versions = Channel.empty()
 
     if (params.qiime_ref_tax_custom) {
+        // Handle case where we have been provided a pair of filepaths.
         if ("${params.qiime_ref_tax_custom}".contains(",")) {
             ch_qiime_ref_taxonomy.flatten()
                 .branch {
@@ -35,6 +36,7 @@ workflow QIIME2_PREPTAX {
             ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed)
 
             ch_ref_database = ch_qiime_db_files.collate(2)
+        // Handle case we have been provided a single filepath (tarball or directory).
         } else {
             ch_qiime_ref_taxonomy.flatten()
                 .branch {

From 7907df5bfb4843a80352e8de18265c7331c4ba8a Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Wed, 29 Nov 2023 15:49:02 -0500
Subject: [PATCH 23/48] Add an early check that exactly two paths are given in
 a comma-separated list.

---
 workflows/ampliseq.nf | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index 3a3323269..a99e71f9e 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -61,7 +61,12 @@ if (params.dada_ref_tax_custom) {
 
 if (params.qiime_ref_tax_custom) {
     if ("${params.qiime_ref_tax_custom}".contains(",")) {
-        ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList("${params.qiime_ref_tax_custom}".split(",")), checkIfExists: true)
+        qiime_ref_paths = "${params.qiime_ref_tax_custom}".split(",")
+        if (qiime_ref_paths.length != 2) {
+            error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two files paths separated by a comma. Please review input."
+        }
+
+        ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList(qiime_ref_paths), checkIfExists: true)
     } else {
         ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true)
     }

From 1d6ce32e4911f86ebaf02af447db6fdcb3cadb47 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Wed, 29 Nov 2023 15:53:08 -0500
Subject: [PATCH 24/48] Make sure downstream is aware of new means of pointing
 to a qiime ref db.

---
 workflows/ampliseq.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index a99e71f9e..9195038dd 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -624,7 +624,7 @@ workflow AMPLISEQ {
             log.info "Use Kraken2 taxonomy classification"
             val_used_taxonomy = "Kraken2"
             ch_tax = QIIME2_INTAX ( ch_kraken2_tax, "" ).qza
-        } else if ( params.qiime_ref_taxonomy || params.classifier ) {
+        } else if ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) {
             log.info "Use QIIME2 taxonomy classification"
             val_used_taxonomy = "QIIME2"
             ch_tax = QIIME2_TAXONOMY.out.qza
@@ -724,7 +724,7 @@ workflow AMPLISEQ {
     // MODULE: Predict functional potential of a bacterial community from marker genes with Picrust2
     //
     if ( params.picrust ) {
-        if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) {
+        if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) {
             PICRUST ( QIIME2_EXPORT.out.abs_fasta, QIIME2_EXPORT.out.abs_tsv, "QIIME2", "This Picrust2 analysis is based on filtered reads from QIIME2" )
         } else {
             PICRUST ( ch_fasta, ch_dada2_asv, "DADA2", "This Picrust2 analysis is based on unfiltered reads from DADA2" )
@@ -856,7 +856,7 @@ workflow AMPLISEQ {
             !params.skip_taxonomy && ( params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) ? KRAKEN2_TAXONOMY_WF.out.tax_tsv.ifEmpty( [] ) : [],
             !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax.ifEmpty( [] ) : [],
             !params.skip_taxonomy && params.pplace_tree ? FASTA_NEWICK_EPANG_GAPPA.out.heattree.ifEmpty( [[],[]] ) : [[],[]],
-            !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [],
+            !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [],
             run_qiime2,
             run_qiime2 ? val_used_taxonomy : "",
             run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "",

From 913d284b4c6dd54a20c9dbe273656c30df2888f3 Mon Sep 17 00:00:00 2001
From: Matthew <matthew.marshall96@yahoo.co.uk>
Date: Fri, 1 Dec 2023 14:13:39 -0500
Subject: [PATCH 25/48] Improve error message clarity for ill-formed file.

Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com>
---
 subworkflows/local/qiime2_preptax.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 561a75dc8..7a5960afe 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -27,7 +27,7 @@ workflow QIIME2_PREPTAX {
                     decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith(".tax") )
                     failed: true
                 }.set { ch_qiime_ref_tax_branched }
-            ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." }
+            ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed (ends with `.gz`) or decompressed sequence (ends with `.fna`) or taxonomy file (ends with `.tax`). Please review input." }
 
             GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.compressed)
             ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions)

From 330bf43e19cf42f87f9a9213928724fe42cbc98c Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Fri, 1 Dec 2023 14:34:05 -0500
Subject: [PATCH 26/48] Fix typo in error on --qiime_ref_tax_custom form.

---
 workflows/ampliseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index 9195038dd..faeffec62 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -63,7 +63,7 @@ if (params.qiime_ref_tax_custom) {
     if ("${params.qiime_ref_tax_custom}".contains(",")) {
         qiime_ref_paths = "${params.qiime_ref_tax_custom}".split(",")
         if (qiime_ref_paths.length != 2) {
-            error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two files paths separated by a comma. Please review input."
+            error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two filepaths separated by a comma. Please review input."
         }
 
         ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList(qiime_ref_paths), checkIfExists: true)

From b8c595cc7a647593656945cc34c55861986c2038 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Fri, 1 Dec 2023 14:41:10 -0500
Subject: [PATCH 27/48] Remove unneeded collect from FORMAT_TAXONOMY_QIIME
 invocation.

---
 subworkflows/local/qiime2_preptax.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 561a75dc8..d1ed888a6 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -72,7 +72,7 @@ workflow QIIME2_PREPTAX {
             ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax)
         }
     } else {
-        FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() )
+        FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy )
 
         ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax)
     }

From 3e05fe507674b72cc1757063fb4621f47defd4d9 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Fri, 1 Dec 2023 14:47:32 -0500
Subject: [PATCH 28/48] Improve version tracking in PREPTAX.

---
 subworkflows/local/qiime2_preptax.nf | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index d1ed888a6..050ad78c7 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -53,6 +53,8 @@ workflow QIIME2_PREPTAX {
                             def meta = [:]
                             meta.id = val_qiime_ref_taxonomy
                             [ meta, db ] } )
+            ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(UNTAR.out.versions)
+
             ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] }
             ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir)
 
@@ -73,6 +75,7 @@ workflow QIIME2_PREPTAX {
         }
     } else {
         FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy )
+        ch_qiime2_preptax_versions(FORMAT_TAXONOMY_QIIME.out.versions)
 
         ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax)
     }
@@ -85,10 +88,14 @@ workflow QIIME2_PREPTAX {
                 meta.RV_primer = RV_primer
                 [ meta, db ] }
         .set { ch_ref_database }
+
     QIIME2_EXTRACT ( ch_ref_database )
+    ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_EXTRACT.out.versions)
+
     QIIME2_TRAIN ( QIIME2_EXTRACT.out.qza )
+    ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_TRAIN.out.versions)
 
     emit:
-    classifier      = QIIME2_TRAIN.out.qza
-    versions        = QIIME2_TRAIN.out.versions
+    classifier = QIIME2_TRAIN.out.qza
+    versions   = ch_qiime2_preptax_versions
 }

From b012aeb0792d86f5edcd1dc81b34049fc7b7b16d Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Fri, 1 Dec 2023 15:11:18 -0500
Subject: [PATCH 29/48] Add qiime_ref_tax_custom to testing in reftaxcustom
 nf-test.

---
 conf/test_reftaxcustom.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config
index 4233d1ea0..c2d7c4eed 100644
--- a/conf/test_reftaxcustom.config
+++ b/conf/test_reftaxcustom.config
@@ -30,6 +30,7 @@ params {
     dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus"
     kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz"
     kraken2_assign_taxlevels = "D,P,C,O"
+    qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz"
 
     // Skip downstream analysis with QIIME2
     skip_qiime = true

From ba71667c714ff22258bc7de7f0126dda8e8bcfe0 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 15:41:55 +0000
Subject: [PATCH 30/48] Don't skip qiime while testing.

---
 conf/test_reftaxcustom.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config
index c2d7c4eed..870a59e27 100644
--- a/conf/test_reftaxcustom.config
+++ b/conf/test_reftaxcustom.config
@@ -33,5 +33,5 @@ params {
     qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz"
 
     // Skip downstream analysis with QIIME2
-    skip_qiime = true
+    skip_qiime = false
 }

From 1360415a39ae3aaf659a8201ef1958a1fb53414e Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 21:00:27 +0000
Subject: [PATCH 31/48] Add a skip option for just downstream qiime analysis,
 but still perform qiime taxonomic classification.

---
 nextflow.config       |  1 +
 nextflow_schema.json  |  4 ++++
 workflows/ampliseq.nf | 13 ++++++++++---
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index c4e94bfb1..c76b40582 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -87,6 +87,7 @@ params {
     skip_dada_quality      = false
     skip_barrnap           = false
     skip_qiime             = false
+    skip_qiime_downstream  = false
     skip_fastqc            = false
     skip_alpha_rarefaction = false
     skip_abundance_tables  = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index c6c6f8b09..2b4a8dca6 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -656,6 +656,10 @@
                     "type": "boolean",
                     "description": "Skip all steps that are executed by QIIME2, including QIIME2 software download, taxonomy assignment by QIIME2, barplots, relative abundance tables, diversity analysis, differential abundance testing."
                 },
+                "skip_qiime_downstream": {
+                    "type": "boolean",
+                    "description": "Skip steps that are executed by QIIME2 except for taxonomic classification, including barplots, relative abundance tables, diversity analysis, differential abundance testing."
+                },
                 "skip_taxonomy": {
                     "type": "boolean",
                     "description": "Skip taxonomic classification. Incompatible with `--sbdiexport`"
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index faeffec62..9e85bf6af 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -146,8 +146,15 @@ if ( params.dada_ref_taxonomy && !params.skip_dada_addspecies && !params.skip_da
     }
 }
 
-//only run QIIME2 when taxonomy is actually calculated and all required data is available
-if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) {
+// Only run QIIME2 taxonomy classification if needed parameters are passed and we are not skipping taxonomy or qiime steps.
+if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier) ) {
+    run_qiime2_taxonomy = true
+} else {
+    run_qiime2_taxonomy = false
+}
+
+//only run QIIME2 downstream analysis when taxonomy is actually calculated and all required data is available
+if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && !params.skip_qiime_downstream && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) {
     run_qiime2 = true
 } else {
     run_qiime2 = false
@@ -567,7 +574,7 @@ workflow AMPLISEQ {
     }
 
     //QIIME2
-    if ( run_qiime2 ) {
+    if ( run_qiime2_taxonomy ) {
         if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) {
             QIIME2_PREPTAX (
                 ch_qiime_ref_taxonomy.collect(),

From f4f5cda41b32a83c133b6678f8c8f4537d9b65ed Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 21:02:06 +0000
Subject: [PATCH 32/48] Skip qiime downstream in reftaxcustom.

---
 conf/test_reftaxcustom.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config
index 870a59e27..ea8a7c6d0 100644
--- a/conf/test_reftaxcustom.config
+++ b/conf/test_reftaxcustom.config
@@ -33,5 +33,5 @@ params {
     qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz"
 
     // Skip downstream analysis with QIIME2
-    skip_qiime = false
+    skip_qiime_downstream = true
 }

From 549c166365bf68f4edecff6121597cbcb01c8b99 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 21:21:06 +0000
Subject: [PATCH 33/48] Fix path for testing tarball passed to
 --qiime_ref_tax_custom.

---
 conf/test_reftaxcustom.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config
index ea8a7c6d0..1afe1c2df 100644
--- a/conf/test_reftaxcustom.config
+++ b/conf/test_reftaxcustom.config
@@ -30,7 +30,7 @@ params {
     dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus"
     kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz"
     kraken2_assign_taxlevels = "D,P,C,O"
-    qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz"
+    qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tar.gz"
 
     // Skip downstream analysis with QIIME2
     skip_qiime_downstream = true

From 851653404641128a88899ef1e3ceb88e79c945a9 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 21:38:04 +0000
Subject: [PATCH 34/48] Add snapshot of files coming from qiime2 taxonomy.

---
 tests/pipeline/reftaxcustom.nf.test.snap | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap
index 7dca4e3e9..842b18de0 100644
--- a/tests/pipeline/reftaxcustom.nf.test.snap
+++ b/tests/pipeline/reftaxcustom.nf.test.snap
@@ -43,12 +43,19 @@
         "timestamp": "2023-05-28T21:18:54+0000"
     },
     "kraken2": {
+        "content": [
+            "taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b",
+            "taxonomy/taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645"
+        ],
+        "timestamp": "2023-09-15T21:16:26+0000"
+    },
+    "qiime2": {
         "content": [
             "ASV_tax.user.kraken2.classifiedreads.txt:md5,8a4693c37d5c24b342ef161b92567764",
             "ASV_tax.user.kraken2.complete.tsv:md5,3613dac9ce1bf03f87b57d1523e705f1",
             "ASV_tax.user.kraken2.tsv:md5,95c3f9daa5da8fe00159fb07d394c3ce"
         ],
-        "timestamp": "2023-09-15T21:16:26+0000"
+        "timestamp": "2023-12-07T21:28:32+0000"
     },
     "multiqc": {
         "content": [

From 745cab7de07628c06f5356dcdb9f8e64321bd074 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 21:39:18 +0000
Subject: [PATCH 35/48] Work towards a qiime_ref_tax_custom specific test.

---
 .github/workflows/ci.yml                |  1 +
 conf/test_qiimecustom.config            | 32 ++++++++++++++
 nextflow.config                         |  1 +
 tests/pipeline/qiimecustom.nf.test      | 55 +++++++++++++++++++++++++
 tests/pipeline/qiimecustom.nf.test.snap | 43 +++++++++++++++++++
 5 files changed, 132 insertions(+)
 create mode 100644 conf/test_qiimecustom.config
 create mode 100644 tests/pipeline/qiimecustom.nf.test
 create mode 100644 tests/pipeline/qiimecustom.nf.test.snap

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 788582d92..e4b532bef 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -50,6 +50,7 @@ jobs:
           - "test_failed"
           - "test_multi"
           - "test_reftaxcustom"
+          - "test_qiimecustom"
           - "test_doubleprimers"
           - "test_iontorrent"
           - "test_novaseq"
diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config
new file mode 100644
index 000000000..ea6b97d81
--- /dev/null
+++ b/conf/test_qiimecustom.config
@@ -0,0 +1,32 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/ampliseq -profile test_qiimecustom,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name = 'Test custom QIIME2 reference taxonomy database profile'
+    config_profile_description = 'Minimal test dataset to check --qiime_ref_tax_custom'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '6.GB'
+    max_time   = '6.h'
+
+    // Input data
+    FW_primer = "GTGYCAGCMGCCGCGGTAA"
+    RV_primer = "GGACTACNVGGGTWTCTAAT"
+    input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv"
+
+    // Custom reference taxonomy
+    qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz"
+
+    // Skip downstream analysis with QIIME2
+    skip_qiime_downstream = true
+}
diff --git a/nextflow.config b/nextflow.config
index c76b40582..831a43a22 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -274,6 +274,7 @@ profiles {
     test_failed        { includeConfig 'conf/test_failed.config'        }
     test_full          { includeConfig 'conf/test_full.config'          }
     test_reftaxcustom  { includeConfig 'conf/test_reftaxcustom.config'  }
+    test_qiimecustom   { includeConfig 'conf/test_qiimecustom.config'   }
     test_novaseq       { includeConfig 'conf/test_novaseq.config'       }
     test_pplace        { includeConfig 'conf/test_pplace.config'        }
     test_sintax        { includeConfig 'conf/test_sintax.config'        }
diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test
new file mode 100644
index 000000000..abd2a38a2
--- /dev/null
+++ b/tests/pipeline/qiimecustom.nf.test
@@ -0,0 +1,55 @@
+nextflow_pipeline {
+
+    name "Test Workflow main.nf"
+    script "main.nf"
+    tag "test_reftaxcustom"
+    tag "dada2"
+    tag "pipeline"
+
+    test("Custom DADA2 Reference Taxonomy Database") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") },
+                { assert snapshot(path("$outputDir/overall_summary.tsv")).match("overall_summary_tsv") },
+                { assert snapshot(path("$outputDir/barrnap/rrna.arc.gff"),
+                                path("$outputDir/barrnap/rrna.bac.gff"),
+                                path("$outputDir/barrnap/rrna.euk.gff"),
+                                path("$outputDir/barrnap/rrna.mito.gff")).match("barrnap") },
+                { assert new File("$outputDir/barrnap/summary.tsv").exists() },
+                { assert snapshot(path("$outputDir/cutadapt/cutadapt_summary.tsv")).match("cutadapt") },
+                { assert snapshot(path("$outputDir/dada2/ASV_seqs.fasta"),
+                                path("$outputDir/dada2/ASV_table.tsv"),
+                                path("$outputDir/dada2/DADA2_stats.tsv"),
+                                path("$outputDir/dada2/DADA2_table.rds"),
+                                path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") },
+                { assert new File("$outputDir/dada2/ASV_tax.user.tsv").exists() },
+                { assert new File("$outputDir/dada2/ASV_tax_species.user.tsv").exists() },
+                { assert new File("$outputDir/fastqc/sampleID_1_1_fastqc.html").exists() },
+                { assert new File("$outputDir/fastqc/sampleID_1_2_fastqc.html").exists() },
+                { assert new File("$outputDir/fastqc/sampleID_1a_1_fastqc.html").exists() },
+                { assert new File("$outputDir/fastqc/sampleID_1a_2_fastqc.html").exists() },
+                { assert new File("$outputDir/fastqc/sampleID_2_1_fastqc.html").exists() },
+                { assert new File("$outputDir/fastqc/sampleID_2_2_fastqc.html").exists() },
+                { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() },
+                { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() },
+                { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") },
+                { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"),
+                                path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"),
+                                path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") },
+                { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
+                                path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
+                                path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") },
+                { assert new File("$outputDir/summary_report/summary_report.html").exists() },
+                { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }
+            )
+        }
+    }
+}
diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap
new file mode 100644
index 000000000..680ca37ac
--- /dev/null
+++ b/tests/pipeline/qiimecustom.nf.test.snap
@@ -0,0 +1,43 @@
+{
+    "input": {
+        "content": [
+            "Samplesheet.tsv:md5,dbf8d1a2b7933dab9e5a139f33c2b1f4"
+        ],
+        "timestamp": "2023-05-28T21:18:54+0000"
+    },
+    "cutadapt": {
+        "content": [
+            "cutadapt_summary.tsv:md5,5d02749984a811479e7d534fda75163f"
+        ],
+        "timestamp": "2023-05-28T21:18:54+0000"
+    },
+    "software_versions": {
+        "content": [
+            "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}"
+        ],
+        "timestamp": "2023-05-28T21:18:54+0000"
+    },
+    "overall_summary_tsv": {
+        "content": [
+            "overall_summary.tsv:md5,3231d6ee72b9a1e7742e5605caaff05a"
+        ],
+        "timestamp": "2023-05-28T21:18:54+0000"
+    },
+    "barrnap": {
+        "content": [
+            "rrna.arc.gff:md5,6dae470aace9293d5eb8c318584852dd",
+            "rrna.bac.gff:md5,439a9084f089120f700f938dfb58fa41",
+            "rrna.euk.gff:md5,c9bc1d9d8fb77dc19c95dee2d53840eb",
+            "rrna.mito.gff:md5,df19e1b84ba6f691d20c72b397c88abf"
+        ],
+        "timestamp": "2023-05-28T21:18:54+0000"
+    },
+    "multiqc": {
+        "content": [
+            "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52",
+            "multiqc_general_stats.txt:md5,88c2b9e6d02b83afe4f9551e6c9a91a7",
+            "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1"
+        ],
+        "timestamp": "2023-05-28T21:18:54+0000"
+    }
+}

From a1dfb5b1b6943fc244a58ea701ba50cd085ff2dc Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 21:45:15 +0000
Subject: [PATCH 36/48] Skip dada tax.

---
 conf/test_qiimecustom.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config
index ea6b97d81..2233070cc 100644
--- a/conf/test_qiimecustom.config
+++ b/conf/test_qiimecustom.config
@@ -29,4 +29,5 @@ params {
 
     // Skip downstream analysis with QIIME2
     skip_qiime_downstream = true
+    skip_dada_taxonomy = true
 }

From 51dc97e82770417a5179abff1f50ae09c00ca71a Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 21:45:54 +0000
Subject: [PATCH 37/48] Sequence then taxonomy file for file pair to
 --qiime_ref_tax_custom.

---
 conf/test_qiimecustom.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config
index 2233070cc..2fc9cb736 100644
--- a/conf/test_qiimecustom.config
+++ b/conf/test_qiimecustom.config
@@ -25,7 +25,7 @@ params {
     input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv"
 
     // Custom reference taxonomy
-    qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz"
+    qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz"
 
     // Skip downstream analysis with QIIME2
     skip_qiime_downstream = true

From a33f17f7937769b43e9a3e9fb5c480cc115b67a3 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 21:49:21 +0000
Subject: [PATCH 38/48] Clarify in help text of --qiime_ref_tax_custom the
 ordering of a file pair.

---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 62c54f79f..6ccfc3ad1 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -434,7 +434,7 @@
                 },
                 "qiime_ref_tax_custom": {
                     "type": "string",
-                    "help_text": "Is preferred over `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths.",
+                    "help_text": "Is preferred over `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths (sequence, taxonomy).",
                     "description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)"
                 },
                 "classifier": {

From 8f57faec61a65a422c93c4cb6526ff3d6abcb65c Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 22:00:26 +0000
Subject: [PATCH 39/48] Update snapshots to correctly include qiime2 in both
 tests and add assertions for qiime2.

---
 tests/pipeline/qiimecustom.nf.test       | 11 +++++------
 tests/pipeline/qiimecustom.nf.test.snap  |  7 +++++++
 tests/pipeline/reftaxcustom.nf.test      |  2 ++
 tests/pipeline/reftaxcustom.nf.test.snap | 10 +++++-----
 4 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test
index abd2a38a2..8ec67571b 100644
--- a/tests/pipeline/qiimecustom.nf.test
+++ b/tests/pipeline/qiimecustom.nf.test
@@ -2,11 +2,11 @@ nextflow_pipeline {
 
     name "Test Workflow main.nf"
     script "main.nf"
-    tag "test_reftaxcustom"
-    tag "dada2"
+    tag "test_qiimecustom"
+    tag "qiime2"
     tag "pipeline"
 
-    test("Custom DADA2 Reference Taxonomy Database") {
+    test("Custom QIIME2 Reference Taxonomy Database") {
 
         when {
             params {
@@ -41,9 +41,8 @@ nextflow_pipeline {
                 { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() },
                 { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() },
                 { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") },
-                { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"),
-                                path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"),
-                                path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") },
+                { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"),
+                                path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") },
                 { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
                                 path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
                                 path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") },
diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap
index 680ca37ac..616e1de0f 100644
--- a/tests/pipeline/qiimecustom.nf.test.snap
+++ b/tests/pipeline/qiimecustom.nf.test.snap
@@ -32,6 +32,13 @@
         ],
         "timestamp": "2023-05-28T21:18:54+0000"
     },
+    "qiime2": {
+        "content": [
+            "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,b744a656dbd4e710697bf9ee47f26c87",
+            "taxonomy.tsv:md5,44585412583f0cf5f2b82a1337f16756"
+        ],
+        "timestamp": "2023-12-07T21:28:32+0000"
+    },
     "multiqc": {
         "content": [
             "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52",
diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test
index abd2a38a2..3f72ec5f0 100644
--- a/tests/pipeline/reftaxcustom.nf.test
+++ b/tests/pipeline/reftaxcustom.nf.test
@@ -44,6 +44,8 @@ nextflow_pipeline {
                 { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"),
                                 path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"),
                                 path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") },
+                { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"),
+                                path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") },
                 { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
                                 path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
                                 path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") },
diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap
index 842b18de0..90b157ec2 100644
--- a/tests/pipeline/reftaxcustom.nf.test.snap
+++ b/tests/pipeline/reftaxcustom.nf.test.snap
@@ -44,16 +44,16 @@
     },
     "kraken2": {
         "content": [
-            "taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b",
-            "taxonomy/taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645"
+            "ASV_tax.user.kraken2.classifiedreads.txt:md5,8a4693c37d5c24b342ef161b92567764",
+            "ASV_tax.user.kraken2.complete.tsv:md5,3613dac9ce1bf03f87b57d1523e705f1",
+            "ASV_tax.user.kraken2.tsv:md5,95c3f9daa5da8fe00159fb07d394c3ce"
         ],
         "timestamp": "2023-09-15T21:16:26+0000"
     },
     "qiime2": {
         "content": [
-            "ASV_tax.user.kraken2.classifiedreads.txt:md5,8a4693c37d5c24b342ef161b92567764",
-            "ASV_tax.user.kraken2.complete.tsv:md5,3613dac9ce1bf03f87b57d1523e705f1",
-            "ASV_tax.user.kraken2.tsv:md5,95c3f9daa5da8fe00159fb07d394c3ce"
+            "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b",
+            "taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645"
         ],
         "timestamp": "2023-12-07T21:28:32+0000"
     },

From 74e05b2a26208befa00fd2bad63cc9fd5f6d97de Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 22:02:21 +0000
Subject: [PATCH 40/48] Make ordering of sequence and taxonomy files
 deterministic in case of file pair.

---
 subworkflows/local/qiime2_preptax.nf | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 9d5c68983..97ccba63c 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -35,7 +35,14 @@ workflow QIIME2_PREPTAX {
             ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip
             ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed)
 
-            ch_ref_database = ch_qiime_db_files.collate(2)
+            ch_ref_database_fna = ch_qiime_db_dir.filter {
+                it.getName().endsWith(".fna")
+            }
+            ch_ref_database_tax = ch_qiime_db_dir.filter {
+                it.getName().endsWith(".tax")
+            }
+
+            ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax)
         // Handle case we have been provided a single filepath (tarball or directory).
         } else {
             ch_qiime_ref_taxonomy.flatten()

From b65df44c9ee053896666e9ccbee9bdc7ac2c41f8 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 22:03:51 +0000
Subject: [PATCH 41/48] Fix filtering in file pair case.

---
 subworkflows/local/qiime2_preptax.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index 97ccba63c..7d0be52d7 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -35,10 +35,10 @@ workflow QIIME2_PREPTAX {
             ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip
             ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed)
 
-            ch_ref_database_fna = ch_qiime_db_dir.filter {
+            ch_ref_database_fna = ch_qiime_db_files.filter {
                 it.getName().endsWith(".fna")
             }
-            ch_ref_database_tax = ch_qiime_db_dir.filter {
+            ch_ref_database_tax = ch_qiime_db_files.filter {
                 it.getName().endsWith(".tax")
             }
 

From 45bee719af1aba754a9bfbea274d5116204b0df7 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 22:31:07 +0000
Subject: [PATCH 42/48] Fix version mixing in --qiime_ref_taxonomy case.

---
 subworkflows/local/qiime2_preptax.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf
index d514f0860..dfa287253 100644
--- a/subworkflows/local/qiime2_preptax.nf
+++ b/subworkflows/local/qiime2_preptax.nf
@@ -82,7 +82,7 @@ workflow QIIME2_PREPTAX {
         }
     } else {
         FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy )
-        ch_qiime2_preptax_versions(FORMAT_TAXONOMY_QIIME.out.versions)
+        ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(FORMAT_TAXONOMY_QIIME.out.versions)
 
         ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax)
     }

From 3c9eaf129c0dd311b4e8bbdbc8e047eb2519cefb Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 22:45:45 +0000
Subject: [PATCH 43/48] Update software version expectations for tests that no
 longer run QIIME2_PREPTAX.

---
 tests/pipeline/doubleprimers.nf.test.snap | 2 +-
 tests/pipeline/multi.nf.test.snap         | 2 +-
 tests/pipeline/qiimecustom.nf.test.snap   | 2 +-
 tests/pipeline/reftaxcustom.nf.test.snap  | 2 +-
 tests/pipeline/sintax.nf.test.snap        | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap
index d7cc9dce8..b5e9cb2bb 100644
--- a/tests/pipeline/doubleprimers.nf.test.snap
+++ b/tests/pipeline/doubleprimers.nf.test.snap
@@ -13,7 +13,7 @@
     },
     "software_versions": {
         "content": [
-            "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}"
+            "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}"
         ],
         "timestamp": "2023-07-27T13:49:03+0000"
     },
diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap
index 2c0382f0f..daba2601d 100644
--- a/tests/pipeline/multi.nf.test.snap
+++ b/tests/pipeline/multi.nf.test.snap
@@ -14,7 +14,7 @@
     },
     "software_versions": {
         "content": [
-            "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}"
+            "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}"
         ],
         "timestamp": "2023-05-28T21:15:03+0000"
     },
diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap
index 616e1de0f..6c39df372 100644
--- a/tests/pipeline/qiimecustom.nf.test.snap
+++ b/tests/pipeline/qiimecustom.nf.test.snap
@@ -13,7 +13,7 @@
     },
     "software_versions": {
         "content": [
-            "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}"
+            "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}"
         ],
         "timestamp": "2023-05-28T21:18:54+0000"
     },
diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap
index 90b157ec2..8fca1c5b0 100644
--- a/tests/pipeline/reftaxcustom.nf.test.snap
+++ b/tests/pipeline/reftaxcustom.nf.test.snap
@@ -13,7 +13,7 @@
     },
     "software_versions": {
         "content": [
-            "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}"
+            "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}"
         ],
         "timestamp": "2023-05-28T21:18:54+0000"
     },
diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap
index 069c7fa88..b19bf8feb 100644
--- a/tests/pipeline/sintax.nf.test.snap
+++ b/tests/pipeline/sintax.nf.test.snap
@@ -16,7 +16,7 @@
     },
     "software_versions": {
         "content": [
-            "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}"
+            "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}"
         ],
         "timestamp": "2023-06-20T16:40:18+0000"
     },

From 07f4407a4dcfc62be99f61e8a4ebaf6543caaf47 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 22:47:55 +0000
Subject: [PATCH 44/48] Remove assertions on dada2 tax and phyloseq files
 existing in test_qiimecustom.

---
 tests/pipeline/qiimecustom.nf.test | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test
index 8ec67571b..2cdc080a7 100644
--- a/tests/pipeline/qiimecustom.nf.test
+++ b/tests/pipeline/qiimecustom.nf.test
@@ -30,8 +30,6 @@ nextflow_pipeline {
                                 path("$outputDir/dada2/DADA2_stats.tsv"),
                                 path("$outputDir/dada2/DADA2_table.rds"),
                                 path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") },
-                { assert new File("$outputDir/dada2/ASV_tax.user.tsv").exists() },
-                { assert new File("$outputDir/dada2/ASV_tax_species.user.tsv").exists() },
                 { assert new File("$outputDir/fastqc/sampleID_1_1_fastqc.html").exists() },
                 { assert new File("$outputDir/fastqc/sampleID_1_2_fastqc.html").exists() },
                 { assert new File("$outputDir/fastqc/sampleID_1a_1_fastqc.html").exists() },
@@ -46,8 +44,7 @@ nextflow_pipeline {
                 { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
                                 path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
                                 path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") },
-                { assert new File("$outputDir/summary_report/summary_report.html").exists() },
-                { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }
+                { assert new File("$outputDir/summary_report/summary_report.html").exists() }
             )
         }
     }

From 1c129e568cb2cbba9c11af01584763e3bca96dfe Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Thu, 7 Dec 2023 23:08:40 +0000
Subject: [PATCH 45/48] qiime2 tax alignment looks non-deterministic; only
 verify that its output files exist.

---
 tests/pipeline/qiimecustom.nf.test       | 4 ++--
 tests/pipeline/qiimecustom.nf.test.snap  | 7 -------
 tests/pipeline/reftaxcustom.nf.test      | 4 ++--
 tests/pipeline/reftaxcustom.nf.test.snap | 7 -------
 4 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test
index 2cdc080a7..493968153 100644
--- a/tests/pipeline/qiimecustom.nf.test
+++ b/tests/pipeline/qiimecustom.nf.test
@@ -39,8 +39,8 @@ nextflow_pipeline {
                 { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() },
                 { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() },
                 { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") },
-                { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"),
-                                path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") },
+                { assert new File("$outputDir/qiime2/taxonomy/taxonomy.tsv").exists() },
+                { assert new File("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza").exists() },
                 { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
                                 path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
                                 path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") },
diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap
index 6c39df372..594688a92 100644
--- a/tests/pipeline/qiimecustom.nf.test.snap
+++ b/tests/pipeline/qiimecustom.nf.test.snap
@@ -32,13 +32,6 @@
         ],
         "timestamp": "2023-05-28T21:18:54+0000"
     },
-    "qiime2": {
-        "content": [
-            "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,b744a656dbd4e710697bf9ee47f26c87",
-            "taxonomy.tsv:md5,44585412583f0cf5f2b82a1337f16756"
-        ],
-        "timestamp": "2023-12-07T21:28:32+0000"
-    },
     "multiqc": {
         "content": [
             "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52",
diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test
index 3f72ec5f0..4e70861b6 100644
--- a/tests/pipeline/reftaxcustom.nf.test
+++ b/tests/pipeline/reftaxcustom.nf.test
@@ -44,8 +44,8 @@ nextflow_pipeline {
                 { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"),
                                 path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"),
                                 path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") },
-                { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"),
-                                path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") },
+                { assert new File("$outputDir/qiime2/taxonomy/taxonomy.tsv").exists() },
+                { assert new File("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza").exists() },
                 { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
                                 path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
                                 path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") },
diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap
index 8fca1c5b0..b5aa10f14 100644
--- a/tests/pipeline/reftaxcustom.nf.test.snap
+++ b/tests/pipeline/reftaxcustom.nf.test.snap
@@ -50,13 +50,6 @@
         ],
         "timestamp": "2023-09-15T21:16:26+0000"
     },
-    "qiime2": {
-        "content": [
-            "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b",
-            "taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645"
-        ],
-        "timestamp": "2023-12-07T21:28:32+0000"
-    },
     "multiqc": {
         "content": [
             "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52",

From 2ace59599e1c0c5ed19ffabf03ce2adb5a34c428 Mon Sep 17 00:00:00 2001
From: Matthew <matthew.marshall96@yahoo.co.uk>
Date: Fri, 8 Dec 2023 14:26:03 +0000
Subject: [PATCH 46/48] Make --skip_qiime_downstream help text clearer.

Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com>
---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 6ccfc3ad1..14eef4b45 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -658,7 +658,7 @@
                 },
                 "skip_qiime_downstream": {
                     "type": "boolean",
-                    "description": "Skip steps that are executed by QIIME2 except for taxonomic classification, including barplots, relative abundance tables, diversity analysis, differential abundance testing."
+                    "description": "Skip steps that are executed by QIIME2 except for taxonomic classification. Skip steps including barplots, relative abundance tables, diversity analysis, differential abundance testing."
                 },
                 "skip_taxonomy": {
                     "type": "boolean",

From 4464c38cef7be3e9309c3d036fda7172aba130a4 Mon Sep 17 00:00:00 2001
From: Matthew Marshall <matthew.marshall@stfc.ac.uk>
Date: Fri, 8 Dec 2023 14:48:12 +0000
Subject: [PATCH 47/48] Remove assertion on qiime phyloseq file no longer
 produced.

---
 tests/pipeline/pplace.nf.test | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test
index 564cf2b9b..b0507df75 100644
--- a/tests/pipeline/pplace.nf.test
+++ b/tests/pipeline/pplace.nf.test
@@ -56,8 +56,7 @@ nextflow_pipeline {
                 { assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() },
                 { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
                                 path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") },
-                { assert new File("$outputDir/summary_report/summary_report.html").exists() },
-                { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() }
+                { assert new File("$outputDir/summary_report/summary_report.html").exists() }
             )
         }
     }

From 6b71e4d2500e72ac3eda29d80f0654ed7e5fa481 Mon Sep 17 00:00:00 2001
From: daniel <d4straub@gmail.com>
Date: Tue, 19 Dec 2023 10:19:46 +0100
Subject: [PATCH 48/48] Fix QIIME2 taxonomy section of the summary report

---
 assets/report_template.Rmd      | 12 +++++++++---
 modules/local/summary_report.nf |  3 ++-
 workflows/ampliseq.nf           |  2 +-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd
index 8c8fc21e7..264a71493 100644
--- a/assets/report_template.Rmd
+++ b/assets/report_template.Rmd
@@ -980,9 +980,15 @@ cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in
 # Header
 cat("## QIIME2\n")
 
-cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9)
-    using the database: `", params$qiime2_ref_tax_title, "`.
-    More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "")
+# indicate reference taxonomy
+if ( !isFALSE(params$qiime2_ref_tax_title) ) {
+    cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9)
+        using the database: `", params$qiime2_ref_tax_title, "`.
+        More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "")
+} else {
+    cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) using a custom database ",
+            "provided by the user.\n\n", sep = "")
+}
 
 # Read file and prepare table
 asv_tax <- read.table(params$qiime2_taxonomy, header = TRUE, sep = "\t")
diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf
index a8e082b01..1a288a0fb 100644
--- a/modules/local/summary_report.nf
+++ b/modules/local/summary_report.nf
@@ -118,7 +118,8 @@ process SUMMARY_REPORT  {
         kraken2_tax ? "kraken2_taxonomy='$kraken2_tax',kraken2_confidence='$params.kraken2_confidence'" : "",
         kraken2_tax && !params.kraken2_ref_tax_custom ? "kraken2_ref_tax_title='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["title"]}',kraken2_ref_tax_file='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["file"]}',kraken2_ref_tax_citation='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["citation"]}'" : "",
         pplace_tax ? "pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "",
-        qiime2_tax && params.qiime_ref_taxonomy ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "",
+        qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "",
+        qiime2_tax && params.qiime_ref_taxonomy ? "qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : "",
         run_qiime2 ? "val_used_taxonomy='$val_used_taxonomy'" : "",
         filter_stats_tsv ? "filter_stats_tsv='$filter_stats_tsv',qiime2_filtertaxa='$qiime2_filtertaxa',exclude_taxa='$params.exclude_taxa',min_frequency='$params.min_frequency',min_samples='$params.min_samples'" : "",
         barplot ? "barplot=TRUE" : "",
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index 9e85bf6af..6dcc370f1 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -863,7 +863,7 @@ workflow AMPLISEQ {
             !params.skip_taxonomy && ( params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) ? KRAKEN2_TAXONOMY_WF.out.tax_tsv.ifEmpty( [] ) : [],
             !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax.ifEmpty( [] ) : [],
             !params.skip_taxonomy && params.pplace_tree ? FASTA_NEWICK_EPANG_GAPPA.out.heattree.ifEmpty( [[],[]] ) : [[],[]],
-            !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [],
+            !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2_taxonomy ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [],
             run_qiime2,
             run_qiime2 ? val_used_taxonomy : "",
             run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "",