From f1a86851942218bd72c33c0358b784c1e8207f4f Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 16:37:09 +0100 Subject: [PATCH 1/9] Move midori to get dbs in alphabetical order --- conf/ref_databases.config | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/conf/ref_databases.config b/conf/ref_databases.config index c80820ec..4ac7d02c 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -25,22 +25,6 @@ params { fmtscript = "taxref_reformat_coidb.sh" dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)" } - 'midori2-co1' { - title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" - file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] - citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." - fmtscript = "taxref_reformat_midori2.sh" - dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" - taxlevels = "Phylum,Class,Order,Family,Genus,Species" - } - 'midori2-co1=gb250' { - title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" - file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] - citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." 
- fmtscript = "taxref_reformat_midori2.sh" - dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" - taxlevels = "Phylum,Class,Order,Family,Genus,Species" - } 'gtdb' { title = "GTDB - Genome Taxonomy Database - Release R08-RS214.1" file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/bac120_ssu_reps_r214.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/ar53_ssu_reps_r214.tar.gz" ] @@ -76,6 +60,22 @@ params { fmtscript = "taxref_reformat_gtdb.sh" dbversion = "GTDB R05-RS95 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release95/95.0/)" } + 'midori2-co1' { + title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" + file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] + citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." + fmtscript = "taxref_reformat_midori2.sh" + dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" + taxlevels = "Phylum,Class,Order,Family,Genus,Species" + } + 'midori2-co1=gb250' { + title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" + file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] + citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." 
+ fmtscript = "taxref_reformat_midori2.sh" + dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" + taxlevels = "Phylum,Class,Order,Family,Genus,Species" + } 'pr2' { title = "PR2 - Protist Reference Ribosomal Database - Version 5.0.0" file = [ "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_dada2.fasta.gz", "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_UTAX.fasta.gz" ] From 54debbf2d49da6900c7a8e2302520c6007a40670 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 17:13:56 +0100 Subject: [PATCH 2/9] Sort dbs in alphabetical order in schema --- nextflow_schema.json | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1d3098da..27e524c3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -335,36 +335,36 @@ "description": "Name of supported database, and optionally also version number", "default": "silva=138", "enum": [ - "midori2-co1=gb250", - "midori2-co1", + "coidb", + "coidb=221216", + "gtdb", "gtdb=R05-RS95", "gtdb=R06-RS202", "gtdb=R07-RS207", "gtdb=R08-RS214", - "gtdb", - "coidb", - "coidb=221216", - "pr2=5.0.0", - "pr2=4.14.0", - "pr2=4.13.0", + "midori2-co1", + "midori2-co1=gb250", "pr2", - "rdp=18", + "pr2=4.13.0", + "pr2=4.14.0", + "pr2=5.0.0", "rdp", + "rdp=18", "sbdi-gtdb", - "sbdi-gtdb=R07-RS207-1", - "sbdi-gtdb=R06-RS202-3", "sbdi-gtdb=R06-RS202-1", + "sbdi-gtdb=R06-RS202-3", + "sbdi-gtdb=R07-RS207-1", + "silva", "silva=132", "silva=138", - "silva", - "unite-fungi=9.0", - "unite-fungi=8.3", - "unite-fungi=8.2", - "unite-fungi", - "unite-alleuk=9.0", - "unite-alleuk=8.3", + "unite-alleuk", "unite-alleuk=8.2", - "unite-alleuk" + "unite-alleuk=8.3", + "unite-alleuk=9.0", + "unite-fungi", + "unite-fungi=8.2", + "unite-fungi=8.3", + 
"unite-fungi=9.0" ] }, "dada_ref_tax_custom": { From fac3d731df0efc94d64323e6f7d171d1d60cd4f4 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 17:22:51 +0100 Subject: [PATCH 3/9] Spelling mistake --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 62433cea..c2b93537 100644 --- a/docs/output.md +++ b/docs/output.md @@ -140,7 +140,7 @@ DADA2 reduces sequence errors and dereplicates sequences by quality filtering, d - `ASV_table.tsv`: Counts for each ASV sequence. - `DADA2_stats.tsv`: Tracking read numbers through DADA2 processing steps, for each sample. - `DADA2_table.rds`: DADA2 ASV table as R object. - - `DADA2_tables.tsv`: DADA2 ASV table. + - `DADA2_table.tsv`: DADA2 ASV table. - `dada2/args/`: Directory containing files with all parameters for DADA2 steps. - `dada2/log/`: Directory containing log files for DADA2 steps. - `dada2/QC/` From f61d94547539f20c6ac26fb6bbba70022ea2fd32 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 17:23:08 +0100 Subject: [PATCH 4/9] Add PhytoRef taxonomy database for chloroplast 16S --- bin/taxref_reformat_phytoref.sh | 7 +++++++ conf/ref_databases.config | 8 ++++++++ nextflow_schema.json | 1 + 3 files changed, 16 insertions(+) create mode 100755 bin/taxref_reformat_phytoref.sh diff --git a/bin/taxref_reformat_phytoref.sh b/bin/taxref_reformat_phytoref.sh new file mode 100755 index 00000000..c61c081e --- /dev/null +++ b/bin/taxref_reformat_phytoref.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Write the assignTaxonomy() fasta file: assignTaxonomy.fna +cat PhytoRef_with_taxonomy.fasta | sed '/>/s/>[^|]*|/>/' | sed '/>/s/|/;/g' > assignTaxonomy.fna + +# Write the addSpecies() fasta file: addSpecies.fna +cat PhytoRef_with_taxonomy.fasta | sed '/^>/s/>\([^|]\+\)|.*|\([^|]\+\)/>\1 \2/' > addSpecies.fna diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 4ac7d02c..65e0b9b1 100644 --- a/conf/ref_databases.config +++ 
b/conf/ref_databases.config @@ -76,6 +76,14 @@ params { dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" taxlevels = "Phylum,Class,Order,Family,Genus,Species" } + 'phytoref' { + title = "PhytoRef plastid 16S rRNA database for photosynthetic eukaryotes" + file = [ "http://phytoref.sb-roscoff.fr/static/downloads/PhytoRef_with_taxonomy.fasta" ] + citation = "Decelle, Johan, Sarah Romac, Rowena F. Stern, El Mahdi Bendif, Adriana Zingone, Stéphane Audic, Michael D. Guiry, et al. 2015. PhytoREF: A Reference Database of the Plastidial 16S rRNA Gene of Photosynthetic Eukaryotes with Curated Taxonomy. Molecular Ecology Resources 15 (6): 1435–45. https://doi.org/10.1111/1755-0998.12401." + fmtscript = "taxref_reformat_phytoref.sh" + dbversion = "unknown" + taxlevels = "Domain,Supergroup,Subphylum,Class,Subclass,Order,Suborder,Family,Genus,Species" + } 'pr2' { title = "PR2 - Protist Reference Ribosomal Database - Version 5.0.0" file = [ "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_dada2.fasta.gz", "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_UTAX.fasta.gz" ] diff --git a/nextflow_schema.json b/nextflow_schema.json index 27e524c3..9494e8f4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -344,6 +344,7 @@ "gtdb=R08-RS214", "midori2-co1", "midori2-co1=gb250", + "phytoref", "pr2", "pr2=4.13.0", "pr2=4.14.0", From 58b0f7222e6409fdc1af4b085018de303376a3db Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 17:26:50 +0100 Subject: [PATCH 5/9] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0879526..3f31da02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- 
[#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` + ### `Changed` ### `Fixed` From 836bb451d02842b3b6f435161e61707223e983af Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 19:19:24 +0100 Subject: [PATCH 6/9] Add Zehr lab nifH database --- bin/taxref_reformat_zehr-nifh.sh | 7 +++++++ conf/ref_databases.config | 16 ++++++++++++++++ nextflow_schema.json | 4 +++- 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100755 bin/taxref_reformat_zehr-nifh.sh diff --git a/bin/taxref_reformat_zehr-nifh.sh b/bin/taxref_reformat_zehr-nifh.sh new file mode 100755 index 00000000..86a8eb26 --- /dev/null +++ b/bin/taxref_reformat_zehr-nifh.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Write the assignTaxonomy() fasta file: assignTaxonomy.fna +cp *.fasta assignTaxonomy.fna + +# Write the addSpecies() fasta file: addSpecies.fna +cut -d, -f 2,6,7 *.csv | grep -v '^sequence,' | sed 's/\(.*\),\(.*\),\(.*\)/>\3 \2\n\1/' > addSpecies.fna diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 65e0b9b1..6d0d7d3b 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -247,6 +247,22 @@ params { dbversion = "UNITE-alleuk v8.2 (https://doi.org/10.15156/BIO/786370)" shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34994569", "https://scilifelab.figshare.com/ndownloader/files/34994572"] } + 'zehr-nifh' { + title = "Zehr lab nifH database - version 2.5.0" + file = [ "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_v2.0.5.fasta", "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_phylum_v2.0.5.csv" ] + citation = "M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213" + fmtscript = "taxref_reformat_zehr-nifh.sh" + dbversion = "Zehr-nifH v. 
2.5.0" + taxlevels = "Domain,Phylum,Class,Order,Family,Genus" + } + 'zehr-nifh=2.5.0' { + title = "Zehr lab nifH database - version 2.5.0" + file = [ "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_v2.0.5.fasta", "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_phylum_v2.0.5.csv" ] + citation = "M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213" + fmtscript = "taxref_reformat_zehr-nifh.sh" + dbversion = "Zehr-nifH v. 2.5.0" + taxlevels = "Domain,Phylum,Class,Order,Family,Genus" + } } //QIIME2 taxonomic reference databases qiime_ref_databases { diff --git a/nextflow_schema.json b/nextflow_schema.json index 9494e8f4..d2e5faa9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -365,7 +365,9 @@ "unite-fungi", "unite-fungi=8.2", "unite-fungi=8.3", - "unite-fungi=9.0" + "unite-fungi=9.0", + "zehr-nifh", + "zehr-nifh=2.5.0" ] }, "dada_ref_tax_custom": { From e4a09ce0d243a1016e18986d6ac3d094ad3c2235 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 19:23:52 +0100 Subject: [PATCH 7/9] CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f31da02..5a1a239f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` +- [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh` ### `Changed` From 02a25293d77cc501d01ed87de01013909037cea1 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 19:39:53 +0100 Subject: [PATCH 8/9] Fix formatting mistake --- bin/taxref_reformat_zehr-nifh.sh | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/bin/taxref_reformat_zehr-nifh.sh b/bin/taxref_reformat_zehr-nifh.sh index 86a8eb26..54171f51 100755 --- a/bin/taxref_reformat_zehr-nifh.sh +++ b/bin/taxref_reformat_zehr-nifh.sh @@ -4,4 +4,4 @@ cp *.fasta assignTaxonomy.fna # Write the addSpecies() fasta file: addSpecies.fna -cut -d, -f 2,6,7 *.csv | grep -v '^sequence,' | sed 's/\(.*\),\(.*\),\(.*\)/>\3 \2\n\1/' > addSpecies.fna +cut -d, -f 2,6,7 *.csv | grep -v '^sequence,' | sed 's/\(.*\),[0-9]* \(.*\),\(.*\)/>\3 \2\n\1/' > addSpecies.fna From 09ae4b7f9e0032043358cccc35c22e4b15845eee Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 12 Dec 2023 10:27:38 +0100 Subject: [PATCH 9/9] Change unknown to unversioned for PhytoRef --- conf/ref_databases.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 6d0d7d3b..263f4390 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -81,7 +81,7 @@ params { file = [ "http://phytoref.sb-roscoff.fr/static/downloads/PhytoRef_with_taxonomy.fasta" ] citation = "Decelle, Johan, Sarah Romac, Rowena F. Stern, El Mahdi Bendif, Adriana Zingone, Stéphane Audic, Michael D. Guiry, et al. 2015. PhytoREF: A Reference Database of the Plastidial 16S rRNA Gene of Photosynthetic Eukaryotes with Curated Taxonomy. Molecular Ecology Resources 15 (6): 1435–45. https://doi.org/10.1111/1755-0998.12401." fmtscript = "taxref_reformat_phytoref.sh" - dbversion = "unknown" + dbversion = "unversioned" taxlevels = "Domain,Supergroup,Subphylum,Class,Subclass,Order,Suborder,Family,Genus,Species" } 'pr2' {