Skip to content

Commit

Permalink
add sidle reference taxonomy entries & custom input
Browse files Browse the repository at this point in the history
  • Loading branch information
d4straub committed Feb 9, 2024
1 parent aa1ff5a commit afbb18f
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 37 deletions.
22 changes: 22 additions & 0 deletions bin/taxref_reformat_sidle.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/sh

# Reformat a SIDLE reference database archive.
#
# Input : database.tar.gz in the working directory (Greengenes 13_8 or SILVA 128 QIIME release).
# Output: three files in the working directory, named after their origin inside the archive:
#           *.seq.fasta     reference sequences
#           *.alnseq.fasta  aligned reference sequences
#           *.tax.txt       taxonomy strings
# Exits with status 1 when the archive does not contain a known top-level directory.

# Unpack the archive
tar xzf database.tar.gz

if [ -d "gg_13_8_otus" ]; then
    # Greengenes 13_8
    mv gg_13_8_otus/rep_set/99_otus.fasta gg_13_8_otus_rep_set_99_otus.seq.fasta
    mv gg_13_8_otus/rep_set_aligned/99_otus.fasta gg_13_8_otus_rep_set_aligned_99_otus.alnseq.fasta
    mv gg_13_8_otus/taxonomy/99_otu_taxonomy.txt gg_13_8_otus_taxonomy_99_otu_taxonomy.tax.txt
elif [ -d "SILVA_128_QIIME_release" ]; then
    # SILVA 128 (this branch was unreachable before: it tested for gg_13_8_otus again)
    mv SILVA_128_QIIME_release/rep_set/rep_set_all/99/99_otus.fasta SILVA_128_QIIME_release_rep_set_all_99_otus.seq.fasta
    # The aligned sequences are gzipped inside the archive; read them relative to the
    # working directory (not from the filesystem root).
    gunzip -c SILVA_128_QIIME_release/rep_set_aligned/99/99_otus_aligned.fasta.gz > SILVA_128_QIIME_release_rep_set_aligned_99_otus_aligned.alnseq.fasta
    mv SILVA_128_QIIME_release/taxonomy/taxonomy_all/99/consensus_taxonomy_7_levels SILVA_128_QIIME_release_taxonomy_all_99_consensus_taxonomy_7_levels.tax.txt
else
    # Nothing usable was extracted: fail loudly instead of finishing without output files
    echo "Didnt detect any expected directory" >&2
    exit 1
fi





37 changes: 37 additions & 0 deletions conf/ref_databases.config
Original file line number Diff line number Diff line change
Expand Up @@ -488,4 +488,41 @@ params {
taxlevels = "D,P,C,O,F,G,S"
}
}
// Sidle reference databases
// One entry per value accepted by `--sidle_ref_taxonomy`; the keys match the `enum` of that
// parameter in nextflow_schema.json. A key without version ('silva', 'greengenes') carries the
// same values as its versioned twin ('silva=128', 'greengenes=13_8').
// Fields read by the pipeline code:
//   file      - archive URL(s), turned into ch_sidle_ref_taxonomy in workflows/ampliseq.nf
//   tree_qza  - reference tree URL(s), used unless `--sidle_ref_tree_custom` is set
//   fmtscript - script name executed by FORMAT_TAXONOMY_SIDLE on the downloaded archive
//   title, citation - written to ref_taxonomy.*.txt by FORMAT_TAXONOMY_SIDLE
// NOTE(review): `license` and `taxlevels` are not read by any code visible in this change;
// the greengenes entries define no `license` - confirm whether that is intended.
sidle_ref_databases {
'silva' {
title = "SILVA - Version 128"
file = [ "https://www.arb-silva.de/fileadmin/silva_databases/qiime/Silva_128_release.tgz" ]
tree_qza = [ "https://data.qiime2.org/2021.4/common/sepp-refs-silva-128.qza" ]
citation = "https://www.arb-silva.de/; Bokulich, N.A., Robeson, M., Dillon, M.R. bokulich-lab/RESCRIPt. Zenodo. http://doi.org/10.5281/zenodo.3891931"
license = "https://www.arb-silva.de/silva-license-information/"
fmtscript = "taxref_reformat_sidle.sh"
taxlevels = "D,P,C,O,F,G"
}
// Same content as 'silva', selectable with an explicit version
'silva=128' {
title = "SILVA - Version 128"
file = [ "https://www.arb-silva.de/fileadmin/silva_databases/qiime/Silva_128_release.tgz" ]
tree_qza = [ "https://data.qiime2.org/2021.4/common/sepp-refs-silva-128.qza" ]
citation = "https://www.arb-silva.de/; Bokulich, N.A., Robeson, M., Dillon, M.R. bokulich-lab/RESCRIPt. Zenodo. http://doi.org/10.5281/zenodo.3891931"
license = "https://www.arb-silva.de/silva-license-information/"
fmtscript = "taxref_reformat_sidle.sh"
taxlevels = "D,P,C,O,F,G"
}
'greengenes' {
title = "Greengenes - Version 13_8"
file = [ "ftp://greengenes.microbio.me/greengenes_release/gg_13_5/gg_13_8_otus.tar.gz" ]
tree_qza = [ "https://data.qiime2.org/2021.4/common/sepp-refs-gg-13-8.qza" ]
citation = "McDonald, D., Price, M., Goodrich, J. et al. An improved Greengenes taxonomy with explicit ranks for ecological and evolutionary analyses of bacteria and archaea. ISME J 6, 610–618 (2012). https://doi.org/10.1038/ismej.2011.139"
fmtscript = "taxref_reformat_sidle.sh"
taxlevels = "D,P,C,O,F,G,S"
}
// Same content as 'greengenes', selectable with an explicit version
'greengenes=13_8' {
title = "Greengenes - Version 13_8"
file = [ "ftp://greengenes.microbio.me/greengenes_release/gg_13_5/gg_13_8_otus.tar.gz" ]
tree_qza = [ "https://data.qiime2.org/2021.4/common/sepp-refs-gg-13-8.qza" ]
citation = "McDonald, D., Price, M., Goodrich, J. et al. An improved Greengenes taxonomy with explicit ranks for ecological and evolutionary analyses of bacteria and archaea. ISME J 6, 610–618 (2012). https://doi.org/10.1038/ismej.2011.139"
fmtscript = "taxref_reformat_sidle.sh"
taxlevels = "D,P,C,O,F,G,S"
}
}
}
38 changes: 38 additions & 0 deletions modules/local/format_taxonomy_sidle.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
process FORMAT_TAXONOMY_SIDLE {
    label 'process_low'

    conda "conda-forge::sed=4.7"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
        'docker.io/biocontainers/biocontainers:v1.2.0_cv1' }"

    input:
    path('database.tar.gz') // reference archive, always staged under this fixed name
    val(suffix)             // label used in the name of the information file

    output:
    path( "*.seq.fasta" )       , emit: seq
    path( "*.alnseq.fasta")     , emit: alnseq
    path( "*.tax.txt")          , emit: tax
    path( "ref_taxonomy.*.txt") , emit: ref_tax_info
    path "versions.yml"         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Configuration entry of the database selected with --sidle_ref_taxonomy
    def db_entry  = params.sidle_ref_databases[params.sidle_ref_taxonomy]
    // Plain-text summary of the database that was used
    def info_file = "ref_taxonomy.${suffix}.txt"
    """
    ${db_entry["fmtscript"]}

    #Giving out information
    echo -e "--sidle_ref_taxonomy: ${params.sidle_ref_taxonomy}\n" >${info_file}
    echo -e "Title: ${db_entry["title"]}\n" >>${info_file}
    echo -e "Citation: ${db_entry["citation"]}\n" >>${info_file}
    echo "All entries: ${db_entry}" >>${info_file}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bash: \$(bash --version | sed -n 1p | sed 's/GNU bash, version //g')
    END_VERSIONS
    """
}
7 changes: 3 additions & 4 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,9 @@ params {
kraken2_assign_taxlevels = null
kraken2_ref_tax_custom = null
kraken2_confidence = 0.0
sidle_ref_sequences = null
sidle_ref_alignedseq = null
sidle_ref_taxonomy = null
sidle_ref_tree = null
sidle_ref_tax_custom = null
sidle_ref_tree_custom = null

// MultiQC options
multiqc_config = null
Expand Down Expand Up @@ -159,7 +158,7 @@ params {
// Schema validation default options
validationFailUnrecognisedParams = false
validationLenientMode = false
validationSchemaIgnoreParams = 'dada_ref_databases,qiime_ref_databases,sintax_ref_databases,kraken2_ref_databases,genomes,igenomes_base'
validationSchemaIgnoreParams = 'dada_ref_databases,qiime_ref_databases,sintax_ref_databases,kraken2_ref_databases,sidle_ref_databases,genomes,igenomes_base'
validationShowHiddenParams = false
validate_params = true

Expand Down
25 changes: 13 additions & 12 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -504,24 +504,25 @@
"minimum": 0,
"maximum": 1
},
"sidle_ref_sequences": {
"type": "string",
"help_text": "",
"description": "Path to SIDLE reference taxonomy sequences (*.fasta)"
},
"sidle_ref_alignedseq": {
"sidle_ref_taxonomy": {
"type": "string",
"help_text": "",
"description": "Path to SIDLE aligned reference taxonomy sequences (*.fasta)"
"description": "Name of supported database, and optionally also version number",
"enum": [
"silva",
"silva=128",
"greengenes",
"greengenes=13_8"
]
},
"sidle_ref_taxonomy": {
"sidle_ref_tax_custom": {
"type": "string",
"help_text": "",
"description": "Path to SIDLE reference taxonomy strings (*.txt)"
"help_text": "Consider also setting `--sidle_ref_tree_custom`. Example usage: `--sidle_ref_tax_custom 'rep_set_99.fasta,rep_set_aligned_99.fasta,taxonomy_99_taxonomy.txt'`",
"description": "Comma separated paths to three files: reference taxonomy sequences (*.fasta), aligned reference taxonomy sequences (*.fasta), reference taxonomy strings (*.txt)"
},
"sidle_ref_tree": {
"sidle_ref_tree_custom": {
"type": "string",
"help_text": "",
"help_text": "Overwrites tree chosen by `--sidle_ref_taxonomy`",
"description": "Path to SIDLE reference taxonomy tree (*.qza)"
},
"sintax_ref_taxonomy": {
Expand Down
45 changes: 28 additions & 17 deletions subworkflows/local/sidle_wf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,44 @@
* Multi-region reconstruction with SIDLE (q2-sidle)
*/

include { SIDLE_INDB } from '../../modules/local/sidle_indb'
include { SIDLE_INDBALIGNED } from '../../modules/local/sidle_indbaligned'
include { SIDLE_DBFILT } from '../../modules/local/sidle_dbfilt'
include { SIDLE_IN } from '../../modules/local/sidle_in'
include { SIDLE_TRIM } from '../../modules/local/sidle_trim'
include { SIDLE_DBEXTRACT } from '../../modules/local/sidle_dbextract'
include { SIDLE_ALIGN } from '../../modules/local/sidle_align'
include { SIDLE_DBRECON } from '../../modules/local/sidle_dbrecon'
include { SIDLE_TABLERECON } from '../../modules/local/sidle_tablerecon'
include { SIDLE_TAXRECON } from '../../modules/local/sidle_taxrecon'
include { SIDLE_FILTTAX } from '../../modules/local/sidle_filttax'
include { SIDLE_SEQRECON } from '../../modules/local/sidle_seqrecon'
include { SIDLE_TREERECON } from '../../modules/local/sidle_treerecon'

include { FORMAT_TAXONOMY_SIDLE } from '../../modules/local/format_taxonomy_sidle'
include { SIDLE_INDB } from '../../modules/local/sidle_indb'
include { SIDLE_INDBALIGNED } from '../../modules/local/sidle_indbaligned'
include { SIDLE_DBFILT } from '../../modules/local/sidle_dbfilt'
include { SIDLE_IN } from '../../modules/local/sidle_in'
include { SIDLE_TRIM } from '../../modules/local/sidle_trim'
include { SIDLE_DBEXTRACT } from '../../modules/local/sidle_dbextract'
include { SIDLE_ALIGN } from '../../modules/local/sidle_align'
include { SIDLE_DBRECON } from '../../modules/local/sidle_dbrecon'
include { SIDLE_TABLERECON } from '../../modules/local/sidle_tablerecon'
include { SIDLE_TAXRECON } from '../../modules/local/sidle_taxrecon'
include { SIDLE_FILTTAX } from '../../modules/local/sidle_filttax'
include { SIDLE_SEQRECON } from '../../modules/local/sidle_seqrecon'
include { SIDLE_TREERECON } from '../../modules/local/sidle_treerecon'

workflow SIDLE_WF {
take:
ch_asv_tables_sequences
ch_db_sequences
ch_db_alignedsequences
ch_db_taxonomy
ch_sidle_ref_taxonomy
val_sidle_ref_taxonomy
ch_db_tree

main:
ch_sidle_versions = Channel.empty()

// DB
// Resolve the three reference database inputs (sequences, aligned sequences, taxonomy)
// either from a supported database archive or from user supplied files.
if (!params.sidle_ref_tax_custom) {
    //standard ref taxonomy input from conf/ref_databases.config, one tar.gz / tgz with all files
    FORMAT_TAXONOMY_SIDLE ( ch_sidle_ref_taxonomy, val_sidle_ref_taxonomy )
    // record the tool versions of the formatting step like for every other process here
    ch_sidle_versions = ch_sidle_versions.mix(FORMAT_TAXONOMY_SIDLE.out.versions)
    // output names must match the `emit:` labels of FORMAT_TAXONOMY_SIDLE (seq, alnseq, tax)
    ch_db_sequences = FORMAT_TAXONOMY_SIDLE.out.seq
    ch_db_alignedsequences = FORMAT_TAXONOMY_SIDLE.out.alnseq
    ch_db_taxonomy = FORMAT_TAXONOMY_SIDLE.out.tax
} else {
    //input from params.sidle_ref_tax_custom: it[0] = fasta = ch_db_sequences, it[1] = aligned fasta = ch_db_alignedsequences, it[2] = taxonomy txt = ch_db_taxonomy
    ch_db_sequences = ch_sidle_ref_taxonomy.map{ it[0] }
    ch_db_alignedsequences = ch_sidle_ref_taxonomy.map{ it[1] }
    ch_db_taxonomy = ch_sidle_ref_taxonomy.map{ it[2] }
}
SIDLE_INDB ( ch_db_sequences, ch_db_taxonomy )
ch_sidle_versions = ch_sidle_versions.mix(SIDLE_INDB.out.versions)
SIDLE_INDBALIGNED ( ch_db_alignedsequences )
Expand Down
29 changes: 25 additions & 4 deletions workflows/ampliseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,28 @@ if (params.classifier) {
ch_qiime_classifier = Channel.fromPath("${params.classifier}", checkIfExists: true)
} else { ch_qiime_classifier = Channel.empty() }

// Resolve the SIDLE reference inputs. Sets three script-level variables:
//   ch_sidle_ref_taxonomy      - reference files (three custom files, or one database archive)
//   ch_sidle_ref_taxonomy_tree - reference tree (*.qza); --sidle_ref_tree_custom takes precedence
//   val_sidle_ref_taxonomy     - label of the reference ("user", sanitized database name, or "none")
if (params.sidle_ref_tax_custom) {
    // custom reference: exactly three comma separated paths (fasta, aligned fasta, taxonomy);
    // whitespace around each path is ignored so that 'a, b, c' is accepted as well
    sidle_ref_paths = "${params.sidle_ref_tax_custom}".split(",").collect { it.trim() } as String[]
    // a value without comma, with too few/many entries or with an empty entry is rejected
    if (sidle_ref_paths.length != 3 || sidle_ref_paths.any { !it }) {
        error "--sidle_ref_tax_custom accepts exactly three filepaths separated by a comma (fasta, aligned fasta, taxonomy). Please review input."
    }
    ch_sidle_ref_taxonomy = Channel.fromPath( Arrays.asList(sidle_ref_paths), checkIfExists: true )
    val_sidle_ref_taxonomy = "user"
    ch_sidle_ref_taxonomy_tree = params.sidle_ref_tree_custom ? Channel.fromPath("${params.sidle_ref_tree_custom}", checkIfExists: true) : Channel.empty()
} else if (params.sidle_ref_taxonomy) {
    // supported database: archive and tree locations come from conf/ref_databases.config
    ch_sidle_ref_taxonomy = Channel.fromList( params.sidle_ref_databases[params.sidle_ref_taxonomy]["file"] ).map { file(it) }
    ch_sidle_ref_taxonomy_tree = params.sidle_ref_tree_custom ? Channel.fromPath("${params.sidle_ref_tree_custom}", checkIfExists: true) : Channel.fromList( params.sidle_ref_databases[params.sidle_ref_taxonomy]["tree_qza"] ).map { file(it) }
    // '=' and '.' are replaced because the value is used inside a file name
    val_sidle_ref_taxonomy = params.sidle_ref_taxonomy.replace('=','_').replace('.','_')
} else {
    // no SIDLE reference requested
    ch_sidle_ref_taxonomy = Channel.empty()
    ch_sidle_ref_taxonomy_tree = Channel.empty()
    val_sidle_ref_taxonomy = "none"
}

if (params.dada_ref_tax_custom) {
//custom ref taxonomy input from params.dada_ref_tax_custom & params.dada_ref_tax_custom_sp
ch_assigntax = Channel.fromPath("${params.dada_ref_tax_custom}", checkIfExists: true)
Expand Down Expand Up @@ -439,10 +461,9 @@ workflow AMPLISEQ {
// run q2-sidle
SIDLE_WF (
DADA2_SPLITREGIONS.out.for_sidle,
file( params.sidle_ref_sequences, checkIfExists: true ), //TODO: ch_sidle_ref_sequences // "gg_13_8_otus_rep_set_99_otus.fasta"
file( params.sidle_ref_alignedseq, checkIfExists: true ), //TODO: ch_sidle_ref_alignedseq // "gg_13_8_otus_taxonomy_99_otu_taxonomy.txt"
file( params.sidle_ref_taxonomy, checkIfExists: true ), //TODO: ch_sidle_ref_taxonomy // "gg_13_8_otus_taxonomy_99_otu_taxonomy.txt"
file( params.sidle_ref_tree, checkIfExists: true ) //TODO: ch_sidle_ref_tree // https://data.qiime2.org/2021.4/common/sepp-refs-gg-13-8.qza
ch_sidle_ref_taxonomy.collect(),
val_sidle_ref_taxonomy,
ch_sidle_ref_taxonomy_tree
)
ch_versions = ch_versions.mix(SIDLE_WF.out.versions)
}
Expand Down

0 comments on commit afbb18f

Please sign in to comment.