diff --git a/conf/modules.config b/conf/modules.config
index 0261fd3f..a0bda596 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -273,12 +273,146 @@ process {
 
     withName: DADA2_SPLITREGIONS {
         publishDir = [
-            path: { "${params.outdir}/dada2/per_region" },
+            path: { "${params.outdir}/sidle/per_region" },
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
 
+    withName: SIDLE_DBFILT {
+        ext.args   = '--p-num-degenerates 3' // forwarded to 'qiime rescript cull-seqs'; 3 for greengenes, 5 for SILVA 128
+        ext.args2  = '--p-exclude "p__;,k__;" --p-mode contains' // forwarded to 'qiime taxa filter-seqs'; "p__;,k__;" drops greengenes entries lacking a phylum or kingdom
+        publishDir = [
+            path: { "${params.outdir}/sidle/DB/1_prefiltering" },
+            mode: params.publish_dir_mode,
+            pattern: "*.qza",
+            enabled: params.save_intermediates
+        ]
+    }
+
+    withName: SIDLE_DBEXTRACT {
+        ext.args   = '--p-identity 2' // forwarded to 'qiime feature-classifier extract-reads'
+        publishDir = [
+            path: { "${params.outdir}/sidle/DB/2_primer_extraction" },
+            mode: params.publish_dir_mode,
+            pattern: "*.qza",
+            enabled: params.save_intermediates
+        ]
+    }
+
+    withName: SIDLE_TRIM {
+        publishDir = [
+            path: { "${params.outdir}/sidle/ASV/1_trim" },
+            mode: params.publish_dir_mode,
+            pattern: "*.qza",
+            enabled: params.save_intermediates
+        ]
+    }
+
+    withName: SIDLE_ALIGN {
+        ext.args   = '' // forwarded to 'qiime sidle align-regional-kmers'
+        publishDir = [
+            path: { "${params.outdir}/sidle/ASV/2_align_db" },
+            mode: params.publish_dir_mode,
+            pattern: "*.qza",
+            enabled: params.save_intermediates
+        ]
+    }
+
+    withName: SIDLE_DBRECON {
+        ext.args   = '' // forwarded to 'qiime sidle reconstruct-database'
+        publishDir = [
+            [
+                path: { "${params.outdir}/sidle/DB/3_reconstructed" },
+                mode: params.publish_dir_mode,
+                pattern: "*.qza",
+                enabled: params.save_intermediates
+            ],
+            [
+                path: { "${params.outdir}/sidle/DB/3_reconstructed" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.endsWith('.qza') || filename.equals('versions.yml') ? null : filename }
+            ]
+        ]
+    }
+
+    withName: SIDLE_TABLERECON {
+        ext.args   = "--p-min-counts 0" // forwarded to 'qiime sidle reconstruct-counts'
+        publishDir = [
+            [
+                path: { "${params.outdir}/sidle/reconstructed/qza" },
+                mode: params.publish_dir_mode,
+                pattern: "*.qza",
+                enabled: params.save_intermediates
+            ],
+            [
+                path: { "${params.outdir}/sidle/reconstructed" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.endsWith('.qza') || filename.equals('versions.yml') ? null : filename }
+            ]
+        ]
+    }
+
+    withName: SIDLE_TAXRECON {
+        ext.args   = '--p-database "greengenes" --p-define-missing "inherit"' // forwarded to 'qiime sidle reconstruct-taxonomy'
+        publishDir = [
+            [
+                path: { "${params.outdir}/sidle/reconstructed/qza" },
+                mode: params.publish_dir_mode,
+                pattern: "*.qza",
+                enabled: params.save_intermediates
+            ],
+            [
+                path: { "${params.outdir}/sidle/reconstructed" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.endsWith('.qza') || filename.equals('versions.yml') ? null : filename },
+                enabled: params.save_intermediates
+            ]
+        ]
+    }
+
+    withName: SIDLE_FILTTAX {
+        publishDir = [
+            path: { "${params.outdir}/sidle/reconstructed" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: SIDLE_SEQRECON {
+        publishDir = [
+            [
+                path: { "${params.outdir}/sidle/reconstructed/qza" },
+                mode: params.publish_dir_mode,
+                pattern: "*.qza",
+                enabled: params.save_intermediates
+            ],
+            [
+                path: { "${params.outdir}/sidle/reconstructed" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.endsWith('.qza') || filename.equals('versions.yml') ? null : filename },
+                enabled: params.save_intermediates
+            ]
+        ]
+    }
+
+    withName: SIDLE_TREERECON {
+        publishDir = [
+            [
+                path: { "${params.outdir}/sidle/reconstructed/qza" },
+                mode: params.publish_dir_mode,
+                pattern: "*.qza",
+                enabled: params.save_intermediates
+            ],
+            [
+                path: { "${params.outdir}/sidle/reconstructed" },
+                mode: params.publish_dir_mode,
+                pattern: "*.nwk"
+            ]
+        ]
+    }
+
+
     withName: BARRNAP {
         ext.kingdom = "bac,arc,mito,euk"
         ext.args = "--quiet --reject 0.1"
diff --git a/modules/local/dada2_splitregions.nf b/modules/local/dada2_splitregions.nf
index 7b205326..d3107bcd 100644
--- a/modules/local/dada2_splitregions.nf
+++ b/modules/local/dada2_splitregions.nf
@@ -11,10 +11,11 @@ process DADA2_SPLITREGIONS {
     path(table)
 
     output:
-    tuple val(meta), path( "DADA2_table_*.tsv" ), emit: dada2asv
-    tuple val(meta), path( "ASV_table_*.tsv" )  , emit: asv
-    tuple val(meta), path( "ASV_seqs_*.fasta" ) , emit: fasta
-    path "versions.yml"                         , emit: versions
+    tuple val(meta), path( "DADA2_table_*.tsv" )                            , emit: dada2asv
+    // former separate output, now part of 'for_sidle': tuple val(meta), path( "ASV_table_*.tsv" ), emit: asv
+    // former separate output, now part of 'for_sidle': tuple val(meta), path( "ASV_seqs_*.fasta" ), emit: fasta
+    tuple val(meta), path( "ASV_table_*.tsv" ), path( "ASV_seqs_*.fasta" ), emit: for_sidle
+    path "versions.yml"                                                     , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/local/sidle_align.nf b/modules/local/sidle_align.nf
new file mode 100644
index 00000000..c98f955e
--- /dev/null
+++ b/modules/local/sidle_align.nf
@@ -0,0 +1,43 @@
+process SIDLE_ALIGN { // runs 'qiime sidle align-regional-kmers' for one region
+    tag "$meta.region"
+    label 'process_medium'
+
+    container 'docker.io/d4straub/pipesidle:0.1.0-beta'
+
+    input:
+    tuple val(meta), path(kmers), path(seq)
+
+    output:
+    tuple val(meta), path("*rep-seqs_align-map.qza"), emit: aligned_map
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.region}" // output file prefix, defaults to the region name
+    def primerfw = "${meta.fw_primer}" // not referenced in the script below
+    def primerrv = "${meta.rv_primer}" // not referenced in the script below
+    """
+    # https://q2-sidle.readthedocs.io/en/latest/reconstruction.html#regional-alignment
+    export XDG_CONFIG_HOME="./xdgconfig"
+    export MPLCONFIGDIR="./mplconfigdir"
+    export NUMBA_CACHE_DIR="./numbacache"
+
+    qiime sidle align-regional-kmers \\
+        --p-n-workers $task.cpus \\
+        --i-kmers ${kmers} \\
+        --i-rep-seq ${seq} \\
+        --p-region ${meta.region} \\
+        $args \\
+        --o-regional-alignment ${prefix}_rep-seqs_align-map.qza
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
+        qiime2 plugin sidle: \$( qiime sidle --version | sed 's/ (.*//' | sed 's/.*version //' )
+        q2-sidle: \$( qiime sidle --version | sed 's/.*version //' | sed 's/)//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/sidle_dbextract.nf b/modules/local/sidle_dbextract.nf
new file mode 100644
index 00000000..c57597e3
--- /dev/null
+++ b/modules/local/sidle_dbextract.nf
@@ -0,0 +1,58 @@
+
+process SIDLE_DBEXTRACT { // extracts the primer-delimited region from the database and prepares it with 'qiime sidle prepare-extracted-region'
+    tag "$meta.region,$meta.region_length"
+    label 'process_medium'
+
+    container 'docker.io/d4straub/pipesidle:0.1.0-beta'
+
+    input:
+    tuple val(meta), path(table), path(seq), path(db_seq), path(db_tax) // only meta and db_seq are referenced in the script below
+
+    output:
+    tuple val(meta), path("db_*_kmers.qza"), emit: kmers
+    tuple val(meta), path("db_*_map.qza")  , emit: map
+    path "versions.yml"                    , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // passed to 'qiime feature-classifier extract-reads' only
+    def prefix = task.ext.prefix ?: "${meta.region}"
+    def primerfw = "${meta.fw_primer}"
+    def primerrv = "${meta.rv_primer}"
+    def length = "${meta.region_length}" // used as --p-trim-length and in the output file names
+    """
+    # https://q2-sidle.readthedocs.io/en/latest/database_preparation.html#prepare-a-regional-database-for-each-primer-set
+    export XDG_CONFIG_HOME="./xdgconfig"
+    export MPLCONFIGDIR="./mplconfigdir"
+    export NUMBA_CACHE_DIR="./numbacache"
+
+    #extract sequences
+    qiime feature-classifier extract-reads \\
+        --p-n-jobs $task.cpus \\
+        --i-sequences $db_seq \\
+        $args \\
+        --p-f-primer $primerfw \\
+        --p-r-primer $primerrv \\
+        --o-reads db_${prefix}.qza
+
+    #prepare to be used in alignment
+    qiime sidle prepare-extracted-region \\
+        --p-n-workers $task.cpus \\
+        --i-sequences db_${prefix}.qza \\
+        --p-region "${prefix}" \\
+        --p-fwd-primer $primerfw \\
+        --p-rev-primer $primerrv \\
+        --p-trim-length $length \\
+        --o-collapsed-kmers db_${prefix}_${length}_kmers.qza \\
+        --o-kmer-map db_${prefix}_${length}_map.qza
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
+        qiime2 plugin sidle: \$( qiime sidle --version | sed 's/ (.*//' | sed 's/.*version //' )
+        q2-sidle: \$( qiime sidle --version | sed 's/.*version //' | sed 's/)//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/sidle_dbfilt.nf b/modules/local/sidle_dbfilt.nf
new file mode 100644
index 00000000..b5bfd93e
--- /dev/null
+++ b/modules/local/sidle_dbfilt.nf
@@ -0,0 +1,50 @@
+process SIDLE_DBFILT { // pre-filters the reference database: 'qiime rescript cull-seqs' followed by 'qiime taxa filter-seqs'
+    label 'process_low'
+
+    container 'docker.io/d4straub/pipesidle:0.1.0-beta'
+
+    input:
+    path(seq)
+    path(tax)
+
+    output:
+    path("db_filtered_sequences.qza")    , emit: seq // output of cull-seqs
+    path("db_filtered_sequences_tax.qza"), emit: tax // output of 'taxa filter-seqs', i.e. sequences (not a taxonomy artifact)
+    path "versions.yml"                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // options for 'qiime rescript cull-seqs'
+    def args2 = task.ext.args2 ?: '' // options for 'qiime taxa filter-seqs'
+    """
+    # https://q2-sidle.readthedocs.io/en/latest/database_preparation.html#filtering-the-database
+    #pre-filtering should be very permissive!
+    export XDG_CONFIG_HOME="./xdgconfig"
+    export MPLCONFIGDIR="./mplconfigdir"
+    export NUMBA_CACHE_DIR="./numbacache"
+
+    # authors of SMURF recommend "--p-num-degenerates 3" for greengenes 13_8 database at 99%
+    # the RESCRIPt formatted Silva 128 database is filtered to exclude sequences with more than 5 degenerates [3], [4]
+    qiime rescript cull-seqs \\
+        --p-n-jobs $task.cpus \\
+        --i-sequences $seq \\
+        $args \\
+        --o-clean-sequences db_filtered_sequences.qza
+
+    #filtering a greengenes database for features missing a phylum (p__;) or kingdom(k__;) designation.
+    #CPU=1
+    qiime taxa filter-seqs \\
+        --i-sequences db_filtered_sequences.qza \\
+        --i-taxonomy $tax \\
+        $args2 \\
+        --o-filtered-sequences db_filtered_sequences_tax.qza
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
+        qiime2 rescript: \$( qiime rescript --version | sed 's/ (.*//' | sed 's/.*version //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/sidle_dbrecon.nf b/modules/local/sidle_dbrecon.nf
new file mode 100644
index 00000000..83c87264
--- /dev/null
+++ b/modules/local/sidle_dbrecon.nf
@@ -0,0 +1,56 @@
+process SIDLE_DBRECON { // runs 'qiime sidle reconstruct-database' over all regions at once and exports the summary
+    label 'process_medium'
+
+    container 'docker.io/d4straub/pipesidle:0.1.0-beta'
+
+    input:
+    val(metaid) // one identifier per region; used as --p-region value and as sort key below
+    path(map)
+    path(aligned_map)
+
+    output:
+    path("reconstruction_map.qza")    , emit: reconstruction_map
+    path("reconstruction_summary.qza"), emit: reconstruction_summary
+    path("reconstruction_summary/*")  , emit: visualisation
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def db_input = ""
+    // zip the three inputs element-wise into [id, kmer map, alignment] triples and sort them by id so the region order on the command line is stable
+    def df = [metaid, map, aligned_map].transpose().sort{ it[0] }
+    for (i in df) {
+        db_input += " --p-region "+i[0]+" --i-kmer-map "+i[1]+" --i-regional-alignment "+i[2]
+    }
+    """
+    #https://q2-sidle.readthedocs.io/en/latest/reconstruction.html#database-reconstruction
+    export XDG_CONFIG_HOME="./xdgconfig"
+    export MPLCONFIGDIR="./mplconfigdir"
+    export NUMBA_CACHE_DIR="./numbacache"
+
+    qiime sidle reconstruct-database \\
+        --p-n-workers $task.cpus \\
+        $db_input \\
+        $args \\
+        --o-database-map reconstruction_map.qza \\
+        --o-database-summary reconstruction_summary.qza
+
+    #database summary can be used to evaluate the quality of the reconstruction; see Fuks, C; Elgart, M; Amir, A; et al (2018) “Combining 16S rRNA gene variable regions enables high-resolution microbial community profiling.” Microbiome. 6:17. doi: 10.1186/s40168-017-0396-x
+    qiime metadata tabulate \\
+        --m-input-file reconstruction_summary.qza \\
+        --o-visualization reconstruction_summary.qzv
+    qiime tools export \\
+        --input-path reconstruction_summary.qzv \\
+        --output-path "reconstruction_summary"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
+        qiime2 plugin sidle: \$( qiime sidle --version | sed 's/ (.*//' | sed 's/.*version //' )
+        q2-sidle: \$( qiime sidle --version | sed 's/.*version //' | sed 's/)//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/sidle_filttax.nf b/modules/local/sidle_filttax.nf
new file mode 100644
index 00000000..00133e64
--- /dev/null
+++ b/modules/local/sidle_filttax.nf
@@ -0,0 +1,37 @@
+
+process SIDLE_FILTTAX { // R script: keeps only rows of table_tofilter whose first-column ID occurs in table_ref, and writes a merged table
+    label 'process_single'
+
+    container 'docker.io/d4straub/pipesidle:0.1.0-beta'
+
+    input:
+    path(table_tofilter)
+    path(table_ref) // read with skip = 1, i.e. its first line is ignored
+
+    output:
+    path("reconstructed_taxonomy.tsv"), emit: filtered
+    path("reconstructed_merged.tsv")  , emit: merged
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    #!/usr/bin/env Rscript
+
+    df_tofilter <- read.table("$table_tofilter", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+    colnames(df_tofilter)[1] <- "ID"
+
+    df_ref <- read.table("$table_ref", header = TRUE, sep = "\t", stringsAsFactors = FALSE, skip = 1, comment.char = "")
+    colnames(df_ref)[1] <- "ID"
+
+    df_merged <- merge(df_tofilter, df_ref, by="ID", all.x=FALSE, all.y=TRUE)
+    write.table(df_merged, file = "reconstructed_merged.tsv", row.names=FALSE, sep="\t")
+
+    df_filtered <- subset(df_tofilter, df_tofilter\$ID %in% df_ref\$ID)
+    write.table(df_filtered, file = "reconstructed_taxonomy.tsv", row.names=FALSE, sep="\t")
+
+    writeLines(c("\\"${task.process}\\":", paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")) ), "versions.yml")
+    """
+}
diff --git a/modules/local/sidle_in.nf b/modules/local/sidle_in.nf
new file mode 100644
index 00000000..1f1d5360
--- /dev/null
+++ b/modules/local/sidle_in.nf
@@ -0,0 +1,44 @@
+process SIDLE_IN { // imports one region's sequences and count table as QIIME2 artifacts
+    tag "$meta.region"
+    label 'process_single'
+
+    container 'docker.io/d4straub/pipesidle:0.1.0-beta'
+
+    input:
+    tuple val(meta), path(table), path(seq)
+
+    output:
+    tuple val(meta), path("*_table.qza"), path("*_rep-seqs.qza"), emit: table_seq
+    path "versions.yml"                                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // not referenced in the script below
+    def prefix = task.ext.prefix ?: "${meta.region}"
+    """
+    export XDG_CONFIG_HOME="./xdgconfig"
+    export MPLCONFIGDIR="./mplconfigdir"
+    export NUMBA_CACHE_DIR="./numbacache"
+
+    # seq
+    qiime tools import \\
+        --input-path "$seq" \\
+        --type 'FeatureData[Sequence]' \\
+        --output-path ${prefix}_rep-seqs.qza
+
+    # table
+    biom convert -i "$table" -o table.biom --table-type="OTU table" --to-hdf5
+    qiime tools import \\
+        --input-path table.biom \\
+        --type 'FeatureTable[Frequency]' \\
+        --input-format BIOMV210Format \\
+        --output-path ${prefix}_table.qza
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/sidle_indb.nf b/modules/local/sidle_indb.nf
new file mode 100644
index 00000000..33367313
--- /dev/null
+++
b/modules/local/sidle_indb.nf
@@ -0,0 +1,42 @@
+process SIDLE_INDB { // imports the reference sequences and the headerless taxonomy table as QIIME2 artifacts
+    label 'process_single'
+
+    container 'docker.io/d4straub/pipesidle:0.1.0-beta'
+
+    input:
+    path(seq)
+    path(tax) // imported with HeaderlessTSVTaxonomyFormat
+
+    output:
+    path("db_sequences.qza"), emit: seq
+    path("db_taxonomy.qza") , emit: tax
+    path "versions.yml"     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    export XDG_CONFIG_HOME="./xdgconfig"
+    export MPLCONFIGDIR="./mplconfigdir"
+    export NUMBA_CACHE_DIR="./numbacache"
+
+    # db_seq
+    qiime tools import \\
+        --input-path $seq \\
+        --output-path db_sequences.qza \\
+        --type 'FeatureData[Sequence]'
+
+    # db_tax
+    qiime tools import \\
+        --input-path $tax \\
+        --output-path db_taxonomy.qza \\
+        --type 'FeatureData[Taxonomy]' \\
+        --input-format HeaderlessTSVTaxonomyFormat
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/sidle_indbaligned.nf b/modules/local/sidle_indbaligned.nf
new file mode 100644
index 00000000..e37b169a
--- /dev/null
+++ b/modules/local/sidle_indbaligned.nf
@@ -0,0 +1,34 @@
+process SIDLE_INDBALIGNED { // imports the aligned reference sequences as a FeatureData[AlignedSequence] artifact
+    label 'process_single'
+
+    container 'docker.io/d4straub/pipesidle:0.1.0-beta'
+
+    input:
+    path(seq)
+
+    output:
+    path("db_alignedsequences.qza"), emit: seq
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // not referenced in the script below
+    """
+    export XDG_CONFIG_HOME="./xdgconfig"
+    export MPLCONFIGDIR="./mplconfigdir"
+    export NUMBA_CACHE_DIR="./numbacache"
+
+    # db_seq
+    qiime tools import \\
+        --input-path $seq \\
+        --output-path db_alignedsequences.qza \\
+        --type 'FeatureData[AlignedSequence]'
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/sidle_seqrecon.nf b/modules/local/sidle_seqrecon.nf
new file mode 100644
index 00000000..33a2bdf3
---
/dev/null
+++ b/modules/local/sidle_seqrecon.nf
@@ -0,0 +1,56 @@
+process SIDLE_SEQRECON { // runs 'qiime sidle reconstruct-fragment-rep-seqs' and exports the result as visualisation files and fasta
+    label 'process_single'
+
+    container 'docker.io/d4straub/pipesidle:0.1.0-beta'
+
+    input:
+    path(reconstruction_map)
+    path(reconstruction_summary)
+    path(db_aligned_sequences)
+
+    output:
+    path("reconstruction_fragments.qza")  , emit: qza
+    path("reconstruction_fragments/*")    , emit: visualisation
+    path("reconstructed_fragments.fasta") , emit: fasta // copy of exported/dna-sequences.fasta
+    path "versions.yml"                   , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    #https://q2-sidle.readthedocs.io/en/latest/reconstruction.html#reconstructing-the-phylogenetic-tree
+    #https://forum.qiime2.org/t/sidle-tutorial-missing-aligned-sequence-file/20604/4 for db_aligned_sequences
+    export XDG_CONFIG_HOME="./xdgconfig"
+    export MPLCONFIGDIR="./mplconfigdir"
+    export NUMBA_CACHE_DIR="./numbacache"
+
+    #CPU=1
+    qiime sidle reconstruct-fragment-rep-seqs \\
+        --i-reconstruction-map ${reconstruction_map} \\
+        --i-reconstruction-summary ${reconstruction_summary} \\
+        --i-aligned-sequences ${db_aligned_sequences} \\
+        --o-representative-fragments reconstruction_fragments.qza
+
+    #export visualisation
+    qiime metadata tabulate \\
+        --m-input-file reconstruction_fragments.qza \\
+        --o-visualization reconstruction_fragments.qzv
+    qiime tools export \\
+        --input-path reconstruction_fragments.qzv \\
+        --output-path "reconstruction_fragments"
+
+    #export fasta file
+    qiime tools export \\
+        --input-path reconstruction_fragments.qza \\
+        --output-path exported
+    cp exported/dna-sequences.fasta reconstructed_fragments.fasta
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
+        qiime2 plugin sidle: \$( qiime sidle --version | sed 's/ (.*//' | sed 's/.*version //' )
+        q2-sidle: \$( qiime sidle --version | sed 's/.*version //' | sed 's/)//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/sidle_tablerecon.nf b/modules/local/sidle_tablerecon.nf
new
file mode 100644
index 00000000..cd6d14f3
--- /dev/null
+++ b/modules/local/sidle_tablerecon.nf
@@ -0,0 +1,70 @@
+process SIDLE_TABLERECON { // runs 'qiime sidle reconstruct-counts' over all regions and exports the table as qzv, biom and tsv
+    label 'process_medium'
+
+    container 'docker.io/d4straub/pipesidle:0.1.0-beta'
+
+    input:
+    val(metaid) // one identifier per region; used as --p-region value and as sort key below
+    path(table)
+    path(aligned_map)
+    path(reconstruction_map)
+    path(reconstruction_summary)
+
+    output:
+    path("reconstruction_table.qza")         , emit: qza
+    path("reconstruction_table/*")           , emit: exported
+    path("reconstructed_feature-table.biom") , emit: biom
+    path("reconstructed_feature-table.tsv")  , emit: tsv
+    path "versions.yml"                      , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def region_input = ""
+    // zip the inputs element-wise into [id, alignment, table] triples and sort them by id so the region order on the command line is stable
+    def df = [metaid, aligned_map, table].transpose().sort{ it[0] }
+    for (i in df) {
+        region_input += " --p-region "+i[0]+" --i-regional-alignment "+i[1]+" --i-regional-table "+i[2]
+    }
+    """
+    #https://q2-sidle.readthedocs.io/en/latest/reconstruction.html#table-reconstruction
+    export XDG_CONFIG_HOME="./xdgconfig"
+    export MPLCONFIGDIR="./mplconfigdir"
+    export NUMBA_CACHE_DIR="./numbacache"
+
+    qiime sidle reconstruct-counts \\
+        --p-n-workers $task.cpus \\
+        $region_input \\
+        --i-database-map $reconstruction_map \\
+        --i-database-summary $reconstruction_summary \\
+        $args \\
+        --o-reconstructed-table reconstruction_table.qza
+
+    #export visualisation
+    qiime feature-table summarize \\
+        --i-table reconstruction_table.qza \\
+        --o-visualization reconstruction_table.qzv
+    qiime tools export \\
+        --input-path reconstruction_table.qzv \\
+        --output-path "reconstruction_table"
+
+    #export feature table in biom and tsv format
+    qiime tools export \\
+        --input-path reconstruction_table.qza \\
+        --output-path exported
+    biom convert \\
+        -i exported/feature-table.biom \\
+        -o reconstructed_feature-table.tsv \\
+        --to-tsv
+    cp exported/feature-table.biom reconstructed_feature-table.biom
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
+        qiime2 plugin sidle: \$( qiime sidle --version | sed 's/ (.*//' | sed 's/.*version //' )
+        q2-sidle: \$( qiime sidle --version | sed 's/.*version //' | sed 's/)//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/sidle_taxrecon.nf b/modules/local/sidle_taxrecon.nf
new file mode 100644
index 00000000..0172f6c2
--- /dev/null
+++ b/modules/local/sidle_taxrecon.nf
@@ -0,0 +1,56 @@
+process SIDLE_TAXRECON { // runs 'qiime sidle reconstruct-taxonomy' and exports the taxonomy as qzv content and tsv
+    label 'process_single'
+
+    container 'docker.io/d4straub/pipesidle:0.1.0-beta'
+
+    input:
+    path(reconstruction_map)
+    path(tax)
+
+    output:
+    path("reconstruction_taxonomy.qza"), emit: qza
+    path("reconstruction_taxonomy/*")  , emit: visualisation
+    path("reconstruction_taxonomy.tsv"), emit: tsv // copy of exported/taxonomy.tsv
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    #https://q2-sidle.readthedocs.io/en/latest/reconstruction.html#taxonomic-reconstruction
+    #https://forum.qiime2.org/t/sidle-reconstruct-database/25439
+    export XDG_CONFIG_HOME="./xdgconfig"
+    export MPLCONFIGDIR="./mplconfigdir"
+    export NUMBA_CACHE_DIR="./numbacache"
+
+    #CPU=1
+    qiime sidle reconstruct-taxonomy \\
+        --i-reconstruction-map ${reconstruction_map} \\
+        --i-taxonomy ${tax} \\
+        $args \\
+        --o-reconstructed-taxonomy reconstruction_taxonomy.qza
+
+    #export visualisation
+    qiime metadata tabulate \\
+        --m-input-file reconstruction_taxonomy.qza \\
+        --o-visualization reconstruction_taxonomy.qzv
+    qiime tools export \\
+        --input-path reconstruction_taxonomy.qzv \\
+        --output-path "reconstruction_taxonomy"
+
+    #export taxonomic tsv
+    qiime tools export \\
+        --input-path reconstruction_taxonomy.qza \\
+        --output-path exported
+    cp exported/taxonomy.tsv reconstruction_taxonomy.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
+        qiime2 plugin sidle: \$( qiime sidle --version | sed 's/ (.*//' | sed 's/.*version //' )
+        q2-sidle: \$( qiime sidle --version | sed 's/.*version //' | sed 's/)//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/sidle_treerecon.nf b/modules/local/sidle_treerecon.nf
new file mode 100644
index 00000000..0d12be82
--- /dev/null
+++ b/modules/local/sidle_treerecon.nf
@@ -0,0 +1,47 @@
+process SIDLE_TREERECON { // places the reconstructed fragments into a reference tree with 'qiime fragment-insertion sepp' and exports newick
+    label 'process_medium'
+
+    container 'docker.io/d4straub/pipesidle:0.1.0-beta'
+
+    input:
+    path(reconstruction_fragments)
+    path(ref_db_tree) // passed as --i-reference-database
+
+    output:
+    path("reconstructed_tree.qza")       , emit: qza
+    path("reconstruction_placements.qza"), emit: qza_placements
+    path("reconstructed_tree.nwk")       , emit: nwk // copy of exported/tree.nwk
+    path "versions.yml"                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    # https://q2-sidle.readthedocs.io/en/latest/reconstruction.html#reconstructing-the-phylogenetic-tree
+    # required: SEPP file https://forum.qiime2.org/t/sidle-tutorial-missing-aligned-sequence-file/20604/8
+    # SEPP file only available for Greengenes 13_8 or SILVE 128 (not 138!): https://forum.qiime2.org/t/error-in-reconstructing-the-phylogenetic-tree/23757/8
+    export XDG_CONFIG_HOME="./xdgconfig"
+    export MPLCONFIGDIR="./mplconfigdir"
+    export NUMBA_CACHE_DIR="./numbacache"
+
+    qiime fragment-insertion sepp \\
+        --p-threads $task.cpus \\
+        --i-representative-sequences $reconstruction_fragments \\
+        --i-reference-database $ref_db_tree \\
+        --o-tree reconstructed_tree.qza \\
+        --o-placements reconstruction_placements.qza
+
+    #export tree file
+    qiime tools export \\
+        --input-path reconstructed_tree.qza \\
+        --output-path exported
+    cp exported/tree.nwk reconstructed_tree.nwk
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
+        q2-fragment-insertion: \$( qiime fragment-insertion --version | sed 's/.*version //' | sed 's/)//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/sidle_trim.nf
b/modules/local/sidle_trim.nf
new file mode 100644
index 00000000..3120a09c
--- /dev/null
+++ b/modules/local/sidle_trim.nf
@@ -0,0 +1,45 @@
+process SIDLE_TRIM { // trims one region's table and sequences to a fixed length with 'qiime sidle trim-dada2-posthoc'
+    tag "$meta.region,$meta.region_length"
+    label 'process_single'
+
+    container 'docker.io/d4straub/pipesidle:0.1.0-beta'
+
+    input:
+    tuple val(meta), path(table), path(seq)
+
+    output:
+    tuple val(meta), path("*_table.qza")   , emit: table
+    tuple val(meta), path("*_rep-seqs.qza"), emit: seq
+    path "versions.yml"                    , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // not referenced in the script below
+    def prefix = task.ext.prefix ?: "${meta.region}"
+    def primerfw = "${meta.fw_primer}" // not referenced in the script below
+    def primerrv = "${meta.rv_primer}" // not referenced in the script below
+    def length = "${meta.region_length}" // used as --p-trim-length and in the output file names
+    """
+    # https://q2-sidle.readthedocs.io/en/latest/read_preparation.html#dada2
+    export XDG_CONFIG_HOME="./xdgconfig"
+    export MPLCONFIGDIR="./mplconfigdir"
+    export NUMBA_CACHE_DIR="./numbacache"
+
+    #CPU=1
+    qiime sidle trim-dada2-posthoc \\
+        --i-table ${table} \\
+        --i-representative-sequences ${seq} \\
+        --p-trim-length $length \\
+        --o-trimmed-table ${prefix}_${length}_table.qza \\
+        --o-trimmed-representative-sequences ${prefix}_${length}_rep-seqs.qza
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
+        qiime2 plugin sidle: \$( qiime sidle --version | sed 's/ (.*//' | sed 's/.*version //' )
+        q2-sidle: \$( qiime sidle --version | sed 's/.*version //' | sed 's/)//' )
+    END_VERSIONS
+    """
+}
diff --git a/nextflow.config b/nextflow.config
index 6a8e9994..d1d582a1 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -118,6 +118,10 @@ params {
     kraken2_assign_taxlevels = null
     kraken2_ref_tax_custom = null
     kraken2_confidence = 0.0
+    sidle_ref_sequences = null // path to SIDLE reference sequences (*.fasta)
+    sidle_ref_alignedseq = null // path to SIDLE aligned reference sequences (*.fasta)
+    sidle_ref_taxonomy = null // path to SIDLE reference taxonomy strings (*.txt)
+    sidle_ref_tree = null // path to SIDLE reference tree (*.qza)
 
     // MultiQC options
     multiqc_config = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 4a146da4..39c34d82 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -504,6 +504,26 @@
                     "minimum": 0,
                     "maximum": 1
                 },
+                "sidle_ref_sequences": {
+                    "type": "string",
+                    "help_text": "",
+                    "description": "Path to SIDLE reference taxonomy sequences (*.fasta)"
+                },
+                "sidle_ref_alignedseq": {
+                    "type": "string",
+                    "help_text": "",
+                    "description": "Path to SIDLE aligned reference taxonomy sequences (*.fasta)"
+                },
+                "sidle_ref_taxonomy": {
+                    "type": "string",
+                    "help_text": "",
+                    "description": "Path to SIDLE reference taxonomy strings (*.txt)"
+                },
+                "sidle_ref_tree": {
+                    "type": "string",
+                    "help_text": "",
+                    "description": "Path to SIDLE reference taxonomy tree (*.qza)"
+                },
                 "sintax_ref_taxonomy": {
                     "type": "string",
                     "help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `coidb=221216`) . This will download the desired database and initiate taxonomic classification with VSEARCH sintax and the chosen database, which if needed is formatted to produce a file that is compatible with VSEARCH sintax.\n\nThe following databases are supported:\n- COIDB - eukaryotic Cytochrome Oxidase I (COI) from The Barcode of Life Data System (BOLD) - COI\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n\nGenerally, using `coidb`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version.",
diff --git a/subworkflows/local/sidle_wf.nf b/subworkflows/local/sidle_wf.nf
new file mode 100644
index 00000000..54101cf9
--- /dev/null
+++ b/subworkflows/local/sidle_wf.nf
@@ -0,0 +1,121 @@
+/*
+ * Multi-region reconstruction of abundance table, taxonomy, sequences and tree with SIDLE (q2-sidle)
+ */
+
+include { SIDLE_INDB } from '../../modules/local/sidle_indb'
+include { SIDLE_INDBALIGNED } from '../../modules/local/sidle_indbaligned'
+include { SIDLE_DBFILT } from '../../modules/local/sidle_dbfilt'
+include { SIDLE_IN } from '../../modules/local/sidle_in'
+include { SIDLE_TRIM } from '../../modules/local/sidle_trim'
+include { SIDLE_DBEXTRACT } from '../../modules/local/sidle_dbextract'
+include { SIDLE_ALIGN } from '../../modules/local/sidle_align'
+include { SIDLE_DBRECON } from '../../modules/local/sidle_dbrecon'
+include { SIDLE_TABLERECON } from '../../modules/local/sidle_tablerecon'
+include { SIDLE_TAXRECON } from '../../modules/local/sidle_taxrecon'
+include { SIDLE_FILTTAX } from '../../modules/local/sidle_filttax'
+include { SIDLE_SEQRECON } from '../../modules/local/sidle_seqrecon'
+include { SIDLE_TREERECON } from '../../modules/local/sidle_treerecon'
+
+
+workflow SIDLE_WF {
+    take:
+    ch_asv_tables_sequences // [ meta, ASV table, ASV sequences ] per region; meta must carry 'region'
+    ch_db_sequences // reference sequences
+    ch_db_alignedsequences // aligned reference sequences
+    ch_db_taxonomy // reference taxonomy
+    ch_db_tree // reference database for fragment insertion
+
+    main:
+    ch_sidle_versions = Channel.empty()
+
+    // DB: import and pre-filter the reference database
+    SIDLE_INDB ( ch_db_sequences, ch_db_taxonomy )
+    ch_sidle_versions = ch_sidle_versions.mix(SIDLE_INDB.out.versions)
+    SIDLE_INDBALIGNED ( ch_db_alignedsequences )
+    ch_sidle_versions = ch_sidle_versions.mix(SIDLE_INDBALIGNED.out.versions)
+    SIDLE_DBFILT ( SIDLE_INDB.out.seq, SIDLE_INDB.out.tax )
+    ch_sidle_versions = ch_sidle_versions.mix(SIDLE_DBFILT.out.versions)
+
+    // ASV: import and trim tables and sequences per region
+    SIDLE_IN ( ch_asv_tables_sequences )
+    ch_sidle_versions = ch_sidle_versions.mix(SIDLE_IN.out.versions)
+    SIDLE_TRIM ( SIDLE_IN.out.table_seq )
+    ch_sidle_versions = ch_sidle_versions.mix(SIDLE_TRIM.out.versions)
+
+    // Combine & reconstruct
+    SIDLE_DBEXTRACT (
+        SIDLE_IN.out.table_seq
+            .combine( SIDLE_DBFILT.out.seq )
+            .combine( SIDLE_DBFILT.out.tax ) )
+    ch_sidle_versions = ch_sidle_versions.mix(SIDLE_DBEXTRACT.out.versions)
+
+    SIDLE_ALIGN ( SIDLE_DBEXTRACT.out.kmers.join(SIDLE_TRIM.out.seq).dump(tag: 'into_SIDLE_ALIGN') )
+    ch_sidle_versions = ch_sidle_versions.mix(SIDLE_ALIGN.out.versions)
+
+    SIDLE_DBEXTRACT.out.map
+        .join(SIDLE_ALIGN.out.aligned_map)
+        .multiMap { meta, map, aligned_map ->
+            region: meta.region // the caller strips 'id' from meta, so the region name is the only per-region key
+            map: map
+            aligned_map: aligned_map
+        }
+        .set { ch_db_reconstruction }
+
+    SIDLE_DBRECON (
+        ch_db_reconstruction.region.collect(),
+        ch_db_reconstruction.map.collect(),
+        ch_db_reconstruction.aligned_map.collect() )
+    ch_sidle_versions = ch_sidle_versions.mix(SIDLE_DBRECON.out.versions)
+
+    SIDLE_TRIM.out.table
+        .join(SIDLE_ALIGN.out.aligned_map)
+        .multiMap { meta, table, aligned_map ->
+            region: meta.region // same key as for the database reconstruction above
+            table: table
+            aligned_map: aligned_map
+        }
+        .set { ch_table_reconstruction }
+
+    // Abundance table
+    SIDLE_TABLERECON (
+        ch_table_reconstruction.region.collect(),
+        ch_table_reconstruction.table.collect(),
+        ch_table_reconstruction.aligned_map.collect(),
+        SIDLE_DBRECON.out.reconstruction_map,
+        SIDLE_DBRECON.out.reconstruction_summary )
+    ch_sidle_versions = ch_sidle_versions.mix(SIDLE_TABLERECON.out.versions)
+
+    // Taxonomic classification
+    SIDLE_TAXRECON (
+        SIDLE_DBRECON.out.reconstruction_map,
+        SIDLE_INDB.out.tax )
+    ch_sidle_versions = ch_sidle_versions.mix(SIDLE_TAXRECON.out.versions)
+    SIDLE_FILTTAX ( SIDLE_TAXRECON.out.tsv, SIDLE_TABLERECON.out.tsv )
+    ch_sidle_versions = ch_sidle_versions.mix(SIDLE_FILTTAX.out.versions)
+
+    // Reconstruct sequences/fragments
+    // required: aligned sequences file: https://forum.qiime2.org/t/finding-alignment-files-for-sidle/23773/2
+    SIDLE_SEQRECON (
+        SIDLE_DBRECON.out.reconstruction_map,
+        SIDLE_DBRECON.out.reconstruction_summary,
+        SIDLE_INDBALIGNED.out.seq )
+    // "The output of reconstruct-fragment-rep-seqs provides consensus sequences only if a reference sequence can't be resolved (ids that have a | symbol in them.) It's designed specifically to integrate with the fragment insertion and makes some downstream assumptions, including that you have the same database and insertion tree version.", see https://forum.qiime2.org/t/how-to-merge-q2-sidle-output-with-other-results/22823/2
+    ch_sidle_versions = ch_sidle_versions.mix(SIDLE_SEQRECON.out.versions)
+
+    // Reconstruct phylogenetic tree
+    SIDLE_TREERECON (
+        SIDLE_SEQRECON.out.qza,
+        ch_db_tree )
+    ch_sidle_versions = ch_sidle_versions.mix(SIDLE_TREERECON.out.versions)
+
+    emit:
+    tax_qza = SIDLE_TAXRECON.out.qza
+    tax_tsv = SIDLE_FILTTAX.out.filtered
+    tax_tsv_merged = SIDLE_FILTTAX.out.merged
+    table_biom = SIDLE_TABLERECON.out.biom
+    table_qza = SIDLE_TABLERECON.out.qza
+    table_tsv = SIDLE_TABLERECON.out.tsv
+    tree_nwk = SIDLE_TREERECON.out.nwk
+    tree_qza = SIDLE_TREERECON.out.qza
+    versions = ch_sidle_versions
+}
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index c9b88322..447907dd 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -179,6 +179,7 @@ include { DADA2_RMCHIMERA } from '../modules/local/dada2_rmchimera
 include { DADA2_STATS } from '../modules/local/dada2_stats'
 include { DADA2_MERGE } from '../modules/local/dada2_merge'
 include { DADA2_SPLITREGIONS } from '../modules/local/dada2_splitregions'
+include { SIDLE_WF } from '../subworkflows/local/sidle_wf'
 include { BARRNAP } from '../modules/local/barrnap'
 include { BARRNAPSUMMARY } from '../modules/local/barrnapsummary'
 include { FILTER_SSU } from '../modules/local/filter_ssu'
@@ -418,19 +419,32 @@ workflow AMPLISEQ {
         ch_stats = DADA2_MERGE.out.dada2stats
     }
 
-    //separate sequences and abundances when several regions
+    //
+    // SUBWORKFLOW / MODULES : Taxonomic classification with DADA2, SINTAX and/or QIIME2
+    //
     if ( params.input_multiregion ) {
+        // separate sequences and abundances when several regions
         DADA2_SPLITREGIONS (
             //DADA2_DENOISING per run & region -> per run
             ch_reads
                 .map {
                     info, reads ->
                         def meta = info.subMap( info.keySet() - 'id' - 'sample' - 'run' ) // All of 'id', 'sample', 'run' must be removed to merge by region
-                        def inf2 = info.subMap( info.keySet() - 'single_end' )// May not contain false,true,null: remove 'single_end'
+                        def inf2 = info.subMap( 'id', 'sample' )// May not contain false,true,null; only 'id', 'sample' required
                         [ meta, inf2 ] }
                 .groupTuple(by: 0 ).dump(tag:'DADA2_SPLITREGIONS:meta'),
-            DADA2_MERGE.out.dada2asv.first() )
+            DADA2_MERGE.out.dada2asv )
         ch_versions = ch_versions.mix(DADA2_SPLITREGIONS.out.versions)
+
+        // run q2-sidle
+        SIDLE_WF (
+            DADA2_SPLITREGIONS.out.for_sidle,
+            file( params.sidle_ref_sequences, checkIfExists: true ), //TODO: ch_sidle_ref_sequences // "gg_13_8_otus_rep_set_99_otus.fasta"
+            file( params.sidle_ref_alignedseq, checkIfExists: true ), //TODO: ch_sidle_ref_alignedseq // aligned reference sequences, e.g. "gg_13_8_otus_rep_set_aligned_99_otus.fasta"
+            file( params.sidle_ref_taxonomy, checkIfExists: true ), //TODO: ch_sidle_ref_taxonomy // "gg_13_8_otus_taxonomy_99_otu_taxonomy.txt"
+            file( params.sidle_ref_tree, checkIfExists: true ) //TODO: ch_sidle_ref_tree // https://data.qiime2.org/2021.4/common/sepp-refs-gg-13-8.qza
+        )
+        ch_versions = ch_versions.mix(SIDLE_WF.out.versions)
     }
 
     //