Skip to content

Commit

Permalink
add SIDLE workflow from d4straub/pipesidle
Browse files Browse the repository at this point in the history
  • Loading branch information
d4straub committed Feb 9, 2024
1 parent 0f04676 commit aa1ff5a
Show file tree
Hide file tree
Showing 19 changed files with 940 additions and 8 deletions.
136 changes: 135 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,146 @@ process {

// Publishing rule for DADA2_SPLITREGIONS: every output file except versions.yml is published.
withName: DADA2_SPLITREGIONS {
publishDir = [
// NOTE(review): two `path:` entries follow. They look like the removed
// (dada2/per_region) and the added (sidle/per_region) side of the same diff
// hunk - confirm that only the sidle/per_region entry is meant to remain.
path: { "${params.outdir}/dada2/per_region" },
path: { "${params.outdir}/sidle/per_region" },
mode: params.publish_dir_mode,
// saveAs maps versions.yml to null and leaves every other file name unchanged.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// SIDLE_DBFILT: two option strings (ext.args, ext.args2) plus optional
// publishing of the produced *.qza files when params.save_intermediates is set.
withName: SIDLE_DBFILT {
    ext.args   = '--p-num-degenerates 3' // 3 for greengenes, 5 for SILVA 128
    ext.args2  = '--p-exclude "p__;,k__;" --p-mode contains' // "p__;,k__;" for greengenes
    publishDir = [
        enabled: params.save_intermediates,
        pattern: "*.qza",
        mode   : params.publish_dir_mode,
        path   : { "${params.outdir}/sidle/DB/1_prefiltering" }
    ]
}

// SIDLE_DBEXTRACT: extra command line options and optional publishing of
// the *.qza files (only when params.save_intermediates is set).
withName: SIDLE_DBEXTRACT {
    ext.args   = '--p-identity 2'
    publishDir = [
        enabled: params.save_intermediates,
        pattern: "*.qza",
        mode   : params.publish_dir_mode,
        path   : { "${params.outdir}/sidle/DB/2_primer_extraction" }
    ]
}

// SIDLE_TRIM: *.qza files are published only when params.save_intermediates is set.
withName: SIDLE_TRIM {
    publishDir = [
        enabled: params.save_intermediates,
        pattern: "*.qza",
        mode   : params.publish_dir_mode,
        path   : { "${params.outdir}/sidle/ASV/1_trim" }
    ]
}

// SIDLE_ALIGN: no extra options by default; *.qza files are published only
// when params.save_intermediates is set.
withName: SIDLE_ALIGN {
    ext.args   = ''
    publishDir = [
        enabled: params.save_intermediates,
        pattern: "*.qza",
        mode   : params.publish_dir_mode,
        path   : { "${params.outdir}/sidle/ASV/2_align_db" }
    ]
}

// SIDLE_DBRECON: two publishing rules that share one target directory.
withName: SIDLE_DBRECON {
    ext.args   = ''
    publishDir = [
        // Rule 1: *.qza files, only when intermediates are requested.
        [
            enabled: params.save_intermediates,
            pattern: "*.qza",
            mode   : params.publish_dir_mode,
            path   : { "${params.outdir}/sidle/DB/3_reconstructed" }
        ],
        // Rule 2: everything that is neither a *.qza file nor versions.yml.
        [
            mode  : params.publish_dir_mode,
            path  : { "${params.outdir}/sidle/DB/3_reconstructed" },
            saveAs: { fname -> (fname.endsWith('.qza') || fname == 'versions.yml') ? null : fname }
        ]
    ]
}

// SIDLE_TABLERECON: *.qza files go to reconstructed/qza (intermediates only),
// all other files except versions.yml go to reconstructed/.
withName: SIDLE_TABLERECON {
    ext.args   = "--p-min-counts 0"
    publishDir = [
        // Rule 1: *.qza files, only when intermediates are requested.
        [
            enabled: params.save_intermediates,
            pattern: "*.qza",
            mode   : params.publish_dir_mode,
            path   : { "${params.outdir}/sidle/reconstructed/qza" }
        ],
        // Rule 2: everything that is neither a *.qza file nor versions.yml.
        [
            mode  : params.publish_dir_mode,
            path  : { "${params.outdir}/sidle/reconstructed" },
            saveAs: { fname -> (fname.endsWith('.qza') || fname == 'versions.yml') ? null : fname }
        ]
    ]
}

// SIDLE_TAXRECON: both publishing rules are active only when
// params.save_intermediates is set.
withName: SIDLE_TAXRECON {
    ext.args   = '--p-database "greengenes" --p-define-missing "inherit"'
    publishDir = [
        // Rule 1: *.qza files.
        [
            enabled: params.save_intermediates,
            pattern: "*.qza",
            mode   : params.publish_dir_mode,
            path   : { "${params.outdir}/sidle/reconstructed/qza" }
        ],
        // Rule 2: everything that is neither a *.qza file nor versions.yml.
        [
            enabled: params.save_intermediates,
            mode   : params.publish_dir_mode,
            path   : { "${params.outdir}/sidle/reconstructed" },
            saveAs : { fname -> (fname.endsWith('.qza') || fname == 'versions.yml') ? null : fname }
        ]
    ]
}

// SIDLE_FILTTAX: every output file except versions.yml is always published.
withName: SIDLE_FILTTAX {
    publishDir = [
        mode  : params.publish_dir_mode,
        path  : { "${params.outdir}/sidle/reconstructed" },
        saveAs: { fname -> fname == 'versions.yml' ? null : fname }
    ]
}

// SIDLE_SEQRECON: both publishing rules are active only when
// params.save_intermediates is set.
withName: SIDLE_SEQRECON {
    publishDir = [
        // Rule 1: *.qza files.
        [
            enabled: params.save_intermediates,
            pattern: "*.qza",
            mode   : params.publish_dir_mode,
            path   : { "${params.outdir}/sidle/reconstructed/qza" }
        ],
        // Rule 2: everything that is neither a *.qza file nor versions.yml.
        [
            enabled: params.save_intermediates,
            mode   : params.publish_dir_mode,
            path   : { "${params.outdir}/sidle/reconstructed" },
            saveAs : { fname -> (fname.endsWith('.qza') || fname == 'versions.yml') ? null : fname }
        ]
    ]
}

// SIDLE_TREERECON: *.qza files are published as intermediates only,
// *.nwk files are always published.
withName: SIDLE_TREERECON {
    publishDir = [
        // Rule 1: *.qza files, only when intermediates are requested.
        [
            enabled: params.save_intermediates,
            pattern: "*.qza",
            mode   : params.publish_dir_mode,
            path   : { "${params.outdir}/sidle/reconstructed/qza" }
        ],
        // Rule 2: *.nwk files, unconditionally.
        [
            pattern: "*.nwk",
            mode   : params.publish_dir_mode,
            path   : { "${params.outdir}/sidle/reconstructed" }
        ]
    ]
}


withName: BARRNAP {
ext.kingdom = "bac,arc,mito,euk"
ext.args = "--quiet --reject 0.1"
Expand Down
9 changes: 5 additions & 4 deletions modules/local/dada2_splitregions.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ process DADA2_SPLITREGIONS {
path(table)

output:
tuple val(meta), path( "DADA2_table_*.tsv" ), emit: dada2asv
tuple val(meta), path( "ASV_table_*.tsv" ) , emit: asv
tuple val(meta), path( "ASV_seqs_*.fasta" ) , emit: fasta
path "versions.yml" , emit: versions
tuple val(meta), path( "DADA2_table_*.tsv" ) , emit: dada2asv
//tuple val(meta), path( "ASV_table_*.tsv" ) , emit: asv
//tuple val(meta), path( "ASV_seqs_*.fasta" ) , emit: fasta
tuple val(meta), path( "ASV_table_*.tsv" ), path( "ASV_seqs_*.fasta" ), emit: for_sidle
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
43 changes: 43 additions & 0 deletions modules/local/sidle_align.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Runs `qiime sidle align-regional-kmers` for one region.
//
// Input : meta map (meta.region is used as tag, as default prefix and as
//         --p-region), the file passed to --i-kmers and the file passed to
//         --i-rep-seq.
// Output: aligned_map - meta plus "<prefix>_rep-seqs_align-map.qza"
//         versions    - versions.yml with the qiime2 / sidle versions
process SIDLE_ALIGN {
    tag "$meta.region"
    label 'process_medium'

    container 'docker.io/d4straub/pipesidle:0.1.0-beta'

    input:
    tuple val(meta), path(kmers), path(seq)

    output:
    tuple val(meta), path("*rep-seqs_align-map.qza"), emit: aligned_map
    path "versions.yml"                             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command line options and output prefix can be overridden via task.ext.
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.region}"
    // The exports point config/cache directories at paths relative to the
    // working directory - presumably because the default locations are not
    // writable inside the container (TODO confirm).
    """
    # https://q2-sidle.readthedocs.io/en/latest/reconstruction.html#regional-alignment
    export XDG_CONFIG_HOME="./xdgconfig"
    export MPLCONFIGDIR="./mplconfigdir"
    export NUMBA_CACHE_DIR="./numbacache"
    qiime sidle align-regional-kmers \\
        --p-n-workers $task.cpus \\
        --i-kmers ${kmers} \\
        --i-rep-seq ${seq} \\
        --p-region ${meta.region} \\
        $args \\
        --o-regional-alignment ${prefix}_rep-seqs_align-map.qza
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
        qiime2 plugin sidle: \$( qiime sidle --version | sed 's/ (.*//' | sed 's/.*version //' )
        q2-sidle: \$( qiime sidle --version | sed 's/.*version //' | sed 's/)//' )
    END_VERSIONS
    """
}
58 changes: 58 additions & 0 deletions modules/local/sidle_dbextract.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@

// Extracts one primer-defined region from the reference sequences
// (`qiime feature-classifier extract-reads`) and prepares it for alignment
// (`qiime sidle prepare-extracted-region`).
//
// Reads meta.region, meta.region_length, meta.fw_primer and meta.rv_primer.
// Only db_seq is referenced by the script; table, seq and db_tax are staged
// but not used in the commands below.
// Output: kmers - "db_*_kmers.qza", map - "db_*_map.qza", versions - versions.yml
process SIDLE_DBEXTRACT {
    tag "$meta.region,$meta.region_length"
    label 'process_medium'

    container 'docker.io/d4straub/pipesidle:0.1.0-beta'

    input:
    tuple val(meta), path(table), path(seq), path(db_seq), path(db_tax)

    output:
    tuple val(meta), path("db_*_kmers.qza"), emit: kmers
    tuple val(meta), path("db_*_map.qza")  , emit: map
    path "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args        = task.ext.args ?: ''
    def prefix      = task.ext.prefix ?: "${meta.region}"
    def trim_length = "${meta.region_length}"
    // File names shared between the two qiime calls.
    def reads_qza   = "db_${prefix}.qza"
    def kmers_qza   = "db_${prefix}_${trim_length}_kmers.qza"
    def map_qza     = "db_${prefix}_${trim_length}_map.qza"
    """
    # https://q2-sidle.readthedocs.io/en/latest/database_preparation.html#prepare-a-regional-database-for-each-primer-set
    export XDG_CONFIG_HOME="./xdgconfig"
    export MPLCONFIGDIR="./mplconfigdir"
    export NUMBA_CACHE_DIR="./numbacache"
    #extract sequences
    qiime feature-classifier extract-reads \\
        --p-n-jobs ${task.cpus} \\
        --i-sequences ${db_seq} \\
        ${args} \\
        --p-f-primer ${meta.fw_primer} \\
        --p-r-primer ${meta.rv_primer} \\
        --o-reads ${reads_qza}
    #prepare to be used in alignment
    qiime sidle prepare-extracted-region \\
        --p-n-workers ${task.cpus} \\
        --i-sequences ${reads_qza} \\
        --p-region "${prefix}" \\
        --p-fwd-primer ${meta.fw_primer} \\
        --p-rev-primer ${meta.rv_primer} \\
        --p-trim-length ${trim_length} \\
        --o-collapsed-kmers ${kmers_qza} \\
        --o-kmer-map ${map_qza}
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
        qiime2 plugin sidle: \$( qiime sidle --version | sed 's/ (.*//' | sed 's/.*version //' )
        q2-sidle: \$( qiime sidle --version | sed 's/.*version //' | sed 's/)//' )
    END_VERSIONS
    """
}
50 changes: 50 additions & 0 deletions modules/local/sidle_dbfilt.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Pre-filters the reference database in two steps:
//   1. `qiime rescript cull-seqs` on the sequences (options: task.ext.args)
//   2. `qiime taxa filter-seqs` on the result, using the taxonomy
//      (options: task.ext.args2)
// Output: seq - db_filtered_sequences.qza (after step 1)
//         tax - db_filtered_sequences_tax.qza (after step 2)
//         versions - versions.yml
process SIDLE_DBFILT {
    label 'process_low'

    container 'docker.io/d4straub/pipesidle:0.1.0-beta'

    input:
    path(seq)
    path(tax)

    output:
    path("db_filtered_sequences.qza")    , emit: seq
    path("db_filtered_sequences_tax.qza"), emit: tax
    path "versions.yml"                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def cull_args      = task.ext.args ?: ''
    def taxfilter_args = task.ext.args2 ?: ''
    """
    # https://q2-sidle.readthedocs.io/en/latest/database_preparation.html#filtering-the-database
    #pre-filtering should be very permissive!
    export XDG_CONFIG_HOME="./xdgconfig"
    export MPLCONFIGDIR="./mplconfigdir"
    export NUMBA_CACHE_DIR="./numbacache"
    # authors of SMURF recommend "--p-num-degenerates 3" for greengenes 13_8 database at 99%
    # the RESCRIPt formatted Silva 128 database is filtered to exclude sequences with more than 5 degenerates [3], [4]
    qiime rescript cull-seqs \\
        --p-n-jobs ${task.cpus} \\
        --i-sequences ${seq} \\
        ${cull_args} \\
        --o-clean-sequences db_filtered_sequences.qza
    #filtering a greengenes database for features missing a phylum (p__;) or kingdom(k__;) designation.
    #CPU=1
    qiime taxa filter-seqs \\
        --i-sequences db_filtered_sequences.qza \\
        --i-taxonomy ${tax} \\
        ${taxfilter_args} \\
        --o-filtered-sequences db_filtered_sequences_tax.qza
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
        qiime2 rescript: \$( qiime rescript --version | sed 's/ (.*//' | sed 's/.*version //' )
    END_VERSIONS
    """
}
56 changes: 56 additions & 0 deletions modules/local/sidle_dbrecon.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Reconstructs the database from all regions
// (`qiime sidle reconstruct-database`), then tabulates the summary and
// exports the resulting visualisation into the folder reconstruction_summary.
//
// Input : metaid, map and aligned_map are combined element-wise, i.e. the
//         n-th entries of the three inputs describe the same region
//         (assumes all three are lists of equal length - TODO confirm).
// Output: reconstruction_map.qza, reconstruction_summary.qza,
//         reconstruction_summary/* and versions.yml
process SIDLE_DBRECON {
    label 'process_medium'

    container 'docker.io/d4straub/pipesidle:0.1.0-beta'

    input:
    val(metaid)
    path(map)
    path(aligned_map)

    output:
    path("reconstruction_map.qza")    , emit: reconstruction_map
    path("reconstruction_summary.qza"), emit: reconstruction_summary
    path("reconstruction_summary/*")  , emit: visualisation
    path "versions.yml"               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Build [region, kmer map, alignment] triples and order them by the
    // region entry, so the per-region options are emitted in sorted order.
    def per_region = [metaid, map, aligned_map].transpose().sort { triple -> triple[0] }
    // One option group per region; each group starts with a blank, the
    // groups are concatenated without any further separator.
    def db_input = per_region
        .collect { region, kmer_map, alignment ->
            " --p-region ${region} --i-kmer-map ${kmer_map} --i-regional-alignment ${alignment}"
        }
        .join('')
    """
    #https://q2-sidle.readthedocs.io/en/latest/reconstruction.html#database-reconstruction
    export XDG_CONFIG_HOME="./xdgconfig"
    export MPLCONFIGDIR="./mplconfigdir"
    export NUMBA_CACHE_DIR="./numbacache"
    qiime sidle reconstruct-database \\
        --p-n-workers ${task.cpus} \\
        ${db_input} \\
        ${args} \\
        --o-database-map reconstruction_map.qza \\
        --o-database-summary reconstruction_summary.qza
    #database summary can be used to evaluate the quality of the reconstruction; see Fuks, C; Elgart, M; Amir, A; et al (2018) “Combining 16S rRNA gene variable regions enables high-resolution microbial community profiling.” Microbiome. 6:17. doi: 10.1186/s40168-017-0396-x
    qiime metadata tabulate \\
        --m-input-file reconstruction_summary.qza \\
        --o-visualization reconstruction_summary.qzv
    qiime tools export \\
        --input-path reconstruction_summary.qzv \\
        --output-path "reconstruction_summary"
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        qiime2: \$( qiime --version | sed '1!d;s/.* //' )
        qiime2 plugin sidle: \$( qiime sidle --version | sed 's/ (.*//' | sed 's/.*version //' )
        q2-sidle: \$( qiime sidle --version | sed 's/.*version //' | sed 's/)//' )
    END_VERSIONS
    """
}
Loading

0 comments on commit aa1ff5a

Please sign in to comment.