Skip to content

Commit

Permalink
produce per-region ASV tables and fasta
Browse files Browse the repository at this point in the history
  • Loading branch information
d4straub committed Feb 7, 2024
1 parent 45fefd4 commit 715ae05
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 0 deletions.
8 changes: 8 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,14 @@ process {
]
}

withName: DADA2_SPLITREGIONS {
publishDir = [
path: { "${params.outdir}/dada2/per_region" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: BARRNAP {
ext.kingdom = "bac,arc,mito,euk"
ext.args = "--quiet --reject 0.1"
Expand Down
67 changes: 67 additions & 0 deletions modules/local/dada2_splitregions.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Split the merged DADA2 ASV table into one table + fasta per amplicon region:
// keep only the samples belonging to the region, rename sample columns from the
// internal 'id' to the user-facing 'sample' name, and drop ASVs whose counts are
// zero across all remaining samples.
process DADA2_SPLITREGIONS {
    label 'process_low'

    // TODO: DADA2 not necessary, R base sufficient!
    conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
        'biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"

    input:
    // meta: per-region metadata (read below: region, fw_primer, rv_primer)
    // mapping: grouped per-sample metadata maps; each must carry 'id' and 'sample'
    tuple val(meta), val(mapping)
    // merged DADA2 ASV table: TSV with an ASV_ID column, one column per sample id,
    // and a trailing 'sequence' column (see column slicing below)
    path(table)

    output:
    tuple val(meta), path( "DADA2_table_*.tsv" ), emit: dada2asv  // counts + sequences
    tuple val(meta), path( "ASV_table_*.tsv" ) , emit: asv        // counts only
    tuple val(meta), path( "ASV_seqs_*.fasta" ) , emit: fasta     // sequences only
    path "versions.yml" , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    //make groovy map to R list
    // Textually rewrite Groovy's toString() of a list of maps into R list() syntax,
    // e.g. [['id':'s1', 'sample':'A']] -> list(list(id='s1',sample='A')).
    // NOTE(review): assumes no key/value contains the substrings "':", ", '", "[",
    // "]" or "false" that these replacements could also match — verify upstream.
    def mapping_r_list = mapping
        .toString()
        .replaceAll("':","=")
        .replaceAll(", '",",")
        .replaceAll("\\['","list(")
        .replaceAll("\\[","list(")
        .replaceAll("\\]",")")
        .replaceAll("false","'false'") // quote bare booleans so R parses them as strings
    // Suffix identifying this region and its primer pair in all output file names
    def suffix = "region" + meta.region + "_" + meta.fw_primer + "_" + meta.rv_primer
    """
    #!/usr/bin/env Rscript
    nested_list <- $mapping_r_list
    # one row per sample; columns are the metadata keys (incl. 'id' and 'sample')
    mapping <- as.data.frame(do.call(rbind, nested_list))
    df <- read.csv("$table", header=TRUE, sep="\\t")
    # extract samples of this region
    keep <- intersect( colnames(df), c("ASV_ID", unlist(mapping\$id), "sequence" ) )
    df <- subset( df, select = keep )
    # modify sample names from .id to .sample
    # NOTE(review): gsub matches substrings, so an id that is a prefix of another
    # id could rename the wrong column — consider anchoring the pattern
    for (row in 1:nrow(mapping)) {
        colnames(df) <- gsub( mapping[row,]\$id, mapping[row,]\$sample, colnames(df) )
    }
    # filter rows with only 0, occurring because many samples were removed
    # (columns 2..ncol-1 are the sample count columns between ASV_ID and sequence)
    df <- df[as.logical(rowSums(df[,2:(ncol(df)-1)] != 0)), ]
    # Write file with ASV abundance and sequences to file
    write.table(df, file = "DADA2_table_${suffix}.tsv", sep = "\\t", row.names = FALSE, quote = FALSE, na = '')
    # Write fasta file with ASV sequences to file
    write.table(data.frame(s = sprintf(">%s\n%s", df\$ASV_ID, df\$sequence)), 'ASV_seqs_${suffix}.fasta', col.names = FALSE, row.names = FALSE, quote = FALSE, na = '')
    # Write ASV file with ASV abundances to file
    df\$sequence <- NULL
    write.table(df, file = "ASV_table_${suffix}.tsv", sep="\\t", row.names = FALSE, quote = FALSE, na = '')
    writeLines(c("\\"${task.process}\\":", paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")),paste0(" dada2: ", packageVersion("dada2")) ), "versions.yml")
    """
}
15 changes: 15 additions & 0 deletions workflows/ampliseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ include { DADA2_DENOISING } from '../modules/local/dada2_denoising
include { DADA2_RMCHIMERA } from '../modules/local/dada2_rmchimera'
include { DADA2_STATS } from '../modules/local/dada2_stats'
include { DADA2_MERGE } from '../modules/local/dada2_merge'
include { DADA2_SPLITREGIONS } from '../modules/local/dada2_splitregions'
include { BARRNAP } from '../modules/local/barrnap'
include { BARRNAPSUMMARY } from '../modules/local/barrnapsummary'
include { FILTER_SSU } from '../modules/local/filter_ssu'
Expand Down Expand Up @@ -417,6 +418,20 @@ workflow AMPLISEQ {
ch_stats = DADA2_MERGE.out.dada2stats
}

//separate sequences and abundances when several regions
if ( params.input_multiregion ) {
DADA2_SPLITREGIONS (
//DADA2_DENOISING per run & region -> per run
ch_reads
.map {
info, reads ->
def meta = info.subMap( info.keySet() - 'id' - 'sample' - 'run' ) //All of 'id', 'sample', 'run' must be removed to merge by region; downstream unused: 'single_end'
[ meta, info ] }
.groupTuple(by: 0 ).dump(tag:'DADA2_SPLITREGIONS:meta'),
DADA2_MERGE.out.dada2asv.first() )
ch_versions = ch_versions.mix(DADA2_SPLITREGIONS.out.versions)
}

//
// MODULE : ASV post-clustering with VSEARCH
//
Expand Down

0 comments on commit 715ae05

Please sign in to comment.