Skip to content

Commit

Permalink
fix codon filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
d4straub committed Aug 31, 2023
1 parent 4ca49d4 commit dc92556
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 30 deletions.
24 changes: 14 additions & 10 deletions assets/report_template.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ params:
filter_ssu_asv: ""
filter_len_asv: FALSE
filter_len_asv_len_orig: FALSE
filter_codons: FALSE
filter_codons_fasta: FALSE
filter_codons_stats: FALSE
stop_codons: ""
itsx_cutasv_summary: ""
cut_dada_ref_taxonomy: FALSE
Expand Down Expand Up @@ -576,7 +577,7 @@ if ( params$dada_sample_inference == "independent" ) {
```

```{r, results='asis'}
flag_any_filtering <- !isFALSE(params$path_barrnap_sum) || !isFALSE(params$filter_len_asv) || !isFALSE(params$filter_codons) || !isFALSE(params$vsearch_cluster)
flag_any_filtering <- !isFALSE(params$path_barrnap_sum) || !isFALSE(params$filter_len_asv) || !isFALSE(params$filter_codons_fasta) || !isFALSE(params$vsearch_cluster)
```

<!-- Section on ASV filtering -->
Expand Down Expand Up @@ -778,18 +779,25 @@ cat("\n\nLength filter results can be found in folder [asv_length_filter](../asv

<!-- Subsection on codon usage filter -->

```{r, eval = !isFALSE(params$filter_codons), results='asis'}
```{r, eval = !isFALSE(params$filter_codons_fasta), results='asis'}
filter_codons_fasta <- read.table(file = params$filter_codons_fasta, header = FALSE, sep = "\t")
filter_codons_fasta_passed <- nrow(filter_codons_fasta)/2
cat(paste0("
## Codon usage
Amplicons of coding regions are expected to be free of stop codons and consist of condon tripletts.
ASVs were filtered against the presence of stop codons (",params$stop_codons,") in the specified open reading frame of the ASV.
Additionally, ASVs that are not multiple of 3 in length were omitted.
",filter_codons_fasta_passed," ASVs passed the filtering.
Codon usage filter results can be found in folder [codon_filter](../codon_filter).
"))
```

```{r, eval = !isFALSE(params$filter_codons_stats), results='asis'}
# import stats tsv
filter_codons_stats <- read.table(file = params$filter_codons, header = TRUE, sep = "\t")
filter_codons_stats <- read.table(file = params$filter_codons_stats, header = TRUE, sep = "\t")
cat("The following table shows read counts for each sample after filtering:")
Expand All @@ -798,10 +806,6 @@ datatable(filter_codons_stats, options = list(
scrollX = TRUE,
scrollY = "300px",
paging = FALSE))
#TODO: add ASV count after filtering
cat("\n\nCodon usage filter results can be found in folder [codon_filter](../codon_filter).")
```

<!-- Section on taxonomic classification -->
Expand Down Expand Up @@ -1559,8 +1563,8 @@ if ( !isFALSE(params$filter_len_asv) && flag_filter_len_stats ) {
cat("were removed (",sum(filter_len_asv_filtered$Counts)," ASVs passed).")
}
```
```{r, eval = !isFALSE(params$filter_codons), results='asis'}
cat("ASVs with stop codons (",params$stop_codons,") or with a length not multiple of 3 in length were removed.")
```{r, eval = !isFALSE(params$filter_codons_fasta), results='asis'}
cat(filter_codons_fasta_passed,"ASVs with stop codons (",params$stop_codons,") or with a length not multiple of 3 were removed.")
```

```{r, eval = any_taxonomy, results='asis'}
Expand Down
32 changes: 19 additions & 13 deletions bin/filt_codons.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
dest="count",
type=argparse.FileType("r"),
help="Count table file of the ASVs from the AmpliSeq pipeline",
required=True,
required=False,
)

parser.add_argument(
Expand Down Expand Up @@ -115,21 +115,25 @@ def check_asv(seq, start, stop):

Out_Seq = open(args.prefix + "_filtered.fna", "w")
Out_list = open(args.prefix + "_filtered.list", "w")
Out_table = open(args.prefix + "_filtered.table.tsv", "w")
if args.count is not None:
Out_table = open(args.prefix + "_filtered.table.tsv", "w")
else:
Out_table = open("empty_" + args.prefix + "_filtered.table.tsv", "w")

count_dict = {}
p1 = re.compile("\t")
p2 = re.compile(">")

count = 0
for line in args.count:
line = line.rstrip("\n")
if count == 0:
print(line, file=Out_table)
count += 1
else:
tmp_list = re.split(p1, line)
count_dict[tmp_list[0]] = line
if args.count is not None:
count = 0
for line in args.count:
line = line.rstrip("\n")
if count == 0:
print(line, file=Out_table)
count += 1
else:
tmp_list = re.split(p1, line)
count_dict[tmp_list[0]] = line

for line in args.fasta:
line = line.rstrip("\n")
Expand All @@ -143,9 +147,11 @@ def check_asv(seq, start, stop):
if check_asv(line, begin, end):
print(">", bin_head, "\n", line, file=Out_Seq, sep="")
print(bin_head, file=Out_list)
print(count_dict[bin_head], file=Out_table)
if args.count is not None:
print(count_dict[bin_head], file=Out_table)

args.count.close()
if args.count is not None:
args.count.close()
args.fasta.close()
Out_Seq.close()
Out_list.close()
Expand Down
11 changes: 6 additions & 5 deletions modules/local/filter_codons.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,24 @@ process FILTER_CODONS {
input:
path(fasta)
path(asv)
path(dada2stats)

output:
path( "ASV_codon_filtered.table.tsv" ) , emit: asv
path( "ASV_codon_filtered.table.tsv" ) , emit: asv, optional: true
path( "ASV_codon_filtered.fna" ) , emit: fasta
path( "ASV_codon_filtered.list" ) , emit: list
path( "codon.filtered.stats.tsv" ) , emit: stats
path( "codon.filtered.stats.tsv" ) , emit: stats, optional: true
path( "versions.yml" ) , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def count_table = asv ? "-t ${asv}" : ""
def make_stats_cmd = asv ? "filt_codon_stats.py ASV_codon_filtered.table.tsv" : ""
"""
filt_codons.py -f ${fasta} -t ${asv} -p ASV_codon ${args}
filt_codon_stats.py ASV_codon_filtered.table.tsv
filt_codons.py -f ${fasta} ${count_table} -p ASV_codon ${args}
$make_stats_cmd
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
4 changes: 3 additions & 1 deletion modules/local/summary_report.nf
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ process SUMMARY_REPORT {
path(filter_ssu_asv)
path(filter_len_asv_stats)
path(filter_len_asv_len_orig)
path(filter_codons_fasta)
path(filter_codons_stats)
path(itsx_cutasv_summary)
path(dada2_tax)
Expand Down Expand Up @@ -105,7 +106,8 @@ process SUMMARY_REPORT {
filter_len_asv_len_orig ? "filter_len_asv_len_orig='$filter_len_asv_len_orig'" : "",
params.min_len_asv ? "min_len_asv=$params.min_len_asv" : "min_len_asv=0",
params.max_len_asv ? "max_len_asv=$params.max_len_asv" : "max_len_asv=0",
filter_codons_stats ? "filter_codons='$filter_codons_stats',stop_codons='$params.stop_codons'" : "",
filter_codons_fasta ? "filter_codons_fasta='$filter_codons_fasta',stop_codons='$params.stop_codons'" : "",
filter_codons_stats ? "filter_codons_stats='$filter_codons_stats'" : "",
itsx_cutasv_summary ? "itsx_cutasv_summary='$itsx_cutasv_summary',cut_its='$params.cut_its'" : "",
!dada2_tax ? "" :
params.dada_ref_tax_custom ? "dada2_taxonomy='$dada2_tax',flag_ref_tax_user=TRUE" :
Expand Down
3 changes: 2 additions & 1 deletion workflows/ampliseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ workflow AMPLISEQ {
// Modules : Filtering based on codons in an open reading frame
//
if (params.filter_codons ) {
FILTER_CODONS ( ch_dada2_fasta, ch_dada2_asv, ch_stats )
FILTER_CODONS ( ch_dada2_fasta, ch_dada2_asv.ifEmpty( [] ) )
ch_versions = ch_versions.mix(FILTER_CODONS.out.versions.ifEmpty(null))
MERGE_STATS_CODONS( ch_stats, FILTER_CODONS.out.stats )
ch_stats = MERGE_STATS_CODONS.out.tsv
Expand Down Expand Up @@ -804,6 +804,7 @@ workflow AMPLISEQ {
params.filter_ssu ? FILTER_SSU.out.fasta.ifEmpty( [] ) : [],
params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.stats.ifEmpty( [] ) : [],
params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.len_orig.ifEmpty( [] ) : [],
params.filter_codons ? FILTER_CODONS.out.fasta.ifEmpty( [] ) : [],
params.filter_codons ? FILTER_CODONS.out.stats.ifEmpty( [] ) : [],
params.cut_its != "none" ? ITSX_CUTASV.out.summary.ifEmpty( [] ) : [],
!params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? ch_dada2_tax.ifEmpty( [] ) : [],
Expand Down

0 comments on commit dc92556

Please sign in to comment.