Skip to content

Commit 28bfdf3

Browse files
committed
Tidy up tmp outputs throughout and capture output
- Capture all tool output into the log, as it overwhelms the terminal when running lots of samples/cores.
- Closes #17 by removing temporary output at the end of each tool invocation instead of at the end of the workflow.
- This reduces per-sample hard-drive usage by 20x.
1 parent 8a3634b commit 28bfdf3

17 files changed

+59
-44
lines changed

Snakefile

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,6 @@ def _get_seqdir(wildcards):
1616
return os.path.dirname(samples.loc[(wildcards.sample), ["assembly"]].dropna()[0])
1717

1818
rule all:
19-
input:
20-
"pipeline_finished.txt"
21-
22-
rule cleanup:
2319
input:
2420
expand("results/{sample}/amrplusplus/{amrplusplus_outputs}", sample=samples.index, amrplusplus_outputs=amrplusplus_exts),
2521
expand("results/{sample}/rgi/rgi.json", sample=samples.index),
@@ -32,18 +28,11 @@ rule cleanup:
3228
expand("results/{sample}/resfams/resfams.tblout", sample=samples.index),
3329
expand("results/{sample}/mykrobe/report.json", sample=samples.index),
3430
expand("results/{sample}/resfinder/data_resfinder.json", sample=samples.index),
35-
expand("results/{sample}/srax/Results/sraX_analysis.html", sample=samples.index),
31+
expand("results/{sample}/srax/sraX_analysis.html", sample=samples.index),
3632
expand("results/{sample}/sstar/report.tsv", sample=samples.index),
3733
expand("results/{sample}/kmerresistance/results.KmerRes", sample=samples.index),
3834
expand("results/{sample}/deeparg/output.mapping.ARG", sample=samples.index),
3935
#expand("results/{sample}/srst2/srst2__fullgenes__ResFinder__results.txt", sample=samples.index)
40-
output:
41-
"pipeline_finished.txt"
42-
shell:
43-
"""
44-
rm -r results/*/groot/graphs results/*/deeparg/*.fasta results/*/amrplusplus/tmp results/*/staramr/hits/ results/*/ariba/*.gz results/*/srax/tmp results/*/mykrobe/skels || echo "tempfiles already absent"
45-
touch pipeline_finished.txt
46-
"""
4736

4837
#include: "rules/srst2.smk"
4938
include: "rules/deeparg.smk"

rules/abricate.smk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@ rule run_abricate:
1717
shell:
1818
"""
1919
abricate --list > {log}
20-
abricate --threads {threads} --nopath --db {params.dbname} --minid {params.minid} --mincov {params.mincov} {input.contigs} > {output.report} 2> >(tee -a {log} >&2)
20+
# stdout is the report itself; only stderr is appended to the log, which already holds the --list output written above
abricate --threads {threads} --nopath --db {params.dbname} --minid {params.minid} --mincov {params.mincov} {input.contigs} > {output.report} 2>> {log}
2121
"""

rules/amrfinder.smk

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,12 @@ rule run_amrfinder:
2020
conda:
2121
"../envs/amrfinder.yaml"
2222
params:
23-
organism = config["params"]["amrfinder"]["organism"]
23+
organism = config["params"]["amrfinder"]["organism"],
24+
output_tmp_dir = "results/{sample}/amrfinder/tmp"
2425
threads:
2526
config["params"]["threads"]
2627
shell:
27-
"amrfinder -n {input.contigs} -o {output.report} -O {params.organism} -d {input.db}/latest 2> >(tee {log} >&2) "
28+
"""
29+
amrfinder -n {input.contigs} -o {output.report} -O {params.organism} -d {input.db}/latest >{log} 2>&1
30+
rm -rf {params.output_tmp_dir}
31+
"""

rules/amrplusplus.smk

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,10 @@ rule run_amrplusplus:
6666
shell:
6767
"""
6868
mkdir -p {params.output_prefix_tmp}
69-
trimmomatic PE {input.read1} {input.read2} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r1_se_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_se_trimmed.fq SLIDINGWINDOW:4:15 LEADING:3 TRAILING:3 MINLEN:36 2> >(tee {log} >&2)
70-
bwa mem {input.megares_db} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq | samtools sort -n -O sam > {params.output_prefix_tmp}/{wildcards.sample}.sam 2> >(tee -a {log} >&2)
71-
{input.resistome_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene} -group_fp {output.amr_group} -class_fp {output.amr_class} -mech_fp {output.amr_mech} -t 80 2> >(tee -a {log} >&2)
72-
{input.rarefaction_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene}_rare -group_fp {output.amr_group}_rare -class_fp {output.amr_class}_rare -mech_fp {output.amr_mech}_rare -min 5 -max 100 -skip 5 -samples 1 -t 80 2> >(tee -a {log} >&2)
73-
{input.snp_tool} -amr_fp {input.megares_db} -sampe {params.output_prefix_tmp}/{wildcards.sample}.sam -out_fp {output.amr_snps} 2> >(tee -a {log} >&2)
69+
trimmomatic PE {input.read1} {input.read2} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r1_se_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_se_trimmed.fq SLIDINGWINDOW:4:15 LEADING:3 TRAILING:3 MINLEN:36 >{log} 2>&1
70+
bwa mem {input.megares_db} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq 2>> {log} | samtools sort -n -O sam > {params.output_prefix_tmp}/{wildcards.sample}.sam 2>>{log}
71+
{input.resistome_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene} -group_fp {output.amr_group} -class_fp {output.amr_class} -mech_fp {output.amr_mech} -t 80 >>{log} 2>&1
72+
{input.rarefaction_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene}_rare -group_fp {output.amr_group}_rare -class_fp {output.amr_class}_rare -mech_fp {output.amr_mech}_rare -min 5 -max 100 -skip 5 -samples 1 -t 80 >>{log} 2>&1
73+
{input.snp_tool} -amr_fp {input.megares_db} -sampe {params.output_prefix_tmp}/{wildcards.sample}.sam -out_fp {output.amr_snps} >>{log} 2>&1
74+
rm -rf {params.output_prefix_tmp}
7475
"""

rules/ariba.smk

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,11 @@ rule run_ariba:
2525
"logs/ariba_{sample}.log"
2626
conda:
2727
"../envs/ariba.yaml"
28-
threads:
29-
config["params"]["threads"]
28+
threads: 1
3029
params:
3130
output_folder = "results/{sample}/ariba/"
3231
shell:
3332
"""
34-
rm -r {params.output_folder};
35-
ariba run --threads 1 {input.ref_db} {input.read1} {input.read2} {params.output_folder} 2> >(tee {log} >&2)
33+
rm -r {params.output_folder}
34+
ariba run --threads {threads} {input.ref_db} {input.read1} {input.read2} {params.output_folder} > {log} 2>&1
3635
"""

rules/deeparg.smk

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,14 @@ rule run_deeparg:
44
output:
55
report = "results/{sample}/deeparg/output.mapping.ARG",
66
report_potential = "results/{sample}/deeparg/output.mapping.potential.ARG"
7+
log:
8+
"logs/deeparg_{sample}.log"  # named after this rule so deeparg output gets its own log file
79
singularity:
810
"docker://gaarangoa/deeparg:v1.0.1"
911
shell:
1012
"""
11-
python /deeparg/deepARG.py --align --type nucl --reads --input /data/results/{wildcards.sample}/deeparg/reads.fasta --output /data/results/{wildcards.sample}/deeparg/output
13+
python /deeparg/deepARG.py --align --type nucl --reads --input /data/results/{wildcards.sample}/deeparg/reads.fasta --output /data/results/{wildcards.sample}/deeparg/output > {log} 2>&1
14+
rm /data/results/{wildcards.sample}/deeparg/reads.fasta
1215
"""
1316

1417
rule prepare_deeparg_reads:

rules/groot.smk

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,7 @@ rule run_groot:
3636
max_read_length = config['params']['groot']['read_length'] + 5,
3737
graph_dir = "results/{sample}/groot/graphs"
3838
shell:
39-
"zcat {input.read1} {input.read2} | seqkit seq --min-len {params.min_read_length} --max-len {params.max_read_length} | groot align -g {params.graph_dir} -p {threads} -i {input.db_index} --log {log} | groot report --log {log} > {output.report}"
39+
"""
40+
zcat {input.read1} {input.read2} | seqkit seq --min-len {params.min_read_length} --max-len {params.max_read_length} | groot align -g {params.graph_dir} -p {threads} -i {input.db_index} --log {log} | groot report --log {log} > {output.report}
41+
rm -rf {params.graph_dir}
42+
"""

rules/kmerresistance.smk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,6 @@ rule run_kmerresistance:
5151
shell:
5252
"""
5353
zcat {input.read1} {input.read2} > {params.output_folder}/temp_all_reads.fq
54-
kmerresistance -i {params.output_folder}/temp_all_reads.fq -t_db {params.kma_resfinder_db} -s_db {params.species_db} -o {params.output_folder}/results 2> >(tee {log} >&2)
54+
kmerresistance -i {params.output_folder}/temp_all_reads.fq -t_db {params.kma_resfinder_db} -s_db {params.species_db} -o {params.output_folder}/results > {log} 2>&1
5555
rm {params.output_folder}/temp_all_reads.fq
5656
"""

rules/mykrobe.smk

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ rule run_mykrobe:
1313
config["params"]["threads"]
1414
params:
1515
tmp = "results/{sample}/mykrobe/tmp/",
16-
skel_dir = "results/{sample}/mykrobe/skels"
16+
skel_dir = "results/{sample}/mykrobe/skels",
17+
tmp_dir = "results/{sample}/mykrobe/tmp"
1718
shell:
18-
"mykrobe predict {wildcards.sample} tb -1 {input.read1} {input.read2} --skeleton_dir {params.skel_dir} --threads {threads} --format json --output {output.report} --tmp {params.tmp} 2> >(tee {log} >&2) "
19+
"""
20+
mykrobe predict {wildcards.sample} tb -1 {input.read1} {input.read2} --skeleton_dir {params.skel_dir} --threads {threads} --format json --output {output.report} --tmp {params.tmp} > {log} 2>&1
21+
rm -rf {params.skel_dir} {params.tmp_dir}
22+
"""

rules/pointfinder.smk

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,10 @@ rule run_pointfinder:
3838
config["params"]["threads"]
3939
params:
4040
species = config["params"]["pointfinder"]["species"],
41-
41+
output_tmp_dir = "results/{sample}/pointfinder/tmp"
4242
shell:
4343
"""
44-
python {input.pointfinder_script} -i {input.contigs} -p {input.pointfinder_db} -s {params.species} -m blastn -m_p $(which blastn) -o results/{wildcards.sample}/pointfinder 2> >(tee {log} >&2)
44+
python {input.pointfinder_script} -i {input.contigs} -p {input.pointfinder_db} -s {params.species} -m blastn -m_p $(which blastn) -o results/{wildcards.sample}/pointfinder > {log} 2>&1
4545
cp {output.raw_report} {output.report}
46+
rm -rf {params.output_tmp_dir}
4647
"""

0 commit comments

Comments
 (0)