Skip to content

Commit

Permalink
feat: prepared wf to run test data; removed unused conf options; snak…
Browse files Browse the repository at this point in the history
…efmt + linting
  • Loading branch information
m-jahn committed Jul 23, 2024
1 parent 4b31486 commit d351622
Show file tree
Hide file tree
Showing 11 changed files with 169 additions and 159 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ resources/**
# Custom additions
Notes.md
.vscode/*
.snakemake-workflow-catalog.yml
.snakemake-workflow-catalog.yml
.test/results/*
69 changes: 32 additions & 37 deletions .test/config/config.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@

# optional: define output folder here
# default: "./results"
output: null

# define samplesheet here
samplesheet: "./test/config/samples.tsv"
samplesheet: "config/samples.tsv"

get_genome:
database: "ncbi"
Expand All @@ -28,18 +22,20 @@ star:
multi: 10
sam_multi: 1
intron_max: 1
default: [
"--readFilesCommand zcat ",
"--outSAMstrandField None ",
"--outSAMattributes All ",
"--outSAMattrIHstart 0 ",
"--outFilterType Normal ",
"--outFilterMultimapScoreRange 1 ",
"-o STARmappings ",
"--outSAMtype BAM Unsorted ",
"--outStd BAM_Unsorted ",
"--outMultimapperOrder Random ",
"--alignEndsType EndToEnd"]
default:
[
"--readFilesCommand zcat ",
"--outSAMstrandField None ",
"--outSAMattributes All ",
"--outSAMattrIHstart 0 ",
"--outFilterType Normal ",
"--outFilterMultimapScoreRange 1 ",
"-o STARmappings ",
"--outSAMtype BAM Unsorted ",
"--outStd BAM_Unsorted ",
"--outMultimapperOrder Random ",
"--alignEndsType EndToEnd",
]

extract_features:
biotypes: ["rRNA", "tRNA"]
Expand All @@ -54,26 +50,25 @@ deeptools:
normalize: "CPM"

annotate_orfs:
window_size: 30
sorf_max_length: 300
sorf_min_length: 45
orf_start_codon_table: 11
orf_stop_codon: ["TAA", "TAG", "TGA"]
orf_longest_only: False
window_size: 30
sorf_max_length: 300
sorf_min_length: 45
orf_start_codon_table: 11
orf_stop_codon: ["TAA", "TAG", "TGA"]
orf_longest_only: False

shift_reads:
window_size: 30
read_length: [27, 45]
# rpf_read_length: [30, 45]
# qti_read_length: [27, 45]
rnaseq_read_length: [0, 1000]
end_alignment: "3prime"
shift_table: "config/shift_table/shift_table.csv"
export_bam: False
export_bigwig: True
skip_shifting: False
skip_length_filter: True

window_size: 30
read_length: [27, 45]
# rpf_read_length: [30, 45]
# qti_read_length: [27, 45]
rnaseq_read_length: [0, 1000]
end_alignment: "3prime"
shift_table: "config/shift_table/shift_table.csv"
export_bam: False
export_bigwig: True
skip_shifting: False
skip_length_filter: True

multiqc:
config: "config/multiqc_config.yml"
2 changes: 2 additions & 0 deletions .test/config/multiqc_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
remove_sections:
- samtools-stats
5 changes: 2 additions & 3 deletions .test/config/samples.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
sample condition replicate lib_prep data_folder fq1
RPF-RTP1 RPF-RTP 1 mpusp .test/data RPF-RTP1_R1_001.fastq.gz
RPF-RTP2 RPF-RTP 2 mpusp .test/data RPF-RTP2_R1_001.fastq.gz

RPF-RTP1 RPF-RTP 1 mpusp data RPF-RTP1_R1_001.fastq.gz
RPF-RTP2 RPF-RTP 2 mpusp data RPF-RTP2_R1_001.fastq.gz
20 changes: 20 additions & 0 deletions .test/config/shift_table/shift_table.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
fraction,offsets_start
27,-11
28,-12
29,-13
30,-14
31,-15
32,-16
33,-17
34,-18
35,-19
36,-20
37,-21
38,-22
39,-23
40,-24
41,-25
42,-26
43,-27
44,-28
45,-29
67 changes: 31 additions & 36 deletions config/config.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@

# optional: define output folder here
# default: "./results"
output: null

# define samplesheet here
samplesheet: "config/samples.tsv"

get_genome:
Expand All @@ -28,18 +22,20 @@ star:
multi: 10
sam_multi: 1
intron_max: 1
default: [
"--readFilesCommand zcat ",
"--outSAMstrandField None ",
"--outSAMattributes All ",
"--outSAMattrIHstart 0 ",
"--outFilterType Normal ",
"--outFilterMultimapScoreRange 1 ",
"-o STARmappings ",
"--outSAMtype BAM Unsorted ",
"--outStd BAM_Unsorted ",
"--outMultimapperOrder Random ",
"--alignEndsType EndToEnd"]
default:
[
"--readFilesCommand zcat ",
"--outSAMstrandField None ",
"--outSAMattributes All ",
"--outSAMattrIHstart 0 ",
"--outFilterType Normal ",
"--outFilterMultimapScoreRange 1 ",
"-o STARmappings ",
"--outSAMtype BAM Unsorted ",
"--outStd BAM_Unsorted ",
"--outMultimapperOrder Random ",
"--alignEndsType EndToEnd",
]

extract_features:
biotypes: ["rRNA", "tRNA"]
Expand All @@ -54,26 +50,25 @@ deeptools:
normalize: "CPM"

annotate_orfs:
window_size: 30
sorf_max_length: 300
sorf_min_length: 45
orf_start_codon_table: 11
orf_stop_codon: ["TAA", "TAG", "TGA"]
orf_longest_only: False
window_size: 30
sorf_max_length: 300
sorf_min_length: 45
orf_start_codon_table: 11
orf_stop_codon: ["TAA", "TAG", "TGA"]
orf_longest_only: False

shift_reads:
window_size: 30
read_length: [27, 45]
# rpf_read_length: [30, 45]
# qti_read_length: [27, 45]
rnaseq_read_length: [0, 1000]
end_alignment: "3prime"
shift_table: "config/shift_table/shift_table.csv"
export_bam: False
export_bigwig: True
skip_shifting: False
skip_length_filter: True

window_size: 30
read_length: [27, 45]
# rpf_read_length: [30, 45]
# qti_read_length: [27, 45]
rnaseq_read_length: [0, 1000]
end_alignment: "3prime"
shift_table: "config/shift_table/shift_table.csv"
export_bam: False
export_bigwig: True
skip_shifting: False
skip_length_filter: True

multiqc:
config: "config/multiqc_config.yml"
23 changes: 13 additions & 10 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ import pandas as pd
from datetime import date
from snakemake.utils import min_version

#min_version("7.0")
# min_version("7.0")

# Workflow metadata that is interpolated into the closing banner.
__author__ = "Rina Ahmed-Begrich, Michael Jahn"
# Current calendar year as a string; read from the date object directly
# instead of splitting its ISO representation.
__year__ = str(date.today().year)

bold = "\033[1m"
green = "\033[92m"
Expand All @@ -27,13 +27,14 @@ msg = f"""{cyan}Bacterial-Riboseq: A Snakemake workflow
for the analysis of riboseq data in bacteria.{end}
"""

# Closing banner with author and copyright notice; it is printed by the
# onsuccess and onerror handlers of this Snakefile.
epilog = f"""
{cyan}Written by {__author__}.
Max Planck Unit for the Science of Pathogens. Copyright (c) {__year__}.
Copyright Holder All Rights Reserved.{end}
"""


# load configuration
# -----------------------------------------------------
configfile: "config/config.yml"
Expand All @@ -51,35 +52,37 @@ include: "rules/postprocessing.smk"
shell.executable("bash")
shell.prefix(f"set -eo pipefail; ")

if config.get('output') is None:
config['output'] = os.path.join(os.getcwd(), "./results")

onstart:
    # Print a short summary of the run when the workflow starts.
    print("\n--- Analysis started...\n")
    print()
    print("--- Analysis parameters --------------------------------------------\n")
    print(f"Current working directory: {os.getcwd()}")
    # The output location is the 'results' folder below the working
    # directory; the former config['output'] option is no longer read here.
    print(f"Output directory: {os.path.join(os.getcwd(), 'results')}")
    print()
    print(f"Riboseq samples: {list(samples.index)}")
    print()


onsuccess:
    # Print the closing banner and a success marker.
    print()
    print(msg)
    print(epilog)
    print("--- Workflow finished, no error! -----------------------------------")
    print()
    # Copy the run log to results/workflow.log and append a final status line.
    debug = os.path.join(os.getcwd(), "results/workflow.log")
    shell(
        "cat {log} > {debug} && echo -e '\nWorkflow finished, no error!\n' >> {debug}"
    )


onerror:
    # Print the closing banner and an error marker.
    print()
    print(msg)
    print(epilog)
    print("--- An error occurred! ---------------------------------------------")
    print()
    # Copy the run log to results/error.log and append a final status line.
    error = os.path.join(os.getcwd(), "results/error.log")
    shell("cat {log} > {error} && echo -e '\nAn error occurred!' >> {error}")


Expand Down
45 changes: 25 additions & 20 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -10,47 +10,52 @@ samples = (
.sort_index()
)

#TODO: write validation schema
# TODO: write validation schema
# validate(SAMPLES, schema="../config/schemas/samples.schema.yml")


def get_final_output():
    """Collect the target files of a complete workflow run.

    Returns:
        list: four entries in fixed order -- the MultiQC report path, the
        ``expand`` result for the read length distributions (every sample
        in ``samples.index`` combined with the mapping stages "mapped",
        "deduplicated" and "filtered_bam"), the mRNA feature GFF path, and
        the ``expand`` result for the per-sample ``shift_reads`` CSV files.
    """
    # The two expand() results are kept as nested entries, exactly as the
    # append-based construction produced them.
    return [
        "results/multiqc/multiqc_report.html",
        expand(
            "results/{mapping_status}/length_dist/{sample}_length_dist.tsv",
            mapping_status=["mapped", "deduplicated", "filtered_bam"],
            sample=samples.index,
        ),
        "results/get_genome/mRNA_features.gff",
        expand("results/shift_reads/{sample}_shift.csv", sample=samples.index),
    ]


# get fastq files
def get_fastq(wildcards):
    """Return the FASTQ input for the requested processing status.

    Args:
        wildcards: object providing ``status`` and, for the "raw" status,
            ``sample`` (a label of the ``samples`` index).

    Returns:
        The ``expand`` result for the matching status, or ``None`` when the
        status is neither "raw" nor "clipped":

        * "raw": ``<data_folder>/<fq1>`` built from the sample sheet row of
          ``wildcards.sample``.
        * "clipped": ``results/clipped/<sample>.fastq.gz`` for every sample
          in ``samples.index``.
    """
    if wildcards.status == "raw":
        sample_row = samples.loc[wildcards.sample]
        return expand(
            "{input_dir}/{sample}",
            input_dir=sample_row["data_folder"],
            sample=sample_row["fq1"],
        )
    if wildcards.status == "clipped":
        # NOTE(review): this expands over all samples, not only
        # wildcards.sample -- confirm this is what the calling rule expects.
        return expand("results/clipped/{sample}.fastq.gz", sample=samples.index)
    return None


# get bam files
def get_bam(wildcards):
    """Return the BAM file of one sample for the requested mapping stage.

    Args:
        wildcards: object providing ``mapping_status`` and ``sample``.

    Returns:
        The ``expand`` result for ``results/<mapping_status>/<sample>.bam``
        when ``mapping_status`` is "mapped", "deduplicated" or
        "filtered_bam"; ``None`` for any other value.
    """
    stage = wildcards.mapping_status
    # All supported stages share one path layout, so a single branch
    # replaces the three identical ones.
    if stage in ("mapped", "deduplicated", "filtered_bam"):
        return expand(
            os.path.join("results", stage, "{sample}.bam"),
            sample=wildcards.sample,
        )
    return None
3 changes: 2 additions & 1 deletion workflow/rules/postprocessing.smk
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Riboseq postprocessing: #
# ----------------------------------------------------- #


# module to extract selected biotypes from gff file
# -----------------------------------------------------
rule extract_mRNA_features:
Expand Down Expand Up @@ -60,7 +61,7 @@ rule shift_reads:
"""--- Shifting Ribo-Seq reads."""
params:
config["shift_reads"],
threads: workflow.cores,
threads: workflow.cores
log:
path="results/shift_reads/log/{sample}.log",
script:
Expand Down
Loading

0 comments on commit d351622

Please sign in to comment.