Skip to content

Commit

Permalink
fix: remove sORF related steps
Browse files Browse the repository at this point in the history
  • Loading branch information
m-jahn committed Jul 29, 2024
1 parent dcc8a1a commit 1c2ff00
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 21 deletions.
5 changes: 0 additions & 5 deletions .test/config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,6 @@ deeptools:

annotate_orfs:
window_size: 30
sorf_max_length: 300
sorf_min_length: 45
orf_start_codon_table: 11
orf_stop_codon: ["TAA", "TAG", "TGA"]
orf_longest_only: False

shift_reads:
window_size: 30
Expand Down
5 changes: 0 additions & 5 deletions config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,6 @@ deeptools:

annotate_orfs:
window_size: 30
sorf_max_length: 300
sorf_min_length: 45
orf_start_codon_table: 11
orf_stop_codon: ["TAA", "TAG", "TGA"]
orf_longest_only: False

shift_reads:
window_size: 30
Expand Down
13 changes: 2 additions & 11 deletions workflow/scripts/annotate_orfs.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,7 @@ seqinfo(genome_dna) <- seqinfo(txdb$result)
list_cds <- loadRegion(txdb$result, "cds", by = "tx")
list_tx <- loadRegion(txdb$result, "mrna", by = "tx")

# filter out ORFs that
# - are below a certain size threshold = annotated sORFs
# - have no old_locus_tag = predicted new sORFs
# - are only predicted with this pipeline = completely new sORFs
# parse genome GFF file
df_gff <- genome_gff %>%
read_tsv(
comment = "#",
Expand All @@ -118,9 +115,6 @@ df_gff <- genome_gff %>%
) %>%
filter(name %in% names(list_cds))

list_cds <- list_cds[filter(df_gff, width > sorf_max_length)$name]
list_tx <- list_tx[filter(df_gff, width > sorf_max_length)$name]

# make leader and start codon regions
list_leader <- startCodons(list_cds) %>%
extendLeaders(extension = window_size)
Expand All @@ -136,10 +130,7 @@ df_annotated_orfs <- list_cds %>%
mutate(
sequence = as.character(list_cds_seq),
start_codon = str_sub(sequence, 1, 3),
stop_codon = str_sub(sequence, -3, -1),
intergenic = FALSE,
intragenic = FALSE,
partial_overlap = FALSE
stop_codon = str_sub(sequence, -3, -1)
)

# export results
Expand Down

0 comments on commit 1c2ff00

Please sign in to comment.