From 1c2ff00c16ba8239e63d08b5d70b325bd73f218b Mon Sep 17 00:00:00 2001 From: jahn Date: Mon, 29 Jul 2024 10:07:08 +0200 Subject: [PATCH] fix: remove sORF related steps --- .test/config/config.yml | 5 ----- config/config.yml | 5 ----- workflow/scripts/annotate_orfs.R | 13 ++----------- 3 files changed, 2 insertions(+), 21 deletions(-) diff --git a/.test/config/config.yml b/.test/config/config.yml index 7da85ae..1993861 100644 --- a/.test/config/config.yml +++ b/.test/config/config.yml @@ -58,11 +58,6 @@ deeptools: annotate_orfs: window_size: 30 - sorf_max_length: 300 - sorf_min_length: 45 - orf_start_codon_table: 11 - orf_stop_codon: ["TAA", "TAG", "TGA"] - orf_longest_only: False shift_reads: window_size: 30 diff --git a/config/config.yml b/config/config.yml index 7da85ae..1993861 100644 --- a/config/config.yml +++ b/config/config.yml @@ -58,11 +58,6 @@ deeptools: annotate_orfs: window_size: 30 - sorf_max_length: 300 - sorf_min_length: 45 - orf_start_codon_table: 11 - orf_stop_codon: ["TAA", "TAG", "TGA"] - orf_longest_only: False shift_reads: window_size: 30 diff --git a/workflow/scripts/annotate_orfs.R b/workflow/scripts/annotate_orfs.R index b1ee725..fc125c7 100644 --- a/workflow/scripts/annotate_orfs.R +++ b/workflow/scripts/annotate_orfs.R @@ -96,10 +96,7 @@ seqinfo(genome_dna) <- seqinfo(txdb$result) list_cds <- loadRegion(txdb$result, "cds", by = "tx") list_tx <- loadRegion(txdb$result, "mrna", by = "tx") -# filter out ORFs that -# - are below a certain size threshold = annotated sORFs -# - have no old_locus_tag = predicted new sORFs -# - are only predicted with this pipeline = completely new sORFs +# parse genome gff file GFF df_gff <- genome_gff %>% read_tsv( comment = "#", @@ -118,9 +115,6 @@ df_gff <- genome_gff %>% ) %>% filter(name %in% names(list_cds)) -list_cds <- list_cds[filter(df_gff, width > sorf_max_length)$name] -list_tx <- list_tx[filter(df_gff, width > sorf_max_length)$name] - # make leader and start codon regions list_leader <- startCodons(list_cds) %>% extendLeaders(extension = window_size) @@ -136,10 +130,7 @@ df_annotated_orfs <- list_cds %>% mutate( sequence = as.character(list_cds_seq), start_codon = str_sub(sequence, 1, 3), - stop_codon = str_sub(sequence, -3, -1), - intergenic = FALSE, - intragenic = FALSE, - partial_overlap = FALSE + stop_codon = str_sub(sequence, -3, -1) ) # export results