22
22
import click
23
23
from colorhash import ColorHash
24
24
25
- from utils import crw , rfam , ribovision , gtrnadb , config , generate_model_info , shared
25
+ from utils import crw , rfam , ribovision , gtrnadb , config , shared
26
26
from utils import generate_model_info as gmi
27
27
from utils import list_models as lm
28
28
from utils import generate_cm_library as gcl
@@ -38,21 +38,21 @@ def get_ribotyper_output(fasta_input, output_folder, cm_library, skip_ribovore_f
38
38
output_folder , os .path .basename (output_folder ) + ".ribotyper.long.out"
39
39
)
40
40
if not os .path .exists (ribotyper_long_out ):
41
- cmd = "ribotyper.pl --skipval -i {cm_library}/modelinfo.txt -f {fasta_input} {output_folder}" .format (
42
- cm_library = cm_library , fasta_input = fasta_input , output_folder = output_folder
43
- )
41
+ cmd = f"ribotyper.pl --skipval -i { cm_library } /modelinfo.txt -f { fasta_input } { output_folder } "
44
42
print (cmd )
45
43
os .system (cmd )
46
44
f_out = os .path .join (output_folder , "hits.txt" )
47
45
if not skip_ribovore_filters :
48
46
cmd = (
49
- "cat %s | grep -v '^#' | grep -v MultipleHits | grep PASS | awk -v OFS='\t ' '{print $2, $8, $3}' > %s"
50
- % (ribotyper_long_out , f_out )
47
+ f"cat { ribotyper_long_out } | grep -v '^#' | "
48
+ f"grep -v MultipleHits | grep PASS | "
49
+ f"awk -v OFS='\t ' '{{print $2, $8, $3}}' > { f_out } "
51
50
)
52
51
else :
53
52
cmd = (
54
- "cat %s | grep -v '^#' | grep -v NoHits | awk -v OFS='\t ' '{print $2, $8, $3}' > %s"
55
- % (ribotyper_long_out , f_out )
53
+ f"cat { ribotyper_long_out } | grep -v '^#' "
54
+ f"| grep -v NoHits | "
55
+ f"awk -v OFS='\t ' '{{print $2, $8, $3}}' > { f_out } "
56
56
)
57
57
os .system (cmd )
58
58
return f_out
@@ -121,7 +121,7 @@ def get_seq_ids(input_fasta):
121
121
Get a list of sequence ids from a fasta file.
122
122
"""
123
123
seq_ids = set ()
124
- with open (input_fasta , "r" ) as f_in :
124
+ with open (input_fasta , "r" , encoding = "utf-8" ) as f_in :
125
125
for line in f_in :
126
126
if line .startswith (">" ):
127
127
match = re .search (r">(.*?)\s" , line )
@@ -138,7 +138,7 @@ def get_hits(folder):
138
138
hits_file = os .path .join (folder , "hits.txt" )
139
139
if not os .path .exists (hits_file ):
140
140
return hits
141
- with open (hits_file , "r" ) as f_in :
141
+ with open (hits_file , "r" , encoding = "utf-8" ) as f_in :
142
142
for line in f_in :
143
143
hits .add (line .split ("\t " )[0 ])
144
144
return hits
@@ -150,9 +150,9 @@ def get_subset_fasta(fasta_input, output_filename, seq_ids):
150
150
from <fasta_input>.
151
151
"""
152
152
index_filename = output_filename + ".txt"
153
- with open (index_filename , "w" ) as f_out :
153
+ with open (index_filename , "w" , encoding = "utf-8" ) as f_out :
154
154
for seq_id in seq_ids :
155
- f_out .write (seq_id + " \n " )
155
+ f_out .write (f" { seq_id } \n " )
156
156
cmd = f"esl-sfetch -o { output_filename } -f { fasta_input } { index_filename } "
157
157
os .system (cmd )
158
158
os .system ("esl-sfetch --index " + output_filename )
@@ -233,8 +233,9 @@ def draw(
233
233
skip_ribovore_filters ,
234
234
),
235
235
"r" ,
236
- ) as f :
237
- for line in f .readlines ():
236
+ encoding = "utf-8" ,
237
+ ) as f_ribotyper :
238
+ for line in f_ribotyper .readlines ():
238
239
rnacentral_id , model_id , _ = line .split ("\t " )
239
240
ribovision .visualise (
240
241
"rfam" ,
@@ -346,7 +347,6 @@ def draw(
346
347
"RF00005" ,
347
348
output_folder ,
348
349
subset_fasta ,
349
- False ,
350
350
constraint ,
351
351
exclusion ,
352
352
fold_type ,
@@ -371,6 +371,7 @@ def draw(
371
371
372
372
373
373
def organise_results (results_folder , output_folder ):
374
+ """Move files to the final folder structure."""
374
375
destination = os .path .join (output_folder , "results" )
375
376
svg_folder = os .path .join (destination , "svg" )
376
377
thumbnail_folder = os .path .join (destination , "thumbnail" )
@@ -386,11 +387,12 @@ def organise_results(results_folder, output_folder):
386
387
os .system (f"mkdir -p { folder } " )
387
388
388
389
svgs = glob .glob (os .path .join (results_folder , "*.colored.svg" ))
389
- if len ( svgs ) :
390
+ if svgs :
390
391
for svg in svgs :
391
- with open (svg , "r" ) as f_svg :
392
+ with open (svg , "r" , encoding = "utf-8" ) as f_svg :
392
393
thumbnail = generate_thumbnail (f_svg .read (), svg )
393
- with open (svg .replace (".colored." , ".thumbnail." ), "w" ) as f_thumbnail :
394
+ thumbnail_filename = svg .replace (".colored." , ".thumbnail." )
395
+ with open (thumbnail_filename , "w" , encoding = "utf-8" ) as f_thumbnail :
394
396
f_thumbnail .write (thumbnail )
395
397
os .system (f"mv { results_folder } /*.colored.svg { svg_folder } " )
396
398
os .system (f"mv { results_folder } /*.thumbnail.svg { thumbnail_folder } " )
@@ -403,7 +405,6 @@ def gtrnadb_group():
403
405
"""
404
406
Use tRNA templates for structure visualisation.
405
407
"""
406
- pass
407
408
408
409
409
410
@gtrnadb_group .command ("setup" )
@@ -486,7 +487,6 @@ def rnasep_group():
486
487
"""
487
488
Use RNAse P templates for structure visualisation.
488
489
"""
489
- pass
490
490
491
491
492
492
@rnasep_group .command ("draw" )
@@ -513,8 +513,9 @@ def rnasep_draw(
513
513
fasta_input , output_folder , config .RNASEP_CM_LIBRARY , skip_ribovore_filters
514
514
),
515
515
"r" ,
516
- ) as f :
517
- for line in f .readlines ():
516
+ encoding = "utf-8" ,
517
+ ) as f_ribotyper :
518
+ for line in f_ribotyper .readlines ():
518
519
rnacentral_id , model_id , _ = line .split ("\t " )
519
520
ribovision .visualise (
520
521
"rnasep" ,
@@ -533,7 +534,6 @@ def crw_group():
533
534
"""
534
535
Use CRW templates for structure visualisation.
535
536
"""
536
- pass
537
537
538
538
539
539
@crw_group .command ("draw" )
@@ -560,8 +560,9 @@ def rrna_draw(
560
560
fasta_input , output_folder , config .CRW_CM_LIBRARY , skip_ribovore_filters
561
561
),
562
562
"r" ,
563
- ) as f :
564
- for line in f .readlines ():
563
+ encoding = "utf-8" ,
564
+ ) as f_ribotyper :
565
+ for line in f_ribotyper .readlines ():
565
566
rnacentral_id , model_id , _ = line .split ("\t " )
566
567
ribovision .visualise (
567
568
"crw" ,
@@ -580,7 +581,6 @@ def ribovision_group():
580
581
"""
581
582
Use RiboVision templates for structure visualisation.
582
583
"""
583
- pass
584
584
585
585
586
586
@ribovision_group .command ("draw_lsu" )
@@ -610,8 +610,9 @@ def ribovision_draw_lsu(
610
610
skip_ribovore_filters ,
611
611
),
612
612
"r" ,
613
- ) as f :
614
- for line in f .readlines ():
613
+ encoding = "utf-8" ,
614
+ ) as f_ribotyper :
615
+ for line in f_ribotyper .readlines ():
615
616
rnacentral_id , model_id , _ = line .split ("\t " )
616
617
ribovision .visualise (
617
618
"lsu" ,
@@ -652,8 +653,9 @@ def ribovision_draw_ssu(
652
653
skip_ribovore_filters ,
653
654
),
654
655
"r" ,
655
- ) as f :
656
- for line in f .readlines ():
656
+ encoding = "utf-8" ,
657
+ ) as f_ribotyper :
658
+ for line in f_ribotyper .readlines ():
657
659
rnacentral_id , model_id , _ = line .split ("\t " )
658
660
ribovision .visualise (
659
661
"ssu" ,
@@ -672,7 +674,6 @@ def rfam_group():
672
674
"""
673
675
Use Rfam templates for structure visualisation.
674
676
"""
675
- pass
676
677
677
678
678
679
@rfam_group .command ("blacklisted" )
@@ -732,26 +733,27 @@ def rfam_validate(rfam_accession, output):
732
733
be output to the given file, otherwise it will not.
733
734
"""
734
735
if rfam_accession not in rfam .blacklisted ():
735
- output .write (rfam_accession + " \n " )
736
+ output .write (f" { rfam_accession } \n " )
736
737
737
738
738
739
def generate_thumbnail (image , description ):
740
+ """Generate a thumbnail SVG as an outline of the 2D diagram."""
739
741
move_to_start_position = None
740
742
color = ColorHash (description ).hex
741
743
points = []
742
- for i , line in enumerate (image .split ("\n " )):
744
+ for _ , line in enumerate (image .split ("\n " )):
743
745
if "width" in line and not "stroke-width" in line :
744
746
width = re .findall (r'width="(\d+(\.\d+)?)"' , line )
745
747
if "height" in line :
746
748
height = re .findall (r'height="(\d+(\.\d+)?)"' , line )
747
- for nt in re .finditer (
748
- '<text x="(\d+)(\.\d+)?" y="(\d+)(\.\d+)?".*?</text>' , line
749
+ for nt_block in re .finditer (
750
+ r '<text x="(\d+)(\.\d+)?" y="(\d+)(\.\d+)?".*?</text>' , line
749
751
):
750
- if "numbering-label" in nt .group (0 ):
752
+ if "numbering-label" in nt_block .group (0 ):
751
753
continue
752
754
if not move_to_start_position :
753
- move_to_start_position = f"M{ nt .group (1 )} { nt .group (3 )} "
754
- points .append (f"L{ nt .group (1 )} { nt .group (3 )} " )
755
+ move_to_start_position = f"M{ nt_block .group (1 )} { nt_block .group (3 )} "
756
+ points .append (f"L{ nt_block .group (1 )} { nt_block .group (3 )} " )
755
757
if len (points ) < 200 :
756
758
stroke_width = "3"
757
759
elif len (points ) < 500 :
@@ -760,8 +762,11 @@ def generate_thumbnail(image, description):
760
762
stroke_width = "4"
761
763
else :
762
764
stroke_width = "2"
763
- thumbnail = '<svg xmlns="http://www.w3.org/2000/svg" width="{}" height="{}"><path style="stroke:{};stroke-width:{}px;fill:none;" d="' .format (
764
- width [0 ][0 ], height [0 ][0 ], color , stroke_width
765
+ thumbnail = (
766
+ f'<svg xmlns="http://www.w3.org/2000/svg" '
767
+ f'width="{ width [0 ][0 ]} " height="{ height [0 ][0 ]} ">'
768
+ f'<path style="stroke:{ color } ;stroke-width:{ stroke_width } px;'
769
+ f'fill:none;" d="'
765
770
)
766
771
thumbnail += move_to_start_position
767
772
thumbnail += " " .join (points )
@@ -775,12 +780,12 @@ def organise_metadata(output_folder, result_folders):
775
780
"""
776
781
tsv_folder = os .path .join (output_folder , "results" , "tsv" )
777
782
os .system (f"mkdir -p { tsv_folder } " )
778
- with open (os .path .join (tsv_folder , "metadata.tsv" ), "w" ) as f_out :
783
+ with open (os .path .join (tsv_folder , "metadata.tsv" ), "w" , encoding = "utf-8" ) as f_out :
779
784
for folder in result_folders :
780
785
hits = os .path .join (folder , "hits.txt" )
781
786
if not os .path .exists (hits ):
782
787
continue
783
- with open (hits , "r" ) as f_hits :
788
+ with open (hits , "r" , encoding = "utf-8" ) as f_hits :
784
789
for line in f_hits .readlines ():
785
790
if "gtrnadb" in folder :
786
791
line = line .replace ("PASS" , "GtRNAdb" )
@@ -902,7 +907,9 @@ def force_draw(
902
907
"ribovision_lsu" : "RiboVision" ,
903
908
"rnasep" : "RNAse P database" ,
904
909
}
905
- with open (os .path .join (metadata_folder , "metadata.tsv" ), "a" ) as f_out :
910
+ with open (
911
+ os .path .join (metadata_folder , "metadata.tsv" ), "a" , encoding = "utf-8"
912
+ ) as f_out :
906
913
line = f"{ seq_id } \t { model_id } \t { label_mapping [model_type ]} \n "
907
914
f_out .write (line )
908
915
@@ -917,7 +924,9 @@ def list_models():
917
924
for item in data :
918
925
print (item ["description" ])
919
926
lm .check_unique_descriptions (data )
920
- with open (os .path .join (config .DATA , "models.json" ), "w" ) as models_file :
927
+ with open (
928
+ os .path .join (config .DATA , "models.json" ), "w" , encoding = "utf-8"
929
+ ) as models_file :
921
930
json .dump (data , models_file )
922
931
923
932
0 commit comments