@@ -35,13 +35,13 @@ def __init__(self, pos: int, limit: int) -> None:
35
35
super ().__init__ (self .message )
36
36
37
37
class AminoAcidTypeError (Exception ):
38
- """Execption raised for Amino Acid does not correspond to a 1-letter code or
39
- a valid amino acid type
38
+ """Exception raised for Amino Acid which does not correspond to a 1-letter
39
+ code or a valid amino acid type
40
40
41
41
valid amino acid type : 'aromatic', 'acidic', 'basic', 'polar', 'hydrophobic'
42
42
43
43
Attribute:
44
- aa (str): the amino acid string wich caused the error
44
+ aa (str): the amino acid string which caused the error
45
45
"""
46
46
47
47
def __init__ (self , aa : str ) -> None :
@@ -87,14 +87,14 @@ def get_seq_from_pdb(pdb: Path) -> str:
87
87
def read_models (models : Path ) -> Dict [str , str ]:
88
88
"""Reads the model file
89
89
90
- For each model, add its id as key in a dictionnary and add as value the
90
+ For each model, add its id as key in a dictionary and add as value the
91
91
sequence returned by get_seq_from_pdb().
92
92
93
93
Args:
94
94
models (pathlib.Path): The file containing the model paths
95
95
96
96
Returns:
97
- all_seq (dict): A dictionnary with each pair of id - seq
97
+ all_seq (dict): A dictionary with each pair of id - seq
98
98
"""
99
99
100
100
all_seq = {}
@@ -110,13 +110,13 @@ def read_models(models: Path) -> Dict[str, str]:
110
110
def read_multi_fasta (fasta : Path ) -> Dict [str , str ]:
111
111
"""Reads a multi fasta file
112
112
113
- Add in a dictionnary all pair id - sequence.
113
+ Add every id - sequence pair to a dictionary.
114
114
115
115
Args:
116
116
fasta (pathlib.Path): The multi fasta file
117
117
118
118
Returns:
119
- all_seq (dict): A dictionnary with each pair of id - seq
119
+ all_seq (dict): A dictionary with each pair of id - seq
120
120
"""
121
121
122
122
all_seq = {}
@@ -141,11 +141,11 @@ def read_multi_fasta(fasta: Path) -> Dict[str, str]:
141
141
def get_identity (ref_seq : Dict [str , str ], target : str ) -> Tuple [str , float ]:
142
142
"""Get the % identity between two sequences
143
143
144
- For each pair reference - target, build a global alignment and calculates
144
+ For each pair reference - target, build a global alignment and calculate
145
145
the percentage of identity.
146
146
147
147
Args:
148
- ref_seq (dict): Dictionnary with ids of reference as key and their
148
+ ref_seq (dict): Dictionary with ids of reference as key and their
149
149
sequences as value
150
150
target (str): The target sequence
151
151
@@ -184,13 +184,13 @@ def get_identity(ref_seq: Dict[str, str], target: str) -> Tuple[str, float]:
184
184
def build_comparison_data (id_dict : Optional [Dict [str , Union [str , int , None ]]],
185
185
file : Path ,
186
186
empty = True ) -> Dict [str , Union [str , int , None ]]:
187
- """Read the ASMC groups.tsv and load information in a dictionnary
187
+ """Read the ASMC groups.tsv and load information in a dictionary
188
188
189
189
Args:
190
- id_dict (dict): An empty dictionnary or contaning sub dict as value of
190
+ id_dict (dict): An empty dictionary or containing sub dict as value of
191
191
seqID (key)
192
192
file (pathlib.Path): The ASMC groups.tsv
193
- empty (bool, optional): Defaults to True.
193
+ empty (bool, optional): Defaults to True.
194
194
195
195
Returns:
196
196
id_dict (dict): The updated id_dict
@@ -232,15 +232,15 @@ def build_comparison_data(id_dict: Optional[Dict[str, Union[str, int, None]]],
232
232
233
233
def add_ref_data_to_comparison_data (id_dict :Dict [str , Union [str , int , None ]],
234
234
file :Path ) -> Tuple [Dict [str , Union [str , int , None ]], Set [str ]]:
235
- """Reads the identity_target_ref .tsv file and add information to a
235
+ """Reads the identity_targets_refs .tsv file and adds information to a
236
236
comparison data
237
237
238
238
Args:
239
239
id_dict (dict): Dict with sub dict as value and seqID as key
240
- file (pathlib.Path): The identity_target_ref .tsv
240
+ file (pathlib.Path): The identity_targets_refs .tsv
241
241
242
242
Returns:
243
- id_dict (dict): The updated id_dict,
243
+ id_dict (dict): The updated id_dict
244
244
ref_set (set): Set containing the reference IDs
245
245
"""
246
246
@@ -261,7 +261,7 @@ def add_ref_data_to_comparison_data(id_dict:Dict[str, Union[str, int, None]],
261
261
return id_dict , ref_set
262
262
263
263
def LD_two_rows (s1 : str , s2 : str ) -> int :
264
- """Calcultes Levenshtein distance between two strings
264
+ """Calculates Levenshtein distance between two strings
265
265
266
266
Simple implementation of Levenshtein distance based on the two rows
267
267
algorithm.
@@ -274,7 +274,7 @@ def LD_two_rows(s1: str, s2: str) -> int:
274
274
int: The Levenshtein/edit distance
275
275
"""
276
276
277
- # Switch s1 and s2 for reduce the columns number
277
+ # Swap s1 and s2 to reduce the number of columns
278
278
if len (s1 ) > len (s2 ):
279
279
s1 , s2 = s2 , s1
280
280
@@ -363,7 +363,7 @@ def build_active_site_checking_file(id_dict: Dict[str, Union[str, int, None]],
363
363
g2 = id_dict [key ]["g2" ]
364
364
ref_pid = id_dict [key ]["ref_pid" ]
365
365
366
- # Add dictionnary items
366
+ # Add dictionary items
367
367
text += f"{ key } \t { g1 } \t { seq1 } \t { g2 } \t { seq2 } \t { d } \t { ref } \t { ref_pid } \t "
368
368
text += f"{ seq_ref } \t { d1 } \t { d2 } \t "
369
369
@@ -456,7 +456,7 @@ def extract_aa(file: Path, pos: int, aa: str, group: Optional[int]):
456
456
457
457
def get_unique (group_file : Path ) -> Tuple [Dict [str , Tuple [str , Set [str ]]],
458
458
Dict [str , Tuple [int , int , float ]]]:
459
- """Calculates statistics on the number of unique sequences per groups
459
+ """Calculates statistics on the number of unique sequences per group
460
460
461
461
Args:
462
462
group_file (Path): ASMC tsv output
@@ -465,8 +465,8 @@ def get_unique(group_file: Path) -> Tuple[Dict[str, Tuple[str, Set[str]]],
465
465
FileFormatError: Raised if the tsv contains fewer than 3 columns
466
466
467
467
Returns:
468
- unique_seq (dict): Dict with seq as key and as value tuple containing
469
- the group id and a set of sequence ids
468
+ unique_seq (dict): Dict with seq as key and a tuple containing
469
+ the group id and a set of sequence ids as values
470
470
471
471
groups_stats (dict): Dict with group id as key and a tuple of int and
472
472
float as value
0 commit comments