@@ -111,46 +111,58 @@ def parse_gvf_dir(dir_):
111
111
pos = row ["#start" ]
112
112
if pos not in ret [strain ]["mutations" ]:
113
113
ret [strain ]["mutations" ][pos ] = []
114
- mutation_types = row ["#type" ].split ("," )
115
- num_of_mutations = len (mutation_types )
116
- for i in range (num_of_mutations ):
117
- mutation_dict = {
118
- "ref" : attrs ["Reference_seq" ],
119
- "alt" : attrs ["Variant_seq" ].split ("," )[i ],
120
- "gene" : attrs ["vcf_gene" ],
121
- "ao" : float (attrs ["ao" ].split ("," )[i ]),
122
- "dp" : float (attrs ["dp" ]),
123
- "multi_aa_name" : attrs ["multi_aa_name" ],
124
- "clade_defining" :
125
- attrs ["clade_defining" ] == "True" ,
126
- "hidden_cell" : False ,
127
- "mutation_name" : attrs ["Name" ],
128
- "functions" : {}
129
- }
130
- alt_freq = mutation_dict ["ao" ]/ mutation_dict ["dp" ]
131
- mutation_dict ["alt_freq" ] = str (round (alt_freq , 4 ))
132
- type = mutation_types [i ]
133
- if type == "ins" :
134
- mutation_dict ["mutation_type" ] = "insertion"
135
- elif type == "del" :
136
- mutation_dict ["mutation_type" ] = "deletion"
137
- else :
138
- mutation_dict ["mutation_type" ] = "snp"
139
- ret [strain ]["mutations" ][pos ].append (mutation_dict )
140
114
115
+ mutation_name = attrs ["Name" ]
116
+ alt = attrs ["Variant_seq" ]
117
+
118
+ mutation_dict = {}
119
+ for existing_dict in ret [strain ]["mutations" ][pos ]:
120
+ cond1 = existing_dict ["mutation_name" ] == mutation_name
121
+ cond2 = existing_dict ["alt" ] == alt
122
+ if cond1 and cond2 :
123
+ mutation_dict = existing_dict
124
+ break
125
+
126
+ if not mutation_dict :
127
+ mutation_dict = {
128
+ "ref" : attrs ["Reference_seq" ],
129
+ "alt" : alt ,
130
+ "gene" : attrs ["vcf_gene" ],
131
+ "ao" : float (attrs ["ao" ]),
132
+ "dp" : float (attrs ["dp" ]),
133
+ "multi_aa_name" : attrs ["multi_aa_name" ],
134
+ "clade_defining" :
135
+ attrs ["clade_defining" ] == "True" ,
136
+ "hidden_cell" : False ,
137
+ "mutation_name" : mutation_name ,
138
+ "functions" : {}
139
+ }
140
+
141
+ alt_freq = mutation_dict ["ao" ]/ mutation_dict ["dp" ]
142
+ mutation_dict ["alt_freq" ] = str (round (alt_freq , 4 ))
143
+
144
+ mutation_type = row ["#type" ]
145
+ if mutation_type == "ins" :
146
+ mutation_dict ["mutation_type" ] = "insertion"
147
+ elif mutation_type == "del" :
148
+ mutation_dict ["mutation_type" ] = "deletion"
149
+ else :
150
+ mutation_dict ["mutation_type" ] = mutation_type
151
+
152
+ ret [strain ]["mutations" ][pos ].append (mutation_dict )
153
+
154
+ fn_dict = mutation_dict ["functions" ]
141
155
fn_category = attrs ["function_category" ].strip ('"' )
156
+ if not fn_category :
157
+ continue
158
+ if fn_category not in fn_dict :
159
+ fn_dict [fn_category ] = {}
160
+
142
161
fn_desc = attrs ["function_description" ].strip ('"' )
143
162
fn_source = attrs ["source" ].strip ('"' )
144
163
fn_citation = attrs ["citation" ].strip ('"' )
145
- fn_dict = {}
146
- if fn_category :
147
- if fn_category not in fn_dict :
148
- fn_dict [fn_category ] = {}
149
- fn_dict [fn_category ][fn_desc ] = \
150
- {"source" : fn_source , "citation" : fn_citation }
151
- for i in range (len (ret [strain ]["mutations" ][pos ])):
152
- parsed_mutation = ret [strain ]["mutations" ][pos ]
153
- parsed_mutation [i ]["functions" ].update (fn_dict )
164
+ fn_dict [fn_category ][fn_desc ] = \
165
+ {"source" : fn_source , "citation" : fn_citation }
154
166
155
167
return ret
156
168
0 commit comments