Skip to content

Commit 967d8bc

Browse files
authored
Update parsing logic for distinct alleles (#157)
* Update gvf files
* Forgot to stage new gvf files last commit
* Update parsing logic for new gvf format
* Update surveillance reports
* Update nf-ncov-voc
1 parent e3f66f0 commit 967d8bc

File tree

1,392 files changed

+222427
-158285
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

1,392 files changed

+222427
-158285
lines changed

data_parser.py

Lines changed: 47 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -111,46 +111,58 @@ def parse_gvf_dir(dir_):
111111
pos = row["#start"]
112112
if pos not in ret[strain]["mutations"]:
113113
ret[strain]["mutations"][pos] = []
114-
mutation_types = row["#type"].split(",")
115-
num_of_mutations = len(mutation_types)
116-
for i in range(num_of_mutations):
117-
mutation_dict = {
118-
"ref": attrs["Reference_seq"],
119-
"alt": attrs["Variant_seq"].split(",")[i],
120-
"gene": attrs["vcf_gene"],
121-
"ao": float(attrs["ao"].split(",")[i]),
122-
"dp": float(attrs["dp"]),
123-
"multi_aa_name": attrs["multi_aa_name"],
124-
"clade_defining":
125-
attrs["clade_defining"] == "True",
126-
"hidden_cell": False,
127-
"mutation_name": attrs["Name"],
128-
"functions": {}
129-
}
130-
alt_freq = mutation_dict["ao"]/mutation_dict["dp"]
131-
mutation_dict["alt_freq"] = str(round(alt_freq, 4))
132-
type = mutation_types[i]
133-
if type == "ins":
134-
mutation_dict["mutation_type"] = "insertion"
135-
elif type == "del":
136-
mutation_dict["mutation_type"] = "deletion"
137-
else:
138-
mutation_dict["mutation_type"] = "snp"
139-
ret[strain]["mutations"][pos].append(mutation_dict)
140114

115+
mutation_name = attrs["Name"]
116+
alt = attrs["Variant_seq"]
117+
118+
mutation_dict = {}
119+
for existing_dict in ret[strain]["mutations"][pos]:
120+
cond1 = existing_dict["mutation_name"] == mutation_name
121+
cond2 = existing_dict["alt"] == alt
122+
if cond1 and cond2:
123+
mutation_dict = existing_dict
124+
break
125+
126+
if not mutation_dict:
127+
mutation_dict = {
128+
"ref": attrs["Reference_seq"],
129+
"alt": alt,
130+
"gene": attrs["vcf_gene"],
131+
"ao": float(attrs["ao"]),
132+
"dp": float(attrs["dp"]),
133+
"multi_aa_name": attrs["multi_aa_name"],
134+
"clade_defining":
135+
attrs["clade_defining"] == "True",
136+
"hidden_cell": False,
137+
"mutation_name": mutation_name,
138+
"functions": {}
139+
}
140+
141+
alt_freq = mutation_dict["ao"]/mutation_dict["dp"]
142+
mutation_dict["alt_freq"] = str(round(alt_freq, 4))
143+
144+
mutation_type = row["#type"]
145+
if mutation_type == "ins":
146+
mutation_dict["mutation_type"] = "insertion"
147+
elif mutation_type == "del":
148+
mutation_dict["mutation_type"] = "deletion"
149+
else:
150+
mutation_dict["mutation_type"] = mutation_type
151+
152+
ret[strain]["mutations"][pos].append(mutation_dict)
153+
154+
fn_dict = mutation_dict["functions"]
141155
fn_category = attrs["function_category"].strip('"')
156+
if not fn_category:
157+
continue
158+
if fn_category not in fn_dict:
159+
fn_dict[fn_category] = {}
160+
142161
fn_desc = attrs["function_description"].strip('"')
143162
fn_source = attrs["source"].strip('"')
144163
fn_citation = attrs["citation"].strip('"')
145-
fn_dict = {}
146-
if fn_category:
147-
if fn_category not in fn_dict:
148-
fn_dict[fn_category] = {}
149-
fn_dict[fn_category][fn_desc] = \
150-
{"source": fn_source, "citation": fn_citation}
151-
for i in range(len(ret[strain]["mutations"][pos])):
152-
parsed_mutation = ret[strain]["mutations"][pos]
153-
parsed_mutation[i]["functions"].update(fn_dict)
164+
fn_dict[fn_category][fn_desc] = \
165+
{"source": fn_source, "citation": fn_citation}
154166

155167
return ret
156168

nf-ncov-voc

0 commit comments

Comments
 (0)