-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_json.py
64 lines (52 loc) · 1.64 KB
/
generate_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import json
# Load the raw entries. Each kept line becomes [index, rest-of-line], split on
# the first space only so the domain text may itself contain spaces; the
# trailing newline is left on the second element.
# Whitespace-only lines (e.g. a trailing blank line) are skipped: they would
# otherwise produce a one-element list and break the entry[1] lookup below.
# NOTE(review): assumes rapidwords.txt is UTF-8 (or plain ASCII) — confirm.
with open('rapidwords.txt', encoding='utf-8') as f:
    data = [line.split(' ', 1) for line in f if line.strip()]
# Build the nested tree: every dotted index component becomes one level under
# a "subindexes" mapping, and the node reached at the end of the path receives
# the entry's own fields.
rw = {}
for record in data:
    code = record[0]
    parts = code.split(".")
    label = record[1]

    # Walk (creating as needed) one tree level per index component.
    node = rw
    for part in parts:
        node = node.setdefault("subindexes", {}).setdefault(part, {})

    node["domain"] = label.replace("\n", "")
    node["index"] = code
    # Hypernyms are the proper dotted prefixes of the index, shortest first
    # (e.g. "1.2.3" -> ["1", "1.2"]); a single-component index has none.
    node["hypernyms"] = [
        ".".join(parts[:depth]) for depth in range(1, len(parts))
    ]
# Process hyponyms
def add_hyponyms(entry, top):
    """Store on *entry* the indexes of all its descendants; return the subtree.

    ``top`` is the dotted index path of *entry*.  The function recurses into
    ``entry["subindexes"]`` (if present), extending ``top`` with each child
    key, and sets ``entry["hyponyms"]`` to the collected indexes with the
    entry's own ``"index"`` value removed.

    Returns the list made of ``top`` followed by every descendant index.
    """
    collected = [top]
    children = entry.get("subindexes", dict())
    for child_key in children:
        collected += add_hyponyms(children[child_key], top + "." + child_key)

    # Work on a separate list so the returned list still includes this node.
    entry["hyponyms"] = own_hyponyms = list(collected)
    own_hyponyms.remove(entry["index"])
    return collected
# Annotate every top-level node (and, recursively, its descendants) with its
# hyponym list. When no entries were loaded, rw has no "subindexes" key, so
# fall back to an empty mapping instead of raising KeyError.
for key, value in rw.get("subindexes", {}).items():
    add_hyponyms(value, key)
flat = {}


def flatten_data(entry):
    """Record *entry* and all of its descendants in the module-level ``flat``.

    Each visited node is stored under its ``"index"`` value as a dict holding
    the node's ``"domain"``, ``"hypernyms"`` and ``"hyponyms"``.  Nodes are
    visited parent first, with children in ``"subindexes"`` order.
    """
    pending = [entry]
    while pending:
        node = pending.pop()
        record = {
            field: node[field]
            for field in ("domain", "hypernyms", "hyponyms")
        }
        flat[node["index"]] = record
        # Push children reversed so they are popped in their original order.
        children = node.get("subindexes", dict())
        pending.extend(reversed(list(children.values())))
# Flatten every top-level node (and its descendants) into `flat`. When no
# entries were loaded, rw has no "subindexes" key, so fall back to an empty
# mapping instead of raising KeyError.
for value in rw.get("subindexes", {}).values():
    flatten_data(value)
def compact_data(entry):
    """Remove the derived fields from *entry* and every node beneath it.

    Recurses through ``"subindexes"`` and deletes the ``"hyponyms"``,
    ``"hypernyms"`` and ``"index"`` keys in place wherever they exist; other
    keys are left alone.  Returns ``None``.
    """
    children = entry.get("subindexes", dict())
    for child in children.values():
        compact_data(child)

    for derived_key in ("hyponyms", "hypernyms", "index"):
        # A default is supplied so nodes lacking the key are simply skipped.
        entry.pop(derived_key, None)
# Strip the derived fields from the tree, then write both outputs: the nested
# tree first, followed by the flat index -> record mapping.
compact_data(rw)

for path, payload in (
    ("rapidwords-compact.json", rw),
    ("rapidwords.json", flat),
):
    with open(path, "w") as f:
        json.dump(payload, f)