-
Notifications
You must be signed in to change notification settings - Fork 202
/
stroke_data_parser.py
94 lines (81 loc) · 2.32 KB
/
stroke_data_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import json
import os
from copy import copy
# Every path below is resolved relative to the directory holding this script.
root = os.path.dirname(__file__)
output_dir = os.path.join(root, 'data')
dictionary_file = os.path.join(root, 'vendor/makemeahanzi/dictionary.txt')
graphics_file = os.path.join(root, 'vendor/makemeahanzi/graphics.txt')

# Layout symbols that may appear in a decomposition string, mapped to the
# number of child components each one takes. Grouped by arity; the key
# order is the same as the code-point order of the symbols.
positioners = {
    **dict.fromkeys('⿰⿱', 2),
    **dict.fromkeys('⿲⿳', 3),
    **dict.fromkeys('⿴⿵⿶⿷⿸⿹⿺⿻', 2),
}
missing_marker = '?'

# Lookup tables keyed by character; populated when the input files are read.
dict_data = {}
graphics_data = {}
def _read_json_lines(path):
    """Yield the decoded JSON value of every non-blank line in ``path``.

    The file is decoded as UTF-8 explicitly: the records contain Chinese
    characters, and the platform default encoding is not UTF-8 everywhere.
    Lines are streamed rather than read into memory all at once, and blank
    lines (for example a trailing empty line) are skipped instead of being
    handed to ``json.loads``.
    """
    with open(path, encoding='utf-8') as f:
        for line in f:
            if line.strip():
                yield json.loads(line)


# Dictionary records are stored whole, keyed by their 'character' field.
for decoded_line in _read_json_lines(dictionary_file):
    dict_data[decoded_line['character']] = decoded_line

# Graphics records are keyed the same way, but the 'character' field is
# removed from the stored record itself.
for decoded_line in _read_json_lines(graphics_file):
    char = decoded_line.pop('character')
    graphics_data[char] = decoded_line
def get_decomp_index(char, subchar):
    """Find where ``subchar`` sits inside the decomposition tree of ``char``.

    The decomposition string stored in ``dict_data`` is read left to right
    as a prefix expression: every key of ``positioners`` opens a node with
    that many child slots, and any other symbol is a leaf that fills the
    next open slot.

    Returns the list of child indexes leading from the root to the first
    leaf equal to ``subchar``. The list is empty when that leaf is not
    nested under any positioner. Returns ``None`` when no leaf matches.
    """
    open_nodes = []
    for symbol in dict_data[char]['decomposition']:
        location = []
        if open_nodes:
            parent = open_nodes.pop()
            # This symbol occupies the parent's next free child slot.
            location = parent['path'] + [parent['children']]
            parent['children'] += 1
            # Keep the parent pending only while it still has empty slots.
            if parent['children'] < parent['size']:
                open_nodes.append(parent)
        if symbol in positioners:
            open_nodes.append({
                'size': positioners[symbol],
                'children': 0,
                'path': location,
            })
        elif symbol == subchar:
            return location
    return None
def get_radical_strokes(char):
    """Return the stroke numbers of ``char`` that belong to its radical.

    Returns ``None`` when the character is its own radical, or when
    ``get_decomp_index`` yields no usable path for the radical (``None`` or
    an empty path). Otherwise returns the zero-based positions in the
    character's 'matches' list whose entry equals the radical's path; that
    list may be empty.
    """
    entry = dict_data[char]
    radical = entry['radical']
    if char == radical:
        return None

    radical_path = get_decomp_index(char, radical)
    if not radical_path:
        return None

    return [
        stroke_num
        for stroke_num, match_path in enumerate(entry['matches'])
        if match_path == radical_path
    ]
# write out data

# Attach the radical's stroke numbers to every character for which a
# non-empty list could be determined.
for char, data in graphics_data.items():
    rad_strokes = get_radical_strokes(char)
    if rad_strokes:
        data['radStrokes'] = rad_strokes

# Create the output directory if it is missing so the first write cannot
# fail on a fresh checkout.
os.makedirs(output_dir, exist_ok=True)

# ensure_ascii=False makes json.dumps emit the raw non-ASCII characters, so
# the files are opened as UTF-8 explicitly instead of relying on the
# platform default encoding.
for char, data in graphics_data.items():
    out_file = os.path.join(output_dir, f'{char}.json')
    with open(out_file, 'w', encoding='utf-8') as f:
        f.write(json.dumps(data, ensure_ascii=False))

# One combined file holding the records of every character.
with open(os.path.join(output_dir, 'all.json'), 'w', encoding='utf-8') as f:
    f.write(json.dumps(graphics_data, ensure_ascii=False))