-
Notifications
You must be signed in to change notification settings - Fork 5
/
abbreviate.py
68 lines (53 loc) · 2.07 KB
/
abbreviate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/python
# coding: utf-8
import sys
import json
import numpy as np
import json
from src.steno import Steno
from src.word import Word
class Abbreviation:
    """Build abbreviated steno outlines for long dictionary entries.

    An outline is a string of parts (strokes) joined by '/'. For every
    dictionary entry whose outline has at least three parts and whose word
    has no outline of one or two parts, a shortened outline is derived and
    written to a JSON file; shortened outlines claimed by more than one
    entry are reported in a second JSON file instead.
    """

    # Class-level attributes kept for backward compatibility; no method of
    # this class reads them. The two lists are shared by every instance.
    picked = []
    words = []
    source = "resources/Lexique383.tsv"

    def contraction_exists(self, data, word):
        """Return True if *word* already has an outline of one or two parts.

        Args:
            data: mapping of steno outline ('/'-separated) to word.
            word: the translation to look for among the values of *data*.

        Returns:
            True when some outline with fewer than two '/' maps to *word*,
            False otherwise (including when *data* is empty).
        """
        return any(
            val == word and steno.count('/') < 2
            for steno, val in data.items()
        )

    @staticmethod
    def _contracted_words(data):
        """Collect every word owning an outline with fewer than two '/'."""
        # Assumes the values of *data* are hashable (plain strings).
        return {val for steno, val in data.items() if steno.count('/') < 2}

    def generate(
        self,
        dico_path='resources/dicofr.json',
        verbs_path='resources/verbs.json',
        abbrev_path='resources/abbrev.json',
        dup_path='resources/dup-abbrev.json',
    ):
        """Derive abbreviations from the dictionary and write them as JSON.

        Args:
            dico_path: JSON file mapping steno outline to word (input).
            verbs_path: JSON container of outlines that must not be reused
                as abbreviations (input); only membership tests are done.
            abbrev_path: output JSON file, abbreviation -> word.
            dup_path: output JSON file, original outline ->
                [word that first claimed the abbreviation, abbreviation].

        All files are read and written as UTF-8 so the result does not
        depend on the platform's default encoding. Each candidate
        abbreviation is also printed to stdout.
        """
        with open(dico_path, encoding='utf-8') as json_file:
            data = json.load(json_file)
        with open(verbs_path, encoding='utf-8') as json_file:
            verbs = json.load(json_file)

        # Built once: equivalent to calling contraction_exists() per entry,
        # without rescanning the whole dictionary every time.
        contracted = self._contracted_words(data)

        translated_word = {}
        dup = {}
        # Abbreviations already dropped because two entries competed for
        # them; without this a third competing entry would re-add them.
        ambiguous = set()

        for elem_steno, elem_word in data.items():
            separators = elem_steno.count('/')
            if separators < 2:
                continue
            if elem_word in contracted:
                continue

            strokes = elem_steno.split('/')
            if separators == 3:
                # Four-part outlines keep only the first and last part.
                word = strokes[0] + '/' + strokes[-1]
            else:
                # Otherwise keep the first two parts and the last one.
                word = strokes[0] + '/' + strokes[1] + '/' + strokes[-1]
            print(word)

            # NOTE(review): `word` is a string, so `word[-1]` is its last
            # character, not its last part -- possibly `strokes[-1]` was
            # intended. Kept as in the original; confirm with the author.
            if (
                word not in ambiguous
                and word[-1] not in data
                and word not in data
                and word not in translated_word
                and word[-1] not in verbs
                and word not in verbs
            ):
                translated_word[word] = elem_word
                continue

            if word in translated_word:
                # Collision: record it and withdraw the abbreviation.
                dup.setdefault(elem_steno, []).extend(
                    (translated_word.pop(word), word)
                )
                ambiguous.add(word)

        json_object = json.dumps(translated_word, indent=4, ensure_ascii=False)
        with open(abbrev_path, "w", encoding='utf-8') as d:
            d.write(json_object)

        dup_object = json.dumps(dup, indent=4, ensure_ascii=False)
        with open(dup_path, "w", encoding='utf-8') as d:
            d.write(dup_object)
# Module-level call: builds the abbreviation files whenever this file is
# executed (or imported), using the default resource paths.
Abbreviation().generate()