-
Notifications
You must be signed in to change notification settings - Fork 17
/
editDistance.py
89 lines (71 loc) · 2.35 KB
/
editDistance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import numpy as np
import pdb
class Check_edit_distance():
'''
Class implements Edit distance for computing CER i.e (Character Error Rate)
'''
def __init__(self):
pass
def decode_best_path(self,probs, ref=None, blank=0):
best_path = np.argmax(probs,axis=0).tolist()
# Collapse phone string
hyp = []
for i,b in enumerate(best_path):
# ignore blanks
if b == blank:
continue
# ignore repeats
elif i != 0 and b == best_path[i-1]:
continue
else:
hyp.append(b)
# Optionally compute phone error rate to ground truth
dist = 0
if ref is not None:
#ref = ref.tolist()
dist,_,_,_,_ = self.edit_distance(ref,hyp)
return hyp,dist
def edit_distance(self,ref,hyp):
"""
Edit distance between two sequences reference (ref) and hypothesis (hyp).
Returns edit distance, number of insertions, deletions and substitutions to
transform hyp to ref, and number of correct matches.
"""
n = len(ref)
m = len(hyp)
ins = dels = subs = corr = 0
D = np.zeros((n+1,m+1))
D[:,0] = np.arange(n+1)
D[0,:] = np.arange(m+1)
for i in xrange(1,n+1):
for j in xrange(1,m+1):
if ref[i-1] == hyp[j-1]:
D[i,j] = D[i-1,j-1]
else:
D[i,j] = min(D[i-1,j],D[i,j-1],D[i-1,j-1])+1
i=n
j=m
while i>0 and j>0:
if ref[i-1] == hyp[j-1]:
corr += 1
elif D[i-1,j] == D[i,j]-1:
ins += 1
j += 1
elif D[i,j-1] == D[i,j]-1:
dels += 1
i += 1
elif D[i-1,j-1] == D[i,j]-1:
subs += 1
i -= 1
j -= 1
ins += i
dels += j
return D[-1,-1],ins,dels,subs,corr
def disp(self,ref,hyp):
dist,ins,dels,subs,corr = self.edit_distance(ref,hyp)
'''
print "Reference : %s, Hypothesis : %s"%(''.join(ref),''.join(hyp))
print "Distance : %d"%dist
print "Ins : %d, Dels : %d, Subs : %d, Corr : %d"%(ins,dels,subs,corr)
'''
return dist,corr