-
Notifications
You must be signed in to change notification settings - Fork 0
/
OneCharacterRecognize.py
66 lines (54 loc) · 2.22 KB
/
OneCharacterRecognize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import numpy as np
from sklearn import svm, metrics
from sklearn.externals import joblib
from sklearn.neighbors import KNeighborsClassifier
# NOTE(review): not referenced anywhere in this module; presumably the side
# length that character images are normalised to before feature extraction
# elsewhere in the project -- TODO confirm against the code that imports it.
sameSize = 16
class OneCharacterRecognize:
    """Train, evaluate and apply a single-character classifier.

    Feature vectors are read from ``./data/feature.txt`` with
    ``numpy.loadtxt`` and labels from ``./data/tag.txt`` (one label per
    line).  The classifier is either an ``svm.SVC(gamma=0.01)`` that is
    persisted to ``./data/handwritingData.pkl``, or a 3-nearest-neighbour
    model that is always trained in memory.

    Args:
        train_num: Number of leading samples used for training
            (rows ``[0, train_num)``).
        test_num: Number of samples, taken right after the training rows,
            used by ``test_precision``.  A falsy value means "every sample
            after the training rows".
        debug: When true the data set is loaded at construction time.
            Otherwise it is loaded lazily the first time it is needed.
        svm: True selects the SVM, false the k-nearest-neighbour model.
        new_clf: When true the SVM is retrained and re-saved instead of
            being loaded from ``savePath``.  Has no effect on the kNN model,
            which is always retrained.
    """

    def __init__(self, train_num=70000, test_num=None, debug=False, svm=True, new_clf=False):
        self.savePath = './data/handwritingData.pkl'
        self.debug = debug
        self.svm = svm
        self.new_clf = new_clf
        self.train_num = train_num
        self.test_num = test_num
        # Filled in by _load_data(); None / 0 mean "not loaded yet".
        self.train_input = None
        self.desired_output = None
        self.totNum = 0
        # Classifier built or loaded by get_classifier(), reused afterwards.
        self._clf = None
        if debug:
            self._load_data()

    def _load_data(self):
        """Read features and labels from disk and resolve ``test_num``."""
        self.train_input = np.loadtxt("./data/feature.txt")
        self.totNum = len(self.train_input)
        if not self.test_num:
            # Default: everything that is not used for training.
            self.test_num = self.totNum - self.train_num
        with open('./data/tag.txt', 'r') as f:
            self.desired_output = [line.strip('\n') for line in f]

    def _ensure_data(self):
        """Load the data set on first use if the constructor did not."""
        # getattr keeps this safe for instances created without __init__.
        if getattr(self, 'train_input', None) is None:
            self._load_data()

    def get_train_data(self, start, end):
        """Return ``(features, labels)`` for the samples in ``[start, end)``."""
        self._ensure_data()
        return self.train_input[start:end], self.desired_output[start:end]

    def get_classifier(self):
        """Return the classifier, loading or training it on the first call.

        The SVM is loaded from ``savePath`` unless ``new_clf`` is set, in
        which case it is trained on the first ``train_num`` samples and
        saved.  The kNN model is always trained and never saved.  The result
        is cached on the instance so repeated ``predict`` calls do not
        reload the pickle or retrain; set ``self._clf = None`` to force a
        rebuild.
        """
        cached = getattr(self, '_clf', None)
        if cached is not None:
            return cached
        if self.svm and not self.new_clf:
            # NOTE: joblib.load unpickles the file, so savePath must only
            # ever point at a trusted model file.
            clf = joblib.load(self.savePath)
        else:
            train_input, train_output = self.get_train_data(0, self.train_num)
            if self.svm:
                clf = svm.SVC(gamma=0.01)
                clf.fit(train_input, train_output)
                joblib.dump(clf, self.savePath)
            else:
                clf = KNeighborsClassifier(n_neighbors=3)
                clf.fit(train_input, train_output)
        self._clf = clf
        return clf

    def test_precision(self):
        """Print a classification report and confusion matrix for the test rows.

        The test rows are the ``test_num`` samples that follow the first
        ``train_num`` samples.
        """
        clf = self.get_classifier()
        print('read test file')
        # test_num may still be unresolved if the data has not been loaded.
        self._ensure_data()
        first = self.train_num
        test_input, test_output = self.get_train_data(first, first + self.test_num)
        # Single pre-formatted argument prints identically on Python 2 and 3.
        print('start predict %d' % len(test_input))
        predicted = clf.predict(test_input)
        print(
            "Classification report for classifier %s:\n%s\n" % (
                clf, metrics.classification_report(test_output, predicted)))
        print("Confusion matrix:\n%s" % metrics.confusion_matrix(test_output, predicted))

    def predict(self, feature):
        """Return the classifier's prediction for ``feature``.

        ``feature`` is passed straight to ``clf.predict``.  If prediction
        raises, ``0`` is returned instead (best-effort contract kept from
        the original).  Errors while loading or training the classifier are
        not suppressed.
        """
        clf = self.get_classifier()
        try:
            return clf.predict(feature)
        except Exception:
            # Narrower than a bare except: KeyboardInterrupt and SystemExit
            # still propagate.
            return 0
if __name__ == '__main__':
    # Script entry point: load the data set eagerly (debug=True) and print
    # the evaluation report for the default classifier configuration.
    recognizer = OneCharacterRecognize(debug=True)
    recognizer.test_precision()