-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
182 lines (136 loc) · 6.43 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
"""
Importing all necessary packages
"""
import os
import cv2
import glob
import argparse
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from data_processing import preprocess, mask_image, hist_eq, edge
def load_data(img, size):
    """
    Load an image file as a flattened grayscale array.

    Parameters:
        img: path of the image file to read.
        size: side length, in pixels, the image is resized to.

    Returns:
        1-D array of shape (size*size,) holding the resized grayscale pixels.

    Raises:
        ValueError: if the file cannot be read as an image.
    """
    pixels = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
    if pixels is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising, so fail here and name the offending path.
        raise ValueError("Could not read image file: {}".format(img))
    return cv2.resize(pixels, (size, size)).flatten()
def normalize_array(img):
    """
    Scale an image array by 1/255.

    Dividing by 255 maps the 8-bit intensity 255 onto 1.0; the input
    itself is not modified, a new scaled value is returned.
    """
    return img / 255
def predict_result(predict):
    """
    Translate a class index produced by the neural network
    into the name of the matching coin denomination.

    Raises KeyError for an index outside 0..8.
    """
    # Position in this tuple is the class index.
    denominations = (
        'one_grosz', 'two_grosz', 'five_grosz', 'ten_grosz', 'twenty_grosz',
        'fifty_grosz', 'one_zloty', 'two_zloty', 'five_zloty',
    )
    index_to_name = dict(enumerate(denominations))
    return index_to_name[predict]
def addition(result):
    """
    Look up the monetary value (in zloty) of a coin denomination name.

    Raises KeyError for an unknown denomination name.
    """
    denominations = (
        'one_grosz', 'two_grosz', 'five_grosz', 'ten_grosz', 'twenty_grosz',
        'fifty_grosz', 'one_zloty', 'two_zloty', 'five_zloty',
    )
    amounts = (0.01, 0.02, 0.05, 0.10, 0.20, 0.50, 1.00, 2.00, 5.00)
    # Pair every name with the amount at the same position.
    name_to_amount = dict(zip(denominations, amounts))
    return name_to_amount[result]
def _parse_args():
    """
    Parse the command-line options of the script.

    --file_path and --input_image are mandatory because nothing can be
    trained or predicted without them; --size defaults to 64.
    """
    parser = argparse.ArgumentParser(description="User input for image preprocessing")
    parser.add_argument("--file_path", required=True, help="Training dataset")
    parser.add_argument("--size", default=64, type=int, help="dimension of output images")
    parser.add_argument("--input_image", required=True, help=" Image for prediction")
    return parser.parse_args()


def _load_dataset(file_path, size):
    """
    Load the training images and their class indices.

    Parameters:
        file_path: directory containing one sub-folder per coin denomination.
        size: side length every image is resized to by load_data.

    Returns:
        (X, y): X is the float32 array of flattened images scaled by
        normalize_array, y the float32 array of class indices.

    Raises:
        ValueError: if no image is found in any of the expected folders.
    """
    # The position in this list is the class index; the order matches the
    # index -> name table used by predict_result.
    folders = ['1_grz', '2_grz', '5_grz', '10_grz', '20_grz', '50_grz', '1_zl', '2_zl', '5_zl']
    images = []
    labels = []
    # Iterate over the known folder names rather than over os.listdir():
    # an unrelated extra entry in the directory must neither shift nor
    # overflow the label indices.
    for label, folder in enumerate(folders):
        for path in glob.glob(os.path.join(file_path, folder, "*")):
            images.append(load_data(path, size))
            labels.append(label)
    if not images:
        raise ValueError("No training images found under {}".format(file_path))
    X = normalize_array(np.asarray(images, dtype=np.float32))
    y = np.asarray(labels, dtype=np.float32)
    return X, y


def _build_model():
    """
    Build and compile the coin classifier.

    Architecture:
        hidden layer 1: 512 units, ReLU, He-normal init, Dropout(0.5)
        hidden layer 2: 256 units, ReLU, He-normal init, Dropout(0.5)
        hidden layer 3: 512 units, ReLU, He-normal init, Dropout(0.7)
        output layer:   9 units (nine coin denominations), softmax

    The Dropout argument is the fraction of units that is dropped,
    not a keep probability.

    Compiled with the Adam optimizer (learning rate 0.001), sparse
    categorical cross-entropy loss and the accuracy metric.
    """
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, kernel_initializer=tf.keras.initializers.he_normal(seed=None),
                              activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(256, kernel_initializer=tf.keras.initializers.he_normal(seed=None),
                              activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(512, kernel_initializer=tf.keras.initializers.he_normal(seed=None),
                              activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.7),
        tf.keras.layers.Dense(9, activation=tf.nn.softmax),
    ])
    # The learning rate is passed positionally because its keyword was
    # renamed from `lr` to `learning_rate` between Keras releases; every
    # other value that used to be spelled out here was the library default.
    adam = tf.keras.optimizers.Adam(0.001)
    model.compile(optimizer=adam,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model


def _annotate_coins(model, image, size):
    """
    Classify every coin detected in `image` and draw the result onto it.

    Parameters:
        model: trained classifier used for the per-coin prediction.
        image: colour image as loaded by cv2.imread; modified in place.
        size: side length each cropped coin is resized to before prediction.

    Returns:
        The summed value of all predicted coins (0 when none is detected).
    """
    circles = preprocess(image)
    if circles is None or len(circles) == 0:
        # No detection: there is no radius to take the maximum of.
        return 0
    # Each row of `circles` is unpacked below as (x, y, r); column 2 is the
    # radius, and every coin is cropped with the largest one.
    radius = np.amax(circles, 0)[2]
    # Crop from an untouched copy so the circles and labels drawn for
    # earlier coins never end up inside the crop of a later one.
    pristine = image.copy()
    total = 0
    for x, y, r in circles:
        crop = pristine[y - radius:y + radius, x - radius:x + radius]
        if crop.shape[0] == 0 or crop.shape[1] == 0:
            # An empty crop is treated as a false-positive detection.
            continue
        crop = cv2.resize(crop, (size, size))
        crop = mask_image(crop)
        crop = hist_eq(crop)
        crop = edge(crop)
        features = normalize_array(crop.flatten())
        # Turn the flat vector into a single-row batch of width size*size.
        # NOTE(review): presumably equivalent to reshape(1, size*size) when
        # edge() returns a single-channel size x size image -- confirm
        # before simplifying.
        features = cv2.resize(features, (1, size * size)).T
        probabilities = model.predict([features])
        name = predict_result(np.argmax(probabilities[0]))
        total += addition(name)
        # Outline the detected coin and write its predicted denomination.
        cv2.circle(image, (x, y), r, (0, 255, 0), 2)
        cv2.putText(image, name, (x - 40, y), cv2.FONT_HERSHEY_PLAIN, 5, (0, 255, 0),
                    thickness=2, lineType=cv2.LINE_AA)
    return total


def main():
    """
    Train the coin classifier, then detect, classify and sum up the
    coins in the image given by --input_image and display the result.
    """
    args = _parse_args()

    # Read the prediction image before training so that a wrong path is
    # reported immediately instead of after the whole training run.
    output = cv2.imread(args.input_image)
    if output is None:
        raise ValueError("Could not read image file: {}".format(args.input_image))

    X, y = _load_dataset(args.file_path, args.size)
    # Splitting the dataset into training and testing set
    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True,
                                                        random_state=42, test_size=0.1)

    model = _build_model()
    # Trained for 5 epochs; no batch size is passed, so the Keras default applies.
    model.fit(X_train, y_train, epochs=5)
    # Evaluate the testing set
    print(model.evaluate(X_test, y_test))

    total_sum = _annotate_coins(model, output, args.size)
    # Round before printing: summing float coin values can otherwise show
    # artefacts such as 0.30000000000000004.
    cv2.putText(output, "There is {} Zloty worth of coins".format(round(total_sum, 2)),
                (5, output.shape[0] - 8), cv2.FONT_HERSHEY_PLAIN, 5, (0, 0, 255),
                thickness=2, lineType=cv2.LINE_AA)
    # Show the annotated image at half of its original resolution.
    output = cv2.resize(output, None, fx=1/2, fy=1/2)
    cv2.imshow('output', output)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()