import numpy as np
from numpy.linalg import norm
from skimage.io import imread
from skimage.color import rgb2gray
from skimage.feature import hog
from skimage.transform import resize
from sklearn.cluster import MiniBatchKMeans
from sklearn.svm import LinearSVC
from scipy.spatial.distance import cdist

def get_tiny_images(image_paths):
    '''
    This feature is inspired by: A. Torralba, R. Fergus, W. T. Freeman. IEEE
    Transactions on Pattern Analysis and Machine Intelligence, vol. 30(11),
    pp. 1958-1970, 2008. http://groups.csail.mit.edu/vision/TinyImages/

    Inputs:
        image_paths: a 1-D Python list of strings. Each string is a complete
        path to an image on the filesystem.

    Outputs:
        An n x d numpy array where n is the number of images and d is the
        length of the tiny image representation vector, e.g. if the images
        are resized to 16x16, then d is 16 * 16 = 256.
    '''
    images = np.zeros((len(image_paths), 256))
    for i, file in enumerate(image_paths):
        img = imread(file)
        if img.ndim == 3:
            # Convert RGB images to grayscale; grayscale images pass through
            img = rgb2gray(img)
        # Resize to 16x16, flatten, and normalise to unit length
        img = resize(img, (16, 16), anti_aliasing=True).flatten()
        images[i] = img / norm(img)
    return images

def build_vocabulary(image_paths, vocab_size):
    '''
    This function samples HOG descriptors from the training images,
    clusters them with kmeans, and then returns the cluster centers.

    Inputs:
        image_paths: a Python list of image path strings
        vocab_size: an integer indicating the number of words desired for the
        bag of words vocab set

    Outputs:
        a vocab_size x (z*z*9) (see below) array which contains the cluster
        centers that result from the K Means clustering.
    '''
    image_list = [imread(file) for file in image_paths]
    cells_per_block = (2, 2)
    z = cells_per_block[0]
    pixels_per_cell = (4, 4)
    feature_vectors_images = []
    for image in image_list:
        # Compute HOG descriptors and reshape them so that each row is one
        # block descriptor of length z * z * 9 (9 orientation bins per cell)
        feature_vectors = hog(image, feature_vector=True, pixels_per_cell=pixels_per_cell,
                              cells_per_block=cells_per_block, visualize=False)
        feature_vectors = feature_vectors.reshape(-1, z * z * 9)
        feature_vectors_images.append(feature_vectors)
    all_feature_vectors = np.vstack(feature_vectors_images)
    # Cluster the descriptors; lower max_iter for lower compute time
    kmeans = MiniBatchKMeans(n_clusters=vocab_size, max_iter=500).fit(all_feature_vectors)
    vocabulary = kmeans.cluster_centers_
    return vocabulary
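
# Sketch (not part of the original scaffold): the vocabulary is typically
# built once from the training images and cached on disk, because
# get_bags_of_words() below expects to load it from 'vocab.npy'. Assuming a
# hypothetical list of training image paths and a vocab size of 200:
#
#   vocab = build_vocabulary(train_image_paths, vocab_size=200)
#   np.save('vocab.npy', vocab)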

def get_bags_of_words(image_paths):
    '''
    This function takes in a list of image paths and calculates a bag of
    words histogram for each image, then returns those histograms in an array.

    Inputs:
        image_paths: A Python list of strings, where each string is a complete
        path to one image on the disk.

    Outputs:
        An n x d numpy array, where n is the number of images in image_paths
        and d is the size of the histogram built for each image.
    '''
    vocab = np.load('vocab.npy')
    print('Loaded vocab from file.')
    vocab_length = vocab.shape[0]
    image_list = [imread(file) for file in image_paths]
    # Instantiate empty array of histograms
    images_histograms = np.zeros((len(image_list), vocab_length))
    cells_per_block = (2, 2)  # Change for lower compute time
    z = cells_per_block[0]
    pixels_per_cell = (4, 4)  # Change for lower compute time
    for i, image in enumerate(image_list):
        feature_vectors = hog(image, feature_vector=True, pixels_per_cell=pixels_per_cell,
                              cells_per_block=cells_per_block, visualize=False)
        feature_vectors = feature_vectors.reshape(-1, z * z * 9)
        # Assign each descriptor to its nearest visual word and build a
        # normalised histogram of word counts for this image
        histogram = np.zeros(vocab_length)
        distances = cdist(feature_vectors, vocab)
        closest_vocab = np.argmin(distances, axis=1)
        indices, counts = np.unique(closest_vocab, return_counts=True)
        histogram[indices] += counts
        histogram = histogram / norm(histogram)
        images_histograms[i] = histogram
    return images_histograms

def svm_classify(train_image_feats, train_labels, test_image_feats):
    '''
    This function predicts a category for every test image by training
    15 one-vs-all linear SVM classifiers on the training data, then
    uses those learned classifiers on the testing data.

    Inputs:
        train_image_feats: An n x d numpy array, where n is the number of training
        examples, and d is the image descriptor vector size.
        train_labels: An n x 1 Python list containing the corresponding ground
        truth labels for the training data.
        test_image_feats: An m x d numpy array, where m is the number of test
        images and d is the image descriptor vector size.

    Outputs:
        An m x 1 numpy array of strings, where each string is the predicted label
        for the corresponding image in test_image_feats
    '''
    clf = LinearSVC(random_state=0, tol=1e-5)
    clf.fit(train_image_feats, train_labels)
    test_predictions = clf.predict(test_image_feats)
    return test_predictions
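
# Note (added, not from the assignment handout): LinearSVC fits one
# one-vs-rest classifier per category internally, so no explicit loop over
# the 15 categories is needed. The regularization strength C (default 1.0)
# is worth tuning on held-out data, e.g.:
#
#   clf = LinearSVC(C=5.0, random_state=0, tol=1e-5)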

def nearest_neighbor_classify(train_image_feats, train_labels, test_image_feats):
    '''
    This function predicts the category for every test image by finding
    the training image with the most similar features. For any arbitrary
    k, find the closest k neighbors and then vote among them
    to find the most common category and return that as the prediction.

    Inputs:
        train_image_feats: An n x d numpy array, where n is the number of training
        examples, and d is the image descriptor vector size.
        train_labels: An n x 1 Python list containing the corresponding ground
        truth labels for the training data.
        test_image_feats: An m x d numpy array, where m is the number of test
        images and d is the image descriptor vector size.

    Outputs:
        An m x 1 numpy array of strings, where each string is the predicted label
        for the corresponding image in test_image_feats
    '''
    k = 5
    # Distance between each test image feature and each train image feature
    distances = cdist(test_image_feats, train_image_feats, 'euclidean')
    # Indices of the k closest training features for each test image
    sorted_indices = np.argsort(distances, axis=1)
    knns = sorted_indices[:, 0:k]
    # Look up the labels of those k nearest neighbors
    train_labels = np.asarray(train_labels)
    knn_labels = train_labels[knns]
    # Simple majority/plurality vote among the k labels to choose each
    # prediction (could be enhanced with distances to make a weighted vote)
    predictions = []
    for row in knn_labels:
        values, counts = np.unique(row, return_counts=True)
        predictions.append(values[np.argmax(counts)])
    return np.array(predictions)
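

if __name__ == '__main__':
    # Minimal end-to-end sketch, assuming hypothetical 'train/' and 'test/'
    # directories laid out as <split>/<category>/<image>.jpg and a vocab size
    # of 200; adjust paths and parameters to match the actual assignment
    # harness, which normally drives these functions itself.
    import glob

    train_image_paths = sorted(glob.glob('train/*/*.jpg'))
    test_image_paths = sorted(glob.glob('test/*/*.jpg'))
    train_labels = [path.split('/')[-2] for path in train_image_paths]

    # Build and cache the visual-word vocabulary used by get_bags_of_words()
    vocab = build_vocabulary(train_image_paths, vocab_size=200)
    np.save('vocab.npy', vocab)

    # Bag-of-words features + linear SVM (get_tiny_images and
    # nearest_neighbor_classify could be swapped in here instead)
    train_feats = get_bags_of_words(train_image_paths)
    test_feats = get_bags_of_words(test_image_paths)
    predictions = svm_classify(train_feats, train_labels, test_feats)
    print(predictions[:10])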