diff --git a/Data_helper.py b/Data_helper.py new file mode 100644 index 000000000..9ff51229f --- /dev/null +++ b/Data_helper.py @@ -0,0 +1,151 @@ +from Drawer import * +import cPickle as pickle +from copy import deepcopy +import subprocess +mult = 1. + +def shuffle(train_path, file, expectC, S, batch, epoch): + with open(file,'rb') as f: + pick, data = pickle.load(f) + C = len(pick) + if C != expectC: + exit("There is a mismatch between the model and the parsed annotations") + size = len(data) + print 'Dataset of total {}'.format(size) + batch_per_epoch = int(size / batch) + + for i in range(epoch): + print 'EPOCH {}'.format(i+1) + # Shuffle data + shuffle_idx = np.random.permutation(np.arange(size)) + for b in range(batch_per_epoch): + for r in range(1): + start_idx = b * batch + end_idx = (b+1) * batch + + datum = list() + x_batch = list() + jpgs = list() + try: + # if True: + for j in range(start_idx,end_idx): + real_idx = shuffle_idx[j] + this = data[real_idx] + jpg = this[0] + w, h, allobj_ = this[1] + allobj = deepcopy(allobj_) + flip = (r / 2) + (r % 2) * (j % 2) + flip = flip % 2 + + path = '{}{}'.format(train_path, jpg) + img, allobj = crop(path, allobj) + + if flip == 1: + img = img[:,:,::-1,:] + + img = [img] + jpgs += [path] + + cellx = 1. * w / S + celly = 1. * h / S + for x in allobj: + # cv2.rectangle(img[0], (x[1], x[2]), (x[3], x[4]), (0,0,255), 2) + centerx = .5*(x[1]+x[3]) #xmin, xmax + centery = .5*(x[2]+x[4]) #ymin, ymax + if flip == 1: + centerx = w - centerx + cx = centerx / cellx + cy = centery / celly + x[3] = float(x[3]-x[1]) / w + x[4] = float(x[4]-x[2]) / h + x[3] = np.sqrt(x[3]) + x[4] = np.sqrt(x[4]) + x[1] = cx - np.floor(cx) + x[2] = cy - np.floor(cy) + x += [np.floor(cx)] + x += [np.floor(cy)] + + if False: + for x in allobj: + cx = x[5] + x[1] + cy = x[6] + x[2] + centerx = cx * cellx + centery = cy * celly + ww = x[3] * x[3] * w + hh = x[4] * x[4] * h + cv2.rectangle(im, + (int(centerx - ww/2), int(centery - hh/2)), + (int(centerx + ww/2), int(centery + hh/2)), + (0,0,255), 2) + + cv2.imshow("result", im) + cv2.waitKey() + cv2.destroyAllWindows() + + + probs = np.zeros([S*S,C]) + confs = np.zeros([S*S,2]) + coord = np.zeros([S*S,2,4]) + proid = np.zeros([S*S,C]) + conid = np.zeros([S*S,2]) + cooid1 = cooid2 = np.zeros([S*S,1,4]) + prear = np.zeros([S*S,4]) + for x in allobj: + at = int(x[6] * S + x[5]) + probs[at, :] = [0.] * C + probs[at, pick.index(x[0])] = 1. + proid[at, :] = [1] * C + coord[at, 0, :] = x[1:5] + coord[at, 1, :] = x[1:5] + prear[at,0] = x[1] - x[3]**2 * 3.5 # xleft + prear[at,1] = x[2] - x[4]**2 * 3.5 # yup + prear[at,2] = x[1] + x[3]**2 * 3.5 # xright + prear[at,3] = x[2] + x[4]**2 * 3.5 # ybot + confs[at, :] = [1.] * 2 + conid[at, :] = [1.] * 2 + cooid1[at, 0, :] = [1.] * 4 + cooid2[at, 0, :] = [1.] * 4 + upleft = np.expand_dims(prear[:,0:2], 1) # 49 x 1 + botright = np.expand_dims(prear[:,2:4], 1) + #================================================== + probs = probs.reshape([-1]) # true_class + confs1 = confs[:,0] + confs2 = confs[:,1] + coord = coord.reshape([-1]) # true_coo + upleft = np.concatenate([upleft]*2,1) + botright = np.concatenate([botright]*2,1) + proid = proid.reshape([-1]) # class_idtf + conid1 = conid[:,0] + conid2 = conid[:,1] + cooid1 = cooid1 + cooid2 = cooid2 + #================================================== + new = [ + [probs], [confs1], [confs2], [coord], + [upleft], [botright], + [proid], [conid1], [conid2], [cooid1], [cooid2] + ] + if datum == list(): + datum = new + x_batch = img + else: + x_batch += img + for i in range(len(datum)): + datum[i] = np.concatenate([datum[i], new[i]]) + + if False: + here = 0 + names = list() + while here + C < S*S*C: + consider = probs[here:here+C] + if (np.sum(consider) > 0.5): + names += [pick[np.argmax(consider)]] + here += C + print '{} : {}'.format(jpg, names) + + + x_batch = np.concatenate(x_batch, 0) + yield (x_batch, datum) + except: + print 'Random scale/translate sends object(s) out of bound' + continue diff --git a/Drawer.py b/Drawer.py new file mode 100644 index 000000000..ab2f8700a --- /dev/null +++ b/Drawer.py @@ -0,0 +1,130 @@ +from box import * +from PIL import Image, ImageFile +ImageFile.LOAD_TRUNCATED_IMAGES = True +import cv2 + +def fix(x,c): + return max(min(x,c),0) + +def crop(imPath, allobj = None): + + im = cv2.imread(imPath) + if allobj is not None: + h, w, _ = im.shape + scale = np.random.uniform()/3. + 1. + max_offx = (scale-1.) * w + max_offy = (scale-1.) * h + offx = int(np.random.uniform() * max_offx) + offy = int(np.random.uniform() * max_offy) + im = cv2.resize(im, (0,0), fx = scale, fy = scale) + im = im[offy : (offy + h), offx : (offx + w)] + #--------------- + # (x,y) --> (scale*x, scale*y) + # (scale*x - offx, scale*y - offy) + #-------------- + for obj in allobj: + obj[1] = int(obj[1]*scale-offx) + obj[3] = int(obj[3]*scale-offx) + obj[2] = int(obj[2]*scale-offy) + obj[4] = int(obj[4]*scale-offy) + obj[1] = fix(obj[1], w) + obj[3] = fix(obj[3], w) + obj[2] = fix(obj[2], h) + obj[4] = fix(obj[4], h) + #print obj, w, h + + # return im + im_ = cv2.resize(im, (448, 448)) + image_array = np.array(im_) + image_array = image_array / 255. + image_array = image_array * 2. - 1. + image_array = np.expand_dims(image_array, 0) # 1, height, width, 3 + + if allobj is not None: + return image_array, allobj + else: + return image_array + +def to_color(indx, base): + base2 = base * base + b = indx / base2 + r = (indx % base2) / base + g = (indx % base2) % base + return (b * 127, r * 127, g * 127) + +def draw_predictions(predictions, + img_path, flip, threshold, + C, S, labels, colors): + + B = 2 + boxes = [] + SS = S * S # number of grid cells + prob_size = SS * C # class probabilities + conf_size = SS * B # confidences for each grid cell + probs = predictions[0 : prob_size] + confs = predictions[prob_size : (prob_size + conf_size)] + cords = predictions[(prob_size + conf_size) : ] + probs = probs.reshape([SS, C]) + confs = confs.reshape([SS, B]) + cords = cords.reshape([SS, B, 4]) + + for grid in range(SS): + for b in range(B): + new_box = BoundBox(C) + new_box.c = confs[grid, b] + new_box.x = (cords[grid, b, 0] + grid % S) / S + new_box.y = (cords[grid, b, 1] + grid // S) / S + new_box.w = cords[grid, b, 2] ** 2 + new_box.h = cords[grid, b, 3] ** 2 + new_box.id = '{}-{}'.format(grid, b) + for c in range(C): + new_box.probs[c] = new_box.c * probs[grid, c] + boxes.append(new_box) + + # non max suppress boxes + if True: + for c in range(C): + for i in range(len(boxes)): boxes[i].class_num = c + boxes = sorted(boxes, cmp=prob_compare) + for i in range(len(boxes)): + boxi = boxes[i] + if boxi.probs[c] == 0: continue + for j in range(i + 1, len(boxes)): + boxj = boxes[j] + boxij = box_intersection(boxi, boxj) + boxja = boxj.w * boxj.h + apart = boxij / boxja + if apart >= .5: + if boxi.probs[c] > boxj.probs[c]: + boxes[j].probs[c] = 0. + else: + boxes[i].probs[c] = 0. + + imgcv = cv2.imread(img_path) + if flip: imgcv = cv2.flip(imgcv, 1) + print img_path + h, w, _ = imgcv.shape + for b in boxes: + max_indx = np.argmax(b.probs) + max_prob = b.probs[max_indx] + label = 'object' * int(C < 2) + label += labels[max_indx] * int(C > 1) + if (max_prob > threshold): + left = int ((b.x - b.w/2.) * w) + right = int ((b.x + b.w/2.) * w) + top = int ((b.y - b.h/2.) * h) + bot = int ((b.y + b.h/2.) * h) + if left < 0 : left = 0 + if right > w - 1: right = w - 1 + if top < 0 : top = 0 + if bot > h - 1: bot = h - 1 + cv2.rectangle(imgcv, + (left, top), (right, bot), + colors[max_indx], 2) + mess = '{}:{:.3f}'.format(label, max_prob) + cv2.putText(imgcv, mess, (left, top - 10), + 0, 1e-3 * h, colors[max_indx]) + + img_name = 'results/{}'.format( + img_path.split('/')[-1].split('.')[0]) + cv2.imwrite(img_name + flip * '_' + '.jpg', imgcv) \ No newline at end of file diff --git a/TFnet.py b/TFnet.py new file mode 100644 index 000000000..0c289fb38 --- /dev/null +++ b/TFnet.py @@ -0,0 +1,298 @@ +import tensorflow as tf +import numpy as np +import os +import time +from Drawer import * +from Data_helper import shuffle +from Yolo import * +import subprocess +import sys + +class SimpleNet(object): + + labels = list() + colors = list() + C = int() + model = str() + step = int() + learning_rate = float() + scale_prob = float() + scale_conf = float() + scale_noobj = float() + scale_coor = float() + save_every = int() + + def __init__(self, yolo, FLAGS): + self.model = yolo.model + self.S = yolo.S + self.labels = yolo.labels + self.C = len(self.labels) + + base = int(np.ceil(pow(self.C, 1./3))) + for x in range(len(self.labels)): + self.colors += [to_color(x, base)] + + self.inp = tf.placeholder(tf.float32, + [None, 448, 448, 3], name = 'input') + self.drop = tf.placeholder(tf.float32, name = 'dropout') + + now = self.inp + for i in range(yolo.layer_number): + print now.get_shape() + l = yolo.layers[i] + if l.type == 'CONVOLUTIONAL': + if l.pad < 0: + size = np.int(now.get_shape()[1]) + expect = -(l.pad + 1) * l.stride # there you go bietche + expect += l.size - size + padding = [expect / 2, expect - expect / 2] + if padding[0] < 0: padding[0] = 0 + if padding[1] < 0: padding[1] = 0 + else: + padding = [l.pad, l.pad] + l.pad = 'VALID' + now = tf.pad(now, [[0, 0], padding, padding, [0, 0]]) + if FLAGS.savepb: + b = tf.constant(l.biases) + w = tf.constant(l.weights) + else: + b = tf.Variable(l.biases) + w = tf.Variable(l.weights) + now = tf.nn.conv2d(now, w, + strides=[1, l.stride, l.stride, 1], + padding=l.pad) + now = tf.nn.bias_add(now, b) + now = tf.maximum(0.1 * now, now) + elif l.type == 'MAXPOOL': + l.pad = 'VALID' + now = tf.nn.max_pool(now, + padding = l.pad, + ksize = [1,l.size,l.size,1], + strides = [1,l.stride,l.stride,1]) + elif l.type == 'FLATTEN': + now = tf.transpose(now, [0,3,1,2]) + now = tf.reshape(now, + [-1, int(np.prod(now.get_shape()[1:]))]) + elif l.type == 'CONNECTED': + name = str() + if i == yolo.layer_number - 1: name = 'output' + else: name = 'conn' + if FLAGS.savepb: + b = tf.constant(l.biases) + w = tf.constant(l.weights) + else: + b = tf.Variable(l.biases) + w = tf.Variable(l.weights) + now = tf.nn.xw_plus_b(now, w, b, name = name) + elif l.type == 'LEAKY': + now = tf.maximum(0.1 * now, now) + elif l.type == 'DROPOUT': + if not FLAGS.savepb: + print ('dropout') + now = tf.nn.dropout(now, keep_prob = self.drop) + print now.get_shape() + self.out = now + + def setup_meta_ops(self, FLAGS): + self.save_every = FLAGS.save + self.learning_rate = FLAGS.lr + scales = [float(f) for i, f in enumerate(FLAGS.scale.split(','))] + self.scale_prob, self.scale_conf, self.scale_noobj, self.scale_coor = scales + if FLAGS.gpu > 0: + percentage = min(FLAGS.gpu, 1.) + print 'gpu mode {} usage'.format(percentage) + gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=percentage) + self.sess = tf.Session(config = tf.ConfigProto( + allow_soft_placement = True, + log_device_placement = False, + gpu_options = gpu_options)) + else: + print 'cpu mode' + self.sess = tf.Session(config = tf.ConfigProto( + allow_soft_placement = False, + log_device_placement = False)) + if FLAGS.train: self.decode() + if FLAGS.savepb: + self.savepb('graph-{}.pb'.format(self.model)) + sys.exit() + else: self.saver = tf.train.Saver(tf.all_variables(), max_to_keep = FLAGS.keep) + self.sess.run(tf.initialize_all_variables()) + if FLAGS.load: + load_point = 'backup/model-{}'.format(self.step) + print 'loading from {}'.format(load_point) + self.saver.restore(self.sess, load_point) + + def savepb(self, name): + print 'Saving pb to {}'.format(name) + tf.train.write_graph(self.sess.graph_def,'./', name, as_text = False) + + def to_constant(self, inc = 0): + with open('binaries/yolo-{}-{}.weights'.format( + self.model.split('-')[0], self.step + inc), 'w') as f: + f.write(np.array([1]*4, dtype=np.int32).tobytes()) + for i, variable in enumerate(tf.trainable_variables()): + val = variable.eval(self.sess) + if len(val.shape) == 4: + val = val.transpose([3,2,0,1]) + val = val.reshape([-1]) + f.write(val.tobytes()) + + def decode(self): + print ('Set up loss and train ops (may cause lag)...') + SS = self.S * self.S + self.true_class = tf.placeholder(tf.float32, # + [None, SS * self.C]) + self.true_coo = tf.placeholder(tf.float32, # + [None, SS * 2 * 4]) + self.class_idtf = tf.placeholder(tf.float32, # + [None, SS * self.C]) + self.cooid1 = tf.placeholder(tf.float32, # + [None, SS, 1, 4]) + self.cooid2 = tf.placeholder(tf.float32, # + [None, SS, 1, 4]) + self.confs1 = tf.placeholder(tf.float32, # + [None, SS]) + self.confs2 = tf.placeholder(tf.float32, # + [None, SS]) + self.conid1 = tf.placeholder(tf.float32, # + [None, SS]) + self.conid2 = tf.placeholder(tf.float32, # + [None, SS]) + self.upleft = tf.placeholder(tf.float32, # + [None, SS, 2, 2]) + self.botright = tf.placeholder(tf.float32, # + [None, SS, 2, 2]) + + coords = self.out[:, SS * (self.C + 2):] + coords = tf.reshape(coords, [-1, SS, 2, 4]) + + wh = tf.pow(coords[:,:,:,2:4], 2) * 3.5; + xy = coords[:,:,:,0:2] + floor = xy - wh + ceil = xy + wh + + # [batch, 49, box, xy] + intersect_upleft = tf.maximum(floor, self.upleft) + intersect_botright = tf.minimum(ceil, self.botright) + intersect_wh = intersect_botright - intersect_upleft + intersect_wh = tf.maximum(intersect_wh, 0.0) + + # [batch, 49, box] + intersect_area1 = tf.mul(intersect_wh[:,:,0,0], intersect_wh[:,:,0,1]) + intersect_area2 = tf.mul(intersect_wh[:,:,1,0], intersect_wh[:,:,1,1]) + inferior_cell = intersect_area1 > intersect_area2 + inferior_cell = tf.to_float(inferior_cell) + + # [batch, 49] + confs1 = tf.mul(inferior_cell, self.confs1) + confs2 = tf.mul((1.-inferior_cell), self.confs2) + confs1 = tf.expand_dims(confs1, -1) + confs2 = tf.expand_dims(confs2, -1) + confs = tf.concat(2, [confs1, confs2]) + # [batch, 49, 2] + + mult = inferior_cell + conid1 = tf.mul(mult, self.conid1) + conid2 = tf.mul((1. - mult), self.conid2) + conid1 = tf.expand_dims(conid1, -1) + conid2 = tf.expand_dims(conid2, -1) + conid = tf.concat(2, [conid1, conid2]) + # [batch, 49, 2] + + times = tf.expand_dims(inferior_cell, -1) # [batch, 49, 1] + times = tf.expand_dims(times, 2) # [batch, 49, 1, 1] + times = tf.concat(3, [times]*4) # [batch, 49, 1, 4] + cooid1 = tf.mul(times, self.cooid1) + cooid2 = (1. - times) * self.cooid2 + cooid = tf.concat(2, [cooid1, cooid2]) # [batch, 49, 2, 4] + + confs = tf.reshape(confs, + [-1, int(np.prod(confs.get_shape()[1:]))]) + conid = tf.reshape(conid, + [-1, int(np.prod(conid.get_shape()[1:]))]) + cooid = tf.reshape(cooid, + [-1, int(np.prod(cooid.get_shape()[1:]))]) + + conid = conid + tf.to_float(conid > .5) * (self.scale_conf - 1.) + conid = conid + tf.to_float(conid < .5) * self.scale_noobj + + true = tf.concat(1,[self.true_class, confs, self.true_coo]) + idtf = tf.concat(1,[self.class_idtf * self.scale_prob, conid, + cooid * self.scale_coor]) + + self.loss = tf.pow(self.out - true, 2) + self.loss = tf.mul(self.loss, idtf) + self.loss = tf.reduce_sum(self.loss, 1) + self.loss = .5 * tf.reduce_mean(self.loss) + + optimizer = tf.train.RMSPropOptimizer(self.learning_rate) + gradients = optimizer.compute_gradients(self.loss) + self.train_op = optimizer.apply_gradients(gradients) + + def train(self, train_set, annotate, batch_size, epoch): + batches = shuffle(train_set, annotate, self.C, self.S, batch_size, epoch) + for i, batch in enumerate(batches): + x_batch, datum = batch + feed_dict = { + self.inp : x_batch, + self.drop : .5, + self.true_class : datum[0], + self.confs1 : datum[1], + self.confs2 : datum[2], + self.true_coo : datum[3], + self.upleft : datum[4], + self.botright : datum[5], + self.class_idtf : datum[6], + self.conid1 : datum[7], + self.conid2 : datum[8], + self.cooid1 : datum[9], + self.cooid2 : datum[10], + } + _, loss = self.sess.run([self.train_op, self.loss], feed_dict) + print 'step {} - batch {} - loss {}'.format(1+i+self.step, 1+i, loss) + if (i+1) % (self.save_every/batch_size) == 0: + print 'save checkpoint and binaries at step {}'.format(self.step+i+1) + self.saver.save(self.sess, 'backup/model-{}'.format(self.step+i+1)) + self.to_constant(inc = i+1) + + print 'save checkpoint and binaries at step {}'.format(self.step+i+1) + self.saver.save(self.sess, 'backup/model-{}'.format(self.step+i+1)) + self.to_constant(inc = i+1) + + def predict(self, FLAGS): + img_path = FLAGS.test + threshold = FLAGS.threshold + all_img_ = os.listdir(img_path) + batch = min(FLAGS.batch, len(all_img_)) + for j in range(len(all_img_)/batch): + img_feed = list() + all_img = all_img_[j*batch: (j*batch+batch)] + new_all = list() + for img in all_img: + if '.xml' in img: continue + new_all += [img] + this_img = '{}/{}'.format(img_path, img) + this_img = crop(this_img) + img_feed.append(this_img) + img_feed.append(this_img[:,:,::-1,:]) + all_img = new_all + + feed_dict = { + self.inp : np.concatenate(img_feed, 0), + self.drop : 1.0 + } + + print ('Forwarding {} images ...'.format(len(img_feed))) + start = time.time() + out = self.sess.run([self.out], feed_dict) + stop = time.time() + last = stop - start + print ('Total time = {}s / {} imgs = {} fps'.format( + last, len(img_feed), len(img_feed) / last)) + for i, prediction in enumerate(out[0]): + draw_predictions( + prediction, + '{}/{}'.format(img_path, all_img[i/2]), + i % 2, threshold, + self.C, self.S, self.labels, self.colors) + print ('Results stored in results/') diff --git a/Yolo.py b/Yolo.py new file mode 100644 index 000000000..4dd7a1a3c --- /dev/null +++ b/Yolo.py @@ -0,0 +1,146 @@ +import numpy as np +import os +import tensorflow as tf +import time +from configs.process import cfg_yielder + +labels20 = ["aeroplane", "bicycle", "bird", "boat", "bottle", + "bus", "car", "cat", "chair", "cow", "diningtable", "dog", + "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", + "train", "tvmonitor"] +default_models = ['full', 'small', 'tiny'] + +class layer: + def __init__(self, type, size = 0, + c = 0, n = 0, h = 0, w = 0): + self.type = type + self.size = size + self.c, self.n = (c, n) + self.h, self.w = (h, w) + +class maxpool_layer(layer): + def __init__(self, size, c, n, h, w, stride, pad): + layer.__init__(self, 'MAXPOOL', + size, c, n, h, w) + self.stride = stride + self.pad = pad + +class convolu_layer(layer): + def __init__(self, size, c, n, h, w, stride, pad): + layer.__init__(self, 'CONVOLUTIONAL', + size, c, n, h, w) + self.stride = stride + self.pad = pad + +class connect_layer(layer): + def __init__(self, size, c, n, h, w, + input_size, output_size): + layer.__init__(self, 'CONNECTED', + size, c, n, h, w) + self.output_size = output_size + self.input_size = input_size + +class YOLO(object): + + layers = [] + S = int() + model = str() + + def __init__(self, model): + with open('labels.txt', 'r') as f: + pick = f.readlines() + for i in range(len(pick)): pick[i] = pick[i].strip() + if model in default_models: pick = labels20 + self.labels = pick + self.model = model + self.layers = [] + self.build(model) + self.layer_number = len(self.layers) + postfix = int('-' in model) * 'binaries/' + weight_file = postfix + 'yolo-{}.weights'.format(model) + print ('Loading {} ...'.format(weight_file)) + start = time.time() + self.loadWeights(weight_file) + stop = time.time() + print ('Finished in {}s'.format(stop - start)) + + def build(self, model): + cfg = model.split('-')[0] + print ('parsing yolo-{}.cfg'.format(cfg)) + layers = cfg_yielder(cfg) + for i, info in enumerate(layers): + if i == 0: + self.S = info + continue + if len(info) == 1: new = layer(type = info[0]) + if info[0] == 'conv': new = convolu_layer(*info[1:]) + if info[0] == 'pool': new = maxpool_layer(*info[1:]) + if info[0] == 'conn': new = connect_layer(*info[1:]) + self.layers.append(new) + + def loadWeights(self, weight_path): + self.startwith = np.array( + np.memmap(weight_path, mode = 'r', + offset = 0, shape = (), + dtype = '(4)i4,')) + #self.startwith = np.array(self.startwith) + offset = 16 + chunkMB = 1000 + chunk = int(chunkMB * 2**18) + + # Read byte arrays from file + for i in range(self.layer_number): + l = self.layers[i] + if l.type == "CONVOLUTIONAL": + weight_number = l.n * l.c * l.size * l.size + l.biases = np.memmap(weight_path, mode = 'r', + offset = offset, shape = (), + dtype = '({})float32,'.format(l.n)) + offset += 4 * l.n + l.weights = np.memmap(weight_path, mode = 'r', + offset = offset, shape = (), + dtype = '({})float32,'.format(weight_number)) + offset += 4 * weight_number + + elif l.type == "CONNECTED": + bias_number = l.output_size + weight_number = l.output_size * l.input_size + l.biases = np.memmap(weight_path, mode = 'r', + offset = offset, shape = (), + dtype = '({})float32,'.format(bias_number)) + offset += bias_number * 4 + + chunks = [chunk] * (weight_number / chunk) + chunks += [weight_number % chunk] + l.weights = np.array([], dtype = np.float32) + for c in chunks: + l.weights = np.concatenate((l.weights, + np.memmap(weight_path, mode = 'r', + offset = offset, shape = (), + dtype = '({})float32,'.format(c)))) + offset += c * 4 + + # Defensive python right here bietch. + if offset == os.path.getsize(weight_path): + print ('Successfully identified all {} bytes'.format( + offset)) + else: + print 'expect ', offset, ' bytes, found ', os.path.getsize(weight_path) + exit() + + # Reshape + for i in range(self.layer_number): + l = self.layers[i] + + if l.type == 'CONVOLUTIONAL': + weight_array = l.weights + weight_array = np.reshape(weight_array, + [l.n, l.c, l.size, l.size]) + weight_array = weight_array.transpose([2,3,1,0]) + l.weights = weight_array + + if l.type == 'CONNECTED': + weight_array = l.weights + weight_array = np.reshape(weight_array, + [l.input_size, l.output_size]) + l.weights = weight_array diff --git a/box.py b/box.py new file mode 100644 index 000000000..e52f0f6f6 --- /dev/null +++ b/box.py @@ -0,0 +1,44 @@ +import numpy as np + +class BoundBox: + def __init__(self, classes): + self.x = 0 + self.y = 0 + self.h = 0 + self.w = 0 + self.c = 0 + self.class_num = 0 + self.probs = np.zeros((classes,)) + +def overlap(x1,w1,x2,w2): + l1 = x1 - w1/2.; + l2 = x2 - w2/2.; + left = max(l1, l2) + r1 = x1 + w1/2.; + r2 = x2 + w2/2.; + right = min(r1, r2) + return right - left; + +def box_intersection(a, b): + w = overlap(a.x, a.w, b.x, b.w); + h = overlap(a.y, a.h, b.y, b.h); + if (w < 0 or h < 0): + return 0; + area = w * h; + return area; + +def box_union(a, b): + i = box_intersection(a, b); + u = a.w*a.h + b.w*b.h - i; + return u; + +def box_iou(a, b): + return box_intersection(a, b)/box_union(a, b); + +def prob_compare(boxa,boxb): + if(boxa.probs[boxa.class_num] < boxb.probs[boxb.class_num]): + return 1 + elif(boxa.probs[boxa.class_num] == boxb.probs[boxb.class_num]): + return 0 + else: + return -1 \ No newline at end of file diff --git a/clean.py b/clean.py new file mode 100644 index 000000000..c9a45ddc3 --- /dev/null +++ b/clean.py @@ -0,0 +1,117 @@ +import os +import numpy as np +import cv2 +import cPickle as pickle +import sys + +if len(sys.argv) == 1: + ANN = '../pascal/VOCdevkit/ANN' +else: + ANN = sys.argv[1] + +# ---- CONSTANTS------- +labels20 = ["aeroplane", "bicycle", "bird", "boat", "bottle", + "bus", "car", "cat", "chair", "cow", "diningtable", "dog", + "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", + "train", "tvmonitor"] + +with open('labels.txt', 'r') as f: + pick = f.readlines() + for i in range(len(pick)): pick[i] = pick[i].strip() + +def pp(l): + for i in l: print '{}: {}'.format(i,l[i]) + +def parse(line): + x = line.split('>')[1].split('<')[0] + try: + r = int(x) + except: + r = x + return r + +dumps = list() +tempdir = os.getcwd() +os.chdir(ANN) +size = len(os.listdir('.')) + +for i, file in enumerate(os.listdir('.')): + + sys.stdout.write('\r') + percentage = 1. * i / size + progress = int(percentage * 20) + sys.stdout.write('[{}>{}]{:.0f}%'.format(progress*'=',' '*(19-progress),percentage*100)) + sys.stdout.flush() + + if file.split('.')[1] != 'xml': + continue + with open(file, 'r') as f: + lines = f.readlines() + + w = h = int() + all = current = list() + obj = False + noHuman = True + noPlant = True + for line in lines: + if '' in line: + w = parse(line) + if '' in line: + h = parse(line) + if '' in line: + obj = True + if '' in line: + obj = False + if '' in line: + obj = False + if '' in line: + obj = True + if not obj: continue + if '' in line: + if current != list() and current[0] in pick: + all += [current] + if current[0] == 'person': noHuman = False + if current[0] == 'pottedplant': noPlant = False + current = list() + name = parse(line) + if name not in pick: + obj = False + continue + current = [name,None,None,None,None] + xn = '' in line + xx = '' in line + yn = '' in line + yx = '' in line + if xn: current[1] = parse(line) + if xx: current[3] = parse(line) + if yn: current[2] = parse(line) + if yx: current[4] = parse(line) + + if current != list() and current[0] in pick: + all += [current] + if current[0] == 'person': noHuman = False + if current[0] == 'pottedplant': noPlant = False + + if all == list(): continue + jpg = file.split('.')[0]+'.jpg' + add = [[jpg, [w, h, all]]] * (1 + noHuman* (15 + noPlant * 11)) + dumps += add + + +stat = dict() +for dump in dumps: + all = dump[1][2] + for current in all: + if current[0] in pick: + if current[0] in stat: + stat[current[0]]+=1 + else: + stat[current[0]] =1 + +print +print 'Statistics:' +pp(stat) +print 'Dataset size: {}'.format(len(dumps)) +with open('parsed.yolotf', 'wb') as f: + pickle.dump([pick, dumps],f,protocol=-1) +os.chdir(tempdir) \ No newline at end of file diff --git a/configs/__init__.py b/configs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/configs/process.py b/configs/process.py new file mode 100644 index 000000000..6f8bdeac5 --- /dev/null +++ b/configs/process.py @@ -0,0 +1,120 @@ +import numpy as np +import os + +def cfg_yielder(model, mode = True): + # Parse --------------------------------------- + with open('configs/yolo-{}.cfg'.format(model), 'rb') as f: + lines = f.readlines() + + s = [] + S = int() + add = dict() + for line in lines: + line = line.strip() + if 'side' in line: + S = int(line.split('=')[1].strip()) + if '[' in line: + if add != {}: + s += [add] + add = dict() + else: + try: + i = float(line.split('=')[1].strip()) + if i == int(i): i = int(i) + add[line.split('=')[0]] = i + except: + try: + if line.split('=')[1] == 'leaky' and 'output' in add: + add[line.split('=')[0]] = line.split('=')[1] + except: + pass + yield S + # Interprete--------------------------------------- + weightf = 'yolo-{}.weights'.format(model) + if mode: + allbytes = os.path.getsize('yolo-{}.weights'.format(model)) + allbytes /= 4 + allbytes -= 4 + last_convo = int() + for i, d in enumerate(s): + if len(d) == 4: + last_convo = i + flag = False + channel = 3 + out = int() + for i, d in enumerate(s): + if len(d) == 4: + allbytes -= d['size'] ** 2 * channel * d['filters'] + allbytes -= d['filters'] + channel = d['filters'] + elif 'output' in d: + if flag is False: + out = out1 = d['output'] + flag = True + continue + allbytes -= out * d['output'] + allbytes -= d['output'] + out = d['output'] + allbytes -= out1 + size = (np.sqrt(allbytes/out1/channel)) + size = int(size) + n = last_convo + 1 + while 'output' not in s[n]: + size *= s[n].get('size',1) + n += 1 + else: + last_convo = None + size = None + + w = 448 + h = 448 + c = 3 + l = w * h * c + flat = False + yield ['CROP'] + for i, d in enumerate(s): + #print w, h, c, l + flag = False + if len(d) == 4: + mult = (d['size'] == 3) + mult *= (d['stride'] != 2) + 1. + if d['size'] == 1: d['pad'] = 0 + new = (w + mult * d['pad'] - d['size']) + new /= d['stride'] + new = int(np.floor(new + 1.)) + if i == last_convo: + d['pad'] = -size + new = size + yield ['conv', d['size'], c, d['filters'], + h, w, d['stride'], d['pad']] + w = h = new + c = d['filters'] + l = w * h * c + #print w, h, c + if len(d) == 2: + if 'output' not in d: + yield ['pool', d['size'], 0, + 0, 0, 0, d['stride'], 0] + new = (w * 1.0 - d['size'])/d['stride'] + 1 + new = int(np.floor(new)) + w = h = new + l = w * h * c + else: + if not flat: + flat = True + yield ['FLATTEN'] + yield ['conn', 0, 0, + 0, 0, 0, l, d['output']] + l = d['output'] + if 'activation' in d: + yield ['LEAKY'] + if len(d) == 1: + if 'output' not in d: + yield ['DROPOUT'] + else: + if not flat: + flat = True + yield ['FLATTEN'] + yield ['conn', 0, 0, + 0, 0, 0, l, d['output']] + l = d['output'] \ No newline at end of file diff --git a/configs/yolo-3c.cfg b/configs/yolo-3c.cfg new file mode 100644 index 000000000..921da109a --- /dev/null +++ b/configs/yolo-3c.cfg @@ -0,0 +1,76 @@ +[convolutional] +filters=4 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=4 +stride=4 + +[convolutional] +filters=8 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=4 +stride=4 + +[convolutional] +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[connected] +output=1024 +activation=linear + +[connected] +output=2048 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 1573 +activation=linear + +[detection] +classes=3 +side=11 \ No newline at end of file diff --git a/configs/yolo-8c.cfg b/configs/yolo-8c.cfg new file mode 100644 index 000000000..6bf66b7f9 --- /dev/null +++ b/configs/yolo-8c.cfg @@ -0,0 +1,108 @@ +[net] +batch=64 +subdivisions=2 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.0001 +policy=steps +steps=20,40,60,80,20000,30000 +scales=5,5,2,2,.1,.1 +max_batches = 40000 + +[crop] +crop_width=448 +crop_height=448 +flip=0 +angle=0 +saturation = 1.5 +exposure = 1.5 + +[convolutional] +filters=4 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=8 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=4 +stride=4 + +[convolutional] +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[connected] +output=256 +activation=linear + +[connected] +output=4096 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 1573 +activation=linear + +[detection] +classes=3 +coords=4 +rescore=1 +side=11 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/configs/yolo-coco.cfg b/configs/yolo-coco.cfg new file mode 100644 index 000000000..0c13a3126 --- /dev/null +++ b/configs/yolo-coco.cfg @@ -0,0 +1,240 @@ +[net] +batch=64 +subdivisions=4 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.0001 +policy=steps +steps=100,200,300,100000,150000 +scales=2.5,2,2,.1,.1 +max_batches = 300000 + +[crop] +crop_width=448 +crop_height=448 +flip=0 +angle=0 +saturation = 1.5 +exposure = 1.5 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +################################# + + +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=3 +stride=2 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[local] +size=3 +stride=1 +pad=1 +filters=192 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 4410 +activation=linear + +[detection] +classes=80 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/configs/yolo-full.cfg b/configs/yolo-full.cfg new file mode 100644 index 000000000..9eb08d942 --- /dev/null +++ b/configs/yolo-full.cfg @@ -0,0 +1,234 @@ +[net] +batch=64 +subdivisions=64 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[crop] +crop_width=448 +crop_height=448 +flip=0 +angle=0 +saturation = 1.5 +exposure = 1.5 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +####### + +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=3 +stride=2 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[connected] +output=4096 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 \ No newline at end of file diff --git a/configs/yolo-small.cfg b/configs/yolo-small.cfg new file mode 100644 index 000000000..2a84485b2 --- /dev/null +++ b/configs/yolo-small.cfg @@ -0,0 +1,239 @@ +[net] +batch=64 +subdivisions=64 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[crop] +crop_width=448 +crop_height=448 +flip=0 +angle=0 +saturation = 1.5 +exposure = 1.5 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +####### + +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=3 +stride=2 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[connected] +output=512 +activation=leaky + +[connected] +output=4096 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/configs/yolo-tiny.cfg b/configs/yolo-tiny.cfg new file mode 100644 index 000000000..8d139e8fd --- /dev/null +++ b/configs/yolo-tiny.cfg @@ -0,0 +1,138 @@ +[net] +batch=64 +subdivisions=64 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.0001 +policy=steps +steps=20,40,60,80,20000,30000 +scales=5,5,2,2,.1,.1 +max_batches = 40000 + +[crop] +crop_width=448 +crop_height=448 +flip=0 +angle=0 +saturation = 1.5 +exposure = 1.5 + +[convolutional] +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[connected] +output=256 +activation=linear + +[connected] +output=4096 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 \ No newline at end of file diff --git a/labels.txt b/labels.txt new file mode 100644 index 000000000..95d1ed6c7 --- /dev/null +++ b/labels.txt @@ -0,0 +1,3 @@ +tvmonitor +pottedplant +person \ No newline at end of file diff --git a/makew.py b/makew.py new file mode 100644 index 000000000..da59e061f --- /dev/null +++ b/makew.py @@ -0,0 +1,77 @@ +from configs.process import cfg_yielder +from Yolo import * +import numpy as np +import os +import sys + + +src = sys.argv[1] +des = sys.argv[2] + +wlayer = ['CONVOLUTIONAL', 'CONNECTED'] +class collector(object): + def __init__(self, yolo): + self.i = 0 + self.yolo = yolo + def inc(self): + while self.yolo.layers[self.i].type not in wlayer: + self.i += 1 + if self.i == len(self.yolo.layers): + break + def give(self): + self.inc() + l = self.yolo.layers[self.i] + w = l.weights + if l.type == 'CONVOLUTIONAL': + w = w.transpose([3,2,0,1]) + w = w.reshape([-1]) + w = np.concatenate((l.biases, w)) + self.i += 1 + return np.float32(w) + +yolo = YOLO(src) +col = collector(yolo) +mark = int(1) +writer = open('yolo-{}.weights'.format(des),'w') +writer.write(np.int32(yolo.startwith).tobytes()) +offset = int(16) +flag = True + +# PHASE 01: recollect +print 'recollect:' +for i, k in enumerate(zip(cfg_yielder(des, False), + cfg_yielder(src, False))): + if not i: continue + if k[0][:] != k[1][:] and k[0][0] in ['conv', 'conn']: + flag = False + if flag: + k = k[0] + if k[0] not in ['conv', 'conn']: continue + w = col.give() + writer.write(w.tobytes()) + offset += w.shape[0] * 4 + print k + elif not flag: + mark = i + break + +# PHASE 02: random init +print 'random init:' +if not flag: + for i, k in enumerate(cfg_yielder(des, False)): + if i < mark: continue + if k[0] not in ['conv','conn']: continue + print k + if k[0] == 'conv': + w = np.random.normal( + scale = .05, + size = (k[1]*k[1]*k[2]*k[3]+k[3],)) + else: + w = np.random.normal( + scale = .05, + size = (k[6]*k[7]+k[7],)) + w = np.float32(w) + writer.write(w.tobytes()) + offset += w.shape[0] * 4 +writer.close() +print 'total size: {} bytes'.format(offset) \ No newline at end of file diff --git a/tensor.py b/tensor.py new file mode 100644 index 000000000..8006b5bf7 --- /dev/null +++ b/tensor.py @@ -0,0 +1,53 @@ +from Yolo import * +from box import * +from TFnet import * +from tensorflow import flags +import sys +import time +import os + +flags.DEFINE_string("test", "data", "path to testing folder") +flags.DEFINE_string("pascal", "../pascal/VOCdevkit", "path to training set") +flags.DEFINE_float("threshold", 0.1, "detection threshold") +flags.DEFINE_string("model", "3c", "yolo configuration of choice") +flags.DEFINE_boolean("train", False, "training mode or not?") +flags.DEFINE_boolean("load", False, "load the newest train in backup/checkpoint") +flags.DEFINE_boolean("savepb", False, "save net and weight to a .pb file") +flags.DEFINE_float("gpu", 0.0, "How much gpu (from 0.0 to 1.0)") +flags.DEFINE_float("lr", 1e-5, "Learning rate") +flags.DEFINE_string("scale", "1,1,.5,5.", + "Comma-separated scaling for probability, confidence, noobj, coordinate terms in the loss") +flags.DEFINE_integer("keep",20,"Number of most recent training results to save") +flags.DEFINE_integer("batch", 12, "Batch size") +flags.DEFINE_integer("epoch", 1000, "Number of epoch") +flags.DEFINE_integer("save", 2000, "Save checkpoint every ? training examples") +FLAGS = flags.FLAGS +image = FLAGS.pascal + '/IMG/' +annot = FLAGS.pascal + '/ANN/' + 'parsed.yolotf' + +step = int() +if FLAGS.load: + try: + with open('backup/checkpoint','r') as f: + lines = f.readlines() + except: + sys.exit('Seems like there is no recent training in backup/') + name = lines[-1].split(' ')[1].split('"')[1] + step = int(name.split('-')[1]) +yoloNet = YOLO(FLAGS.model + int(step > 0) * '-{}'.format(step)) + +print ('Compiling net & initialise parameters...') +start = time.time() +if FLAGS.gpu <= 0.: + with tf.device('cpu:0'): + model = SimpleNet(yoloNet, FLAGS) +else: + model = SimpleNet(yoloNet, FLAGS) +model.step = step +model.setup_meta_ops(FLAGS) +print ('Finished in {}s'.format(time.time() - start)) + +if FLAGS.train: + print 'training mode' + model.train(image, annot, FLAGS.batch, FLAGS.epoch) +model.predict(FLAGS) \ No newline at end of file