Skip to content

Commit

Permalink
decouple yolo from the rest
Browse files Browse the repository at this point in the history
  • Loading branch information
thtrieu committed Nov 11, 2016
1 parent 1d6135b commit a4f222f
Show file tree
Hide file tree
Showing 20 changed files with 1,598 additions and 920 deletions.
9 changes: 9 additions & 0 deletions backup/checkpoint
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
model_checkpoint_path: "model-400"
all_model_checkpoint_paths: "model-50"
all_model_checkpoint_paths: "model-100"
all_model_checkpoint_paths: "model-150"
all_model_checkpoint_paths: "model-200"
all_model_checkpoint_paths: "model-250"
all_model_checkpoint_paths: "model-300"
all_model_checkpoint_paths: "model-350"
all_model_checkpoint_paths: "model-400"
39 changes: 27 additions & 12 deletions clean.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
"""
file: ./clean.py
includes: a script to parse Pascal VOC data
this script produces the binary file parsed.bin, which contains
a cPickle dump of a list. Each element in the list corresponds
to an image, the element in turn contains a list of parsed bounding
boxes' coordinates and associated classes of each object defined
in labels.txt. If labels.txt is left blank, the default choice of
all twenty objects are used (see list labels20 below).
The cPickle dump will be used mainly by ./data.py, inside function
shuffle(). shuffle() will shuffle and cut the dump into batches,
preprocess them so that they are ready to be fed into net.
WARNING: this script is messy, it hurts to read :(
"""

import os
import numpy as np
import cv2
Expand All @@ -15,9 +32,10 @@
"horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
"train", "tvmonitor"]

pick = list()
with open('labels.txt', 'r') as f:
pick = f.readlines()
for i in range(len(pick)): pick[i] = pick[i].strip()
pick = [l.strip() for l in f.readlines()]
if pick == list(): pick = labels20

def pp(l):
for i in l: print '{}: {}'.format(i,l[i])
Expand Down Expand Up @@ -50,10 +68,10 @@ def parse(line):

w = h = int()
all = current = list()
name = str()
obj = False
noHuman = True
noPlant = True
for line in lines:
for i in range(len(lines)):
line = lines[i]
if '<width>' in line:
w = parse(line)
if '<height>' in line:
Expand All @@ -70,14 +88,13 @@ def parse(line):
if '<name>' in line:
if current != list() and current[0] in pick:
all += [current]
if current[0] == 'person': noHuman = False
if current[0] == 'pottedplant': noPlant = False
current = list()
name = parse(line)
if name not in pick:
obj = False
continue
current = [name,None,None,None,None]
if len(current) != 5: continue
xn = '<xmin>' in line
xx = '<xmax>' in line
yn = '<ymin>' in line
Expand All @@ -89,12 +106,10 @@ def parse(line):

if current != list() and current[0] in pick:
all += [current]
if current[0] == 'person': noHuman = False
if current[0] == 'pottedplant': noPlant = False

if all == list(): continue
jpg = file.split('.')[0]+'.jpg'
add = [[jpg, [w, h, all]]] * (1 + noHuman* (15 + noPlant * 11))
add = [[jpg, [w, h, all]]]
dumps += add


Expand All @@ -112,6 +127,6 @@ def parse(line):
print 'Statistics:'
pp(stat)
print 'Dataset size: {}'.format(len(dumps))
with open('parsed.yolotf', 'wb') as f:
pickle.dump([pick, dumps],f,protocol=-1)
with open('parsed.bin', 'wb') as f:
pickle.dump([dumps],f,protocol=-1)
os.chdir(tempdir)
226 changes: 113 additions & 113 deletions configs/process.py
Original file line number Diff line number Diff line change
@@ -1,148 +1,148 @@
import numpy as np
import os

def cfg_yielder(model, undiscovered = True):
def _parse(l): return l.split('=')[1].strip()

def parser(model):
"""
yielding each layer information, i.e. yielding type & size
of each layer of `model`.
For various reasons, it is not always the case that the
.cfg file will successfully match the size of the .weights
file, so one would need to investigate the
.weights file if s/he is parsing the .cfg file for the first
time (undiscovered = True) in order to adjust the parsing
appropriately.
Read the .cfg file to extract layers into `s`
as well as model-specific parameters into `meta`
"""

# Step 1: parsing cfg file
with open('configs/yolo-{}.cfg'.format(model), 'rb') as f:
lines = f.readlines()

s = [] # contains layers' info
S = int() # the number of grid cell
lines = f.readlines()

s = [] # will contain layers' info
add = dict()
for line in lines:
line = line.strip()
if 'side' in line:
S = int(line.split('=')[1].strip())
# deepnet general layers
if '[' in line:
if add != {}:
s += [add]
add = dict()
if add != {}: s += [add]
add = {'type':line}
else:
try:
i = float(line.split('=')[1].strip())
i = float(_parse(line))
if i == int(i): i = int(i)
add[line.split('=')[0]] = i
except:
try:
if line.split('=')[1] == 'leaky' and 'output' in add:
add[line.split('=')[0]] = line.split('=')[1]
if _parse(line) == 'leaky':
add['activation'] = 'leaky'
except:
pass
yield S
add['model'] = model
return s, add

def discoverer(weightf, s):
"""
discoverer returns:
1. index of last convolutional layer
2. the expected size of this conv layer's kernel
"""
allbytes = os.path.getsize(weightf)
allfloat = allbytes/4; allfloat -= 4
last_convo = int()
for i, d in enumerate(s):
if len(d) >= 4:
last_convo = i
channel = 3; dense = False # flag for 1st dense layer
out = int()
for i, d in enumerate(s):
# ignore darknet specifications
if 'batch' in d: continue
if 'crop_width' in d: continue
if 'side' in d: continue

if d['type'] == '[convolutional]':
kernel = d['size'] ** 2 * channel * d['filters']
allfloat -= kernel + d['filters']
channel = d['filters']
if 'batch_normalize' in d:
allfloat -= 2 * d['filters']
elif d['type'] == '[connected]':
if dense is False:
out = out1 = d['output']
dense = True; continue
weight = out * d['output']
allfloat -= weight + d['output']
out = d['output']

allfloat -= out1 # subtract the bias
if allfloat <= 0:
message = 'yolo-{}.cfg suggests a bigger size'
message += ' than yolo-{}.weights actually is'
exit('Error: {}'.format(message.format(model, model)))

# expected size of last convolution kernel
size = (np.sqrt(1.*allfloat/out1/channel))
print 'Last convolutional kernel size = {}'.format(size)
size = int(size)
n = last_convo + 1
while 'output' not in s[n]:
size *= s[n].get('size',1)
n += 1
return last_convo, size

def cfg_yielder(model, undiscovered = True):
"""
yielding each layer information, if model is discovered
for the first time (undiscovered = True), discoverer
will be employed
"""

layers, meta = parser(model); yield meta

# Step 2: investigate the weight file
weightf = 'yolo-{}.weights'.format(model)
if undiscovered:
allbytes = os.path.getsize('yolo-{}.weights'.format(model))
allbytes /= 4 # each float is 4 byte
allbytes -= 4 # the first 4 bytes are darknet specifications
last_convo = int()
for i, d in enumerate(s):
if len(d) == 4:
last_convo = i # the index of last convolution layer
flag = False
channel = 3 # initial number of channel in the tensor volume
out = int()
for i, d in enumerate(s):
# for each iteration in this loop
# allbytes will be gradually subtracted
# by the size of the corresponding layer (d)
# except for the 1st dense layer
# it should be what remains after subtracting
# all other layers
if len(d) == 4:
allbytes -= d['size'] ** 2 * channel * d['filters']
allbytes -= d['filters']
channel = d['filters']
elif 'output' in d: # this is a dense layer
if flag is False: # this is the first dense layer
out = out1 = d['output'] # output unit of the 1st dense layer
flag = True # mark that the 1st dense layer is passed
continue # don't do anything with the 1st dense layer
allbytes -= out * d['output']
allbytes -= d['output']
out = d['output']
allbytes -= out1 # subtract the bias
if allbytes <= 0:
message = "Error: yolo-{}.cfg suggests a bigger size"
message += " than yolo-{}.weights actually is"
print message.format(model, model)
assert allbytes > 0
# allbytes is now = I * out1
# where I is the input size of the 1st dense layer
# I is also the volume of the last convolution layer
# I = size * size * channel
size = (np.sqrt(allbytes/out1/channel))
size = int(size)
n = last_convo + 1
while 'output' not in s[n]:
size *= s[n].get('size',1)
n += 1
else:
last_convo = None
size = None
weightf = 'yolo-{}.weights'.format(model)
last_convo, size = discoverer(weightf, layers)
else: last_convo = None; size = None

# Step 3: Yielding config
w = 448
h = 448
c = 3
l = w * h * c
flat = False
yield ['CROP']
for i, d in enumerate(s):
#print w, h, c, l
flag = False
if len(d) == 4:
# Start yielding
w = 448; h = 448; c = 3; l = w * h * c
yield ['CROP']; flat = False # flag for 1st dense layer
for i, d in enumerate(layers):
# ignore darknet specifications
if 'batch' in d: continue
if 'crop_width' in d: continue
if 'side' in d: continue

if d['type'] == '[convolutional]':
mult = (d['size'] == 3)
mult *= (d['stride'] != 2) + 1.
if d['size'] == 1: d['pad'] = 0
new = (w + mult * d['pad'] - d['size'])
new /= d['stride']
new = int(np.floor(new + 1.))
if i == last_convo:
# signal tfnet to figure out the pad itself
# to achieve the desired `size`. Namely, to
# use the negative sign:
d['pad'] = -size
new = size
yield ['conv', d['size'], c, d['filters'],
h, w, d['stride'], d['pad']]
w = h = new
c = d['filters']
l = w * h * c
#print w, h, c
if len(d) == 2:
if 'output' not in d:
yield ['pool', d['size'], 0,
0, 0, 0, d['stride'], 0]
new = (w * 1.0 - d['size'])/d['stride'] + 1
new = int(np.floor(new))
w = h = new
l = w * h * c
else:
if not flat:
flat = True
yield ['FLATTEN']
yield ['conn', 0, 0,
0, 0, 0, l, d['output']]
l = d['output']
if 'activation' in d:
yield ['LEAKY']
if len(d) == 1:
if 'output' not in d:
yield ['DROPOUT']
else:
if not flat:
flat = True
yield ['FLATTEN']
yield ['conn', 0, 0,
0, 0, 0, l, d['output']]
l = d['output']
if 'batch_normalize' in d:
yield['bnrm', 0, 0, c, 0, 0]
if 'activation' in d: yield ['leaky']

if d['type'] == '[maxpool]':
yield ['pool', d['size'], 0,
0, 0, 0, d['stride'], 0]
new = (w * 1.0 - d['size'])/d['stride'] + 1
new = int(np.floor(new))
w = h = new
l = w * h * c

if d['type'] == '[connected]':
if not flat:
yield ['flatten']
flat = True
yield ['conn'] + [0] * 5 + [l, d['output']]
l = d['output']
if 'activation' in d: yield ['leaky']

if d['type'] == '[dropout]':
yield ['drop', d['probability']]
Loading

0 comments on commit a4f222f

Please sign in to comment.