diff --git a/backup/checkpoint b/backup/checkpoint
new file mode 100644
index 000000000..19bb6a253
--- /dev/null
+++ b/backup/checkpoint
@@ -0,0 +1,9 @@
+model_checkpoint_path: "model-400"
+all_model_checkpoint_paths: "model-50"
+all_model_checkpoint_paths: "model-100"
+all_model_checkpoint_paths: "model-150"
+all_model_checkpoint_paths: "model-200"
+all_model_checkpoint_paths: "model-250"
+all_model_checkpoint_paths: "model-300"
+all_model_checkpoint_paths: "model-350"
+all_model_checkpoint_paths: "model-400"
diff --git a/clean.py b/clean.py
index c9a45ddc3..d45353e8b 100644
--- a/clean.py
+++ b/clean.py
@@ -1,3 +1,20 @@
+"""
+file: ./clean.py
+includes: a script to parse Pascal VOC data
+this script produces the binary file parsed.bin, which contains
+a cPickle dump of a list. Each element in the list corresponds
+to an image; the element in turn contains a list of parsed bounding
+box coordinates and associated classes of each object defined
+in labels.txt. If labels.txt is left blank, the default choice of
+all twenty objects is used (see list labels20 below).
+
+The cPickle dump will be used mainly by ./data.py, inside function
+shuffle(). shuffle() will shuffle and cut the dump into batches, and
+preprocess them so that they are ready to be fed into the net.
+
+WARNING: this script is messy, it hurts to read :(
+"""
+
 import os
 import numpy as np
 import cv2
@@ -15,9 +32,10 @@
 	"horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
 	"train", "tvmonitor"]
 
+pick = list()
 with open('labels.txt', 'r') as f:
-	pick = f.readlines()
-	for i in range(len(pick)): pick[i] = pick[i].strip()
+	pick = [l.strip() for l in f.readlines()]
+if pick == list(): pick = labels20
 
 def pp(l):
 	for i in l: print '{}: {}'.format(i,l[i])
@@ -50,10 +68,10 @@ def parse(line):
 
 	w = h = int()
 	all = current = list()
+	name = str()
 	obj = False
-	noHuman = True
-	noPlant = True
-	for line in lines:
+	for i in range(len(lines)):
+		line = lines[i]
 		if '<width>' in line:
 			w = parse(line)
 		if '<height>' in line:
@@ -70,14 +88,13 @@ def parse(line):
 		if '<name>' in line:
 			if current != list() and current[0] in pick:
 					all += [current]
-					if current[0] == 'person': noHuman = False
-					if current[0] == 'pottedplant': noPlant = False
 			current = list()
 			name = parse(line)
 			if name not in pick: 
 				obj = False
 				continue
 			current = [name,None,None,None,None]
+		if len(current) != 5: continue
 		xn = '<xmin>' in line
 		xx = '<xmax>' in line
 		yn = '<ymin>' in line
@@ -89,12 +106,10 @@ def parse(line):
 
 	if current != list() and current[0] in pick:
 		all += [current]
-		if current[0] == 'person': noHuman = False
-		if current[0] == 'pottedplant': noPlant = False
 
 	if all == list(): continue
 	jpg = file.split('.')[0]+'.jpg'
-	add = [[jpg, [w, h, all]]] * (1 + noHuman* (15 + noPlant * 11))
+	add = [[jpg, [w, h, all]]]
 	dumps += add
 
 
@@ -112,6 +127,6 @@ def parse(line):
 print 'Statistics:'
 pp(stat)
 print 'Dataset size: {}'.format(len(dumps))
-with open('parsed.yolotf', 'wb') as f:
-	pickle.dump([pick, dumps],f,protocol=-1)
+with open('parsed.bin', 'wb') as f:
+	pickle.dump([dumps],f,protocol=-1)
 os.chdir(tempdir)
\ No newline at end of file
diff --git a/configs/process.py b/configs/process.py
index 20bd162df..b749d2ec4 100644
--- a/configs/process.py
+++ b/configs/process.py
@@ -1,109 +1,112 @@
 import numpy as np
 import os
 
-def cfg_yielder(model, undiscovered = True):
+def _parse(l): return l.split('=')[1].strip()
+    
+def parser(model):
 	"""
-	yielding each layer information, i.e. yielding type & size
-	of each layer of `model`.
-	Because of different reasons, it is not always be the ideal 
-	case that following .cfg file will successfully match the 
-	size of .weights file, so one would need to investigate the
-	.weights file if s/he is parsing the .cfg file for the first 
-	time (undiscovered = True) in order to adjust the parsing 
-	appropriately.
+	Read the .cfg file to extract layers into `s`
+	as well as model-specific parameters into `meta`
 	"""
-
-	# Step 1: parsing cfg file
 	with open('configs/yolo-{}.cfg'.format(model), 'rb') as f:
-		lines = f.readlines()
-
-	s = [] # contains layers' info
-	S = int() # the number of grid cell
+		lines = f.readlines()		
+	
+	s = [] # will contains layers' info
 	add = dict()
 	for line in lines:
 		line = line.strip()
-		if 'side' in line:
-			S = int(line.split('=')[1].strip())
+		# deepnet general layers
 		if '[' in line:
-			if add != {}:
-				s += [add]
-			add = dict()
+			if add != {}: s += [add]
+			add = {'type':line}
 		else:
 			try:
-				i = float(line.split('=')[1].strip())
+				i = float(_parse(line))
 				if i == int(i): i = int(i)
 				add[line.split('=')[0]] = i
 			except:
 				try:
-					if line.split('=')[1] == 'leaky' and 'output' in add:
-						add[line.split('=')[0]] = line.split('=')[1]
+					if _parse(line) == 'leaky':
+						add['activation'] = 'leaky'
 				except:
 					pass
-	yield S
+	add['model'] = model
+	return s, add
+
+def discoverer(weightf, s):
+	"""
+	discoverer returns:
+	1. index of last convolutional layer
+	2. the expected size of this conv layer's kernel
+	"""
+	allbytes = os.path.getsize(weightf)
+	allfloat = allbytes/4; allfloat -= 4 
+	last_convo = int() 
+	for i, d in enumerate(s):
+		if len(d) >= 4:
+			last_convo = i
+	channel = 3; dense = False # flag for 1st dense layer
+	out = int() 
+	for i, d in enumerate(s):
+		# ignore darknet specifications
+		if 'batch' in d: continue
+		if 'crop_width' in d: continue
+		if 'side' in d: continue
+	
+		if d['type'] == '[convolutional]': 
+			kernel = d['size'] ** 2 * channel * d['filters']
+			allfloat -= kernel + d['filters']
+			channel = d['filters']
+			if 'batch_normalize' in d:
+				allfloat -= 2 * d['filters']
+		elif d['type'] == '[connected]':
+			if dense is False: 
+				out = out1 = d['output'] 
+				dense = True; continue 
+			weight = out * d['output']
+			allfloat -= weight + d['output']
+			out = d['output']
+
+	allfloat -= out1 # substract the bias
+	if allfloat <= 0:
+		message = 'yolo-{}.cfg suggests a bigger size'
+		message += ' than yolo-{}.weights actually is'
+		exit('Error: {}'.format(message.format(model, model)))
+	
+	# expected size of last convolution kernel
+	size = (np.sqrt(1.*allfloat/out1/channel))
+	print 'Last convolutional kernel size = {}'.format(size)
+	size = int(size)
+	n = last_convo + 1
+	while 'output' not in s[n]:
+		size *= s[n].get('size',1)
+		n += 1
+	return last_convo, size
+
+def cfg_yielder(model, undiscovered = True):
+	"""
+	Yield each layer's information. If the model is being discovered
+	for the first time (undiscovered = True), discoverer
+	will be employed.
+	"""
+	
+	layers, meta = parser(model); yield meta
 
-	# Step 2: investigate the weight file
-	weightf = 'yolo-{}.weights'.format(model)
 	if undiscovered:
-		allbytes = os.path.getsize('yolo-{}.weights'.format(model))
-		allbytes /= 4 # each float is 4 byte
-		allbytes -= 4 # the first 4 bytes are darknet specifications
-		last_convo = int() 
-		for i, d in enumerate(s):
-			if len(d) == 4:
-				last_convo = i # the index of last convolution layer
-		flag = False
-		channel = 3 # initial number of channel in the tensor volume
-		out = int() 
-		for i, d in enumerate(s):
-    		# for each iteration in this loop
-			# allbytes will be gradually subtracted
-			# by the size of the corresponding layer (d)
-			# except for the 1st dense layer
-			# it should be what remains after subtracting
-			# all other layers
-			if len(d) == 4:
-				allbytes -= d['size'] ** 2 * channel * d['filters']
-				allbytes -= d['filters']
-				channel = d['filters']
-			elif 'output' in d: # this is a dense layer
-				if flag is False: # this is the first dense layer
-					out = out1 = d['output'] # output unit of the 1st dense layer
-					flag = True # mark that the 1st dense layer is passed
-					continue # don't do anything with the 1st dense layer
-				allbytes -= out * d['output']
-				allbytes -= d['output']
-				out = d['output']
-		allbytes -= out1 # substract the bias
-		if allbytes <= 0:
-				message = "Error: yolo-{}.cfg suggests a bigger size"
-				message += " than yolo-{}.weights actually is"
-				print message.format(model, model)
-				assert allbytes > 0
-		# allbytes is now = I * out1
-		# where I is the input size of the 1st dense layer
-		# I is also the volume of the last convolution layer
-		# I = size * size * channel
-		size = (np.sqrt(allbytes/out1/channel)) 
-		size = int(size)
-		n = last_convo + 1
-		while 'output' not in s[n]:
-			size *= s[n].get('size',1)
-			n += 1
-	else:
-		last_convo = None
-		size = None
+		weightf = 'yolo-{}.weights'.format(model)
+		last_convo, size = discoverer(weightf, layers)
+	else: last_convo = None; size = None
 
-	# Step 3: Yielding config
-	w = 448
-	h = 448
-	c = 3
-	l = w * h * c
-	flat = False
-	yield ['CROP']
-	for i, d in enumerate(s):
-		#print w, h, c, l
-		flag = False
-		if len(d) == 4:
+	# Start yielding
+	w = 448; h = 448; c = 3; l = w * h * c
+	yield ['CROP']; flat = False # flag for 1st dense layer
+	for i, d in enumerate(layers):
+		# ignore darknet specifications
+		if 'batch' in d: continue
+		if 'crop_width' in d: continue
+		if 'side' in d: continue
+
+		if d['type'] == '[convolutional]':
 			mult = (d['size'] == 3) 
 			mult *= (d['stride'] != 2) + 1.
 			if d['size'] == 1: d['pad'] = 0
@@ -111,6 +114,9 @@ def cfg_yielder(model, undiscovered = True):
 			new /= d['stride']
 			new = int(np.floor(new + 1.))
 			if i == last_convo:
+    			# signal tfnet to figure out the pad itself
+				# to achieve the desired `size`. Namely, to
+				# use the negative sign:
 				d['pad'] = -size
 				new = size
 			yield ['conv', d['size'], c, d['filters'], 
@@ -118,31 +124,25 @@ def cfg_yielder(model, undiscovered = True):
 			w = h = new
 			c = d['filters']
 			l = w * h * c
-			#print w, h, c
-		if len(d) == 2:
-			if 'output' not in d:
-				yield ['pool', d['size'], 0, 
-					0, 0, 0, d['stride'], 0]
-				new = (w * 1.0 - d['size'])/d['stride'] + 1
-				new = int(np.floor(new))
-				w = h = new
-				l = w * h * c
-			else:
-				if not flat:
-					flat = True
-					yield ['FLATTEN']
-				yield ['conn', 0, 0,
-				0, 0, 0, l, d['output']]
-				l = d['output']
-				if 'activation' in d:
-					yield ['LEAKY']
-		if len(d) == 1:
-			if 'output' not in d:
-				yield ['DROPOUT']
-			else:
-				if not flat:
-					flat = True
-					yield ['FLATTEN']
-				yield ['conn', 0, 0,
-				0, 0, 0, l, d['output']]
-				l = d['output']
\ No newline at end of file
+			if 'batch_normalize' in d: 
+				yield['bnrm', 0, 0, c, 0, 0]
+			if 'activation' in d: yield ['leaky']
+			
+		if d['type'] == '[maxpool]':
+			yield ['pool', d['size'], 0, 
+				0, 0, 0, d['stride'], 0]
+			new = (w * 1.0 - d['size'])/d['stride'] + 1
+			new = int(np.floor(new))
+			w = h = new
+			l = w * h * c
+
+		if d['type'] == '[connected]':
+			if not flat:
+				yield ['flatten']
+				flat = True
+			yield ['conn'] + [0] * 5 + [l, d['output']]
+			l = d['output']
+			if 'activation' in d: yield ['leaky']
+
+		if d['type'] == '[dropout]': 
+			yield ['drop', d['probability']]
\ No newline at end of file
diff --git a/configs/process_.py b/configs/process_.py
new file mode 100644
index 000000000..3f5df526a
--- /dev/null
+++ b/configs/process_.py
@@ -0,0 +1,152 @@
+import numpy as np
+import os
+import sys
+
+model = sys.argv[1]
+undiscovered = True
+
+# Step 1: parsing cfg file
+with open('yolo-{}.cfg'.format(model), 'rb') as f:
+    lines = f.readlines()
+
+s = [] # contains layers' info
+S = int() # the number of grid cell
+add = dict()
+for line in lines:
+    line = line.strip()
+    if 'side' in line:
+        S = int(line.split('=')[1].strip())
+    if '[' in line:
+        if add != {}:
+            s += [add]
+        add = dict()
+    else:
+        try:
+            i = float(line.split('=')[1].strip())
+            if i == int(i): i = int(i)
+            add[line.split('=')[0]] = i
+        except:
+            try:
+                if line.split('=')[1] == 'leaky':
+                    add[line.split('=')[0]] = 'leaky'
+            except:
+                pass
+
+# Step 2: investigate the weight file
+weightf = '../yolo-{}.weights'.format(model)
+if undiscovered:
+    allbytes = os.path.getsize(weightf.format(model))
+    allbytes /= 4 # each float is 4 byte
+    allbytes -= 4 # the first 4 bytes are darknet specifications
+    last_convo = int() 
+    for i, d in enumerate(s):
+        if len(d) == 4:
+            last_convo = i # the index of last convolution layer
+    flag = False
+    channel = 3 # initial number of channel in the tensor volume
+    out = int() 
+    for i, d in enumerate(s):
+        if 'batch' in d: continue
+        if 'crop_width' in d: continue
+        if 'side' in d: continue
+        # for each iteration in this loop
+        # allbytes will be gradually subtracted
+        # by the size of the corresponding layer (d)
+        # except for the 1st dense layer
+        # it should be what remains after subtracting
+        # all other layers
+        if len(d) >= 4:
+            allbytes -= d['size'] ** 2 * channel * d['filters']
+            allbytes -= d['filters']
+            channel = d['filters']
+            if 'batch_normalize' in d:
+                allbytes -= 2 * d['filters']
+        elif 'output' in d: # this is a dense layer
+            if flag is False: # this is the first dense layer
+                out = out1 = d['output'] # output unit of the 1st dense layer
+                flag = True # mark that the 1st dense layer is passed
+                continue # don't do anything with the 1st dense layer
+            allbytes -= out * d['output']
+            allbytes -= d['output']
+            out = d['output']
+    allbytes -= out1 # substract the bias
+    if allbytes <= 0:
+            message = "Error: yolo-{}.cfg suggests a bigger size"
+            message += " than yolo-{}.weights actually is"
+            print message.format(model, model)
+            assert allbytes > 0
+    # allbytes is now = I * out1
+    # where I is the input size of the 1st dense layer
+    # I is also the volume of the last convolution layer
+    # I = size * size * channel
+    size = (np.sqrt(allbytes/out1/channel)) 
+    print size
+    size = int(size)
+    n = last_convo + 1
+    while 'output' not in s[n]:
+        size *= s[n].get('size',1)
+        n += 1
+else:
+    last_convo = None
+    size = None
+
+# Step 3: printing config
+w = 448
+h = 448
+c = 3
+l = w * h * c
+flat = False
+
+for i, d in enumerate(s):
+    if 'batch' in d: continue
+    if 'crop_width' in d: continue
+    if 'side' in d: continue
+
+    flag = False # flag for passing the 1st dense layer
+    if len(d) >= 4:
+        mult = (d['size'] == 3) 
+        mult *= (d['stride'] != 2) + 1.
+        if d['size'] == 1: d['pad'] = 0
+        new = (w + mult * d['pad'] - d['size'])
+        new /= d['stride']
+        new = int(np.floor(new + 1.))
+        if i == last_convo:
+            # yield the negative expected size
+            # instead of the indicated pad.
+            d['pad'] = -size 
+            new = size
+        batch_norm = d.get('batch_normalize', 0)
+        print ['conv', d['size'], c, d['filters'], 
+            h, w, d['stride'], d['pad'], batch_norm]
+        w = h = new
+        c = d['filters']
+        l = w * h * c
+        if 'activation' in d:
+            print ['LEAKY']
+    if len(d) == 2:
+        if 'output' not in d:
+            print ['pool', d['size'], 0, 
+                0, 0, 0, d['stride'], 0]
+            new = (w     * 1.0 - d['size'])/d['stride'] + 1
+            new = int(np.floor(new))
+            w = h = new
+            l = w * h * c
+        else:
+            if not flat:
+                flat = True
+                print ['FLATTEN']
+            print ['conn', 0, 0,
+            0, 0, 0, l, d['output']]
+            l = d['output']
+            if 'activation' in d:
+                print ['LEAKY']
+    if len(d) == 1:
+        if 'output' not in d:
+            print ['DROPOUT']
+        else:
+            if not flat:
+                flat = True
+                print ['FLATTEN']
+            print ['conn', 0, 0,
+            0, 0, 0, l, d['output']]
+            l = d['output']
\ No newline at end of file
diff --git a/configs/yolo-2c.cfg b/configs/yolo-2c.cfg
new file mode 100644
index 000000000..b6951ac6d
--- /dev/null
+++ b/configs/yolo-2c.cfg
@@ -0,0 +1,138 @@
+[net]
+batch=64
+subdivisions=64
+height=448
+width=448
+channels=3
+momentum=0.9
+decay=0.0005
+
+learning_rate=0.0001
+policy=steps
+steps=20,40,60,80,20000,30000
+scales=5,5,2,2,.1,.1
+max_batches = 40000
+
+[crop]
+crop_width=448
+crop_height=448
+flip=0
+angle=0
+saturation = 1.5
+exposure = 1.5
+
+[convolutional]
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[connected]
+output=256
+activation=linear
+
+[connected]
+output=4096
+activation=leaky
+
+[dropout]
+probability=.5
+
+[connected]
+output=1452
+activation=linear
+
+[detection]
+classes=2
+coords=4
+rescore=1
+side=11
+num=2
+softmax=0
+sqrt=1
+jitter=.2
+object_scale=1
+noobject_scale=.5
+class_scale=1
+coord_scale=5
\ No newline at end of file
diff --git a/configs/yolo-baby.cfg b/configs/yolo-baby.cfg
new file mode 100644
index 000000000..75218c221
--- /dev/null
+++ b/configs/yolo-baby.cfg
@@ -0,0 +1,125 @@
+[net]
+batch=64
+subdivisions=2
+height=448
+width=448
+channels=3
+momentum=0.9
+decay=0.0005
+
+saturation=.75
+exposure=.75
+hue = .1
+
+learning_rate=0.0005
+policy=steps
+steps=200,400,600,800,20000,30000
+scales=2.5,2,2,2,.1,.1
+max_batches = 40000
+
+[convolutional]
+batch_normalize=1
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=leaky
+
+[connected]
+output= 1470
+activation=linear
+
+[detection]
+classes=20
+coords=4
+rescore=1
+side=7
+num=2
+softmax=0
+sqrt=1
+jitter=.2
+
+object_scale=1
+noobject_scale=.5
+class_scale=1
+coord_scale=5
\ No newline at end of file
diff --git a/configs/yolo.cfg b/configs/yolo.cfg
new file mode 100644
index 000000000..c4f415c11
--- /dev/null
+++ b/configs/yolo.cfg
@@ -0,0 +1,257 @@
+[net]
+batch=1
+subdivisions=1
+height=448
+width=448
+channels=3
+momentum=0.9
+decay=0.0005
+saturation=1.5
+exposure=1.5
+hue=.1
+
+learning_rate=0.0005
+policy=steps
+steps=200,400,600,20000,30000
+scales=2.5,2,2,.1,.1
+max_batches = 40000
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=7
+stride=2
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+#######
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=2
+pad=1
+filters=1024
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+
+[local]
+size=3
+stride=1
+pad=1
+filters=256
+activation=leaky
+
+[dropout]
+probability=.5
+
+[connected]
+output= 1715
+activation=linear
+
+[detection]
+classes=20
+coords=4
+rescore=1
+side=7
+num=3
+softmax=0
+sqrt=1
+jitter=.2
+
+object_scale=1
+noobject_scale=.5
+class_scale=1
+coord_scale=5
+
diff --git a/darknet.py b/darknet.py
new file mode 100644
index 000000000..29e39899d
--- /dev/null
+++ b/darknet.py
@@ -0,0 +1,156 @@
+"""
+file: darknet.py
+includes: definition of class Darknet
+this class works with Darknet files: .cfg, .weights
+and produces Darknet objects that are easy for TFNet
+to use for building the corresponding tensorflow net.
+
+this class uses configs/process.py as a parser for .cfg
+files to understand the structure of the .weights file. It
+will use this information to load all the weights into
+its attribute .layers - a well-structured list in which each
+element is an object of class layer(), defined right below.
+"""
+
+from configs.process import *
+import tensorflow as tf
+import numpy as np
+import time
+import os
+
+class layer:
+    def __init__(self, type, size = 0, 
+    	c = 0, n = 0, h = 0, w = 0):
+        self.type = type
+        self.size = size
+        self.c, self.n = (c, n) 
+        self.h, self.w = (h, w)
+
+class dropout_layer(layer):
+    def __init__(self, p):
+        self.type = 'dropout'
+        self.prob = p
+
+class btchnrm_layer(layer):
+    def __init__(self, size, c, n, h, w ): # <- cryin' haha
+        layer.__init__(self, 'batchnorm',
+            size, c, n, h, w)
+
+class maxpool_layer(layer):
+    def __init__(self, size, c, n, h, w, 
+        stride, pad ):
+		layer.__init__(self, 'maxpool', 
+			size, c, n, h, w)
+		self.stride = stride
+		self.pad = pad
+
+class convolu_layer(layer):
+    def __init__(self, size, c, n, h, w, 
+        stride, pad ):
+        layer.__init__(self, 'convolutional', 
+        	size, c, n, h, w)
+        self.stride = stride
+        self.pad = pad
+
+class connect_layer(layer):
+    def __init__(self, size, c, n, h, w, 
+    	input_size, output_size):
+		layer.__init__(self, 'connected', 
+			size, c, n, h, w)
+		self.output_size = output_size
+		self.input_size = input_size
+
+class Darknet(object):
+
+    layers = list()
+    model = str()
+    partial = bool()
+
+    def __init__(self, model, partial = False):        
+        self.partial = partial 
+        self.model = model
+        self.parse(model)
+        
+        postfix = int('-' in model) * 'binaries/'
+        weight_file = postfix + 'yolo-{}.weights'.format(model)
+        print ('Loading {} ...'.format(weight_file))
+        start = time.time()
+        self.loadWeights(weight_file)
+        stop = time.time()
+        print ('Finished in {}s'.format(stop - start))
+
+    def parse(self, model):
+        cfg = model.split('-')[0]
+        print ('Parsing yolo-{}.cfg'.format(cfg))
+        layers = cfg_yielder(cfg)
+        for i, info in enumerate(layers):
+            if i == 0: self.meta = info; continue
+            if len(info) == 1: new = layer(type = info[0])
+            if info[0] == 'bnrm': new = btchnrm_layer(*info[1:])
+            if info[0] == 'drop': new = dropout_layer(*info[1:])
+            if info[0] == 'conv': new = convolu_layer(*info[1:])
+            if info[0] == 'pool': new = maxpool_layer(*info[1:])
+            if info[0] == 'conn': new = connect_layer(*info[1:])
+            self.layers.append(new)
+
+    def loadWeights(self, weight_path):
+        file_len = os.path.getsize(weight_path); offset = 16
+
+        # Read byte arrays from file
+        for i in range(len(self.layers)):
+            l = self.layers[i]
+            if l.type == "convolutional":
+                weight_number = l.n * l.c * l.size * l.size
+                l.biases = np.memmap(weight_path, mode = 'r',
+                    offset = offset, shape = (),
+                    dtype = '({})float32,'.format(l.n))
+                offset += 4 * l.n
+                l.weights = np.memmap(weight_path, mode = 'r',
+                    offset = offset, shape = (),
+                    dtype = '({})float32,'.format(weight_number))
+                offset += 4 * weight_number
+            
+            elif l.type == "batchnorm":
+                l.biases = np.memmap(weight_path, mode = 'r',
+                    offset = offset, shape = (),
+                    dtype = '({})float32,'.format(l.n))
+                offset += 4 * l.n
+                l.weights = np.memmap(weight_path, mode = 'r',
+                    offset = offset, shape = (),
+                    dtype = '({})float32,'.format(l.n))
+                offset += 4 * l.n
+
+            elif l.type == "connected":
+                bias_number = l.output_size
+                weight_number = l.output_size * l.input_size
+                l.biases = np.memmap(weight_path, mode = 'r',
+                    offset = offset, shape = (),
+                    dtype = '({})float32,'.format(bias_number))
+                offset += bias_number * 4
+                l.weights = np.memmap(weight_path, mode = 'r',
+                    offset = offset, shape = (),
+                    dtype = '({})float32,'.format(weight_number))
+                offset += weight_number * 4
+              
+        # Sanity check: the final offset must equal the weight file's size.
+        if offset == file_len:
+            print 'Successfully identified all {} bytes'.format(offset)
+        else:
+            exit('Error: expect {} bytes, found {}'.format(offset, file_len))
+
+        # Reshape
+        for i in range(len(self.layers)):
+            l = self.layers[i]
+            
+            if l.type == 'convolutional':
+                weight_array = l.weights
+                weight_array = np.reshape(weight_array,
+                	[l.n, l.c, l.size, l.size])
+                weight_array = weight_array.transpose([2,3,1,0])
+                l.weights = weight_array
+
+            if l.type == 'connected':
+                weight_array = l.weights
+                weight_array = np.reshape(weight_array,
+                	[l.input_size, l.output_size])
+                l.weights = weight_array
\ No newline at end of file
diff --git a/data.py b/data.py
index 810b4ea7d..56c5a0b53 100644
--- a/data.py
+++ b/data.py
@@ -1,209 +1,46 @@
-from drawer import *
+"""
+file: ./data.py
+includes: shuffle()
+shuffle() loads the cPickle dump parsed.bin, yields the batch count, then batches
+"""
+
 import cPickle as pickle
-from copy import deepcopy
-import subprocess
-mult = 1.
+from yolo.train import *
+
+off_bound_msg = 'Random scale/translate sends obj off bound'
 
-def shuffle(train_path, file, expectC, S, batch, epoch):
-	with open(file,'rb') as f:
-		pick, data = pickle.load(f)
-	C = len(pick)
-	if C != expectC:
-		exit("There is a mismatch between the model and the parsed annotations")
+def shuffle(train_path, parsed, batch, epoch, meta):
+	with open(parsed, 'rb') as f: data = pickle.load(f)[0]
 	size = len(data)
-	print 'Dataset of total {}'.format(size)
+	print 'Dataset of {} instance(s)'.format(size)
+	if batch > size: exit('Error: batch size is too big')
 	batch_per_epoch = int(size / batch)
+	total = epoch * batch_per_epoch
+	yield total
 
 	for i in range(epoch):
 		print 'EPOCH {}'.format(i+1)
 		# Shuffle data
 		shuffle_idx = np.random.permutation(np.arange(size))
 		for b in range(batch_per_epoch):
-			for r in range(1):
-				start_idx = b * batch
-				end_idx = (b+1) * batch
-
-				datum = list()
-				x_batch = list()
-				jpgs = list()
-				try:
-				# if True:
-					for j in range(start_idx,end_idx):
-						real_idx = shuffle_idx[j]
-						this = data[real_idx]
-						jpg = this[0]
-						w, h, allobj_ = this[1]
-						allobj = deepcopy(allobj_)
-						flip = (r / 2)  + (r % 2) * (j % 2)
-						flip = flip % 2
-
-						path = '{}{}'.format(train_path, jpg)
-						img, allobj = crop(path, allobj)
-
-						if flip == 1: 
-							img = img[:,:,::-1,:]
-
-						img = [img]
-						jpgs += [path]
-
-						cellx = 1. * w / S
-						celly = 1. * h / S
-						for x in allobj:
-							# cv2.rectangle(img[0], (x[1], x[2]), (x[3], x[4]), (0,0,255), 2)
-							centerx = .5*(x[1]+x[3]) #xmin, xmax
-							centery = .5*(x[2]+x[4]) #ymin, ymax
-							if flip == 1:
-								centerx = w - centerx
-							cx = centerx / cellx
-							cy = centery / celly
-							x[3] = float(x[3]-x[1]) / w
-							x[4] = float(x[4]-x[2]) / h
-							x[3] = np.sqrt(x[3])
-							x[4] = np.sqrt(x[4])
-							x[1] = cx - np.floor(cx)
-							x[2] = cy - np.floor(cy)
-							x += [np.floor(cx)] 
-							x += [np.floor(cy)]
-
-						# if False:
-						# 	for x in allobj:
-						# 		cx = x[5] + x[1]
-						# 		cy = x[6] + x[2]
-						# 		centerx = cx * cellx
-						# 		centery = cy * celly
-						# 		ww = x[3] * x[3] * w
-						# 		hh = x[4] * x[4] * h
-						# 		cv2.rectangle(im,
-						# 			(int(centerx - ww/2), int(centery - hh/2)),
-						# 			(int(centerx + ww/2), int(centery + hh/2)),
-						# 			(0,0,255), 2)
-
-						# 	cv2.imshow("result", im)
-						# 	cv2.waitKey()
-						# 	cv2.destroyAllWindows()
-
-						"""
-						YOLO formulates the problem as a regression problem. Normally from the
-						annotation, we can directly produce a target tensor to calculate the L2
-						loss as (network_output - target)^2. But YOLO's L2 loss formulation is not
-						that straightforward, namely the complication comes from its loss is selective:
-						not penalizes all entries in the network_output, depending on what network_output
-						looks like during training, moreover the loss also weights each term in the loss
-						differently, e.g. coordinate term is weighted more than confidence terms, etc.
-
-						To resolve this complication, I came up with a procedure that can calculate YOLO's
-						loss function in two parts, all the operation in each part are tensor operations. The
-						first part is done here during minibatch yielding, tensor operations are done on numpy
-						tensors, the second part is done in decode() method inside tfnet.py, as tensorflow tensors.
-						Why the seperation? I believe there are three reasons: 1. tensorflow tensors
-						does not support member assignment, so any operation involving member assignment must be
-						done as numpy tensors. 2. Efficiency: some operation are best to be done here than there.
-						3. Inherent constraints in YOLO's formulation of the loss, please read the comming text
-						for details.  
-						
-						The following text explains the next 11 tensors that I'll define
-						They will be passed as placeholders into the network and serve as
-						materials for calculating YOLO's loss. I look forward to suggestions
-						on improving this (my) current approach.
-						-----------------------------------------------------------------
-		
-						probs is the target class probability tensor
-						confs1 and confs2 are confidence score of boxes 1 and boxes 2
-						upleft are upper left corner coordinates of bounding boxes
-						botright are bottom right corner coordinates of bounding boxes
-						So far, probs, confs1, confs2, upleft, botright constitutes the target 
-						of regression, why do we need the ___id tensors?
-
-						You know from the paper that only grid cells that are responsible for 
-						correct prediction are penalized (by an L2 loss), so not all entries in
-						the above tensors should take part in the loss calculation, furthermore 
-						according to the paper, coordinates terms in the loss should be weighted more 
-						than the other terms, and of two boxes that each grid cell predicts, one with better 
-						IOU should be weighted differently than the other.
-
-						These __id tensors are meant to solve the above complication. They act as weights
-						and will be set to appropriate value either in data.py (as numpy tensors, during the 
-						batch generating phase (this file)) or in tfnet.py (as tensorflow tensors, during the 
-						loss calculation phase). For example, if an entry should not affect the loss, its 
-						corresponding weight will be set to zero, if an entry correspond to coordinate loss, 
-						the weight should be 5.0, so on.
-
-						proid will weight probs, and its final value is set here in data.py
-						conid1 weights confs1
-						conid2 weights confs2
-						cooid1 weights coordinate of box1
-						cooid2 weights coordinate of box2
-
-						conid1, conid2, cooid1, cooid2's values are initialised in data.py and set to correct value 
-						in tfnet.py. Why? because we only know their correct value when IOU of each predicted box 
-						with the target are calculated, i.e. the forward pass must be done before this. 
-						"""
-						probs = np.zeros([S*S,C])
-						confs = np.zeros([S*S,2])
-						coord = np.zeros([S*S,2,4])
-						proid = np.zeros([S*S,C])
-						conid = np.zeros([S*S,2])
-						cooid1 = cooid2 = np.zeros([S*S,1,4])
-						prear = np.zeros([S*S,4])
-						for x in allobj:
-							at = int(x[6] * S + x[5])
-							probs[at, :] = [0.] * C
-							probs[at, pick.index(x[0])] = 1.
-							proid[at, :] = [1] * C
-							coord[at, 0, :] = x[1:5]
-							coord[at, 1, :] = x[1:5]
-							scale = .5 * S
-							prear[at,0] = x[1] - x[3]**2 * scale # xleft
-							prear[at,1] = x[2] - x[4]**2 * scale # yup
-							prear[at,2] = x[1] + x[3]**2 * scale # xright
-							prear[at,3] = x[2] + x[4]**2 * scale # ybot
-							confs[at, :] = [1.] * 2
-							conid[at, :] = [1.] * 2
-							cooid1[at, 0, :] = [1.] * 4
-							cooid2[at, 0, :] = [1.] * 4
-						upleft   = np.expand_dims(prear[:,0:2], 1) # 49 x 1 
-						botright = np.expand_dims(prear[:,2:4], 1)
-
-					# Finalise the placeholders' values
-						probs = probs.reshape([-1]) # true_class
-						confs1 = confs[:,0]
-						confs2 = confs[:,1]
-						coord = coord.reshape([-1]) # true_coo
-						upleft   = np.concatenate([upleft]*2,1)
-						botright = np.concatenate([botright]*2,1)
-						proid = proid.reshape([-1]) # class_idtf
-						conid1 = conid[:,0]
-						conid2 = conid[:,1]
-						cooid1 = cooid1
-						cooid2 = cooid2
-
-					# Assemble the placeholders' value 
-						new = [
-							[probs], [confs1], [confs2], [coord],
-							[upleft], [botright],
-							[proid], [conid1], [conid2], [cooid1], [cooid2]
-						]
-						if datum == list():
-							datum = new
-							x_batch = img
-						else:
-							x_batch += img
-							for i in range(len(datum)):
-								datum[i] = np.concatenate([datum[i], new[i]])
-
-						if False:
-							here = 0
-							names = list()
-							while here + C < S*S*C:
-								consider = probs[here:here+C]
-								if (np.sum(consider) > 0.5):
-									names += [pick[np.argmax(consider)]]
-								here += C
-							print '{} : {}'.format(jpg, names)
-
-
-					x_batch = np.concatenate(x_batch, 0)
-					yield (x_batch, datum)
-				except:
-					print 'Random scale/translate sends object(s) out of bound'
-					continue
+			start_idx = b * batch
+			end_idx = (b+1) * batch
+
+			datum = list()
+			x_batch = list()
+			offbound = False
+			for j in range(start_idx,end_idx):
+				real_idx = shuffle_idx[j]
+				this = data[real_idx]
+				img, tensors = yolo_batch(train_path, this, meta)
+				if img is None: offbound = True; break
+				x_batch += [img]
+				if datum == list():	datum = tensors
+				else: 
+					for i in range(len(datum)):
+						new_datum_i = [datum[i], tensors[i]]
+						datum[i] = np.concatenate(new_datum_i)		
+			
+			if offbound: print off_bound_msg; continue
+			x_batch = np.concatenate(x_batch, 0)
+			yield (x_batch, datum)
diff --git a/drawer.py b/drawer.py
deleted file mode 100644
index 64b3dd7b7..000000000
--- a/drawer.py
+++ /dev/null
@@ -1,131 +0,0 @@
-from box import *
-from PIL import Image, ImageFile
-ImageFile.LOAD_TRUNCATED_IMAGES = True
-import cv2
-
-def fix(x,c):
-	return max(min(x,c),0)
-
-def crop(imPath, allobj = None):
-	
-	im = cv2.imread(imPath)
-	if allobj is not None:
-		h, w, _ = im.shape
-		scale = np.random.uniform()/3. + 1.
-		max_offx = (scale-1.) * w
-		max_offy = (scale-1.) * h
-		offx = int(np.random.uniform() * max_offx)
-		offy = int(np.random.uniform() * max_offy)
-		im = cv2.resize(im, (0,0), fx = scale, fy = scale)
-		im = im[offy : (offy + h), offx : (offx + w)]
-		#---------------
-		# (x,y) --> (scale*x, scale*y)
-		# (scale*x - offx, scale*y - offy)
-		#--------------
-		for obj in allobj:
-			obj[1] = int(obj[1]*scale-offx)
-			obj[3] = int(obj[3]*scale-offx)
-			obj[2] = int(obj[2]*scale-offy)
-			obj[4] = int(obj[4]*scale-offy)
-			obj[1] = fix(obj[1], w)
-			obj[3] = fix(obj[3], w)
-			obj[2] = fix(obj[2], h)
-			obj[4] = fix(obj[4], h)
-			#print obj, w, h
-
-	# return im
-	im_ = cv2.resize(im, (448, 448))
-	image_array = np.array(im_)
-	image_array = image_array / 255.
-	image_array = image_array * 2. - 1.
-	image_array = np.expand_dims(image_array, 0) # 1, height, width, 3
-
-	if allobj is not None:
-		return image_array, allobj
-	else:
-		return image_array
-	
-def to_color(indx, base):
-	base2 = base * base
-	b = indx / base2
-	r = (indx % base2) / base
-	g = (indx % base2) % base
-	return (b * 127, r * 127, g * 127)
-
-def draw_predictions(predictions, 
-	img_path, flip, threshold,
-	C, S, labels, colors):
-	
-	B = 2
-	boxes = []
-	SS        =  S * S # number of grid cells
-	prob_size = SS * C # class probabilities
-	conf_size = SS * B # confidences for each grid cell
-	probs = predictions[0 : prob_size]
-	confs = predictions[prob_size : (prob_size + conf_size)]
-	cords = predictions[(prob_size + conf_size) : ]
-	probs = probs.reshape([SS, C])
-	confs = confs.reshape([SS, B])
-	cords = cords.reshape([SS, B, 4])
-
-	for grid in range(SS):
-		for b in range(B):
-			new_box   = BoundBox(C)
-			new_box.c =  confs[grid, b]
-			new_box.x = (cords[grid, b, 0] + grid %  S) / S
-			new_box.y = (cords[grid, b, 1] + grid // S) / S
-			new_box.w =  cords[grid, b, 2] ** 2
-			new_box.h =  cords[grid, b, 3] ** 2
-			new_box.id = '{}-{}'.format(grid, b)
-			for c in range(C):
-				new_box.probs[c] = new_box.c * probs[grid, c]
-			boxes.append(new_box)
-
-	# non max suppress boxes
-	if True:
-		for c in range(C):
-			for i in range(len(boxes)): boxes[i].class_num = c
-			boxes = sorted(boxes, cmp=prob_compare)
-			for i in range(len(boxes)):
-				boxi = boxes[i]
-				if boxi.probs[c] == 0: continue
-				for j in range(i + 1, len(boxes)):
-					boxj = boxes[j]
-					boxij = box_intersection(boxi, boxj)
-					boxja = boxj.w * boxj.h
-					apart = boxij / boxja
-					if apart >= .5:
-						if boxi.probs[c] > boxj.probs[c]:
-							boxes[j].probs[c] = 0.
-						else:
-							boxes[i].probs[c] = 0.
-
-	imgcv = cv2.imread(img_path)
-	if flip: imgcv = cv2.flip(imgcv, 1)
-	print img_path
-	h, w, _ = imgcv.shape
-	for b in boxes:
-		max_indx = np.argmax(b.probs)
-		max_prob = b.probs[max_indx]
-		label = 'object' * int(C < 2)
-		label += labels[max_indx] * int(C > 1)
-		if (max_prob > threshold):
-			left  = int ((b.x - b.w/2.) * w)
-			right = int ((b.x + b.w/2.) * w)
-			top   = int ((b.y - b.h/2.) * h)
-			bot   = int ((b.y + b.h/2.) * h)
-			if left  < 0    :  left = 0
-			if right > w - 1: right = w - 1
-			if top   < 0    :   top = 0
-			if bot   > h - 1:   bot = h - 1
-			thick = int((h+w)/300)
-			cv2.rectangle(imgcv, 
-				(left, top), (right, bot), 
-				colors[max_indx], thick)
-			mess = '{}:{:.3f}'.format(label, max_prob)
-			cv2.putText(imgcv, mess, (left, top - 12), 
-				0, 1e-3 * h, colors[max_indx],thick/5)
-	
-	img_name = 'results/{}'.format(
-		img_path.split('/')[-1].split('.')[0])
-	cv2.imwrite(img_name + flip * '_' + '.jpg', imgcv)
\ No newline at end of file
diff --git a/genw.py b/genw.py
index 2bbaf679d..8a7b50675 100644
--- a/genw.py
+++ b/genw.py
@@ -1,32 +1,33 @@
-from configs.process import cfg_yielder
-from yolo import *
+from configs.process import *
+from yolo.train import *
+from tensorflow import flags
+from darknet import *
 import numpy as np
 import os
 import sys
 
+flags.DEFINE_string("src", "", "source of recollection: model name if source is complete, file name if source is partial, blank if no source")
+flags.DEFINE_string("des", "", "name of new model")
+flags.DEFINE_float("std", 1e-2, "standard deviation of random initialization")
+FLAGS = flags.FLAGS
+src = FLAGS.src
+des = FLAGS.des
 
-src = sys.argv[1]
-try:
-	des = sys.argv[2]
-except:
-	des = src
-	src = str()
-
-wlayer = ['CONVOLUTIONAL', 'CONNECTED']
+wlayer = ['convolutional', 'connected']
 class collector(object):
-	def __init__(self, yolo):
+	def __init__(self, net):
 		self.i = 0
-		self.yolo = yolo
+		self.net = net
 	def inc(self):
-		while self.yolo.layers[self.i].type not in wlayer:
+		while self.net.layers[self.i].type not in wlayer:
 			self.i += 1
-			if self.i == len(self.yolo.layers):
+			if self.i == len(self.net.layers):
 				break
 	def give(self):
 		self.inc()
-		l = self.yolo.layers[self.i]
+		l = self.net.layers[self.i]
 		w = l.weights
-		if l.type == 'CONVOLUTIONAL':
+		if l.type == 'convolutional':
 			w = w.transpose([3,2,0,1])
 		w = w.reshape([-1])
 		w = np.concatenate((l.biases, w))
@@ -40,8 +41,12 @@ def give(self):
 offset = int(16)
 
 if src != str():
-	yolo = YOLO(src)
-	col = collector(yolo)
+	partial = False
+	if ".weights" in src:
+		partial = True
+		src = des # same structure
+	net = Darknet(src, partial)
+	col = collector(net)
 	flag = True
 
 	# PHASE 01: recollect
@@ -61,8 +66,6 @@ def give(self):
 		elif not flag:
 			mark = i
 			break
-	if mark == i:
-    		print 'none'
 else:
     flag = False
 
@@ -75,11 +78,11 @@ def give(self):
 		print k
 		if k[0] == 'conv':
 			w = np.random.normal(
-				scale = .05,
+				scale = FLAGS.std,
 				size = (k[1]*k[1]*k[2]*k[3]+k[3],))
 		else:
 			w = np.random.normal(
-				scale = .05,
+				scale = FLAGS.std,
 				size = (k[6]*k[7]+k[7],))
 		w = np.float32(w)
 		writer.write(w.tobytes())
diff --git a/labels.txt b/labels.txt
index 95d1ed6c7..871a65b4e 100644
--- a/labels.txt
+++ b/labels.txt
@@ -1,3 +1,2 @@
 tvmonitor
-pottedplant
-person
\ No newline at end of file
+pottedplant
\ No newline at end of file
diff --git a/main.py b/main.py
index 3a6847b27..5d807d146 100644
--- a/main.py
+++ b/main.py
@@ -1,32 +1,28 @@
-from yolo import *
-from box import *
+from darknet import *
 from tfnet import *
 from tensorflow import flags
-import sys
-import time
-import os
 
-flags.DEFINE_string("test", "data", "path to testing folder")
-flags.DEFINE_string("pascal", "../pascal/VOCdevkit", "path to training set")
+flags.DEFINE_string("testset", "test", "path to testing directory")
+flags.DEFINE_string("dataset", "../pascal/VOCdevkit/IMG/", "path to dataset directory")
+flags.DEFINE_string("annotation", "../pascal/VOCdevkit/ANN/", "path to annotation directory")
 flags.DEFINE_float("threshold", 0.1, "detection threshold")
-flags.DEFINE_string("model", "3c", "yolo configuration of choice")
+flags.DEFINE_string("model", "3c", "configuration of choice")
 flags.DEFINE_boolean("train", False, "training mode or not?")
-flags.DEFINE_boolean("load", False, "load the newest train in backup/checkpoint")
+flags.DEFINE_integer("load", 0, "load a saved backup/checkpoint, -1 for newest")
 flags.DEFINE_boolean("savepb", False, "save net and weight to a .pb file")
 flags.DEFINE_float("gpu", 0.0, "How much gpu (from 0.0 to 1.0)")
 flags.DEFINE_float("lr", 1e-5, "Learning rate")
-flags.DEFINE_string("scale", "1,1,.5,5.", 
-	"Comma-separated scaling for probability, confidence, noobj, coordinate terms in the loss")
 flags.DEFINE_integer("keep",20,"Number of most recent training results to save")
 flags.DEFINE_integer("batch", 12, "Batch size")
 flags.DEFINE_integer("epoch", 1000, "Number of epoch")
 flags.DEFINE_integer("save", 2000, "Save checkpoint every ? training examples")
+
 FLAGS = flags.FLAGS
-image = FLAGS.pascal + '/IMG/'
-annot = FLAGS.pascal + '/ANN/' + 'parsed.yolotf'
+image = FLAGS.dataset
+annot = FLAGS.annotation + 'parsed.bin'
 
 step = int()
-if FLAGS.load:
+if FLAGS.load < 0:
 	try:
 		with open('backup/checkpoint','r') as f:
 			lines = f.readlines()
@@ -34,20 +30,23 @@
 		sys.exit('Seems like there is no recent training in backup/')
 	name = lines[-1].split(' ')[1].split('"')[1]
 	step = int(name.split('-')[1])
-yoloNet = YOLO(FLAGS.model + int(step > 0) * '-{}'.format(step))
+else: step = FLAGS.load
+yoloNet = Darknet(FLAGS.model + int(step > 0) * '-{}'.format(step))
 
-print ('Compiling net & initialise parameters...')
+print ('\nCompiling net & fill in parameters...')
 start = time.time()
 if FLAGS.gpu <= 0.:
 	with tf.device('cpu:0'):
-		model = SimpleNet(yoloNet, FLAGS)
+		tfnet = TFNet(yoloNet, FLAGS)
 else:
-	model = SimpleNet(yoloNet, FLAGS)
-model.step = step
-model.setup_meta_ops(FLAGS)
+	tfnet = TFNet(yoloNet, FLAGS)
+tfnet.step = step
+tfnet.setup_meta_ops()
 print ('Finished in {}s'.format(time.time() - start))
 
 if FLAGS.train:
-	print 'training mode'
-	model.train(image, annot, FLAGS.batch, FLAGS.epoch)
-model.predict(FLAGS)
\ No newline at end of file
+	print '\nEnter training ...'
+	tfnet.train(image, annot, FLAGS.batch, FLAGS.epoch)
+
+print
+tfnet.predict()
\ No newline at end of file
diff --git a/ops.py b/ops.py
new file mode 100644
index 000000000..7a0205e7f
--- /dev/null
+++ b/ops.py
@@ -0,0 +1,42 @@
+from yolo.train import *
+
+def convl(l, x, name):
+    if l.pad < 0: # figure the pad out
+        size = np.int(x.get_shape()[1])
+        expect = -(l.pad + 1) * l.stride 
+        expect += l.size - size
+        padding = [expect / 2, expect - expect / 2]
+        if padding[0] < 0: padding[0] = 0
+        if padding[1] < 0: padding[1] = 0
+    else:
+        padding = [l.pad, l.pad]
+    l.pad = 'VALID'
+    x = tf.pad(x, [[0, 0], padding, padding, [0, 0]])
+    x = tf.nn.conv2d(x, l.weights, 
+        padding = l.pad, name = name,
+        strides=[1, l.stride, l.stride, 1])
+    # if l.batch_norm == 1: x = slim.batch_norm(x)
+    # else: x = tf.nn.bias_add(x, l.b)
+    return tf.nn.bias_add(x, l.biases)
+
+def bnorm(l, x, name):
+    return x
+
+def dense(l, x, name):
+    return tf.nn.xw_plus_b(x, l.weights, l.biases, name = name)
+    
+def maxpool(l, x, name):
+    l.pad = 'VALID'
+    return tf.nn.max_pool(x, padding = l.pad,
+        ksize = [1,l.size,l.size,1], name = name, 
+        strides = [1,l.stride,l.stride,1])
+
+def flatten(x, name):
+    x = tf.transpose(x, [0,3,1,2])
+    return slim.flatten(x, scope = name)
+
+def leaky(x, name):
+    return tf.maximum(.1*x, x, name = name)
+
+def dropout(x, drop, name):
+    return tf.nn.dropout(x, drop, name = name)
diff --git a/tfnet.py b/tfnet.py
index dd96a4a38..7c426b6a7 100644
--- a/tfnet.py
+++ b/tfnet.py
@@ -1,125 +1,84 @@
-import tensorflow as tf
-import numpy as np
-import os
-import time
-from drawer import *
-from data import shuffle
-from yolo import *
-import subprocess
-import sys
-
-class SimpleNet(object):
+"""
+file: tfnet.py
+includes: definition of class TFNet
+this class initializes by building the forward pass
+its methods include train, predict and savepb - saving
+the current model to a protobuf file (no variable included)
+"""
 
-	labels = list()
-	colors = list()
-	C = int()
-	model = str()
-	step = int()
-	learning_rate = float()
-	scale_prob = float()
-	scale_conf = float()
-	scale_noobj = float()
-	scale_coor = float()
-	save_every = int()
+import sys
+from yolo.drawer import *
+from darknet import *
+from ops import *
+from data import *
 
-	def __init__(self, yolo, FLAGS):
-		self.model = yolo.model
-		self.S = yolo.S
-		self.labels = yolo.labels
-		self.C = len(self.labels)
+const_layer = ['leaky', 'dropout']
+var_layer = ['convolutional', 'connected', 'batchnorm']
 
-		base = int(np.ceil(pow(self.C, 1./3)))
-		for x in range(len(self.labels)):
-			self.colors += [to_color(x, base)]		
+class TFNet(object):
+	def __init__(self, darknet, FLAGS):
+		# Attach model's hyper params to the tfnet
+		self.meta = yolo_metaprocess(darknet.meta)
+		self.FLAGS = FLAGS	
 
+		# Placeholders
 		self.inp = tf.placeholder(tf.float32,
 			[None, 448, 448, 3], name = 'input')
-		self.drop = tf.placeholder(tf.float32, name = 'dropout')
-		
+		self.drop = dict()
+		self.feed = dict()
+
+		# Iterate through darknet layers
 		now = self.inp
-		for i in range(yolo.layer_number):
-			print now.get_shape()
-			l = yolo.layers[i]
-			if l.type == 'CONVOLUTIONAL':
-				if l.pad < 0:
-					size = np.int(now.get_shape()[1])
-					expect = -(l.pad + 1) * l.stride # there you go bietche 
-					expect += l.size - size
-					padding = [expect / 2, expect - expect / 2]
-					if padding[0] < 0: padding[0] = 0
-					if padding[1] < 0: padding[1] = 0
-				else:
-					padding = [l.pad, l.pad]
-				l.pad = 'VALID'
-				now = tf.pad(now, [[0, 0], padding, padding, [0, 0]])
-				if FLAGS.savepb:
-					b = tf.constant(l.biases)
-					w = tf.constant(l.weights)
-				else:
-					b = tf.Variable(l.biases)
-					w = tf.Variable(l.weights)
-				now = tf.nn.conv2d(now, w,
-					strides=[1, l.stride, l.stride, 1],
-					padding=l.pad)
-				now = tf.nn.bias_add(now, b)
-				now = tf.maximum(0.1 * now, now)			
-			elif l.type == 'MAXPOOL':
-				l.pad = 'VALID'
-				now = tf.nn.max_pool(now, 
-					padding = l.pad,
-					ksize = [1,l.size,l.size,1], 
-					strides = [1,l.stride,l.stride,1])			
-			elif l.type == 'FLATTEN':
-				now = tf.transpose(now, [0,3,1,2])
-				now = tf.reshape(now, 
-					[-1, int(np.prod(now.get_shape()[1:]))])			
-			elif l.type == 'CONNECTED':
-				name = str()
-				if i == yolo.layer_number - 1: name = 'output'
-				else: name = 'conn'
-				if FLAGS.savepb:
-					b = tf.constant(l.biases)
-					w = tf.constant(l.weights)
-				else:
-					b = tf.Variable(l.biases)
-					w = tf.Variable(l.weights)
-				now = tf.nn.xw_plus_b(now, w, b, name = name)
-			elif l.type == 'LEAKY':
-				now = tf.maximum(0.1 * now, now)
-			elif l.type == 'DROPOUT':
-				if not FLAGS.savepb:
-					print ('dropout')
-					now = tf.nn.dropout(now, keep_prob = self.drop)
-		print now.get_shape()
+		for i, l in enumerate(darknet.layers):
+			if i == len(darknet.layers)-1: name = 'output'
+			else: name = l.type+'-{}'.format(i)
+			# no variable when saving to .pb file
+			if l.type in var_layer and not FLAGS.savepb:
+				l.biases = tf.Variable(l.biases)
+				l.weights = tf.Variable(l.weights)
+			arg = [l, now, name]
+			if l.type=='convolutional': now = convl(*arg)
+			elif l.type == 'connected': now = dense(*arg)
+			elif l.type == 'batchnorm': now = bnorm(*arg)
+			elif l.type == 'maxpool': now = maxpool(*arg)	
+			elif l.type == 'flatten': now = flatten(*arg[1:])
+			elif l.type == 'leaky'  : now =   leaky(*arg[1:])
+			# Dropout
+			elif l.type == 'dropout' and not FLAGS.savepb:
+				self.drop[name] = tf.placeholder(tf.float32)
+				self.drop[name + '_'] = l.prob
+				self.feed[self.drop[name]] = self.drop[name+'_']
+				print 'Dropout p = {}'.format(l.prob)
+				now = dropout(now, self.drop[name], name)
+			if l.type not in const_layer: print now.get_shape()
+
+		# Attach the output to this tfnet
 		self.out = now
 
-	def setup_meta_ops(self, FLAGS):
-		self.save_every = FLAGS.save
-		self.learning_rate = FLAGS.lr
-		scales = [float(f) for i, f in enumerate(FLAGS.scale.split(','))]
-		self.scale_prob, self.scale_conf, self.scale_noobj, self.scale_coor = scales 
-		if FLAGS.gpu > 0: 
+	def setup_meta_ops(self):
+		if self.FLAGS.gpu > 0: 
-			percentage = min(FLAGS.gpu, 1.)
-			print 'gpu mode {} usage'.format(percentage)
+			percentage = min(self.FLAGS.gpu, 1.) # FLAGS is no longer a parameter of this method
+			print 'GPU mode with {} usage'.format(percentage)
 			gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=percentage)
 			self.sess = tf.Session(config = tf.ConfigProto(
 				allow_soft_placement = True,
 				log_device_placement = False,
 				gpu_options = gpu_options))
 		else:
-			print 'cpu mode'
+			print 'CPU mode'
 			self.sess = tf.Session(config = tf.ConfigProto(
 				allow_soft_placement = False,
 				log_device_placement = False))
-		if FLAGS.train: self.decode()
-		if FLAGS.savepb: 
-			self.savepb('graph-{}.pb'.format(self.model))
+		if self.FLAGS.train: yolo_loss(self)
+		if self.FLAGS.savepb: 
+			self.savepb('graph-{}.pb'.format(self.meta['model']))
 			sys.exit()
-		else: self.saver = tf.train.Saver(tf.all_variables(), max_to_keep = FLAGS.keep)
+		else: self.saver = tf.train.Saver(tf.all_variables(), 
+			max_to_keep = self.FLAGS.keep)
 		self.sess.run(tf.initialize_all_variables())
-		if FLAGS.load:
+		if self.FLAGS.load != 0: # negative load ("newest") must restore as well; self.step holds the resolved step
 			load_point = 'backup/model-{}'.format(self.step)
-			print 'loading from {}'.format(load_point)
+			print 'Loading from {}'.format(load_point)
 			self.saver.restore(self.sess, load_point)
 
 	def savepb(self, name):
@@ -128,7 +87,7 @@ def savepb(self, name):
 
 	def to_constant(self, inc = 0):
 		with open('binaries/yolo-{}-{}.weights'.format(
-			self.model.split('-')[0], self.step + inc), 'w') as f:
+			self.meta['model'].split('-')[0], self.step + inc), 'w') as f:
 			f.write(np.array([1]*4, dtype=np.int32).tobytes())
 			for i, variable in enumerate(tf.trainable_variables()):
 				val = variable.eval(self.sess)
@@ -137,183 +96,61 @@ def to_constant(self, inc = 0):
 				val = val.reshape([-1])
 				f.write(val.tobytes())
 	
-	def decode(self):
-    		"""
-			Please refer to the comment section inside data.py
-			to understand the below placeholders. I look forward
-			to receiving comments/improvements on my current
-			implementation of YOLO's loss calculation
-			"""
-
-		print ('Set up loss and train ops (may cause lag)...')
-		SS = self.S * self.S
-		self.true_class = tf.placeholder(tf.float32, #
-			[None, SS * self.C])
-		self.true_coo = tf.placeholder(tf.float32, #
-			[None, SS * 2 * 4])
-		self.class_idtf = tf.placeholder(tf.float32, #
-			[None, SS * self.C])
-		self.cooid1 = tf.placeholder(tf.float32, #
-			[None, SS, 1, 4])
-		self.cooid2 = tf.placeholder(tf.float32, #
-			[None, SS, 1, 4])
-		self.confs1 = tf.placeholder(tf.float32, #
-			[None, SS])
-		self.confs2 = tf.placeholder(tf.float32, #
-			[None, SS])
-		self.conid1 = tf.placeholder(tf.float32, #
-			[None, SS])
-		self.conid2 = tf.placeholder(tf.float32, #
-			[None, SS])
-		self.upleft = tf.placeholder(tf.float32, #
-			[None, SS, 2, 2])
-		self.botright = tf.placeholder(tf.float32, #
-			[None, SS, 2, 2])
-
-		# Extract the coordinate prediction from 
-		# output of YOLO's net
-		coords = self.out[:, SS * (self.C + 2):]
-		coords = tf.reshape(coords, [-1, SS, 2, 4])
-
-		wh = tf.pow(coords[:,:,:,2:4], 2) * (.5 * self.S); # weight & height of each box
-		xy = coords[:,:,:,0:2] # the center coordinates of each box
-		floor = xy - wh
-		ceil = xy + wh
-
-		# calculate the coordinates of the intersection 
-		# between predicted boxes and correct boxes
-		intersect_upleft = tf.maximum(floor, self.upleft)
-		intersect_botright = tf.minimum(ceil, self.botright)
-		intersect_wh = intersect_botright - intersect_upleft
-		intersect_wh = tf.maximum(intersect_wh, 0.0)
-		
-		# calculate the areas of intersection 
-		intersect_area1 = tf.mul(intersect_wh[:,:,0,0], intersect_wh[:,:,0,1])
-		intersect_area2 = tf.mul(intersect_wh[:,:,1,0], intersect_wh[:,:,1,1])
-		# determine which box has worse & which box has better IOU to ground truth
-		inferior_cell = intersect_area1 > intersect_area2
-		inferior_cell = tf.to_float(inferior_cell)
-
-		# since the initial value of confs is 1.0 throughout
-		# now we know which box of each pair has worse IOU
-		# its value should be set to 0.0
-		confs1 = tf.mul(inferior_cell, self.confs1) 
-		confs2 = tf.mul((1.-inferior_cell), self.confs2)
-		confs1 = tf.expand_dims(confs1, -1)
-		confs2 = tf.expand_dims(confs2, -1)
-		confs = tf.concat(2, [confs1, confs2])
-
-		# Again, since now we know which box of each pair has worse IOU
-		# it should not contribute to the loss value
-		# hence the corresponding conid is set to 0.0
-		mult = inferior_cell
-		conid1 =  tf.mul(mult, self.conid1)
-		conid2 =  tf.mul((1. - mult), self.conid2)
-		conid1 = tf.expand_dims(conid1, -1)
-		conid2 = tf.expand_dims(conid2, -1)
-		conid = tf.concat(2, [conid1, conid2])
+	def train(self, train_set, parsed_annota, batch, epoch):
+		batches = shuffle(train_set, parsed_annota, batch, epoch, self.meta)
+
+		print 'Training statistics:'
+		print '   Learning rate : {}'.format(self.FLAGS.lr)
+		print '   Batch size    : {}'.format(batch)
+		print '   Epoch number  : {}'.format(epoch)
+		print '   Backup every  : {}'.format(self.FLAGS.save)
+
+		total = int() # total number of batches
+		for i, packet in enumerate(batches):
+			if i == 0: total = packet; continue
+			x_batch, datum = packet
+			feed_dict = yolo_feed_dict(self, x_batch, datum)
+			feed_dict[self.inp] = x_batch
+			for k in self.feed: feed_dict[k] = self.feed[k]
 
-		# Again, since now we know which box of each pair has worse IOU
-		# it should not contribute to the loss value, 
-		# hence the corresponding cooid is set to 0.0 
-		times = tf.expand_dims(inferior_cell, -1) # [batch, 49, 1]
-		times = tf.expand_dims(times, 2) # [batch, 49, 1, 1]
-		times = tf.concat(3, [times]*4) # [batch, 49, 1, 4]
-		cooid1 = tf.mul(times, self.cooid1)
-		cooid2 = (1. - times) * self.cooid2
-		cooid = tf.concat(2, [cooid1, cooid2]) # [batch, 49, 2, 4]
-
-		# reshape
-		confs = tf.reshape(confs,
-			[-1, int(np.prod(confs.get_shape()[1:]))])
-		conid = tf.reshape(conid,
-			[-1, int(np.prod(conid.get_shape()[1:]))])
-		cooid = tf.reshape(cooid,
-			[-1, int(np.prod(cooid.get_shape()[1:]))])
-
-		conid = conid + tf.to_float(conid > .5) * (self.scale_conf - 1.)
-		conid = conid + tf.to_float(conid < .5) * self.scale_noobj
-
-		# true is the regression target
-		# idtf is the weight
-		# the L2 loss of YOLO is then: tf.mul(idtf, (self.out - true)**2)
-		true = tf.concat(1,[self.true_class, confs, self.true_coo])
-		idtf = tf.concat(1,[self.class_idtf * self.scale_prob, conid,
-							cooid * self.scale_coor])
-
-		self.loss = tf.pow(self.out - true, 2)
-		self.loss = tf.mul(self.loss, idtf)
-		self.loss = tf.reduce_sum(self.loss, 1)
-		self.loss = .5 * tf.reduce_mean(self.loss)
-
-		optimizer = tf.train.RMSPropOptimizer(self.learning_rate)
-		gradients = optimizer.compute_gradients(self.loss)
-		self.train_op = optimizer.apply_gradients(gradients)
-
-	def train(self, train_set, annotate, batch_size, epoch):
-		batches = shuffle(train_set, annotate, self.C, self.S, batch_size, epoch)
-		for i, batch in enumerate(batches):
-			x_batch, datum = batch
-			feed_dict = {
-				self.inp : x_batch,
-				self.drop : .5,
-				self.true_class : datum[0],
-				self.confs1 : datum[1],
-				self.confs2 : datum[2],
-				self.true_coo : datum[3],
-				self.upleft : datum[4],
-				self.botright : datum[5],
-				self.class_idtf : datum[6],
-				self.conid1 : datum[7],
-				self.conid2 : datum[8],
-				self.cooid1 : datum[9],
-				self.cooid2 : datum[10],
-			}
 			_, loss = self.sess.run([self.train_op, self.loss], feed_dict)
-			print 'step {} - batch {} - loss {}'.format(1+i+self.step, 1+i, loss)
-			if (i+1) % (self.save_every/batch_size) == 0:
-				print 'save checkpoint and binaries at step {}'.format(self.step+i+1)
-				self.saver.save(self.sess, 'backup/model-{}'.format(self.step+i+1))
-				self.to_constant(inc = i+1)
-
-		print 'save checkpoint and binaries at step {}'.format(self.step+i+1)
-		self.saver.save(self.sess, 'backup/model-{}'.format(self.step+i+1))
-		self.to_constant(inc = i+1)
-
-	def predict(self, FLAGS):
-		img_path = FLAGS.test
-		threshold = FLAGS.threshold
-		all_img_ = os.listdir(img_path)
-		batch = min(FLAGS.batch, len(all_img_))
-		for j in range(len(all_img_)/batch):
-			img_feed = list()
-			all_img = all_img_[j*batch: (j*batch+batch)]
+			print 'step {} - batch {} - loss {}'.format(i+self.step, i, loss)
+			if i % (self.FLAGS.save/batch) == 0 or i == total:
+				print 'save checkpoint and binaries at step {}'.format(self.step+i)
+				self.saver.save(self.sess, 'backup/model-{}'.format(self.step+i))
+				self.to_constant(inc = i)
+
+	def predict(self):
+		inp_path = self.FLAGS.testset
+		all_inp_ = os.listdir(inp_path)
+		all_inp_ = [i for i in all_inp_ if is_yolo_inp(i)]
+		batch = min(self.FLAGS.batch, len(all_inp_))
+
+		for j in range(len(all_inp_)/batch):
+			inp_feed = list()
+			all_inp = all_inp_[j*batch: (j*batch+batch)]
 			new_all = list()
-			for img in all_img:
-				if '.jpg' not in img: continue
-				new_all += [img]
-				this_img = '{}/{}'.format(img_path, img)
-				this_img = crop(this_img)
-				img_feed.append(this_img)
-				img_feed.append(this_img[:,:,::-1,:])
-			all_img = new_all
-
-			feed_dict = {
-				self.inp : np.concatenate(img_feed, 0), 
-				self.drop : 1.0
-			}
+			for inp in all_inp:
+				new_all += [inp]
+				this_inp = '{}/{}'.format(inp_path, inp)
+				this_inp = yolo_preprocess(this_inp)
+				inp_feed.append(this_inp)
+			all_inp = new_all
+
+			feed_dict = {self.inp : np.concatenate(inp_feed, 0)}
+			for k in self.feed: feed_dict[k] = 1.0
 		
-			print ('Forwarding {} images ...'.format(len(img_feed)))
+			print ('Forwarding {} inputs ...'.format(len(inp_feed)))
 			start = time.time()
 			out = self.sess.run([self.out], feed_dict)
 			stop = time.time()
 			last = stop - start
-			print ('Total time = {}s / {} imgs = {} fps'.format(
-				last, len(img_feed), len(img_feed) / last))
+			print ('Total time = {}s / {} inps = {} ips'.format(
+				last, len(inp_feed), len(inp_feed) / last))
+
 			for i, prediction in enumerate(out[0]):
-				draw_predictions(
-					prediction,
-					'{}/{}'.format(img_path, all_img[i/2]), 
-					i % 2, threshold,
-					self.C, self.S, self.labels, self.colors)
-			print ('Results stored in results/')
+				yolo_postprocess(
+					prediction, '{}/{}'.format(inp_path, all_inp[i]), 
+					self.FLAGS, self.meta)
+		
+		print ('Results stored in results/')
diff --git a/yolo.py b/yolo.py
deleted file mode 100644
index 4dd7a1a3c..000000000
--- a/yolo.py
+++ /dev/null
@@ -1,146 +0,0 @@
-import numpy as np
-import os
-import tensorflow as tf
-import time
-from configs.process import cfg_yielder
-
-labels20 = ["aeroplane", "bicycle", "bird", "boat", "bottle",
-    "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
-    "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
-    "train", "tvmonitor"]
-default_models = ['full', 'small', 'tiny']
-
-class layer:
-    def __init__(self, type, size = 0, 
-    	c = 0, n = 0, h = 0, w = 0):
-        self.type = type
-        self.size = size
-        self.c, self.n = (c, n) 
-        self.h, self.w = (h, w)
-
-class maxpool_layer(layer):
-    def __init__(self, size, c, n, h, w, stride, pad):
-		layer.__init__(self, 'MAXPOOL', 
-			size, c, n, h, w)
-		self.stride = stride
-		self.pad = pad
-
-class convolu_layer(layer):
-    def __init__(self, size, c, n, h, w, stride, pad):
-        layer.__init__(self, 'CONVOLUTIONAL', 
-        	size, c, n, h, w)
-        self.stride = stride
-        self.pad = pad
-
-class connect_layer(layer):
-    def __init__(self, size, c, n, h, w, 
-    	input_size, output_size):
-		layer.__init__(self, 'CONNECTED', 
-			size, c, n, h, w)
-		self.output_size = output_size
-		self.input_size = input_size
-
-class YOLO(object):
-
-    layers = []
-    S = int()
-    model = str()
-
-    def __init__(self, model):
-        with open('labels.txt', 'r') as f:
-            pick = f.readlines()
-            for i in range(len(pick)): pick[i] = pick[i].strip()
-        if model in default_models: pick = labels20
-        self.labels = pick
-        self.model = model
-        self.layers = []
-        self.build(model)
-        self.layer_number = len(self.layers)
-        postfix = int('-' in model) * 'binaries/'
-        weight_file = postfix + 'yolo-{}.weights'.format(model)
-        print ('Loading {} ...'.format(weight_file))
-        start = time.time()
-        self.loadWeights(weight_file)
-        stop = time.time()
-        print ('Finished in {}s'.format(stop - start))
-
-    def build(self, model):
-		cfg = model.split('-')[0]
-		print ('parsing yolo-{}.cfg'.format(cfg))
-		layers = cfg_yielder(cfg)
-		for i, info in enumerate(layers):
-			if i == 0: 
-				self.S = info
-				continue
-			if len(info) == 1: new = layer(type = info[0])
-			if info[0] == 'conv': new = convolu_layer(*info[1:])
-			if info[0] == 'pool': new = maxpool_layer(*info[1:])
-			if info[0] == 'conn': new = connect_layer(*info[1:])
-			self.layers.append(new)
-
-    def loadWeights(self, weight_path):
-        self.startwith = np.array(
-            np.memmap(weight_path, mode = 'r',
-                offset = 0, shape = (),
-                dtype = '(4)i4,'))
-        #self.startwith = np.array(self.startwith)
-        offset = 16
-        chunkMB = 1000
-        chunk = int(chunkMB * 2**18) 
-        
-        # Read byte arrays from file
-        for i in range(self.layer_number):
-            l = self.layers[i]
-            if l.type == "CONVOLUTIONAL":
-                weight_number = l.n * l.c * l.size * l.size
-                l.biases = np.memmap(weight_path, mode = 'r',
-                    offset = offset, shape = (),
-                    dtype = '({})float32,'.format(l.n))
-                offset += 4 * l.n
-                l.weights = np.memmap(weight_path, mode = 'r',
-                    offset = offset, shape = (),
-                    dtype = '({})float32,'.format(weight_number))
-                offset += 4 * weight_number
-
-            elif l.type == "CONNECTED":
-                bias_number = l.output_size
-                weight_number = l.output_size * l.input_size
-                l.biases = np.memmap(weight_path, mode = 'r',
-                    offset = offset, shape = (),
-                    dtype = '({})float32,'.format(bias_number))
-                offset += bias_number * 4
-            
-                chunks  = [chunk] * (weight_number / chunk) 
-                chunks += [weight_number % chunk]
-                l.weights = np.array([], dtype = np.float32)
-                for c in chunks:
-                    l.weights = np.concatenate((l.weights,
-                        np.memmap(weight_path, mode = 'r',
-                        offset = offset, shape = (),
-                        dtype = '({})float32,'.format(c))))
-                    offset += c * 4
-                    
-        # Defensive python right here bietch.
-        if offset == os.path.getsize(weight_path):
-            print ('Successfully identified all {} bytes'.format(
-                offset))
-        else:
-            print 'expect ', offset, ' bytes, found ', os.path.getsize(weight_path)
-            exit()
-
-        # Reshape
-        for i in range(self.layer_number):
-            l = self.layers[i]
-            
-            if l.type == 'CONVOLUTIONAL':
-                weight_array = l.weights
-                weight_array = np.reshape(weight_array,
-                	[l.n, l.c, l.size, l.size])
-                weight_array = weight_array.transpose([2,3,1,0])
-                l.weights = weight_array
-
-            if l.type == 'CONNECTED':
-                weight_array = l.weights
-                weight_array = np.reshape(weight_array,
-                	[l.input_size, l.output_size])
-                l.weights = weight_array
diff --git a/yolo/__init__.py b/yolo/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/box.py b/yolo/box.py
similarity index 100%
rename from box.py
rename to yolo/box.py
diff --git a/yolo/drawer.py b/yolo/drawer.py
new file mode 100644
index 000000000..7582f0090
--- /dev/null
+++ b/yolo/drawer.py
@@ -0,0 +1,191 @@
+"""
+file: yolo/drawer.py
+includes: yolo_metaprocess(), yolo_preprocess() and yolo_postprocess()
+together they add yolo framework's specificities into the general framework:
+	0. what to do with the net's hyper-parameters?
+	1. what to do before flowing the net?
+	2. what to do with the net's output?
+"""
+
+from box import *
+from PIL import Image, ImageFile
+ImageFile.LOAD_TRUNCATED_IMAGES = True
+import cv2
+
+labels20 = ["aeroplane", "bicycle", "bird", "boat", "bottle",
+    "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
+    "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
+    "train", "tvmonitor"]
+default_models = ['full', 'small', 'tiny']
+
+def yolo_metaprocess(meta):
+	"""
+	Add `labels` (class names) and `colors` (one 3-tuple per label,
+	used when drawing predictions) to the dict `meta`, then return it.
+	Exits if the number of labels differs from meta['classes'].
+	"""
+	def to_color(indx, base):
+		# write indx in base `base`; each digit drives one channel
+		step = min(127, 255 // max(base - 1, 1)) # keep channels <= 255
+		base2 = base * base
+		b, rest = indx // base2, indx % base2
+		return (b * step, (rest // base) * step, (rest % base) * step)
+	if meta['model'] in default_models: meta['labels'] = labels20
+	else: 
+		with open('labels.txt','r') as f:
+			# blank lines are not labels
+			meta['labels'] = [l.strip() for l in f if l.strip()]
+	if len(meta['labels']) != meta['classes']:
+		msg = 'labels.txt and configs/yolo-{}.cfg '
+		msg+= 'indicate different class number'
+		exit('Error: {}'.format(msg.format(meta['model'])))
+	base = int(np.ceil(pow(meta['classes'], 1./3)))
+	n_labels = len(meta['labels'])
+	meta['colors'] = [to_color(x, base) for x in range(n_labels)]
+	return meta
+
+def is_yolo_inp(name): return name.lower().endswith('.jpg') # test the extension, not a substring
+
+def yolo_preprocess(imPath, allobj = None):
+	"""
+	Read the image at `imPath` and return it as a numpy tensor of shape
+	[1, 448, 448, channels] scaled to [-1, 1], ready to be fed into tfnet.
+	If a parsed annotation `allobj` is given (training mode), the image
+	is first augmented with a random zoom, translation and horizontal
+	flip (recolor() is still a no-op); `allobj` is modified in place to
+	match and the function returns (image_array, allobj) instead.
+	"""
+	def recolor(im):
+	# Placeholder for color augmentation; returns `im` unchanged.
+	# `im` is the image as loaded by cv2. Put random intensity
+	# transformations here ONLY: zoom/shift/flip are done by the
+	# enclosing function, which also updates the annotation to match.
+	# ADD YOUR CODE BELOW:
+		return im
+	
+	def fix(x,c): # clamp x into [0,c]
+		return max(min(x,c),0)
+	
+	im = cv2.imread(imPath) # NOTE(review): result is not checked for a failed read
+	if allobj is not None: # in training mode
+		h, w, _ = im.shape
+		# Zoom in by a factor in [1, 1.2), then crop an h x w window
+		scale = np.random.uniform() / 5. + 1.
+		max_offx = (scale-1.) * w
+		max_offy = (scale-1.) * h
+		offx = int(np.random.uniform() * max_offx)
+		offy = int(np.random.uniform() * max_offy)
+		im = cv2.resize(im, (0,0), fx = scale, fy = scale)
+		im = im[offy : (offy + h), offx : (offx + w)]
+		flip = np.random.binomial(1, .5) # 1 -> mirror horizontally
+		for obj in allobj: # obj[1:5] = xmin, ymin, xmax, ymax
+			obj[1] = int(obj[1]*scale-offx)
+			obj[3] = int(obj[3]*scale-offx)
+			obj[2] = int(obj[2]*scale-offy)
+			obj[4] = int(obj[4]*scale-offy)
+			obj[1] = fix(obj[1], w) #xmin
+			obj[3] = fix(obj[3], w) #xmax
+			obj[2] = fix(obj[2], h) #ymin
+			obj[4] = fix(obj[4], h) #ymax	
+			if flip: # mirrored box: new xmin = w - old xmax and vice versa
+				temp = obj[1]
+				obj[1] = w - obj[3]
+				obj[3] = w - temp
+
+		if flip: im = cv2.flip(im, 1)
+		im = recolor(im)
+
+	# resize to the fixed 448 x 448 input size and build the net's input
+	im_ = cv2.resize(im, (448, 448))
+	image_array = np.array(im_)
+	image_array = image_array / 255.
+	image_array = image_array * 2. - 1. # [0, 1] -> [-1, 1]
+	image_array = np.expand_dims(image_array, 0) # add a leading batch axis
+	if allobj is not None: return image_array, allobj
+	else: return image_array
+	
+
+def yolo_postprocess(predictions, 
+	img_path, FLAGS, meta):
+	"""
+	Decode one image's net output into boxes, draw them, save to results/
+	predictions: flat numpy vector - class probs, confidences, coords
+	img_path: path of the image file the predictions belong to
+	FLAGS: supplies `threshold`, the minimum score for a box to be drawn
+	meta: supplies classes/num/side plus labels and colors for drawing
+	"""
+	# unpack hyper-parameters: C classes, B boxes per cell, S x S grid
+	threshold = FLAGS.threshold
+	C, B, S = meta['classes'], meta['num'], meta['side']
+	colors, labels = meta['colors'], meta['labels']
+
+	boxes = []
+	SS        =  S * S # number of grid cells
+	prob_size = SS * C # class probabilities
+	conf_size = SS * B # confidences for each grid cell
+	probs = predictions[0 : prob_size]
+	confs = predictions[prob_size : (prob_size + conf_size)]
+	cords = predictions[(prob_size + conf_size) : ]
+	probs = probs.reshape([SS, C])
+	confs = confs.reshape([SS, B])
+	cords = cords.reshape([SS, B, 4])
+
+	for grid in range(SS):
+		for b in range(B):
+			new_box   = BoundBox(C)
+			new_box.c =  confs[grid, b]
+			new_box.x = (cords[grid, b, 0] + grid %  S) / S # centre, fraction of width
+			new_box.y = (cords[grid, b, 1] + grid // S) / S # centre, fraction of height
+			new_box.w =  cords[grid, b, 2] ** 2 # squared here to get the box size
+			new_box.h =  cords[grid, b, 3] ** 2
+			new_box.id = '{}-{}'.format(grid, b)
+			for c in range(C):
+				new_box.probs[c] = new_box.c * probs[grid, c] # class score
+			boxes.append(new_box)
+
+	# non max suppression, one class at a time (order matters: scores are zeroed in place)
+	for c in range(C):
+		for i in range(len(boxes)): boxes[i].class_num = c
+		boxes = sorted(boxes, cmp=prob_compare) # prob_compare comes from box.py
+		for i in range(len(boxes)):
+			boxi = boxes[i]
+			if boxi.probs[c] == 0: continue
+			for j in range(i + 1, len(boxes)):
+				boxj = boxes[j]
+				boxij = box_intersection(boxi, boxj)
+				boxja = boxj.w * boxj.h
+				apart = boxij / boxja # share of boxj covered by the intersection
+				if apart >= .5:
+					if boxi.probs[c] > boxj.probs[c]:
+						boxes[j].probs[c] = 0.
+					else:
+						boxes[i].probs[c] = 0.
+
+	imgcv = cv2.imread(img_path)
+	print img_path
+	h, w, _ = imgcv.shape
+	for b in boxes:
+		max_indx = np.argmax(b.probs)
+		max_prob = b.probs[max_indx]
+		label = 'object' * int(C < 2) # single-class models are labelled 'object'
+		label += labels[max_indx] * int(C > 1)
+		if (max_prob > threshold):
+			left  = int ((b.x - b.w/2.) * w)
+			right = int ((b.x + b.w/2.) * w)
+			top   = int ((b.y - b.h/2.) * h)
+			bot   = int ((b.y + b.h/2.) * h)
+			if left  < 0    :  left = 0
+			if right > w - 1: right = w - 1
+			if top   < 0    :   top = 0
+			if bot   > h - 1:   bot = h - 1
+			thick = int((h+w)/300) # line thickness grows with image size
+			cv2.rectangle(imgcv, 
+				(left, top), (right, bot), 
+				colors[max_indx], thick)
+			mess = '{}:{:.3f}'.format(label, max_prob)
+			cv2.putText(imgcv, mess, (left, top - 12), 
+				0, 1e-3 * h, colors[max_indx],thick/5)
+	
+	img_name = 'results/{}'.format(
+		img_path.split('/')[-1].split('.')[0]) # file name up to its first '.'
+	cv2.imwrite(img_name + '.jpg', imgcv)
\ No newline at end of file
diff --git a/yolo/train.py b/yolo/train.py
new file mode 100644
index 000000000..152a761c7
--- /dev/null
+++ b/yolo/train.py
@@ -0,0 +1,195 @@
+"""
+file: /yolo/train.py
+includes: yolo_batch(), yolo_feed_dict() and yolo_loss()
+together they support the pipeline: 
+    annotation -> minibatch -> loss evaluation -> training
+"""
+
+import tensorflow.contrib.slim as slim
+import tensorflow as tf
+from copy import deepcopy
+from drawer import *
+
+# ignore this function
+def show(im, allobj, S, w, h, cellx, celly):
+    """
+    Debug helper: draw every box of `allobj` on `im` in red and display
+    the image until a key is pressed. obj[5] is read as a grid cell index.
+    """
+    for obj in allobj:
+        row, col = divmod(obj[5], S) # cell index -> grid row / column
+        centerx = (col + obj[1]) * cellx
+        centery = (row + obj[2]) * celly
+        ww, hh = obj[3] * w, obj[4] * h
+        cv2.rectangle(im,
+            (int(centerx - ww/2), int(centery - hh/2)),
+            (int(centerx + ww/2), int(centery + hh/2)),
+            (0,0,255), 2)
+    cv2.imshow("result", im)
+    cv2.waitKey()
+    cv2.destroyAllWindows()
+
+def yolo_batch(train_path, chunk, meta):
+    """
+    Takes `chunk`, one parsed annotation [jpg, [w, h, allobj]], returns
+    (img, tensors): the preprocessed image and the nine placeholder
+    values in yolo_feed_dict() order; (None, None) if a box is off-grid
+    """
+    # meta: S x S grid, B boxes per cell, C classes
+    S, B = meta['side'], meta['num']
+    C, labels = meta['classes'], meta['labels']
+
+    # preprocess (on a copy: yolo_preprocess edits the boxes in place)
+    jpg = chunk[0]; w, h, allobj_ = chunk[1]
+    allobj = deepcopy(allobj_)
+    path = '{}{}'.format(train_path, jpg) # train_path must end with a separator
+    img, allobj = yolo_preprocess(path, allobj)
+
+    # Calculate regression target
+    cellx = 1. * w / S # width of one grid cell, in pixels
+    celly = 1. * h / S # height of one grid cell, in pixels
+    for obj in allobj:
+        centerx = .5*(obj[1]+obj[3]) #xmin, xmax
+        centery = .5*(obj[2]+obj[4]) #ymin, ymax
+        cx = centerx / cellx
+        cy = centery / celly
+        if cx >= S or cy >= S: return None, None
+        obj[3] = float(obj[3]-obj[1]) / w # width as a fraction of the image
+        obj[4] = float(obj[4]-obj[2]) / h # height as a fraction of the image
+        obj[3] = np.sqrt(obj[3])
+        obj[4] = np.sqrt(obj[4])
+        obj[1] = cx - np.floor(cx) # centerx
+        obj[2] = cy - np.floor(cy) # centery
+        obj += [int(np.floor(cy) * S + np.floor(cx))] # obj[5]: grid cell index
+
+    # Calculate placeholders' values
+    probs = np.zeros([S*S,C])
+    confs = np.zeros([S*S,B])
+    coord = np.zeros([S*S,B,4])
+    proid = np.zeros([S*S,C])
+    conid = np.zeros([S*S,B])
+    cooid = np.zeros([S*S,B,4])
+    prear = np.zeros([S*S,4])
+    for obj in allobj: # a later object in the same cell overwrites an earlier one
+        probs[obj[5], :] = [0.] * C
+        probs[obj[5], labels.index(obj[0])] = 1.
+        proid[obj[5], :] = [1] * C
+        coord[obj[5], :, :] = [obj[1:5]] * B
+        prear[obj[5],0] = obj[1] - obj[3]**2 * .5 * S # xleft
+        prear[obj[5],1] = obj[2] - obj[4]**2 * .5 * S # yup
+        prear[obj[5],2] = obj[1] + obj[3]**2 * .5 * S # xright
+        prear[obj[5],3] = obj[2] + obj[4]**2 * .5 * S # ybot
+        confs[obj[5], :] = [1.] * B
+        conid[obj[5], :] = [1.] * B
+        cooid[obj[5], :, :] = [[1.] * 4] * B
+
+    # Finalise the placeholders' values
+    upleft   = np.expand_dims(prear[:,0:2], 1)
+    botright = np.expand_dims(prear[:,2:4], 1)
+    wh = botright - upleft; 
+    area = wh[:,:,0] * wh[:,:,1]
+    upleft   = np.concatenate([upleft] * B, 1) # repeat for each of the B boxes
+    botright = np.concatenate([botright] * B, 1)
+    areas = np.concatenate([area] * B, 1)
+
+    # Assemble the values, each wrapped in a one-element list
+    tensors = [[probs], [confs] , [coord],
+               [proid], [conid] , [cooid],
+               [areas], [upleft], [botright]]
+    
+    return img, tensors
+
+def yolo_feed_dict(net, x_batch, datum):
+    """Map the nine loss placeholders of `net` to the entries of
+    `datum`, taken in the order yolo_batch() lists its tensors.
+    `x_batch` is accepted but not used here."""
+    holders = (
+        net.probs, net.confs, net.coord, net.proid, net.conid,
+        net.cooid, net.areas, net.upleft, net.botright)
+    return {h: datum[i] for i, h in enumerate(holders)}
+
+def yolo_loss(net):
+    """
+    Creates the target/weight placeholders (the ones yolo_feed_dict()
+    fills) and from them and net.out builds net.loss, a weighted
+    squared error, and net.train_op, one RMSProp update on that loss
+    """
+    # meta: loss scales and grid geometry
+    m = net.meta
+    sprob = m['class_scale']
+    sconf = m['object_scale']
+    snoob = m['noobject_scale'] 
+    scoor = m['coord_scale']
+    S, B, C = m['side'], m['num'], m['classes']
+    SS = S * S # number of grid cells
+
+    print 'Loss hyper-parameters:'
+    print '\tside    = {}'.format(m['side'])
+    print '\tbox     = {}'.format(m['num'])
+    print '\tclasses = {}'.format(m['classes'])
+    print '\tscales  = {}'.format([sprob, sconf, snoob, scoor])
+
+    size1 = [None, SS, C] # leading None: batch size left open
+    size2 = [None, SS, B]
+    # target of regression
+    net.probs = tf.placeholder(tf.float32, size1)
+    net.confs = tf.placeholder(tf.float32, size2)
+    net.coord = tf.placeholder(tf.float32, size2 + [4])
+    # weights term for L2 loss
+    net.proid = tf.placeholder(tf.float32, size1)
+    net.conid = tf.placeholder(tf.float32, size2)
+    net.cooid = tf.placeholder(tf.float32, size2 + [4])
+    # material for loss calculation
+    net.upleft = tf.placeholder(tf.float32, size2 + [2])
+    net.botright = tf.placeholder(tf.float32, size2 + [2])
+    net.areas = tf.placeholder(tf.float32, size2)
+
+    # Extract the coordinate prediction from net.out (skip probs and confs)
+    coords = net.out[:, SS * (C + B):]
+    coords = tf.reshape(coords, [-1, SS, B, 4])
+    wh = tf.pow(coords[:,:,:,2:4], 2) * S # unit: grid cell
+    area_pred = wh[:,:,:,0] * wh[:,:,:,1] # unit: grid cell^2 
+    centers = coords[:,:,:,0:2] # [batch, SS, B, 2]
+    floor = centers - (wh * .5) # [batch, SS, B, 2]
+    ceil  = centers + (wh * .5) # [batch, SS, B, 2]
+
+    # calculate the intersection areas of predicted and target boxes
+    intersect_upleft   = tf.maximum(floor, net.upleft) 
+    intersect_botright = tf.minimum(ceil , net.botright)
+    intersect_wh = intersect_botright - intersect_upleft
+    intersect_wh = tf.maximum(intersect_wh, 0.0) # no overlap -> zero size
+    intersect = tf.mul(intersect_wh[:,:,:,0], intersect_wh[:,:,:,1])
+    
+    # calculate the best IOU, set 0.0 confidence for worse boxes
+    iou = tf.div(intersect, net.areas + area_pred - intersect)
+    best_box = tf.equal(iou, tf.reduce_max(iou, [2], True)) # max over the B boxes
+    best_box = tf.to_float(best_box)
+    confs = tf.mul(best_box, net.confs)
+
+    # take care of the weight terms
+    weight_con = snoob*(1.-best_box) + sconf*best_box
+    conid = tf.mul(net.conid, weight_con)
+    weight_coo = tf.concat(3, 4 * [tf.expand_dims(best_box, -1)])
+    cooid = tf.mul(net.cooid, scoor * weight_coo)
+    proid = sprob * net.proid
+
+    # flatten 'em all, then join in net.out's order: probs, confs, coords
+    probs = slim.flatten(net.probs)
+    proid = slim.flatten(proid)
+    confs = slim.flatten(confs)
+    conid = slim.flatten(conid)
+    coord = slim.flatten(net.coord)
+    cooid = slim.flatten(cooid)
+    true = tf.concat(1, [probs, confs, coord])
+    wght = tf.concat(1, [proid, conid, cooid])
+    
+    print 'Building net.loss'
+    net.loss = tf.pow(net.out - true, 2)
+    net.loss = tf.mul(net.loss, wght)
+    net.loss = tf.reduce_sum(net.loss, 1) # sum over each sample's outputs
+    net.loss = .5 * tf.reduce_mean(net.loss) # then average over the batch
+
+    print 'Building net.train_op'
+    optimizer = tf.train.RMSPropOptimizer(net.FLAGS.lr)
+    gradients = optimizer.compute_gradients(net.loss)
+    net.train_op = optimizer.apply_gradients(gradients)
\ No newline at end of file