
Commit: batch_norm
thtrieu committed Nov 12, 2016
1 parent a0da883 commit 84e9f4c
Showing 4 changed files with 79 additions and 65 deletions.
6 changes: 2 additions & 4 deletions configs/process.py
@@ -57,8 +57,8 @@ def discoverer(weightf, s):
kernel = d['size'] ** 2 * channel * d['filters']
allfloat -= kernel + d['filters']
channel = d['filters']
- if 'batch_normalize' in d:
- allfloat -= 3* d['filters']
+ if 'batch_normalize' in d: # scale, mean, var
+ allfloat -= 3* d['filters']
elif d['type'] == '[connected]':
if dense is False:
out = out1 = d['output']
@@ -124,8 +124,6 @@ def cfg_yielder(model, undiscovered = True):
w = h = new
c = d['filters']
l = w * h * c
- if 'batch_normalize' in d:
- yield['bnrm', 0, 0, c, 0, 0]
if 'activation' in d: yield ['leaky']

if d['type'] == '[maxpool]':
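
A note on the accounting above: a batch-normalized convolutional layer stores three extra per-filter vectors in the .weights file (scale, rolling mean, rolling variance) in addition to its biases and kernel, which is why discoverer subtracts an extra 3 * d['filters'] floats. A minimal sketch of that bookkeeping follows; the helper name conv_float_count is illustrative and not part of the repository.

```python
def conv_float_count(d, in_channels):
    """Floats consumed from a .weights file by one [convolutional] section."""
    kernel = d['size'] ** 2 * in_channels * d['filters']   # kernel weights
    count = kernel + d['filters']                          # + biases
    if 'batch_normalize' in d:
        count += 3 * d['filters']                          # + scale, mean, var
    return count

# e.g. a 3x3 conv, 3 input channels, 16 filters, with batch norm:
# 3*3*3*16 + 16 + 3*16 = 496 floats
print conv_float_count({'size': 3, 'filters': 16, 'batch_normalize': 1}, 3)
```
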
79 changes: 31 additions & 48 deletions darknet.py
@@ -25,17 +25,14 @@ def __init__(self, type, size = 0,
self.size = size
self.c, self.n = (c, n)
self.h, self.w = (h, w)
+ # any trainable var goes in here:
+ self.p = dict()

class dropout_layer(layer):
def __init__(self, p):
- self.type = 'dropout'
+ layer.__init__(self, 'dropout')
self.prob = p

- class btchnrm_layer(layer):
- def __init__(self, size, c, n, h, w ): # <- cryin' haha
- layer.__init__(self, 'batchnorm',
- size, c, n, h, w)

class maxpool_layer(layer):
def __init__(self, size, c, n, h, w,
stride, pad ):
@@ -46,11 +43,12 @@ def __init__(self, size, c, n, h, w,

class convolu_layer(layer):
def __init__(self, size, c, n, h, w,
- stride, pad ):
+ stride, pad, batch_norm ): # <- cryin'
layer.__init__(self, 'convolutional',
size, c, n, h, w)
self.stride = stride
self.pad = pad
+ self.batch_norm = bool(batch_norm)

class connect_layer(layer):
def __init__(self, size, c, n, h, w,
@@ -86,7 +84,6 @@ def parse(self, model):
for i, info in enumerate(layers):
if i == 0: self.meta = info; continue
if len(info) == 1: new = layer(type = info[0])
- if info[0] == 'bnrm': new = btchnrm_layer(*info[1:])
if info[0] == 'drop': new = dropout_layer(*info[1:])
if info[0] == 'conv': new = convolu_layer(*info[1:])
if info[0] == 'pool': new = maxpool_layer(*info[1:])
@@ -100,58 +97,44 @@ def loadWeights(self, weight_path):
for i in range(len(self.layers)):
l = self.layers[i]

- # if(state.train){
- # mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean);
- # variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance);
- # normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w);
- # } else {
- # normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w);
- # }
- # scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w);

- # int num = l.n*l.c*l.size*l.size;
- # fwrite(l.biases, sizeof(float), l.n, fp);
- # if (l.batch_normalize){
- # fwrite(l.scales, sizeof(float), l.n, fp);
- # fwrite(l.rolling_mean, sizeof(float), l.n, fp);
- # fwrite(l.rolling_variance, sizeof(float), l.n, fp);
- # }
- # fwrite(l.weights, sizeof(float), num, fp);
+ # Convolution with bn: conv -> normalize -> scale -> add bias
+ # Saving conv with bn: bias -> scale -> mean -> var -> kernel


if l.type == "convolutional":
weight_number = l.n * l.c * l.size * l.size
- l.biases = np.memmap(weight_path, mode = 'r',
+ l.p['biases'] = np.memmap(weight_path, mode = 'r',
offset = offset, shape = (),
dtype = '({})float32,'.format(l.n))
offset += 4 * l.n
- l.weights = np.memmap(weight_path, mode = 'r',

+ if l.batch_norm:
+ l.p['scale'] = np.memmap(weight_path, mode = 'r',
+ offset = offset, shape = (),
+ dtype = '({})float32,'.format(l.n))
+ offset += 4 * l.n
+ l.p['mean'] = np.memmap(weight_path, mode = 'r',
+ offset = offset, shape = (),
+ dtype = '({})float32,'.format(l.n))
+ offset += 4 * l.n
+ l.p['var'] = np.memmap(weight_path, mode = 'r',
+ offset = offset, shape = (),
+ dtype = '({})float32,'.format(l.n))
+ offset += 4 * l.n

+ l.p['kernel'] = np.memmap(weight_path, mode = 'r',
offset = offset, shape = (),
dtype = '({})float32,'.format(weight_number))
offset += 4 * weight_number

elif l.type == "batchnorm":
l.biases = np.memmap(weight_path, mode = 'r',
offset = offset, shape = (),
dtype = '({})float32,'.format(l.n))
offset += 4 * l.n
l.weights = np.memmap(weight_path, mode = 'r',
offset = offset, shape = (),
dtype = '({})float32,'.format(l.n))
offset += 4 * l.n
l.weights = np.memmap(weight_path, mode = 'r',
offset = offset, shape = (),
dtype = '({})float32,'.format(l.n))
offset += 4 * l.n


elif l.type == "connected":
bias_number = l.output_size
weight_number = l.output_size * l.input_size
- l.biases = np.memmap(weight_path, mode = 'r',
+ l.p['biases'] = np.memmap(weight_path, mode = 'r',
offset = offset, shape = (),
dtype = '({})float32,'.format(bias_number))
offset += bias_number * 4
- l.weights = np.memmap(weight_path, mode = 'r',
+ l.p['weights'] = np.memmap(weight_path, mode = 'r',
offset = offset, shape = (),
dtype = '({})float32,'.format(weight_number))
offset += weight_number * 4
Expand All @@ -167,14 +150,14 @@ def loadWeights(self, weight_path):
l = self.layers[i]

if l.type == 'convolutional':
- weight_array = l.weights
+ weight_array = l.p['kernel']
weight_array = np.reshape(weight_array,
[l.n, l.c, l.size, l.size])
weight_array = weight_array.transpose([2,3,1,0])
- l.weights = weight_array
+ l.p['kernel'] = weight_array

if l.type == 'connected':
- weight_array = l.weights
+ weight_array = l.p['weights']
weight_array = np.reshape(weight_array,
[l.input_size, l.output_size])
- l.weights = weight_array
+ l.p['weights'] = weight_array
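
To make the new loading path above easier to follow: for a convolutional layer with batch_norm set, the commit reads parameters from the .weights file in the order its comment documents (biases, then scale, mean, variance, then the kernel), advancing the byte offset by 4 bytes per float32. A rough sketch of that order, under the assumption of flat float32 arrays; the helper read_conv_bn is illustrative, not code from the repository:

```python
import numpy as np

def read_conv_bn(weight_path, offset, n, c, size):
    """Read one batch-normalized conv layer: bias -> scale -> mean -> var -> kernel."""
    p = {}
    for key, count in [('biases', n), ('scale', n), ('mean', n),
                       ('var', n), ('kernel', n * c * size * size)]:
        p[key] = np.memmap(weight_path, mode='r', offset=offset,
                           dtype='float32', shape=(count,))
        offset += 4 * count   # each float32 is 4 bytes
    return p, offset
```
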
46 changes: 40 additions & 6 deletions ops.py
@@ -12,18 +12,52 @@ def convl(l, x, name):
padding = [l.pad, l.pad]
l.pad = 'VALID'
x = tf.pad(x, [[0, 0], padding, padding, [0, 0]])
- x = tf.nn.conv2d(x, l.weights,
+ x = tf.nn.conv2d(x, l.p['kernel'],
padding = l.pad, name = name,
strides=[1, l.stride, l.stride, 1])
- # if l.batch_norm == 1: x = slim.batch_norm(x)
- # else: x = tf.nn.bias_add(x, l.b)
- return tf.nn.bias_add(x, l.biases)
+ if l.batch_norm:
+ x = batchnorm(l, x, '{}-bnorm'.format(name))
+ return tf.nn.bias_add(x, l.p['biases'])

- def bnorm(l, x, name):
+ def batchnorm(l, x, name):
return x

+ class batch_norm(object):
+ """Code modification of http://stackoverflow.com/a/33950177"""
+ def __init__(self, epsilon=1e-5, momentum = 0.9, name="batch_norm"):
+ with tf.variable_scope(name):
+ self.epsilon = epsilon
+ self.momentum = momentum
+ self.ema = tf.train.ExponentialMovingAverage(decay=self.momentum)
+ self.name = name

+ def __call__(self, l, x):
+ shape = x.get_shape().as_list()

+ if train:
+ with tf.variable_scope(self.name) as scope:
+ self.beta = tf.get_variable("beta", [shape[-1]],
+ initializer=tf.constant_initializer(0.))
+ self.gamma = tf.get_variable("gamma", [shape[-1]],
+ initializer=tf.random_normal_initializer(1., 0.02))

+ batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
+ ema_apply_op = self.ema.apply([batch_mean, batch_var])
+ self.ema_mean, self.ema_var = self.ema.average(batch_mean), self.ema.average(batch_var)

+ with tf.control_dependencies([ema_apply_op]):
+ mean, var = tf.identity(batch_mean), tf.identity(batch_var)
+ else:
+ mean, var = self.ema_mean, self.ema_var

+ normed = tf.nn.batch_norm_with_global_normalization(
+ x, mean, var, self.beta, self.gamma, self.epsilon, scale_after_normalization=True)

+ return normed

def dense(l, x, name):
- return tf.nn.xw_plus_b(x, l.weights, l.biases, name = name)
+ return tf.nn.xw_plus_b(x, l.p['weights'],
+ l.p['biases'], name = name)

def maxpool(l, x, name):
l.pad = 'VALID'
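
Note that batchnorm() above is still a stub (it returns x unchanged) and the batch_norm class refers to an undefined train flag, so the normalization itself is not wired in yet by this commit. For orientation only, a sketch of what an inference-time batchnorm() could compute from the statistics darknet.py loads into l.p; the epsilon value is an assumption:

```python
import tensorflow as tf

def batchnorm(l, x, name, epsilon=1e-5):
    # x: [batch, height, width, channels]; each l.p[...] vector has shape [channels]
    normed = (x - l.p['mean']) / tf.sqrt(l.p['var'] + epsilon)
    return tf.identity(normed * l.p['scale'], name=name)
```
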
13 changes: 6 additions & 7 deletions tfnet.py
@@ -13,7 +13,7 @@
from data import *

const_layer = ['leaky', 'dropout']
- var_layer = ['convolutional', 'connected', 'batchnorm']
+ var_layer = ['convolutional', 'connected']

class TFNet(object):
def __init__(self, darknet, FLAGS):
@@ -34,21 +34,20 @@ def __init__(self, darknet, FLAGS):
else: name = l.type+'-{}'.format(i)
# no variable when saving to .pb file
if l.type in var_layer and not FLAGS.savepb:
- l.biases = tf.Variable(l.biases)
- l.weights = tf.Variable(l.weights)
+ for var in l.p: l.p[var] = tf.Variable(l.p[var])
arg = [l, now, name]
if l.type=='convolutional': now = convl(*arg)
elif l.type == 'connected': now = dense(*arg)
- elif l.type == 'batchnorm': now = bnorm(*arg)
elif l.type == 'maxpool': now = maxpool(*arg)
elif l.type == 'flatten': now = flatten(*arg[1:])
elif l.type == 'leaky' : now = leaky(*arg[1:])
# Dropout
elif l.type == 'dropout' and not FLAGS.savepb:
- self.drop[name] = tf.placeholder(tf.float32)
- self.drop[name + '_'] = l.prob
- self.feed[self.drop[name]] = self.drop[name+'_']
- print 'Dropout p = {}'.format(l.prob)
+ self.drop[name] = tf.placeholder(tf.float32)
+ drop_value_name = '{}_'.format(name)
+ self.drop[drop_value_name] = l.prob
+ self.feed[self.drop[name]] = self.drop[drop_value_name]
now = dropout(now, self.drop[name], name)
if l.type not in const_layer: print now.get_shape()

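The tfnet.py change above is the payoff of the new l.p dict: wrapping every trainable array as a TF variable becomes a single loop, regardless of how many parameters a layer type carries. A self-contained sketch of the pattern (the tiny Layer class here is illustrative, not darkflow's):

```python
import numpy as np
import tensorflow as tf

class Layer(object):
    def __init__(self):
        self.p = {}        # any trainable var goes in here

l = Layer()
l.p['biases'] = np.zeros(16, dtype=np.float32)
l.p['kernel'] = np.zeros((3, 3, 3, 16), dtype=np.float32)

for var in l.p:            # mirrors the loop in tfnet.py
    l.p[var] = tf.Variable(l.p[var])
```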
