fixed output bug, refactored slightly #18

Open
wants to merge 11 commits into master
3 changes: 3 additions & 0 deletions .gitignore
@@ -1,5 +1,8 @@
*.dat
*.log
*.caffemodel
*.pyc
outputs/*
test/*
.DS_Store
.*.swp
127 changes: 127 additions & 0 deletions models/vgg13/VGG_ILSVRC_13_layers_deploy.prototxt
@@ -0,0 +1,127 @@
name: "VGG_ILSVRC_13_layers"
input: "data"
force_backward: true
input_shape {
dim: 10
dim: 3
dim: 224
dim: 224
}
layers {
bottom: "data"
top: "conv1"
name: "conv1"
type: CONVOLUTION
convolution_param {
num_output: 96
kernel_size: 7
stride: 2
}
}
layers {
bottom: "conv1"
top: "conv1"
name: "relu1"
type: RELU
}
layers {
bottom: "conv1"
top: "norm1"
name: "norm1"
type: LRN
lrn_param {
local_size: 5
alpha: 0.0005
beta: 0.75
k: 2
}
}
layers {
bottom: "norm1"
top: "pool1"
name: "pool1"
type: POOLING
pooling_param {
pool: MAX
kernel_size: 3
stride: 3
}
}
layers {
bottom: "pool1"
top: "conv2"
name: "conv2"
type: CONVOLUTION
convolution_param {
num_output: 256
kernel_size: 5
}
}
layers {
bottom: "conv2"
top: "conv2"
name: "relu2"
type: RELU
}
layers {
bottom: "conv2"
top: "pool2"
name: "pool2"
type: POOLING
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layers {
bottom: "pool2"
top: "conv3"
name: "conv3"
type: CONVOLUTION
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv3"
top: "conv3"
name: "relu3"
type: RELU
}
layers {
bottom: "conv3"
top: "conv4"
name: "conv4"
type: CONVOLUTION
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv4"
top: "conv4"
name: "relu4"
type: RELU
}
layers {
bottom: "conv4"
top: "conv5"
name: "conv5"
type: CONVOLUTION
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv5"
top: "conv5"
name: "relu5"
type: RELU
}
89 changes: 70 additions & 19 deletions style.py
@@ -26,6 +26,14 @@
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

TODOs:
- Verify that model_WEIGHTS works as expected
    * Preliminary exploration suggests that changing layer weights
      does not influence the converged output
- Fix BFGS for the normalized VGG model
    * Changing the ratio parameter has no effect on the output
"""

# system imports
@@ -96,6 +104,38 @@
parser.add_argument("-v", "--verbose", action="store_true", required=False, help="print minimization outputs")
parser.add_argument("-o", "--output", default=None, required=False, help="output path")

def _normalize(mat):
    """Rescales a matrix to [0, 1]; a constant matrix is returned unchanged."""
    rng = np.max(mat) - np.min(mat)
    if rng == 0:
        return mat
    return (mat - np.min(mat)) / rng

def _make_ramp_filter(dims, low_pass=False):
"""
Creates a ramp function for manipulating images in Fourier space.
NOTE: Make sure to fftshift output if it is being used for image whitening.

:param tuple dims:
Output dimensions. Expects (height, width, channels).
:param bool low_pass:
If True, the output is multiplied by a Gaussian low-pass filter.
"""

# Ramp function
nyq = np.array((np.int32(np.floor(dims[0]/2)), np.int32(np.floor(dims[1]/2))))
grid = np.mgrid[-nyq[0]:nyq[0]+(dims[0]%(nyq[0]*2)), -nyq[1]:nyq[1]+(dims[1]%(nyq[1]*2))]
ramp_filter = np.sqrt(np.square(grid[0]) + np.square(grid[1]))
ramp_filter = _normalize(ramp_filter)

if low_pass:
# Gaussian LPF
sig = 0.78125*nyq # Cutoff frequencies for gaussian
LPF = np.exp(-0.5*(np.square(ramp_filter/(sig[0]/2.0)) + np.square(ramp_filter/(sig[1]/2.0))))
out_filt = np.dstack((ramp_filter * LPF,)*dims[2])
else:
out_filt = np.dstack((ramp_filter,)*dims[2])

return out_filt
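
# Usage sketch for the ramp filter (assumed example values; `img` stands in for
# any (H, W, 3) float array). Per the NOTE in the docstring, fftshift the filter
# before multiplying an image's spectrum, e.g. for whitening:
#
#   img = np.random.rand(224, 224, 3)
#   ramp = np.fft.fftshift(_make_ramp_filter(img.shape), axes=(0, 1))
#   spectrum = np.fft.fft2(img, axes=(0, 1)) * ramp
#   whitened = _normalize(np.real(np.fft.ifft2(spectrum, axes=(0, 1))))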

def _compute_style_grad(F, G, G_style, layer):
"""
@@ -300,6 +340,9 @@ def load_model(self, model_file, pretrained_file, mean_file):
Path to mean file.
"""

assert(os.path.isfile(model_file))
assert(os.path.isfile(pretrained_file))

# load net (suppressing stderr output)
null_fds = os.open(os.devnull, os.O_RDWR)
out_orig = os.dup(2)
@@ -341,33 +384,34 @@ def _rescale_net(self, img):
self.net.blobs["data"].reshape(*new_dims)
self.transformer.inputs["data"] = new_dims

    def _make_noise_input(self, BETA):
        """
            Creates an initial input (generated) image from spectrally shaped noise.

            :param str BETA:
                Spectral exponent of the noise; 0 produces white noise and
                -1 produces pink (1/f) noise.
        """

        BETA = int(BETA)

        # specify input dimensions as (height, width, channels)
        dims = tuple(self.net.blobs["data"].data.shape[2:]) + \
               (self.net.blobs["data"].data.shape[1], )

if BETA == 0: # white noise
x0 = np.random.randn(*dims)
else:
ramp_filter = _make_ramp_filter(dims)
ramp_filter[ramp_filter == 0] = np.min(ramp_filter[ramp_filter != 0]) - \
np.finfo(ramp_filter.item(0)).eps
img_filter = _normalize(np.power(ramp_filter,BETA))
color_img = np.zeros(dims)
for ch in range(dims[2]):
img_ch = np.abs(np.fft.ifft2(img_filter[:,:,ch] * np.random.randn(*dims[:2])))
#FIXME: Color channels should reflect natural scene statistics
color_img[:,:,ch] = np.copy(_normalize(img_ch))
x0 = self.transformer.preprocess("data", color_img)

return x0

def _create_pbar(self, max_iter):
@@ -428,6 +472,10 @@ def transfer_style(self, img_style, img_content, length=512, ratio=1e5,
img0 = self.transformer.preprocess("data", init)
elif init == "content":
img0 = self.transformer.preprocess("data", img_content)
elif init == "pink":
img0 = self._make_noise_input("-1")
elif init == "white":
img0 = self._make_noise_input("0")
elif init == "mixed":
img0 = 0.95*self.transformer.preprocess("data", img_content) + \
0.05*self.transformer.preprocess("data", img_style)
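        # Usage sketch for the new init modes (assumes `st` is an instance of
        # this class and img_style / img_content are preloaded images):
        #
        #   st.transfer_style(img_style, img_content, init="pink")    # 1/f-noise start
        #   st.transfer_style(img_style, img_content, init="white")   # white-noise start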
@@ -511,6 +559,9 @@ def main(args):
out_path = "outputs/{0}-{1}-{2}-{3}-{4}-{5}.jpg".format(*out_path_fmt)

# DONE!
    out_dir = os.path.dirname(out_path)
    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir)
imsave(out_path, img_as_ubyte(img_out))
logging.info("Output saved to {0}.".format(out_path))
