fixed output bug, refactored slightly #18

Open
wants to merge 11 commits into master
3 changes: 3 additions & 0 deletions .gitignore
@@ -1,5 +1,8 @@
*.dat
*.log
*.caffemodel
*.pyc
outputs/*
test/*
.DS_Store
.*.swp
127 changes: 127 additions & 0 deletions models/vgg13/VGG_ILSVRC_13_layers_deploy.prototxt
@@ -0,0 +1,127 @@
name: "VGG_ILSVRC_13_layers"
input: "data"
force_backward: true
input_shape {
dim: 10
dim: 3
dim: 224
dim: 224
}
layers {
bottom: "data"
top: "conv1"
name: "conv1"
type: CONVOLUTION
convolution_param {
num_output: 96
kernel_size: 7
stride: 2
}
}
layers {
bottom: "conv1"
top: "conv1"
name: "relu1"
type: RELU
}
layers {
bottom: "conv1"
top: "norm1"
name: "norm1"
type: LRN
lrn_param {
local_size: 5
alpha: 0.0005
beta: 0.75
k: 2
}
}
layers {
bottom: "norm1"
top: "pool1"
name: "pool1"
type: POOLING
pooling_param {
pool: MAX
kernel_size: 3
stride: 3
}
}
layers {
bottom: "pool1"
top: "conv2"
name: "conv2"
type: CONVOLUTION
convolution_param {
num_output: 256
kernel_size: 5
}
}
layers {
bottom: "conv2"
top: "conv2"
name: "relu2"
type: RELU
}
layers {
bottom: "conv2"
top: "pool2"
name: "pool2"
type: POOLING
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layers {
bottom: "pool2"
top: "conv3"
name: "conv3"
type: CONVOLUTION
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv3"
top: "conv3"
name: "relu3"
type: RELU
}
layers {
bottom: "conv3"
top: "conv4"
name: "conv4"
type: CONVOLUTION
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv4"
top: "conv4"
name: "relu4"
type: RELU
}
layers {
bottom: "conv4"
top: "conv5"
name: "conv5"
type: CONVOLUTION
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layers {
bottom: "conv5"
top: "conv5"
name: "relu5"
type: RELU
}
89 changes: 70 additions & 19 deletions style.py
@@ -26,6 +26,14 @@
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

TODOs:
- Verify that model_WEIGHTS works as expected
    * Preliminary exploration suggests that changing layer weights
      does not influence the converged output
- Fix BFGS for the normalized VGG model
    * Changing the ratio parameter has no effect on the output
"""

# system imports
@@ -96,6 +104,38 @@
parser.add_argument("-v", "--verbose", action="store_true", required=False, help="print minimization outputs")
parser.add_argument("-o", "--output", default=None, required=False, help="output path")

def _normalize(mat):
    """Rescales a matrix to [0, 1]; a constant matrix is returned unchanged."""
    rng = np.max(mat) - np.min(mat)
    if rng == 0:
        return mat
    return (mat - np.min(mat)) / rng

def _make_ramp_filter(dims, low_pass=False):
"""
Creates a ramp function for manipulating images in Fourier space.
NOTE: Make sure to fftshift output if it is being used for image whitening.

:param tuple dims:
Output dimensions. Expects (height, width, channels).
:param bool low_pass:
If True, the output is multiplied by a Gaussian low-pass filter.
"""

# Ramp function
nyq = np.array((np.int32(np.floor(dims[0]/2)), np.int32(np.floor(dims[1]/2))))
grid = np.mgrid[-nyq[0]:nyq[0]+(dims[0]%(nyq[0]*2)), -nyq[1]:nyq[1]+(dims[1]%(nyq[1]*2))]
ramp_filter = np.sqrt(np.square(grid[0]) + np.square(grid[1]))
ramp_filter = _normalize(ramp_filter)

if low_pass:
# Gaussian LPF
sig = 0.78125*nyq # Cutoff frequencies for gaussian
LPF = np.exp(-0.5*(np.square(ramp_filter/(sig[0]/2.0)) + np.square(ramp_filter/(sig[1]/2.0))))
out_filt = np.dstack((ramp_filter * LPF,)*dims[2])
else:
out_filt = np.dstack((ramp_filter,)*dims[2])

return out_filt
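
# Usage sketch for the ramp filter (assumed example values; `img` stands in for
# any (H, W, 3) float array). Per the NOTE in the docstring, fftshift the filter
# before multiplying an image's spectrum, e.g. for whitening:
#
#   img = np.random.rand(224, 224, 3)
#   ramp = np.fft.fftshift(_make_ramp_filter(img.shape), axes=(0, 1))
#   spectrum = np.fft.fft2(img, axes=(0, 1)) * ramp
#   whitened = _normalize(np.real(np.fft.ifft2(spectrum, axes=(0, 1))))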

def _compute_style_grad(F, G, G_style, layer):
"""
@@ -300,6 +340,9 @@ def load_model(self, model_file, pretrained_file, mean_file):
Path to mean file.
"""

assert(os.path.isfile(model_file))
assert(os.path.isfile(pretrained_file))

# load net (suppressing stderr output)
null_fds = os.open(os.devnull, os.O_RDWR)
out_orig = os.dup(2)
@@ -341,33 +384,34 @@ def _rescale_net(self, img):
self.net.blobs["data"].reshape(*new_dims)
self.transformer.inputs["data"] = new_dims

    def _make_noise_input(self, BETA):
        """
            Creates an initial input (generated) image from spectrally shaped noise.

            :param str BETA:
                Spectral exponent of the noise; 0 produces white noise and
                -1 produces pink (1/f) noise.
        """

        BETA = int(BETA)

        # specify input dimensions as (height, width, channels)
        dims = tuple(self.net.blobs["data"].data.shape[2:]) + \
               (self.net.blobs["data"].data.shape[1], )

if BETA == 0: # white noise
x0 = np.random.randn(*dims)
else:
ramp_filter = _make_ramp_filter(dims)
ramp_filter[ramp_filter == 0] = np.min(ramp_filter[ramp_filter != 0]) - \
np.finfo(ramp_filter.item(0)).eps
img_filter = _normalize(np.power(ramp_filter,BETA))
color_img = np.zeros(dims)
for ch in range(dims[2]):
img_ch = np.abs(np.fft.ifft2(img_filter[:,:,ch] * np.random.randn(*dims[:2])))
#FIXME: Color channels should reflect natural scene statistics
color_img[:,:,ch] = np.copy(_normalize(img_ch))
x0 = self.transformer.preprocess("data", color_img)

return x0

def _create_pbar(self, max_iter):
@@ -428,6 +472,10 @@ def transfer_style(self, img_style, img_content, length=512, ratio=1e5,
img0 = self.transformer.preprocess("data", init)
elif init == "content":
img0 = self.transformer.preprocess("data", img_content)
elif init == "pink":
img0 = self._make_noise_input("-1")
elif init == "white":
img0 = self._make_noise_input("0")
elif init == "mixed":
img0 = 0.95*self.transformer.preprocess("data", img_content) + \
0.05*self.transformer.preprocess("data", img_style)
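        # Usage sketch for the new init modes (assumes `st` is an instance of
        # this class and img_style / img_content are preloaded images):
        #
        #   st.transfer_style(img_style, img_content, init="pink")    # 1/f-noise start
        #   st.transfer_style(img_style, img_content, init="white")   # white-noise start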
@@ -511,6 +559,9 @@ def main(args):
out_path = "outputs/{0}-{1}-{2}-{3}-{4}-{5}.jpg".format(*out_path_fmt)

# DONE!
    out_dir = os.path.dirname(out_path)
    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir)
imsave(out_path, img_as_ubyte(img_out))
logging.info("Output saved to {0}.".format(out_path))
