Skip to content

Commit

Permalink
Added more example pics
Browse files Browse the repository at this point in the history
  • Loading branch information
Div99 committed May 19, 2018
1 parent e1fe6a6 commit 0a7e174
Show file tree
Hide file tree
Showing 7 changed files with 365 additions and 591 deletions.
18 changes: 15 additions & 3 deletions CapGenerator/eval_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def extract_features(filename):
return feature

# generate a description for an image
def generate_desc(model, tokenizer, photo, index_word, max_length, beam_size=10):
def generate_desc(model, tokenizer, photo, index_word, max_length, beam_size=5):

captions = [['startseq', 0.0]]
# seed the generation process
Expand Down Expand Up @@ -91,7 +91,19 @@ def evaluate_model(model, descriptions, photos, tokenizer, index_word, max_lengt
print('BLEU-3: %f' % corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0)))
print('BLEU-4: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))


def eval_test_set(model, descriptions, photos, tokenizer, index_word, max_length):
    """Generate the best caption for every image and pair it with its references.

    For each ``key`` in ``descriptions`` a caption is generated from
    ``photos[key]`` and tokenised with ``str.split``; the reference captions
    for that key are tokenised the same way.

    The result is ordered by the predicted token lists. Predictions and
    references are sorted *together* so that ``actual[i]`` always holds the
    references belonging to ``predicted[i]``.

    Args:
        model: captioning model, forwarded unchanged to ``generate_desc``.
        descriptions: mapping of image key -> list of reference caption strings.
        photos: mapping of image key -> photo features, forwarded to
            ``generate_desc``.
        tokenizer: forwarded unchanged to ``generate_desc``.
        index_word: forwarded unchanged to ``generate_desc``.
        max_length: forwarded unchanged to ``generate_desc``.

    Returns:
        ``(actual, predicted)``: ``actual`` is a list of reference-token-list
        groups and ``predicted`` a list of token lists, index-aligned.
    """
    actual, predicted = [], []
    # step over the whole set
    for key, desc_list in descriptions.items():
        # Use the best caption.
        # NOTE(review): assumes generate_desc returns candidates best-first,
        # each shaped like [caption_text, score] -- confirm against generate_desc.
        best = generate_desc(model, tokenizer, photos[key], index_word, max_length)[0]
        actual.append([d.split() for d in desc_list])
        predicted.append(best[0].split())

    # Sort the pairs jointly, keyed on the prediction only. Sorting `predicted`
    # on its own and then re-deriving `actual` from a zip (as before) replaced
    # the references with predictions and broke the pairing.
    paired = sorted(zip(predicted, actual), key=lambda pair: pair[0])
    predicted = [pred for pred, _ in paired]
    actual = [refs for _, refs in paired]
    return actual, predicted

if __name__ == '__main__':

Expand All @@ -111,7 +123,7 @@ def evaluate_model(model, descriptions, photos, tokenizer, index_word, max_lengt
if args.model:
filename = args.model
else:
filename = 'models/model-ep005-loss3.454-val_loss3.872.h5'
filename = 'models/model-ep005-loss3.504-val_loss3.893.h5'
model = load_model(filename)

if args.image:
Expand Down
67 changes: 54 additions & 13 deletions CapGenerator/generate_model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import tensorflow as tf
from pickle import load
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
Expand All @@ -14,8 +15,13 @@
from keras.layers import TimeDistributed
from keras.layers import concatenate
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam

EMBEDDING_DIM = 128
EMBEDDING_DIM = 256

# Module-level hyperparameters.
# NOTE(review): none of these three names is referenced in the visible hunks
# of this file -- confirm where (or whether) they are consumed.
lstm_layers = 2  # presumably the number of stacked LSTM layers -- TODO confirm
dropout_rate = 0.2  # presumably a dropout fraction -- TODO confirm
learning_rate = 0.001  # presumably the optimizer step size -- TODO confirm

# convert a dictionary of clean descriptions to a list of descriptions
def to_lines(descriptions):
Expand All @@ -41,7 +47,7 @@ def max_length(descriptions):
def create_sequences(tokenizer, max_length, desc_list, photo):
vocab_size = len(tokenizer.word_index) + 1

X1, X2, y = list(), list(), list()
X1, X2, y = [], [], []
# walk through each description for the image
for desc in desc_list:
# encode the sequence
Expand All @@ -61,14 +67,51 @@ def create_sequences(tokenizer, max_length, desc_list, photo):
return np.array(X1), np.array(X2), np.array(y)

# data generator, intended to be used in a call to model.fit_generator()
def data_generator(descriptions, photos, tokenizer, max_length, n_step=1):
    """Yield training batches forever, ``n_step`` images per batch.

    Each batch concatenates the samples that ``create_sequences`` produces
    for up to ``n_step`` consecutive image ids (the last batch of a pass may
    hold fewer images).

    Args:
        descriptions: mapping of image id -> list of caption strings.
        photos: mapping of image id -> photo features; element ``[0]`` of
            each entry is what gets passed to ``create_sequences``.
        tokenizer: forwarded unchanged to ``create_sequences``.
        max_length: forwarded unchanged to ``create_sequences``.
        n_step: number of images combined into one yielded batch (>= 1).

    Yields:
        ``[[images, sequences], next_words]`` with each part as a NumPy array.

    Raises:
        ValueError: if ``n_step`` is smaller than 1 or ``descriptions`` is
            empty. Either case would otherwise loop forever without yielding.
            As this is a generator, the error surfaces on the first
            ``next()`` call rather than at construction time.
    """
    if n_step < 1:
        raise ValueError('n_step must be >= 1, got %r' % (n_step,))

    # loop forever over images
    while 1:
        # loop over photo identifiers in the dataset
        keys = list(descriptions.keys())
        if not keys:
            raise ValueError('descriptions is empty; nothing to generate')

        for start in range(0, len(keys), n_step):
            Ximages, XSeq, y = [], [], []
            for image_id in keys[start:start + n_step]:
                # retrieve the photo feature
                photo = photos[image_id][0]
                desc_list = descriptions[image_id]
                in_img, in_seq, out_word = create_sequences(
                    tokenizer, max_length, desc_list, photo)
                # Flatten the per-image samples into the running batch.
                Ximages.extend(in_img)
                XSeq.extend(in_seq)
                y.extend(out_word)
            yield [[np.array(Ximages), np.array(XSeq)], np.array(y)]

def categorical_crossentropy_from_logits(y_true, y_pred):
    """Softmax cross-entropy over every timestep except the last one.

    Both tensors are indexed as rank-3 ``[:, :-1, :]``, i.e. the final entry
    along axis 1 is dropped before the loss is computed.

    Args:
        y_true: target distribution, passed as ``labels``.
        y_pred: model output, passed as ``logits``; it must therefore be
            unnormalised scores, not softmax probabilities.

    Returns:
        The tensor returned by ``tf.nn.softmax_cross_entropy_with_logits``;
        no further reduction is applied here.
    """
    # Discard the last timestep of both tensors.
    labels = y_true[:, :-1, :]
    logits = y_pred[:, :-1, :]
    return tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)

def categorical_accuracy_with_variable_timestep(y_true, y_pred):
    """Mean arg-max accuracy over non-padding positions.

    The last entry along axis 1 of both tensors is dropped, the remaining
    positions are flattened into rows, and rows whose ``y_true`` is entirely
    zero (padding words) are excluded from the average.

    Args:
        y_true: rank-3 target tensor; all-zero rows mark padding.
        y_pred: rank-3 prediction tensor.
            NOTE(review): it is reshaped using the last-axis size of
            ``y_true``, so both presumably share the same shape -- confirm.

    Returns:
        Scalar float32 tensor: the fraction of kept rows where the arg-max of
        ``y_pred`` equals the arg-max of ``y_true``.
    """
    # Discard the last timestep.
    targets = y_true[:, :-1, :]
    outputs = y_pred[:, :-1, :]

    # Flatten the timestep dimension so each row is one position.
    width = tf.shape(targets)[-1]
    targets = tf.reshape(targets, [-1, width])
    outputs = tf.reshape(outputs, [-1, width])

    # Keep only rows that are not all zeros; all-zero rows are padding words.
    is_padding = tf.reduce_all(tf.equal(targets, 0), axis=-1)
    keep = tf.logical_not(is_padding)
    targets = tf.boolean_mask(targets, keep)
    outputs = tf.boolean_mask(outputs, keep)

    hits = tf.equal(tf.argmax(targets, axis=1), tf.argmax(outputs, axis=1))
    return tf.reduce_mean(tf.cast(hits, dtype=tf.float32))

# define the captioning model
def define_model(vocab_size, max_length):
Expand All @@ -80,14 +123,12 @@ def define_model(vocab_size, max_length):

# embedding
inputs2 = Input(shape=(max_length,))
emb2 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
emb3 = LSTM(256, return_sequences=True)(emb2)
emb4 = TimeDistributed(Dense(EMBEDDING_DIM, activation='relu'))(emb3)
emb2 = Embedding(vocab_size, EMBEDDING_DIM, mask_zero=True)(inputs2)

# merge inputs
merged = concatenate([fe3, emb4])
merged = concatenate([fe3, emb2])
# language model (decoder)
lm2 = LSTM(1000)(merged)
lm2 = LSTM(500, return_sequences=False)(merged)
#lm3 = Dense(500, activation='relu')(lm2)
outputs = Dense(vocab_size, activation='softmax')(lm2)

Expand Down
Loading

0 comments on commit 0a7e174

Please sign in to comment.