
Commit

minor refactor to improve readability, major refactor of model still pending

heavani committed Apr 11, 2019
1 parent 459353d commit 4c3c29d
Showing 4 changed files with 45 additions and 459 deletions.
PythonROUGE.py: 3 changes, 1 addition & 2 deletions
@@ -58,8 +58,7 @@ def PythonROUGE(guess_summ_list,ref_summ_list,ngram_order=2):
xml_file.write('</EVAL>\n')
xml_file.write('</ROUGE-EVAL>\n')
xml_file.close()



# this is the file where the output of ROUGE will be stored
ROUGE_output_path = prefix + 'ROUGE_result.txt'

SeqUnit.py: 58 changes, 27 additions & 31 deletions
@@ -10,7 +10,6 @@
from dualAttentionUnit import dualAttentionWrapper
from LstmUnit import LstmUnit
from fgateLstmUnit import fgateLstmUnit
from OutputUnit import OutputUnit
from model import *


@@ -86,15 +85,6 @@ def __init__(self, batch_size, hidden_size, emb_size, field_size, pos_size, sour

context_outputs = self.define_decoder_arch()

# get start token #FIXME why
logits0 = context_outputs['logits'][:, -1, :]
dist0 = tf.nn.softmax(logits0) # start token
x0 = tf.cast(tf.argmax(dist0, 1), tf.int32)

# FIXME what
past0 = context_outputs['presents']
hidden0 = context_outputs['hidden'][:, -1, :]

# get GPT embeddings
self.lookup_all_embeddings()

@@ -113,11 +103,19 @@ def __init__(self, batch_size, hidden_size, emb_size, field_size, pos_size, sour
self.units.update({'attention': self.att_layer})

# loss functions
# calculate those locations where field values are present
self.copy_gate_mask = tf.cast(
tf.greater(self.decoder_pos_input, tf.zeros_like(self.decoder_pos_input)), tf.float32)
self.copy_gate_mask = tf.concat([self.copy_gate_mask, tf.zeros([tf.shape(self.encoder_input)[0], 1], tf.float32)], 1)

# decoder for training
# get start values to start gpt generation
logits0 = context_outputs['logits'][:, -1, :]
dist0 = tf.nn.softmax(logits0) # start token
x0 = tf.cast(tf.argmax(dist0, 1), tf.int32)
past0 = context_outputs['presents']
hidden0 = context_outputs['hidden'][:, -1, :]

de_outputs, _, self.de_conv_loss, self.copy_gate_loss = self.decoder_t(self.decoder_input, self.decoder_len, x0, past0, hidden0)

# decoder for testing
@@ -163,30 +161,30 @@ def __init__(self, batch_size, hidden_size, emb_size, field_size, pos_size, sour
self.grads, _ = tf.clip_by_global_norm(tf.gradients(self.mean_loss, train_params, colocate_gradients_with_ops=True), self.grad_clip)

# accumulate gradient
self.opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
self.acc_gradients = list(map(lambda param: tf.get_variable(param.name.split(":")[0],
                                             param.get_shape(), param.dtype,
                                             tf.constant_initializer(0.0), trainable=False),
                              train_params))

# accumulators for the loss terms (re-zeroed after each parameter update)
self._loss = tf.get_variable("acc_loss", (), tf.float32, tf.constant_initializer(0.0), trainable=False)
self._cov_loss = tf.get_variable("acc_cov_loss", (), tf.float32, tf.constant_initializer(0.0), trainable=False)
self._gate_loss = tf.get_variable("acc_gate_loss", (), tf.float32, tf.constant_initializer(0.0), trainable=False)

self.accumulate_gradients()

# train update
self.update = self.opt.apply_gradients(
    zip(list(map(lambda v: v.value(), self.acc_gradients)), train_params), global_step=self.global_step)

# collect all values to reset after updating with accumulated gradient
self.reset = list(map(lambda param: param.initializer, self.acc_gradients))
self.reset.append(self._loss.initializer)
self.reset.append(self._cov_loss.initializer)
self.reset.append(self._gate_loss.initializer)

self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

def accumulate_gradients(self):
# We abuse the gradient descent optimizer for accumulating gradients and loss (summing)
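For readers skimming the diff, here is a minimal, hypothetical sketch of the accumulation pattern the constructor wires up (TF1.x graph code). It sums into the accumulator variables with plain tf.assign_add instead of the "abuse the gradient descent optimizer" trick mentioned in the comment above; build_accumulation_ops and its parameters are illustrative names, not part of this repository.

import tensorflow as tf  # TF1.x graph-mode API, as used in this repository

def build_accumulation_ops(loss, train_params, learning_rate=1e-3, grad_clip=5.0):
    """Hypothetical helper: sum clipped gradients over several mini-batches,
    apply them once with Adam, then clear the accumulators."""
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, train_params), grad_clip)

    # One non-trainable, zero-initialised accumulator per trainable parameter.
    acc = [tf.get_variable(p.name.split(":")[0] + "_acc", p.get_shape(), p.dtype,
                           tf.constant_initializer(0.0), trainable=False)
           for p in train_params]

    accumulate = [tf.assign_add(a, g) for a, g in zip(acc, grads)]   # run once per mini-batch
    opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
    update = opt.apply_gradients(zip([a.value() for a in acc], train_params))  # run once per accumulated batch
    reset = [a.initializer for a in acc]                             # then re-zero the accumulators
    return accumulate, update, reset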
@@ -447,7 +445,6 @@ def loop_fn(t, x_t, past, hidden, emit_ta, emit_gate, coverage_att_sum, covloss,
emit_gate = emit_gate.write(t, tf.multiply(p_gen, copy_mask))
emit_ta = emit_ta.write(t, final_dists)

# FIXME what
this_covloss = tf.reduce_sum(tf.minimum(coverage_att_sum, o_weight))
covloss += this_covloss
coverage_att_sum += o_weight
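For context, the per-step term above matches the usual coverage penalty in pointer-generator style decoders: the running sum of past attention weights is compared element-wise with the current attention distribution, so re-attending to an already covered source position is penalised. A tiny, self-contained illustration with made-up values, not code from this repository:

import tensorflow as tf  # TF1.x

a_t = tf.constant([[0.1, 0.7, 0.2]])   # current attention distribution over 3 source positions
c_t = tf.constant([[0.5, 0.1, 0.4]])   # attention mass accumulated over earlier decoding steps
step_covloss = tf.reduce_sum(tf.minimum(c_t, a_t))   # 0.1 + 0.1 + 0.2 = 0.4

with tf.Session() as sess:
    print(sess.run(step_covloss))       # ~0.4; small when the step attends to fresh positions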
@@ -558,7 +555,6 @@ def loop_fn(t, x_t, past, hidden, field_pos_emb, emit_ta, att_ta, coverage_att_s
emit_ta = emit_ta.write(t, final_dists)
att_ta = att_ta.write(t, tf.transpose(o_weight, [1,0]))

# FIXME what
x_nt = tf.cast(tf.argmax(final_dists, 1), tf.int32)

# field pos emb next round