
Commit

add pointer
czyssrs committed Jan 26, 2019
1 parent ed1f2e0 commit 11f96a9
Showing 16 changed files with 320 additions and 87 deletions.
Binary file added AttentionUnit.pyc
Binary file not shown.
Binary file added DataLoader.pyc
Binary file not shown.
Binary file added LstmUnit.pyc
Binary file not shown.
37 changes: 21 additions & 16 deletions Main.py
@@ -32,7 +32,7 @@

tf.app.flags.DEFINE_string("mode",'train','train or test')
tf.app.flags.DEFINE_string("load",'0','load directory') # BBBBBESTOFAll
tf.app.flags.DEFINE_string("dir",'processed_data','data set directory')
tf.app.flags.DEFINE_string("dir",'/scratch/home/zhiyu/wiki2bio/processed_data','data set directory')
tf.app.flags.DEFINE_integer("limits", 0,'max data set size')


@@ -51,14 +51,20 @@

model_dir = sys.argv[1]

gold_path_test = 'processed_data/test/test_split_for_rouge/gold_summary_'
gold_path_valid = 'processed_data/valid/valid_split_for_rouge/gold_summary_'
### paths for calculating ROUGE
# gold_path_test = 'processed_data/test/test_split_for_rouge/gold_summary_'
# gold_path_valid = 'processed_data/valid/valid_split_for_rouge/gold_summary_'

###
root_path = "/scratch/home/zhiyu/wiki2bio/"
gold_path_valid = root_path + 'original_data/valid.summary'
gold_path_test = root_path + 'original_data/test.summary'

# test phase
if FLAGS.load != "0":
save_dir = 'results/res/' + FLAGS.load + '/'
save_file_dir = save_dir + 'files/'
pred_dir = 'results/evaluation/' + FLAGS.load + '/'
save_dir = root_path + 'results/res/' + model_dir + '/loads/' + FLAGS.load + '/'
save_file_dir = root_path + 'results/res/' + model_dir + '/' + 'files/'
pred_dir = root_path + 'results/evaluation/' + model_dir + '/' + FLAGS.load + '/'
if not os.path.exists(pred_dir):
os.mkdir(pred_dir)
if not os.path.exists(save_file_dir):
@@ -67,13 +73,12 @@
pred_beam_path = pred_dir + 'beam_summary_'
# train phase
else:
prefix = str(int(time.time() * 1000))
os.mkdir('results/res/' + model_dir)
os.mkdir('results/evaluation/' + model_dir)
save_dir = 'results/res/' + model_dir + '/' + prefix + '/'
# prefix = str(int(time.time() * 1000))
os.mkdir(root_path + 'results/res/' + model_dir)
os.mkdir(root_path + 'results/evaluation/' + model_dir)
save_dir = root_path + 'results/res/' + model_dir + '/'
save_file_dir = save_dir + 'files/'
pred_dir = 'results/evaluation/' + model_dir + '/' + prefix + '/'
os.mkdir(save_dir)
pred_dir = root_path + 'results/evaluation/' + model_dir + '/'
if not os.path.exists(pred_dir):
os.mkdir(pred_dir)
if not os.path.exists(save_file_dir):
@@ -101,8 +106,6 @@ def train(sess, dataloader, model):
record_loss += this_loss
k += 1
record_k += 1
ksave_dir = save_model(model, save_dir, k // FLAGS.report)
write_log(evaluate(sess, dataloader, model, ksave_dir, 'test'))
progress_bar(k%FLAGS.report, FLAGS.report)
### czy
if (record_k % FLAGS.report_loss == 0):
@@ -111,6 +114,7 @@ def train(sess, dataloader, model):
record_loss = 0.0

if (k % FLAGS.report == 0):
print "Round: ", k / FLAGS.report
cost_time = time.time() - start_time
write_log("%d : loss = %.3f, time = %.3f " % (k // FLAGS.report, loss, cost_time))
loss, start_time = 0.0, time.time()
@@ -136,12 +140,12 @@ def save_model(model, save_dir, cnt):
def evaluate(sess, dataloader, model, ksave_dir, mode='valid'):
if mode == 'valid':
# texts_path = "original_data/valid.summary"
texts_path = "processed_data/valid/valid.box.val"
texts_path = root_path + "processed_data/valid/valid.box.val"
gold_path = gold_path_valid
evalset = dataloader.dev_set
else:
# texts_path = "original_data/test.summary"
texts_path = "processed_data/test/test.box.val"
texts_path = root_path + "processed_data/test/test.box.val"
gold_path = gold_path_test
evalset = dataloader.test_set

@@ -187,6 +191,7 @@ def evaluate(sess, dataloader, model, ksave_dir, mode='valid'):


### new bleu
print ksave_dir + mode + "_summary_unk.txt"
bleu_unk = bleu_score(gold_path, ksave_dir + mode + "_summary_unk.txt")
nocopy_result = "without copy BLEU: %.4f\n"%bleu_unk
bleu_copy = bleu_score(gold_path, ksave_dir + mode + "_summary_copy.clean.txt")
Binary file added OutputUnit.pyc
Binary file not shown.
Binary file added PythonROUGE.pyc
Binary file not shown.
37 changes: 27 additions & 10 deletions SeqUnit.py
@@ -140,7 +140,7 @@ def __init__(self, batch_size, hidden_size, emb_size, field_size, pos_size, sour
### original loss with logits
#losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=de_outputs, labels=self.decoder_output)

losses = -tf.reduce_sum(self.decoder_output_one_hot * tf.log(de_outputs), 2)
losses = -tf.reduce_sum(self.decoder_output_one_hot * tf.log(de_outputs + 1e-9), 2)


mask = tf.sign(tf.to_float(self.decoder_output))
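
Because de_outputs is now the pointer-mixture probability distribution rather than logits, the old sparse_softmax_cross_entropy_with_logits call no longer applies; the explicit negative log-likelihood with a 1e-9 offset avoids log(0) when the target token receives zero mass. A minimal sketch of this masked loss, assuming PAD has id 0 and that decoder_output_one_hot is simply a one-hot encoding of the targets:

import tensorflow as tf

def masked_nll(de_outputs, decoder_output, target_vocab):
    # de_outputs: [batch, dec_len, vocab] probabilities (not logits)
    # decoder_output: [batch, dec_len] int token ids, 0 = PAD (assumption)
    one_hot = tf.one_hot(decoder_output, target_vocab)
    losses = -tf.reduce_sum(one_hot * tf.log(de_outputs + 1e-9), 2)  # 1e-9 guards log(0)
    mask = tf.sign(tf.to_float(decoder_output))                      # zero out PAD positions
    return tf.reduce_mean(tf.reduce_sum(losses * mask, 1))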
@@ -240,15 +240,14 @@ def loop_fn(t, x_t, s_t, emit_ta, finished):
### pointer generator
#emit_ta = emit_ta.write(t, o_t)

### o_weight = len * batch * 1, already normalized. p_gen = batch * 1
### o_weight = batch * len, already normalized. p_gen = batch * 1
out_dist = p_gen * tf.nn.softmax(o_t) # batch * self.target_vocab
att_dist = tf.squeeze(o_weight) # len * batch
att_dist = (1 - p_gen) * tf.transpose(att_dist, [1,0]) # batch * len
att_dist = (1 - p_gen) * o_weight # batch * len

batch_nums = tf.range(0, limit=batch_size) # shape (batch_size)
batch_nums = tf.expand_dims(batch_nums, 1) # shape (batch_size, 1)
batch_nums = tf.tile(batch_nums, [1, encoder_len]) # shape (batch_size, attn_len)
indices = tf.stack( (batch_nums, self.encoder_input), axis=2) # shape (batch_size, enc_t, 2)
batch_nums = tf.tile(batch_nums, [1, encoder_len]) # shape (batch_size, enc_len)
indices = tf.stack((batch_nums, self.encoder_input), axis=2) # shape (batch_size, enc_len, 2)
shape = [batch_size, self.target_vocab]
attn_dists_projected = tf.scatter_nd(indices, att_dist, shape)
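
At each decoding step the generator softmax (scaled by p_gen) and the attention distribution over source positions (scaled by 1 - p_gen) are mixed by scattering the attention weights onto the vocabulary ids of the source tokens; tf.scatter_nd sums the mass of repeated ids. A self-contained toy sketch of this mixture (sizes and inputs are illustrative, not the repo's graph):

import tensorflow as tf

batch_size, encoder_len, target_vocab = 2, 3, 10
o_t = tf.random_normal([batch_size, target_vocab])                      # generator scores
o_weight = tf.nn.softmax(tf.random_normal([batch_size, encoder_len]))   # attention, batch * len
p_gen = tf.sigmoid(tf.random_normal([batch_size, 1]))                   # copy/generate gate
encoder_input = tf.constant([[4, 7, 4], [1, 2, 3]])                     # source ids, batch * len

out_dist = p_gen * tf.nn.softmax(o_t)                                   # batch * vocab
att_dist = (1 - p_gen) * o_weight                                       # batch * len
batch_nums = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, encoder_len])
indices = tf.stack((batch_nums, encoder_input), axis=2)                 # batch * len * 2
attn_dists_projected = tf.scatter_nd(indices, att_dist, [batch_size, target_vocab])
final_dists = out_dist + attn_dists_projected                           # duplicate source ids accumulate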

@@ -282,11 +281,29 @@ def decoder_g(self, initial_state):

def loop_fn(t, x_t, s_t, emit_ta, att_ta, finished):
o_t, s_nt = self.dec_lstm(x_t, s_t, finished)
o_t, w_t = self.att_layer(o_t)
o_t, o_weight, p_gen = self.att_layer(o_t, x_t, s_t)
o_t = self.dec_out(o_t, finished)
emit_ta = emit_ta.write(t, o_t)
att_ta = att_ta.write(t, w_t)
next_token = tf.arg_max(o_t, 1)

### pointer generator
#emit_ta = emit_ta.write(t, o_t)

### o_weight = batch * len, already normalized. p_gen = batch * 1
out_dist = p_gen * tf.nn.softmax(o_t) # batch * self.target_vocab
att_dist = (1 - p_gen) * o_weight # batch * len

batch_nums = tf.range(0, limit=batch_size) # shape (batch_size)
batch_nums = tf.expand_dims(batch_nums, 1) # shape (batch_size, 1)
batch_nums = tf.tile(batch_nums, [1, encoder_len]) # shape (batch_size, enc_len)
indices = tf.stack((batch_nums, self.encoder_input), axis=2) # shape (batch_size, enc_len, 2)
shape = [batch_size, self.target_vocab]
attn_dists_projected = tf.scatter_nd(indices, att_dist, shape) # batch * target_vocab

final_dists = out_dist + attn_dists_projected


emit_ta = emit_ta.write(t, final_dists)
att_ta = att_ta.write(t, tf.transpose(o_weight, [1,0]))
next_token = tf.arg_max(final_dists, 1)
x_nt = tf.nn.embedding_lookup(self.embedding, next_token)
finished = tf.logical_or(finished, tf.equal(next_token, self.stop_token))
finished = tf.logical_or(finished, tf.greater_equal(t, self.max_length))
Binary file added SeqUnit.pyc
Binary file not shown.
20 changes: 15 additions & 5 deletions dualAttentionUnit.py
@@ -9,6 +9,7 @@

class dualAttentionWrapper(object):
def __init__(self, emb_size, hidden_size, input_size, field_size, hs, fds, scope_name):
### here input_size == hidden_size
self.hs = tf.transpose(hs, [1,0,2]) # input_len * batch * input_size
self.fds = tf.transpose(fds, [1,0,2])
self.hidden_size = hidden_size
@@ -33,14 +34,16 @@ def __init__(self, emb_size, hidden_size, input_size, field_size, hs, fds, scope
### add pointer params
### p_gen = sigmoid(wh * ht + ws * st + wx * xt + bptr)
self.wh_ptr = tf.get_variable('wh_ptr', [self.hidden_size, 1])
self.ws_ptr = tf.get_variable('ws_ptr', [self.hidden_size, 1])
self.ws_ptr = tf.get_variable('ws_ptr', [2*self.hidden_size, 1])
self.wx_ptr = tf.get_variable('wx_ptr', [self.emb_size, 1])
self.b_ptr = tf.get_variable('b_ptr', [1])

self.params.update({'Wh': self.Wh, 'Ws': self.Ws, 'Wo': self.Wo,
'bh': self.bh, 'bs': self.bs, 'bo': self.bo,
'Wf': self.Wf, 'Wr': self.Wr,
'bf': self.bf, 'br': self.br})
'bf': self.bf, 'br': self.br,
'wh_ptr': self.wh_ptr, 'ws_ptr': self.ws_ptr,
'wx_ptr': self.wx_ptr, 'b_ptr': self.b_ptr})

hs2d = tf.reshape(self.hs, [-1, input_size])
phi_hs2d = tf.tanh(tf.nn.xw_plus_b(hs2d, self.Wh, self.bh))
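
ws_ptr is widened to [2*hidden_size, 1] because the decoder state handed to the attention layer is the LSTM (h, c) tuple, which __call__ concatenates before projecting; the gate then follows the comment p_gen = sigmoid(wh * ht + ws * st + wx * xt + bptr). A hedged sketch of that gate in isolation (the helper name and scoping are illustrative):

import tensorflow as tf

def pointer_gate(context, s_t, x_t, hidden_size, emb_size):
    # context: [batch, hidden_size] attention context
    # s_t: (h, c) LSTM state tuple, each [batch, hidden_size]
    # x_t: [batch, emb_size] current input embedding
    wh_ptr = tf.get_variable('wh_ptr', [hidden_size, 1])
    ws_ptr = tf.get_variable('ws_ptr', [2 * hidden_size, 1])   # state is concat(h, c)
    wx_ptr = tf.get_variable('wx_ptr', [emb_size, 1])
    b_ptr = tf.get_variable('b_ptr', [1])
    h_prev, c_prev = s_t
    state = tf.concat([h_prev, c_prev], 1)                     # [batch, 2*hidden_size]
    return tf.sigmoid(tf.matmul(context, wh_ptr) + tf.matmul(state, ws_ptr)
                      + tf.matmul(x_t, wx_ptr) + b_ptr)        # [batch, 1]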
@@ -52,12 +55,12 @@
def __call__(self, x, in_t, s_t, coverage = None, finished = None):
gamma_h = tf.tanh(tf.nn.xw_plus_b(x, self.Ws, self.bs)) # batch * hidden_size
alpha_h = tf.tanh(tf.nn.xw_plus_b(x, self.Wr, self.br))
fd_weights = tf.reduce_sum(self.phi_fds * alpha_h, reduction_indices=2, keep_dims=True)
fd_weights = tf.reduce_sum(self.phi_fds * alpha_h, reduction_indices=2, keep_dims=True) # len * batch * 1
fd_weights = tf.exp(fd_weights - tf.reduce_max(fd_weights, reduction_indices=0, keep_dims=True))
fd_weights = tf.divide(fd_weights, (1e-6 + tf.reduce_sum(fd_weights, reduction_indices=0, keep_dims=True))) # len * batch * 1


weights = tf.reduce_sum(self.phi_hs * gamma_h, reduction_indices=2, keep_dims=True) # input_len * batch
weights = tf.reduce_sum(self.phi_hs * gamma_h, reduction_indices=2, keep_dims=True) # input_len * batch * 1
weights = tf.exp(weights - tf.reduce_max(weights, reduction_indices=0, keep_dims=True))
weights = tf.divide(weights, (1e-6 + tf.reduce_sum(weights, reduction_indices=0, keep_dims=True)))
weights = tf.divide(weights * fd_weights, (1e-6 + tf.reduce_sum(weights * fd_weights, reduction_indices=0, keep_dims=True))) # len * batch * 1
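
Both attention channels, over encoder hidden states and over field embeddings, get a numerically stable softmax along the input-length axis, and their product is renormalized to give the final len * batch * 1 weights. A NumPy sketch of the same normalization, assuming the raw scores are already computed:

import numpy as np

def dual_attention_weights(word_scores, field_scores):
    # word_scores, field_scores: [input_len, batch, 1] unnormalized scores
    def softmax_over_len(s):
        s = np.exp(s - s.max(axis=0, keepdims=True))                # subtract max for stability
        return s / (1e-6 + s.sum(axis=0, keepdims=True))
    w = softmax_over_len(word_scores)                                # word-level attention
    f = softmax_over_len(field_scores)                               # field-level attention
    combined = w * f
    return combined / (1e-6 + combined.sum(axis=0, keepdims=True))   # len * batch * 1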
@@ -67,11 +70,18 @@ def __call__(self, x, in_t, s_t, coverage = None, finished = None):

#### pointer generator
### p_gen = sigmoid(wh * ht + ws * st + wx * xt + bptr)
p_gen = tf.matmul(context, self.wh_ptr) + tf.matmul(s_t, self.ws_ptr) + tf.matmul(x_t, self.self.wx_ptr) + self.b_ptr
h_prev, c_prev = s_t
s_t = tf.concat([h_prev, c_prev], 1)
p_gen = tf.matmul(context, self.wh_ptr) + tf.matmul(s_t, self.ws_ptr) + tf.matmul(in_t, self.wx_ptr) + self.b_ptr
p_gen = tf.sigmoid(p_gen) # batch * 1
weights = tf.squeeze(weights) # len * batch
weights = tf.transpose(weights, [1,0]) # batch * len

if finished is not None:
out = tf.where(finished, tf.zeros_like(out), out)
p_gen = tf.where(finished, tf.zeros_like(p_gen), p_gen)
weights = tf.where(finished, tf.zeros_like(weights), weights)

return out, weights, p_gen

def save(self, path):
Binary file added dualAttentionUnit.pyc
Binary file not shown.
Binary file added fgateLstmUnit.pyc
Binary file not shown.
177 changes: 177 additions & 0 deletions multi-bleu.perl
@@ -0,0 +1,177 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

# $Id$
use warnings;
use strict;

my $lowercase = 0;
if ($ARGV[0] eq "-lc") {
$lowercase = 1;
shift;
}

my $stem = $ARGV[0];
if (!defined $stem) {
print STDERR "usage: multi-bleu.pl [-lc] reference < hypothesis\n";
print STDERR "Reads the references from reference or reference0, reference1, ...\n";
exit(1);
}

$stem .= ".ref" if !-e $stem && !-e $stem."0" && -e $stem.".ref0";

my @REF;
my $ref=0;
while(-e "$stem$ref") {
&add_to_ref("$stem$ref",\@REF);
$ref++;
}
&add_to_ref($stem,\@REF) if -e $stem;
die("ERROR: could not find reference file $stem") unless scalar @REF;

# add additional references explicitly specified on the command line
shift;
foreach my $stem (@ARGV) {
&add_to_ref($stem,\@REF) if -e $stem;
}



sub add_to_ref {
my ($file,$REF) = @_;
my $s=0;
if ($file =~ /.gz$/) {
open(REF,"gzip -dc $file|") or die "Can't read $file";
} else {
open(REF,$file) or die "Can't read $file";
}
while(<REF>) {
chop;
push @{$$REF[$s++]}, $_;
}
close(REF);
}

my(@CORRECT,@TOTAL,$length_translation,$length_reference);
my $s=0;
while(<STDIN>) {
chop;
$_ = lc if $lowercase;
my @WORD = split;
my %REF_NGRAM = ();
my $length_translation_this_sentence = scalar(@WORD);
my ($closest_diff,$closest_length) = (9999,9999);
foreach my $reference (@{$REF[$s]}) {
# print "$s $_ <=> $reference\n";
$reference = lc($reference) if $lowercase;
my @WORD = split(' ',$reference);
my $length = scalar(@WORD);
my $diff = abs($length_translation_this_sentence-$length);
if ($diff < $closest_diff) {
$closest_diff = $diff;
$closest_length = $length;
# print STDERR "$s: closest diff ".abs($length_translation_this_sentence-$length)." = abs($length_translation_this_sentence-$length), setting len: $closest_length\n";
} elsif ($diff == $closest_diff) {
$closest_length = $length if $length < $closest_length;
# from two references with the same closeness to me
# take the *shorter* into account, not the "first" one.
}
for(my $n=1;$n<=4;$n++) {
my %REF_NGRAM_N = ();
for(my $start=0;$start<=$#WORD-($n-1);$start++) {
my $ngram = "$n";
for(my $w=0;$w<$n;$w++) {
$ngram .= " ".$WORD[$start+$w];
}
$REF_NGRAM_N{$ngram}++;
}
foreach my $ngram (keys %REF_NGRAM_N) {
if (!defined($REF_NGRAM{$ngram}) ||
$REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
$REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
# print "$i: REF_NGRAM{$ngram} = $REF_NGRAM{$ngram}<BR>\n";
}
}
}
}
$length_translation += $length_translation_this_sentence;
$length_reference += $closest_length;
for(my $n=1;$n<=4;$n++) {
my %T_NGRAM = ();
for(my $start=0;$start<=$#WORD-($n-1);$start++) {
my $ngram = "$n";
for(my $w=0;$w<$n;$w++) {
$ngram .= " ".$WORD[$start+$w];
}
$T_NGRAM{$ngram}++;
}
foreach my $ngram (keys %T_NGRAM) {
$ngram =~ /^(\d+) /;
my $n = $1;
# my $corr = 0;
# print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
$TOTAL[$n] += $T_NGRAM{$ngram};
if (defined($REF_NGRAM{$ngram})) {
if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
$CORRECT[$n] += $T_NGRAM{$ngram};
# $corr = $T_NGRAM{$ngram};
# print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
}
else {
$CORRECT[$n] += $REF_NGRAM{$ngram};
# $corr = $REF_NGRAM{$ngram};
# print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
}
}
# $REF_NGRAM{$ngram} = 0 if !defined $REF_NGRAM{$ngram};
# print STDERR "$ngram: {$s, $REF_NGRAM{$ngram}, $T_NGRAM{$ngram}, $corr}\n"
}
}
$s++;
}
my $brevity_penalty = 1;
my $bleu = 0;

my @bleu=();

for(my $n=1;$n<=4;$n++) {
if (defined ($TOTAL[$n])){
$bleu[$n]=($TOTAL[$n])?$CORRECT[$n]/$TOTAL[$n]:0;
# print STDERR "CORRECT[$n]:$CORRECT[$n] TOTAL[$n]:$TOTAL[$n]\n";
}else{
$bleu[$n]=0;
}
}

if ($length_reference==0){
printf "BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\n";
exit(1);
}

if ($length_translation<$length_reference) {
$brevity_penalty = exp(1-$length_reference/$length_translation);
}
$bleu = $brevity_penalty * exp((my_log( $bleu[1] ) +
my_log( $bleu[2] ) +
my_log( $bleu[3] ) +
my_log( $bleu[4] ) ) / 4) ;
printf "BLEU = %.4f, %.4f/%.4f/%.4f/%.4f (BP=%.3f, ratio=%.3f, hyp_len=%d, ref_len=%d)\n",
100*$bleu,
100*$bleu[1],
100*$bleu[2],
100*$bleu[3],
100*$bleu[4],
$brevity_penalty,
$length_translation / $length_reference,
$length_translation,
$length_reference;


print STDERR "It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer.\n";

sub my_log {
return -9999999999 unless $_[0];
return log($_[0]);
}
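
multi-bleu.perl reads the hypothesis from stdin and one or more reference files named on the command line, reporting corpus BLEU as BP * exp(mean of the log 1-4-gram precisions). The bleu_score calls in Main.py presumably wrap an invocation like the one below; since that wrapper is not part of this diff, the snippet is only an assumed illustration:

import subprocess

def multi_bleu(reference_path, hypothesis_path, script='multi-bleu.perl'):
    # BLEU = BP * exp((log p1 + log p2 + log p3 + log p4) / 4), as computed by the script
    with open(hypothesis_path) as hyp:
        out = subprocess.check_output(['perl', script, reference_path], stdin=hyp)
    # Example output (illustrative numbers): "BLEU = 28.1400, 62.1000/33.9000/20.3000/12.7000 (BP=1.000, ...)"
    return float(out.decode().split()[2].strip(','))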