forked from czyssrs/Few-Shot-NLG
Commit: 27 changed files with 24,247 additions and 0 deletions.
New file (56 lines added):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 17-4-27 8:35 PM
# @Author  : Tianyu Liu


import tensorflow as tf
import pickle


class AttentionWrapper(object):
    def __init__(self, hidden_size, input_size, hs, scope_name):
        # hs: encoder hidden states, [batch, time, input_size] -> [time, batch, input_size]
        self.hs = tf.transpose(hs, [1, 0, 2])
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.scope_name = scope_name
        self.params = {}

        with tf.variable_scope(scope_name):
            self.Wh = tf.get_variable('Wh', [input_size, hidden_size])
            self.bh = tf.get_variable('bh', [hidden_size])
            self.Ws = tf.get_variable('Ws', [input_size, hidden_size])
            self.bs = tf.get_variable('bs', [hidden_size])
            self.Wo = tf.get_variable('Wo', [2 * input_size, hidden_size])
            self.bo = tf.get_variable('bo', [hidden_size])
            self.params.update({'Wh': self.Wh, 'Ws': self.Ws, 'Wo': self.Wo,
                                'bh': self.bh, 'bs': self.bs, 'bo': self.bo})

        # Project the encoder states once up front; reshaping the result back to
        # tf.shape(self.hs) implicitly assumes hidden_size == input_size.
        hs2d = tf.reshape(self.hs, [-1, input_size])
        phi_hs2d = tf.tanh(tf.nn.xw_plus_b(hs2d, self.Wh, self.bh))
        self.phi_hs = tf.reshape(phi_hs2d, tf.shape(self.hs))

    def __call__(self, x, finished=None):
        # x: current decoder state, [batch, input_size]
        gamma_h = tf.tanh(tf.nn.xw_plus_b(x, self.Ws, self.bs))
        # Unnormalised attention scores, [time, batch, 1], then a numerically
        # stable softmax over the time axis (axis 0).
        weights = tf.reduce_sum(self.phi_hs * gamma_h, axis=2, keepdims=True)
        weights = tf.exp(weights - tf.reduce_max(weights, axis=0, keepdims=True))
        weights = tf.divide(weights, (1e-6 + tf.reduce_sum(weights, axis=0, keepdims=True)))
        # Context vector: attention-weighted sum of the encoder states.
        context = tf.reduce_sum(self.hs * weights, axis=0)
        out = tf.tanh(tf.nn.xw_plus_b(tf.concat([context, x], -1), self.Wo, self.bo))

        if finished is not None:
            # Emit zeros for sequences that have already finished decoding.
            out = tf.where(finished, tf.zeros_like(out), out)
        return out, weights

    def save(self, path):
        param_values = {}
        for param in self.params:
            param_values[param] = self.params[param].eval()
        with open(path, 'wb') as f:
            pickle.dump(param_values, f, True)

    def load(self, path):
        with open(path, 'rb') as f:
            param_values = pickle.load(f)
        for param in param_values:
            self.params[param].load(param_values[param])
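For reference, a minimal usage sketch (not part of the commit): it assumes TensorFlow 1.x with the class above in scope, and that hidden_size equals input_size, which the reshape in __init__ requires. All sizes and tensor names below are illustrative.

import numpy as np
import tensorflow as tf

tf.reset_default_graph()
batch, time_steps, size = 4, 5, 8                     # illustrative; hidden == input
hs = tf.placeholder(tf.float32, [None, None, size])   # encoder states [batch, time, input_size]
x = tf.placeholder(tf.float32, [None, size])          # current decoder state

att = AttentionWrapper(size, size, hs, 'attention')
out, weights = att(x)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    o, w = sess.run([out, weights],
                    {hs: np.random.randn(batch, time_steps, size).astype(np.float32),
                     x: np.random.randn(batch, size).astype(np.float32)})
    print(o.shape, w.shape)   # (4, 8) and (5, 4, 1): one weight per encoder time step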
New file (109 lines added):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 17-4-27 8:43 PM
# @Author  : Tianyu Liu


import tensorflow as tf
import time
import numpy as np


class DataLoader(object):
    def __init__(self, data_dir, limits):
        self.train_data_path = [data_dir + '/train/train.summary.id', data_dir + '/train/train.box.val.id',
                                data_dir + '/train/train.box.lab.id', data_dir + '/train/train.box.pos',
                                data_dir + '/train/train.box.rpos']
        self.test_data_path = [data_dir + '/test/test.summary.id', data_dir + '/test/test.box.val.id',
                               data_dir + '/test/test.box.lab.id', data_dir + '/test/test.box.pos',
                               data_dir + '/test/test.box.rpos']
        self.dev_data_path = [data_dir + '/valid/valid.summary.id', data_dir + '/valid/valid.box.val.id',
                              data_dir + '/valid/valid.box.lab.id', data_dir + '/valid/valid.box.pos',
                              data_dir + '/valid/valid.box.rpos']
        self.limits = limits
        self.man_text_len = 100  # hard cap on the encoder (table) length
        start_time = time.time()

        print('Reading datasets ...')
        self.train_set = self.load_data(self.train_data_path)
        self.test_set = self.load_data(self.test_data_path)
        self.dev_set = self.load_data(self.dev_data_path)
        print('Reading datasets consumed %.3f seconds' % (time.time() - start_time))

    def load_data(self, path):
        summary_path, text_path, field_path, pos_path, rpos_path = path
        summaries = open(summary_path, 'r').read().strip().split('\n')
        texts = open(text_path, 'r').read().strip().split('\n')
        fields = open(field_path, 'r').read().strip().split('\n')
        poses = open(pos_path, 'r').read().strip().split('\n')
        rposes = open(rpos_path, 'r').read().strip().split('\n')
        if self.limits > 0:
            summaries = summaries[:self.limits]
            texts = texts[:self.limits]
            fields = fields[:self.limits]
            poses = poses[:self.limits]
            rposes = rposes[:self.limits]
        summaries = [list(map(int, summary.strip().split(' '))) for summary in summaries]
        texts = [list(map(int, text.strip().split(' '))) for text in texts]
        fields = [list(map(int, field.strip().split(' '))) for field in fields]
        poses = [list(map(int, pos.strip().split(' '))) for pos in poses]
        rposes = [list(map(int, rpos.strip().split(' '))) for rpos in rposes]
        return summaries, texts, fields, poses, rposes

    def batch_iter(self, data, batch_size, shuffle):
        summaries, texts, fields, poses, rposes = data
        data_size = len(summaries)
        num_batches = data_size // batch_size if data_size % batch_size == 0 \
            else data_size // batch_size + 1

        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            summaries = np.array(summaries)[shuffle_indices]
            texts = np.array(texts)[shuffle_indices]
            fields = np.array(fields)[shuffle_indices]
            poses = np.array(poses)[shuffle_indices]
            rposes = np.array(rposes)[shuffle_indices]

        for batch_num in range(num_batches):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            max_summary_len = max([len(sample) for sample in summaries[start_index:end_index]])
            max_text_len = max([len(sample) for sample in texts[start_index:end_index]])
            batch_data = {'enc_in': [], 'enc_fd': [], 'enc_pos': [], 'enc_rpos': [], 'enc_len': [],
                          'dec_in': [], 'dec_len': [], 'dec_out': []}

            for summary, text, field, pos, rpos in zip(summaries[start_index:end_index], texts[start_index:end_index],
                                                       fields[start_index:end_index], poses[start_index:end_index],
                                                       rposes[start_index:end_index]):
                summary_len = len(summary)
                text_len = len(text)
                pos_len = len(pos)
                rpos_len = len(rpos)
                assert text_len == len(field)
                assert pos_len == len(field)
                assert rpos_len == pos_len
                # Decoder target: the summary plus a trailing token id 2
                # (end-of-sequence marker), padded with id 0 to the batch maximum.
                gold = summary + [2] + [0] * (max_summary_len - summary_len)
                summary = summary + [0] * (max_summary_len - summary_len)
                text = text + [0] * (max_text_len - text_len)
                field = field + [0] * (max_text_len - text_len)
                pos = pos + [0] * (max_text_len - text_len)
                rpos = rpos + [0] * (max_text_len - text_len)

                # Truncate over-long tables to man_text_len tokens.
                if max_text_len > self.man_text_len:
                    text = text[:self.man_text_len]
                    field = field[:self.man_text_len]
                    pos = pos[:self.man_text_len]
                    rpos = rpos[:self.man_text_len]
                    text_len = min(text_len, self.man_text_len)

                batch_data['enc_in'].append(text)
                batch_data['enc_len'].append(text_len)
                batch_data['enc_fd'].append(field)
                batch_data['enc_pos'].append(pos)
                batch_data['enc_rpos'].append(rpos)
                batch_data['dec_in'].append(summary)
                batch_data['dec_len'].append(summary_len)
                batch_data['dec_out'].append(gold)

            yield batch_data
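A sketch of the expected call pattern, assuming the preprocessed .id/.pos/.rpos files already exist under the directory layout hard-coded in the constructor; 'processed_data' is a placeholder path.

loader = DataLoader('processed_data', limits=0)   # limits=0 keeps every example
for batch in loader.batch_iter(loader.train_set, batch_size=32, shuffle=True):
    # enc_* keys hold the padded table side, dec_* the summary side.
    print(len(batch['enc_in']), batch['enc_len'][0], batch['dec_out'][0][:5])
    break   # one batch is enough for a smoke test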
New file (53 lines added):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 17-4-27 8:34 PM
# @Author  : Tianyu Liu


import tensorflow as tf
import pickle


class LstmUnit(object):
    def __init__(self, hidden_size, input_size, scope_name):
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.scope_name = scope_name
        self.params = {}

        with tf.variable_scope(scope_name):
            self.W = tf.get_variable('W', [self.input_size + self.hidden_size, 4 * self.hidden_size])
            # tf.zeros_initializer takes no shape argument; get_variable supplies the shape.
            self.b = tf.get_variable('b', [4 * self.hidden_size],
                                     initializer=tf.zeros_initializer(), dtype=tf.float32)

        self.params.update({'W': self.W, 'b': self.b})

    def __call__(self, x, s, finished=None):
        h_prev, c_prev = s

        x = tf.concat([x, h_prev], 1)
        # One matmul yields the input (i), candidate (j), forget (f) and output (o) pre-activations.
        i, j, f, o = tf.split(tf.nn.xw_plus_b(x, self.W, self.b), 4, 1)

        # New memory cell; the +1.0 biases the forget gate towards remembering early in training.
        c = tf.sigmoid(f + 1.0) * c_prev + tf.sigmoid(i) * tf.tanh(j)
        h = tf.sigmoid(o) * tf.tanh(c)

        out, state = h, (h, c)
        if finished is not None:
            # For finished sequences, emit zeros and carry the previous state forward unchanged.
            out = tf.where(finished, tf.zeros_like(h), h)
            state = (tf.where(finished, h_prev, h), tf.where(finished, c_prev, c))

        return out, state

    def save(self, path):
        param_values = {}
        for param in self.params:
            param_values[param] = self.params[param].eval()
        with open(path, 'wb') as f:
            pickle.dump(param_values, f, True)

    def load(self, path):
        with open(path, 'rb') as f:
            param_values = pickle.load(f)
        for param in param_values:
            self.params[param].load(param_values[param])
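A single-step usage sketch under the same TensorFlow 1.x assumptions; the batch size and dimensions are illustrative, and the initial state is simply zeros.

import numpy as np
import tensorflow as tf

tf.reset_default_graph()
batch, input_size, hidden_size = 4, 8, 16
cell = LstmUnit(hidden_size, input_size, 'lstm')

x = tf.placeholder(tf.float32, [None, input_size])
h0 = tf.zeros([batch, hidden_size])   # initial hidden state
c0 = tf.zeros([batch, hidden_size])   # initial memory cell
out, (h1, c1) = cell(x, (h0, c0))     # out and h1 are [batch, hidden_size]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    o, (h, c) = sess.run([out, (h1, c1)],
                         {x: np.random.randn(batch, input_size).astype(np.float32)})
    print(o.shape, h.shape, c.shape)  # (4, 16) each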