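"""Interactive inference for the seq2seq chatbot.

Restores a trained Seq2SeqModel from the latest checkpoint in FLAGS.model_dir
and answers sentences typed on stdin, decoding with beam search.
"""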
import sys

import numpy as np
import tensorflow as tf

from data_helpers import loadDataset, sentence2enco
from model import Seq2SeqModel

# Hyperparameters; these must match the values the model was trained with.
tf.app.flags.DEFINE_integer('rnn_size', 1024, 'Number of hidden units in each layer')
tf.app.flags.DEFINE_integer('num_layers', 2, 'Number of layers in the encoder and decoder')
tf.app.flags.DEFINE_integer('embedding_size', 1024, 'Embedding dimension of encoder and decoder inputs')
tf.app.flags.DEFINE_float('learning_rate', 0.0001, 'Learning rate')
tf.app.flags.DEFINE_integer('batch_size', 128, 'Batch size')
tf.app.flags.DEFINE_integer('numEpochs', 30, 'Maximum number of training epochs')
tf.app.flags.DEFINE_integer('steps_per_checkpoint', 100, 'Save a checkpoint every this many steps')
tf.app.flags.DEFINE_string('model_dir', 'model/', 'Directory holding model checkpoints')
tf.app.flags.DEFINE_string('model_name', 'chatbot.ckpt', 'File name used for model checkpoints')
FLAGS = tf.app.flags.FLAGS

# Path to the preprocessed Cornell movie-dialogs dataset; adjust to your local
# copy. A raw string avoids the backslash-escape problems of a Windows path.
data_path = r'E:\PycharmProjects\seq2seq_chatbot\seq2seq_chatbot_new\data\dataset-cornell-length10-filter1-vocabSize40000.pkl'
word2id, id2word, trainingSamples = loadDataset(data_path)

def predict_ids_to_seq(predict_ids, id2word, beam_size):
    '''
    Convert the results returned by beam search into readable strings.
    :param predict_ids: list of length batch_size; each element is an array of
        shape [1, decode_len, beam_size]
    :param id2word: id-to-word vocab dict
    :param beam_size: number of candidate sequences returned per input
    :return:
    '''
    for single_predict in predict_ids:
        for i in range(beam_size):
            # Take the i-th beam and map each id back to its word.
            predict_list = np.ndarray.tolist(single_predict[:, :, i])
            predict_seq = [id2word[idx] for idx in predict_list[0]]
            print(" ".join(predict_seq))

with tf.Session() as sess:
    # Build the model in decode mode; beam_size must match the value passed to
    # predict_ids_to_seq below.
    model = Seq2SeqModel(FLAGS.rnn_size, FLAGS.num_layers, FLAGS.embedding_size, FLAGS.learning_rate, word2id,
                         mode='decode', use_attention=True, beam_search=True, beam_size=5, max_gradient_norm=5.0)
    ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print('Reloading model parameters...')
        model.saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        raise ValueError('No checkpoint found in [{}]'.format(FLAGS.model_dir))
    # Read sentences from stdin until EOF, printing beam_size replies for each.
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
        batch = sentence2enco(sentence, word2id)
        predicted_ids = model.infer(sess, batch)
        predict_ids_to_seq(predicted_ids, id2word, 5)
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
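
# Usage sketch (assuming a trained checkpoint already exists under
# FLAGS.model_dir, e.g. model/):
#
#   python predict.py
#
# Type a sentence at the "> " prompt; the script prints the beam_size (here 5)
# candidate replies decoded for it. End the session with EOF (Ctrl-D on
# Unix-like systems, Ctrl-Z then Enter on Windows).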