diff --git a/CMakeLists.txt b/CMakeLists.txt index 748d88a351f..1403ae001ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,8 +6,16 @@ include(GNUInstallDirs) include(Utils) include(third_party/get_third_party) +# Should update cmake to a more recent version which supports FindPython3. +find_package(PythonInterp) +if(NOT PYTHON_EXECUTABLE OR PYTHON_VERSION_MAJOR LESS 3) + message(WARNING "Needs python3 to auto-generate most CMake files, but not found. " + "Will try `python3` directly...") + set(PYTHON_EXECUTABLE "python3") +endif() + message(STATUS "Running gen_cmake_skeleton.py") -execute_process(COMMAND python +execute_process(COMMAND ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/gen_cmake_skeleton.py" "${CMAKE_CURRENT_SOURCE_DIR}/src" "--quiet" @@ -28,11 +36,21 @@ if(BUILD_SHARED_LIBS) endif() endif() -set(MATHLIB "OpenBLAS" CACHE STRING "OpenBLAS|MKL|Accelerate") +if(APPLE) + # Use built-in BLAS on MacOS by default. + set(MATHLIB "Accelerate" CACHE STRING "OpenBLAS|MKL|Accelerate") +else() + set(MATHLIB "OpenBLAS" CACHE STRING "OpenBLAS|MKL|Accelerate") +endif() option(KALDI_BUILD_EXE "If disabled, will make add_kaldi_executable a no-op" ON) option(KALDI_BUILD_TEST "If disabled, will make add_kaldi_test_executable a no-op" ON) option(KALDI_USE_PATCH_NUMBER "Use MAJOR.MINOR.PATCH format, otherwise MAJOR.MINOR" OFF) +if (KALDI_BUILD_TEST) + include(CTest) + enable_testing() +endif() + link_libraries(${CMAKE_DL_LIBS}) find_package(Threads) @@ -53,6 +71,19 @@ elseif(MATHLIB STREQUAL "MKL") include_directories($ENV{MKLROOT}/include) # TODO: maybe not use env, idk, find_package doesnt handle includes... link_libraries(${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) elseif(MATHLIB STREQUAL "Accelerate") + execute_process(COMMAND sw_vers -productVersion + OUTPUT_VARIABLE MACOS_VERSION) + if(MACOS_VERSION VERSION_LESS "10.12" AND MACOS_VERSION VERSION_GREATER_EQUAL "10.11") + message(WARNING + "**BAD WARNING**: You are using OS X El Capitan. Some versions of this OS" + " have a bug in the BLAS implementation that affects Kaldi." + " After compiling, cd to matrix/ and type 'make test'. The" + " test will fail if the problem exists in your version." + " Eventually this issue will be fixed by system updates from" + " Apple. Unexplained crashes with reports of NaNs will" + " be caused by this bug, but some recipes will (sometimes) work." + ) + endif() set(BLA_VENDOR "Apple") find_package(BLAS REQUIRED) find_package(LAPACK REQUIRED) @@ -160,6 +191,11 @@ add_subdirectory(src/kws) add_subdirectory(src/itf) +if(TENSORFLOW_DIR) + add_subdirectory(src/tfrnnlm) + add_subdirectory(src/tfrnnlmbin) +endif() + # add all cuda libraries if(CUDA_FOUND) add_subdirectory(src/cudafeat) diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index 88dbefdacc9..c7f45827a99 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -36,6 +36,10 @@ macro(add_kaldi_test_executable) cmake_parse_arguments(kaldi_test_exe "" "NAME" "SOURCES;DEPENDS" ${ARGN}) add_executable(${kaldi_test_exe_NAME} ${kaldi_test_exe_SOURCES}) target_link_libraries(${kaldi_test_exe_NAME} PRIVATE ${kaldi_test_exe_DEPENDS}) + add_test( + NAME ${kaldi_test_exe_NAME} + COMMAND ${kaldi_test_exe_NAME} + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) # list(APPEND KALDI_TEST_EXECUTABLES ${kaldi_test_exe_NAME}) install(TARGETS ${kaldi_test_exe_NAME} RUNTIME DESTINATION testbin) diff --git a/cmake/gen_cmake_skeleton.py b/cmake/gen_cmake_skeleton.py index fa506943662..8c393630cfb 100644 --- a/cmake/gen_cmake_skeleton.py +++ b/cmake/gen_cmake_skeleton.py @@ -180,11 +180,13 @@ def gen_code(self): if len(self.cuda_source_list) > 0: self.source_list.append("${CUDA_OBJS}") - ret.append("cuda_include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)") - ret.append("cuda_compile(CUDA_OBJS") + ret.append("if(CUDA_FOUND)") + ret.append(" cuda_include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)") + ret.append(" cuda_compile(CUDA_OBJS") for f in self.cuda_source_list: - ret.append(" " + f) - ret.append(")\n") + ret.append(" " + f) + ret.append(" )") + ret.append("endif()\n") ret.append("add_library(" + self.target_name) for f in self.source_list: @@ -278,6 +280,8 @@ def write_file(self): subdirs = get_subdirectories(".") for d in subdirs: + if d.startswith('tfrnnlm'): + continue cmakelists = CMakeListsFile(d) if is_bin_dir(d): for f in get_files(d): diff --git a/egs/wsj/s5/steps/tfrnnlm/lstm.py b/egs/wsj/s5/steps/tfrnnlm/lstm.py index 433dc87b4c6..a66e7d69a35 100644 --- a/egs/wsj/s5/steps/tfrnnlm/lstm.py +++ b/egs/wsj/s5/steps/tfrnnlm/lstm.py @@ -25,32 +25,26 @@ from __future__ import division from __future__ import print_function -import sys - -import inspect -import time - -import numpy as np +import absl +import absl.flags as flags import tensorflow as tf import reader -flags = tf.flags -logging = tf.logging - flags.DEFINE_integer("hidden_size", 200, "hidden dim of RNN") flags.DEFINE_string("data_path", None, "Where the training/test data is stored.") flags.DEFINE_string("vocab_path", None, "Where the wordlist file is stored.") -flags.DEFINE_string("save_path", None, +flags.DEFINE_string("save_path", "export", "Model output directory.") flags.DEFINE_bool("use_fp16", False, "Train using 16-bit floats instead of 32bit floats") FLAGS = flags.FLAGS + class Config(object): init_scale = 0.1 learning_rate = 1.0 @@ -64,265 +58,183 @@ class Config(object): lr_decay = 0.5 batch_size = 64 + def data_type(): return tf.float16 if FLAGS.use_fp16 else tf.float32 -class RnnlmInput(object): - """The input data.""" +class RNNLMModel(tf.Module): + """The RNN model itself.""" - def __init__(self, config, data, name=None): - self.batch_size = batch_size = config.batch_size - self.num_steps = num_steps = config.num_steps - self.epoch_size = ((len(data) // batch_size) - 1) // num_steps - self.input_data, self.targets = reader.rnnlm_producer( - data, batch_size, num_steps, name=name) + def __init__(self, config, logits_bias_initializer=None): + super().__init__() + self._config = config - -class RnnlmModel(object): - """The RNNLM model.""" - - def __init__(self, is_training, config, input_): - self._input = input_ - - batch_size = input_.batch_size - num_steps = input_.num_steps size = config.hidden_size vocab_size = config.vocab_size + dt = data_type() def lstm_cell(): - # With the latest TensorFlow source code (as of Mar 27, 2017), - # the BasicLSTMCell will need a reuse parameter which is unfortunately not - # defined in TensorFlow 1.0. To maintain backwards compatibility, we add - # an argument check here: - if 'reuse' in inspect.getargspec( - tf.contrib.rnn.BasicLSTMCell.__init__).args: - return tf.contrib.rnn.BasicLSTMCell( - size, forget_bias=0.0, state_is_tuple=True, - reuse=tf.get_variable_scope().reuse) - else: - return tf.contrib.rnn.BasicLSTMCell( - size, forget_bias=0.0, state_is_tuple=True) - attn_cell = lstm_cell - if is_training and config.keep_prob < 1: - def attn_cell(): - return tf.contrib.rnn.DropoutWrapper( - lstm_cell(), output_keep_prob=config.keep_prob) - self.cell = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(config.num_layers)], state_is_tuple=True) - - self._initial_state = self.cell.zero_state(batch_size, data_type()) - self._initial_state_single = self.cell.zero_state(1, data_type()) - - self.initial = tf.reshape(tf.stack(axis=0, values=self._initial_state_single), [config.num_layers, 2, 1, size], name="test_initial_state") - - - # first implement the less efficient version - test_word_in = tf.placeholder(tf.int32, [1, 1], name="test_word_in") - - state_placeholder = tf.placeholder(tf.float32, [config.num_layers, 2, 1, size], name="test_state_in") - # unpacking the input state context - l = tf.unstack(state_placeholder, axis=0) - test_input_state = tuple( - [tf.contrib.rnn.LSTMStateTuple(l[idx][0],l[idx][1]) - for idx in range(config.num_layers)] - ) - - with tf.device("/cpu:0"): - self.embedding = tf.get_variable( - "embedding", [vocab_size, size], dtype=data_type()) - - inputs = tf.nn.embedding_lookup(self.embedding, input_.input_data) - test_inputs = tf.nn.embedding_lookup(self.embedding, test_word_in) - - # test time - with tf.variable_scope("RNN"): - (test_cell_output, test_output_state) = self.cell(test_inputs[:, 0, :], test_input_state) - - test_state_out = tf.reshape(tf.stack(axis=0, values=test_output_state), [config.num_layers, 2, 1, size], name="test_state_out") - test_cell_out = tf.reshape(test_cell_output, [1, size], name="test_cell_out") - # above is the first part of the graph for test - # test-word-in - # > ---- > test-state-out - # test-state-in > test-cell-out - - - # below is the 2nd part of the graph for test - # test-word-out - # > prob(word | test-word-out) - # test-cell-in - - test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out") - cellout_placeholder = tf.placeholder(tf.float32, [1, size], name="test_cell_in") - - softmax_w = tf.get_variable( - "softmax_w", [size, vocab_size], dtype=data_type()) - softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type()) - - test_logits = tf.matmul(cellout_placeholder, softmax_w) + softmax_b - test_softmaxed = tf.nn.log_softmax(test_logits) - - p_word = test_softmaxed[0, test_word_out[0,0]] - test_out = tf.identity(p_word, name="test_out") - - if is_training and config.keep_prob < 1: - inputs = tf.nn.dropout(inputs, config.keep_prob) - - # Simplified version of models/tutorials/rnn/rnn.py's rnn(). - # This builds an unrolled LSTM for tutorial purposes only. - # In general, use the rnn() or state_saving_rnn() from rnn.py. - # - # The alternative version of the code below is: - # - # inputs = tf.unstack(inputs, num=num_steps, axis=1) - # outputs, state = tf.contrib.rnn.static_rnn( - # cell, inputs, initial_state=self._initial_state) - outputs = [] - state = self._initial_state - with tf.variable_scope("RNN"): - for time_step in range(num_steps): - if time_step > -1: tf.get_variable_scope().reuse_variables() - (cell_output, state) = self.cell(inputs[:, time_step, :], state) - outputs.append(cell_output) - - output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size]) - logits = tf.matmul(output, softmax_w) + softmax_b - loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example( - [logits], - [tf.reshape(input_.targets, [-1])], - [tf.ones([batch_size * num_steps], dtype=data_type())]) - self._cost = cost = tf.reduce_sum(loss) / batch_size - self._final_state = state - - if not is_training: - return - - self._lr = tf.Variable(0.0, trainable=False) - tvars = tf.trainable_variables() - grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), - config.max_grad_norm) - optimizer = tf.train.GradientDescentOptimizer(self._lr) - self._train_op = optimizer.apply_gradients( - list(zip(grads, tvars)), - global_step=tf.contrib.framework.get_or_create_global_step()) - - self._new_lr = tf.placeholder( - tf.float32, shape=[], name="new_learning_rate") - self._lr_update = tf.assign(self._lr, self._new_lr) - - def assign_lr(self, session, lr_value): - session.run(self._lr_update, feed_dict={self._new_lr: lr_value}) - - @property - def input(self): - return self._input - - @property - def initial_state(self): - return self._initial_state - - @property - def cost(self): - return self._cost - - @property - def final_state(self): - return self._final_state - - @property - def lr(self): - return self._lr - - @property - def train_op(self): - return self._train_op - -def run_epoch(session, model, eval_op=None, verbose=False): - """Runs the model on the given data.""" - start_time = time.time() - costs = 0.0 - iters = 0 - state = session.run(model.initial_state) - - fetches = { - "cost": model.cost, - "final_state": model.final_state, - } - if eval_op is not None: - fetches["eval_op"] = eval_op - - for step in range(model.input.epoch_size): - feed_dict = {} - for i, (c, h) in enumerate(model.initial_state): - feed_dict[c] = state[i].c - feed_dict[h] = state[i].h - - vals = session.run(fetches, feed_dict) - cost = vals["cost"] - state = vals["final_state"] - - costs += cost - iters += model.input.num_steps - - if verbose and step % (model.input.epoch_size // 10) == 10: - print("%.3f perplexity: %.3f speed: %.0f wps" % - (step * 1.0 / model.input.epoch_size, np.exp(costs / iters), - iters * model.input.batch_size / (time.time() - start_time))) - - return np.exp(costs / iters) + return tf.keras.layers.LSTMCell(size, dtype=dt, unit_forget_bias=False) + + def add_dropout(cell): + if config.keep_prob < 1: + cell = tf.nn.RNNCellDropoutWrapper(cell=cell, output_keep_prob=config.keep_prob) + return cell + + self.embedding = tf.keras.layers.Embedding(vocab_size, size, dtype=dt) + self.cells = [lstm_cell() for _ in range(config.num_layers)] + self.rnn = tf.keras.layers.RNN(self.cells, return_sequences=True) + + if logits_bias_initializer is None: + logits_bias_initializer = 'zeros' + self.fc = tf.keras.layers.Dense(vocab_size, bias_initializer=logits_bias_initializer) + + # only used in training + self.training_cells = [add_dropout(cell) for cell in self.cells] + self.training_rnn = tf.keras.layers.RNN(self.training_cells, return_sequences=True) + + def get_logits(self, word_ids, is_training=False): + rnn = self.training_rnn if is_training else self.rnn + inputs = self.embedding(word_ids) + if is_training and self._config.keep_prob < 1: + inputs = tf.nn.dropout(inputs, 1 - self._config.keep_prob) + rnn_out = rnn(inputs) + logits = self.fc(rnn_out) + return logits + + def get_loss(self, word_ids, labels, is_training=False): + logits = self.get_logits(word_ids, is_training) + loss_obj = tf.losses.SparseCategoricalCrossentropy(from_logits=True) + return loss_obj(labels, logits) + + def get_score(self, logits): + """Take logits as input, output a score.""" + return tf.nn.log_softmax(logits) + + @tf.function + def get_initial_state(self): + """Exported function which emits zeroed RNN context vector.""" + # This seems a bug in TensorFlow, but passing tf.int32 makes the state tensor also int32. + fake_input = tf.constant(0, dtype=tf.float32, shape=[1, 1]) + initial_state = tf.stack(self.rnn.get_initial_state(fake_input)) + return {"initial_state": initial_state} + + @tf.function + def single_step(self, context, word_id): + """Exported function which perform one step of the RNN model.""" + rnn = tf.keras.layers.RNN(self.cells, return_state=True) + context = tf.unstack(context) + context = [tf.unstack(c) for c in context] + + inputs = self.embedding(word_id) + rnn_out_and_states = rnn(inputs, initial_state=context) + + rnn_out = rnn_out_and_states[0] + rnn_states = tf.stack(rnn_out_and_states[1:]) + + logits = self.fc(rnn_out) + output = self.get_score(logits) + log_prob = output[0, word_id[0, 0]] + return {"log_prob": log_prob, "rnn_states": rnn_states, "rnn_out": rnn_out} + + +class RNNLMModelTrainer(tf.Module): + """This class contains training code.""" + + def __init__(self, model: RNNLMModel, config): + super().__init__() + self.model = model + self.learning_rate = tf.Variable(1e-3, dtype=tf.float32, trainable=False) + self.optimizer = tf.optimizers.SGD(learning_rate=self.learning_rate) + self.max_grad_norm = config.max_grad_norm + + self.eval_mean_loss = tf.metrics.Mean() + + def train_one_epoch(self, data_producer, learning_rate, verbose=True): + print("start epoch with learning rate {}".format(learning_rate)) + self.learning_rate.assign(learning_rate) + + for i, (inputs, labels) in enumerate(data_producer.iterate()): + loss = self._train_step(inputs, labels) + if verbose and i % (data_producer.epoch_size // 10) == 1: + print("{}/{}: loss={}".format(i, data_producer.epoch_size, loss)) + + @tf.function + def evaluate(self, data_producer): + self.eval_mean_loss.reset_states() + for i, (inputs, labels) in enumerate(data_producer.iterate()): + loss = self.model.get_loss(inputs, labels) + self.eval_mean_loss.update_state(loss) + + return self.eval_mean_loss.result() + + @tf.function + def _train_step(self, inputs, labels): + with tf.GradientTape() as tape: + loss = self.model.get_loss(inputs, labels, is_training=True) + + tvars = self.model.trainable_variables + grads = tape.gradient(loss, tvars) + clipped_grads, _ = tf.clip_by_global_norm(grads, self.max_grad_norm) + self.optimizer.apply_gradients(zip(clipped_grads, tvars)) + return loss def get_config(): return Config() + def main(_): - if not FLAGS.data_path: - raise ValueError("Must set --data_path to RNNLM data directory") + # Turn this on to try the model code with this source file itself! + __TESTING = False - raw_data = reader.rnnlm_raw_data(FLAGS.data_path, FLAGS.vocab_path) - train_data, valid_data, _, word_map = raw_data + if __TESTING: + (train_data, valid_data), word_map = reader.rnnlm_gen_data(__file__, reader.__file__) + else: + if not FLAGS.data_path: + raise ValueError("Must set --data_path to RNNLM data directory") + + raw_data = reader.rnnlm_raw_data(FLAGS.data_path, FLAGS.vocab_path) + train_data, valid_data, _, word_map = raw_data config = get_config() config.hidden_size = FLAGS.hidden_size config.vocab_size = len(word_map) - eval_config = get_config() - eval_config.batch_size = 1 - eval_config.num_steps = 1 - - with tf.Graph().as_default(): - initializer = tf.random_uniform_initializer(-config.init_scale, - config.init_scale) - - with tf.name_scope("Train"): - train_input = RnnlmInput(config=config, data=train_data, name="TrainInput") - with tf.variable_scope("Model", reuse=None, initializer=initializer): - m = RnnlmModel(is_training=True, config=config, input_=train_input) - tf.summary.scalar("Training Loss", m.cost) - tf.summary.scalar("Learning Rate", m.lr) - - with tf.name_scope("Valid"): - valid_input = RnnlmInput(config=config, data=valid_data, name="ValidInput") - with tf.variable_scope("Model", reuse=True, initializer=initializer): - mvalid = RnnlmModel(is_training=False, config=config, input_=valid_input) - tf.summary.scalar("Validation Loss", mvalid.cost) - - sv = tf.train.Supervisor(logdir=FLAGS.save_path) - with sv.managed_session() as session: - for i in range(config.max_max_epoch): - lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0) - m.assign_lr(session, config.learning_rate * lr_decay) - - print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) - train_perplexity = run_epoch(session, m, eval_op=m.train_op, - verbose=True) - - print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) - valid_perplexity = run_epoch(session, mvalid) - print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity)) - - if FLAGS.save_path: - print("Saving model to %s." % FLAGS.save_path) - sv.saver.save(session, FLAGS.save_path) + + if __TESTING: + # use a much smaller scale on our tiny test data + config.num_steps = 8 + config.batch_size = 4 + + model = RNNLMModel(config) + train_producer = reader.RNNLMProducer(train_data, config.batch_size, config.num_steps) + trainer = RNNLMModelTrainer(model, config) + + valid_producer = reader.RNNLMProducer(valid_data, config.batch_size, config.num_steps) + + # Save variables to disk if you want to prevent crash... + # Data producer can also be saved to preverse feeding progress. + checkpoint = tf.train.Checkpoint(trainer=trainer, data_feeder=train_producer) + manager = tf.train.CheckpointManager(checkpoint, "checkpoints/", 5) + + for i in range(config.max_max_epoch): + lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0) + lr = config.learning_rate * lr_decay + trainer.train_one_epoch(train_producer, lr) + manager.save() + + eval_loss = trainer.evaluate(valid_producer) + print("validating: loss={}".format(eval_loss)) + + # Export + print("Saving model to %s." % FLAGS.save_path) + spec = [tf.TensorSpec(shape=[config.num_layers, 2, 1, config.hidden_size], dtype=data_type(), name="context"), + tf.TensorSpec(shape=[1, 1], dtype=tf.int32, name="word_id")] + cfunc = model.single_step.get_concrete_function(*spec) + cfunc2 = model.get_initial_state.get_concrete_function() + tf.saved_model.save(model, FLAGS.save_path, signatures={"single_step": cfunc, "get_initial_state": cfunc2}) + if __name__ == "__main__": - tf.app.run() + absl.app.run(main) diff --git a/egs/wsj/s5/steps/tfrnnlm/lstm_fast.py b/egs/wsj/s5/steps/tfrnnlm/lstm_fast.py index ff6c7263804..e299f449636 100644 --- a/egs/wsj/s5/steps/tfrnnlm/lstm_fast.py +++ b/egs/wsj/s5/steps/tfrnnlm/lstm_fast.py @@ -25,32 +25,28 @@ from __future__ import division from __future__ import print_function -import sys - -import inspect -import time - -import numpy as np +import absl +import absl.flags as flags import tensorflow as tf +from tensorflow.python.keras.losses import LossFunctionWrapper import reader +from lstm import RNNLMModel, RNNLMModelTrainer -flags = tf.flags -logging = tf.logging - -flags.DEFINE_integer("hidden_size", 200, "hidden dim of RNN") - -flags.DEFINE_string("data_path", None, - "Where the training/test data is stored.") -flags.DEFINE_string("vocab_path", None, - "Where the wordlist file is stored.") -flags.DEFINE_string("save_path", None, - "Model output directory.") -flags.DEFINE_bool("use_fp16", False, - "Train using 16-bit floats instead of 32bit floats") +# flags.DEFINE_integer("hidden_size", 200, "hidden dim of RNN") +# +# flags.DEFINE_string("data_path", None, +# "Where the training/test data is stored.") +# flags.DEFINE_string("vocab_path", None, +# "Where the wordlist file is stored.") +# flags.DEFINE_string("save_path", "export", +# "Model output directory.") +# flags.DEFINE_bool("use_fp16", False, +# "Train using 16-bit floats instead of 32bit floats") FLAGS = flags.FLAGS + class Config(object): """Small config.""" init_scale = 0.1 @@ -65,280 +61,102 @@ class Config(object): lr_decay = 0.8 batch_size = 64 + def data_type(): return tf.float16 if FLAGS.use_fp16 else tf.float32 + # this new "softmax" function we show can train a "self-normalized" RNNLM where # the sum of the output is automatically (close to) 1.0 # which saves a lot of computation for lattice-rescoring def new_softmax(labels, logits): - target = tf.reshape(labels, [-1]) - f_logits = tf.exp(logits) - row_sums = tf.reduce_sum(f_logits, 1) # this is the negative part of the objf - - t2 = tf.expand_dims(target, 1) - range = tf.expand_dims(tf.range(tf.shape(target)[0]), 1) + flatten_labels = tf.reshape(labels, [-1]) + n_samples = tf.shape(flatten_labels)[0] + flatten_logits = tf.reshape(logits, shape=[n_samples, -1]) + f_logits = tf.exp(flatten_logits) + row_sums = tf.reduce_sum(f_logits, -1) # this is the negative part of the objf + + t2 = tf.expand_dims(flatten_labels, 1) + range = tf.expand_dims(tf.range(n_samples), 1) ind = tf.concat([range, t2], 1) - res = tf.gather_nd(logits, ind) + res = tf.gather_nd(flatten_logits, ind) return -res + row_sums - 1 -class RnnlmInput(object): - """The input data.""" - - def __init__(self, config, data, name=None): - self.batch_size = batch_size = config.batch_size - self.num_steps = num_steps = config.num_steps - self.epoch_size = ((len(data) // batch_size) - 1) // num_steps - self.input_data, self.targets = reader.rnnlm_producer( - data, batch_size, num_steps, name=name) - - -class RnnlmModel(object): - """The RNNLM model.""" - - def __init__(self, is_training, config, input_): - self._input = input_ - - batch_size = input_.batch_size - num_steps = input_.num_steps - size = config.hidden_size - vocab_size = config.vocab_size - - def lstm_cell(): - # With the latest TensorFlow source code (as of Mar 27, 2017), - # the BasicLSTMCell will need a reuse parameter which is unfortunately not - # defined in TensorFlow 1.0. To maintain backwards compatibility, we add - # an argument check here: - if 'reuse' in inspect.getargspec( - tf.contrib.rnn.BasicLSTMCell.__init__).args: - return tf.contrib.rnn.BasicLSTMCell( - size, forget_bias=0.0, state_is_tuple=True, - reuse=tf.get_variable_scope().reuse) - else: - return tf.contrib.rnn.BasicLSTMCell( - size, forget_bias=0.0, state_is_tuple=True) - attn_cell = lstm_cell - if is_training and config.keep_prob < 1: - def attn_cell(): - return tf.contrib.rnn.DropoutWrapper( - lstm_cell(), output_keep_prob=config.keep_prob) - self.cell = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(config.num_layers)], state_is_tuple=True) - - self._initial_state = self.cell.zero_state(batch_size, data_type()) - self._initial_state_single = self.cell.zero_state(1, data_type()) - - self.initial = tf.reshape(tf.stack(axis=0, values=self._initial_state_single), [config.num_layers, 2, 1, size], name="test_initial_state") - - # first implement the less efficient version - test_word_in = tf.placeholder(tf.int32, [1, 1], name="test_word_in") - - state_placeholder = tf.placeholder(tf.float32, [config.num_layers, 2, 1, size], name="test_state_in") - # unpacking the input state context - l = tf.unstack(state_placeholder, axis=0) - test_input_state = tuple( - [tf.contrib.rnn.LSTMStateTuple(l[idx][0],l[idx][1]) - for idx in range(config.num_layers)] - ) - - with tf.device("/cpu:0"): - self.embedding = tf.get_variable( - "embedding", [vocab_size, size], dtype=data_type()) - - inputs = tf.nn.embedding_lookup(self.embedding, input_.input_data) - test_inputs = tf.nn.embedding_lookup(self.embedding, test_word_in) - - # test time - with tf.variable_scope("RNN"): - (test_cell_output, test_output_state) = self.cell(test_inputs[:, 0, :], test_input_state) - - test_state_out = tf.reshape(tf.stack(axis=0, values=test_output_state), [config.num_layers, 2, 1, size], name="test_state_out") - test_cell_out = tf.reshape(test_cell_output, [1, size], name="test_cell_out") - # above is the first part of the graph for test - # test-word-in - # > ---- > test-state-out - # test-state-in > test-cell-out - - - # below is the 2nd part of the graph for test - # test-word-out - # > prob(word | test-word-out) - # test-cell-in - - test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out") - cellout_placeholder = tf.placeholder(tf.float32, [1, size], name="test_cell_in") - - softmax_w = tf.get_variable( - "softmax_w", [size, vocab_size], dtype=data_type()) - softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type()) - softmax_b = softmax_b - 9.0 - - test_logits = tf.matmul(cellout_placeholder, tf.transpose(tf.nn.embedding_lookup(tf.transpose(softmax_w), test_word_out[0]))) + softmax_b[test_word_out[0,0]] - - p_word = test_logits[0, 0] - test_out = tf.identity(p_word, name="test_out") - - if is_training and config.keep_prob < 1: - inputs = tf.nn.dropout(inputs, config.keep_prob) - - # Simplified version of models/tutorials/rnn/rnn.py's rnn(). - # This builds an unrolled LSTM for tutorial purposes only. - # In general, use the rnn() or state_saving_rnn() from rnn.py. - # - # The alternative version of the code below is: - # - # inputs = tf.unstack(inputs, num=num_steps, axis=1) - # outputs, state = tf.contrib.rnn.static_rnn( - # cell, inputs, initial_state=self._initial_state) - outputs = [] - state = self._initial_state - with tf.variable_scope("RNN"): - for time_step in range(num_steps): - if time_step > -1: tf.get_variable_scope().reuse_variables() - (cell_output, state) = self.cell(inputs[:, time_step, :], state) - outputs.append(cell_output) - - output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size]) - logits = tf.matmul(output, softmax_w) + softmax_b - loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example( - [logits], - [tf.reshape(input_.targets, [-1])], - [tf.ones([batch_size * num_steps], dtype=data_type())], - softmax_loss_function=new_softmax) - self._cost = cost = tf.reduce_sum(loss) / batch_size - self._final_state = state - - if not is_training: - return - - self._lr = tf.Variable(0.0, trainable=False) - tvars = tf.trainable_variables() - grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), - config.max_grad_norm) - optimizer = tf.train.GradientDescentOptimizer(self._lr) - self._train_op = optimizer.apply_gradients( - list(zip(grads, tvars)), - global_step=tf.contrib.framework.get_or_create_global_step()) - - self._new_lr = tf.placeholder( - tf.float32, shape=[], name="new_learning_rate") - self._lr_update = tf.assign(self._lr, self._new_lr) - - def assign_lr(self, session, lr_value): - session.run(self._lr_update, feed_dict={self._new_lr: lr_value}) - - @property - def input(self): - return self._input - - @property - def initial_state(self): - return self._initial_state - - @property - def cost(self): - return self._cost - - @property - def final_state(self): - return self._final_state - - @property - def lr(self): - return self._lr - - @property - def train_op(self): - return self._train_op - -def run_epoch(session, model, eval_op=None, verbose=False): - """Runs the model on the given data.""" - start_time = time.time() - costs = 0.0 - iters = 0 - state = session.run(model.initial_state) - - fetches = { - "cost": model.cost, - "final_state": model.final_state, - } - if eval_op is not None: - fetches["eval_op"] = eval_op - - for step in range(model.input.epoch_size): - feed_dict = {} - for i, (c, h) in enumerate(model.initial_state): - feed_dict[c] = state[i].c - feed_dict[h] = state[i].h - - vals = session.run(fetches, feed_dict) - cost = vals["cost"] - state = vals["final_state"] - - - costs += cost - iters += model.input.num_steps - - if verbose and step % (model.input.epoch_size // 10) == 10: - print("%.3f perplexity: %.3f speed: %.0f wps" % - (step * 1.0 / model.input.epoch_size, np.exp(costs / iters), - iters * model.input.batch_size / (time.time() - start_time))) - - return np.exp(costs / iters) + +class MyFastLossFunction(LossFunctionWrapper): + def __init__(self): + super().__init__(new_softmax) + + +class FastRNNLMModel(RNNLMModel): + def __init__(self, config): + super().__init__(config, tf.constant_initializer(-9)) + + def get_loss(self, word_ids, labels, is_training=False): + logits = self.get_logits(word_ids, is_training) + loss_obj = MyFastLossFunction() + return loss_obj(labels, logits) + + def get_score(self, logits): + # In this implementation, logits can be used as dist output + return logits def get_config(): return Config() + def main(_): - if not FLAGS.data_path: - raise ValueError("Must set --data_path to RNNLM data directory") + # Turn this on to try the model code with this source file itself! + __TESTING = False - raw_data = reader.rnnlm_raw_data(FLAGS.data_path, FLAGS.vocab_path) - train_data, valid_data, _, word_map = raw_data + if __TESTING: + (train_data, valid_data), word_map = reader.rnnlm_gen_data(__file__, reader.__file__) + else: + if not FLAGS.data_path: + raise ValueError("Must set --data_path to RNNLM data directory") + + raw_data = reader.rnnlm_raw_data(FLAGS.data_path, FLAGS.vocab_path) + train_data, valid_data, _, word_map = raw_data config = get_config() config.hidden_size = FLAGS.hidden_size config.vocab_size = len(word_map) - eval_config = get_config() - eval_config.batch_size = 1 - eval_config.num_steps = 1 - - with tf.Graph().as_default(): - initializer = tf.random_uniform_initializer(-config.init_scale, - config.init_scale) - - with tf.name_scope("Train"): - train_input = RnnlmInput(config=config, data=train_data, name="TrainInput") - with tf.variable_scope("Model", reuse=None, initializer=initializer): - m = RnnlmModel(is_training=True, config=config, input_=train_input) - tf.summary.scalar("Training Loss", m.cost) - tf.summary.scalar("Learning Rate", m.lr) - - with tf.name_scope("Valid"): - valid_input = RnnlmInput(config=config, data=valid_data, name="ValidInput") - with tf.variable_scope("Model", reuse=True, initializer=initializer): - mvalid = RnnlmModel(is_training=False, config=config, input_=valid_input) - tf.summary.scalar("Validation Loss", mvalid.cost) - - sv = tf.train.Supervisor(logdir=FLAGS.save_path) - with sv.managed_session() as session: - for i in range(config.max_max_epoch): - lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0) - m.assign_lr(session, config.learning_rate * lr_decay) - - print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) - train_perplexity = run_epoch(session, m, eval_op=m.train_op, - verbose=True) - - print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) - valid_perplexity = run_epoch(session, mvalid) - print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity)) - - if FLAGS.save_path: - print("Saving model to %s." % FLAGS.save_path) - sv.saver.save(session, FLAGS.save_path) + + if __TESTING: + # use a much smaller scale on our tiny test data + config.num_steps = 8 + config.batch_size = 4 + + model = FastRNNLMModel(config) + train_producer = reader.RNNLMProducer(train_data, config.batch_size, config.num_steps) + trainer = RNNLMModelTrainer(model, config) + + valid_producer = reader.RNNLMProducer(valid_data, config.batch_size, config.num_steps) + + # Save variables to disk if you want to prevent crash... + # Data producer can also be saved to preverse feeding progress. + checkpoint = tf.train.Checkpoint(trainer=trainer, data_feeder=train_producer) + manager = tf.train.CheckpointManager(checkpoint, "checkpoints/", 5) + + for i in range(config.max_max_epoch): + lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0) + lr = config.learning_rate * lr_decay + trainer.train_one_epoch(train_producer, lr) + manager.save() + + eval_loss = trainer.evaluate(valid_producer) + print("validating: loss={}".format(eval_loss)) + + # Export + print("Saving model to %s." % FLAGS.save_path) + spec = [tf.TensorSpec(shape=[config.num_layers, 2, 1, config.hidden_size], dtype=data_type(), name="context"), + tf.TensorSpec(shape=[1, 1], dtype=tf.int32, name="word_id")] + cfunc = model.single_step.get_concrete_function(*spec) + cfunc2 = model.get_initial_state.get_concrete_function() + tf.saved_model.save(model, FLAGS.save_path, signatures={"single_step": cfunc, "get_initial_state": cfunc2}) + if __name__ == "__main__": - tf.app.run() + absl.app.run(main) diff --git a/egs/wsj/s5/steps/tfrnnlm/reader.py b/egs/wsj/s5/steps/tfrnnlm/reader.py index 80cdeccbb26..b0d0a7f563d 100644 --- a/egs/wsj/s5/steps/tfrnnlm/reader.py +++ b/egs/wsj/s5/steps/tfrnnlm/reader.py @@ -61,45 +61,61 @@ def rnnlm_raw_data(data_path, vocab_path): return train_data, valid_data, vocabulary, word_to_id -def rnnlm_producer(raw_data, batch_size, num_steps, name=None): - """Iterate on the raw RNNLM data. +def rnnlm_gen_data(*files): + """Generates data and vocab from files. - This chunks up raw_data into batches of examples and returns Tensors that - are drawn from these batches. + This function is used solely for testing. + """ + import collections + import re - Args: - raw_data: one of the raw data outputs from rnnlm_raw_data. - batch_size: int, the batch size. - num_steps: int, the number of unrolls. - name: the name of this operation (optional). + all_words = collections.Counter() + all_word_lists = [] + for f in files: + with open(f, mode="r") as fp: + text = fp.read() - Returns: - A pair of Tensors, each shaped [batch_size, num_steps]. The second element - of the tuple is the same data time-shifted to the right by one. + word_list = re.split("[^A-Za-z]", text) + word_list = list(filter(None, word_list)) + all_words.update(word_list) + all_word_lists.append(word_list) - Raises: - tf.errors.InvalidArgumentError: if batch_size or num_steps are too high. - """ - with tf.name_scope(name, "RNNLMProducer", [raw_data, batch_size, num_steps]): - raw_data = tf.convert_to_tensor(raw_data, name="raw_data", dtype=tf.int32) - - data_len = tf.size(raw_data) - batch_len = data_len // batch_size - data = tf.reshape(raw_data[0 : batch_size * batch_len], - [batch_size, batch_len]) - - epoch_size = (batch_len - 1) // num_steps - assertion = tf.assert_positive( - epoch_size, - message="epoch_size == 0, decrease batch_size or num_steps") - with tf.control_dependencies([assertion]): - epoch_size = tf.identity(epoch_size, name="epoch_size") - - i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue() - x = tf.strided_slice(data, [0, i * num_steps], - [batch_size, (i + 1) * num_steps]) - x.set_shape([batch_size, num_steps]) - y = tf.strided_slice(data, [0, i * num_steps + 1], - [batch_size, (i + 1) * num_steps + 1]) - y.set_shape([batch_size, num_steps]) - return x, y + word_to_id = {word: i for i, (word, _) in enumerate(all_words.most_common())} + + def convert(word_list): + return [word_to_id[word] for word in word_list] + + all_word_ids = [convert(word_list) for word_list in all_word_lists] + return all_word_ids, word_to_id + + +class RNNLMProducer(tf.Module): + """This is the data feeder.""" + + def __init__(self, raw_data, batch_size, num_steps, name=None): + super().__init__(name) + self.batch_size = batch_size + self.num_steps = num_steps + self.epoch_size = (len(raw_data) - 1) // num_steps // batch_size + + # load data into a variable so that it will be separated from graph + self._raw_data = tf.Variable(raw_data, dtype=tf.int32, trainable=False) + + ds_x = tf.data.Dataset.from_tensor_slices(self._raw_data) + ds_y = ds_x.skip(1) + ds = tf.data.Dataset.zip((ds_x, ds_y)) + # form samples + ds = ds.batch(num_steps, drop_remainder=True) + # form batches + self._ds = ds.batch(batch_size, drop_remainder=True) + + def iterate(self): + return self._ds + + +if __name__ == "__main__": + samples = list(range(100)) + ds = RNNLMProducer(samples, 4, 8) + print(ds.epoch_size) + for data in ds.iterate(): + print(data) diff --git a/src/tfrnnlm/CMakeLists.txt b/src/tfrnnlm/CMakeLists.txt new file mode 100644 index 00000000000..3b0b9aaa10a --- /dev/null +++ b/src/tfrnnlm/CMakeLists.txt @@ -0,0 +1,49 @@ +set(PUBLIC_HEADERS + tensorflow-rnnlm.h +) + +add_library(kaldi-tfrnnlm + tensorflow-rnnlm.cc +) + +if(NOT EXISTS ${TENSORFLOW_DIR}/bazel-bin/tensorflow/libtensorflow_framework.so + OR NOT EXISTS ${TENSORFLOW_DIR}/bazel-bin/tensorflow/libtensorflow_cc.so) + message(FATAL_ERROR "TensorFlow components are not built, please build TensorFlow first.") +endif() + +target_include_directories(kaldi-tfrnnlm PUBLIC + ${TENSORFLOW_DIR}/bazel-tensorflow/external/com_google_protobuf/src + ${TENSORFLOW_DIR}/bazel-genfiles + ${TENSORFLOW_DIR} + ${TENSORFLOW_DIR}/tensorflow/lite/tools/make/downloads/eigen + ${TENSORFLOW_DIR}/tensorflow/lite/tools/make/downloads/absl +) + +target_include_directories(kaldi-tfrnnlm PUBLIC + $ + $ +) + +target_link_libraries(kaldi-tfrnnlm PUBLIC + kaldi-lm + kaldi-util + kaldi-matrix + kaldi-base + -lz + -ldl + -fPIC + -L${TENSORFLOW_DIR}/bazel-bin/tensorflow + -ltensorflow_cc + -ltensorflow_framework +) + + +install(TARGETS kaldi-tfrnnlm + EXPORT kaldi-targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) + +install(FILES ${PUBLIC_HEADERS} DESTINATION include/kaldi/tfrnnlm) + diff --git a/src/tfrnnlm/tensorflow-rnnlm.cc b/src/tfrnnlm/tensorflow-rnnlm.cc index e4de98abd12..2f9268fa790 100644 --- a/src/tfrnnlm/tensorflow-rnnlm.cc +++ b/src/tfrnnlm/tensorflow-rnnlm.cc @@ -27,6 +27,8 @@ // Tensorflow includes were moved after tfrnnlm/tensorflow-rnnlm.h include to // avoid macro redefinitions. See also the note in tfrnnlm/tensorflow-rnnlm.h. +#include "tensorflow/cc/saved_model/loader.h" +#include "tensorflow/cc/saved_model/tag_constants.h" #include "tensorflow/core/public/session.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/protobuf/meta_graph.pb.h" @@ -64,42 +66,75 @@ void SetUnkPenalties(const string &filename, // Read tensorflow checkpoint files void KaldiTfRnnlmWrapper::ReadTfModel(const std::string &tf_model_path, int32 num_threads) { - string graph_path = tf_model_path + ".meta"; - tensorflow::SessionOptions session_options; + tensorflow::RunOptions run_options; session_options.config.set_intra_op_parallelism_threads(num_threads); session_options.config.set_inter_op_parallelism_threads(num_threads); - Status status = tensorflow::NewSession(session_options, - &session_); + Status status = tensorflow::LoadSavedModel( + session_options, run_options, tf_model_path, + {tensorflow::kSavedModelTagServe}, + &bundle_); if (!status.ok()) { KALDI_ERR << status.ToString(); } - tensorflow::MetaGraphDef graph_def; - status = tensorflow::ReadBinaryProto(tensorflow::Env::Default(), graph_path, - &graph_def); - if (!status.ok()) { - KALDI_ERR << status.ToString(); + // SavedModel maintains a list of "exported function signature" in its metadata. + // We are going to read it and get actual tensor name. + auto&& signature_map = bundle_.meta_graph_def.signature_def(); + auto signature_it = signature_map.find("single_step"); + if (signature_it == signature_map.end()) { + KALDI_ERR << "Cannot find signature `single_step' in SavedModel."; } - // Add the graph to the session - status = session_->Create(graph_def.graph_def()); - if (!status.ok()) { - KALDI_ERR << status.ToString(); + auto&& signature = signature_it->second; + + const std::vector> input_params = { + {"context", context_tensor_name_}, + {"word_id", word_id_tensor_name_}, + }; + + for (auto&& pair : input_params) { + auto&& map = signature.inputs(); + auto param_it = map.find(pair.first); + if (param_it == map.end()) { + KALDI_ERR << "Cannot find input param `" << pair.first << "' in signature, abort."; + } + pair.second = param_it->second.name(); + // printf("%s: %s\n", pair.first, pair.second.c_str()); } - Tensor checkpointPathTensor(tensorflow::DT_STRING, tensorflow::TensorShape()); - checkpointPathTensor.scalar()() = tf_model_path; + const std::vector> output_params = { + {"log_prob", log_prob_tensor_name_}, + {"rnn_out", rnn_out_tensor_name_}, + {"rnn_states", rnn_states_tensor_name_}, + }; + + for (auto&& pair : output_params) { + auto&& map = signature.outputs(); + auto param_it = map.find(pair.first); + if (param_it == map.end()) { + KALDI_ERR << "Cannot find output param `" << pair.first << "' in signature, abort."; + } + pair.second = param_it->second.name(); + // printf("%s: %s\n", pair.first, pair.second.c_str()); + } - status = session_->Run( - {{graph_def.saver_def().filename_tensor_name(), checkpointPathTensor} }, - {}, - {graph_def.saver_def().restore_op_name()}, - nullptr); - if (!status.ok()) { - KALDI_ERR << status.ToString(); + // We have another function which only emit initial RNN state + signature_it = signature_map.find("get_initial_state"); + if (signature_it == signature_map.end()) { + KALDI_ERR << "Cannot find signature `get_initial_state' in SavedModel."; + } + + { + auto&& signature = signature_it->second; + auto&& map = signature.outputs(); + auto param_it = map.find("initial_state"); + if (param_it == map.end()) { + KALDI_ERR << "Cannot find output param `initial_state' in signature, abort."; + } + initial_state_tensor_name_ = param_it->second.name(); } } @@ -177,13 +212,16 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper( delete fst_word_symbols; } +KaldiTfRnnlmWrapper::~KaldiTfRnnlmWrapper() { +} + void KaldiTfRnnlmWrapper::AcquireInitialTensors() { Status status; // get the initial context; this is basically the all-0 tensor { std::vector state; - status = session_->Run(std::vector >(), - {"Train/Model/test_initial_state"}, {}, &state); + status = bundle_.session->Run(std::vector >(), + {initial_state_tensor_name_}, {}, &state); if (!status.ok()) { KALDI_ERR << status.ToString(); } @@ -197,11 +235,11 @@ void KaldiTfRnnlmWrapper::AcquireInitialTensors() { bosword.scalar()() = eos_; // eos_ is more like a sentence boundary std::vector > inputs = { - {"Train/Model/test_word_in", bosword}, - {"Train/Model/test_state_in", initial_context_}, + {word_id_tensor_name_, bosword}, + {context_tensor_name_, initial_context_}, }; - status = session_->Run(inputs, {"Train/Model/test_cell_out"}, {}, &state); + status = bundle_.session->Run(inputs, {rnn_out_tensor_name_}, {}, &state); if (!status.ok()) { KALDI_ERR << status.ToString(); } @@ -215,27 +253,23 @@ BaseFloat KaldiTfRnnlmWrapper::GetLogProb(int32 word, const Tensor &cell_in, Tensor *context_out, Tensor *new_cell) { - std::vector > inputs; - Tensor thisword(tensorflow::DT_INT32, {1, 1}); - thisword.scalar()() = word; + std::vector outputs; - if (context_out != NULL) { - inputs = { - {"Train/Model/test_word_in", thisword}, - {"Train/Model/test_word_out", thisword}, - {"Train/Model/test_state_in", context_in}, - {"Train/Model/test_cell_in", cell_in}, - }; + std::vector > inputs = { + {word_id_tensor_name_, thisword}, + {context_tensor_name_, context_in}, + }; + if (context_out != NULL) { // The session will initialize the outputs // Run the session, evaluating our "c" operation from the graph - Status status = session_->Run(inputs, - {"Train/Model/test_out", - "Train/Model/test_state_out", - "Train/Model/test_cell_out"}, {}, &outputs); + Status status = bundle_.session->Run(inputs, + {log_prob_tensor_name_, + rnn_out_tensor_name_, + rnn_states_tensor_name_}, {}, &outputs); if (!status.ok()) { KALDI_ERR << status.ToString(); } @@ -243,14 +277,9 @@ BaseFloat KaldiTfRnnlmWrapper::GetLogProb(int32 word, *context_out = outputs[1]; *new_cell = outputs[2]; } else { - inputs = { - {"Train/Model/test_word_out", thisword}, - {"Train/Model/test_cell_in", cell_in}, - }; - // Run the session, evaluating our "c" operation from the graph - Status status = session_->Run(inputs, - {"Train/Model/test_out"}, {}, &outputs); + Status status = bundle_.session->Run(inputs, + {log_prob_tensor_name_}, {}, &outputs); if (!status.ok()) { KALDI_ERR << status.ToString(); } diff --git a/src/tfrnnlm/tensorflow-rnnlm.h b/src/tfrnnlm/tensorflow-rnnlm.h index 90b68755964..5b6b46bc64e 100644 --- a/src/tfrnnlm/tensorflow-rnnlm.h +++ b/src/tfrnnlm/tensorflow-rnnlm.h @@ -53,7 +53,7 @@ #undef DCHECK_GE #undef DCHECK_NE -#include "tensorflow/core/public/session.h" +#include "tensorflow/cc/saved_model/loader.h" using tensorflow::Session; using tensorflow::Tensor; @@ -97,9 +97,7 @@ class KaldiTfRnnlmWrapper { const std::string &word_symbol_table_rxfilename, const std::string &unk_prob_file, const std::string &tf_model_path); - ~KaldiTfRnnlmWrapper() { - session_->Close(); - } + ~KaldiTfRnnlmWrapper(); int32 GetEos() const { return eos_; } @@ -156,7 +154,14 @@ class KaldiTfRnnlmWrapper { // this corresponds to the RNNLM symbol table int32 num_rnn_words; - Session* session_; // for TF computation; pointer owned here + // for TF computation + tensorflow::SavedModelBundle bundle_; + std::string word_id_tensor_name_; + std::string context_tensor_name_; + std::string log_prob_tensor_name_; + std::string rnn_out_tensor_name_; + std::string rnn_states_tensor_name_; + std::string initial_state_tensor_name_; int32 eos_; int32 oos_; diff --git a/src/tfrnnlmbin/CMakeLists.txt b/src/tfrnnlmbin/CMakeLists.txt new file mode 100644 index 00000000000..bea32fbac02 --- /dev/null +++ b/src/tfrnnlmbin/CMakeLists.txt @@ -0,0 +1,2 @@ +add_kaldi_executable(NAME lattice-lmrescore-tf-rnnlm SOURCES lattice-lmrescore-tf-rnnlm.cc DEPENDS kaldi-tfrnnlm kaldi-lat) +add_kaldi_executable(NAME lattice-lmrescore-tf-rnnlm-pruned SOURCES lattice-lmrescore-tf-rnnlm-pruned.cc DEPENDS kaldi-tfrnnlm kaldi-lat)