LoadData & Train using an iterator instead of the for loop.
caewok committed Jan 30, 2013
1 parent 2a91763 commit 776b356
Showing 6 changed files with 358 additions and 276 deletions.
189 changes: 101 additions & 88 deletions Neural Net Language Model/FProp.R
@@ -1,110 +1,123 @@
# Original FProp file, with minor modifications to accommodate R

# library(Matrix)
#library(matrixStats)
library(R.matlab)
library(matrixStats)

fprop <- function(input_batch, weights, fn) {
# % This method forward propagates through a neural network.
# % Inputs:
# % input_batch: The input data as a matrix of size numwords X batchsize where,
# % numwords is the number of words, batchsize is the number of data points.
# % So, if input_batch(i, j) = k then the ith word in data point j is word
# % index k of the vocabulary.
# %
# % word_embedding_weights: Word embedding as a matrix of size
# % vocab_size X numhid1, where vocab_size is the size of the vocabulary
# % numhid1 is the dimensionality of the embedding space.
# %
# % embed_to_hid_weights: Weights between the word embedding layer and hidden
# % layer as a matrix of size numhid1*numwords X numhid2, numhid2 is the
# % number of hidden units.
# %
# % hid_to_output_weights: Weights between the hidden layer and output softmax
# % unit as a matrix of size numhid2 X vocab_size
# %
# % hid_bias: Bias of the hidden layer as a matrix of size numhid2 X 1.
# %
# % output_bias: Bias of the output layer as a matrix of size vocab_size X 1.
# %
# % Outputs:
# % embedding_layer_state: State of units in the embedding layer as a matrix of
# % size numhid1*numwords X batchsize
# %
# % hidden_layer_state: State of units in the hidden layer as a matrix of size
# % numhid2 X batchsize
# %
# % output_layer_state: State of units in the output layer as a matrix of size
# % vocab_size X batchsize
# %

tmp <- dim(input_batch) # basically dim
fprop <- function(input_batch, word_embedding_weights, embed_to_hid_weights, hid_to_output_weights, hid_bias, output_bias) {
# % This method forward propagates through a neural network.
# % Inputs:
# % input_batch: The input data as a matrix of size numwords X batchsize where,
# % numwords is the number of words, batchsize is the number of data points.
# % So, if input_batch(i, j) = k then the ith word in data point j is word
# % index k of the vocabulary.
# %
# % word_embedding_weights: Word embedding as a matrix of size
# % vocab_size X numhid1, where vocab_size is the size of the vocabulary
# % numhid1 is the dimensionality of the embedding space.
# %
# % embed_to_hid_weights: Weights between the word embedding layer and hidden
# % layer as a matrix of size numhid1*numwords X numhid2, numhid2 is the
# % number of hidden units.
# %
# % hid_to_output_weights: Weights between the hidden layer and output softmax
# % unit as a matrix of size numhid2 X vocab_size
# %
# % hid_bias: Bias of the hidden layer as a matrix of size numhid2 X 1.
# %
# % output_bias: Bias of the output layer as a matrix of size vocab_size X 1.
# %
# % Outputs:
# % embedding_layer_state: State of units in the embedding layer as a matrix of
# % size numhid1*numwords X batchsize
# %
# % hidden_layer_state: State of units in the hidden layer as a matrix of size
# % numhid2 X batchsize
# %
# % output_layer_state: State of units in the output layer as a matrix of size
# % vocab_size X batchsize
# %

tmp <- size(input_batch) # basically dim
numwords <- tmp[1]
batchsize <- tmp[2]

tmp <- dim(weights$word_embedding) # basically dim
tmp <- size(word_embedding_weights) # basically dim
vocab_size <- tmp[1]
numhid1 <- tmp[2]
numhid2 <- size(embed_to_hid_weights, 2)

numhid2 <- ncol(weights$embed_to_hid)

# %% COMPUTE STATE OF WORD EMBEDDING LAYER.
# % Look up the inputs word indices in the word_embedding_weights matrix.
# %% COMPUTE STATE OF WORD EMBEDDING LAYER.
# % Look up the inputs word indices in the word_embedding_weights matrix.
# each row of the word weights corresponds to a word (250 total)
# input_batch contains 300 total words (3 * 100 batchsize)
# each element of input_batch is a number between 1 and 249 (250?), corresponding to a word
#embedding_layer_state2 <- matrix(as.numeric(weights$word_embedding[as.integer(input_batch), ]), nrow=numhid1 * numwords)
embedding_layer_state <- myReshape(weights$word_embedding[as.integer(input_batch),], nrows=numhid1 * numwords)
#embedding_layer_state <- myReshape(weights$word_embedding[as.integer(input_batch),], nrows=numhid1 * numwords)
#embedding_layer_state <- reshape(t(word_embedding_weights[reshape(input_batch, 1, []), ]), numhid1 * numwords, [])

# [] is allowed in reshape: one dimension remains unspecified and Octave will determine it automatically
tmp <- t(word_embedding_weights[reshape(input_batch, 1, length(input_batch)), ])
embedding_layer_state <- reshape(tmp, numhid1 * numwords, length(tmp) / (numhid1 * numwords))
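# Small illustration of the [] behaviour: in Octave, reshape(1:12, 3, []) infers
# the free dimension and returns a 3 x 4 matrix; matlab::reshape() in R has no
# such placeholder, so the free dimension is computed by hand, e.g.
# matlab::reshape(matrix(1:12, nrow = 1), 3, 12 / 3)   # also 3 x 4, column-major fill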

#embedding_layer_state2 <- matlab::reshape(as.matrix(weights$word_embedding[as.numeric(input_batch), ]), numhid1*numwords, 100)
# %% COMPUTE STATE OF HIDDEN LAYER.
# % Compute inputs to hidden units.

# %% COMPUTE STATE OF HIDDEN LAYER.
# % Compute inputs to hidden units.
# crossprod = t(x) %*% y
inputs_to_hidden_units = myCrossProd(weights$embed_to_hid, embedding_layer_state) + fn(weights$hid_bias, 1, batchsize)

# benchmark(
# tmp1 <- repmat(weights$hid_bias, 1, batchsize),
# tmp2 <- myRepMat4(weights$hid_bias, 1, batchsize),
# replications=10
# )


# % Apply logistic activation function.
# % FILL IN CODE. Replace the line below by one of the options.
# hidden_layer_state = zeros(numhid2, batchsize)
# % Options
# % (a) hidden_layer_state = 1 ./ (1 + exp(inputs_to_hidden_units));
# % (b) hidden_layer_state = 1 ./ (1 - exp(-inputs_to_hidden_units));
hidden_layer_state = 1 / (1 + exp(-inputs_to_hidden_units))
# % (d) hidden_layer_state = -1 ./ (1 + exp(-inputs_to_hidden_units));

# %% COMPUTE STATE OF OUTPUT LAYER.
# % Compute inputs to softmax.
# % FILL IN CODE. Replace the line below by one of the options.
# inputs_to_softmax = zeros(vocab_size, batchsize)
# % Options
inputs_to_softmax = myCrossProd(weights$hid_to_output, hidden_layer_state) + fn(weights$output_bias, 1, batchsize)

# % (b) inputs_to_softmax = t(hid_to_output_weights) %*% hidden_layer_state + repmat(output_bias, batchsize, 1);
# % (c) inputs_to_softmax = hidden_layer_state %*% t(hid_to_output_weights) + repmat(output_bias, 1, batchsize);
# % (d) inputs_to_softmax = hid_to_output_weights %*% hidden_layer_state + repmat(output_bias, batchsize, 1);

# % Subtract maximum.
# % Remember that adding or subtracting the same constant from each input to a
# % softmax unit does not affect the outputs. Here we are subtracting maximum to
# % make all inputs <= 0. This prevents overflows when computing their
# % exponents.
#inputs_to_hidden_units = myCrossProd(weights$embed_to_hid, embedding_layer_state) + fn(weights$hid_bias, 1, batchsize)
inputs_to_hidden_units <- t(embed_to_hid_weights) %*% embedding_layer_state + repmat(hid_bias, 1, batchsize)
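# An equivalent base-R broadcast of the hidden bias, as a sketch:
# inputs_to_hidden_units <- sweep(t(embed_to_hid_weights) %*% embedding_layer_state,
#                                 1, as.vector(hid_bias), "+")
# (or simply ... + as.vector(hid_bias), since R recycles a length-numhid2 vector
# down each column of the numhid2 x batchsize matrix)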


# benchmark(
# tmp1 <- repmat(weights$hid_bias, 1, batchsize),
# tmp2 <- myRepMat4(weights$hid_bias, 1, batchsize),
# replications=10
# )


# % Apply logistic activation function.
# % FILL IN CODE. Replace the line below by one of the options.
# hidden_layer_state = zeros(numhid2, batchsize)
# % Options
# % (a) hidden_layer_state = 1 ./ (1 + exp(inputs_to_hidden_units));
# % (b) hidden_layer_state = 1 ./ (1 - exp(-inputs_to_hidden_units));
hidden_layer_state = 1 / (1 + exp(-inputs_to_hidden_units))
# % (d) hidden_layer_state = -1 ./ (1 + exp(-inputs_to_hidden_units));

# %% COMPUTE STATE OF OUTPUT LAYER.
# % Compute inputs to softmax.
# % FILL IN CODE. Replace the line below by one of the options.
# inputs_to_softmax = zeros(vocab_size, batchsize)
# % Options
#inputs_to_softmax = myCrossProd(weights$hid_to_output, hidden_layer_state) + fn(weights$output_bias, 1, batchsize)
inputs_to_softmax <- t(hid_to_output_weights) %*% hidden_layer_state + repmat(output_bias, 1, batchsize)

# % (b) inputs_to_softmax = t(hid_to_output_weights) %*% hidden_layer_state + repmat(output_bias, batchsize, 1);
# % (c) inputs_to_softmax = hidden_layer_state %*% t(hid_to_output_weights) + repmat(output_bias, 1, batchsize);
# % (d) inputs_to_softmax = hid_to_output_weights %*% hidden_layer_state + repmat(output_bias, batchsize, 1);

# % Subtract maximum.
# % Remember that adding or subtracting the same constant from each input to a
# % softmax unit does not affect the outputs. Here we are subtracting maximum to
# % make all inputs <= 0. This prevents overflows when computing their
# % exponents.
# max in matlab returns max from each column by default
# benchmark(
# tmp <- apply(inputs_to_softmax, 2, max),
# tmp2 <- colMaxs(inputs_to_softmax),
# replications=10)
tmp <- apply(inputs_to_softmax, 2, max)
inputs_to_softmax = inputs_to_softmax - fn(tmp, vocab_size, 1)

# % Compute exp.
# benchmark(
# tmp <- apply(inputs_to_softmax, 2, max),
# tmp2 <- colMaxs(inputs_to_softmax),
# replications=10)
#tmp <- apply(inputs_to_softmax, 2, max)
#inputs_to_softmax = inputs_to_softmax - fn(tmp, vocab_size, 1)
inputs_to_softmax <- inputs_to_softmax - repmat(colMaxs(inputs_to_softmax), vocab_size, 1)
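# A small numeric check of why the max is subtracted, as an illustration:
# x <- c(1000, 1001, 1002)
# exp(x) / sum(exp(x))                        # NaN: exp(1000) overflows to Inf
# exp(x - max(x)) / sum(exp(x - max(x)))      # 0.0900 0.2447 0.6652, identical softmax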

# % Compute exp.
output_layer_state = exp(inputs_to_softmax)

# % Normalize to get probability distribution.
output_layer_state = output_layer_state / fn(colSums(output_layer_state), vocab_size, 1)
# % Normalize to get probability distribution.
#output_layer_state = output_layer_state / fn(colSums(output_layer_state), vocab_size, 1)
output_layer_state <- output_layer_state / repmat(matlab::sum(output_layer_state), vocab_size, 1)

return(list(embedding_layer_state=embedding_layer_state,
hidden_layer_state=hidden_layer_state,
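A minimal sketch of calling the reworked fprop() with its new explicit-argument signature, using the dimensions given in the header comment (3 input words, batches of 100, a 250-word vocabulary). The hidden-layer sizes and random weights below are placeholder values, and the matlab and matrixStats packages are assumed to be attached, since FProp.R now calls size(), reshape(), repmat(), and colMaxs().

library(matlab)       # size(), reshape(), repmat()
library(matrixStats)  # colMaxs()
source("Neural Net Language Model/FProp.R")

numwords <- 3; batchsize <- 100; vocab_size <- 250   # per the header comment
numhid1 <- 50; numhid2 <- 200                        # placeholder layer sizes

input_batch <- matrix(sample(vocab_size, numwords * batchsize, replace = TRUE),
                      nrow = numwords)
word_embedding_weights <- matrix(rnorm(vocab_size * numhid1, sd = 0.01), vocab_size, numhid1)
embed_to_hid_weights   <- matrix(rnorm(numwords * numhid1 * numhid2, sd = 0.01),
                                 numwords * numhid1, numhid2)
hid_to_output_weights  <- matrix(rnorm(numhid2 * vocab_size, sd = 0.01), numhid2, vocab_size)
hid_bias    <- matrix(0, numhid2, 1)
output_bias <- matrix(0, vocab_size, 1)

states <- fprop(input_batch, word_embedding_weights, embed_to_hid_weights,
                hid_to_output_weights, hid_bias, output_bias)
dim(states$embedding_layer_state)   # numhid1 * numwords  x  batchsize
dim(states$hidden_layer_state)      # numhid2  x  batchsize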
2 changes: 1 addition & 1 deletion Neural Net Language Model/FProp_Original.R
@@ -5,7 +5,7 @@
library(R.matlab)
library(matrixStats)

fprop <- function(input_batch, word_embedding_weights, embed_to_hid_weights, hid_to_output_weights, hid_bias, output_bias) {
fprop_original <- function(input_batch, word_embedding_weights, embed_to_hid_weights, hid_to_output_weights, hid_bias, output_bias) {
# % This method forward propagates through a neural network.
# % Inputs:
# % input_batch: The input data as a matrix of size numwords X batchsize where,
101 changes: 78 additions & 23 deletions Neural Net Language Model/LoadData.R
@@ -1,6 +1,9 @@
# Load Data Original, with minor changes to accommodate R

library(R.matlab)
# library(Matrix)
library(parallel); options(mc.cores = 4)
library(itertools)

load_data <- function(N) {
# % This method loads the training, validation and test set.
@@ -24,32 +27,18 @@ load_data <- function(N) {
vocab = unlist(data.mat$data[4,1,1])
)

numdims = nrow(data$trainData)
numdims = size(data$trainData, 1)
D = numdims - 1 # subtract 1 because 1:D is the number of input words and D is the predicted word
M = floor(ncol(data$trainData) / N)
M = floor(size(data$trainData, 2) / N)

# shift to a list of M minibatches, each with D*N entries
# looks like we threw out the remainder training data
splitMatrixIntoBatch <- function(dat, b, N, byCol=TRUE) {
# N is the size of each batch
# b is the requested batch
if(length(dim(dat)) == 0) {
if(byCol) dim(dat) <- c(1, length(dat)) else dim(dat) <- c(length(dat), 1)
}
start <- ((b - 1) * N) + 1
end <- b * N

if(byCol) return(dat[,start:end]) else return(dat[start:end,])
}
train_input <- mclapply(1:M, splitMatrixIntoBatch, N=N, dat=data$trainData[1:D,], byCol=TRUE)
train_target <- mclapply(1:M, splitMatrixIntoBatch, N=N, dat=data$trainData[D+1,], byCol=TRUE)

valid_input <- (data$validData[1:D,])
valid_target <- data$validData[D + 1,]

test_input <- (data$testData[1:D,])
test_target <- data$testData[D + 1,]

train_input <- data$trainData[1:D, 1:(N*M), drop=F]
train_target <- data$trainData[D + 1, 1:(N*M), drop=F]
valid_input <- data$validData[1:D,, drop=F]
valid_target <- data$validData[D + 1, , drop=F]
test_input <- data$testData[1:D, , drop=F]
test_target <- data$testData[D + 1, , drop=F]
vocab <- data$vocab

return(list(train_input=train_input,
@@ -59,4 +48,70 @@ load_data <- function(N) {
test_input=test_input,
test_target=test_target,
vocab=vocab))
}
}

# faster to access lists than indices in an array
# but slower to create a set of lists than to reshape the training data into an array



# train_input3 <- vector("list", M)
# fn <- function() {
# it <- isplitCols(data$trainData[1:D, 1:(N*M)], chunks = M)
# replicate(M, nextElem(it), simplify=F)
# }
#
# fn2 <- function() {
# enumerate(isplitCols(data$trainData[1:D, 1:(N*M)], chunks = M))
# }
#
# myReshape <- function(A, ...) {
# if (!is.array(A)) {
# stop(sprintf("argument %s must be matrix or array", sQuote("A")))
# }
# nargs <- length(dots <- list(...))
# dims <- as.integer(if (nargs == 1 && matlab:::is.size_t(dots[[1]])) {
# dots[[1]]
# } else {
# unlist(dots)
# })
# if (!(length(dims) > 1)) {
# stop("dimensions must be of length greater than 1")
# }
# else if (!(all(dims > 0))) {
# stop("dimensions must be a positive quantity")
# }
# else if (prod(dims) != prod(dim(A))) {
# stop("number of elements must not change")
# }
# dim(A) <- dims
#
# return(A)
#
# }
#
#
#
#
# benchmark(
# train_input1 <- reshape(data$trainData[1:D, 1:(N*M), drop=F], D, N, M),
# train_input2 <- lapply(1:M, splitMatrixIntoBatch, N=N, dat=data$trainData[1:D,], byCol=TRUE),
# train_input3 <- fn(),
# train_input4 <- myReshape(data$trainData[1:D, 1:(N*M), drop=F], D, N, M), # close second
# train_input5 <- fn2(), # fastest
# replications=10
# )
# #
#
# benchmark(
# data1 <- load_data_original(batchsize),
# data2 <- load_data(batchsize),
#
# replications <- 10
# )
#
# benchmark(
# for(m in 1:372) tmp1 <- data1$train_input[,,m],
# for(m in 1:372) tmp2 <- data2$train_input[[m]],
# replications <- 2
# )
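
The commit title refers to training from an iterator rather than a for loop; below is a rough sketch of how the column-split iterator benchmarked above might be consumed. The batch size and the loop body are placeholders, not code from Train.R.

library(itertools)   # isplitCols(), enumerate(), ihasNext(); attaches iterators for nextElem()

batchsize <- 100                        # placeholder value
data <- load_data(batchsize)            # train_input is D x (N*M) after this commit
M <- ncol(data$train_input) / batchsize

it <- ihasNext(enumerate(isplitCols(data$train_input, chunks = M)))
while (hasNext(it)) {
  batch <- nextElem(it)                 # list(index = m, value = D x batchsize matrix)
  input_batch <- batch$value
  # ... forward / backward pass for minibatch batch$index would go here ...
}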
2 changes: 1 addition & 1 deletion Neural Net Language Model/LoadData_Original.R
@@ -4,7 +4,7 @@ library(R.matlab)
# library(Matrix)
#library(parallel); options(mc.cores = 4)

load_data <- function(N) {
load_data_original <- function(N) {
# % This method loads the training, validation and test set.
# % It also divides the training set into mini-batches.
# % Inputs: