LoadData & Train using an iterator instead of the for loop.
caewok committed Jan 30, 2013
1 parent 2a91763 commit 776b356
Showing 6 changed files with 358 additions and 276 deletions.
189 changes: 101 additions & 88 deletions Neural Net Language Model/FProp.R
@@ -1,110 +1,123 @@
# Original FProp file, with minor modifications to accommodate R

# library(Matrix)
#library(matrixStats)
library(R.matlab)
library(matrixStats)

fprop <- function(input_batch, weights, fn) {
# % This method forward propagates through a neural network.
# % Inputs:
# % input_batch: The input data as a matrix of size numwords X batchsize where,
# % numwords is the number of words, batchsize is the number of data points.
# % So, if input_batch(i, j) = k then the ith word in data point j is word
# % index k of the vocabulary.
# %
# % word_embedding_weights: Word embedding as a matrix of size
# % vocab_size X numhid1, where vocab_size is the size of the vocabulary
# % numhid1 is the dimensionality of the embedding space.
# %
# % embed_to_hid_weights: Weights between the word embedding layer and hidden
# % layer as a matrix of size numhid1*numwords X numhid2, numhid2 is the
# % number of hidden units.
# %
# % hid_to_output_weights: Weights between the hidden layer and output softmax
# % unit as a matrix of size numhid2 X vocab_size
# %
# % hid_bias: Bias of the hidden layer as a matrix of size numhid2 X 1.
# %
# % output_bias: Bias of the output layer as a matrix of size vocab_size X 1.
# %
# % Outputs:
# % embedding_layer_state: State of units in the embedding layer as a matrix of
# % size numhid1*numwords X batchsize
# %
# % hidden_layer_state: State of units in the hidden layer as a matrix of size
# % numhid2 X batchsize
# %
# % output_layer_state: State of units in the output layer as a matrix of size
# % vocab_size X batchsize
# %

tmp <- dim(input_batch) # basically dim
fprop <- function(input_batch, word_embedding_weights, embed_to_hid_weights, hid_to_output_weights, hid_bias, output_bias) {
# % This method forward propagates through a neural network.
# % Inputs:
# % input_batch: The input data as a matrix of size numwords X batchsize where,
# % numwords is the number of words, batchsize is the number of data points.
# % So, if input_batch(i, j) = k then the ith word in data point j is word
# % index k of the vocabulary.
# %
# % word_embedding_weights: Word embedding as a matrix of size
# % vocab_size X numhid1, where vocab_size is the size of the vocabulary
# % numhid1 is the dimensionality of the embedding space.
# %
# % embed_to_hid_weights: Weights between the word embedding layer and hidden
# % layer as a matrix of size numhid1*numwords X numhid2, numhid2 is the
# % number of hidden units.
# %
# % hid_to_output_weights: Weights between the hidden layer and output softmax
# % unit as a matrix of size numhid2 X vocab_size
# %
# % hid_bias: Bias of the hidden layer as a matrix of size numhid2 X 1.
# %
# % output_bias: Bias of the output layer as a matrix of size vocab_size X 1.
# %
# % Outputs:
# % embedding_layer_state: State of units in the embedding layer as a matrix of
# % size numhid1*numwords X batchsize
# %
# % hidden_layer_state: State of units in the hidden layer as a matrix of size
# % numhid2 X batchsize
# %
# % output_layer_state: State of units in the output layer as a matrix of size
# % vocab_size X batchsize
# %

tmp <- size(input_batch) # basically dim
numwords <- tmp[1]
batchsize <- tmp[2]

tmp <- dim(weights$word_embedding) # basically dim
tmp <- size(word_embedding_weights) # basically dim
vocab_size <- tmp[1]
numhid1 <- tmp[2]
numhid2 <- size(embed_to_hid_weights, 2)

numhid2 <- ncol(weights$embed_to_hid)

# %% COMPUTE STATE OF WORD EMBEDDING LAYER.
# % Look up the inputs word indices in the word_embedding_weights matrix.
# %% COMPUTE STATE OF WORD EMBEDDING LAYER.
# % Look up the inputs word indices in the word_embedding_weights matrix.
# each row of the word weights corresponds to a word (250 total)
# input_batch contains 300 total words (3 * 100 batchsize)
# each element of input_batch is a number between 1 and 249 (250?), corresponding to a word
#embedding_layer_state2 <- matrix(as.numeric(weights$word_embedding[as.integer(input_batch), ]), nrow=numhid1 * numwords)
embedding_layer_state <- myReshape(weights$word_embedding[as.integer(input_batch),], nrows=numhid1 * numwords)
#embedding_layer_state <- myReshape(weights$word_embedding[as.integer(input_batch),], nrows=numhid1 * numwords)
#embedding_layer_state <- reshape(t(word_embedding_weights[reshape(input_batch, 1, []), ]), numhid1 * numwords, [])

# [] is allowed in reshape: one dimension remains unspecified and Octave will determine it automatically
tmp <- t(word_embedding_weights[reshape(input_batch, 1, length(input_batch)), ])
embedding_layer_state <- reshape(tmp, numhid1 * numwords, length(tmp) / (numhid1 * numwords))
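# Small illustration of the [] behaviour: in Octave, reshape(1:12, 3, []) infers
# the free dimension and returns a 3 x 4 matrix; matlab::reshape() in R has no
# such placeholder, so the free dimension is computed by hand, e.g.
# matlab::reshape(matrix(1:12, nrow = 1), 3, 12 / 3)   # also 3 x 4, column-major fill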

#embedding_layer_state2 <- matlab::reshape(as.matrix(weights$word_embedding[as.numeric(input_batch), ]), numhid1*numwords, 100)
# %% COMPUTE STATE OF HIDDEN LAYER.
# % Compute inputs to hidden units.

# %% COMPUTE STATE OF HIDDEN LAYER.
# % Compute inputs to hidden units.
# crossprod = t(x) %*% y
inputs_to_hidden_units = myCrossProd(weights$embed_to_hid, embedding_layer_state) + fn(weights$hid_bias, 1, batchsize)

# benchmark(
# tmp1 <- repmat(weights$hid_bias, 1, batchsize),
# tmp2 <- myRepMat4(weights$hid_bias, 1, batchsize),
# replications=10
# )


# % Apply logistic activation function.
# % FILL IN CODE. Replace the line below by one of the options.
# hidden_layer_state = zeros(numhid2, batchsize)
# % Options
# % (a) hidden_layer_state = 1 ./ (1 + exp(inputs_to_hidden_units));
# % (b) hidden_layer_state = 1 ./ (1 - exp(-inputs_to_hidden_units));
hidden_layer_state = 1 / (1 + exp(-inputs_to_hidden_units))
# % (d) hidden_layer_state = -1 ./ (1 + exp(-inputs_to_hidden_units));

# %% COMPUTE STATE OF OUTPUT LAYER.
# % Compute inputs to softmax.
# % FILL IN CODE. Replace the line below by one of the options.
# inputs_to_softmax = zeros(vocab_size, batchsize)
# % Options
inputs_to_softmax = myCrossProd(weights$hid_to_output, hidden_layer_state) + fn(weights$output_bias, 1, batchsize)

# % (b) inputs_to_softmax = t(hid_to_output_weights) %*% hidden_layer_state + repmat(output_bias, batchsize, 1);
# % (c) inputs_to_softmax = hidden_layer_state %*% t(hid_to_output_weights) + repmat(output_bias, 1, batchsize);
# % (d) inputs_to_softmax = hid_to_output_weights %*% hidden_layer_state + repmat(output_bias, batchsize, 1);

# % Subtract maximum.
# % Remember that adding or subtracting the same constant from each input to a
# % softmax unit does not affect the outputs. Here we are subtracting maximum to
# % make all inputs <= 0. This prevents overflows when computing their
# % exponents.
#inputs_to_hidden_units = myCrossProd(weights$embed_to_hid, embedding_layer_state) + fn(weights$hid_bias, 1, batchsize)
inputs_to_hidden_units <- t(embed_to_hid_weights) %*% embedding_layer_state + repmat(hid_bias, 1, batchsize)
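# An equivalent base-R broadcast of the hidden bias, as a sketch:
# inputs_to_hidden_units <- sweep(t(embed_to_hid_weights) %*% embedding_layer_state,
#                                 1, as.vector(hid_bias), "+")
# (or simply ... + as.vector(hid_bias), since R recycles a length-numhid2 vector
# down each column of the numhid2 x batchsize matrix)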


# benchmark(
# tmp1 <- repmat(weights$hid_bias, 1, batchsize),
# tmp2 <- myRepMat4(weights$hid_bias, 1, batchsize),
# replications=10
# )


# % Apply logistic activation function.
# % FILL IN CODE. Replace the line below by one of the options.
# hidden_layer_state = zeros(numhid2, batchsize)
# % Options
# % (a) hidden_layer_state = 1 ./ (1 + exp(inputs_to_hidden_units));
# % (b) hidden_layer_state = 1 ./ (1 - exp(-inputs_to_hidden_units));
hidden_layer_state = 1 / (1 + exp(-inputs_to_hidden_units))
# % (d) hidden_layer_state = -1 ./ (1 + exp(-inputs_to_hidden_units));

# %% COMPUTE STATE OF OUTPUT LAYER.
# % Compute inputs to softmax.
# % FILL IN CODE. Replace the line below by one of the options.
# inputs_to_softmax = zeros(vocab_size, batchsize)
# % Options
#inputs_to_softmax = myCrossProd(weights$hid_to_output, hidden_layer_state) + fn(weights$output_bias, 1, batchsize)
inputs_to_softmax <- t(hid_to_output_weights) %*% hidden_layer_state + repmat(output_bias, 1, batchsize)

# % (b) inputs_to_softmax = t(hid_to_output_weights) %*% hidden_layer_state + repmat(output_bias, batchsize, 1);
# % (c) inputs_to_softmax = hidden_layer_state %*% t(hid_to_output_weights) + repmat(output_bias, 1, batchsize);
# % (d) inputs_to_softmax = hid_to_output_weights %*% hidden_layer_state + repmat(output_bias, batchsize, 1);

# % Subtract maximum.
# % Remember that adding or subtracting the same constant from each input to a
# % softmax unit does not affect the outputs. Here we are subtracting maximum to
# % make all inputs <= 0. This prevents overflows when computing their
# % exponents.
# max in matlab returns max from each column by default
# benchmark(
# tmp <- apply(inputs_to_softmax, 2, max),
# tmp2 <- colMaxs(inputs_to_softmax),
# replications=10)
tmp <- apply(inputs_to_softmax, 2, max)
inputs_to_softmax = inputs_to_softmax - fn(tmp, vocab_size, 1)

# % Compute exp.
# benchmark(
# tmp <- apply(inputs_to_softmax, 2, max),
# tmp2 <- colMaxs(inputs_to_softmax),
# replications=10)
#tmp <- apply(inputs_to_softmax, 2, max)
#inputs_to_softmax = inputs_to_softmax - fn(tmp, vocab_size, 1)
inputs_to_softmax <- inputs_to_softmax - repmat(colMaxs(inputs_to_softmax), vocab_size, 1)
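# A small numeric check of why the max is subtracted, as an illustration:
# x <- c(1000, 1001, 1002)
# exp(x) / sum(exp(x))                        # NaN: exp(1000) overflows to Inf
# exp(x - max(x)) / sum(exp(x - max(x)))      # 0.0900 0.2447 0.6652, identical softmax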

# % Compute exp.
output_layer_state = exp(inputs_to_softmax)

# % Normalize to get probability distribution.
output_layer_state = output_layer_state / fn(colSums(output_layer_state), vocab_size, 1)
# % Normalize to get probability distribution.
#output_layer_state = output_layer_state / fn(colSums(output_layer_state), vocab_size, 1)
output_layer_state <- output_layer_state / repmat(matlab::sum(output_layer_state), vocab_size, 1)

return(list(embedding_layer_state=embedding_layer_state,
hidden_layer_state=hidden_layer_state,
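A minimal sketch of calling the reworked fprop() with its new explicit-argument signature, using the dimensions given in the header comment (3 input words, batches of 100, a 250-word vocabulary). The hidden-layer sizes and random weights below are placeholder values, and the matlab and matrixStats packages are assumed to be attached, since FProp.R now calls size(), reshape(), repmat(), and colMaxs().

library(matlab)       # size(), reshape(), repmat()
library(matrixStats)  # colMaxs()
source("Neural Net Language Model/FProp.R")

numwords <- 3; batchsize <- 100; vocab_size <- 250   # per the header comment
numhid1 <- 50; numhid2 <- 200                        # placeholder layer sizes

input_batch <- matrix(sample(vocab_size, numwords * batchsize, replace = TRUE),
                      nrow = numwords)
word_embedding_weights <- matrix(rnorm(vocab_size * numhid1, sd = 0.01), vocab_size, numhid1)
embed_to_hid_weights   <- matrix(rnorm(numwords * numhid1 * numhid2, sd = 0.01),
                                 numwords * numhid1, numhid2)
hid_to_output_weights  <- matrix(rnorm(numhid2 * vocab_size, sd = 0.01), numhid2, vocab_size)
hid_bias    <- matrix(0, numhid2, 1)
output_bias <- matrix(0, vocab_size, 1)

states <- fprop(input_batch, word_embedding_weights, embed_to_hid_weights,
                hid_to_output_weights, hid_bias, output_bias)
dim(states$embedding_layer_state)   # numhid1 * numwords  x  batchsize
dim(states$hidden_layer_state)      # numhid2  x  batchsize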
2 changes: 1 addition & 1 deletion Neural Net Language Model/FProp_Original.R
@@ -5,7 +5,7 @@
library(R.matlab)
library(matrixStats)

fprop <- function(input_batch, word_embedding_weights, embed_to_hid_weights, hid_to_output_weights, hid_bias, output_bias) {
fprop_original <- function(input_batch, word_embedding_weights, embed_to_hid_weights, hid_to_output_weights, hid_bias, output_bias) {
# % This method forward propagates through a neural network.
# % Inputs:
# % input_batch: The input data as a matrix of size numwords X batchsize where,
101 changes: 78 additions & 23 deletions Neural Net Language Model/LoadData.R
@@ -1,6 +1,9 @@
# Load Data Original, with minor changes to accommodate R

library(R.matlab)
# library(Matrix)
library(parallel); options(mc.cores = 4)
library(itertools)

load_data <- function(N) {
# % This method loads the training, validation and test set.
@@ -24,32 +27,18 @@ load_data <- function(N) {
vocab = unlist(data.mat$data[4,1,1])
)

numdims = nrow(data$trainData)
numdims = size(data$trainData, 1)
D = numdims - 1 # subtract 1 because 1:D is the number of input words and D is the predicted word
M = floor(ncol(data$trainData) / N)
M = floor(size(data$trainData, 2) / N)

# shift to a list of M minibatches, each with D*N entries
# looks like we threw out the remainder training data
splitMatrixIntoBatch <- function(dat, b, N, byCol=TRUE) {
# N is the size of each batch
# b is the requested batch
if(length(dim(dat)) == 0) {
if(byCol) dim(dat) <- c(1, length(dat)) else dim(dat) <- c(length(dat), 1)
}
start <- ((b - 1) * N) + 1
end <- b * N

if(byCol) return(dat[,start:end]) else return(dat[start:end,])
}
train_input <- mclapply(1:M, splitMatrixIntoBatch, N=N, dat=data$trainData[1:D,], byCol=TRUE)
train_target <- mclapply(1:M, splitMatrixIntoBatch, N=N, dat=data$trainData[D+1,], byCol=TRUE)

valid_input <- (data$validData[1:D,])
valid_target <- data$validData[D + 1,]

test_input <- (data$testData[1:D,])
test_target <- data$testData[D + 1,]

train_input <- data$trainData[1:D, 1:(N*M), drop=F]
train_target <- data$trainData[D + 1, 1:(N*M), drop=F]
valid_input <- data$validData[1:D,, drop=F]
valid_target <- data$validData[D + 1, , drop=F]
test_input <- data$testData[1:D, , drop=F]
test_target <- data$testData[D + 1, , drop=F]
vocab <- data$vocab

return(list(train_input=train_input,
@@ -59,4 +48,70 @@ load_data <- function(N) {
test_input=test_input,
test_target=test_target,
vocab=vocab))
}
}

# faster to access lists than indices in an array
# but slower to create a set of lists than to reshape the training data into an array



# train_input3 <- vector("list", M)
# fn <- function() {
# it <- isplitCols(data$trainData[1:D, 1:(N*M)], chunks = M)
# replicate(M, nextElem(it), simplify=F)
# }
#
# fn2 <- function() {
# enumerate(isplitCols(data$trainData[1:D, 1:(N*M)], chunks = M))
# }
#
# myReshape <- function(A, ...) {
# if (!is.array(A)) {
# stop(sprintf("argument %s must be matrix or array", sQuote("A")))
# }
# nargs <- length(dots <- list(...))
# dims <- as.integer(if (nargs == 1 && matlab:::is.size_t(dots[[1]])) {
# dots[[1]]
# } else {
# unlist(dots)
# })
# if (!(length(dims) > 1)) {
# stop("dimensions must be of length greater than 1")
# }
# else if (!(all(dims > 0))) {
# stop("dimensions must be a positive quantity")
# }
# else if (prod(dims) != prod(dim(A))) {
# stop("number of elements must not change")
# }
# dim(A) <- dims
#
# return(A)
#
# }
#
#
#
#
# benchmark(
# train_input1 <- reshape(data$trainData[1:D, 1:(N*M), drop=F], D, N, M),
# train_input2 <- lapply(1:M, splitMatrixIntoBatch, N=N, dat=data$trainData[1:D,], byCol=TRUE),
# train_input3 <- fn(),
# train_input4 <- myReshape(data$trainData[1:D, 1:(N*M), drop=F], D, N, M), # close second
# train_input5 <- fn2(), # fastest
# replications=10
# )
# #
#
# benchmark(
# data1 <- load_data_original(batchsize),
# data2 <- load_data(batchsize),
#
# replications <- 10
# )
#
# benchmark(
# for(m in 1:372) tmp1 <- data1$train_input[,,m],
# for(m in 1:372) tmp2 <- data2$train_input[[m]],
# replications <- 2
# )
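
The commit title refers to training from an iterator rather than a for loop; below is a rough sketch of how the column-split iterator benchmarked above might be consumed. The batch size and the loop body are placeholders, not code from Train.R.

library(itertools)   # isplitCols(), enumerate(), ihasNext(); attaches iterators for nextElem()

batchsize <- 100                        # placeholder value
data <- load_data(batchsize)            # train_input is D x (N*M) after this commit
M <- ncol(data$train_input) / batchsize

it <- ihasNext(enumerate(isplitCols(data$train_input, chunks = M)))
while (hasNext(it)) {
  batch <- nextElem(it)                 # list(index = m, value = D x batchsize matrix)
  input_batch <- batch$value
  # ... forward / backward pass for minibatch batch$index would go here ...
}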
2 changes: 1 addition & 1 deletion Neural Net Language Model/LoadData_Original.R
@@ -4,7 +4,7 @@ library(R.matlab)
# library(Matrix)
#library(parallel); options(mc.cores = 4)

load_data <- function(N) {
load_data_original <- function(N) {
# % This method loads the training, validation and test set.
# % It also divides the training set into mini-batches.
# % Inputs: