Commit: Inclusion of the prior R code for training (labeled "Old").
Showing 5 changed files with 753 additions and 0 deletions.
@@ -0,0 +1,112 @@
# library(Matrix)
# library(matrixStats)

fprop <- function(input_batch, weights, fn) {
  # % This method forward propagates through a neural network.
  # In this R port, the weight matrices and biases described below are fields of
  # the `weights` list (weights$word_embedding, weights$embed_to_hid,
  # weights$hid_to_output, weights$hid_bias, weights$output_bias), and `fn` is a
  # repmat-style function used to replicate bias vectors across the batch.
  # %
  # % Inputs:
  # %   input_batch: The input data as a matrix of size numwords X batchsize, where
  # %     numwords is the number of words and batchsize is the number of data points.
  # %     So, if input_batch(i, j) = k then the ith word in data point j is word
  # %     index k of the vocabulary.
  # %
  # %   word_embedding_weights: Word embedding as a matrix of size
  # %     vocab_size X numhid1, where vocab_size is the size of the vocabulary and
  # %     numhid1 is the dimensionality of the embedding space.
  # %
  # %   embed_to_hid_weights: Weights between the word embedding layer and the
  # %     hidden layer as a matrix of size numhid1*numwords X numhid2, where
  # %     numhid2 is the number of hidden units.
  # %
  # %   hid_to_output_weights: Weights between the hidden layer and the output
  # %     softmax unit as a matrix of size numhid2 X vocab_size.
  # %
  # %   hid_bias: Bias of the hidden layer as a matrix of size numhid2 X 1.
  # %
  # %   output_bias: Bias of the output layer as a matrix of size vocab_size X 1.
  # %
  # % Outputs:
  # %   embedding_layer_state: State of units in the embedding layer as a matrix
  # %     of size numhid1*numwords X batchsize.
  # %
  # %   hidden_layer_state: State of units in the hidden layer as a matrix of
  # %     size numhid2 X batchsize.
  # %
  # %   output_layer_state: State of units in the output layer as a matrix of
  # %     size vocab_size X batchsize.
  # %
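  # Shape summary (illustrative, using the sizes mentioned in the comments
  # below: vocab_size = 250, numwords = 3, batchsize = 100):
  #   input_batch:           3 x 100   (word indices)
  #   embedding_layer_state: (numhid1 * 3) x 100
  #   hidden_layer_state:    numhid2 x 100
  #   output_layer_state:    250 x 100  (each column sums to 1)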

  tmp <- dim(input_batch)
  numwords <- tmp[1]
  batchsize <- tmp[2]

  tmp <- dim(weights$word_embedding)
  vocab_size <- tmp[1]
  numhid1 <- tmp[2]

  numhid2 <- ncol(weights$embed_to_hid)

  # %% COMPUTE STATE OF WORD EMBEDDING LAYER.
  # % Look up the input word indices in the word_embedding_weights matrix.
  # Each row of the embedding matrix corresponds to one word of the vocabulary
  # (250 rows in this data set). input_batch holds numwords * batchsize indices
  # (3 * 100 = 300 here), each between 1 and vocab_size; each index selects a
  # row of the embedding matrix, and the result is reshaped column-major.
  # Equivalent formulations:
  #   matrix(as.numeric(weights$word_embedding[as.integer(input_batch), ]), nrow = numhid1 * numwords)
  #   matlab::reshape(as.matrix(weights$word_embedding[as.numeric(input_batch), ]), numhid1 * numwords, batchsize)
  embedding_layer_state <- myReshape(weights$word_embedding[as.integer(input_batch), ],
                                     nrows = numhid1 * numwords)

  # %% COMPUTE STATE OF HIDDEN LAYER.
  # % Compute inputs to hidden units.
  # myCrossProd follows crossprod() semantics: t(x) %*% y.
  # (repmat() and myRepMat4() were benchmarked for the bias replication; the
  # chosen function is passed in by the caller as `fn`.)
  inputs_to_hidden_units <- myCrossProd(weights$embed_to_hid, embedding_layer_state) +
    fn(weights$hid_bias, 1, batchsize)

  # % Apply logistic activation function.
  # % FILL IN CODE. Replace the line below by one of the options.
  # % hidden_layer_state = zeros(numhid2, batchsize)
  # % Options:
  # % (a) hidden_layer_state = 1 ./ (1 + exp(inputs_to_hidden_units));
  # % (b) hidden_layer_state = 1 ./ (1 - exp(-inputs_to_hidden_units));
  # % (d) hidden_layer_state = -1 ./ (1 + exp(-inputs_to_hidden_units));
  # Option (c), the logistic sigmoid, is the correct one:
  hidden_layer_state <- 1 / (1 + exp(-inputs_to_hidden_units))

  # %% COMPUTE STATE OF OUTPUT LAYER.
  # % Compute inputs to softmax.
  # % FILL IN CODE. Replace the line below by one of the options.
  # % inputs_to_softmax = zeros(vocab_size, batchsize)
  # % Options:
  # % (b) inputs_to_softmax = t(hid_to_output_weights) %*% hidden_layer_state + repmat(output_bias, batchsize, 1);
  # % (c) inputs_to_softmax = hidden_layer_state %*% t(hid_to_output_weights) + repmat(output_bias, 1, batchsize);
  # % (d) inputs_to_softmax = hid_to_output_weights %*% hidden_layer_state + repmat(output_bias, batchsize, 1);
  # Option (a), t(hid_to_output_weights) %*% hidden_layer_state + repmat(output_bias, 1, batchsize),
  # is the correct one:
  inputs_to_softmax <- myCrossProd(weights$hid_to_output, hidden_layer_state) +
    fn(weights$output_bias, 1, batchsize)

  # % Subtract maximum.
  # % Remember that adding or subtracting the same constant from each input to a
  # % softmax unit does not affect the outputs. Here we are subtracting the
  # % maximum to make all inputs <= 0. This prevents overflows when computing
  # % their exponents.
  # MATLAB's max() returns the maximum of each column by default;
  # apply(x, 2, max) is the R equivalent.
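  # Why this matters numerically: exp(800) overflows to Inf in double precision,
  # while exp(800 - 800) = 1; since softmax(x) = softmax(x - max(x)), the shifted
  # inputs give identical outputs without overflow.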
  # (apply(x, 2, max) was benchmarked against matrixStats::colMaxs().)
  tmp <- apply(inputs_to_softmax, 2, max)
  inputs_to_softmax <- inputs_to_softmax - fn(tmp, vocab_size, 1)

  # % Compute exp.
  output_layer_state <- exp(inputs_to_softmax)

  # % Normalize to get a probability distribution.
  output_layer_state <- output_layer_state / fn(colSums(output_layer_state), vocab_size, 1)

  return(list(embedding_layer_state = embedding_layer_state,
              hidden_layer_state = hidden_layer_state,
              output_layer_state = output_layer_state))
}
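A minimal usage sketch may help make the calling convention concrete. Everything below is illustrative rather than part of the commit: the layer sizes are assumed, matlab::repmat (which the benchmark comments above reference) is passed as the replication function `fn`, and fprop plus the helpers it uses (randn, myReshape, myCrossProd) are assumed to be loaded.

# Hypothetical usage sketch (not from the commit); layer sizes are assumptions.
library(matlab)  # provides repmat(), used here as the bias-replication `fn`

vocab_size <- 250; numwords <- 3; batchsize <- 100
numhid1 <- 50; numhid2 <- 200  # assumed embedding / hidden sizes

weights <- list(
  word_embedding = randn(vocab_size, numhid1) * 0.01,
  embed_to_hid   = randn(numhid1 * numwords, numhid2) * 0.01,
  hid_to_output  = randn(numhid2, vocab_size) * 0.01,
  hid_bias       = matrix(0, numhid2, 1),
  output_bias    = matrix(0, vocab_size, 1)
)

# One mini-batch of random word indices, numwords x batchsize
input_batch <- matrix(sample.int(vocab_size, numwords * batchsize, replace = TRUE),
                      nrow = numwords)

states <- fprop(input_batch, weights, repmat)
dim(states$output_layer_state)           # vocab_size x batchsize
colSums(states$output_layer_state)[1:3]  # each column sums to 1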
@@ -0,0 +1,62 @@
library(R.matlab)
# library(Matrix)
library(parallel); options(mc.cores = 4)

load_data <- function(N) {
  # % This method loads the training, validation and test set.
  # % It also divides the training set into mini-batches.
  # % Inputs:
  # %   N: Mini-batch size.
  # % Outputs:
  # %   train_input: An array of size D X N X M, where
  # %     D: number of input dimensions (in this case, 3).
  # %     N: size of each mini-batch (in this case, 100).
  # %     M: number of mini-batches.
  # %   train_target: An array of size 1 X N X M.
  # %   valid_input: An array of size D X number of points in the validation set.
  # %   test_input: An array of size D X number of points in the test set.
  # %   vocab: Vocabulary containing the index-to-word mapping.
  # In this R port the function returns a list; train_input and train_target are
  # lists of M mini-batches, and valid_target/test_target are returned as well.

  data.mat <- readMat("Neural Net Language Model/data.mat")
  data <- list(testData  = data.mat$data[1, 1, 1][[1]],
               trainData = data.mat$data[2, 1, 1][[1]],
               validData = data.mat$data[3, 1, 1][[1]],
               vocab     = unlist(data.mat$data[4, 1, 1]))

  numdims <- nrow(data$trainData)
  D <- numdims - 1  # rows 1:D hold the input words; row D + 1 holds the word to predict
  M <- floor(ncol(data$trainData) / N)

  # Split the training data into a list of M mini-batches, each of size D x N
  # (a worked example follows the function). Any remainder beyond M whole
  # batches is discarded.
  splitMatrixIntoBatch <- function(dat, b, N, byCol = TRUE) {
    # N is the size of each batch; b is the requested batch index
    if (length(dim(dat)) == 0) {
      if (byCol) dim(dat) <- c(1, length(dat)) else dim(dat) <- c(length(dat), 1)
    }
    start <- ((b - 1) * N) + 1
    end <- b * N

    if (byCol) return(dat[, start:end]) else return(dat[start:end, ])
  }
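  # Worked example: with N = 100, batch b = 2 spans columns ((2 - 1) * 100) + 1 = 101
  # through 2 * 100 = 200. A plain vector (such as the target row) is first given
  # dim c(1, length(dat)) so that the same column slicing applies to it.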
  train_input <- mclapply(1:M, splitMatrixIntoBatch, N = N,
                          dat = data$trainData[1:D, ], byCol = TRUE)
  train_target <- mclapply(1:M, splitMatrixIntoBatch, N = N,
                           dat = data$trainData[D + 1, ], byCol = TRUE)

  valid_input <- data$validData[1:D, ]
  valid_target <- data$validData[D + 1, ]

  test_input <- data$testData[1:D, ]
  test_target <- data$testData[D + 1, ]

  vocab <- data$vocab

  return(list(train_input = train_input,
              train_target = train_target,
              valid_input = valid_input,
              valid_target = valid_target,
              test_input = test_input,
              test_target = test_target,
              vocab = vocab))
}
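A minimal usage sketch (not part of the commit), assuming data.mat sits at the path hard-coded above; the expected shapes follow from the docstring (D = 3 input words, mini-batches of N = 100):

# Hypothetical usage sketch
data <- load_data(100)            # N = 100, the mini-batch size from the docstring
length(data$train_input)          # M: number of mini-batches
dim(data$train_input[[1]])        # 3 x 100: input word indices for one mini-batch
length(data$train_target[[1]])    # 100: one target word index per training case
data$vocab[data$train_input[[1]][, 1]]  # the words of the first training case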
@@ -0,0 +1,12 @@
# Necessary MATLAB functions
# Helper functions that replicate various MATLAB commands in R; most come from
# the matlab package.
library(matlab)

# HELPER FUNCTIONS
randn <- function(x, y) matrix(data = rnorm(x * y), nrow = x, ncol = y)
myPrintf <- function(txt, ...) writeLines(sprintf(txt, ...), sep = "", con = stdout(), useBytes = TRUE)
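The rendering of this file is cut off here (the hunk header promises 12 lines), so the myReshape and myCrossProd helpers that fprop calls are not shown. Minimal reconstructions consistent with the call sites and comments above might look as follows; these are sketches, not the commit's actual definitions:

# Reconstructed sketches, NOT the commit's definitions: minimal versions
# consistent with how fprop calls these helpers.

# myReshape(x, nrows): column-major reshape into a matrix with `nrows` rows,
# matching the commented-out equivalent in fprop:
#   matrix(as.numeric(x), nrow = nrows)
myReshape <- function(x, nrows) matrix(as.numeric(x), nrow = nrows)

# myCrossProd(x, y): t(x) %*% y, as documented in fprop's comments;
# base::crossprod() computes exactly this.
myCrossProd <- function(x, y) crossprod(x, y)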