FProp_Old.R
# Optional packages (commented out; base R is sufficient for the forward pass):
# library(Matrix)
# library(matrixStats)  # provides colMaxs(), mentioned in a benchmark note below
fprop <- function(input_batch, weights, fn) {
# This method forward propagates through the neural network language model.
#
# Inputs:
#   input_batch: the input data as a matrix of size numwords X batchsize,
#     where numwords is the number of context words and batchsize is the
#     number of data points. If input_batch[i, j] == k, then the ith word of
#     data point j is word index k of the vocabulary.
#
#   weights: a list with the following components:
#     word_embedding: word embedding matrix of size vocab_size X numhid1,
#       where vocab_size is the size of the vocabulary and numhid1 is the
#       dimensionality of the embedding space.
#     embed_to_hid: weights between the word embedding layer and the hidden
#       layer, a matrix of size (numhid1 * numwords) X numhid2, where numhid2
#       is the number of hidden units.
#     hid_to_output: weights between the hidden layer and the output softmax
#       units, a matrix of size numhid2 X vocab_size.
#     hid_bias: bias of the hidden layer, a matrix of size numhid2 X 1.
#     output_bias: bias of the output layer, a matrix of size vocab_size X 1.
#
#   fn: a repmat-style replication function (e.g. repmat or myRepMat4);
#     fn(x, m, n) tiles x m times vertically and n times horizontally, and is
#     used to broadcast the biases and per-column statistics across the batch.
#
# Outputs (returned as a named list):
#   embedding_layer_state: state of the units in the embedding layer, a matrix
#     of size (numhid1 * numwords) X batchsize.
#   hidden_layer_state: state of the units in the hidden layer, a matrix of
#     size numhid2 X batchsize.
#   output_layer_state: state of the units in the output layer, a matrix of
#     size vocab_size X batchsize.
tmp <- dim(input_batch) # dim() plays the role of MATLAB's size()
numwords <- tmp[1]
batchsize <- tmp[2]
tmp <- dim(weights$word_embedding)
vocab_size <- tmp[1]
numhid1 <- tmp[2]
numhid2 <- ncol(weights$embed_to_hid)
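# Optional sanity check: the weight matrices are assumed to follow the layout
# documented above; stop early if they do not.
stopifnot(nrow(weights$embed_to_hid) == numhid1 * numwords,
          nrow(weights$hid_to_output) == numhid2,
          ncol(weights$hid_to_output) == vocab_size)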
# COMPUTE STATE OF WORD EMBEDDING LAYER.
# Look up the input word indices in the weights$word_embedding matrix.
# Each row of the embedding matrix corresponds to one vocabulary word, and
# input_batch holds numwords * batchsize word indices (e.g. 3 * 100 = 300),
# each an integer between 1 and vocab_size (250 here).
# Indexing with the flattened batch gives a (numwords * batchsize) X numhid1
# matrix, which myReshape() rearranges into (numhid1 * numwords) X batchsize.
embedding_layer_state <- myReshape(weights$word_embedding[as.integer(input_batch), ], nrows = numhid1 * numwords)
# COMPUTE STATE OF HIDDEN LAYER.
# Compute inputs to the hidden units.
# myCrossProd(x, y) computes t(x) %*% y, and fn() tiles the hidden bias
# across the batch so it can be added to every column.
inputs_to_hidden_units <- myCrossProd(weights$embed_to_hid, embedding_layer_state) + fn(weights$hid_bias, 1, batchsize)
# benchmark(repmat(weights$hid_bias, 1, batchsize), myRepMat4(weights$hid_bias, 1, batchsize), replications = 10)
# Apply the logistic (sigmoid) activation function.
# FILL IN CODE: the retained line is option (c); the rejected alternatives were:
# (a) hidden_layer_state = 1 / (1 + exp(inputs_to_hidden_units))
# (b) hidden_layer_state = 1 / (1 - exp(-inputs_to_hidden_units))
# (d) hidden_layer_state = -1 / (1 + exp(-inputs_to_hidden_units))
hidden_layer_state <- 1 / (1 + exp(-inputs_to_hidden_units))
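# The sigmoid squashes each hidden input into (0, 1): 1 / (1 + exp(0)) equals
# 0.5 and 1 / (1 + exp(-5)) is roughly 0.993.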
# COMPUTE STATE OF OUTPUT LAYER.
# Compute inputs to the softmax units.
# FILL IN CODE: the retained line is option (a); the rejected alternatives were:
# (b) t(hid_to_output_weights) %*% hidden_layer_state + repmat(output_bias, batchsize, 1)
# (c) hidden_layer_state %*% t(hid_to_output_weights) + repmat(output_bias, 1, batchsize)
# (d) hid_to_output_weights %*% hidden_layer_state + repmat(output_bias, batchsize, 1)
inputs_to_softmax <- myCrossProd(weights$hid_to_output, hidden_layer_state) + fn(weights$output_bias, 1, batchsize)
# Subtract the column maximum.
# Adding or subtracting the same constant from every input of a softmax unit
# does not change its outputs, so subtracting each column's maximum makes all
# inputs <= 0 and prevents overflow when exponentiating.
# MATLAB's max() returns per-column maxima by default; apply(x, 2, max) is the
# R equivalent (matrixStats::colMaxs() is a drop-in alternative).
# benchmark(apply(inputs_to_softmax, 2, max), colMaxs(inputs_to_softmax), replications = 10)
tmp <- apply(inputs_to_softmax, 2, max)
inputs_to_softmax <- inputs_to_softmax - fn(tmp, vocab_size, 1)
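# Shift invariance in a nutshell: for a column x, exp(x - max(x)) / sum(exp(x - max(x)))
# equals exp(x) / sum(exp(x)), because the common factor exp(-max(x)) cancels.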
# Compute exp.
output_layer_state <- exp(inputs_to_softmax)
# Normalize each column to obtain a probability distribution over the vocabulary.
output_layer_state <- output_layer_state / fn(colSums(output_layer_state), vocab_size, 1)
return(list(embedding_layer_state=embedding_layer_state,
hidden_layer_state=hidden_layer_state,
output_layer_state=output_layer_state))
}
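
# Minimal usage sketch (illustrative only). The helpers myReshape and
# myCrossProd, and the repmat-style function passed in as fn, are defined
# elsewhere in this repository; the stubs below are hypothetical stand-ins
# with assumed semantics (plain column-major reshape, t(x) %*% y, and
# MATLAB-style tiling), and the toy sizes and random weights are made up for
# the demo. Source this file and call fprop_demo() to run it.
if (!exists("myReshape"))   myReshape   <- function(x, nrows) matrix(as.numeric(x), nrow = nrows)
if (!exists("myCrossProd")) myCrossProd <- function(x, y) crossprod(x, y)  # t(x) %*% y
repmat_demo <- function(x, m, n) {
  if (is.null(dim(x))) x <- matrix(x, nrow = 1)  # treat bare vectors as MATLAB row vectors
  kronecker(matrix(1, m, n), x)                  # tile x into an m-by-n block grid
}

fprop_demo <- function() {
  set.seed(1)
  # Toy model: 10-word vocabulary, 3 context words, 5-d embeddings,
  # 4 hidden units, a batch of 6 cases.
  vocab_size <- 10; numwords <- 3; numhid1 <- 5; numhid2 <- 4; batchsize <- 6
  weights <- list(
    word_embedding = matrix(rnorm(vocab_size * numhid1), vocab_size, numhid1),
    embed_to_hid   = matrix(rnorm(numhid1 * numwords * numhid2), numhid1 * numwords, numhid2),
    hid_to_output  = matrix(rnorm(numhid2 * vocab_size), numhid2, vocab_size),
    hid_bias       = matrix(0, numhid2, 1),
    output_bias    = matrix(0, vocab_size, 1)
  )
  input_batch <- matrix(sample(vocab_size, numwords * batchsize, replace = TRUE),
                        numwords, batchsize)
  states <- fprop(input_batch, weights, repmat_demo)
  colSums(states$output_layer_state)  # each column should sum to 1 (up to rounding)
}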