Skip to content

Commit

Permalink
[src] Add nnet2 Chunking on GPU (#3761)
Browse files Browse the repository at this point in the history
  • Loading branch information
btiplitz authored and danpovey committed Dec 19, 2019
1 parent ba92f60 commit f1c9ae0
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 17 deletions.
8 changes: 4 additions & 4 deletions src/nnet2/nnet-compute-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ void UnitTestNnetCompute() {
return;
CuMatrix<BaseFloat> output1(num_output_rows, output_dim);
NnetComputation(*nnet, input, pad_input, &output1);

CuMatrix<BaseFloat> output2(output1.NumRows(), output1.NumCols());
int32 cur_input_pos = 0, cur_output_pos = 0;

Expand Down Expand Up @@ -98,11 +97,12 @@ void UnitTestNnetComputeChunked() {

int32 num_output_rows = num_feats;
CuMatrix<BaseFloat> cu_output1(num_output_rows, output_dim);
Matrix<BaseFloat> output2(num_output_rows, output_dim);
CuMatrix<BaseFloat> cu_output2(num_output_rows, output_dim);
NnetComputation(*nnet, input, pad_input, &cu_output1);
NnetComputationChunked(*nnet, Matrix<BaseFloat>(input), chunk_size,
&output2);
NnetComputationChunked(*nnet, CuMatrix<BaseFloat>(input), chunk_size,
&cu_output2);
Matrix<BaseFloat> output1(cu_output1);
Matrix<BaseFloat> output2(cu_output2);
AssertEqual(output1, output2);
for (int32 i = 0; i < output1.NumRows(); i++) {
// just double-check that the frames near the end are right, in case
Expand Down
10 changes: 5 additions & 5 deletions src/nnet2/nnet-compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -167,15 +167,15 @@ void NnetComputation(const Nnet &nnet,
}

void NnetComputationChunked(const Nnet &nnet,
const Matrix<BaseFloat> &input, // features
const CuMatrixBase<BaseFloat> &input, // features
int32 chunk_size,
Matrix<BaseFloat> *output) {
CuMatrixBase<BaseFloat> *output) {
int32 num_rows,
num_chunks = ceil((BaseFloat)input.NumRows() / chunk_size),
dim = input.NumCols(),
left_context = nnet.LeftContext(),
right_context = nnet.RightContext();
Matrix<BaseFloat> full_input;
CuMatrix<BaseFloat> full_input;
num_rows = left_context + input.NumRows() + right_context;
full_input.Resize(num_rows, dim);
full_input.Range(left_context, input.NumRows(),
Expand All @@ -190,15 +190,15 @@ void NnetComputationChunked(const Nnet &nnet,
int32 index = i * chunk_size,
offset = std::min(num_rows - chunk_size * i,
left_context + chunk_size + right_context);
SubMatrix<BaseFloat> chunk_input(full_input, index, offset, 0, dim);
CuSubMatrix<BaseFloat> chunk_input(full_input, index, offset, 0, dim);
CuMatrix<BaseFloat> cu_chunk_input(chunk_input);

// Note: we have already accounted for input padding, so we pass
// pad_input==false to the NnetComputer.
NnetComputer nnet_computer(nnet, cu_chunk_input, false, NULL);
nnet_computer.Propagate();
CuMatrix<BaseFloat> cu_chunk_output(nnet_computer.GetOutput());
SubMatrix<BaseFloat> chunk_out(*output, i * chunk_size,
CuSubMatrix<BaseFloat> chunk_out(*output, i * chunk_size,
cu_chunk_output.NumRows(), 0,
cu_chunk_output.NumCols());
chunk_out.CopyFromMat(cu_chunk_output);
Expand Down
4 changes: 2 additions & 2 deletions src/nnet2/nnet-compute.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ void NnetComputation(const Nnet &nnet,
input.NumRows().
*/
void NnetComputationChunked(const Nnet &nnet,
const Matrix<BaseFloat> &input, // features
const CuMatrixBase<BaseFloat> &input, // features
int32 chunk_size,
Matrix<BaseFloat> *output); // posteriors.
CuMatrixBase<BaseFloat> *output); // posteriors.

/** Does the neural net computation and backprop, given input and labels.
Note: if pad_input==true the number of rows of input should be the
Expand Down
12 changes: 6 additions & 6 deletions src/nnet2bin/nnet-am-compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,15 @@ int main(int argc, char *argv[]) {
}

Matrix<BaseFloat> output(output_frames, output_dim);
if (chunk_size > 0 && chunk_size < feats.NumRows()) {
NnetComputationChunked(nnet, feats, chunk_size, &output);
CuMatrix<BaseFloat> cu_feats(feats);
CuMatrix<BaseFloat> cu_output(output);
if (chunk_size > 0 && chunk_size < feats.NumRows()) {
NnetComputationChunked(nnet, cu_feats, chunk_size, &cu_output);
} else {
CuMatrix<BaseFloat> cu_feats(feats);
CuMatrix<BaseFloat> cu_output(output);
NnetComputation(nnet, cu_feats, pad_input, &cu_output);
output.CopyFromMat(cu_output);
}

cu_output.Swap(&output);

if (divide_by_priors) {
output.MulColsVec(inv_priors); // scales each column by the corresponding element
// of inv_priors.
Expand Down

0 comments on commit f1c9ae0

Please sign in to comment.