
Commit 0a68b83

Merge pull request #35 from ROCmSoftwarePlatform/rocrand
Rocrand
2 parents 2bab3f0 + 117f540 · commit 0a68b83

10 files changed: +147 -468 lines

include/caffe/internal_thread.hpp

Lines changed: 3 additions & 1 deletion
@@ -18,7 +18,7 @@ namespace caffe {
  */
 class InternalThread {
  public:
-  InternalThread() : thread_() {}
+  InternalThread() : device_(0), thread_() {}
   virtual ~InternalThread();
 
   /**
@@ -34,6 +34,8 @@ class InternalThread {
   bool is_started() const;
 
 protected:
+  int device_;
+
   /* Implement this method in your subclass
      with the code you want your thread to run. */
   virtual void InternalThreadEntry() {}
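
The net effect of this header change: each InternalThread now carries a device_ member (default 0) naming the GPU it should run on, chosen by whoever owns the thread rather than queried from the runtime at start time. A minimal sketch of how a subclass could use the protected member; the subclass and its names are hypothetical and not part of this commit:

// Illustrative only -- not code from this commit. Because device_ is
// protected, a derived class can pick its GPU before the thread starts.
#include "caffe/internal_thread.hpp"

class PrefetchWorker : public caffe::InternalThread {  // hypothetical subclass
 public:
  explicit PrefetchWorker(int gpu_id) { device_ = gpu_id; }

 protected:
  void InternalThreadEntry() override {
    // Runs on the spawned thread, already bound to device_
    // (see the internal_thread.cpp change below).
  }
};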

include/caffe/layers/cudnn_conv_layer.hpp

Lines changed: 10 additions & 27 deletions
@@ -29,8 +29,8 @@ namespace caffe {
 template <typename Dtype>
 class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
  public:
-  explicit CuDNNConvolutionLayer(const LayerParameter& param)
-      : ConvolutionLayer<Dtype>(param), handles_setup_(false) {}
+  explicit CuDNNConvolutionLayer(const LayerParameter& param);
+
   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
@@ -43,49 +43,32 @@ class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
 
-
   bool handles_setup_;
 
 #ifdef USE_MIOPEN
-  miopenHandle_t* handle_;
-  hipStream_t* stream_;
 
   // algorithms for forward and backwards convolutions
-  miopenConvFwdAlgorithm_t* fwd_algo_;
-  miopenConvBwdWeightsAlgorithm_t* bwd_weight_algo_;
-  miopenConvBwdDataAlgorithm_t* bwd_data_algo_;
+  vector<miopenConvFwdAlgorithm_t> fwd_algo_;
+  vector<miopenConvBwdWeightsAlgorithm_t> bwd_weight_algo_;
+  vector<miopenConvBwdDataAlgorithm_t> bwd_data_algo_;
 
   vector<miopenTensorDescriptor_t> bottom_descs_, top_descs_;
   miopenTensorDescriptor_t bias_desc_;
   miopenTensorDescriptor_t filter_desc_;
   vector<miopenConvolutionDescriptor_t> conv_descs_;
 
   int N_, C_, W_, H_;
-#endif
-
-#ifdef USE_CUDNN
-  cudnnHandle_t* handle_;
-  cudaStream_t* stream_;
-
-  // algorithms for forward and backwards convolutions
-  cudnnConvolutionFwdAlgo_t *fwd_algo_;
-  cudnnConvolutionBwdFilterAlgo_t *bwd_filter_algo_;
-  cudnnConvolutionBwdDataAlgo_t *bwd_data_algo_;
-
-  vector<cudnnTensorDescriptor_t> bottom_descs_, top_descs_;
-  cudnnTensorDescriptor_t bias_desc_;
-  cudnnFilterDescriptor_t filter_desc_;
-  vector<cudnnConvolutionDescriptor_t> conv_descs_;
+  miopenHandle_t handle_;
 #endif
 
   int bottom_offset_, top_offset_, bias_offset_;
 
-  size_t *workspace_fwd_sizes_;
-  size_t *workspace_bwd_data_sizes_;
-  size_t *workspace_bwd_filter_sizes_;
+  vector<size_t> workspace_fwd_sizes_;
+  vector<size_t> workspace_bwd_filter_sizes_;
+  vector<size_t> workspace_bwd_data_sizes_;
   size_t workspaceSizeInBytes; // size of underlying storage
   void *workspaceData; // underlying storage
-  void **workspace; // aliases into workspaceData
+  vector<void*> workspace; // aliases into workspaceData
 };
 #endif
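
Besides replacing the inline constructor with an out-of-line one, this header drops the dead USE_CUDNN branch, keeps a single miopenHandle_t instead of per-group handle and stream arrays, and turns the per-bottom-blob algorithm and workspace bookkeeping from raw new[]-managed arrays into vectors. A rough standalone sketch of why the vector form is easier to keep consistent (illustration only, not the actual CuDNNConvolutionLayer code):

// Illustration only: vectors resize to however many bottom blobs the layer
// sees, with no manual new[]/delete[] pairs to keep in sync.
#include <cstddef>
#include <vector>

struct ConvScratch {
  std::vector<std::size_t> fwd_sizes, bwd_filter_sizes, bwd_data_sizes;
  std::vector<void*> workspace;  // aliases into one shared allocation

  void Resize(std::size_t num_bottom_blobs) {
    fwd_sizes.assign(num_bottom_blobs, 0);
    bwd_filter_sizes.assign(num_bottom_blobs, 0);
    bwd_data_sizes.assign(num_bottom_blobs, 0);
    workspace.assign(num_bottom_blobs, nullptr);
  }
};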

src/caffe/internal_thread.cpp

Lines changed: 3 additions & 5 deletions
@@ -20,18 +20,15 @@ bool InternalThread::must_stop() {
 
 void InternalThread::StartInternalThread() {
   CHECK(!is_started()) << "Threads should persist and not be restarted.";
+  LOG(INFO) << "Starting internal thread on device " << device_;
 
-  int device = 0;
-#ifndef CPU_ONLY
-  HIP_CHECK(hipGetDevice(&device));
-#endif
   Caffe::Brew mode = Caffe::mode();
   int rand_seed = caffe_rng_rand();
   int solver_count = Caffe::solver_count();
   bool root_solver = Caffe::root_solver();
 
   try {
-    thread_.reset(new boost::thread(&InternalThread::entry, this, device, mode,
+    thread_.reset(new boost::thread(&InternalThread::entry, this, device_, mode,
           rand_seed, solver_count, root_solver));
   } catch (std::exception& e) {
     LOG(FATAL) << "Thread exception: " << e.what();
@@ -40,6 +37,7 @@ void InternalThread::StartInternalThread() {
 
 void InternalThread::entry(int device, Caffe::Brew mode, int rand_seed,
     int solver_count, bool root_solver) {
+  LOG(INFO) << "Started internal thread on device " << device;
 #ifndef CPU_ONLY
   HIP_CHECK(hipSetDevice(device));
 #endif
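
StartInternalThread no longer asks the runtime which device is current; it logs and forwards the stored device_, and entry() pins the spawned thread with hipSetDevice() (in HIP, as in CUDA, the current device is per-thread state). The same pattern as a minimal standalone sketch, not the Caffe code:

// Minimal sketch: the owner picks the device id, and the worker thread must
// call hipSetDevice() itself because the current device is per-thread state.
#include <hip/hip_runtime.h>

#include <iostream>
#include <thread>

static void worker(int device) {
  if (hipSetDevice(device) != hipSuccess) {
    std::cerr << "hipSetDevice(" << device << ") failed" << std::endl;
    return;
  }
  // ... launch kernels or stream work on `device` here ...
}

int main() {
  int device = 0;  // analogous to InternalThread::device_
  std::thread t(worker, device);
  t.join();
  return 0;
}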

src/caffe/layers/base_data_layer.cpp

Lines changed: 3 additions & 5 deletions
@@ -75,10 +75,7 @@ void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
 template <typename Dtype>
 void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
 #ifndef CPU_ONLY
-  hipStream_t stream;
-  if (Caffe::mode() == Caffe::GPU) {
-    HIP_CHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
-  }
+  hipStream_t stream = nullptr;
 #endif
 
   try {
@@ -98,7 +95,8 @@ void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
   }
 #ifndef CPU_ONLY
   if (Caffe::mode() == Caffe::GPU) {
-    HIP_CHECK(hipStreamDestroy(stream));
+    if (stream != nullptr)
+      HIP_CHECK(hipStreamDestroy(stream));
   }
 #endif
 }
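
The prefetch thread no longer creates its own non-blocking stream up front: stream starts out as nullptr, and teardown only destroys a stream that was actually created, so the exit path is safe whether or not GPU mode ever made one. The guard in isolation, as a sketch rather than the layer code:

// Sketch of the null-guarded stream lifetime (not the layer code).
#include <hip/hip_runtime.h>

void run_with_optional_stream(bool need_stream) {
  hipStream_t stream = nullptr;
  if (need_stream) {
    // Creation may be skipped entirely, e.g. in CPU-only mode.
    (void)hipStreamCreateWithFlags(&stream, hipStreamNonBlocking);
  }

  // ... work that may or may not use the stream ...

  if (stream != nullptr) {
    (void)hipStreamDestroy(stream);  // only tear down what was created
  }
}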
