Skip to content

Commit 3935714

Browse files
author
Fabian Tschopp
committed
Merge pull request weiliu89#1 from jyegerlehner/timing_sync
fix caffe time command.
2 parents c70039b + 1f1801b commit 3935714

File tree

3 files changed

+17
-0
lines changed

3 files changed

+17
-0
lines changed

include/caffe/common.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ class Caffe {
162162
static void EnumerateDevices();
163163
// Prepares contexts for devices to use
164164
static void SetDevices(std::vector<int> device_ids);
165+
// Finish executing GPU kernels on the specified device.
166+
static void Synchronize(int device_id);
165167

166168
// Get a device context
167169
static DeviceContext& GetDeviceContext(int id);

src/caffe/common.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,19 @@ void Caffe::set_random_seed(const unsigned int seed) {
150150
Get().random_generator_.reset(new RNG(seed));
151151
}
152152

153+
// Waits for pending device work on the device identified by device_id.
//
// When built with USE_GREENTEA and the device context's backend is
// BACKEND_OpenCL, this looks up the ViennaCL OpenCL context for the device
// and calls finish() on its queue. In every other case (other backend, or a
// build without USE_GREENTEA) the function performs no synchronization.
//
// NOTE(review): no branch here synchronizes a non-OpenCL (e.g. CUDA) backend;
// confirm whether that is intended for callers that time GPU work.
void Caffe::Synchronize(int device_id) {
154+
#ifdef USE_GREENTEA
155+
DeviceContext& device_context = Caffe::GetDeviceContext(device_id);
156+
if ( device_context.backend() == BACKEND_OpenCL ) {
157+
viennacl::ocl::context &ctx = viennacl::ocl::get_context(
158+
GetDeviceContext(device_id).id());  // same device context as above, looked up again
159+
ctx.get_queue().finish();  // finish the work queued on this context's queue
160+
}
161+
#else
162+
(void) device_id;  // parameter is otherwise unreferenced in this configuration
163+
#endif
164+
}
165+
153166
void Caffe::EnumerateDevices() {
154167
int cuda_device_count = 0;
155168
int greentea_device_count = 0;

tools/caffe.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ int time() {
253253
for (int i = 0; i < layers.size(); ++i) {
254254
timer.Start();
255255
layers[i]->Forward(bottom_vecs[i], top_vecs[i]);
256+
Caffe::Synchronize(FLAGS_gpu);
256257
forward_time_per_layer[i] += timer.MicroSeconds();
257258
}
258259
forward_time += forward_timer.MicroSeconds();
@@ -261,6 +262,7 @@ int time() {
261262
timer.Start();
262263
layers[i]->Backward(top_vecs[i], bottom_need_backward[i],
263264
bottom_vecs[i]);
265+
Caffe::Synchronize(FLAGS_gpu);
264266
backward_time_per_layer[i] += timer.MicroSeconds();
265267
}
266268
backward_time += backward_timer.MicroSeconds();

0 commit comments

Comments
 (0)