Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Useful fixes for a good OpenCL wrapper #3

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions src/opencl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ struct Device_Info {
uint is_fp64_capable=0u, is_fp32_capable=0u, is_fp16_capable=0u, is_int64_capable=0u, is_int32_capable=0u, is_int16_capable=0u, is_int8_capable=0u;
uint cores=0u; // for CPUs, compute_units is the number of threads (twice the number of cores with hyperthreading)
float tflops=0.0f; // estimated device FP32 floating point performance in TeraFLOPs/s
inline Device_Info(const cl::Device& cl_device, const cl::Context& cl_context, const uint id) {
this->cl_device = cl_device; // see https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
this->cl_context = cl_context;
inline Device_Info(const cl::Device& cl_device, const cl::Context& cl_context, const uint id)
: cl_device(cl_device), // see https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
cl_context(cl_context)
{
this->id = id;
name = trim(cl_device.getInfo<CL_DEVICE_NAME>()); // device name
vendor = trim(cl_device.getInfo<CL_DEVICE_VENDOR>()); // device vendor
Expand Down Expand Up @@ -78,7 +79,7 @@ struct Device_Info {
}
intel_gpu_above_4gb_patch = (intel==8.0f)&&(memory>4096); // enable memory allocations greater than 4GB for Intel GPUs with >4GB VRAM
}
inline Device_Info() {}; // default constructor
inline Device_Info() = default; // default constructor
};

string get_opencl_c_code(); // implemented in kernel.hpp
Expand All @@ -105,7 +106,7 @@ inline vector<Device_Info> get_devices(const bool print_info=true) { // returns
//cl::Context cl_context(cl_devices); // same cl::Context for all devices (allocates extra VRAM on all other unused Nvidia GPUs)
for(uint j=0u; j<(uint)cl_devices.size(); j++) {
cl::Context cl_context(cl_devices[j]); // separate cl::Context for each device
devices.push_back(Device_Info(cl_devices[j], cl_context, id++));
devices.emplace_back(cl_devices[j], cl_context, id++);
}
}
if((uint)cl_platforms.size()==0u||(uint)devices.size()==0u) {
Expand Down Expand Up @@ -168,9 +169,8 @@ class Device {
;}
public:
Device_Info info;
inline Device(const Device_Info& info, const string& opencl_c_code=get_opencl_c_code()) {
print_device_info(info);
this->info = info;
inline Device(const Device_Info& info, const string& opencl_c_code=get_opencl_c_code()) : info(info) {
print_device_info(this->info);
this->cl_queue = cl::CommandQueue(info.cl_context, info.cl_device); // queue to push commands for the device
cl::Program::Sources cl_source;
const string kernel_code = enable_device_capabilities()+"\n"+opencl_c_code;
Expand All @@ -193,7 +193,7 @@ class Device {
#endif // PTX
this->exists = true;
}
inline Device() {} // default constructor
inline Device() = default; // default constructor
inline void barrier(const vector<Event>* event_waitlist=nullptr, Event* event_returned=nullptr) { cl_queue.enqueueBarrierWithWaitList(event_waitlist, event_returned); }
inline void finish_queue() { cl_queue.finish(); }
inline cl::Context get_cl_context() const { return info.cl_context; }
Expand Down Expand Up @@ -260,7 +260,7 @@ template<typename T> class Memory {
external_host_buffer = true;
write_to_device();
}
inline Memory() {} // default constructor
inline Memory() = default; // default constructor
inline ~Memory() {
delete_buffers();
}
Expand Down