diff --git a/src/opencl/opencl_device.cpp b/src/opencl/opencl_device.cpp index c8e99fc3..5f362f9e 100644 --- a/src/opencl/opencl_device.cpp +++ b/src/opencl/opencl_device.cpp @@ -1,6 +1,6 @@ /*************************************************************************** * penguinV: https://github.com/ihhub/penguinV * - * Copyright (C) 2017 - 2022 * + * Copyright (C) 2017 - 2024 * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -59,7 +59,8 @@ namespace struct MemsetKernelHolder { - MemsetKernelHolder() {} + MemsetKernelHolder() = default; + ~MemsetKernelHolder() { kernel.reset(); @@ -78,9 +79,10 @@ namespace multiCL::OpenCLDevice & device = multiCL::OpenCLDeviceManager::instance().device(); - std::map::const_iterator program = deviceProgram.find( device.deviceId() ); - if ( program != deviceProgram.cend() ) + auto program = deviceProgram.find( device.deviceId() ); + if ( program != deviceProgram.cend() ) { return *( program->second.kernel ); + } MemsetKernelHolder holder; holder.program = std::shared_ptr( new multiCL::OpenCLProgram( device.context(), memsetCode.data() ) ); @@ -101,7 +103,7 @@ namespace multiCL return OpenCLDeviceManager::instance().device( getDefaultDeviceId() ).allocator(); } - MemoryAllocator & memory( uint32_t deviceId ) + MemoryAllocator & memory( const uint32_t deviceId ) { return OpenCLDeviceManager::instance().device( deviceId ).allocator(); } @@ -134,55 +136,23 @@ namespace multiCL openCLCheck( error ); } - OpenCLContext::OpenCLContext( cl_context context ) - : _context( context ) - {} - OpenCLContext::~OpenCLContext() { clReleaseContext( _context ); } - OpenCLContext::OpenCLContext( const OpenCLContext & ) {} - - OpenCLContext & OpenCLContext::operator=( const OpenCLContext & ) - { - return ( *this ); - } - - cl_context OpenCLContext::operator()() const - { - return _context; - } - OpenCLQueue::OpenCLQueue( const OpenCLContext & context, 
cl_device_id deviceId ) { cl_int error; - _commandQueue = clCreateCommandQueue( context(), deviceId, 0, &error ); + _commandQueue = clCreateCommandQueueWithProperties( context(), deviceId, 0, &error ); openCLCheck( error ); } - OpenCLQueue::OpenCLQueue( cl_command_queue queue ) - : _commandQueue( queue ) - {} - OpenCLQueue::~OpenCLQueue() { clReleaseCommandQueue( _commandQueue ); } - OpenCLQueue::OpenCLQueue( const OpenCLQueue & ) {} - - OpenCLQueue & OpenCLQueue::operator=( const OpenCLQueue & ) - { - return ( *this ); - } - - cl_command_queue OpenCLQueue::operator()() const - { - return _commandQueue; - } - void OpenCLQueue::synchronize() { openCLCheck( clFinish( _commandQueue ) ); @@ -204,7 +174,7 @@ namespace multiCL openCLCheck( clGetContextInfo( context(), CL_CONTEXT_DEVICES, sizeof( cl_device_id ) * deviceCount, device.data(), NULL ) ); std::string fullLog; - for ( std::vector::iterator deviceId = device.begin(); deviceId != device.end(); ++deviceId ) { + for ( auto deviceId = device.begin(); deviceId != device.end(); ++deviceId ) { size_t logSize = 0; clGetProgramBuildInfo( _program, *deviceId, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize ); @@ -223,33 +193,11 @@ namespace multiCL } } - OpenCLProgram::OpenCLProgram( cl_program program ) - : _program( program ) - {} - - OpenCLProgram::OpenCLProgram( OpenCLProgram && program ) - : _program( NULL ) - { - std::swap( _program, program._program ); - } - OpenCLProgram::~OpenCLProgram() { clReleaseProgram( _program ); } - OpenCLProgram::OpenCLProgram( const OpenCLProgram & ) {} - - OpenCLProgram & OpenCLProgram::operator=( const OpenCLProgram & ) - { - return ( *this ); - } - - cl_program OpenCLProgram::operator()() const - { - return _program; - } - OpenCLKernel::OpenCLKernel( const OpenCLProgram & program, const std::string & name ) : _parameterId( 0 ) { @@ -258,33 +206,11 @@ namespace multiCL openCLCheck( error ); } - OpenCLKernel::OpenCLKernel( cl_kernel kernel ) - : _kernel( kernel ) - , _parameterId( 0 ) - {} - 
OpenCLKernel::~OpenCLKernel() { clReleaseKernel( _kernel ); } - OpenCLKernel::OpenCLKernel( const OpenCLKernel & ) {} - - OpenCLKernel & OpenCLKernel::operator=( const OpenCLKernel & ) - { - return ( *this ); - } - - cl_kernel OpenCLKernel::operator()() const - { - return _kernel; - } - - void OpenCLKernel::reset() - { - _parameterId = 0; - } - void OpenCLKernel::_setArgument( size_t size, const void * data ) { openCLCheck( clSetKernelArg( _kernel, _parameterId, size, data ) ); @@ -305,34 +231,9 @@ namespace multiCL { delete _allocator; - for ( std::vector::iterator queueId = _queue.begin(); queueId != _queue.end(); ++queueId ) + for ( auto queueId = _queue.begin(); queueId != _queue.end(); ++queueId ) { delete ( *queueId ); - } - - OpenCLDevice::OpenCLDevice( const OpenCLDevice & ) - : _deviceId( NULL ) - , _context( _deviceId ) - , _currentQueueId( 0u ) - {} - - OpenCLDevice & OpenCLDevice::operator=( const OpenCLDevice & ) - { - return ( *this ); - } - - cl_device_id OpenCLDevice::deviceId() const - { - return _deviceId; - } - - OpenCLContext & OpenCLDevice::context() - { - return _context; - } - - const OpenCLContext & OpenCLDevice::context() const - { - return _context; + } } size_t OpenCLDevice::threadsPerBlock( const OpenCLKernel & kernel ) const @@ -356,10 +257,10 @@ namespace multiCL std::string OpenCLDevice::name() const { size_t nameLength = 0u; - std::vector deviceName; openCLCheck( clGetDeviceInfo( _deviceId, CL_DEVICE_NAME, 0, NULL, &nameLength ) ); + std::vector deviceName; deviceName.resize( nameLength ); openCLCheck( clGetDeviceInfo( _deviceId, CL_DEVICE_NAME, nameLength, deviceName.data(), NULL ) ); @@ -370,10 +271,10 @@ namespace multiCL std::string OpenCLDevice::computeCapability() const { size_t capabilityLength = 0u; - std::vector capability; openCLCheck( clGetDeviceInfo( _deviceId, CL_DEVICE_VERSION, 0, NULL, &capabilityLength ) ); + std::vector capability; capability.resize( capabilityLength ); openCLCheck( clGetDeviceInfo( _deviceId, 
CL_DEVICE_VERSION, capabilityLength, capability.data(), NULL ) ); @@ -383,88 +284,38 @@ namespace multiCL void OpenCLDevice::synchronize() { - for ( std::vector::iterator queueId = _queue.begin(); queueId != _queue.end(); ++queueId ) + for ( auto queueId = _queue.begin(); queueId != _queue.end(); ++queueId ) { ( *queueId )->synchronize(); - } - - size_t OpenCLDevice::currentQueueId() const - { - return _currentQueueId; - } - - void OpenCLDevice::setCurrentQueueId( size_t queueId ) - { - if ( _currentQueueId != queueId && queueId < _queue.size() ) - _currentQueueId = queueId; - } - - OpenCLQueue & OpenCLDevice::queue() - { - return *( _queue[_currentQueueId] ); - } - - const OpenCLQueue & OpenCLDevice::queue() const - { - return *( _queue[_currentQueueId] ); - } - - OpenCLQueue & OpenCLDevice::queue( size_t queueId ) - { - return *( _queue[queueId] ); - } - - const OpenCLQueue & OpenCLDevice::queue( size_t queueId ) const - { - return *( _queue[queueId] ); - } - - size_t OpenCLDevice::queueCount() const - { - return _queue.size(); + } } void OpenCLDevice::setQueueCount( size_t queueCount ) { - if ( queueCount > 255u ) // no real device needs more than 255 queues + // No real device needs more than 255 queues. 
+ if ( queueCount > 255u ) { queueCount = 255u; + } if ( queueCount != _queue.size() ) { if ( queueCount > _queue.size() ) { - while ( queueCount != _queue.size() ) + while ( queueCount != _queue.size() ) { _queue.push_back( new OpenCLQueue( _context, _deviceId ) ); + } } else { - if ( _currentQueueId >= queueCount ) + if ( _currentQueueId >= queueCount ) { _currentQueueId = 0; + } - for ( std::vector::iterator queueId = _queue.begin() + static_cast( queueCount ); queueId != _queue.end(); ++queueId ) + for ( auto queueId = _queue.begin() + static_cast( queueCount ); queueId != _queue.end(); ++queueId ) { delete ( *queueId ); + } _queue.resize( queueCount ); } } } - MemoryAllocator & OpenCLDevice::allocator() - { - return *_allocator; - } - - const MemoryAllocator & OpenCLDevice::allocator() const - { - return *_allocator; - } - - OpenCLDeviceManager::OpenCLDeviceManager() - { - resetSupportedDevice(); - } - - OpenCLDeviceManager::~OpenCLDeviceManager() - { - closeDevices(); - } - OpenCLDeviceManager & OpenCLDeviceManager::instance() { static OpenCLDeviceManager manager; @@ -473,28 +324,34 @@ namespace multiCL void OpenCLDeviceManager::initializeDevices() { - for ( uint32_t deviceId = 0; deviceId < _supportedDeviceId.size(); ++deviceId ) + for ( uint32_t deviceId = 0; deviceId < _supportedDeviceId.size(); ++deviceId ) { initializeDevice( deviceId ); + } } void OpenCLDeviceManager::initializeDevice( uint32_t deviceId ) { - if ( deviceId >= _supportedDeviceId.size() ) + if ( deviceId >= _supportedDeviceId.size() ) { throw penguinVException( "System does not contain a device with such ID" ); + } - std::list::const_iterator foundDevice + auto foundDevice = std::find_if( _device.begin(), _device.end(), [&]( const OpenCLDevice * device ) { return device->deviceId() == _supportedDeviceId[deviceId]; } ); - if ( foundDevice == _device.end() ) + + if ( foundDevice == _device.end() ) { _device.push_back( new OpenCLDevice( _supportedDeviceId[deviceId] ) ); + } } void 
OpenCLDeviceManager::closeDevice( uint32_t deviceId ) { - if ( deviceId >= _supportedDeviceId.size() ) + if ( deviceId >= _supportedDeviceId.size() ) { throw penguinVException( "System does not contain a device with such ID" ); + } - std::list::iterator foundDevice + auto foundDevice = std::find_if( _device.begin(), _device.end(), [&]( const OpenCLDevice * device ) { return device->deviceId() == _supportedDeviceId[deviceId]; } ); + if ( foundDevice != _device.end() ) { delete ( *foundDevice ); _device.erase( foundDevice ); @@ -503,22 +360,13 @@ namespace multiCL void OpenCLDeviceManager::closeDevices() { - for ( std::list::iterator device = _device.begin(); device != _device.end(); ++device ) + for ( auto device = _device.begin(); device != _device.end(); ++device ) { delete ( *device ); + } _device.clear(); } - uint32_t OpenCLDeviceManager::deviceCount() const - { - return static_cast( _device.size() ); - } - - uint32_t OpenCLDeviceManager::supportedDeviceCount() const - { - return static_cast( _supportedDeviceId.size() ); - } - OpenCLDevice & OpenCLDeviceManager::device() { return device( getDefaultDeviceId() ); @@ -531,36 +379,44 @@ namespace multiCL OpenCLDevice & OpenCLDeviceManager::device( uint32_t deviceId ) { - if ( _device.empty() ) + if ( _device.empty() ) { throw penguinVException( "Device manager does not contain any devices" ); + } - std::list::iterator foundDevice + auto foundDevice = std::find_if( _device.begin(), _device.end(), [&]( const OpenCLDevice * device ) { return device->deviceId() == _supportedDeviceId[deviceId]; } ); - if ( foundDevice == _device.end() ) + + if ( foundDevice == _device.end() ) { throw penguinVException( "Device ID is invalid. Please check that you initialize devices!" 
); + } return *( *foundDevice ); } const OpenCLDevice & OpenCLDeviceManager::device( uint32_t deviceId ) const { - if ( _device.empty() ) + if ( _device.empty() ) { throw penguinVException( "Device manager does not contain any devices" ); + } - std::list::const_iterator foundDevice + auto foundDevice = std::find_if( _device.begin(), _device.end(), [&]( const OpenCLDevice * device ) { return device->deviceId() == _supportedDeviceId[deviceId]; } ); - if ( foundDevice == _device.end() ) + + if ( foundDevice == _device.end() ) { throw penguinVException( "Device ID is invalid. Please check that you initialize devices!" ); + } return *( *foundDevice ); } void OpenCLDeviceManager::setActiveDevice( uint32_t deviceId ) { - std::list::iterator foundDevice + auto foundDevice = std::find_if( _device.begin(), _device.end(), [&]( const OpenCLDevice * device ) { return device->deviceId() == _supportedDeviceId[deviceId]; } ); - if ( foundDevice == _device.end() ) + + if ( foundDevice == _device.end() ) { throw penguinVException( "Device ID is invalid. Please check that you initialize devices!" ); + } setDefaultDeviceId( deviceId ); } @@ -586,7 +442,7 @@ namespace multiCL const cl_device_type deviceType = ( isGPUSupportEnabled ? CL_DEVICE_TYPE_GPU : 0u ) + ( isCPUSupportEnabled ? 
CL_DEVICE_TYPE_CPU : 0u ); uint32_t supportedDeviceCount = 0u; - for ( std::vector::iterator platform = platformId.begin(); platform != platformId.end(); ++platform ) { + for ( auto platform = platformId.begin(); platform != platformId.end(); ++platform ) { uint32_t deviceCount = 0u; if ( openCLSafeCheck( clGetDeviceIDs( *platform, deviceType, 0, NULL, &deviceCount ) ) ) { _supportedDeviceId.resize( supportedDeviceCount + deviceCount ); diff --git a/src/opencl/opencl_device.h b/src/opencl/opencl_device.h index 87e4b48a..0a740618 100644 --- a/src/opencl/opencl_device.h +++ b/src/opencl/opencl_device.h @@ -1,6 +1,6 @@ /*************************************************************************** * penguinV: https://github.com/ihhub/penguinV * - * Copyright (C) 2017 - 2022 * + * Copyright (C) 2017 - 2024 * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -20,7 +20,7 @@ #pragma once -#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#define CL_TARGET_OPENCL_VERSION 210 #if defined( __APPLE__ ) || defined( __MACOSX ) #include @@ -45,7 +45,7 @@ namespace multiCL MemoryAllocator & memory(); // Returns memory allocator for specified device ID - MemoryAllocator & memory( uint32_t deviceId ); + MemoryAllocator & memory( const uint32_t deviceId ); void memorySet( cl_mem data, const void * pattern, size_t patternSize, size_t offset, size_t size ); } @@ -54,43 +54,82 @@ namespace multiCL { public: explicit OpenCLContext( cl_device_id deviceId ); - explicit OpenCLContext( cl_context context ); + + explicit OpenCLContext( cl_context context ) + : _context( context ) + { + // Do nothing. 
+ } + + OpenCLContext( const OpenCLContext & ) = delete; + + OpenCLContext & operator=( const OpenCLContext & ) = delete; + ~OpenCLContext(); - cl_context operator()() const; + cl_context operator()() const + { + return _context; + } private: cl_context _context; - - OpenCLContext( const OpenCLContext & ); - OpenCLContext & operator=( const OpenCLContext & ); }; class OpenCLProgram { public: OpenCLProgram( const OpenCLContext & context, const char * program ); - explicit OpenCLProgram( cl_program program ); - OpenCLProgram( OpenCLProgram && program ); + + explicit OpenCLProgram( cl_program program ) + : _program( program ) + { + // Do nothing. + } + + OpenCLProgram( OpenCLProgram && program ) + : _program( NULL ) + { + std::swap( _program, program._program ); + } + + OpenCLProgram( const OpenCLProgram & ) = delete; + + OpenCLProgram & operator=( const OpenCLProgram & ) = delete; + ~OpenCLProgram(); - cl_program operator()() const; + cl_program operator()() const + { + return _program; + } private: cl_program _program; - - OpenCLProgram( const OpenCLProgram & ); - OpenCLProgram & operator=( const OpenCLProgram & ); }; class OpenCLKernel { public: OpenCLKernel( const OpenCLProgram & program, const std::string & name ); - explicit OpenCLKernel( cl_kernel kernel ); + + explicit OpenCLKernel( cl_kernel kernel ) + : _kernel( kernel ) + , _parameterId( 0 ) + { + // Do nothing. + } + + OpenCLKernel( const OpenCLKernel & ) = delete; + + OpenCLKernel & operator=( const OpenCLKernel & ) = delete; + ~OpenCLKernel(); - cl_kernel operator()() const; + cl_kernel operator()() const + { + return _kernel; + } template void setArgument( T value ) @@ -105,15 +144,15 @@ namespace multiCL setArgument( args... 
); } - void reset(); + void reset() + { + _parameterId = 0; + } private: cl_kernel _kernel; cl_uint _parameterId; - OpenCLKernel( const OpenCLKernel & ); - OpenCLKernel & operator=( const OpenCLKernel & ); - void _setArgument( size_t size, const void * data ); }; @@ -121,18 +160,28 @@ namespace multiCL { public: OpenCLQueue( const OpenCLContext & context, cl_device_id deviceId ); - explicit OpenCLQueue( cl_command_queue queue ); + + explicit OpenCLQueue( cl_command_queue queue ) + : _commandQueue( queue ) + { + // Do nothing. + } + + OpenCLQueue( const OpenCLQueue & ) = delete; + + OpenCLQueue & operator=( const OpenCLQueue & ) = delete; + ~OpenCLQueue(); - cl_command_queue operator()() const; + cl_command_queue operator()() const + { + return _commandQueue; + } void synchronize(); private: cl_command_queue _commandQueue; - - OpenCLQueue( const OpenCLQueue & ); - OpenCLQueue & operator=( const OpenCLQueue & ); }; class OpenCLDevice @@ -140,50 +189,108 @@ namespace multiCL public: friend class OpenCLDeviceManager; + OpenCLDevice( const OpenCLDevice & ) = delete; + + OpenCLDevice & operator=( const OpenCLDevice & ) = delete; + ~OpenCLDevice(); // Device information - cl_device_id deviceId() const; + cl_device_id deviceId() const + { + return _deviceId; + } + + // Maximum available number of threads per block. + size_t threadsPerBlock( const OpenCLKernel & kernel ) const; - size_t threadsPerBlock( const OpenCLKernel & kernel ) const; // maximum available number of threads per block + // Total available memory in bytes. 
+ uint64_t totalMemorySize() const; - uint64_t totalMemorySize() const; // total available memory in bytes std::string name() const; + std::string computeCapability() const; // Device manipulation void synchronize(); // synchronize all operations on device with CPU - OpenCLContext & context(); - const OpenCLContext & context() const; + OpenCLContext & context() + { + return _context; + } + + const OpenCLContext & context() const + { + return _context; + } - size_t currentQueueId() const; // current queue ID which is used as a default value in queue() function - void setCurrentQueueId( size_t queueId ); + // Current queue ID which is used as a default value in queue() function. + size_t currentQueueId() const + { + return _currentQueueId; + } - OpenCLQueue & queue(); // a reference to current queue - const OpenCLQueue & queue() const; + void setCurrentQueueId( const size_t queueId ) + { + if ( _currentQueueId != queueId && queueId < _queue.size() ) { + _currentQueueId = queueId; + } + } - OpenCLQueue & queue( size_t queueId ); // a reference to queue with specified ID - const OpenCLQueue & queue( size_t queueId ) const; + // A reference to current queue. + OpenCLQueue & queue() + { + return *( _queue[_currentQueueId] ); + } + + const OpenCLQueue & queue() const + { + return *( _queue[_currentQueueId] ); + } + + // A reference to queue with specified ID. + OpenCLQueue & queue( size_t queueId ) + { + return *( _queue[queueId] ); + } + + const OpenCLQueue & queue( size_t queueId ) const + { + return *( _queue[queueId] ); + } + + // Total number of queues. + size_t queueCount() const + { + return _queue.size(); + } - size_t queueCount() const; // total number of queues void setQueueCount( size_t queueCount ); - MemoryAllocator & allocator(); // memory allocator associated with device - const MemoryAllocator & allocator() const; + // Memory allocator associated with device. 
+ MemoryAllocator & allocator() + { + return *_allocator; + } + + const MemoryAllocator & allocator() const + { + return *_allocator; + } private: cl_device_id _deviceId; OpenCLContext _context; size_t _currentQueueId; - std::vector _queue; // array of queues within the device - MemoryAllocator * _allocator; // memory allocator on current device + // Array of queues within the device. + std::vector _queue; + + // Memory allocator on the current device. + MemoryAllocator * _allocator; explicit OpenCLDevice( cl_device_id deviceId ); - OpenCLDevice( const OpenCLDevice & ); - OpenCLDevice & operator=( const OpenCLDevice & ); }; class OpenCLDeviceManager @@ -191,29 +298,61 @@ namespace multiCL public: static OpenCLDeviceManager & instance(); - void initializeDevices(); // initializes all devices available in system - void initializeDevice( uint32_t deviceId ); // initializes a device with specified ID - void closeDevice( uint32_t deviceId ); // closes initialized device with specified ID - void closeDevices(); // closes all devices initialized by manager + // Initializes all devices available in system. + void initializeDevices(); + + // Initializes a device with specified ID. + void initializeDevice( uint32_t deviceId ); - uint32_t deviceCount() const; // initialized devices via manager - uint32_t supportedDeviceCount() const; // maximum available devices in the system + // Closes initialized device with specified ID. + void closeDevice( uint32_t deviceId ); - OpenCLDevice & device(); // returns device within current thread - const OpenCLDevice & device() const; // returns device within current thread + // Closes all devices initialized by manager. + void closeDevices(); - OpenCLDevice & device( uint32_t deviceId ); // returns device with specified ID - const OpenCLDevice & device( uint32_t deviceId ) const; // returns device with specified ID + // Initialized devices via manager. 
+ uint32_t deviceCount() const + { + return static_cast( _device.size() ); + } - void setActiveDevice( uint32_t deviceId ); // set device with specified ID as a active device in current thread + // Maximum available devices in the system. + uint32_t supportedDeviceCount() const + { + return static_cast( _supportedDeviceId.size() ); + } + + // Returns device within current thread. + OpenCLDevice & device(); + + // Returns device within current thread. + const OpenCLDevice & device() const; + + // Returns device with specified ID. + OpenCLDevice & device( uint32_t deviceId ); + + // Returns device with specified ID. + const OpenCLDevice & device( uint32_t deviceId ) const; + + // Set device with specified ID as an active device in current thread. + void setActiveDevice( uint32_t deviceId ); void resetSupportedDevice(); private: - OpenCLDeviceManager(); - ~OpenCLDeviceManager(); + OpenCLDeviceManager() + { + resetSupportedDevice(); + } + + ~OpenCLDeviceManager() + { + closeDevices(); + } std::vector _supportedDeviceId; - std::list _device; // a list of initialized devices + + // A list of initialized devices. 
+ std::list _device; }; } diff --git a/src/opencl/opencl_helper.cpp b/src/opencl/opencl_helper.cpp index 33da7b90..4e2079ec 100644 --- a/src/opencl/opencl_helper.cpp +++ b/src/opencl/opencl_helper.cpp @@ -1,6 +1,6 @@ /*************************************************************************** * penguinV: https://github.com/ihhub/penguinV * - * Copyright (C) 2017 - 2022 * + * Copyright (C) 2017 - 2024 * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -27,14 +27,22 @@ namespace { + bool isGPUSupportEnabled = true; + bool isCPUSupportEnabled = false; + struct dim3 { - size_t x, y, z; - dim3( size_t vx = 1, size_t vy = 1, size_t vz = 1 ) + size_t x{ 1 }; + size_t y{ 1 }; + size_t z{ 1 }; + + dim3( const size_t vx = 1, const size_t vy = 1, const size_t vz = 1 ) : x( vx ) , y( vy ) , z( vz ) - {} + { + // Do nothing. + } }; // Helper functions for internal calculations @@ -74,40 +82,43 @@ namespace multiCL::openCLCheck( clWaitForEvents( 1, &waitingEvent ) ); } - - bool isGPUSupportEnabled = true; - bool isCPUSupportEnabled = false; } namespace multiCL { bool isOpenCLSupported() { - if ( !isGPUSupportEnabled && !isCPUSupportEnabled ) + if ( !isGPUSupportEnabled && !isCPUSupportEnabled ) { return false; + } cl_uint platformCount = 0u; - if ( !openCLSafeCheck( clGetPlatformIDs( 0, NULL, &platformCount ) ) ) + if ( !openCLSafeCheck( clGetPlatformIDs( 0, NULL, &platformCount ) ) ) { return false; + } - if ( platformCount == 0u ) + if ( platformCount == 0u ) { return false; + } std::vector platform( platformCount ); - if ( !openCLSafeCheck( clGetPlatformIDs( platformCount, platform.data(), NULL ) ) ) + if ( !openCLSafeCheck( clGetPlatformIDs( platformCount, platform.data(), NULL ) ) ) { return false; + } const cl_device_type deviceType = ( isGPUSupportEnabled ? CL_DEVICE_TYPE_GPU : 0u ) + ( isCPUSupportEnabled ? 
CL_DEVICE_TYPE_CPU : 0u ); - for ( std::vector::const_iterator singlePlatform = platform.begin(); singlePlatform != platform.end(); ++singlePlatform ) { + for ( auto singlePlatform = platform.begin(); singlePlatform != platform.end(); ++singlePlatform ) { cl_uint deviceCount = 0u; - if ( !openCLSafeCheck( clGetDeviceIDs( *singlePlatform, deviceType, 0, NULL, &deviceCount ) ) ) + if ( !openCLSafeCheck( clGetDeviceIDs( *singlePlatform, deviceType, 0, NULL, &deviceCount ) ) ) { continue; + } - if ( deviceCount > 0u ) + if ( deviceCount > 0u ) { return true; + } } return false; @@ -127,8 +138,9 @@ namespace multiCL void openCLCheck( cl_int error ) { - if ( error != CL_SUCCESS ) + if ( error != CL_SUCCESS ) { throw penguinVException( std::string( "Failed to run OpenCL function with error " ) + std::to_string( error ) ); + } } bool openCLSafeCheck( cl_int error ) @@ -146,14 +158,16 @@ namespace multiCL std::fstream file; file.open( fileName, std::fstream::in | std::fstream::binary ); - if ( !file ) + if ( !file ) { return OpenCLProgram( context, "" ); + } file.seekg( 0, file.end ); const std::streamoff fileLength = file.tellg(); - if ( fileLength == std::char_traits::pos_type( -1 ) ) + if ( fileLength == std::char_traits::pos_type( -1 ) ) { return OpenCLProgram( context, "" ); + } file.seekg( 0, file.beg ); @@ -169,8 +183,13 @@ namespace multiCL KernelParameters::KernelParameters() : dimensionCount( 1 ) { - dimensionSize[0] = dimensionSize[1] = dimensionSize[2] = 1u; - threadsPerBlock[0] = threadsPerBlock[1] = threadsPerBlock[2] = 1u; + dimensionSize[0] = 1u; + dimensionSize[1] = 1u; + dimensionSize[2] = 1u; + + threadsPerBlock[0] = 1u; + threadsPerBlock[1] = 1u; + threadsPerBlock[2] = 1u; } KernelParameters::KernelParameters( size_t sizeX, size_t threadsPerX ) @@ -179,9 +198,12 @@ namespace multiCL assert( ( sizeX >= threadsPerX ) && ( threadsPerX > 0 ) && ( ( sizeX % threadsPerX ) == 0 ) ); dimensionSize[0] = sizeX; - dimensionSize[1] = dimensionSize[2] = 1u; + 
dimensionSize[1] = 1u; + dimensionSize[2] = 1u; + threadsPerBlock[0] = threadsPerX; - threadsPerBlock[1] = threadsPerBlock[2] = 1u; + threadsPerBlock[1] = 1u; + threadsPerBlock[2] = 1u; } KernelParameters::KernelParameters( size_t sizeX, size_t sizeY, size_t threadsPerX, size_t threadsPerY ) @@ -193,6 +215,7 @@ namespace multiCL dimensionSize[0] = sizeX; dimensionSize[1] = sizeY; dimensionSize[2] = 1u; + threadsPerBlock[0] = threadsPerX; threadsPerBlock[1] = threadsPerY; threadsPerBlock[2] = 1u; @@ -207,6 +230,7 @@ namespace multiCL dimensionSize[0] = sizeX; dimensionSize[1] = sizeY; dimensionSize[2] = sizeZ; + threadsPerBlock[0] = threadsPerX; threadsPerBlock[1] = threadsPerY; threadsPerBlock[2] = threadsPerZ; diff --git a/src/opencl/opencl_helper.h b/src/opencl/opencl_helper.h index 6c2397c4..cda68e83 100644 --- a/src/opencl/opencl_helper.h +++ b/src/opencl/opencl_helper.h @@ -1,6 +1,6 @@ /*************************************************************************** * penguinV: https://github.com/ihhub/penguinV * - * Copyright (C) 2017 - 2022 * + * Copyright (C) 2017 - 2024 * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -20,6 +20,8 @@ #pragma once +#define CL_TARGET_OPENCL_VERSION 210 + #if defined( __APPLE__ ) || defined( __MACOSX ) #include #else @@ -38,8 +40,11 @@ namespace multiCL void enableDeviceSupport( bool enableGPUSupport = true, bool enableCPUSupport = false ); void getDeviceSupportStatus( bool & isGPUSupportActive, bool & isCPUSupportActive ); - void openCLCheck( cl_int error ); // validates cl_int value and throws an exception if the value is not CL_SUCCESS - bool openCLSafeCheck( cl_int error ); // validates cl_int and returns true if the error is CL_SUCCESS + // Validates cl_int value and throws an exception if the value is not CL_SUCCESS. + void openCLCheck( cl_int error ); + + // Validates cl_int and returns true if the error is CL_SUCCESS. 
+ bool openCLSafeCheck( cl_int error ); OpenCLProgram CreateProgramFromFile( const std::string & fileName ); OpenCLProgram CreateProgramFromFile( const std::string & fileName, const OpenCLContext & context ); @@ -53,8 +58,12 @@ namespace multiCL KernelParameters( size_t sizeX, size_t sizeY, size_t sizeZ, size_t threadsPerX, size_t threadsPerY, size_t threadsPerZ ); // 3D cl_uint dimensionCount; - size_t dimensionSize[3]; // Global work size - size_t threadsPerBlock[3]; // Local work size + + // Global work size. + size_t dimensionSize[3]; + + // Local work size. + size_t threadsPerBlock[3]; }; // Helper function which returns calculated KernelParameters structure for kernel to be executed on current OpenCL device diff --git a/src/opencl/opencl_memory.h b/src/opencl/opencl_memory.h index fbf1fc60..6c679a03 100644 --- a/src/opencl/opencl_memory.h +++ b/src/opencl/opencl_memory.h @@ -1,6 +1,6 @@ /*************************************************************************** * penguinV: https://github.com/ihhub/penguinV * - * Copyright (C) 2017 - 2022 * + * Copyright (C) 2017 - 2024 * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -20,6 +20,8 @@ #pragma once +#define CL_TARGET_OPENCL_VERSION 210 + #if defined( __APPLE__ ) || defined( __MACOSX ) #include #else @@ -42,10 +44,15 @@ namespace multiCL , _availableSize( availableSpace ) { const cl_int error = clGetDeviceInfo( deviceId, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( cl_uint ), &_minimumSizeChunk, NULL ); - if ( error != CL_SUCCESS ) + if ( error != CL_SUCCESS ) { throw std::logic_error( "Cannot get an information about minimum allocation size on OpenCL device" ); + } } + MemoryAllocator( const MemoryAllocator & allocator ) = delete; + + MemoryAllocator & operator=( const MemoryAllocator & ) = delete; + virtual ~MemoryAllocator() { _free(); @@ -68,33 +75,35 @@ namespace multiCL cl_int error; cl_mem memory = 
clCreateSubBuffer( _data, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &error ); - if ( error != CL_SUCCESS ) + if ( error != CL_SUCCESS ) { throw std::logic_error( "Cannot allocate a subbuffer memory for OpenCL device" ); + } - _allocatedChunk.insert( - std::pair>( memory, std::pair( *( _freeChunk[level].begin() ), level ) ) ); + _allocatedChunk.emplace( + memory, std::pair( *( _freeChunk[level].begin() ), level ) ); _freeChunk[level].erase( _freeChunk[level].begin() ); return memory; } } - // if no space is in preallocated memory just allocate as usual memory + // If no space is in preallocated memory just allocate as usual memory. cl_int error; cl_mem memory = clCreateBuffer( _context, CL_MEM_READ_WRITE, size, NULL, &error ); - if ( error != CL_SUCCESS ) + if ( error != CL_SUCCESS ) { throw std::logic_error( "Cannot allocate a memory for OpenCL device" ); + } return memory; } - // deallocates a memory by given memory structure pointer - // if a pointer points on allocated chuck of memory inside the allocator then - // the allocator just removes a reference to such area without any cost - // otherwise OpenCL specific function will be called + // Deallocates a memory by given memory structure pointer. + // If a pointer points on allocated chunk of memory inside the allocator then + // the allocator just removes a reference to such area without any cost, + // otherwise OpenCL specific function will be called. 
void free( cl_mem memory ) { if ( _data != NULL ) { - std::map>::iterator pos = _allocatedChunk.find( memory ); + auto pos = _allocatedChunk.find( memory ); if ( pos != _allocatedChunk.end() ) { _freeChunk[pos->second.second].insert( pos->second.first ); @@ -103,8 +112,9 @@ namespace multiCL } } - if ( clReleaseMemObject( memory ) != CL_SUCCESS ) + if ( clReleaseMemObject( memory ) != CL_SUCCESS ) { throw std::logic_error( "Cannot deallocate a memory for OpenCL device" ); + } } // returns maximum available space which could be allocated by allocator @@ -116,56 +126,55 @@ namespace multiCL private: cl_context _context; cl_uint _minimumSizeChunk; - cl_mem _data; // a pointer to memory allocated chunk - const size_t _availableSize; // maximum available memory size - // a map which holds an information about allocated memory in preallocated memory chunk - // first paramter is a pointer to allocated memory in OpenCL terms - // second parameter is an offset from preallocated memory - // third parameter is a power of 2 (level) + // A pointer to memory allocated chunk. + cl_mem _data; + + // Maximum available memory size. + const size_t _availableSize; + + // A map which holds an information about allocated memory in preallocated memory chunk. + // Key is a pointer to allocated memory in OpenCL terms. + // Value is an offset from preallocated memory and level. std::map> _allocatedChunk; - // true memory allocation on OpenCL devices + // True memory allocation on OpenCL devices. virtual void _allocate( size_t size ) { - if ( size > _availableSize ) + if ( size > _availableSize ) { throw std::logic_error( "Memory size to be allocated is bigger than available size on device" ); + } if ( _size != size && size > 0 ) { - if ( !_allocatedChunk.empty() ) + if ( !_allocatedChunk.empty() ) { throw std::logic_error( "Cannot free a memory on OpenCL device. Not all objects were previously deallocated from allocator." 
); + } _free(); cl_int error; _data = clCreateBuffer( _context, CL_MEM_READ_WRITE, size, NULL, &error ); - if ( error != CL_SUCCESS ) + if ( error != CL_SUCCESS ) { throw std::logic_error( "Cannot allocate a memory for OpenCL device" ); + } _size = size; } } - // true memory deallocation on OpenCL device + // True memory deallocation on OpenCL device. virtual void _deallocate() { - if ( _data != NULL ) { + if ( _data != nullptr ) { cl_int error = clReleaseMemObject( _data ); - if ( error != CL_SUCCESS ) + if ( error != CL_SUCCESS ) { throw std::logic_error( "Cannot deallocate a memory for OpenCL device" ); - _data = NULL; + } + + _data = nullptr; } _allocatedChunk.clear(); } - - MemoryAllocator( const MemoryAllocator & allocator ) - : BaseMemoryAllocator( allocator ) - , _availableSize( 0 ) - {} - MemoryAllocator & operator=( const MemoryAllocator & ) - { - return ( *this ); - } }; } diff --git a/src/opencl/opencl_types.h b/src/opencl/opencl_types.h index 6f80c2f5..886c37a4 100644 --- a/src/opencl/opencl_types.h +++ b/src/opencl/opencl_types.h @@ -1,6 +1,6 @@ /*************************************************************************** * penguinV: https://github.com/ihhub/penguinV * - * Copyright (C) 2017 - 2022 * + * Copyright (C) 2017 - 2024 * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -35,24 +35,24 @@ namespace multiCL { public: Type() - : _data( NULL ) { _allocate(); } Type( const TData & in ) - : _data( NULL ) { _allocate(); _copyFrom( in ); } Type( Type && in ) - : _data( NULL ) { _swap( in ); } + // Copy constructor is disabled to avoid a situation of assigning this type as a kernel argument. + Type( const Type & ) = delete; + ~Type() { _free(); @@ -89,14 +89,14 @@ namespace multiCL return _data; } - // Use this function if you want to retrieve a value from device to host + // Use this function if you want to retrieve a value from device to host. 
TData get() const { return _copyTo(); } private: - cl_mem _data; + cl_mem _data{ NULL }; void _free() { @@ -125,68 +125,59 @@ namespace multiCL void _copyFrom( const TData & in ) { - if ( _data != NULL ) { - cl_int error = clEnqueueWriteBuffer( OpenCLDeviceManager::instance().device().queue()(), _data, CL_TRUE, 0, sizeof( TData ), &in, 0, NULL, NULL ); - if ( error != CL_SUCCESS ) - throw penguinVException( "Cannot copy a memory into OpenCL device" ); - } - else { + if ( _data == NULL ) { throw penguinVException( "Memory in OpenCL device is not allocated" ); } + + cl_int error = clEnqueueWriteBuffer( OpenCLDeviceManager::instance().device().queue()(), _data, CL_TRUE, 0, sizeof( TData ), &in, 0, NULL, NULL ); + if ( error != CL_SUCCESS ) { + throw penguinVException( "Cannot copy a memory into OpenCL device" ); + } } TData _copyTo() const { TData out; - if ( _data != NULL ) { - cl_int error = clEnqueueReadBuffer( OpenCLDeviceManager::instance().device().queue()(), _data, CL_TRUE, 0, sizeof( TData ), &out, 0, NULL, NULL ); - if ( error != CL_SUCCESS ) - throw penguinVException( "Cannot copy a memory from OpenCL device" ); - } - else { + if ( _data == NULL ) { throw penguinVException( "Memory in OpenCL device is not allocated" ); } + cl_int error = clEnqueueReadBuffer( OpenCLDeviceManager::instance().device().queue()(), _data, CL_TRUE, 0, sizeof( TData ), &out, 0, NULL, NULL ); + if ( error != CL_SUCCESS ) { + throw penguinVException( "Cannot copy a memory from OpenCL device" ); + } + return out; } - - Type( const Type & ) // copy constructor is disabled to avoid a situation of assigning this type as a kernel argument - {} }; - // A class which contains an array of values of specific type + // A class which contains an array of values of specific type. 
template class Array { public: - Array() - : _data( NULL ) - , _size( 0 ) - {} + Array() = default; Array( const std::vector & data ) - : _data( NULL ) - , _size( 0 ) { _allocate( data.size() ); _copyFrom( data ); } Array( size_t size ) - : _data( NULL ) - , _size( 0 ) { _allocate( size ); } Array( Array && in ) - : _data( NULL ) - , _size( 0 ) { _swap( in ); } + // Copy constructor is disabled to avoid a situation of assigning this type as a kernel argument. + Array( const Array & ) = delete; + ~Array() { _free(); @@ -224,7 +215,7 @@ namespace multiCL return _data; } - // Use this function if you want to retrieve a value from device to host + // Use this function if you want to retrieve a value from device to host. std::vector get() const { return _copyTo(); @@ -246,8 +237,8 @@ namespace multiCL } private: - cl_mem _data; - size_t _size; + cl_mem _data{ NULL }; + size_t _size{ 0 }; void _free() { @@ -285,8 +276,9 @@ namespace multiCL if ( _data != NULL && _size == data.size() ) { cl_int error = clEnqueueWriteBuffer( OpenCLDeviceManager::instance().device().queue()(), _data, CL_TRUE, 0, _size * sizeof( TData ), data.data(), 0, NULL, NULL ); - if ( error != CL_SUCCESS ) + if ( error != CL_SUCCESS ) { throw penguinVException( "Cannot copy a memory into OpenCL device" ); + } } } @@ -297,14 +289,12 @@ namespace multiCL if ( _data != NULL ) { cl_int error = clEnqueueReadBuffer( OpenCLDeviceManager::instance().device().queue()(), _data, CL_TRUE, 0, _size * sizeof( TData ), out.data(), 0, NULL, NULL ); - if ( error != CL_SUCCESS ) + if ( error != CL_SUCCESS ) { throw penguinVException( "Cannot copy a memory from OpenCL device" ); + } } return out; } - - Array( const Array & ) // copy constructor is disabled to avoid a situation of assigning this type as a kernel argument - {} }; }