Skip to content

Cuda and OpenCL fixes #8

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion lib/CUDAKernels/kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ __device__ __forceinline__ double RSQRT(double val) { return rsqrt(val); }
// template<> __device__ __forceinline__ double RSQRT(double val) { return 1.0/sqrt(val); }



#if __CUDA_ARCH__ < 600
__device__ double atomicAdd(double* address, double val)
{
unsigned long long int* address_as_ull =
Expand All @@ -220,6 +220,7 @@ __device__ double atomicAdd(double* address, double val)
} while (assumed != old);
return __longlong_as_double(old);
}
#endif


__device__ __forceinline__ double atomicMin(double *address, double val)
Expand Down
2 changes: 1 addition & 1 deletion lib/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ NVCCVERSION=$(shell "${NVCC}" --version | grep ^Cuda | sed 's/^.* //g')
ifeq "${NVCCVERSION}" "V5.5.22"
NVCCFLAGS ?= -arch sm_20
else
NVCCFLAGS ?= -arch sm_30
NVCCFLAGS ?= -arch sm_50
endif

#NVCCFLAGS = -arch sm_35
Expand Down
4 changes: 3 additions & 1 deletion lib/include/cudadev.h
Original file line number Diff line number Diff line change
Expand Up @@ -710,12 +710,14 @@ namespace dev {
// jitOptionCount++;
// }



#if CUDA_VERSION < 6000
if(computeMode < CU_TARGET_COMPUTE_20)
{
fprintf(stderr,"Sapporo2 requires at least a Fermi or newer NVIDIA architecture.\n");
exit(-1);
}
#endif

//Set the architecture
// {
Expand Down
9 changes: 8 additions & 1 deletion lib/include/defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,15 @@ inline const char* get_kernelName(const int integrator,
case SIXTH:
if(precision == DOUBLESINGLE)
{
#ifdef _OCL_
fprintf(stderr, "ERROR: Sixth order integrator with double single precision");
fprintf(stderr, "ERROR: is not implemented in OpenCL, only in CUDA. Please");
fprintf(stderr, "ERROR: file an issue on GitHub if you need this combination.");
exit(1);
#else
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess there's no need for the #else, given that there's the exit above? In that case, maybe change the #else into an #endif.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem here actually was that float3 was undefined when compiling with OpenCL, causing a compiler error. So we could move the perThreadSM = ... line outside of the #ifdef, but then it wouldn't compile for OpenCL.

I think I later saw a header somewhere that aliases some OpenCL types to CUDA-like names, so maybe float3 can be added there to fix it instead. I'll have a look.

perThreadSM = sizeof(float4)*2 + sizeof(float4) + sizeof(float3);
return "dev_evaluate_gravity_sixth_DS";
#endif
return "dev_evaluate_gravity_sixth_DS";
}
else if(precision == DOUBLE){
#ifdef _OCL_
Expand Down
4 changes: 2 additions & 2 deletions lib/include/ocldev.h
Original file line number Diff line number Diff line change
Expand Up @@ -574,8 +574,8 @@ namespace dev {
void copy(const memory &src, const cl_bool OCL_BLOCKING = CL_TRUE) {
assert(ContextFlag);
if (n != src.n) {
ocl_free();
cmalloc(src.n, DeviceMemFlags);
ocl_free();
allocate(src.n, DeviceMemFlags);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this looks to be the right thing to do.

}
oclSafeCall(clEnqueueCopyBuffer(CommandQueue,
src.DeviceMem,
Expand Down