treecode · LourensVeen · Oct 31, 2023 · Oct 30, 2023 · Oct 31, 2023 · jbedorf
diff --git a/lib/CUDAKernels/kernels.cu b/lib/CUDAKernels/kernels.cu
@@ -206,7 +206,7 @@ __device__ __forceinline__  double RSQRT(double val) { return rsqrt(val); }
 // template<>           __device__ __forceinline__  double RSQRT(double val) { return 1.0/sqrt(val); }
 
 
-
+#if __CUDA_ARCH__ < 600
 __device__ double atomicAdd(double* address, double val)
 {
     unsigned long long int* address_as_ull =
@@ -220,6 +220,7 @@ __device__ double atomicAdd(double* address, double val)
     } while (assumed != old);
     return __longlong_as_double(old);
 }
+#endif
 
 
 __device__ __forceinline__ double atomicMin(double *address, double val)

diff --git a/lib/Makefile b/lib/Makefile
@@ -41,7 +41,7 @@ NVCCVERSION=$(shell "${NVCC}" --version | grep ^Cuda | sed 's/^.* //g')
 ifeq "${NVCCVERSION}" "V5.5.22"
 	NVCCFLAGS ?= -arch sm_20
 else
-	NVCCFLAGS ?= -arch sm_30
+	NVCCFLAGS ?= -arch sm_50
 endif
 
 #NVCCFLAGS = -arch sm_35

diff --git a/lib/include/cudadev.h b/lib/include/cudadev.h
@@ -710,12 +710,14 @@ namespace dev {
         //           jitOptionCount++;
         //         }
 
-
+
+#if CUDA_VERSION < 6000
         if(computeMode < CU_TARGET_COMPUTE_20)
         {
           fprintf(stderr,"Sapporo2 requires at least a Fermi or newer NVIDIA architecture.\n");
           exit(-1);
         }
+#endif
 
         //Set the architecture
         //         {

diff --git a/lib/include/defines.h b/lib/include/defines.h
@@ -55,8 +55,15 @@ inline const char* get_kernelName(const int integrator,
     case SIXTH:
       if(precision == DOUBLESINGLE)
       {
+#ifdef _OCL_  // OpenCL build: the sixth-order double-single kernel is CUDA-only
+          fprintf(stderr, "ERROR: Sixth order integrator with double single precision\n");
+          fprintf(stderr, "ERROR: is not implemented in OpenCL, only in CUDA. Please\n");
+          fprintf(stderr, "ERROR: file an issue on GitHub if you need this combination.\n");
+          exit(1);  // abort after reporting; each message line is newline-terminated
+#else
           perThreadSM = sizeof(float4)*2 + sizeof(float4) + sizeof(float3);
-	  return "dev_evaluate_gravity_sixth_DS"; 
+#endif
+          return "dev_evaluate_gravity_sixth_DS"; 
       }
       else if(precision == DOUBLE){
 #ifdef _OCL_

diff --git a/lib/include/ocldev.h b/lib/include/ocldev.h
@@ -574,8 +574,8 @@ namespace dev {
     void copy(const memory &src, const cl_bool OCL_BLOCKING = CL_TRUE) {
       assert(ContextFlag);
       if (n != src.n) {
-	ocl_free();
-	cmalloc(src.n, DeviceMemFlags);
+        ocl_free();
+        allocate(src.n, DeviceMemFlags);
       }
       oclSafeCall(clEnqueueCopyBuffer(CommandQueue,
                                       src.DeviceMem,