diff --git a/Hello_World/Makefile b/Hello_World/Makefile new file mode 100644 index 0000000..d6fedae --- /dev/null +++ b/Hello_World/Makefile @@ -0,0 +1,11 @@ +CFLAGS=-I /opt/amdgpu-pro/include -DCL_TARGET_OPENCL_VERSION=200 +LDFLAGS=-L /opt/amdgpu-pro/lib64 -lOpenCL + +hello: hello.c + cc hello.c -o hello $(CFLAGS) $(LDFLAGS) + +run: hello + ./hello + +clean: + rm -f hello diff --git a/Hello_World/hello.c b/Hello_World/hello.c index 49350af..2393ff4 100644 --- a/Hello_World/hello.c +++ b/Hello_World/hello.c @@ -58,7 +58,11 @@ #include #include #include -#include +#ifdef MAC +#include +#else +#include +#endif //////////////////////////////////////////////////////////////////////////////// @@ -95,6 +99,7 @@ int main(int argc, char** argv) size_t global; // global domain size for our calculation size_t local; // local domain size for our calculation + cl_platform_id cpPlatform; // OpenCL platform cl_device_id device_id; // compute device id cl_context context; // compute context cl_command_queue commands; // compute command queue @@ -114,13 +119,15 @@ int main(int argc, char** argv) // Connect to a compute device // int gpu = 1; - err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); + // bind to platform + err = clGetPlatformIDs(1, &cpPlatform, NULL); + err = clGetDeviceIDs(cpPlatform, gpu ? 
CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to create a device group!\n"); return EXIT_FAILURE; } - + // Create a compute context // context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); @@ -132,7 +139,7 @@ int main(int argc, char** argv) // Create a command commands // - commands = clCreateCommandQueue(context, device_id, 0, &err); + commands = clCreateCommandQueueWithProperties(context, device_id, 0, &err); if (!commands) { printf("Error: Failed to create a command commands!\n"); diff --git a/README.md b/README.md index b1c5f1e..8cda78a 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,19 @@ Simple examples of OpenCL code, which I am using to learn heterogeneous and GPU # Examples included -- `add_numbers`: add a list of numbers together. Includes detailed error handling which makes the code harder to read and understand -- `square_array`: computes *array*^2 (I am playing mostly with this one) -- `sum_array`: sums two arrays +`*` -- reproduced to work in 2025 on AMD GPU + +- *`add_numbers`: add a list of numbers together. Includes detailed error handling which makes the code harder to read and understand +- *`square_array`: computes *array*^2 (I am playing mostly with this one) +- *`sum_array`: sums two arrays - `cf4cl`: testing OpenCL C wrapper -- `Hello_World`: OpenCL "Hello World" by Apple -- `mandelbrot`: my attempt at a simple Mandelbrot set calculation +- *`Hello_World`: OpenCL "Hello World" by Apple +- *`mandelbrot`: my attempt at a simple Mandelbrot set calculation - `N-BodySimulation`: Apple's N-body simulator which clearly illustrates the speedup gained by using the GPU. 
Requires Xcode - `RayTraced_Quaternion_Julia-Set_Example`: Apple - `rng`: Illustrates how to generate random numbers in the host and in the GPU using the library [`clRNG`](http://clmathlibraries.github.io/clRNG/htmldocs/index.html) - `auger`: generates random cosmic rays on an isotropic sky +- *`waste`: compute cycle waster The examples that clearly demonstrate the computational advantage of using a GPU for processing are `N-BodySimulation`, `RayTraced_Quaternion_Julia-Set_Example` (both developed by Apple programmers) and `auger`. For `auger`, I got impressive speedups of >200x compared to a serial code on the CPU. diff --git a/RayTraced_Quaternion_Julia-Set_Example/CMakeLists.txt b/RayTraced_Quaternion_Julia-Set_Example/CMakeLists.txt new file mode 100644 index 0000000..ca3ebd2 --- /dev/null +++ b/RayTraced_Quaternion_Julia-Set_Example/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 3.2) +set(OpenGL_GL_PREFERENCE GLVND) +set(NAME ray_traced_julia_set) + +project(${NAME} C) + +find_package(OpenCL REQUIRED) +find_package(OpenGL REQUIRED) +find_package(GLUT REQUIRED) + +add_executable(${NAME} qjulia.c) +target_include_directories(${NAME} PRIVATE ${OpenCL_INCLUDE_DIRS} ${GLUT_INCLUDE_DIR} ${OPENGL_INCLUDE_DIR}) +target_link_libraries(${NAME} m ${OpenCL_LIBRARY} ${OPENGL_LIBRARIES} ${GLUT_LIBRARIES}) + +configure_file(qjulia_kernel.cl qjulia_kernel.cl COPYONLY) + + +message(OPENGL_INCLUDE_DIR: {${OPENGL_INCLUDE_DIR}}) +message(OpenCL_INCLUDE_DIRS: {${OpenCL_INCLUDE_DIRS}}) +message(GLUT_INCLUDE_DIR: {${GLUT_INCLUDE_DIR}}) + +message(OpenCL_LIBRARY: {${OpenCL_LIBRARY}}) +message(OPENGL_LIBRARIES: {${OPENGL_LIBRARIES}}) +message(GLUT_LIBRARIES: {${GLUT_LIBRARIES}}) diff --git a/RayTraced_Quaternion_Julia-Set_Example/Makefile b/RayTraced_Quaternion_Julia-Set_Example/Makefile new file mode 100644 index 0000000..022ca27 --- /dev/null +++ b/RayTraced_Quaternion_Julia-Set_Example/Makefile @@ -0,0 +1,27 @@ +NAME=ray_traced_julia_set + +OS = $(shell uname -s) + 
+CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG -I /opt/amdgpu-pro/include -DCL_TARGET_OPENCL_VERSION=200 +# No OpenCL +# CFLAGSS=-std=c99 -Wall -DUNIX -g -DDEBUG +# +# LDFLAGS=-L /opt/amdgpu-pro/lib64 -lOpenCL +# + +ifeq ($(OS),Linux) + #LIBS := -lOpenCL -lOpenGL -lGLU -lXi -lXmu -lglut + LIBS := -I /opt/amdgpu-pro/include -DCL_TARGET_OPENCL_VERSION=200 -L /opt/amdgpu-pro/lib64 -lOpenCL -lOpenGL -lGLU -lXi -lXmu -lglut +else + LIBS := -framework OpenCL -framework OpenGL -framework GLUT +endif + +all: $(NAME) + +$(NAME): qjulia.c qjulia_kernel.cl + gcc qjulia.c $(LIBS) -o $(NAME) + +fclean : + @/bin/rm -f $(NAME) + +re : fclean all diff --git a/RayTraced_Quaternion_Julia-Set_Example/README.md b/RayTraced_Quaternion_Julia-Set_Example/README.md index 5f4cfb9..ea3ad4c 100644 --- a/RayTraced_Quaternion_Julia-Set_Example/README.md +++ b/RayTraced_Quaternion_Julia-Set_Example/README.md @@ -1 +1,47 @@ -OpenCL RayTraced Quaternion Julia-Set Example ============================================= # DESCRIPTION: This example shows how to use OpenCL to raytrace a 4d quaternion Julia-Set fractal and intermix the results of a compute kernel with OpenGL for rendering. For theory and information regarding 4d quaternion julia-sets consult the following: http://local.wasp.uwa.edu.au/~pbourke/fractals/quatjulia/ http://www.omegafield.net/library/dynamical/quaternion_julia_sets.pdf http://www.evl.uic.edu/files/pdf/Sandin.RayTracerJuliaSetsbw.pdf http://www.cs.caltech.edu/~keenan/project_qjulia.html Note that the .cl compute kernel file(s) are loaded and compiled at runtime. The example source assumes that these files are in the same path as the built executable. For simplicity, this example is intended to be run from the command line. If run from within XCode, open the Run Log (Command-Shift-R) to see the output. Alternatively, run the applications from within a Terminal.app session to launch from the command line. 
Example downloaded from the [Apple OpenCL Developer website](https://developer.apple.com/opencl/). # BUILD REQUIREMENTS: Mac OS X v10.7 or later This demo uses float3 vector datatype which is only supported 10.7 and later. # RUNTIME REQUIREMENTS: Mac OS X v10.7 or later with OpenCL 1.1 # PACKAGING LIST: qjulia.c qjulia.xcodeproj qjulia_kernel.cl # CHANGES FROM PREVIOUS VERSIONS: Version 1.0 - First version. - - - Copyright (C) 2008 Apple Inc. All rights reserved. \ No newline at end of file +OpenCL RayTraced Quaternion Julia-Set Example +============================================= + +# DESCRIPTION: + +This example shows how to use OpenCL to raytrace a 4d quaternion Julia-Set +fractal and intermix the results of a compute kernel with OpenGL for rendering. + +For theory and information regarding 4d quaternion julia-sets consult the following: + +- http://local.wasp.uwa.edu.au/~pbourke/fractals/quatjulia/ +- http://www.omegafield.net/library/dynamical/quaternion_julia_sets.pdf +- http://www.evl.uic.edu/files/pdf/Sandin.RayTracerJuliaSetsbw.pdf +- http://www.cs.caltech.edu/~keenan/project_qjulia.html + +Note that the .cl compute kernel file(s) are loaded and compiled at +runtime. The example source assumes that these files are in the same +path as the built executable. + +For simplicity, this example is intended to be run from the command line. +If run from within XCode, open the Run Log (Command-Shift-R) to see the +output. Alternatively, run the applications from within a Terminal.app +session to launch from the command line. + +Example downloaded from the [Apple OpenCL Developer website](https://developer.apple.com/opencl/). + +# BUILD REQUIREMENTS: + +Mac OS X v10.7 or later + +This demo uses float3 vector datatype which is only supported 10.7 and later. 
+ +# RUNTIME REQUIREMENTS: + +Mac OS X v10.7 or later with OpenCL 1.1 + +# PACKAGING LIST: + +- qjulia.c +- qjulia.xcodeproj +- qjulia_kernel.cl + +# CHANGES FROM PREVIOUS VERSIONS: + +Version 1.0 - First version. + +Copyright (C) 2008 Apple Inc. All rights reserved. diff --git a/RayTraced_Quaternion_Julia-Set_Example/qjulia.c b/RayTraced_Quaternion_Julia-Set_Example/qjulia.c index 1efef2c..12adec5 100644 --- a/RayTraced_Quaternion_Julia-Set_Example/qjulia.c +++ b/RayTraced_Quaternion_Julia-Set_Example/qjulia.c @@ -63,18 +63,23 @@ #include #include #include +#include +#ifdef __APPLE__ #include #include #include #include #include - -#include +#else +#include +#include +#include +#endif //////////////////////////////////////////////////////////////////////////////// -#define USE_GL_ATTACHMENTS (1) // enable OpenGL attachments for Compute results +#define USE_GL_ATTACHMENTS (0) // enable OpenGL attachments for Compute results #define DEBUG_INFO (0) #define COMPUTE_KERNEL_FILENAME ("qjulia_kernel.cl") #define COMPUTE_KERNEL_METHOD_NAME ("QJuliaKernel") @@ -159,7 +164,7 @@ DivideUp(int a, int b) static uint64_t GetCurrentTime() { - return mach_absolute_time(); + return time(NULL); } static double @@ -167,14 +172,6 @@ SubtractTime( uint64_t uiEndTime, uint64_t uiStartTime ) { - static double s_dConversion = 0.0; + static double s_dConversion = 1.0; /* GetCurrentTime() now returns time(NULL), i.e. whole seconds, so ticks-to-seconds is 1:1; leaving this at 0.0 made every interval evaluate to 0 */ uint64_t uiDifference = uiEndTime - uiStartTime; - if( 0.0 == s_dConversion ) - { - mach_timebase_info_data_t kTimebase; - kern_return_t kError = mach_timebase_info( &kTimebase ); - if( kError == 0 ) - s_dConversion = 1e-9 * (double) kTimebase.numer / (double) kTimebase.denom; - } - return s_dConversion * (double) uiDifference; } @@ -302,8 +299,7 @@ CreateTexture(uint width, uint height) TextureWidth = width; TextureHeight = height; - - glActiveTextureARB(ActiveTextureUnit); + glGenTextures(1, &TextureId); glBindTexture(TextureTarget, TextureId); glTexParameteri(TextureTarget, GL_TEXTURE_WRAP_S, GL_CLAMP); @@ -611,7 +607,7 @@ SetupComputeDevices(int gpu) // Create
a context containing the compute device(s) // - ComputeContext = clCreateContext(0, 1, &ComputeDeviceId, clLogMessagesToStdoutAPPLE, NULL, &err); + ComputeContext = clCreateContext(0, 1, &ComputeDeviceId, NULL, NULL, &err); if (!ComputeContext) { printf("Error: Failed to create a compute context!\n"); diff --git a/add_numbers/Makefile b/add_numbers/Makefile old mode 100755 new mode 100644 index 5cb9fa7..66d0fba --- a/add_numbers/Makefile +++ b/add_numbers/Makefile @@ -2,10 +2,13 @@ PROJ=add_numbers CC=gcc -CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG +#CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG +CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG -I /opt/amdgpu-pro/include -DCL_TARGET_OPENCL_VERSION=200 # No OpenCL CFLAGSS=-std=c99 -Wall -DUNIX -g -DDEBUG +LDFLAGS=-L /opt/amdgpu-pro/lib64 -lOpenCL + # Check for 32-bit vs 64-bit PROC_TYPE = $(strip $(shell uname -m | grep 64)) @@ -26,7 +29,8 @@ ifneq ($(DARWIN),) else # Linux OS -LIBS=-lOpenCL +#LIBS=-lOpenCL +LIBS=$(LDFLAGS) ifeq ($(PROC_TYPE),) CFLAGS+=-m32 else @@ -54,6 +58,9 @@ endif $(PROJ): $(PROJ).c $(CC) $(CFLAGS) -o $@ $^ $(INC_DIRS:%=-I%) $(LIB_DIRS:%=-L%) $(LIBS) +run: $(PROJ) + ./$(PROJ) + .PHONY: clean clean: diff --git a/add_numbers/add_numbers.c b/add_numbers/add_numbers.c index e85274f..2242b09 100755 --- a/add_numbers/add_numbers.c +++ b/add_numbers/add_numbers.c @@ -194,7 +194,7 @@ int main() { Does not support profiling or out-of-order-execution */ - queue = clCreateCommandQueue(context, device, 0, &err); + queue = clCreateCommandQueueWithProperties(context, device, 0, &err); if(err < 0) { perror("Couldn't create a command queue"); exit(1); diff --git a/auger/Makefile b/auger/Makefile index f1e313b..eb246ca 100644 --- a/auger/Makefile +++ b/auger/Makefile @@ -1,13 +1,24 @@ # C CC = gcc -CFLAGS = -O2 -std=c99 -Wall -DUNIX -g -DDEBUG +#CFLAGS = -O2 -std=c99 -Wall -DUNIX -g -DDEBUG +CFLAGS = -O2 -std=c99 -Wall -DUNIX -g -DDEBUG \ + -I /opt/amdgpu-pro/include -DCL_TARGET_OPENCL_VERSION=200 \ + -I . 
\ + -I clRNG \ + -I clRNG-1.0.0-beta-Linux64/include + +LDFLAGS=-L /opt/amdgpu-pro/lib64 -lOpenCL # Fortran FC = gfortran FFLAGS = -O # Additional libraries and source files -LIBS = -lm -lclRNG +#LIBS = -lm -lclRNG +LIBS = -lm \ + -L . -L clRNG -L clRNG-1.0.0-beta-Linux64/lib64 -lclRNG \ + $(LDFLAGS) + SRCS = clbuild.c # Check for 32-bit vs 64-bit @@ -23,7 +34,8 @@ ifneq ($(DARWIN),) LIBS += -framework OpenCL else # Linux OS - LIBS += -lOpenCL + #LIBS += -lOpenCL + LIBS += $(LDFLAGS) ifeq ($(PROC_TYPE),) CFLAGS+=-m32 else diff --git a/auger/cr.cl b/auger/cr.cl index 0e3d851..bcfa446 100644 --- a/auger/cr.cl +++ b/auger/cr.cl @@ -8,9 +8,10 @@ a random UHECR position. - n: total number of cosmic rays desired */ -#include - +//#include #include "exposure.clh" +#include "mrg31k3p.clh" + @@ -44,4 +45,4 @@ __kernel void cr(__global clrngMrg31k3pHostStream* streams, __global float* xa, break; } } -} \ No newline at end of file +} diff --git a/cf4cl/READMe.md b/cf4cl/README.md similarity index 81% rename from cf4cl/READMe.md rename to cf4cl/README.md index c37ef52..981dae4 100644 --- a/cf4cl/READMe.md +++ b/cf4cl/README.md @@ -5,6 +5,8 @@ Example on how to use the `cf4ocl` OpenCL wrapper for C. This is a library suppo Code taken from the [`cf4ocl` tutorial](http://www.fakenmc.com/cf4ocl/docs/latest/tut.html). +`cf4ocl` -- now archived on [GitHub](https://github.com/nunofachada/cf4ocl/releases/) + How to run: 1. Compile with `./make.sh` @@ -21,4 +23,4 @@ List of available OpenCL devices: (?) 
Select device (0-1) > ``` -You need a comparable number of lines of `cf4ocl` code compared to opencl, so I do not see immediately an advantage… \ No newline at end of file +You need a comparable number of lines of `cf4ocl` code compared to opencl, so I do not see immediately an advantage… diff --git a/mandelbrot/Makefile b/mandelbrot/Makefile old mode 100755 new mode 100644 index 8272bd7..09f4524 --- a/mandelbrot/Makefile +++ b/mandelbrot/Makefile @@ -2,10 +2,13 @@ PROJ=mandelbrot CC=gcc -CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG +#CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG +CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG -I /opt/amdgpu-pro/include -DCL_TARGET_OPENCL_VERSION=200 # No OpenCL CFLAGSS=-std=c99 -Wall -DUNIX -g -DDEBUG +LDFLAGS=-L /opt/amdgpu-pro/lib64 -lOpenCL + # Check for 32-bit vs 64-bit PROC_TYPE = $(strip $(shell uname -m | grep 64)) @@ -26,7 +29,8 @@ ifneq ($(DARWIN),) else # Linux OS -LIBS=-lOpenCL +#LIBS=-lOpenCL +LIBS=-lm $(LDFLAGS) ifeq ($(PROC_TYPE),) CFLAGS+=-m32 else @@ -54,6 +58,9 @@ endif $(PROJ): $(PROJ).c $(CC) $(CFLAGS) -o $@ $^ clbuild.c $(INC_DIRS:%=-I%) $(LIB_DIRS:%=-L%) $(LIBS) +run: $(PROJ) + ./$(PROJ) + .PHONY: clean clean: diff --git a/mandelbrot/mandelbrot.c b/mandelbrot/mandelbrot.c old mode 100755 new mode 100644 index 1650d13..70dab24 --- a/mandelbrot/mandelbrot.c +++ b/mandelbrot/mandelbrot.c @@ -4,7 +4,7 @@ #include "defs.h" - +#include int main() { @@ -59,7 +59,7 @@ int main() { program = build_program(context, device, PROGRAM_FILE); /* Create a command queue */ - queue = clCreateCommandQueue(context, device, 0, &err); + queue = clCreateCommandQueueWithProperties(context, device, 0, &err); diff --git a/square_array/Makefile b/square_array/Makefile old mode 100755 new mode 100644 index 090ba3b..198b6d5 --- a/square_array/Makefile +++ b/square_array/Makefile @@ -6,6 +6,8 @@ CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG # No OpenCL CFLAGSS=-std=c99 -Wall -DUNIX -g -DDEBUG +LDFLAGS=-L /opt/amdgpu-pro/lib64 -lOpenCL -lm + # Check for 32-bit vs 
64-bit PROC_TYPE = $(strip $(shell uname -m | grep 64)) @@ -26,7 +28,7 @@ ifneq ($(DARWIN),) else # Linux OS -LIBS=-lOpenCL +LIBS=$(LDFLAGS) ifeq ($(PROC_TYPE),) CFLAGS+=-m32 else @@ -54,6 +56,9 @@ endif $(PROJ): $(PROJ).c $(CC) $(CFLAGS) -o $@ $^ clbuild.c $(INC_DIRS:%=-I%) $(LIB_DIRS:%=-L%) $(LIBS) +run: $(PROJ) + ./$(PROJ) + .PHONY: clean clean: diff --git a/square_array/square.c b/square_array/square.c old mode 100755 new mode 100644 index bf09b6e..995fd29 --- a/square_array/square.c +++ b/square_array/square.c @@ -3,6 +3,7 @@ #include "defs.h" +#include int main() { @@ -47,7 +48,7 @@ int main() { program = build_program(context, device, PROGRAM_FILE); /* Create a command queue */ - queue = clCreateCommandQueue(context, device, 0, &err); + queue = clCreateCommandQueueWithProperties(context, device, 0, &err); /* Create data buffer Create the input and output arrays in device memory for our @@ -94,7 +95,7 @@ int main() { err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local_size), &local_size, NULL); // Number of total work items - localSize must be devisor global_size = ceil(ARRAY_SIZE/(float)local_size)*local_size; - printf("global=%u, local=%u\n", global_size, local_size); + printf("global=%zu, local=%zu\n", global_size, local_size); //size_t global_size[3] = {ARRAY_SIZE, 0, 0}; // for 3D data //size_t local_size[3] = {WG_SIZE, 0, 0}; diff --git a/square_array/square.cl b/square_array/square.cl old mode 100755 new mode 100644 diff --git a/sum_array/Makefile b/sum_array/Makefile old mode 100755 new mode 100644 index ad118ca..96ad826 --- a/sum_array/Makefile +++ b/sum_array/Makefile @@ -1,5 +1,7 @@ PROJ=vecAdd +LDFLAGS=-L /opt/amdgpu-pro/lib64 -lOpenCL + CC=gcc CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG @@ -26,7 +28,7 @@ ifneq ($(DARWIN),) else # Linux OS -LIBS=-lOpenCL +LIBS=-L /opt/amdgpu-pro/lib64 -lOpenCL -lm ifeq ($(PROC_TYPE),) CFLAGS+=-m32 else @@ -54,6 +56,9 @@ endif $(PROJ): $(PROJ).c $(CC) $(CFLAGS) -o $@ $^ 
$(INC_DIRS:%=-I%) $(LIB_DIRS:%=-L%) $(LIBS) +run: $(PROJ) + ./$(PROJ) + .PHONY: clean clean: diff --git a/sum_array/vecAdd.c b/sum_array/vecAdd.c index 35a6332..88007c7 100644 --- a/sum_array/vecAdd.c +++ b/sum_array/vecAdd.c @@ -11,6 +11,21 @@ #include #endif +// OpenCL kernel. Each work item takes care of one element of c +const char *kernelSource = + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n" + "__kernel void vecAdd( __global double *a, \n" + " __global double *b, \n" + " __global double *c, \n" + " const unsigned int n) \n" + "{ \n" + " //Get our global thread ID \n" + " int id = get_global_id(0); \n" + " \n" + " //Make sure we do not go out of bounds \n" + " if (id < n) \n" + " c[id] = a[id] + b[id]; \n" + "} \n"; int main( int argc, char* argv[] ) { @@ -71,7 +86,7 @@ int main( int argc, char* argv[] ) context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); // Create a command queue - queue = clCreateCommandQueue(context, device_id, 0, &err); + queue = clCreateCommandQueueWithProperties(context, device_id, 0, &err); // Create the compute program from the source buffer program = clCreateProgramWithSource(context, 1, @@ -132,4 +147,4 @@ int main( int argc, char* argv[] ) free(h_c); return 0; -} \ No newline at end of file +} diff --git a/waste/Makefile b/waste/Makefile old mode 100755 new mode 100644 index 435766a..0d70794 --- a/waste/Makefile +++ b/waste/Makefile @@ -6,6 +6,8 @@ CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG # No OpenCL CFLAGSS=-std=c99 -Wall -DUNIX -g -DDEBUG +LDFLAGS=-L /opt/amdgpu-pro/lib64 -lOpenCL -lm + # Check for 32-bit vs 64-bit PROC_TYPE = $(strip $(shell uname -m | grep 64)) @@ -26,7 +28,7 @@ ifneq ($(DARWIN),) else # Linux OS -LIBS=-lOpenCL +LIBS=$(LDFLAGS) ifeq ($(PROC_TYPE),) CFLAGS+=-m32 else @@ -54,6 +56,9 @@ endif $(PROJ): $(PROJ).c $(CC) $(CFLAGS) -o $@ $^ clbuild.c $(INC_DIRS:%=-I%) $(LIB_DIRS:%=-L%) $(LIBS) +run: $(PROJ) + ./$(PROJ) 10 + .PHONY: clean clean: diff --git a/waste/waste.c b/waste/waste.c old mode 100755 
new mode 100644 index 1731b89..3d4e3c8 --- a/waste/waste.c +++ b/waste/waste.c @@ -62,7 +62,7 @@ int main(int argc, char *argv[]) { program = build_program(context, device, PROGRAM_FILE); // Create a command queue - queue = clCreateCommandQueue(context, device, 0, &err); + queue = clCreateCommandQueueWithProperties(context, device, 0, &err); @@ -163,9 +163,9 @@ int main(int argc, char *argv[]) { clEnqueueReadBuffer(queue, doutput, CL_TRUE, 0, bytes, houtput, 0, NULL, NULL); // <=====GET OUTPUT /* Check result */ - /*for (i=0; i