Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
bf5b277
qjulia.c - added LINUX/APPLE include switch, use time instead of mach…
MikhailPasechnik Sep 6, 2019
ac65984
added CMakeLists.txt and Makefile
MikhailPasechnik Sep 6, 2019
51b7ba0
fix Makefile fclean
MikhailPasechnik Sep 6, 2019
5cc7a3c
Fixes sum_array example
robstewart57 Jan 9, 2020
d0ec3b3
compile Hello_World on Linux
doctor-rd Mar 27, 2021
132f904
Merge branch 'rsnemmen:master' into master
doctor-rd Jul 26, 2021
492491c
Merge pull request #2 from MikhailPasechnik/master
smokhov Jun 25, 2023
c1e75fd
Merge pull request #1 from robstewart57/master
smokhov Jun 25, 2023
0984c5e
Merge pull request #6 from NAG-DevOps/hello-linux
smokhov Nov 26, 2023
7b5375f
Merge pull request #4 from NAG-DevOps/julia-set
smokhov Nov 26, 2023
95c5722
Merge pull request #5 from NAG-DevOps/sum-array
smokhov Nov 26, 2023
9affd70
[add_numbers] adjust compile flags and make it compile and work.
smokhov Jun 19, 2025
25d3c2e
[Hellow_World][build] adjust compile flags for it to compile
smokhov Jun 19, 2025
e75be63
[Hello_World][C] switch to cpPlatform for it to compile and work.
smokhov Jun 19, 2025
9a1a9b2
[RayTraced_Quaternion_Julia-Set_Example][build] make it compile
smokhov Jun 19, 2025
0b2c4e0
[RayTraced_Quaternion_Julia-Set_Example][README] dos2unix and format
smokhov Jun 19, 2025
ab3fae3
[auger][build] adjust CLFAGS and add clRNG
smokhov Jun 20, 2025
c480a28
[mandelbrot][build] adjust CLFAGS, LIBS, and includes.
smokhov Jun 20, 2025
d2664ab
[vecAdd][build] adjust LIBS to make it link and run
smokhov Jun 20, 2025
7b9c7af
[cf4ocl][README] fix casing and add link to cf4ocl
smokhov Jun 20, 2025
7b45dba
[square_array] chmod a-x
smokhov Jun 20, 2025
012ec9a
[square_array][build] add LDFLAGS to link proper OpenCL and math
smokhov Jun 20, 2025
c95273a
[square_array] fix compile warnings for OpenCL 2 API and size_t
smokhov Jun 20, 2025
5c1a244
[waste] chmod a-x
smokhov Jun 20, 2025
1ebf4ac
[waste][build] add LFDLAGS to link the proper libs
smokhov Jun 20, 2025
72a72aa
[waste] fix API to OpenCL 2 and attempt restore output printing
smokhov Jun 20, 2025
a9f0038
[README] update testing status on EL7
smokhov Jun 20, 2025
ff73d60
[README] fix a minor typo
smokhov Jun 23, 2025
8c647c3
[build] add 'run' target for external automation to some examples
smokhov Jun 23, 2025
7b7729a
[build][waste] the example requires an argument; update 'run'
smokhov Jun 23, 2025
13e0015
switch to OpenCL 2's clCreateCommandQueueWithProperties API
smokhov Jun 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Hello_World/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Build the Apple "Hello World" OpenCL example against the AMD GPU PRO SDK.
# CFLAGS adds the SDK headers and targets the OpenCL 2.0 API;
# LDFLAGS adds the SDK library directory and links libOpenCL.
CFLAGS=-I /opt/amdgpu-pro/include -DCL_TARGET_OPENCL_VERSION=200
LDFLAGS=-L /opt/amdgpu-pro/lib64 -lOpenCL

# `run` and `clean` name actions, not files: keep them working even if a
# file with the same name appears in this directory.
.PHONY: run clean

# $(CC) defaults to `cc` in make, and can be overridden: make CC=clang
hello: hello.c
	$(CC) hello.c -o hello $(CFLAGS) $(LDFLAGS)

# Build if needed, then execute the example.
run: hello
	./hello

clean:
	rm -f hello
15 changes: 11 additions & 4 deletions Hello_World/hello.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <OpenCL/opencl.h>
#ifdef MAC
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

////////////////////////////////////////////////////////////////////////////////

Expand Down Expand Up @@ -95,6 +99,7 @@ int main(int argc, char** argv)
size_t global; // global domain size for our calculation
size_t local; // local domain size for our calculation

cl_platform_id cpPlatform; // OpenCL platform
cl_device_id device_id; // compute device id
cl_context context; // compute context
cl_command_queue commands; // compute command queue
Expand All @@ -114,13 +119,15 @@ int main(int argc, char** argv)
// Connect to a compute device
//
int gpu = 1;
err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
// bind to platform
err = clGetPlatformIDs(1, &cpPlatform, NULL);
err = clGetDeviceIDs(cpPlatform, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to create a device group!\n");
return EXIT_FAILURE;
}

// Create a compute context
//
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
Expand All @@ -132,7 +139,7 @@ int main(int argc, char** argv)

// Create a command queue
//
commands = clCreateCommandQueue(context, device_id, 0, &err);
commands = clCreateCommandQueueWithProperties(context, device_id, 0, &err);
if (!commands)
{
printf("Error: Failed to create a command commands!\n");
Expand Down
13 changes: 8 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,19 @@ Simple examples of OpenCL code, which I am using to learn heterogeneous and GPU

# Examples included

- `add_numbers`: add a list of numbers together. Includes detailed error handling which makes the code harder to read and understand
- `square_array`: computes *array*^2 (I am playing mostly with this one)
- `sum_array`: sums two arrays
`*` -- verified to build and run in 2025 on an AMD GPU

- *`add_numbers`: add a list of numbers together. Includes detailed error handling which makes the code harder to read and understand
- *`square_array`: computes *array*^2 (I am playing mostly with this one)
- *`sum_array`: sums two arrays
- `cf4cl`: testing OpenCL C wrapper
- `Hello_World`: OpenCL "Hello World" by Apple
- `mandelbrot`: my attempt at a simple Mandelbrot set calculation
- *`Hello_World`: OpenCL "Hello World" by Apple
- *`mandelbrot`: my attempt at a simple Mandelbrot set calculation
- `N-BodySimulation`: Apple's N-body simulator which clearly illustrates the speedup gained by using the GPU. Requires Xcode
- `RayTraced_Quaternion_Julia-Set_Example`: Apple
- `rng`: Illustrates how to generate random numbers in the host and in the GPU using the library [`clRNG`](http://clmathlibraries.github.io/clRNG/htmldocs/index.html)
- `auger`: generates random cosmic rays on an isotropic sky
- *`waste`: compute cycle waster

The examples that clearly demonstrate the computational advantage of using a GPU for processing are `N-BodySimulation`, `RayTraced_Quaternion_Julia-Set_Example` (both developed by Apple programmers) and `auger`. For `auger`, I got impressive speedups of >200x compared to a serial code on the CPU.

Expand Down
24 changes: 24 additions & 0 deletions RayTraced_Quaternion_Julia-Set_Example/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# CMake build for the ray-traced quaternion Julia-set example (qjulia.c).
cmake_minimum_required(VERSION 3.2)
# Ask FindOpenGL for the GLVND libraries when both GLVND and legacy GL exist.
set(OpenGL_GL_PREFERENCE GLVND)
set(NAME ray_traced_julia_set)

project(${NAME} C)

find_package(OpenCL REQUIRED)
find_package(OpenGL REQUIRED)
find_package(GLUT REQUIRED)

add_executable(${NAME} qjulia.c)
target_include_directories(${NAME} PRIVATE ${OpenCL_INCLUDE_DIRS} ${GLUT_INCLUDE_DIR} ${OPENGL_INCLUDE_DIR})
target_link_libraries(${NAME} m ${OpenCL_LIBRARY} ${OPENGL_LIBRARIES} ${GLUT_LIBRARIES})

# The kernel source is loaded at run time from the executable's directory,
# so copy it unchanged into the build tree.
configure_file(qjulia_kernel.cl qjulia_kernel.cl COPYONLY)

# Report what the find modules resolved. The text is quoted so that list
# values keep their ';' separators instead of being glued together.
message(STATUS "OPENGL_INCLUDE_DIR: {${OPENGL_INCLUDE_DIR}}")
message(STATUS "OpenCL_INCLUDE_DIRS: {${OpenCL_INCLUDE_DIRS}}")
message(STATUS "GLUT_INCLUDE_DIR: {${GLUT_INCLUDE_DIR}}")

message(STATUS "OpenCL_LIBRARY: {${OpenCL_LIBRARY}}")
message(STATUS "OPENGL_LIBRARIES: {${OPENGL_LIBRARIES}}")
message(STATUS "GLUT_LIBRARIES: {${GLUT_LIBRARIES}}")
27 changes: 27 additions & 0 deletions RayTraced_Quaternion_Julia-Set_Example/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Build the ray-traced quaternion Julia-set example (qjulia.c).
NAME=ray_traced_julia_set

# Same compiler as before, now overridable from the command line: make CC=clang
CC = gcc

OS = $(shell uname -s)

# NOTE: CFLAGS is defined here but is not passed to the compile rule below;
# on Linux the include path and OpenCL version define travel inside LIBS.
CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG -I /opt/amdgpu-pro/include -DCL_TARGET_OPENCL_VERSION=200
# No OpenCL
# CFLAGSS=-std=c99 -Wall -DUNIX -g -DDEBUG
#
# LDFLAGS=-L /opt/amdgpu-pro/lib64 -lOpenCL
#

# Linux links the AMD GPU PRO OpenCL plus OpenGL/GLUT libraries; anything
# else is treated as macOS and uses the system frameworks.
ifeq ($(OS),Linux)
#LIBS := -lOpenCL -lOpenGL -lGLU -lXi -lXmu -lglut
LIBS := -I /opt/amdgpu-pro/include -DCL_TARGET_OPENCL_VERSION=200 -L /opt/amdgpu-pro/lib64 -lOpenCL -lOpenGL -lGLU -lXi -lXmu -lglut
else
LIBS := -framework OpenCL -framework OpenGL -framework GLUT
endif

# These targets name actions, not files.
.PHONY: all fclean re

all: $(NAME)

# The kernel file is a prerequisite so that editing it triggers a rebuild.
$(NAME): qjulia.c qjulia_kernel.cl
	$(CC) qjulia.c $(LIBS) -o $(NAME)

fclean :
	@/bin/rm -f $(NAME)

# Rebuild from scratch. `all` runs in a sub-make after fclean has finished,
# so the two steps cannot race under `make -j`.
re : fclean
	$(MAKE) all
48 changes: 47 additions & 1 deletion RayTraced_Quaternion_Julia-Set_Example/README.md
Original file line number Diff line number Diff line change
@@ -1 +1,47 @@
OpenCL RayTraced Quaternion Julia-Set Example=============================================# DESCRIPTION:This example shows how to use OpenCL to raytrace a 4d quaternion Julia-Set fractal and intermix the results of a compute kernel with OpenGL for rendering.For theory and information regarding 4d quaternion julia-sets consult the following:http://local.wasp.uwa.edu.au/~pbourke/fractals/quatjulia/http://www.omegafield.net/library/dynamical/quaternion_julia_sets.pdfhttp://www.evl.uic.edu/files/pdf/Sandin.RayTracerJuliaSetsbw.pdfhttp://www.cs.caltech.edu/~keenan/project_qjulia.htmlNote that the .cl compute kernel file(s) are loaded and compiled atruntime. The example source assumes that these files are in the same path as the built executable.For simplicity, this example is intended to be run from the command line.If run from within XCode, open the Run Log (Command-Shift-R) to see the output. Alternatively, run the applications from within a Terminal.app session to launch from the command line.Example downloaded from the [Apple OpenCL Developer website](https://developer.apple.com/opencl/).# BUILD REQUIREMENTS:Mac OS X v10.7 or laterThis demo uses float3 vector datatype which is only supported 10.7 and later.# RUNTIME REQUIREMENTS:Mac OS X v10.7 or later with OpenCL 1.1# PACKAGING LIST:qjulia.cqjulia.xcodeprojqjulia_kernel.cl# CHANGES FROM PREVIOUS VERSIONS:Version 1.0- First version.- - - Copyright (C) 2008 Apple Inc. All rights reserved.
OpenCL RayTraced Quaternion Julia-Set Example
=============================================

# DESCRIPTION:

This example shows how to use OpenCL to raytrace a 4d quaternion Julia-Set
fractal and intermix the results of a compute kernel with OpenGL for rendering.

For theory and information regarding 4d quaternion julia-sets consult the following:

- http://local.wasp.uwa.edu.au/~pbourke/fractals/quatjulia/
- http://www.omegafield.net/library/dynamical/quaternion_julia_sets.pdf
- http://www.evl.uic.edu/files/pdf/Sandin.RayTracerJuliaSetsbw.pdf
- http://www.cs.caltech.edu/~keenan/project_qjulia.html

Note that the .cl compute kernel file(s) are loaded and compiled at
runtime. The example source assumes that these files are in the same
path as the built executable.

For simplicity, this example is intended to be run from the command line.
If run from within XCode, open the Run Log (Command-Shift-R) to see the
output. Alternatively, run the applications from within a Terminal.app
session to launch from the command line.

Example downloaded from the [Apple OpenCL Developer website](https://developer.apple.com/opencl/).

# BUILD REQUIREMENTS:

Mac OS X v10.7 or later

This demo uses float3 vector datatype which is only supported 10.7 and later.

# RUNTIME REQUIREMENTS:

Mac OS X v10.7 or later with OpenCL 1.1

# PACKAGING LIST:

- qjulia.c
- qjulia.xcodeproj
- qjulia_kernel.cl

# CHANGES FROM PREVIOUS VERSIONS:

Version 1.0 - First version.

Copyright (C) 2008 Apple Inc. All rights reserved.
26 changes: 11 additions & 15 deletions RayTraced_Quaternion_Julia-Set_Example/qjulia.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,23 @@
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <time.h>

#ifdef __APPLE__
#include <OpenGL/OpenGL.h>
#include <OpenGL/gl.h>
#include <OpenGL/CGLDevice.h>
#include <GLUT/glut.h>
#include <OpenCL/opencl.h>

#include <mach/mach_time.h>
#else
#include <GL/gl.h>
#include <GL/glut.h>
#include <CL/opencl.h>
#endif

////////////////////////////////////////////////////////////////////////////////

#define USE_GL_ATTACHMENTS (1) // enable OpenGL attachments for Compute results
#define USE_GL_ATTACHMENTS (0) // enable OpenGL attachments for Compute results
#define DEBUG_INFO (0)
#define COMPUTE_KERNEL_FILENAME ("qjulia_kernel.cl")
#define COMPUTE_KERNEL_METHOD_NAME ("QJuliaKernel")
Expand Down Expand Up @@ -159,22 +164,14 @@ DivideUp(int a, int b)
// Returns the current calendar time as reported by time(), in whole seconds.
// The resolution is therefore one second.
static uint64_t
GetCurrentTime(void)
{
    return (uint64_t) time(NULL);
}

// Returns the elapsed time, in seconds, between two GetCurrentTime() samples.
//
// GetCurrentTime() already counts seconds, so no tick-to-second conversion
// factor is needed. The previous conversion factor was never initialised once
// the mach timebase lookup was removed, which made every result 0.0.
//
// The result is negative when uiEndTime precedes uiStartTime (for example,
// if the system clock was stepped backwards between the two samples).
static double
SubtractTime( uint64_t uiEndTime, uint64_t uiStartTime )
{
    if( uiEndTime < uiStartTime )
    {
        // Subtract in the non-wrapping direction, then negate.
        return -(double) (uiStartTime - uiEndTime);
    }

    return (double) (uiEndTime - uiStartTime);
}

Expand Down Expand Up @@ -302,8 +299,7 @@ CreateTexture(uint width, uint height)

TextureWidth = width;
TextureHeight = height;

glActiveTextureARB(ActiveTextureUnit);

glGenTextures(1, &TextureId);
glBindTexture(TextureTarget, TextureId);
glTexParameteri(TextureTarget, GL_TEXTURE_WRAP_S, GL_CLAMP);
Expand Down Expand Up @@ -611,7 +607,7 @@ SetupComputeDevices(int gpu)

// Create a context containing the compute device(s)
//
ComputeContext = clCreateContext(0, 1, &ComputeDeviceId, clLogMessagesToStdoutAPPLE, NULL, &err);
ComputeContext = clCreateContext(0, 1, &ComputeDeviceId, NULL, NULL, &err);
if (!ComputeContext)
{
printf("Error: Failed to create a compute context!\n");
Expand Down
11 changes: 9 additions & 2 deletions add_numbers/Makefile
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ PROJ=add_numbers

CC=gcc

CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG
#CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG
CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG -I /opt/amdgpu-pro/include -DCL_TARGET_OPENCL_VERSION=200
# No OpenCL
CFLAGSS=-std=c99 -Wall -DUNIX -g -DDEBUG

LDFLAGS=-L /opt/amdgpu-pro/lib64 -lOpenCL

# Check for 32-bit vs 64-bit
PROC_TYPE = $(strip $(shell uname -m | grep 64))

Expand All @@ -26,7 +29,8 @@ ifneq ($(DARWIN),)
else

# Linux OS
LIBS=-lOpenCL
#LIBS=-lOpenCL
LIBS=$(LDFLAGS)
ifeq ($(PROC_TYPE),)
CFLAGS+=-m32
else
Expand Down Expand Up @@ -54,6 +58,9 @@ endif
$(PROJ): $(PROJ).c
$(CC) $(CFLAGS) -o $@ $^ $(INC_DIRS:%=-I%) $(LIB_DIRS:%=-L%) $(LIBS)

run: $(PROJ)
./$(PROJ)

.PHONY: clean

clean:
Expand Down
2 changes: 1 addition & 1 deletion add_numbers/add_numbers.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ int main() {

Does not support profiling or out-of-order-execution
*/
queue = clCreateCommandQueue(context, device, 0, &err);
queue = clCreateCommandQueueWithProperties(context, device, 0, &err);
if(err < 0) {
perror("Couldn't create a command queue");
exit(1);
Expand Down
18 changes: 15 additions & 3 deletions auger/Makefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
# C
CC = gcc
CFLAGS = -O2 -std=c99 -Wall -DUNIX -g -DDEBUG
#CFLAGS = -O2 -std=c99 -Wall -DUNIX -g -DDEBUG
CFLAGS = -O2 -std=c99 -Wall -DUNIX -g -DDEBUG \
-I /opt/amdgpu-pro/include -DCL_TARGET_OPENCL_VERSION=200 \
-I . \
-I clRNG \
-I clRNG-1.0.0-beta-Linux64/include

LDFLAGS=-L /opt/amdgpu-pro/lib64 -lOpenCL

# Fortran
FC = gfortran
FFLAGS = -O

# Additional libraries and source files
LIBS = -lm -lclRNG
#LIBS = -lm -lclRNG
LIBS = -lm \
-L . -L clRNG -L clRNG-1.0.0-beta-Linux64/lib64 -lclRNG \
$(LDFLAGS)

SRCS = clbuild.c

# Check for 32-bit vs 64-bit
Expand All @@ -23,7 +34,8 @@ ifneq ($(DARWIN),)
LIBS += -framework OpenCL
else
# Linux OS
LIBS += -lOpenCL
#LIBS += -lOpenCL
LIBS += $(LDFLAGS)
ifeq ($(PROC_TYPE),)
CFLAGS+=-m32
else
Expand Down
7 changes: 4 additions & 3 deletions auger/cr.cl
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ a random UHECR position.
- n: total number of cosmic rays desired
*/

#include <clRNG/mrg31k3p.clh>

//#include <clRNG/mrg31k3p.clh>
#include "exposure.clh"
#include "mrg31k3p.clh"




Expand Down Expand Up @@ -44,4 +45,4 @@ __kernel void cr(__global clrngMrg31k3pHostStream* streams, __global float* xa,
break;
}
}
}
}
4 changes: 3 additions & 1 deletion cf4cl/READMe.md → cf4cl/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ Example on how to use the `cf4ocl` OpenCL wrapper for C. This is a library suppo

Code taken from the [`cf4ocl` tutorial](http://www.fakenmc.com/cf4ocl/docs/latest/tut.html).

`cf4ocl` -- now archived on [GitHub](https://github.com/nunofachada/cf4ocl/releases/)

How to run:

1. Compile with `./make.sh`
Expand All @@ -21,4 +23,4 @@ List of available OpenCL devices:
(?) Select device (0-1) >
```

You need a comparable number of lines of `cf4ocl` code compared to opencl, so I do not see immediately an advantage…
You need a comparable number of lines of `cf4ocl` code compared to opencl, so I do not immediately see an advantage…
Loading