Skip to content

Commit

Permalink
ocl: moved common code into separate OpenCL/header files
Browse files Browse the repository at this point in the history
* Fixed incorrect type-cast (OPENCL_LIBSMM_SMM_KERNEL).
* Fixed c_dbcsr_acc_dev_mem_deallocate (svm_interop).
* Fixed warnings about unused variable (SMM-kernel).
* Inline include files (acc_opencl.sh).
  • Loading branch information
hfp committed Dec 18, 2023
1 parent 20980ef commit 4b8984b
Show file tree
Hide file tree
Showing 6 changed files with 244 additions and 163 deletions.
2 changes: 1 addition & 1 deletion src/acc/opencl/acc_opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1509,7 +1509,7 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
}
if (NULL != file_src) {
void* p = NULL;
LIBXSMM_ASSIGN127(&p, (const void*)&source);
LIBXSMM_ASSIGN127(&p, (const void**)&source);
assert(0 != source_is_file);
libxsmm_free(p);
}
Expand Down
73 changes: 49 additions & 24 deletions src/acc/opencl/acc_opencl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# shellcheck disable=SC2048,SC2129

BASENAME=$(command -v basename)
DIRNAME=$(command -v dirname)
SORT=$(command -v sort)
SED=$(command -v gsed)
CPP=$(command -v cpp)
Expand All @@ -28,8 +29,51 @@ if [ ! "${SED}" ]; then
SED=$(command -v sed)
fi

if [ "${BASENAME}" ] && [ "${SORT}" ] && [ "${SED}" ] && \
[ "${TR}" ] && [ "${RM}" ] && [ "${WC}" ];
# EXIT-trap handler: discards the generated output file when the script fails.
# Uses the exit status that was current when the handler was entered; OFILE is
# only removed if that status is non-zero and HFILE is non-empty.
trap_exit() {
  local status=$?
  if [ "0" != "${status}" ] && [ "${HFILE}" ]; then
    ${RM} -f "${OFILE}"
  fi
}

# Writes the content of file $1 to stdout with comments removed.
# Preferred tool is the C preprocessor (CPP with CPPBASEFLAGS, plus CPPFLAGS if
# that combination also succeeds in a trial run); otherwise a sed expression
# strips /* ... */ comments.
process_pre() {
  local base="${CPP} ${CPPBASEFLAGS} $1"
  local full="${CPP} ${CPPFLAGS} ${CPPBASEFLAGS} $1"
  if [ ! "${CPP}" ] || ! (eval "${base}") >/dev/null 2>&1; then
    # fallback to sed (no preprocessor, or the trial run with base flags failed)
    ${SED} -r ':a;s%(.*)/\*.*\*/%\1%;ta;/\/\*/!b;N;ba' "$1"
  elif [ "${CPPFLAGS}" ] && (eval "${full}") >/dev/null 2>&1; then
    # user-supplied CPPFLAGS are accepted by the preprocessor
    eval "${full}" 2>/dev/null
  else
    eval "${base}" 2>/dev/null
  fi
}

# Converts source text read from stdin into lines of a C string literal (stdout).
#   $1: path of the file the input originates from; it is used to resolve
#       #include "..." directives relative to that file's directory.
# Each include line whose target exists is replaced by the (recursively)
# processed content of the header. Every other non-blank line is stripped of
# trailing whitespace, has backslashes and double quotes escaped, and is wrapped
# as: "<line>\n" followed by a line-continuation backslash.
# Exits with status 1 if an included header cannot be found. Since this function
# is typically run as part of a pipeline (subshell), the caller has to check the
# pipeline's status to notice the failure.
process() {
  IFS=$'\n'
  # "|| [ "${LINE}" ]": also handle a final line lacking the trailing newline
  while read -r LINE || [ "${LINE}" ]; do
    INCLUDE=$(${SED} -n "s/#[[:space:]]*include[[:space:]][[:space:]]*\"/\"/p" <<<"${LINE}")
    if [ "${INCLUDE}" ] && [ "$1" ] && [ -e "$1" ]; then
      CLINC=$(${SED} "s/\"//g" <<<"${INCLUDE}")
      CLPATH=$(${DIRNAME} "$1")
      FILE=${CLPATH}/${CLINC}
      if [ "${FILE}" ] && [ -e "${FILE}" ]; then
        # the nested call runs in a subshell: propagate its failure instead of
        # continuing to emit output after a missing (nested) header
        process_pre "${FILE}" | process "${FILE}" || exit 1
      else
        >&2 echo "ERROR: header file ${FILE} not found!"
        exit 1
      fi
    else
      ${SED} <<<"${LINE}" \
        -e '/^[[:space:]]*$/d' -e 's/[[:space:]]*$//' \
        -e 's/\\/\\\\/g' -e 's/"/\\"/g' -e 's/^/ "/' -e 's/$/\\n" \\/'
    fi
  done
  unset IFS
}

if [ "${BASENAME}" ] && [ "${DIRNAME}" ] && [ "${SORT}" ] && \
[ "${SED}" ] && [ "${TR}" ] && [ "${RM}" ] && [ "${WC}" ];
then
for OFILE in "$@"; do :; done
while test $# -gt 0; do
Expand All @@ -48,7 +92,7 @@ then
*) break;;
esac
done
HERE="$(cd "$(dirname "$0")" && pwd -P)"
HERE="$(cd "$(${DIRNAME} "$0")" && pwd -P)"
PARAMDIR=${PARAMDIR:-${PARAMS}}
PARAMDIR=${PARAMDIR:-${HERE}/smm/params}
PARAMDIR=$(echo -e "${PARAMDIR}" | ${TR} -d '\t')
Expand All @@ -70,6 +114,7 @@ then
echo "$0 $*"
fi
fi
trap 'trap_exit' EXIT
NFILES_OCL=0
for CLFILE in ${*:1:${#@}-1}; do
if [ "${CLFILE##*.}" = "cl" ]; then
Expand All @@ -84,29 +129,12 @@ then
fi
echo "#define ${MNAME} ${VNAME}" >>"${OFILE}"
echo "#define ${SNAME} \\" >>"${OFILE}"
if [ "${CPP}" ] && \
[ "$(eval "${CPP} ${CPPBASEFLAGS} ${CLFILE}" 2>/dev/null >/dev/null && echo "YES")" ];
then
if [ "" != "${CPPFLAGS}" ] && \
[ "$(eval "${CPP} ${CPPFLAGS} ${CPPBASEFLAGS} ${CLFILE}" 2>/dev/null >/dev/null && echo "YES")" ];
then
eval "${CPP} ${CPPFLAGS} ${CPPBASEFLAGS} ${CLFILE}" 2>/dev/null
else
eval "${CPP} ${CPPBASEFLAGS} ${CLFILE}" 2>/dev/null
fi
else # fallback to sed
${SED} -r ':a;s%(.*)/\*.*\*/%\1%;ta;/\/\*/!b;N;ba' "${CLFILE}"
fi | \
${SED} \
-e '/^[[:space:]]*$/d' -e 's/[[:space:]]*$//' \
-e 's/\\/\\\\/g' -e 's/"/\\"/g' -e 's/^/ "/' -e 's/$/\\n" \\/' \
>>"${OFILE}"
process_pre "${CLFILE}" | process "${CLFILE}" >>"${OFILE}"
echo " \"\"" >>"${OFILE}"
echo "static const char ${VNAME}[] = ${SNAME};" >>"${OFILE}"
NFILES_OCL=$((NFILES_OCL+1))
else
>&2 echo "ERROR: ${CLFILE} does not exist!"
if [ "${HFILE}" ]; then ${RM} -f "${OFILE}"; fi
exit 1
fi
else
Expand All @@ -116,7 +144,6 @@ then
done
if [ "0" = "${NFILES_OCL}" ]; then
>&2 echo "ERROR: no OpenCL file was given!"
if [ "${HFILE}" ]; then ${RM} -f "${OFILE}"; fi
exit 1
fi
NFILES_CSV=0
Expand All @@ -127,7 +154,6 @@ then
fi
else
>&2 echo "ERROR: ${CSVFILE} is not a CSV file!"
if [ "${HFILE}" ]; then ${RM} -f "${OFILE}"; fi
exit 1
fi
done
Expand All @@ -153,7 +179,6 @@ then
fi
if [ "${ERRFILE}" ] && [ -f "${ERRFILE}" ]; then
>&2 echo "ERROR: ${ERRFILE} is malformed!"
if [ "${HFILE}" ]; then ${RM} -f "${OFILE}"; fi
exit 1
fi
done
Expand Down
4 changes: 2 additions & 2 deletions src/acc/opencl/acc_opencl_mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -320,8 +320,8 @@ int c_dbcsr_acc_dev_mem_deallocate(void* dev_mem) {
const int tid = ACC_OPENCL_OMP_TID();
if (0 != c_dbcsr_acc_opencl_config.device[tid].svm_interop) {
void* const ptr = (0 != c_dbcsr_acc_opencl_config.device[tid].svm_interop ? c_dbcsr_acc_opencl_get_hostptr(buffer) : NULL);
assert(NULL != c_dbcsr_acc_opencl_config.device[tid].context);
clSVMFree(c_dbcsr_acc_opencl_config.device[tid].context, ptr);
const cl_context context = c_dbcsr_acc_opencl_context(NULL /*thread_id*/);
clSVMFree(context, ptr);
}
}
# endif
Expand Down
134 changes: 134 additions & 0 deletions src/acc/opencl/common/opencl_atomics.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*------------------------------------------------------------------------------------------------*/
/* Copyright (C) by the DBCSR developers group - All rights reserved */
/* This file is part of the DBCSR library. */
/* */
/* For information on the license, see the LICENSE file. */
/* For further information please visit https://dbcsr.cp2k.org */
/* SPDX-License-Identifier: GPL-2.0+ */
/*------------------------------------------------------------------------------------------------*/
#ifndef OPENCL_ATOMICS_H
#define OPENCL_ATOMICS_H

#include "opencl_common.h"

/* Qualifies type A as a volatile object in the global address space. */
#define GLOBAL_VOLATILE(A) global volatile A

/*
 * Selection of ATOMIC_ADD_GLOBAL(A, B), or declaration of the built-in it relies on.
 * T, TF and ATOMIC_PROTOTYPES are not defined in this header; presumably they are
 * supplied by the including kernel or its build options (TODO confirm).
 * - With __opencl_c_ext_fp64_global_atomic_add: ATOMIC_ADD_GLOBAL is redefined in terms
 *   of atomic_fetch_add_explicit (if TF is defined) or atomic_add.
 * - Else if ATOMIC_PROTOTYPES is greater than 2 and TF is defined: ATOMIC_ADD_GLOBAL is
 *   redefined in terms of __opencl_atomic_fetch_add.
 * - Otherwise ATOMIC_ADD_GLOBAL is left untouched and only an overloadable prototype
 *   (atomic_fetch_add_explicit or atomic_add) is declared for type T.
 */
#if defined(ATOMIC_PROTOTYPES) || defined(__opencl_c_ext_fp64_global_atomic_add)
# if defined(__opencl_c_ext_fp64_global_atomic_add)
# undef ATOMIC_ADD_GLOBAL
# if defined(TF)
# define ATOMIC_ADD_GLOBAL(A, B) \
atomic_fetch_add_explicit((GLOBAL_VOLATILE(TF)*)A, B, memory_order_relaxed, memory_scope_work_group)
# else
# define ATOMIC_ADD_GLOBAL(A, B) atomic_add(A, B)
# endif
# elif (2 < ATOMIC_PROTOTYPES) && defined(TF)
# undef ATOMIC_ADD_GLOBAL
# define ATOMIC_ADD_GLOBAL(A, B) \
__opencl_atomic_fetch_add((GLOBAL_VOLATILE(TF)*)A, B, memory_order_relaxed, memory_scope_work_group)
# else
# if defined(TF) && (!defined(ATOMIC_PROTOTYPES) || 1 < ATOMIC_PROTOTYPES)
__attribute__((overloadable)) T atomic_fetch_add_explicit(GLOBAL_VOLATILE(TF) *, T, memory_order, memory_scope);
# else
__attribute__((overloadable)) T atomic_add(GLOBAL_VOLATILE(T) *, T);
# endif
# endif
#endif

/* ACCUMULATE(A, B) forwards to whatever ATOMIC_ADD_GLOBAL expands to at the point of use. */
#define ACCUMULATE(A, B) ATOMIC_ADD_GLOBAL(A, B)


#if !defined(cl_intel_global_float_atomics) || (1 != TN)
#  if defined(ATOMIC32_ADD64)
/*
 * Adds inc to the double at dst (global memory).
 * The update is a plain load, add and store; it is marked TODO in place of an
 * implementation based on 32-bit atomics and provides no atomicity by itself.
 */
__attribute__((always_inline)) inline void atomic32_add64_global(GLOBAL_VOLATILE(double) * dst, double inc) {
  *dst = *dst + inc; /* TODO */
}
#  endif
#endif


#if !defined(cl_intel_global_float_atomics) || (1 != TN)
#  if defined(CMPXCHG)
/*
 * Adds inc to *dst (global memory) using a compare-and-exchange retry loop.
 * The value is viewed through a union as T (arithmetic) and as TA (the integer
 * type handed to the exchange primitive), so equality is decided on the bits.
 * - dst: location to be updated.
 * - inc: value to be added.
 * If TA2 is defined, atomic_compare_exchange_weak_explicit is used; otherwise
 * the CMPXCHG primitive. If ATOMIC32_ADD64 is defined, the work is delegated
 * to atomic32_add64_global instead.
 */
__attribute__((always_inline)) inline void atomic_add_global_cmpxchg(GLOBAL_VOLATILE(T) * dst, T inc) {
#    if !defined(ATOMIC32_ADD64)
  union {
    T f;
    TA a;
  } exp_val, try_val, cur_val = {.f = *dst};
  int done;
  do {
    exp_val.a = cur_val.a;
    try_val.f = exp_val.f + inc;
#      if defined(TA2)
    /* The weak form may fail spuriously and then leaves cur_val equal to exp_val.
     * Hence the loop is driven by the returned success flag rather than by comparing
     * cur_val against exp_val (which would end the loop without having stored the sum).
     * On a genuine failure, cur_val receives the value currently stored at dst.
     */
    done = atomic_compare_exchange_weak_explicit((GLOBAL_VOLATILE(TA2)*)dst, &cur_val.a, try_val.a, memory_order_relaxed,
      memory_order_relaxed, memory_scope_work_group);
#      else
    /* CMPXCHG yields the previous content: the store happened if it matches exp_val */
    cur_val.a = CMPXCHG((GLOBAL_VOLATILE(TA)*)dst, exp_val.a, try_val.a);
    done = (cur_val.a == exp_val.a);
#      endif
  } while (0 == done);
#    else
  atomic32_add64_global(dst, inc);
#    endif
}
#  endif
#endif


#if !defined(cl_intel_global_float_atomics) || (1 != TN)
#  if defined(ATOMIC_ADD2_GLOBAL) && (1 == TN)
/*
 * Adds the two components of inc to dst[0] and dst[1] (global memory) at once,
 * using a compare-and-exchange retry loop on the pair viewed as a single long.
 * - dst: location of two consecutive floats to be updated.
 * - inc: pair of values to be added.
 * If TA2 is defined, atomic_compare_exchange_weak_explicit (on atomic_long) is
 * used; otherwise atom_cmpxchg (on long).
 */
__attribute__((always_inline)) inline void atomic_add_global_cmpxchg2(GLOBAL_VOLATILE(float) * dst, float2 inc) {
  union {
    float2 f;
    long a;
  } exp_val, try_val, cur_val = {.f = (float2)(dst[0], dst[1])};
  int done;
  do {
    exp_val.a = cur_val.a;
    try_val.f = exp_val.f + inc;
#    if defined(TA2)
    /* The weak form may fail spuriously and then leaves cur_val equal to exp_val.
     * Hence the loop is driven by the returned success flag rather than by comparing
     * cur_val against exp_val (which would end the loop without having stored the sum).
     * On a genuine failure, cur_val receives the value currently stored at dst.
     */
    done = atomic_compare_exchange_weak_explicit((GLOBAL_VOLATILE(atomic_long)*)dst, &cur_val.a, try_val.a, memory_order_relaxed,
      memory_order_relaxed, memory_scope_work_group);
#    else
    /* atom_cmpxchg yields the previous content: the store happened if it matches exp_val */
    cur_val.a = atom_cmpxchg((GLOBAL_VOLATILE(long)*)dst, exp_val.a, try_val.a);
    done = (cur_val.a == exp_val.a);
#    endif
  } while (0 == done);
}
#  endif
#endif


/*
 * Adds inc to *dst (global memory) using exchange operations only.
 * Three variants are selected at preprocessing time:
 * - ATOMIC32_ADD64 defined: delegates to atomic32_add64_global.
 * - __NV_CL_C_VERSION defined, XCHG undefined, and TN equal to 1 or 3: a single
 *   inline-assembly atom.global.add instruction (f32 for TN=1, f64 for TN=3).
 * - Otherwise: a loop built on two exchanges per iteration (see below).
 */
#if !defined(cl_intel_global_float_atomics) || (1 != TN)
# if defined(XCHG) || (defined(__NV_CL_C_VERSION) && !defined(CMPXCHG) && !defined(ATOMIC_PROTOTYPES))
__attribute__((always_inline)) inline void atomic_add_global_xchg(GLOBAL_VOLATILE(T) * dst, T inc) {
# if !defined(ATOMIC32_ADD64)
# if (defined(__NV_CL_C_VERSION) && !defined(XCHG)) && (1 == TN)
asm("{ .reg .f32 t; atom.global.add.f32 t, [%0], %1; }" ::"l"(dst), "f"(inc));
# elif (defined(__NV_CL_C_VERSION) && !defined(XCHG)) && (3 == TN)
asm("{ .reg .f64 t; atom.global.add.f64 t, [%0], %1; }" ::"l"(dst), "d"(inc));
# else
/* exp_val: amount still to be added; cur_val: all-zero bits (never modified) */
union {
T f;
TA a;
} exp_val = {.f = inc}, try_val, cur_val = {/*.f = ZERO*/ .a = 0};
do {
/* take the current content out of dst, leaving zero behind */
# if defined(TA2)
try_val.a = atomic_exchange_explicit((GLOBAL_VOLATILE(TA2)*)dst, cur_val.a, memory_order_relaxed, memory_scope_work_group);
# else
try_val.a = XCHG((GLOBAL_VOLATILE(TA)*)dst, cur_val.a);
# endif
/* add the pending amount to the value taken out */
try_val.f += exp_val.f;
/* store the sum; whatever dst held in the meantime becomes the next pending amount */
# if defined(TA2)
exp_val.a = atomic_exchange_explicit((GLOBAL_VOLATILE(TA2)*)dst, try_val.a, memory_order_relaxed, memory_scope_work_group);
# else
exp_val.a = XCHG((GLOBAL_VOLATILE(TA)*)dst, try_val.a);
# endif
/* repeat until the displaced content is all-zero bits, i.e., nothing is left to add */
} while (cur_val.a != exp_val.a);
# endif
# else
atomic32_add64_global(dst, inc);
# endif
}
# endif
#endif

#endif /*OPENCL_ATOMICS_H*/
41 changes: 41 additions & 0 deletions src/acc/opencl/common/opencl_common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*------------------------------------------------------------------------------------------------*/
/* Copyright (C) by the DBCSR developers group - All rights reserved */
/* This file is part of the DBCSR library. */
/* */
/* For information on the license, see the LICENSE file. */
/* For further information please visit https://dbcsr.cp2k.org */
/* SPDX-License-Identifier: GPL-2.0+ */
/*------------------------------------------------------------------------------------------------*/
#ifndef OPENCL_COMMON_H
#define OPENCL_COMMON_H

/*
 * UNROLL_FORCE(N): expands to the opencl_unroll_hint(N) attribute if __OPENCL_VERSION__
 * is at least 200 or if __NV_CL_C_VERSION is defined; expands to nothing otherwise.
 */
#if (200 /*CL_VERSION_2_0*/ <= __OPENCL_VERSION__) || defined(__NV_CL_C_VERSION)
# define UNROLL_FORCE(N) __attribute__((opencl_unroll_hint(N)))
#else
# define UNROLL_FORCE(N)
#endif

/*
 * Smaller (MIN) or larger (MAX) of two values. Both are macros: the selected argument
 * is evaluated twice, so arguments with side effects must be avoided.
 * If the comparison (A) < (B) is false, MIN yields B and MAX yields A.
 */
#define MIN(A, B) ((A) < (B) ? (A) : (B))
#define MAX(A, B) ((A) < (B) ? (B) : (A))

/*
 * Loop-unrolling hints controlled by LU (not defined in this header; presumably a
 * build option - TODO confirm):
 * - LU undefined or -1: UNROLL_OUTER(N) and UNROLL(N) expand to nothing.
 * - LU of 1 or greater: UNROLL_OUTER(N) is UNROLL_FORCE(1); UNROLL(N) is UNROLL_FORCE(N).
 * - any other LU:       UNROLL_OUTER(N) and UNROLL(N) are both UNROLL_FORCE(N).
 */
#if !defined(LU) || (-1 == LU)
# define UNROLL_OUTER(N)
# define UNROLL(N)
#else
# if (1 <= LU)
# define UNROLL_OUTER(N) UNROLL_FORCE(1)
# else
# define UNROLL_OUTER(N) UNROLL_FORCE(N)
# endif
# define UNROLL(N) UNROLL_FORCE(N)
#endif

/*
 * ZERO: zero literal chosen by TN (not defined in this header): a float literal for
 * TN=1, a double literal for TN=3, and an integer literal for any other value.
 */
#if (1 == TN)
# define ZERO 0.f
#elif (3 == TN)
# define ZERO 0.0
#else
# define ZERO 0
#endif

#endif /*OPENCL_COMMON_H*/
Loading

0 comments on commit 4b8984b

Please sign in to comment.