-
Notifications
You must be signed in to change notification settings - Fork 19
Mikael's hacks for HPX Kokkos OctoTiger with clang CUDA
Mikael Simberg edited this page Oct 3, 2019
·
1 revision
This was run on Piz Daint with the new Cray clang compiler.
module load daint-gpu
module load cudatoolkit/9.2.148_3.19-6.0.7.1_2.1__g3d9acc8
module switch cce/8.7.3 cce/9.0.1
export CXX=/opt/cray/pe/cce/9.0.1/cce-clang/x86_64/bin/clang++ # Do *not* use the Cray compiler wrapper
Get Boost/jemalloc/hwloc for HPX any way you like. The following may work:
module load jemalloc
module load hwloc
module load Boost
Built HPX from this branch: https://github.com/msimberg/hpx/tree/fix-cuda-clang using commit 79519358fa78da4f0708418133fea660c9609cb1.
cmake -DHPX_WITH_CXX14=ON -DHPX_PROGRAM_OPTIONS_WITH_BOOST_PROGRAM_OPTIONS_COMPATIBILITY=OFF -DCMAKE_CXX_FLAGS="-fdiagnostics-color --cuda-gpu-arch=sm_60" -DCMAKE_CXX_COMPILER="$CXX" -DCMAKE_BUILD_TYPE=Debug -DHPX_WITH_CUDA=ON -DHPX_WITH_CUDA_CLANG=ON -DBOOST_ROOT=$YOUR_BOOST -DHWLOC_ROOT=$YOUR_HWLOC -DHPX_WITH_MALLOC=system -DCMAKE_INSTALL_PREFIX=$HPX_INSTALL_PREFIX -DHPX_WITH_EXAMPLES=OFF $HPX_SOURCE_DIR && make -jN install
Built develop branch of Kokkos using commit 445c17625fe6dcfa8ddf98046301a648550c5738.
# Kokkos configuration does not find CUDA correctly with clang (bug? unfinished?)
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cray/nvidia/default/lib64
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/cudatoolkit9.2/9.2.148_3.19-6.0.7.1_2.1__g3d9acc8/lib64
cmake -DHPX_DIR=$HPX_INSTALL_PREFIX/lib64/cmake/HPX/ -DKokkos_ENABLE_HPX=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_CXX_STANDARD=14 -DCMAKE_INSTALL_PREFIX=$KOKKOS_INSTALL_PREFIX -DCMAKE_CXX_FLAGS="--cuda-gpu-arch=sm_60 -fPIC" $KOKKOS_SOURCE_DIR && make -jN install
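Apply the following diff to the Kokkos CMake file cmake/kokkos_tribits.cmake (the change only takes effect when CMake configures Kokkos, so apply it before running the cmake command above, or re-run that command afterwards):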
diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake
index 77f27622..71602c6d 100644
--- a/cmake/kokkos_tribits.cmake
+++ b/cmake/kokkos_tribits.cmake
@@ -257,22 +257,21 @@ FUNCTION(KOKKOS_LINK_TPLS LIBRARY_NAME)
IF (KOKKOS_ENABLE_CUDA)
IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
SET(LIB_cuda "-lcuda -lcudart")
- find_library( cuda_lib_ NAMES libcuda cuda HINTS ${KOKKOS_CUDA_DIR}/lib64 ENV LD_LIBRARY_PATH ENV PATH )
+ #find_library( cuda_lib_ NAMES libcuda cuda HINTS ${KOKKOS_CUDA_DIR}/lib64 ENV LD_LIBRARY_PATH ENV PATH )
find_library( cudart_lib_ NAMES libcudart cudart HINTS ${KOKKOS_CUDA_DIR}/lib64 ENV LD_LIBRARY_PATH ENV PATH )
- if (cuda_lib_)
- TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC ${cuda_lib_})
- else()
- MESSAGE(SEND_ERROR "libcuda is required but could not be found. Make sure to include it in your LD_LIBRARY_PATH.")
- endif()
+ #if (cuda_lib_)
+ #TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC ${cuda_lib_})
+ #else()
+ #MESSAGE(SEND_ERROR "libcuda is required but could not be found. Make sure to include it in your LD_LIBRARY_PATH.")
+ #endif()
if (cudart_lib_)
TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC ${cudart_lib_})
else()
MESSAGE(SEND_ERROR "libcudart is required but could not be found. Make sure to include it in your LD_LIBRARY_PATH.")
endif()
- else()
- SET(LIB_cuda "-lcuda")
- TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC cuda)
endif()
+ SET(LIB_cuda "-lcuda")
+ TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC cuda)
ENDIF()
IF (KOKKOS_ENABLE_HPX)
Built Octo-Tiger using the branch https://github.com/msimberg/octotiger/tree/kokkos-kernel-test and commit 2a02fd3e590c314c056dcadcf71d16120959691e.
Most of the non-kokkos paths and options are taken from https://github.com/biddisco/biddisco.github.io/wiki/Octotiger-on-Daint.
cmake -DCMAKE_CXX_FLAGS="--cuda-gpu-arch=sm_60" -DOCTOTIGER_WITH_CUDA=ON -DCMAKE_BUILD_TYPE=Debug -DBOOST_ROOT=$YOUR_BOOST_ROOT -DSilo_DIR=$YOUR_SILO_ROOT -DHDF5_ROOT=$YOUR_HDF5_ROOT -DOCTOTIGER_WITH_BLAST_TEST=OFF -DOCTOTIGER_WITH_Vc=ON -DVc_DIR=$YOUR_VC_ROOT -DHPX_DIR=$HPX_INSTALL_PREFIX/lib64/cmake/HPX/ -DKokkos_DIR=$KOKKOS_INSTALL_PREFIX/lib64/cmake/Kokkos/ -DOCTOTIGER_WITH_KOKKOS=ON $OCTOTIGER_SOURCE_DIR
make kokkos_kernel_test
salloc -C gpu
srun ./kokkos_kernel_test # Should print the configuration with HPX and CUDA enabled, and print hello 10 times from the host and device execution spaces