Skip to content

Mikael's hacks for HPX Kokkos OctoTiger with clang CUDA

Mikael Simberg edited this page Oct 3, 2019 · 1 revision

This was run on Piz Daint with the new Cray clang compiler.

Load the environment

module load daint-gpu
module load cudatoolkit/9.2.148_3.19-6.0.7.1_2.1__g3d9acc8
module switch cce/8.7.3 cce/9.0.1
export CXX=/opt/cray/pe/cce/9.0.1/cce-clang/x86_64/bin/clang++ # Do *not* use the Cray compiler wrapper

Get Boost/jemalloc/hwloc for HPX any way you like. The following may work:

module load jemalloc
module load hwloc
module load Boost

Build HPX

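Built from the branch https://github.com/msimberg/hpx/tree/fix-cuda-clang at commit 79519358fa78da4f0708418133fea660c9609cb1. In the command below, replace N in `make -jN` with the number of parallel build jobs; the install prefix given here as $YOUR_INSTALL_PREFIX is the directory referred to as $HPX_INSTALL_PREFIX in the later steps.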

cmake -DHPX_WITH_CXX14=ON -DHPX_PROGRAM_OPTIONS_WITH_BOOST_PROGRAM_OPTIONS_COMPATIBILITY=OFF -DCMAKE_CXX_FLAGS="-fdiagnostics-color --cuda-gpu-arch=sm_60" -DCMAKE_CXX_COMPILER="$CXX" -DCMAKE_BUILD_TYPE=Debug -DHPX_WITH_CUDA=ON -DHPX_WITH_CUDA_CLANG=ON -DBOOST_ROOT=$YOUR_BOOST -DHWLOC_ROOT=$YOUR_HWLOC -DHPX_WITH_MALLOC=system -DCMAKE_INSTALL_PREFIX=$YOUR_INSTALL_PREFIX -DHPX_WITH_EXAMPLES=OFF $HPX_SOURCE_DIR && make -jN install

Build Kokkos

Built the develop branch of Kokkos at commit 445c17625fe6dcfa8ddf98046301a648550c5738.

# Kokkos configuration does not find CUDA correctly with clang (bug? unfinished?)
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cray/nvidia/default/lib64
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/cudatoolkit9.2/9.2.148_3.19-6.0.7.1_2.1__g3d9acc8/lib64
cmake -DHPX_DIR=$HPX_INSTALL_PREFIX/lib64/cmake/HPX/ -DKokkos_ENABLE_HPX=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_CXX_STANDARD=14 -DCMAKE_INSTALL_PREFIX=$KOKKOS_INSTALL_PREFIX -DCMAKE_CXX_FLAGS="--cuda-gpu-arch=sm_60 -fPIC" $KOKKOS_SOURCE_DIR && make -jN install

Apply the following diff to cmake/kokkos_tribits.cmake in the Kokkos source tree (it must be applied before running the cmake command above):

diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake
index 77f27622..71602c6d 100644
--- a/cmake/kokkos_tribits.cmake
+++ b/cmake/kokkos_tribits.cmake
@@ -257,22 +257,21 @@ FUNCTION(KOKKOS_LINK_TPLS LIBRARY_NAME)
   IF (KOKKOS_ENABLE_CUDA)
     IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
        SET(LIB_cuda "-lcuda -lcudart")
-       find_library( cuda_lib_ NAMES libcuda cuda HINTS ${KOKKOS_CUDA_DIR}/lib64 ENV LD_LIBRARY_PATH ENV PATH )
+       #find_library( cuda_lib_ NAMES libcuda cuda HINTS ${KOKKOS_CUDA_DIR}/lib64 ENV LD_LIBRARY_PATH ENV PATH )
        find_library( cudart_lib_ NAMES libcudart cudart HINTS ${KOKKOS_CUDA_DIR}/lib64 ENV LD_LIBRARY_PATH ENV PATH )
-       if (cuda_lib_)
-          TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC ${cuda_lib_})
-       else()
-          MESSAGE(SEND_ERROR "libcuda is required but could not be found. Make sure to include it in your LD_LIBRARY_PATH.")
-       endif()
+       #if (cuda_lib_)
+          #TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC ${cuda_lib_})
+       #else()
+          #MESSAGE(SEND_ERROR "libcuda is required but could not be found. Make sure to include it in your LD_LIBRARY_PATH.")
+       #endif()
        if (cudart_lib_)
           TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC ${cudart_lib_})
        else()
          MESSAGE(SEND_ERROR "libcudart is required but could not be found. Make sure to include it in your LD_LIBRARY_PATH.")
        endif()
-    else()
-       SET(LIB_cuda "-lcuda")
-       TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC cuda)
     endif()
+    SET(LIB_cuda "-lcuda")
+    TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC cuda)
   ENDIF()
 
   IF (KOKKOS_ENABLE_HPX)

Build OctoTiger

Built from the branch https://github.com/msimberg/octotiger/tree/kokkos-kernel-test at commit 2a02fd3e590c314c056dcadcf71d16120959691e.

Most of the non-Kokkos paths and options are taken from https://github.com/biddisco/biddisco.github.io/wiki/Octotiger-on-Daint.

cmake -DCMAKE_CXX_FLAGS="--cuda-gpu-arch=sm_60" -DOCTOTIGER_WITH_CUDA=ON -DCMAKE_BUILD_TYPE=Debug -DBOOST_ROOT=$YOUR_BOOST_ROOT -DSilo_DIR=$YOUR_SILO_ROOT -DHDF5_ROOT=$YOUR_HDF5_ROOT -DOCTOTIGER_WITH_BLAST_TEST=OFF -DOCTOTIGER_WITH_Vc=ON -DVc_DIR=$YOUR_VC_ROOT -DHPX_DIR=$HPX_INSTALL_PREFIX/lib64/cmake/HPX/ -DKokkos_DIR=$KOKKOS_INSTALL_PREFIX/lib64/cmake/Kokkos/ -DOCTOTIGER_WITH_KOKKOS=ON $OCTOTIGER_SOURCE_DIR
make kokkos_kernel_test
salloc -C gpu
srun ./kokkos_kernel_test # Should print the configuration with HPX and CUDA enabled, and print hello 10 times from the host and device execution spaces