Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into ac/dps-profile
Browse files Browse the repository at this point in the history
  • Loading branch information
Angelyr committed Sep 5, 2023
2 parents 5d8d984 + 53a9d0d commit e5d7b77
Show file tree
Hide file tree
Showing 54 changed files with 425 additions and 486 deletions.
18 changes: 17 additions & 1 deletion .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ name: build-test
on:
push:
pull_request:
types: [opened, reopened]
schedule:
- cron: '12 14 * * 3'

Expand All @@ -15,15 +16,24 @@ jobs:

strategy:
matrix:
build_type: [Release, Debug]
build_type: [Release, RelWithDebInfo]
memory_test: [ON, OFF]
compiler: [g++]
language: ['cpp']
exclude:
- build_type: Release
memory_test: ON
- build_type: RelWithDebInfo
memory_test: OFF

steps:

- name: Install mpi
run: sudo apt-get install -yq mpich libmpich-dev

- name: Install Valgrind
run: sudo apt-get install -yq valgrind

# Build Kokkos

- name: Cache Kokkos Build
Expand Down Expand Up @@ -182,6 +192,7 @@ jobs:
-DCMAKE_CXX_COMPILER=mpicxx
-DIS_TESTING=ON
-DPS_IS_TESTING=ON
-DPP_ENABLE_MEMCHECK=${{matrix.memory_test}}
-DCMAKE_BUILD_TYPE=${{matrix.build_type}}
-DTEST_DATA_DIR=$GITHUB_WORKSPACE/pumi-pic/pumipic-data
-DOmega_h_PREFIX=${{ runner.temp }}/build-omega_h/install
Expand All @@ -196,8 +207,13 @@ jobs:
run: cmake --build ${{ runner.temp }}/build-pumi-pic -j8 --target install

- name: PUMI-PIC Test
if: ${{matrix.memory_test == 'OFF'}}
run: ctest --test-dir ${{ runner.temp }}/build-pumi-pic

- name: PUMI-PIC Test
if: ${{matrix.memory_test == 'ON'}}
run: ctest -E "medium|large|120" --test-dir ${{ runner.temp }}/build-pumi-pic

- name: PUMI-PIC Print
if: always()
run: cat ${{ runner.temp }}/build-pumi-pic/Testing/Temporary/LastTest.log
Expand Down
11 changes: 10 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ set(pumipic_USE_Kokkos_DEFAULT ON)
bob_public_dep(Kokkos)
set(KOKKOS_ENABLED true)

# Refuse to configure against a Kokkos older than 4.0.01. The version that was
# actually found is included in the message so the failure is easy to diagnose.
if(Kokkos_VERSION VERSION_LESS 4.0.01)
  message(FATAL_ERROR
    "Kokkos version >= 4.0.01 required, found '${Kokkos_VERSION}'.")
endif()

set(debug_flag)
if (PP_ENABLE_DEBUG_SYMBOLS)
set(debug_flag "-g")
Expand All @@ -87,11 +91,16 @@ endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${debug_flag} ${opt_flag}")
message(STATUS "CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")

# Optional memory checking. When PP_ENABLE_MEMCHECK is on, locate valgrind and
# define the arguments that mpi_test() places in front of each test executable.
if(PP_ENABLE_MEMCHECK)
  find_program(VALGRIND "valgrind")
  if(NOT VALGRIND)
    # On failure find_program leaves VALGRIND set to "VALGRIND-NOTFOUND"; without
    # this check that string would be spliced into every test command line.
    message(FATAL_ERROR
      "PP_ENABLE_MEMCHECK is ON but the valgrind executable was not found.")
  endif()
  set(VALGRIND_ARGS
    "--suppressions=${CMAKE_SOURCE_DIR}/valgrind.supp"
    "--leak-check=yes"
    "--error-exitcode=1")
endif()

# Testing helper: register a CTest test that launches an executable under MPI.
#   TESTNAME - name given to the test
#   PROCS    - value passed after the launcher's process-count flag
#   EXE      - executable to launch
#   ARGN     - any further arguments are forwarded to EXE unchanged
# ${VALGRIND} and ${VALGRIND_ARGS} are placed in front of EXE; they expand to
# nothing unless they have been set (see the PP_ENABLE_MEMCHECK block).
function(mpi_test TESTNAME PROCS EXE)
  add_test(
    NAME ${TESTNAME}
    COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${PROCS}
            ${VALGRIND} ${VALGRIND_ARGS} ${EXE} ${ARGN}
  )
endfunction()

Expand Down
2 changes: 0 additions & 2 deletions cdash/Project.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
<Project name="pumi-pic">
<SubProject name="pumipic-master-omegah1050">
</SubProject>
<SubProject name="pumipic-master-omegahMaster">
</SubProject>
</Project>
15 changes: 1 addition & 14 deletions cdash/nightly.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ SET(CTEST_DO_SUBMIT ON)
SET(CTEST_TEST_TYPE Nightly)

set(CTEST_SITE "cranium.scorec.rpi.edu" )
set(CTEST_DASHBOARD_ROOT "/lore/cwsmith/nightlyBuilds/pumipic")
set(CTEST_DASHBOARD_ROOT "/lore/castia5/nightlyBuilds/pumipic")
set(CTEST_CMAKE_GENERATOR "Unix Makefiles" )
set(CTEST_BUILD_CONFIGURATION RelWithDebInfo)

Expand Down Expand Up @@ -103,15 +103,6 @@ macro(setup_repo repo_name repo_url)
submit_part(${repo_name} "Update")
endmacro(setup_repo)

set(OMEGAH_1050_INSTALL
"${CTEST_DASHBOARD_ROOT}/build-omegah1050-cranium-cuda114/install/lib/cmake/Omega_h")
SET(CONFIGURE_MASTER_OMEGAH1050
"-DCMAKE_CXX_COMPILER=mpicxx"
"-DIS_TESTING=ON"
"-DPS_IS_TESTING=ON"
"-DOmega_h_PREFIX=${OMEGAH_1050_INSTALL}"
"-DTEST_DATA_DIR=${CTEST_DASHBOARD_ROOT}/repos/pumipic/pumipic-data")

set(OMEGAH_MASTER_INSTALL
"${CTEST_DASHBOARD_ROOT}/build-omegah-cranium-cuda114/install/lib/cmake/Omega_h")
SET(CONFIGURE_MASTER_OMEGAH_MASTER
Expand All @@ -121,10 +112,6 @@ SET(CONFIGURE_MASTER_OMEGAH_MASTER
"-DOmega_h_PREFIX=${OMEGAH_MASTER_INSTALL}"
"-DTEST_DATA_DIR=${CTEST_DASHBOARD_ROOT}/repos/pumipic/pumipic-data")

message(STATUS "configure options ${CONFIGURE_MASTER_OMEGAH1050}")
build_subproject(pumipic-master-omegah1050 "${CONFIGURE_MASTER_OMEGAH1050}")
test_subproject(pumipic-master-omegah1050)

message(STATUS "configure options ${CONFIGURE_MASTER_OMEGAH_MASTER}")
build_subproject(pumipic-master-omegahMaster "${CONFIGURE_MASTER_OMEGAH_MASTER}")
test_subproject(pumipic-master-omegahMaster)
Expand Down
34 changes: 8 additions & 26 deletions cdash/nightly.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@
source /etc/profile.d/modules.sh
source /etc/profile

export root=/lore/cwsmith/nightlyBuilds/pumipic
export root=/lore/castia5/nightlyBuilds/pumipic

module use /opt/scorec/spack/dev/lmod/linux-rhel7-x86_64/Core
module unuse /opt/scorec/spack/lmod/linux-rhel7-x86_64/Core
module load gcc/7.4.0-c5aaloy cuda/11.4
module load mpich/3.3.1-bfezl2l
module load cmake
module unuse /opt/scorec/spack/lmod/linux-rhel7-x86_64/Core
module use /opt/scorec/spack/v0154_2/lmod/linux-rhel7-x86_64/Core
module load gcc/10.1.0 mpich
module load cuda/11.4 cmake

function getname() {
name=$1
Expand All @@ -20,13 +19,12 @@ function getname() {
export engpar=$root/`getname engpar`/install # This is where engpar will be (or is) installed
export kk=$root/`getname kokkos`/install # This is where kokkos will be (or is) installed
export oh=$root/`getname omegah`/install # This is where omega_h will be (or is) installed
export oh1050=$root/`getname omegah1050`/install
export pumipic=$root/`getname pumipic`/install # This is where PumiPIC will be (or is) installed
export CMAKE_PREFIX_PATH=$engpar:$kk:$oh:$pumipic:$CMAKE_PREFIX_PATH
export MPICH_CXX=$root/kokkos/bin/nvcc_wrapper

cd $root
[ ! -d kokkos ] && git clone -b 3.4.01 [email protected]:kokkos/kokkos.git
[ ! -d kokkos ] && git clone -b 4.0.01 [email protected]:kokkos/kokkos.git
[ -d $kk ] && rm -rf ${kk%%install}
cmake -S kokkos -B ${kk%%install} \
-DCMAKE_CXX_COMPILER=$root/kokkos/bin/nvcc_wrapper \
Expand All @@ -40,7 +38,7 @@ cmake -S kokkos -B ${kk%%install} \
cmake --build ${kk%%install} --target install -j 24

cd $root
[ ! -d EnGPar] && git clone [email protected]:SCOREC/EnGPar.git
[ ! -d EnGPar ] && git clone [email protected]:SCOREC/EnGPar.git
cd EnGPar && git pull && cd -
[ -d $engpar ] && rm -rf ${engpar%%install}
cmake -S EnGPar -B ${engpar%%install} \
Expand Down Expand Up @@ -71,26 +69,10 @@ cmake -S omega_h -B ${oh%%install} \
-DKokkos_PREFIX=$kk/lib64/cmake
cmake --build ${oh%%install} --target install -j8

cd omega_h && git checkout scorec-v10.5.0 && cd -
[ -d $oh1050 ] && rm -rf ${oh1050%%install}
cmake -S omega_h -B ${oh1050%%install} \
-DCMAKE_CXX_COMPILER=mpicxx \
-DCMAKE_C_COMPILER=mpicc \
-DCMAKE_BUILD_TYPE=debug \
-DCMAKE_INSTALL_PREFIX=$oh1050 \
-DBUILD_SHARED_LIBS=OFF \
-DOmega_h_USE_Kokkos=ON \
-DOmega_h_USE_CUDA=on \
-DOmega_h_CUDA_ARCH=75 \
-DOmega_h_USE_MPI=on \
-DBUILD_TESTING=on \
-DKokkos_PREFIX=$kk/lib64/cmake
cmake --build ${oh1050%%install} --target install -j8

set +e
set +x

d=/lore/cwsmith/nightlyBuilds/pumipic
d=/lore/castia5/nightlyBuilds/pumipic
cd $d
#remove old compilation
[ -d build_pumipic ] && rm -rf build_pumipic/
Expand Down
33 changes: 14 additions & 19 deletions particle_structs/src/cabm/cabm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,13 @@ namespace pumipic {
void printFormat(const char* prefix) const;

// Do not call these functions:
kkLidView buildOffset(const kkLidView particles_per_element, const lid_t num_ptcls, const double padding, lid_t &padding_start);
typename ParticleStructure<DataTypes, MemSpace>::kkLidView
buildOffset(const kkLidView particles_per_element, const lid_t num_ptcls, const double padding, lid_t &padding_start);
AoSoA_t* makeAoSoA(const lid_t capacity, const lid_t num_soa);
kkLidView getParentElms(const lid_t num_elements, const lid_t num_soa, const kkLidView offsets);

typename ParticleStructure<DataTypes, MemSpace>::kkLidView
getParentElms(const lid_t num_elements, const lid_t num_soa, const kkLidView offsets);

void setActive(const kkLidView particles_per_element);
void createGlobalMapping(const kkGidView element_gids, kkGidView& lid_to_gid, GID_Mapping& gid_to_lid);
void fillAoSoA(const kkLidView particle_elements, const MTVs particle_info);
Expand Down Expand Up @@ -242,35 +246,27 @@ namespace pumipic {
return;

// move function pointer to GPU (if needed)
FunctionType* fn_d;
#ifdef PP_USE_CUDA
cudaMalloc(&fn_d, sizeof(FunctionType));
cudaMemcpy(fn_d,&fn, sizeof(FunctionType), cudaMemcpyHostToDevice);
#else
fn_d = &fn;
#endif
FunctionType* fn_d = gpuMemcpy(fn);
kkLidView parentElms_cpy = parentElms_;
const auto soa_len = AoSoA_t::vector_length;
const auto activeSliceIdx = aosoa_->number_of_members-1;
const auto mask = Cabana::slice<activeSliceIdx>(*aosoa_); // get active mask
const auto mask = Cabana::slice<CM_DT::size-1>(*aosoa_); // get active mask
Cabana::SimdPolicy<soa_len,execution_space> simd_policy(0, capacity_);
Cabana::simd_parallel_for(simd_policy,
KOKKOS_LAMBDA( const lid_t soa, const lid_t ptcl ) {
const lid_t elm = parentElms_cpy(soa); // calculate element
const lid_t particle_id = soa*soa_len + ptcl; // calculate overall index
(*fn_d)(elm, particle_id, mask.access(soa,ptcl));
}, name);
#ifdef PP_USE_CUDA
cudaFree(fn_d);
#ifdef PP_USE_GPU
gpuFree(fn_d);
#endif

}

template <class DataTypes, typename MemSpace>
void CabM<DataTypes, MemSpace>::printMetrics() const {
// Sum number of empty cells
const auto activeSliceIdx = aosoa_->number_of_members-1;
auto mask = Cabana::slice<activeSliceIdx>(*aosoa_);
auto mask = Cabana::slice<CM_DT::size-1>(*aosoa_);
kkLidView padded_cells("num_padded_cells",1);
Kokkos::parallel_for("count_padding", capacity_,
KOKKOS_LAMBDA(const lid_t ptcl_id) {
Expand Down Expand Up @@ -319,8 +315,7 @@ namespace pumipic {
const auto soa_len = AoSoA_t::vector_length;

kkLidView mask(Kokkos::ViewAllocateWithoutInitializing("offsets_host"), capacity_);
const auto activeSliceIdx = aosoa_->number_of_members-1;
auto mask_slice = Cabana::slice<activeSliceIdx>(*aosoa_);
auto mask_slice = Cabana::slice<CM_DT::size-1>(*aosoa_);
Kokkos::parallel_for("copy_mask", capacity_,
KOKKOS_LAMBDA(const lid_t ptcl_id) {
mask(ptcl_id) = mask_slice(ptcl_id);
Expand Down Expand Up @@ -376,7 +371,7 @@ namespace pumipic {

template<class DataTypes, typename MemSpace>
template <class MSpace>
CabM<DataTypes, MemSpace>::Mirror<MSpace>* CabM<DataTypes, MemSpace>::copy() {
typename CabM<DataTypes, MemSpace>::template Mirror<MSpace>* CabM<DataTypes, MemSpace>::copy() {
if (std::is_same<memory_space, typename MSpace::memory_space>::value) {
fprintf(stderr, "[ERROR] Copy to same memory space not supported\n");
exit(EXIT_FAILURE);
Expand Down Expand Up @@ -436,7 +431,7 @@ namespace pumipic {
using typename ParticleStructure<DataTypes, MemSpace>::kkGidHostMirror;
using typename ParticleStructure<DataTypes, MemSpace>::MTVs;
template<std::size_t N>
using Slice = typename ParticleStructure<DataTypes, MemSpace>::Slice<N>;
using Slice = typename ParticleStructure<DataTypes, MemSpace>::template Slice<N>;

using host_space = Kokkos::HostSpace;
typedef Kokkos::TeamPolicy<execution_space> PolicyType;
Expand Down
3 changes: 1 addition & 2 deletions particle_structs/src/cabm/cabm_buildFns.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,7 @@ namespace pumipic {

const lid_t num_elements = particles_per_element.size();
const auto soa_len = AoSoA_t::vector_length;
const auto activeSliceIdx = aosoa_->number_of_members-1;
auto active = Cabana::slice<activeSliceIdx>(*aosoa_);
auto active = Cabana::slice<CM_DT::size-1>(*aosoa_);
Cabana::SimdPolicy<soa_len,execution_space> simd_policy(0, capacity_);
Cabana::simd_parallel_for(simd_policy,
KOKKOS_LAMBDA( const lid_t soa, const lid_t ptcl ) {
Expand Down
9 changes: 5 additions & 4 deletions particle_structs/src/cabm/cabm_migrate.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,14 @@ namespace pumipic {
}
}

PS_Comm_Waitall<device_type>(num_recv_ranks, count_recv_requests, MPI_STATUSES_IGNORE);
delete [] count_recv_requests;

// Gather sending particle data
// Perform an ex-sum on num_send_particles & num_recv_particles
kkLidView offset_send_particles("offset_send_particles", comm_size+1);
kkLidView offset_send_particles_temp(Kokkos::ViewAllocateWithoutInitializing("offset_send_particles_temp"), comm_size + 1);
exclusive_scan(num_send_particles, offset_send_particles);
exclusive_scan(num_send_particles, offset_send_particles, execution_space());
Kokkos::deep_copy(offset_send_particles_temp, offset_send_particles);
kkLidHostMirror offset_send_particles_host = deviceToHost(offset_send_particles);

Expand Down Expand Up @@ -103,8 +106,6 @@ namespace pumipic {
new_process,
send_index);

PS_Comm_Waitall<device_type>(num_recv_ranks, count_recv_requests, MPI_STATUSES_IGNORE);
delete [] count_recv_requests;

// Count the number of processes being sent to and recv from
lid_t num_sending_to = 0, num_receiving_from = 0;
Expand Down Expand Up @@ -135,7 +136,7 @@ namespace pumipic {

// Offset the recv particles
kkLidView offset_recv_particles("offset_recv_particles", comm_size+1);
exclusive_scan(num_recv_particles, offset_recv_particles);
exclusive_scan(num_recv_particles, offset_recv_particles, execution_space());
kkLidHostMirror offset_recv_particles_host = deviceToHost(offset_recv_particles);
int np_recv = offset_recv_particles_host(comm_size);

Expand Down
3 changes: 1 addition & 2 deletions particle_structs/src/cabm/cabm_rebuild.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ namespace pumipic {
const auto num_new_ptcls = new_particle_elements.size();
const auto soa_len = AoSoA_t::vector_length;
kkLidView elmDegree_d("elmDegree", num_elems);
const auto activeSliceIdx = aosoa_->number_of_members-1;
auto active = Cabana::slice<activeSliceIdx>(*aosoa_);
auto active = Cabana::slice<CM_DT::size-1>(*aosoa_);

// first loop to count number of particles per new element (atomic)
assert(new_element.size() == capacity_);
Expand Down
14 changes: 4 additions & 10 deletions particle_structs/src/csr/CSR.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,13 +183,7 @@ namespace pumipic {
void CSR<DataTypes, MemSpace>::parallel_for(FunctionType& fn, std::string name) {
if (nPtcls() == 0)
return;
FunctionType* fn_d;
#ifdef PP_USE_CUDA
cudaMalloc(&fn_d, sizeof(FunctionType));
cudaMemcpy(fn_d,&fn, sizeof(FunctionType), cudaMemcpyHostToDevice);
#else
fn_d = &fn;
#endif
FunctionType* fn_d = gpuMemcpy(fn);
const lid_t league_size = num_elems;
const lid_t team_size = policy.team_size();
const PolicyType policy(league_size, team_size);
Expand All @@ -209,8 +203,8 @@ namespace pumipic {
(*fn_d)(elm, particle_id, mask);
});
});
#ifdef PP_USE_CUDA
cudaFree(fn_d);
#ifdef PP_USE_GPU
gpuFree(fn_d);
#endif
}

Expand Down Expand Up @@ -269,7 +263,7 @@ namespace pumipic {

template<class DataTypes, typename MemSpace>
template <class MSpace>
CSR<DataTypes, MemSpace>::Mirror<MSpace>* CSR<DataTypes, MemSpace>::copy() {
typename CSR<DataTypes, MemSpace>::template Mirror<MSpace>* CSR<DataTypes, MemSpace>::copy() {
if (std::is_same<memory_space, typename MSpace::memory_space>::value) {
fprintf(stderr, "[ERROR] Copy to same memory space not supported\n");
exit(EXIT_FAILURE);
Expand Down
2 changes: 1 addition & 1 deletion particle_structs/src/csr/CSR_buildFns.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ namespace pumipic {
// see pumi-pic/support/SupportKK.h for the exclusive_scan helper function
offsets = kkLidView(Kokkos::ViewAllocateWithoutInitializing("offsets"), num_elems+1);
Kokkos::resize(ptcls_per_elem, ptcls_per_elem.size()+1);
exclusive_scan(ptcls_per_elem, offsets);
exclusive_scan(ptcls_per_elem, offsets, execution_space());

// get global ids
if (element_gids.size() > 0) {
Expand Down
Loading

0 comments on commit e5d7b77

Please sign in to comment.