Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,9 @@ if (NOT BUILD_TESTS_ONLY)
-fgpu-rdc
)

# Enable warp sync builtins
target_compile_definitions(${PROJECT_NAME} PRIVATE HIP_ENABLE_WARP_SYNC_BUILTINS=1)

#############################################################################
# INSTALL
#############################################################################
Expand Down
2 changes: 2 additions & 0 deletions src/envvar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ namespace envvar {
const var<std::string> provider("PROVIDER", "");
const var<bool> alternate_qp_ports("ALTERNATE_QP_PORTS", "", true);
const var<uint8_t> traffic_class("TRAFFIC_CLASS", "", 0);
const var<size_t> num_qps_per_pe_default_ctx("NUM_QPS_PER_PE_DEFAULT_CTX", "", 2);
const var<size_t> num_qps_per_pe_usr_ctx("NUM_QPS_PER_PE_USR_CTX", "", 2);
} // namespace gda

namespace _detail {
Expand Down
4 changes: 4 additions & 0 deletions src/envvar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,10 @@ namespace envvar {
extern const var<std::string> provider;
extern const var<bool> alternate_qp_ports;
extern const var<uint8_t> traffic_class;
// Number of QPs to create per PE for the default context
extern const var<size_t> num_qps_per_pe_default_ctx;
// Number of QPs to create per PE for each user context
extern const var<size_t> num_qps_per_pe_usr_ctx;
} // namespace gda
} // namespace envvar
} // namespace rocshmem
Expand Down
31 changes: 18 additions & 13 deletions src/gda/backend_gda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ void GDABackend::init() {

select_nic();

// Determine number of QPs to create per PE
num_qps_per_pe = envvar::gda::num_qps_per_pe_default_ctx.get_value() +
envvar::gda::num_qps_per_pe_usr_ctx.get_value() *
envvar::max_num_contexts;

// Total number of QPs created
num_qps = num_qps_per_pe * num_pes;

//TODO setup_host_interface();
/* Initialize the host interface */
if (MPI_COMM_NULL != backend_comm)
Expand Down Expand Up @@ -743,7 +751,7 @@ void GDABackend::exchange_qp_dest_info() {
dest_info[i].gid = gid;
}

for (size_t i = 0; i < envvar::max_num_contexts + 1; i++) {
for (size_t i = 0; i < num_qps_per_pe; i++) {
if (backend_comm != MPI_COMM_NULL) {
mpilib_ftable_.Alltoall(MPI_IN_PLACE, sizeof(dest_info_t), MPI_CHAR, dest_info.data() + i * num_pes, sizeof(dest_info_t), MPI_CHAR, backend_comm);
} else {
Expand Down Expand Up @@ -794,7 +802,7 @@ void GDABackend::setup_gpu_qps() {
size_t qp_objs_count;
size_t qp_objs_mem_size;

qp_objs_count = (envvar::max_num_contexts + 1) * num_pes;
qp_objs_count = num_qps;
qp_objs_mem_size = sizeof(QueuePair) * qp_objs_count;

CHECK_HIP(hipMalloc(&gpu_qps, qp_objs_mem_size));
Expand All @@ -813,7 +821,7 @@ void GDABackend::setup_gpu_qps() {
void GDABackend::cleanup_gpu_qps() {
size_t qp_objs_count;

qp_objs_count = (envvar::max_num_contexts + 1) * num_pes;
qp_objs_count = num_qps;

for (size_t i = 0; i < qp_objs_count; i++) {
host_qps[i].~QueuePair();
Expand Down Expand Up @@ -1031,23 +1039,20 @@ void GDABackend::modify_qps_rtr_to_rts() {

void GDABackend::create_queues() {
int ncqes;
size_t resize_length;

if (gda_provider == GDAProvider::IONIC) {
ncqes = envvar::sq_size << 1;
} else {
ncqes = envvar::sq_size;
}

resize_length = (envvar::max_num_contexts + 1) * num_pes;

dest_info.resize(resize_length);
cqs.resize(resize_length);
qps.resize(resize_length);
dest_info.resize(num_qps);
cqs.resize(num_qps);
qps.resize(num_qps);

bnxt_scqs.resize(resize_length);
bnxt_rcqs.resize(resize_length);
bnxt_qps.resize(resize_length);
bnxt_scqs.resize(num_qps);
bnxt_rcqs.resize(num_qps);
bnxt_qps.resize(num_qps);

if (gda_provider == GDAProvider::BNXT) {
bnxt_create_cqs(ncqes);
Expand Down Expand Up @@ -1092,7 +1097,7 @@ void GDABackend::alternate_qp_ports() {
*/

/* Re-Map each context */
for (size_t i = 1; i < (envvar::max_num_contexts + 1); i += 2) {
for (size_t i = 1; i < num_qps_per_pe; i += 2) {
for (size_t p = 0; p < num_pes; p += 2) {
cur_qp_idx = (i * num_pes) + p;
new_qp_idx = cur_qp_idx + 1;
Expand Down
13 changes: 13 additions & 0 deletions src/gda/backend_gda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,19 @@ class GDABackend : public Backend {
uint64_t *gpu_db_sq = nullptr;
/* GDA_IONIC END */

/**
* Number of QPs created per PE, computed as
*   ROCSHMEM_GDA_NUM_QPS_PER_PE_DEFAULT_CTX +
*   ROCSHMEM_GDA_NUM_QPS_PER_PE_USR_CTX * ROCSHMEM_MAX_NUM_CONTEXTS.
* The in-class initializer is only a placeholder until that value is
* assigned during backend initialization.
*/
size_t num_qps_per_pe {1};

/**
* Total number of QPs created across all PEs =
* num_qps_per_pe * num_pes.
* NOTE(review): this member is uint32_t while num_qps_per_pe is size_t, so
* the product is narrowed when it is stored here. Confirm the total always
* fits in 32 bits, or that the narrower type is intentional.
*/
uint32_t num_qps {1};

/**
* @brief Choose nic device according to locality/user preferences
*/
Expand Down
Loading