@@ -310,21 +310,16 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
310310 const void * arg0 , const void * arg1 , bool send_first , int mode ,
311311 ompi_request_t * * req )
312312{
313- pmix_info_t pinfo , * results = NULL ;
313+ pmix_info_t * pinfo , * results = NULL ;
314314 size_t nresults ;
315- opal_process_name_t * name_array = NULL ;
316- char * tag = NULL ;
317- size_t proc_count ;
318- size_t cid_base = 0 ;
315+ opal_process_name_t opal_proc_name ;
319316 bool cid_base_set = false;
317+ char * tag = NULL ;
318+ size_t proc_count = 0 , rproc_count = 0 , cid_base = 0UL , ninfo ;
320319 int rc , leader_rank ;
321- int ret = OMPI_SUCCESS ;
322- pmix_proc_t * procs = NULL ;
323-
324- rc = ompi_group_to_proc_name_array (newcomm -> c_local_group , & name_array , & proc_count );
325- if (OPAL_UNLIKELY (OMPI_SUCCESS != rc )) {
326- return rc ;
327- }
320+ pmix_proc_t * procs ;
321+ void * grpinfo = NULL , * list = NULL ;
322+ pmix_data_array_t darray ;
328323
329324 switch (mode ) {
330325 case OMPI_COMM_CID_GROUP_NEW :
@@ -341,15 +336,71 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
341336 break ;
342337 }
343338
344- PMIX_INFO_LOAD (& pinfo , PMIX_GROUP_ASSIGN_CONTEXT_ID , NULL , PMIX_BOOL );
339+ grpinfo = PMIx_Info_list_start ();
340+ if (NULL == grpinfo ) {
341+ return OMPI_ERR_OUT_OF_RESOURCE ;
342+ }
343+
344+ rc = PMIx_Info_list_add (grpinfo , PMIX_GROUP_ASSIGN_CONTEXT_ID , NULL , PMIX_BOOL );
345+ if (PMIX_SUCCESS != rc ) {
346+ OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Info_list_add failed %s %d" , PMIx_Error_string (rc ), __LINE__ ));
347+ return OMPI_ERR_OUT_OF_RESOURCE ;
348+ }
349+
350+ list = PMIx_Info_list_start ();
351+
352+ size_t c_index = (size_t )newcomm -> c_index ;
353+ rc = PMIx_Info_list_add (list , PMIX_GROUP_LOCAL_CID , & c_index , PMIX_SIZE );
354+ if (PMIX_SUCCESS != rc ) {
355+ OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Info_list_add failed %s %d" , PMIx_Error_string (rc ), __LINE__ ));
356+ return OMPI_ERR_OUT_OF_RESOURCE ;
357+ }
358+
359+ rc = PMIx_Info_list_convert (list , & darray );
360+ if (PMIX_SUCCESS != rc ) {
361+ OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Info_list_convert failed %s %d" , PMIx_Error_string (rc ), __LINE__ ));
362+ return OMPI_ERR_OUT_OF_RESOURCE ;
363+ }
364+ rc = PMIx_Info_list_add (grpinfo , PMIX_GROUP_INFO , & darray , PMIX_DATA_ARRAY );
365+ if (PMIX_SUCCESS != rc ) {
366+ OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Info_list_add failed %s %d" , PMIx_Error_string (rc ), __LINE__ ));
367+ return OMPI_ERR_OUT_OF_RESOURCE ;
368+ }
369+ PMIx_Info_list_release (list );
370+ PMIX_DATA_ARRAY_DESTRUCT (& darray );
371+
372+
373+ rc = PMIx_Info_list_convert (grpinfo , & darray );
374+ if (PMIX_SUCCESS != rc ) {
375+ OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Info_list_convert failed %s %d" , PMIx_Error_string (rc ), __LINE__ ));
376+ return OMPI_ERR_OUT_OF_RESOURCE ;
377+ }
378+
379+ pinfo = (pmix_info_t * )darray .array ;
380+ ninfo = darray .size ;
381+ PMIx_Info_list_release (grpinfo );
382+
383+ proc_count = newcomm -> c_local_group -> grp_proc_count ;
384+ if ( OMPI_COMM_IS_INTER (newcomm ) ){
385+ rproc_count = newcomm -> c_remote_group -> grp_proc_count ;
386+ }
387+
388+ PMIX_PROC_CREATE (procs , proc_count + rproc_count );
345389
346- PMIX_PROC_CREATE (procs , proc_count );
347390 for (size_t i = 0 ; i < proc_count ; ++ i ) {
348- OPAL_PMIX_CONVERT_NAME (& procs [i ],& name_array [i ]);
391+ opal_proc_name = ompi_group_get_proc_name (newcomm -> c_local_group , i );
392+ OPAL_PMIX_CONVERT_NAME (& procs [i ],& opal_proc_name );
393+ }
394+ for (size_t i = 0 ; i < rproc_count ; ++ i ) {
395+ opal_proc_name = ompi_group_get_proc_name (newcomm -> c_remote_group , i );
396+ OPAL_PMIX_CONVERT_NAME (& procs [proc_count + i ],& opal_proc_name );
349397 }
350398
351- rc = PMIx_Group_construct (tag , procs , proc_count , & pinfo , 1 , & results , & nresults );
352- PMIX_INFO_DESTRUCT (& pinfo );
399+
400+ OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "calling PMIx_Group_construct - tag %s size %ld ninfo %ld cid_base %ld\n" ,
401+ tag , proc_count + rproc_count , ninfo , cid_base ));
402+ rc = PMIx_Group_construct (tag , procs , proc_count + rproc_count , pinfo , ninfo , & results , & nresults );
403+ PMIX_DATA_ARRAY_DESTRUCT (& darray );
353404 if (PMIX_SUCCESS != rc ) {
354405 char msg_string [1024 ];
355406 switch (rc ) {
@@ -361,7 +412,7 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
361412 "MPI_Comm_create_from_group/MPI_Intercomm_create_from_groups" ,
362413 msg_string );
363414
364- ret = MPI_ERR_UNSUPPORTED_OPERATION ;
415+ rc = MPI_ERR_UNSUPPORTED_OPERATION ;
365416 break ;
366417 case PMIX_ERR_NOT_SUPPORTED :
367418 sprintf (msg_string ,"PMIx server does not support PMIx Group operations" );
@@ -370,10 +421,10 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
370421 true,
371422 "MPI_Comm_create_from_group/MPI_Intercomm_create_from_groups" ,
372423 msg_string );
373- ret = MPI_ERR_UNSUPPORTED_OPERATION ;
424+ rc = MPI_ERR_UNSUPPORTED_OPERATION ;
374425 break ;
375426 default :
376- ret = opal_pmix_convert_status (rc );
427+ rc = opal_pmix_convert_status (rc );
377428 break ;
378429 }
379430 goto fn_exit ;
@@ -383,23 +434,28 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
383434 if (PMIX_CHECK_KEY (& results [i ], PMIX_GROUP_CONTEXT_ID )) {
384435 PMIX_VALUE_GET_NUMBER (rc , & results [i ].value , cid_base , size_t );
385436 if (PMIX_SUCCESS != rc ) {
386- ret = opal_pmix_convert_status (rc );
437+ rc = opal_pmix_convert_status (rc );
387438 goto fn_exit ;
388439 }
389440 cid_base_set = true;
390441 break ;
391442 }
392443 }
393444
445+ OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Group_construct - tag %s size %ld ninfo %ld cid_base %ld\n" ,
446+ tag , proc_count + rproc_count , ninfo , cid_base ));
447+
448+ /* destruct the group */
394449 rc = PMIx_Group_destruct (tag , NULL , 0 );
395450 if (PMIX_SUCCESS != rc ) {
396- ret = opal_pmix_convert_status (rc );
451+ OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Group_destruct failed %s" , PMIx_Error_string (rc )));
452+ rc = opal_pmix_convert_status (rc );
397453 goto fn_exit ;
398454 }
399455
400456 if (!cid_base_set ) {
401457 opal_show_help ("help-comm.txt" , "cid-base-not-set" , true);
402- ret = OMPI_ERROR ;
458+ rc = OMPI_ERROR ;
403459 goto fn_exit ;
404460 }
405461
@@ -416,12 +472,7 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
416472 procs = NULL ;
417473 }
418474
419- if (NULL != name_array ) {
420- free (name_array );
421- name_array = NULL ;
422- }
423-
424- return ret ;
475+ return rc ;
425476}
426477
427478static int ompi_comm_nextcid_ext_nb (ompi_communicator_t * newcomm , ompi_communicator_t * comm ,
@@ -446,6 +497,15 @@ static int ompi_comm_nextcid_ext_nb (ompi_communicator_t *newcomm, ompi_communic
446497 block = & comm -> c_contextidb ;
447498 }
448499
500+ for (unsigned int i = ompi_mpi_communicators .lowest_free ; i < mca_pml .pml_max_contextid ; ++ i ) {
501+ bool flag = opal_pointer_array_test_and_set_item (& ompi_mpi_communicators , i , newcomm );
502+ if (true == flag ) {
503+ newcomm -> c_index = i ;
504+ break ;
505+ }
506+ }
507+ assert (newcomm -> c_index > 2 );
508+
449509 if (NULL == arg1 ) {
450510 if (OMPI_COMM_CID_GROUP == mode || OMPI_COMM_CID_GROUP_NEW == mode ||
451511 !ompi_comm_extended_cid_block_available (& comm -> c_contextidb )) {
@@ -464,18 +524,11 @@ static int ompi_comm_nextcid_ext_nb (ompi_communicator_t *newcomm, ompi_communic
464524 is_new_block = true;
465525 }
466526
527+
467528 if (block != & newcomm -> c_contextidb ) {
468529 (void ) ompi_comm_extended_cid_block_new (block , & newcomm -> c_contextidb , is_new_block );
469530 }
470531
471- for (unsigned int i = ompi_mpi_communicators .lowest_free ; i < mca_pml .pml_max_contextid ; ++ i ) {
472- bool flag = opal_pointer_array_test_and_set_item (& ompi_mpi_communicators , i , newcomm );
473- if (true == flag ) {
474- newcomm -> c_index = i ;
475- break ;
476- }
477- }
478-
479532 newcomm -> c_contextid = newcomm -> c_contextidb .block_cid ;
480533
481534 opal_hash_table_set_value_ptr (& ompi_comm_hash , & newcomm -> c_contextid ,
@@ -498,7 +551,7 @@ int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_communicator_t *com
498551
499552 /* old CID algorighm */
500553
501- /* if we got here and comm is NULL then that means the app is invoking MPI-4 Sessions or later
554+ /* if we got here and comm is NULL then that means the app is invoking MPI-4 Sessions or later
502555 functions but the pml does not support these functions so return not supported */
503556 if (NULL == comm ) {
504557 char msg_string [1024 ];
@@ -963,6 +1016,64 @@ int ompi_comm_activate (ompi_communicator_t **newcomm, ompi_communicator_t *comm
9631016 return rc ;
9641017}
9651018
1019+ int ompi_comm_get_remote_cid (ompi_communicator_t * comm , int dest , uint32_t * remote_cid )
1020+ {
1021+ ompi_proc_t * ompi_proc ;
1022+ pmix_proc_t pmix_proc ;
1023+ pmix_info_t tinfo [2 ];
1024+ pmix_value_t * val = NULL ;
1025+ ompi_comm_extended_cid_t excid ;
1026+ int rc = OMPI_SUCCESS ;
1027+ size_t remote_cid64 ;
1028+
1029+ assert (NULL != remote_cid );
1030+
1031+ if (OMPI_COMM_IS_GLOBAL_INDEX (comm )) {
1032+ * remote_cid = comm -> c_index ;
1033+ } else {
1034+ ompi_proc = ompi_comm_peer_lookup (comm , dest );
1035+ OPAL_PMIX_CONVERT_NAME (& pmix_proc , & ompi_proc -> super .proc_name );
1036+
1037+ PMIx_Info_construct (& tinfo [0 ]);
1038+ PMIX_INFO_LOAD (& tinfo [0 ], PMIX_TIMEOUT , & ompi_pmix_connect_timeout , PMIX_UINT32 );
1039+
1040+ excid = ompi_comm_get_extended_cid (comm );
1041+
1042+ PMIX_INFO_CONSTRUCT (& tinfo [1 ]);
1043+ PMIX_INFO_LOAD (& tinfo [1 ], PMIX_GROUP_CONTEXT_ID , & excid .cid_base , PMIX_SIZE );
1044+ PMIX_INFO_SET_QUALIFIER (& tinfo [1 ]);
1045+ if (PMIX_SUCCESS != (rc = PMIx_Get (& pmix_proc , PMIX_GROUP_LOCAL_CID , tinfo , 2 , & val ))) {
1046+ OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Get failed for PMIX_GROUP_LOCAL_CID cid_base %ld %s" , excid .cid_base , PMIx_Error_string (rc )));
1047+ }
1048+
1049+ if (NULL == val ) {
1050+ OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Get failed for PMIX_GROUP_LOCAL_CID val returned NULL" ));
1051+ rc = OMPI_ERR_NOT_FOUND ;
1052+ goto done ;
1053+ }
1054+
1055+ if (val -> type != PMIX_SIZE ) {
1056+ OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Get failed for PMIX_GROUP_LOCAL_CID type mismatch" ));
1057+ rc = OMPI_ERR_TYPE_MISMATCH ;
1058+ goto done ;
1059+ }
1060+
1061+ if (PMIX_SUCCESS == rc ) {
1062+ PMIX_VALUE_GET_NUMBER (rc , val , remote_cid64 , size_t );
1063+ rc = OMPI_SUCCESS ;
1064+ * remote_cid = (uint32_t )remote_cid64 ;
1065+ OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Get PMIX_GROUP_LOCAL_CID %d for cid_base %ld" , * remote_cid , excid .cid_base ));
1066+ }
1067+ }
1068+
1069+ done :
1070+ if (NULL != val ) {
1071+ PMIX_VALUE_RELEASE (val );
1072+ }
1073+
1074+ return rc ;
1075+ }
1076+
9661077static int ompi_comm_activate_nb_complete (ompi_comm_request_t * request )
9671078{
9681079 ompi_comm_cid_context_t * context = (ompi_comm_cid_context_t * ) request -> context ;
0 commit comments