@@ -23,6 +23,10 @@ struct urMultiQueueLaunchMemcpyTest
2323 std::vector<ur_program_handle_t > programs;
2424 std::vector<ur_kernel_handle_t > kernels;
2525 std::vector<void *> SharedMem;
26+
27+ // Track P2P pairs that we successfully enabled in SetUp, so we can
28+ // disable them in TearDown without interfering with other tests
29+ std::set<std::pair<ur_device_handle_t , ur_device_handle_t >> enabledP2PPairs;
2630
2731 static constexpr char ProgramName[] = " increment" ;
2832 static constexpr size_t ArraySize = 100 ;
@@ -49,29 +53,49 @@ struct urMultiQueueLaunchMemcpyTest
4953 urDeviceGetInfo (devices[0 ], UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP,
5054 sizeof (usm_p2p_support), &usm_p2p_support, nullptr ));
5155 if (usm_p2p_support) {
56+ // Track unique physical device pairs to avoid enabling P2P multiple times.
57+ // Important: When devices are duplicated (same physical GPU appears multiple
58+ // times in the devices vector), we must only enable P2P once per unique
59+ // physical GPU pair to avoid CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED.
60+ // We store successfully enabled pairs in member variable so we can disable
61+ // them in TearDown, ensuring clean state for subsequent tests.
62+
5263 for (size_t i = 0 ; i < devices.size (); i++) {
53- for (size_t j = 0 ; j < devices.size (); j++) {
54- // Skip if same device (either by index or handle)
55- if (i != j && devices[i] != devices[j]) {
56- // First check if P2P is actually supported between this pair
57- int32_t p2p_access_supported = 0 ;
58- auto query_result = urUsmP2PPeerAccessGetInfoExp (
59- devices[i], devices[j], UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED,
60- sizeof (p2p_access_supported), &p2p_access_supported, nullptr );
61-
62- if (query_result != UR_RESULT_SUCCESS || !p2p_access_supported) {
63- // P2P not supported between this pair, skip the test
64- GTEST_SKIP () << " P2P access not supported between device pair ("
65- << i << " , " << j << " )" ;
66- }
67-
68- // Enable peer access from device[i] to device[j]
69- // Ignore errors - P2P may already be enabled by another test
70- auto result = urUsmP2PEnablePeerAccessExp (devices[i], devices[j]);
71- if (result != UR_RESULT_SUCCESS &&
72- result != UR_RESULT_ERROR_INVALID_OPERATION) {
73- ASSERT_SUCCESS (result);
74- }
64+ for (size_t j = i + 1 ; j < devices.size (); j++) {
65+ // Skip if same physical device (duplicate handles)
66+ if (devices[i] == devices[j]) {
67+ continue ;
68+ }
69+
70+ // Create canonical pair (always smaller handle first) to track unique pairs
71+ auto pair = devices[i] < devices[j]
72+ ? std::make_pair (devices[i], devices[j])
73+ : std::make_pair (devices[j], devices[i]);
74+
75+ // Skip if we already processed this physical device pair
76+ if (enabledP2PPairs.count (pair)) {
77+ continue ;
78+ }
79+
80+ // First check if P2P is actually supported between this physical pair
81+ int32_t p2p_access_supported = 0 ;
82+ auto query_result = urUsmP2PPeerAccessGetInfoExp (
83+ devices[i], devices[j], UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED,
84+ sizeof (p2p_access_supported), &p2p_access_supported, nullptr );
85+
86+ if (query_result != UR_RESULT_SUCCESS || !p2p_access_supported) {
87+ GTEST_SKIP () << " P2P access not supported between device pair ("
88+ << i << " , " << j << " )" ;
89+ }
90+
91+ // Enable bidirectional peer access (both directions needed for memcpy)
92+ auto result_ij = urUsmP2PEnablePeerAccessExp (devices[i], devices[j]);
93+ auto result_ji = urUsmP2PEnablePeerAccessExp (devices[j], devices[i]);
94+
95+ // Only track pairs that WE successfully enabled (not already-enabled ones)
96+ // This way TearDown only disables what we enabled, avoiding conflicts
97+ if (result_ij == UR_RESULT_SUCCESS && result_ji == UR_RESULT_SUCCESS) {
98+ enabledP2PPairs.insert (pair);
7599 }
76100 }
77101 }
@@ -133,10 +157,17 @@ struct urMultiQueueLaunchMemcpyTest
133157 urProgramRelease (program);
134158 }
135159
136- // NOTE: We intentionally do NOT disable P2P access here.
137- // P2P state is per-context and shared across all test instances.
138- // Disabling P2P in one test's TearDown would break other concurrent tests.
139- // P2P can safely remain enabled for the lifetime of the process.
160+ // Disable P2P access for pairs that WE enabled in SetUp.
161+ // This ensures clean state for subsequent tests and prevents race conditions
162+ // where multiple test instances try to enable the same P2P pairs.
163+ // We only disable pairs we successfully enabled (tracked in enabledP2PPairs).
164+ for (const auto &pair : enabledP2PPairs) {
165+ // Disable bidirectionally (both directions)
166+ // Ignore errors - pair might already be disabled by another test
167+ urUsmP2PDisablePeerAccessExp (pair.first , pair.second );
168+ urUsmP2PDisablePeerAccessExp (pair.second , pair.first );
169+ }
170+ enabledP2PPairs.clear ();
140171
141172 UUR_RETURN_ON_FATAL_FAILURE (
142173 uur::urMultiQueueMultiDeviceTestWithParam<minDevices, T>::TearDown ());
0 commit comments