@@ -59,6 +59,7 @@ typedef struct shmStruct_st {
59
59
size_t nprocesses;
60
60
int barrier;
61
61
int sense;
62
+ cudaMemAllocationHandleType handleType;
62
63
int devices[MAX_DEVICES];
63
64
cudaMemPoolPtrExportData exportPtrData[MAX_DEVICES];
64
65
} shmStruct;
@@ -126,7 +127,7 @@ static void childProcess(int id) {
126
127
127
128
std::vector<cudaMemPool_t> pools (shm->nprocesses );
128
129
129
- cudaMemAllocationHandleType handleType = cudaMemHandleTypePosixFileDescriptor ;
130
+ cudaMemAllocationHandleType handleType = shm-> handleType ;
130
131
131
132
// Import mem pools from all the devices created in the master
132
133
// process using shareable handles received via socket
@@ -239,6 +240,7 @@ static void parentProcess(char *app) {
239
240
volatile shmStruct *shm = NULL ;
240
241
std::vector<void *> ptrs;
241
242
std::vector<Process> processes;
243
+ cudaMemAllocationHandleType handleType = cudaMemHandleTypeNone;
242
244
243
245
checkCudaErrors (cudaGetDeviceCount (&devCount));
244
246
std::vector<CUdevice> devices (devCount);
@@ -270,22 +272,32 @@ static void parentProcess(char *app) {
270
272
printf (" Device %d does not support cuda memory pools, skipping...\n " , i);
271
273
continue ;
272
274
}
273
- int deviceSupportsIpcHandle = 0 ;
274
- #if defined(__linux__)
275
- checkCudaErrors (cuDeviceGetAttribute (
276
- &deviceSupportsIpcHandle,
277
- CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED,
278
- devices[i]));
279
- #else
280
- cuDeviceGetAttribute (&deviceSupportsIpcHandle,
281
- CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED,
282
- devices[i]);
283
- #endif
284
-
285
- if (!deviceSupportsIpcHandle) {
286
- printf (" Device %d does not support CUDA IPC Handle, skipping...\n " , i);
275
+ int supportedHandleTypes = 0 ;
276
+ checkCudaErrors (cudaDeviceGetAttribute (&supportedHandleTypes,
277
+ cudaDevAttrMemoryPoolSupportedHandleTypes, i));
278
+ if (supportedHandleTypes == 0 ) {
279
+ printf (" Device %d does not support Memory pool based IPC, skipping...\n " , i);
287
280
continue ;
288
281
}
282
+
283
+ if (handleType == cudaMemHandleTypeNone) {
284
+ if (supportedHandleTypes & cudaMemHandleTypePosixFileDescriptor) {
285
+ handleType = cudaMemHandleTypePosixFileDescriptor;
286
+ }
287
+ else if (supportedHandleTypes & cudaMemHandleTypeWin32) {
288
+ handleType = cudaMemHandleTypeWin32;
289
+ }
290
+ else {
291
+ printf (" Device %d does not support any supported handle types, skipping...\n " , i);
292
+ continue ;
293
+ }
294
+ }
295
+ else {
296
+ if ((supportedHandleTypes & handleType) != handleType) {
297
+ printf (" Mixed handle types are not supported, waiving test\n " );
298
+ exit (EXIT_WAIVED);
299
+ }
300
+ }
289
301
// This sample requires two processes accessing each device, so we need
290
302
// to ensure exclusive or prohibited mode is not set
291
303
if (prop.computeMode != cudaComputeModeDefault) {
@@ -337,6 +349,11 @@ static void parentProcess(char *app) {
337
349
exit (EXIT_WAIVED);
338
350
}
339
351
352
+ if (handleType == cudaMemHandleTypeNone) {
353
+ printf (" No supported handle types found, waiving test\n " );
354
+ exit (EXIT_WAIVED);
355
+ }
356
+
340
357
std::vector<ShareableHandle> shareableHandles (shm->nprocesses );
341
358
std::vector<cudaStream_t> streams (shm->nprocesses );
342
359
std::vector<cudaMemPool_t> pools (shm->nprocesses );
@@ -352,16 +369,14 @@ static void parentProcess(char *app) {
352
369
cudaMemPoolProps poolProps;
353
370
memset (&poolProps, 0 , sizeof (cudaMemPoolProps));
354
371
poolProps.allocType = cudaMemAllocationTypePinned;
355
- poolProps.handleTypes = cudaMemHandleTypePosixFileDescriptor ;
372
+ poolProps.handleTypes = handleType ;
356
373
357
374
poolProps.location .type = cudaMemLocationTypeDevice;
358
375
poolProps.location .id = shm->devices [i];
359
376
360
377
checkCudaErrors (cudaMemPoolCreate (&pools[i], &poolProps));
361
378
362
379
// Query the shareable handle for the pool
363
- cudaMemAllocationHandleType handleType =
364
- cudaMemHandleTypePosixFileDescriptor;
365
380
// Allocate memory in a stream from the pool just created
366
381
checkCudaErrors (cudaMallocAsync (&ptr, DATA_SIZE, pools[i], streams[i]));
367
382
@@ -378,6 +393,8 @@ static void parentProcess(char *app) {
378
393
ptrs.push_back (ptr);
379
394
}
380
395
396
+ shm->handleType = handleType;
397
+
381
398
// Launch the child processes!
382
399
for (i = 0 ; i < shm->nprocesses ; i++) {
383
400
char devIdx[10 ];
@@ -430,7 +447,7 @@ static void parentProcess(char *app) {
430
447
int main (int argc, char **argv) {
431
448
#if defined(__arm__) || defined(__aarch64__) || defined(WIN32) || \
432
449
defined (_WIN32) || defined (WIN64) || defined (_WIN64)
433
- printf (" Not supported on ARM\n " );
450
+ printf (" Not supported on ARM or Windows \n " );
434
451
return EXIT_WAIVED;
435
452
#else
436
453
if (argc == 1 ) {
0 commit comments