From e2ae685702638e3f8fae8091344f0f7ea045a1f9 Mon Sep 17 00:00:00 2001
From: Kamil Tokarski
Date: Mon, 11 Mar 2024 11:21:04 +0100
Subject: [PATCH] Use synchronous copy to framework array in the absence of
 a stream (#5364)

Signed-off-by: Kamil Tokarski
---
 dali/python/nvidia/dali/plugin/mxnet.py            | 3 ++-
 dali/python/nvidia/dali/plugin/paddle.py           | 3 ++-
 dali/python/nvidia/dali/plugin/pytorch/__init__.py | 6 ++++--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/dali/python/nvidia/dali/plugin/mxnet.py b/dali/python/nvidia/dali/plugin/mxnet.py
index 8205ee36ac3..2b9cf423ae4 100644
--- a/dali/python/nvidia/dali/plugin/mxnet.py
+++ b/dali/python/nvidia/dali/plugin/mxnet.py
@@ -72,12 +72,13 @@ def feed_ndarray(dali_tensor, arr, cuda_stream=None):
     ptr = ctypes.c_void_p()
     mx.base._LIB.MXNDArrayGetData(arr.handle, ctypes.byref(ptr))
 
+    non_blocking = cuda_stream is not None
     cuda_stream = types._raw_cuda_stream(cuda_stream)
 
     # Copy data from DALI tensor to ptr
     if isinstance(dali_tensor, (TensorGPU, TensorListGPU)):
         stream = None if cuda_stream is None else ctypes.c_void_p(cuda_stream)
-        dali_tensor.copy_to_external(ptr, stream, non_blocking=True)
+        dali_tensor.copy_to_external(ptr, stream, non_blocking=non_blocking)
     else:
         dali_tensor.copy_to_external(ptr)
 
diff --git a/dali/python/nvidia/dali/plugin/paddle.py b/dali/python/nvidia/dali/plugin/paddle.py
index e15a2bd40ce..04fc1208f46 100644
--- a/dali/python/nvidia/dali/plugin/paddle.py
+++ b/dali/python/nvidia/dali/plugin/paddle.py
@@ -73,12 +73,13 @@ def feed_ndarray(dali_tensor, ptr, cuda_stream=None):
                       (if not provided, an internal user stream will be selected)
     """
 
+    non_blocking = cuda_stream is not None
     cuda_stream = types._raw_cuda_stream(cuda_stream)
 
     c_type_pointer = ctypes.c_void_p(ptr)
     if isinstance(dali_tensor, (TensorGPU, TensorListGPU)):
         stream = None if cuda_stream is None else ctypes.c_void_p(cuda_stream)
-        dali_tensor.copy_to_external(c_type_pointer, stream, non_blocking=True)
+        dali_tensor.copy_to_external(c_type_pointer, stream, non_blocking=non_blocking)
     else:
         dali_tensor.copy_to_external(c_type_pointer)
     return ptr
diff --git a/dali/python/nvidia/dali/plugin/pytorch/__init__.py b/dali/python/nvidia/dali/plugin/pytorch/__init__.py
index 6bd17796d69..907bb1d820b 100644
--- a/dali/python/nvidia/dali/plugin/pytorch/__init__.py
+++ b/dali/python/nvidia/dali/plugin/pytorch/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2017-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -85,13 +85,15 @@ def feed_ndarray(
     ), "Shapes do not match: DALI tensor has size {0}, but PyTorch Tensor has size {1}".format(
         dali_tensor.shape(), list(arr.size())
     )
+
+    non_blocking = cuda_stream is not None
     cuda_stream = types._raw_cuda_stream(cuda_stream)
 
     # turn raw int to a c void pointer
     c_type_pointer = ctypes.c_void_p(arr.data_ptr())
     if isinstance(dali_tensor, (TensorGPU, TensorListGPU)):
         stream = None if cuda_stream is None else ctypes.c_void_p(cuda_stream)
-        dali_tensor.copy_to_external(c_type_pointer, stream, non_blocking=True)
+        dali_tensor.copy_to_external(c_type_pointer, stream, non_blocking=non_blocking)
     else:
         dali_tensor.copy_to_external(c_type_pointer)
     return arr
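
With this change, feed_ndarray in all three plugins performs a synchronous
copy when no CUDA stream is passed, and keeps the non-blocking copy only when
the caller supplies a stream; previously, omitting cuda_stream still issued a
non-blocking copy on an internal stream the caller had no handle to
synchronize on. A minimal usage sketch of the resulting contract with the
PyTorch plugin; the pipeline `pipe`, the torch.uint8 dtype, and the raw
stream handle are illustrative assumptions, not part of the patch:

    import torch

    from nvidia.dali.plugin.pytorch import feed_ndarray

    # `pipe` is an assumed, already-built DALI pipeline with one GPU output.
    (tensor_list,) = pipe.run()
    dali_tensor = tensor_list.as_tensor()  # requires uniformly shaped samples

    # The destination must match the DALI output's shape and element type;
    # torch.uint8 is an assumption for illustration.
    out = torch.empty(dali_tensor.shape(), dtype=torch.uint8, device="cuda")

    # No stream given: with this patch the copy is synchronous
    # (non_blocking=False internally), so `out` is ready on return.
    feed_ndarray(dali_tensor, out)

    # Stream given: the copy is enqueued non-blocking on that stream,
    # so the caller synchronizes before reading `out`.
    stream = torch.cuda.Stream()
    feed_ndarray(dali_tensor, out, cuda_stream=stream.cuda_stream)
    stream.synchronize()

Passing the raw handle (stream.cuda_stream) matches the parameter's
documented contract: a cudaStream_t handle or any value that can be cast to
cudaStream_t.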