@@ -229,41 +229,58 @@ inline region_t allocate(
229
229
return allocate_in_current_context (size_in_bytes, stream_handle);
230
230
}
231
231
232
- } // namespace detail_
233
-
234
- // / Free a region of device-side memory (regardless of how it was allocated)
235
- inline void free (void * ptr)
232
+ #if CUDA_VERSION >= 11020 // the stream-taking signature is compiled only for CUDA_VERSION >= 11020
233
+ inline void free ( // frees the region starting at allocated_region_start: via cuMemFreeAsync when a stream handle is passed, via cuMemFree otherwise
234
+ context::handle_t context_handle, // in the visible body, used only to identify the stream in the async-failure message
235
+ void * allocated_region_start, // start address of the region to free
236
+ optional<stream::handle_t > stream_handle = {}) // defaults to {}; tested via `if (stream_handle)` in the body
237
+ #else // signature without the stream parameter
238
+ inline void free (
239
+ context::handle_t context_handle, // NOTE(review): not referenced by the body in this (#else) configuration
240
+ void * allocated_region_start)
241
+ #endif
236
242
{
237
- auto result = cuMemFree (address (ptr));
243
+ #if CUDA_VERSION >= 11020
244
+ if (stream_handle) { // a stream was supplied: schedule the free on it rather than calling cuMemFree
245
+ auto status = cuMemFreeAsync (device::address (allocated_region_start), *stream_handle);
246
+ throw_if_error_lazy (status, // presumably throws only when status is not a success - confirm against throw_if_error_lazy
247
+ " Failed scheduling an asynchronous freeing of the global memory region starting at "
248
+ + cuda::detail_::ptr_as_hex (allocated_region_start) + " on "
249
+ + stream::detail_::identify (*stream_handle, context_handle));
250
+ return ; // done; skip the cuMemFree path below
251
+ }
252
+ #endif
253
+ auto result = cuMemFree (address (allocated_region_start)); // reached when no stream was supplied, or when the async path is compiled out
238
254
#ifdef CAW_THROW_ON_FREE_IN_DESTROYED_CONTEXT
239
255
if (result == status::success) { return ; }
240
256
#else
241
257
if (result == status::success or result == status::context_is_destroyed) { return ; } // without CAW_THROW_ON_FREE_IN_DESTROYED_CONTEXT, a destroyed context is tolerated rather than reported
242
258
#endif
243
- throw runtime_error (result, " Freeing device memory at " + cuda::detail_::ptr_as_hex (ptr ));
259
+ throw runtime_error (result, " Freeing device memory at " + cuda::detail_::ptr_as_hex (allocated_region_start )); // any other result is reported to the caller
244
260
}
245
261
246
- // / @copydoc free(void*)
247
- inline void free (region_t region) { free (region.start ()); }
262
+ } // namespace detail_
248
263
264
+ /// Free a region of device-side memory (regardless of how it was allocated); declaration only - when CUDA_VERSION >= 11020 it also accepts an optional stream, which defaults to {}
249
265
#if CUDA_VERSION >= 11020
250
- namespace async {
251
-
252
- namespace detail_ {
266
+ inline void free (void * region_start, optional_ref<const stream_t > stream = {});
267
+ #else
268
+ inline void free (void * ptr);
269
+ #endif
253
270
254
- inline void free (
255
- context::handle_t context_handle,
256
- stream::handle_t stream_handle,
257
- void * allocated_region_start)
271
+ /// @copydoc free(void*, optional_ref<const stream_t>)
+ ///
+ /// Overload taking a region: forwards the region's start address to the
+ /// pointer-taking free(). The `stream` parameter is declared only when
+ /// CUDA_VERSION >= 11020, so it is forwarded only in that configuration.
272
+ #if CUDA_VERSION >= 11020
273
+ inline void free (region_t region, optional_ref<const stream_t > stream = {})
274
+ #else
275
+ inline void free (region_t region)
276
+ #endif
258
277
{
259
- auto status = cuMemFreeAsync (device::address (allocated_region_start), stream_handle);
260
- throw_if_error_lazy (status,
261
- " Failed scheduling an asynchronous freeing of the global memory region starting at "
262
- + cuda::detail_::ptr_as_hex (allocated_region_start) + " on "
263
- + stream::detail_::identify (stream_handle, context_handle) );
+ #if CUDA_VERSION >= 11020
278
+ free (region.start (), stream);
+ #else
+ // No `stream` parameter exists in this configuration; forward the pointer only
+ free (region.start ());
+ #endif
264
279
}
265
280
266
- } // namespace detail_
281
+ #if CUDA_VERSION >= 11020
282
+
283
+ namespace async {
267
284
268
285
/**
269
286
* Schedule a de-allocation of device-side memory on a CUDA stream.
0 commit comments