Skip to content

Commit

Permalink
Fixes #678: Reworked unique_span; it's no longer templated on a delet…
Browse files Browse the repository at this point in the history
…er, but takes it by value
  • Loading branch information
eyalroz committed Sep 10, 2024
1 parent a9008d5 commit b7ef9ae
Show file tree
Hide file tree
Showing 6 changed files with 248 additions and 76 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ void outputBandwidthMatrix(P2PEngine mechanism, bool test_p2p, P2PDataTransfer p
int numElems = 10000000;
int repeat = 5;
vector<cuda::stream_t> streams;
vector<cuda::memory::device::unique_span<int>> buffers;
vector<cuda::memory::device::unique_span<int>> buffersD2D; // buffer for D2D, that is, intra-GPU copy
vector<cuda::unique_span<int>> buffers;
vector<cuda::unique_span<int>> buffersD2D; // buffer for D2D, that is, intra-GPU copy
vector<cuda::event_t> start;
vector<cuda::event_t> stop;

Expand Down Expand Up @@ -294,8 +294,8 @@ void outputBidirectionalBandwidthMatrix(P2PEngine p2p_mechanism, bool test_p2p)

vector<cuda::stream_t> streams_0;
vector<cuda::stream_t> streams_1;
vector<cuda::memory::device::unique_span<int>> buffers;
vector<cuda::memory::device::unique_span<int>> buffersD2D; // buffer for D2D, that is, intra-GPU copy
vector<cuda::unique_span<int>> buffers;
vector<cuda::unique_span<int>> buffersD2D; // buffer for D2D, that is, intra-GPU copy
vector<cuda::event_t> start;
vector<cuda::event_t> stop;

Expand Down Expand Up @@ -405,8 +405,8 @@ void outputLatencyMatrix(P2PEngine p2p_mechanism, bool test_p2p, P2PDataTransfer
//

vector<cuda::stream_t> streams;
vector<cuda::memory::device::unique_span<int>> buffers;
vector<cuda::memory::device::unique_span<int>> buffersD2D; // buffer for D2D, that is, intra-GPU copy
vector<cuda::unique_span<int>> buffers;
vector<cuda::unique_span<int>> buffersD2D; // buffer for D2D, that is, intra-GPU copy
vector<cuda::event_t> start;
vector<cuda::event_t> stop;

Expand Down
12 changes: 6 additions & 6 deletions examples/other/io_compute_overlap_with_streams.cu
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ constexpr I div_rounding_up(I dividend, const I2 divisor) noexcept
}

struct buffer_set_t {
cuda::memory::host::unique_span<element_t> host_lhs;
cuda::memory::host::unique_span<element_t> host_rhs;
cuda::memory::host::unique_span<element_t> host_result;
cuda::memory::device::unique_span<element_t> device_lhs;
cuda::memory::device::unique_span<element_t> device_rhs;
cuda::memory::device::unique_span<element_t> device_result;
cuda::unique_span<element_t> host_lhs;
cuda::unique_span<element_t> host_rhs;
cuda::unique_span<element_t> host_result;
cuda::unique_span<element_t> device_lhs;
cuda::unique_span<element_t> device_rhs;
cuda::unique_span<element_t> device_result;
};

std::vector<buffer_set_t> generate_buffers(
Expand Down
4 changes: 2 additions & 2 deletions examples/other/new_cpp_standard/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ cuda::device::id_t get_current_device_id()

void unique_spans()
{
cuda::memory::host::unique_span<float> data1(nullptr, 0);
cuda::memory::host::unique_span<float> data2(nullptr, 0);
cuda::unique_span<float> data1(nullptr, 0, cuda::detail_::default_span_deleter<float>);
cuda::unique_span<float> data2(nullptr, 0, cuda::detail_::default_span_deleter<float>);

data1 = std::move(data2);
}
Expand Down
154 changes: 123 additions & 31 deletions src/cuda/api/detail/unique_span.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,45 +33,46 @@ namespace cuda {
* included in, C++14. It can be thought of as a variation on std::array, with the size and capacity
* set dynamically, at construction time, rather than statically.
*
* @note unique_span = unique_span+typing or span+ownership+non_null
* @note unique_span = unique_span+typing or span+ownership+non_null . Well, sort of, because this
* class supports complex construction-allocation and deletion patterns, through deleter objects.
*
* @tparam T the type of individual elements in the unique_span
* @tparam T the type of an individual element in the unique_span
*/
template<typename T, typename Deleter = ::std::default_delete<T[]>>
template<typename T>
class unique_span : public ::cuda::span<T> {
public: // span types
using span_type = span<T>;

// Exposing some span type definitions, strictly for terseness
// (they're all visible on the outside anyway)
using size_type = typename span<T>::size_type;
using pointer = typename span<T>::pointer;
using reference = typename span<T>::reference;
using deleter_type = Deleter;
using size_type = typename span_type::size_type;
using pointer = typename span_type::pointer;
using reference = typename span_type::reference;
using deleter_type = void (*)(span_type);

public: // exposing span data members
using span<T>::data;
using span<T>::size;
public: // exposing span data members & adding our own
using span_type::data;
using span_type::size;
deleter_type deleter_;

public: // constructors and destructor

constexpr unique_span() noexcept = default;
// Note: span_type's default ctor will create a {nullptr, 0} empty span.
constexpr unique_span() noexcept : span_type(), deleter_{nullptr} {}

// Disable copy construction - as this class never allocates;
unique_span(const unique_span&) = delete;
// ... and also match other kinds of unique_span's, which may get converted into
// a span and thus leak memory on construction!
template<typename U, typename UDeleter>
unique_span(const unique_span<U, UDeleter>&) = delete;
template<typename U>
unique_span(const unique_span<U>&) = delete;

// Note: This template provides constructibility of unique_span<const T> from unique_span<T>
template<typename U, typename UDeleter>
unique_span(unique_span<U,UDeleter>&& other)
: unique_span{ other.release() }
template<typename U>
unique_span(unique_span<U>&& other) : unique_span{ other.release(), other.deleter_ }
{
static_assert(
::std::is_assignable<span_type, span<U>>::value and
::std::is_assignable<Deleter, UDeleter>::value,
::std::is_assignable<span_type, span<U>>::value,
"Invalid unique_span initializer");
}

Expand All @@ -81,25 +82,33 @@ class unique_span : public ::cuda::span<T> {
/// of a non-owned span when passing to a function, then trying to release that
/// memory returning from it.
///@{
explicit unique_span(span_type span) noexcept : span_type{span} { }
explicit unique_span(pointer data, size_type size) noexcept : unique_span{span_type{data, size}} { }
// Takes ownership of the memory described by `span`. The `deleter` is stored
// as-is in deleter_; the destructor later invokes it on the whole span, and
// only if data() is non-null.
explicit unique_span(span_type span, deleter_type deleter) noexcept
: span_type{span}, deleter_(deleter) { }
// Same as the span-taking constructor, for callers holding a separate
// (pointer, element count) pair; it simply delegates to that constructor.
explicit unique_span(pointer data, size_type size, deleter_type deleter) noexcept
: unique_span(span_type{data, size}, deleter) { }
/// Takes ownership of a raw memory region, viewing it as a sequence of T's.
///
/// The element count is `region.size() / sizeof(T)` (integer division).
///
/// @param region  the memory to own; its size is expected to be an exact
///                multiple of `sizeof(T)`
/// @param deleter invoked on the resulting span by the destructor
///
/// @throws ::std::invalid_argument when NDEBUG is not defined and the region
/// size is not an exact multiple of `sizeof(T)`. With NDEBUG defined, no
/// check is made and the trailing partial element is silently left out of
/// the span.
///
/// @note The check runs in the body of a delegating constructor, i.e. after
/// the target constructor has completed; so on a throw, the destructor runs
/// and hands the (truncated) span to the deleter.
explicit unique_span(memory::region_t region, deleter_type deleter) NOEXCEPT_IF_NDEBUG
    : unique_span(span_type{region.start(), region.size() / sizeof(T)}, deleter)
{
#ifndef NDEBUG
    // size is a member function of the span; it must be called here, as
    // naming it without the call does not compile once this is instantiated.
    if (sizeof(T) * size() != region.size()) {
        // Adjacent literals are concatenated verbatim, hence the trailing
        // space after "which".
        throw ::std::invalid_argument("Attempt to create a unique_span with a memory region which "
            "does not comprise an integral number of areas of the element type size");
    }
#endif
}

///@}

// Note: No constructor which also takes a deleter. We do not hold a deleter
// member - unlike unique_ptr's. Perhaps we should?

/** A move constructor.
*
* @note Moving is the only way a unique_span may have its @ref data_ field become
* null; the user is strongly assumed not to use the `unique_span` after moving from
* it.
*/
unique_span(unique_span&& other) noexcept : unique_span{ other.release() } { }
/// A move constructor.
///
/// Takes over the span which @p other releases, together with a copy of
/// other's deleter. After the move, @p other holds a {nullptr, 0} span (see
/// release()), so its destructor will not invoke the deleter - even though
/// other.deleter_ itself is left unchanged.
///
/// @TODO Can we drop this one in favor of the general move ctor?
unique_span(unique_span&& other) noexcept : unique_span(other.release(), other.deleter_) { }

~unique_span() noexcept
{
if (data() != nullptr) {
deleter_type{}(data());
deleter_(*this);
}
#ifndef NDEBUG
span_type::operator=(span_type{static_cast<T*>(nullptr), 0});
Expand Down Expand Up @@ -142,27 +151,110 @@ class unique_span : public ::cuda::span<T> {
* @note This is not marked nodiscard by the same argument as for std::unique_ptr;
* see also @url https://stackoverflow.com/q/60535399/1593077 and
* @url http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0600r1.pdf
*
* @note it is the caller's responsibility to ensure it has a copy of the deleter
* for the released span.
*/
span_type release() noexcept
{
    // Take a plain (non-owning) snapshot of what we currently own...
    span_type relinquished { data(), size() };
    // ... and then forget about it, so that our destructor finds a null
    // data pointer and does not invoke the deleter.
    span_type::operator=(span_type{ static_cast<T*>(nullptr), 0 });
    // deleter_ is deliberately left as it was; the caller is expected to
    // have obtained its own copy if the released memory is to be freed.
    return relinquished;
}
}; // class unique_span

namespace detail_ {

// A deleter for spans whose memory was obtained with an array new-expression
// (`new T[n]`): it releases that memory with the matching `delete[]`.
//
// @note This is not appropriate for every unique_span - only for memory
// which really did come from `new T[n]`. Hence, only one of the make_
// functions below uses it.
//
// @note A span with a null data pointer is fine to pass here, since
// applying delete[] to a null pointer is supported and does nothing.
template <typename T>
inline void default_span_deleter(span<T> sp)
{
    auto* const array_start = sp.data();
    delete[] array_start;
}

} // namespace detail_


/**
* A parallel of ::std::make_unique_for_overwrite, for @ref unique_span<T>'s, i.e. which maintains
* the number of elements allocated.
*
* @param size the number of elements in the unique_span to be created. It may legitimately be 0.
*
* @tparam T the type of elements in the allocated @ref unique_span.
*
* @param size The number of @tparam T elements to allocate
*/
template <typename T>
unique_span<T> make_unique_span(size_t size)
{
return unique_span<T>{ new T[size], size };
// Note: It _is_ acceptable to pass 0 here.
// See https://stackoverflow.com/q/1087042/1593077
return unique_span<T>(new T[size], size, detail_::default_span_deleter<T>);
}

namespace detail_ {

// Invokes the destructor of each element of the span, going from the first
// element to the last. The memory holding the elements is not released.
template <typename T>
inline void elementwise_destruct(span<T> sp)
{
    auto* cursor = sp.data();
    auto* const past_the_end = cursor + sp.size();
    for (; cursor != past_the_end; ++cursor) {
        cursor->~T();
    }
}

// A function object adapting a deleter of raw (untyped, `void *`) memory for
// use with a typed span<T>: the elements are destructed first, one by one,
// and then the span's memory is passed, as a `void *`, to the wrapped raw
// deleter.
//
// Note: Ignores alignment.
template <typename RawDeleter>
struct deleter_with_elementwise_destruction {
    // The wrapped deleter; must be invocable with a `void *`
    RawDeleter raw_deleter;

    template <typename T>
    void operator()(span<T> sp)
    {
        // Order matters: the elements must be destructed while their
        // memory is still allocated.
        elementwise_destruct(sp);
        raw_deleter(static_cast<void *>(sp.data()));
    }
};

// The free-function counterpart of deleter_with_elementwise_destruction:
// destructs the elements of `sp`, then passes the span's memory - as a
// `void *` - to `raw_deleter`.
template <typename T, typename RawDeleter>
void delete_with_elementwise_destruction(span<T> sp, RawDeleter raw_deleter)
{
    // Order matters: the elements must be destructed while their memory is
    // still allocated.
    elementwise_destruct(sp);
    raw_deleter(static_cast<void *>(sp.data()));
}

} // namespace detail_

/**
 * The alternative to `std::generate` and similar functions, for the unique_span, seeing
 * how its elements must be constructed as it is constructed.
 *
 * Raw storage for all elements is obtained with a single `::operator new` call; each
 * element is then constructed in place, in increasing index order, from the value
 * which the generator produces for its index. The returned unique_span carries a deleter
 * which destructs the elements one by one, then releases the storage with
 * `::operator delete`.
 *
 * @tparam T the type of elements in the allocated @ref unique_span.
 * @tparam Generator A type invokable with the element index, to produce a T-constructor-argument
 *
 * @param size the number of elements in the unique_span to be created. It may legitimately be 0.
 * @param generator_by_index a function for generating new values for move-construction
 * into the new unique_span; it is invoked once per element, with that element's index
 *
 * @return a unique_span owning the @p size newly-constructed elements
 */
template <typename T, typename Generator>
unique_span <T> generate_unique_span(size_t size, Generator generator_by_index) noexcept
{
    // NOTE(review): this function is declared noexcept, yet ::operator new, the
    // generator and T's constructor may each throw - in which case the program
    // would be terminated. Please confirm this is the intended behavior.

    // Q: Do I need to check the alignment here? Perhaps allocate more to ensure alignment?
    void* const raw_storage = ::operator new(sizeof(T) * size);
    T* const elements = static_cast<T*>(raw_storage);

    size_t index = 0;
    while (index < size) {
        new(&elements[index]) T(generator_by_index(index));
        ++index;
    }

    // Capture-less lambdas, so that the outer one can convert into the plain
    // function pointer which unique_span holds as its deleter.
    return unique_span<T>(
        elements, size,
        [](span<T> sp) {
            detail_::delete_with_elementwise_destruction(
                sp, [](void* ptr) { ::operator delete(ptr); });
        });
}

} // namespace cuda
Expand Down
Loading

0 comments on commit b7ef9ae

Please sign in to comment.