Skip to content

Commit

Permalink
Add sanity checks for predicates on vectors
Browse files Browse the repository at this point in the history
This commit adds four new sanity checks, `true_for_all`,
`false_for_all`, `true_for_any`, and `false_for_any` which basically do
what they say on the tin.
  • Loading branch information
stephenswat committed Sep 18, 2024
1 parent 884457a commit 9ff9e07
Show file tree
Hide file tree
Showing 7 changed files with 584 additions and 0 deletions.
3 changes: 3 additions & 0 deletions cmake/traccc-compiler-options-cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ set( CMAKE_CUDA_ARCHITECTURES "52" CACHE STRING
# not marked with __device__.
traccc_add_flag( CMAKE_CUDA_FLAGS "--expt-relaxed-constexpr" )

# Allow the use of lambdas with __device__ specifiers.
traccc_add_flag( CMAKE_CUDA_FLAGS "--extended-lambda" )

# Make CUDA generate debug symbols for the device code as well in a debug
# build.
traccc_add_flag( CMAKE_CUDA_FLAGS_DEBUG "-G --keep -src-in-ptx" )
Expand Down
129 changes: 129 additions & 0 deletions device/cuda/src/sanity/predicate.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/**
* traccc library, part of the ACTS project (R&D line)
*
* (c) 2024 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

// Project include(s).
#include "../utils/cuda_error_handling.hpp"
#include "traccc/cuda/utils/stream.hpp"

// VecMem include(s).
#include <vecmem/containers/data/vector_view.hpp>
#include <vecmem/containers/device_vector.hpp>
#include <vecmem/memory/memory_resource.hpp>
#include <vecmem/memory/unique_ptr.hpp>
#include <vecmem/utils/copy.hpp>

// CUDA include
#include <cuda_runtime.h>

// System include
#include <concepts>

namespace traccc::cuda {
namespace kernels {
template <typename P, typename T>
requires std::predicate<P, T> __global__ void true_for_all_kernel(
P projection, vecmem::data::vector_view<T> _in, bool* out) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;

vecmem::device_vector<T> in(_in);

if (tid < in.size()) {
if (!projection(in.at(tid))) {
*out = false;
}
}
}
} // namespace kernels

/**
* @brief Sanity check that a predicate is true for all elements of a vector.
*
* @note This function runs in O(n) time.
*
* @tparam P The type of the predicate.
* @tparam T The type of the vector.
* @param predicate A projection object of type `P`.
* @param mr A memory resource used for allocating intermediate memory.
* @param copy A copy object.
* @param stream A wrapped CUDA stream.
* @param vector The vector which to check for contiguity.
* @return true If `predicate` is true for all elements of `vector`.
* @return false Otherwise.
*/
template <typename P, typename T>
requires std::predicate<P, T> bool true_for_all(
P&& predicate, vecmem::memory_resource& mr, vecmem::copy& copy,
stream& stream, vecmem::data::vector_view<T> vector) {
// This should never be a performance-critical step, so we can keep the
// block size fixed.
constexpr int block_size = 512;

cudaStream_t cuda_stream =
reinterpret_cast<cudaStream_t>(stream.cudaStream());

// Grab the number of elements in our vector.
uint32_t n = copy.get_size(vector);

// Allocate memory for outputs, then set them up.
vecmem::unique_alloc_ptr<bool> device_out =
vecmem::make_unique_alloc<bool>(mr);

bool initial_out = true;

TRACCC_CUDA_ERROR_CHECK(
cudaMemcpyAsync(device_out.get(), &initial_out, sizeof(bool),
cudaMemcpyHostToDevice, cuda_stream));

// Launch the main kernel.
kernels::true_for_all_kernel<P, T>
<<<(n + block_size - 1) / block_size, block_size, 0, cuda_stream>>>(
predicate, vector, device_out.get());

TRACCC_CUDA_ERROR_CHECK(cudaGetLastError());

// Copy the total number of squashed elements, e.g. the size of the
// resulting vector.
bool host_out;

TRACCC_CUDA_ERROR_CHECK(
cudaMemcpyAsync(&host_out, device_out.get(), sizeof(bool),
cudaMemcpyDeviceToHost, cuda_stream));

stream.synchronize();

return host_out;
}

template <typename P, typename T>
requires std::predicate<P, T> bool false_for_all(
P&& projection, vecmem::memory_resource& mr, vecmem::copy& copy,
stream& stream, vecmem::data::vector_view<T> vector) {
return true_for_all(
[projection] __device__<typename... Args>(Args && ... args) {
return !projection(std::forward<Args>(args)...);
},
mr, copy, stream, vector);
}

template <typename P, typename T>
requires std::predicate<P, T> bool true_for_any(
P&& projection, vecmem::memory_resource& mr, vecmem::copy& copy,
stream& stream, vecmem::data::vector_view<T> vector) {
return !false_for_all(std::forward<P>(projection), mr, copy, stream,
vector);
}

template <typename P, typename T>
requires std::predicate<P, T> bool false_for_any(
P&& projection, vecmem::memory_resource& mr, vecmem::copy& copy,
stream& stream, vecmem::data::vector_view<T> vector) {
return !true_for_all(std::forward<P>(projection), mr, copy, stream, vector);
}
} // namespace traccc::cuda
125 changes: 125 additions & 0 deletions device/sycl/src/sanity/predicate.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/**
* traccc library, part of the ACTS project (R&D line)
*
* (c) 2024 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

// Project include(s).
#include <traccc/sycl/utils/queue_wrapper.hpp>

#include "../utils/get_queue.hpp"

// VecMem include(s).
#include <vecmem/containers/data/vector_view.hpp>
#include <vecmem/containers/device_vector.hpp>
#include <vecmem/memory/memory_resource.hpp>
#include <vecmem/memory/unique_ptr.hpp>
#include <vecmem/utils/copy.hpp>

// CUDA include
#include <cuda_runtime.h>

// System include
#include <concepts>

namespace traccc::sycl {

/**
* @brief Sanity check that a predicate is true for all elements of a vector.
*
* @note This function runs in O(n) time.
*
* @tparam P The type of the predicate.
* @tparam T The type of the vector.
* @param predicate A projection object of type `P`.
* @param mr A memory resource used for allocating intermediate memory.
* @param copy A copy object.
* @param stream A wrapped CUDA stream.
* @param vector The vector which to check for contiguity.
* @return true If `predicate` is true for all elements of `vector`.
* @return false Otherwise.
*/
template <typename P, typename T>
requires std::predicate<P, T> bool true_for_all(
P&& predicate, vecmem::memory_resource& mr, vecmem::copy& copy,
queue_wrapper& queue_wrapper, vecmem::data::vector_view<T> vector) {
// This should never be a performance-critical step, so we can keep the
// block size fixed.
constexpr int block_size = 512;

cl::sycl::queue& queue = details::get_queue(queue_wrapper);

// Grab the number of elements in our vector.
uint32_t n = copy.get_size(vector);

// Allocate memory for outputs, then set them up.
vecmem::unique_alloc_ptr<bool> device_out =
vecmem::make_unique_alloc<bool>(mr);

bool initial_out = true;

cl::sycl::event kernel1_memcpy1 =
queue.memcpy(out.get(), &initial_out, sizeof(bool));

// Launch the main kernel.
cl::sycl::nd_range<1> kernel_range{
cl::sycl::range<1>(((n + block_size - 1) / block_size) * block_size),
cl::sycl::range<1>(block_size)};

cl::sycl::event kernel1 = queue.submit([&](cl::sycl::handler& h) {
h.depends_on(kernel1_memcpy1);
h.parallel_for<kernels::TrueForAllPredicate<P>>(
kernel_range, [=, out = out.get()](cl::sycl::nd_item<1> item) {
std::size_t tid = item.get_global_linear_id();

vecmem::device_vector<T> in(_in);

if (tid < in.size()) {
if (!projection(in.at(tid))) {
*out = false;
}
}
});
});

// Copy the total number of squashed elements, e.g. the size of the
// resulting vector.
bool host_out;

queue.memcpy(&host_out, out.get(), sizeof(bool), {kernel1})
.wait_and_throw();

return host_out;
}

template <typename P, typename T>
requires std::predicate<P, T> bool false_for_all(
P&& projection, vecmem::memory_resource& mr, vecmem::copy& copy,
queue_wrapper& queue_wrapper, vecmem::data::vector_view<T> vector) {
return true_for_all(
[projection]<typename... Args>(Args && ... args) {
return !projection(std::forward<Args>(args)...);
},
mr, copy, queue_wrapper, vector);
}

template <typename P, typename T>
requires std::predicate<P, T> bool true_for_any(
P&& projection, vecmem::memory_resource& mr, vecmem::copy& copy,
queue_wrapper& queue_wrapper, vecmem::data::vector_view<T> vector) {
return !false_for_all(std::forward<P>(projection), mr, copy, queue_wrapper,
vector);
}

template <typename P, typename T>
requires std::predicate<P, T> bool false_for_any(
P&& projection, vecmem::memory_resource& mr, vecmem::copy& copy,
queue_wrapper& queue_wrapper, vecmem::data::vector_view<T> vector) {
return !true_for_all(std::forward<P>(projection), mr, copy, queue_wrapper,
vector);
}
} // namespace traccc::sycl
1 change: 1 addition & 0 deletions tests/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ traccc_add_test(
test_unique_lock.cu
test_sanity_contiguous_on.cu
test_sanity_ordered_on.cu
test_sanity_predicate.cu
test_sort.cu

LINK_LIBRARIES
Expand Down
Loading

0 comments on commit 9ff9e07

Please sign in to comment.