diff --git a/src/PointNeighbors.jl b/src/PointNeighbors.jl index b7a8203..ec32a65 100644 --- a/src/PointNeighbors.jl +++ b/src/PointNeighbors.jl @@ -24,6 +24,7 @@ export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoo export DictionaryCellList, FullGridCellList export ParallelUpdate, SemiParallelUpdate, SerialUpdate export initialize!, update!, initialize_grid!, update_grid! +export PolyesterBackend, ThreadsDynamicBackend, ThreadsStaticBackend export PeriodicBox, copy_neighborhood_search end # module PointNeighbors diff --git a/src/neighborhood_search.jl b/src/neighborhood_search.jl index dacbc10..4f1ba6b 100644 --- a/src/neighborhood_search.jl +++ b/src/neighborhood_search.jl @@ -36,6 +36,16 @@ in this case to avoid unnecessary updates. The first flag in `points_moving` indicates if points in `x` are moving. The second flag indicates if points in `y` are moving. +!!! warning "Experimental Feature: Backend Specification" + The keyword argument `parallelization_backend` allows users to specify the + parallelization backend. This feature is currently considered experimental! + + Possible parallelization backends are: + - [`ThreadsDynamicBackend`](@ref) to use `Threads.@threads :dynamic` + - [`ThreadsStaticBackend`](@ref) to use `Threads.@threads :static` + - [`PolyesterBackend`](@ref) to use `Polyester.@batch` + - `KernelAbstractions.Backend` to launch a GPU kernel + See also [`initialize!`](@ref). """ @inline function update!(search::AbstractNeighborhoodSearch, x, y; @@ -130,7 +140,7 @@ Note that `system_coords` and `neighbor_coords` can be identical. See also [`initialize!`](@ref), [`update!`](@ref). """ function foreach_point_neighbor(f::T, system_coords, neighbor_coords, neighborhood_search; - parallel::Union{Bool, KernelAbstractions.Backend} = true, + parallel::Union{Bool, ParallelizationBackend} = true, points = axes(system_coords, 2)) where {T} # The type annotation above is to make Julia specialize on the type of the function. 
# Otherwise, unspecialized code will cause a lot of allocations @@ -141,8 +151,7 @@ function foreach_point_neighbor(f::T, system_coords, neighbor_coords, neighborho # threaded loop with `Polyester.@batch`, or, when `system_coords` is a GPU array, # launch the loop as a kernel on the GPU. parallel_ = Val(parallel) - elseif parallel isa KernelAbstractions.Backend - # WARNING! Undocumented, experimental feature: + elseif parallel isa ParallelizationBackend # When a `KernelAbstractions.Backend` is passed, launch the loop as a GPU kernel # on this backend. This is useful to test the GPU code on the CPU by passing # `parallel = KernelAbstractions.CPU()`, even though `system_coords isa Array`. @@ -165,7 +174,7 @@ end # When a `KernelAbstractions.Backend` is passed, launch a GPU kernel on this backend @inline function foreach_point_neighbor(f, system_coords, neighbor_coords, neighborhood_search, points, - backend::KernelAbstractions.Backend) + backend::ParallelizationBackend) @threaded backend for point in points foreach_neighbor(f, system_coords, neighbor_coords, neighborhood_search, point) end diff --git a/src/util.jl b/src/util.jl index d979ac2..c44e6ff 100644 --- a/src/util.jl +++ b/src/util.jl @@ -22,6 +22,35 @@ end return floor(Int, i) end +abstract type AbstractThreadingBackend end + +""" + PolyesterBackend() + +Pass as first argument to the [`@threaded`](@ref) macro to make the loop multithreaded +with `Polyester.@batch`. +""" +struct PolyesterBackend <: AbstractThreadingBackend end + +""" + ThreadsDynamicBackend() + +Pass as first argument to the [`@threaded`](@ref) macro to make the loop multithreaded +with `Threads.@threads :dynamic`. +""" +struct ThreadsDynamicBackend <: AbstractThreadingBackend end + +""" + ThreadsStaticBackend() + + +Pass as first argument to the [`@threaded`](@ref) macro to make the loop multithreaded +with `Threads.@threads :static`. 
+""" +struct ThreadsStaticBackend <: AbstractThreadingBackend end + +const ParallelizationBackend = Union{AbstractThreadingBackend, KernelAbstractions.Backend} + """ @threaded x for ... end @@ -30,15 +59,21 @@ Semantically the same as `Threads.@threads` when iterating over a `AbstractUnitR but without guarantee that the underlying implementation uses `Threads.@threads` or works for more general `for` loops. -The first argument must either be a `KernelAbstractions.Backend` or an array from which the -backend can be derived to determine if the loop must be run threaded on the CPU +The first argument must either be a parallelization backend (see below) or an array from +which the backend can be derived to determine if the loop must be run threaded on the CPU or launched as a kernel on the GPU. Passing `KernelAbstractions.CPU()` will run the GPU kernel on the CPU. +Possible parallelization backends are: +- [`PolyesterBackend`](@ref) to use `Polyester.@batch` +- [`ThreadsDynamicBackend`](@ref) to use `Threads.@threads :dynamic` +- [`ThreadsStaticBackend`](@ref) to use `Threads.@threads :static` +- `KernelAbstractions.Backend` to execute the loop as a GPU kernel + In particular, the underlying threading capabilities might be provided by other packages such as [Polyester.jl](https://github.com/JuliaSIMD/Polyester.jl). -!!! warn +!!! warning "Warning" This macro does not necessarily work for general `for` loops. For example, it does not necessarily support general iterables such as `eachline(filename)`. 
""" @@ -61,12 +96,27 @@ macro threaded(system, expr) end # Use `Polyester.@batch` for low-overhead threading +# This is currently the default when x::Array @inline function parallel_foreach(f, iterator, x) Polyester.@batch for i in iterator @inline f(i) end end +# Use `Threads.@threads :dynamic` +@inline function parallel_foreach(f, iterator, x::ThreadsDynamicBackend) + Threads.@threads :dynamic for i in iterator + @inline f(i) + end +end + +# Use `Threads.@threads :static` +@inline function parallel_foreach(f, iterator, x::ThreadsStaticBackend) + Threads.@threads :static for i in iterator + @inline f(i) + end +end + # On GPUs, execute `f` inside a GPU kernel with KernelAbstractions.jl @inline function parallel_foreach(f, iterator, x::Union{AbstractGPUArray, KernelAbstractions.Backend})