|  | 
| 1 | 1 | # reversing | 
| 2 | 2 | 
 | 
| 3 |  | -# the kernel works by treating the array as 1d. after reversing by dimension x an element at | 
| 4 |  | -# pos [i1, i2, i3, ... , i{x},            ..., i{n}] will be at | 
| 5 |  | -# pos [i1, i2, i3, ... , d{x} - i{x} + 1, ..., i{n}] where d{x} is the size of dimension x | 
| 6 |  | - | 
| 7 |  | -# out-of-place version, copying a single value per thread from input to output | 
| 8 |  | -function _reverse(input::AnyCuArray{T, N}, output::AnyCuArray{T, N}; | 
| 9 |  | -                  dims=1:ndims(input)) where {T, N} | 
| 10 |  | -    @assert size(input) == size(output) | 
| 11 |  | -    rev_dims = ntuple((d)-> d in dims && size(input, d) > 1, N) | 
| 12 |  | -    ref = size(input) .+ 1 | 
| 13 |  | -    # converts an ND-index in the data array to the linear index | 
| 14 |  | -    lin_idx = LinearIndices(input) | 
| 15 |  | -    # converts a linear index in the input array to an ND-index | 
| 16 |  | -    nd_idx = CartesianIndices(input) | 
| 17 |  | - | 
| 18 |  | -    ## COV_EXCL_START | 
| 19 |  | -    function kernel(input::AbstractArray{T, N}, output::AbstractArray{T, N}) where {T, N} | 
| 20 |  | -        offset_in = blockDim().x * (blockIdx().x - 1i32) | 
| 21 |  | -        index_in = offset_in + threadIdx().x | 
| 22 |  | - | 
| 23 |  | -        @inbounds if index_in <= length(input) | 
| 24 |  | -            idx = Tuple(nd_idx[index_in]) | 
| 25 |  | -            idx = ifelse.(rev_dims, ref .- idx, idx) | 
| 26 |  | -            index_out =  lin_idx[idx...] | 
| 27 |  | -            output[index_out] = input[index_in] | 
| 28 |  | -        end | 
| 29 |  | - | 
| 30 |  | -        return | 
| 31 |  | -    end | 
| 32 |  | -    ## COV_EXCL_STOP | 
| 33 |  | - | 
| 34 |  | -    nthreads = 256 | 
| 35 |  | -    nblocks = cld(length(input), nthreads) | 
| 36 |  | - | 
| 37 |  | -    @cuda threads=nthreads blocks=nblocks kernel(input, output) | 
| 38 |  | -end | 
| 39 |  | - | 
| 40 |  | -# in-place version, swapping elements on half the number of threads | 
| 41 |  | -function _reverse!(data::AnyCuArray{T, N}; dims=1:ndims(data)) where {T, N} | 
| 42 |  | -    rev_dims = ntuple((d)-> d in dims && size(data, d) > 1, N) | 
| 43 |  | -    half_dim = findlast(rev_dims) | 
| 44 |  | -    if isnothing(half_dim) | 
| 45 |  | -        # no reverse operation needed at all in this case. | 
| 46 |  | -        return | 
| 47 |  | -    end | 
| 48 |  | -    ref = size(data) .+ 1 | 
| 49 |  | -    # converts an ND-index in the data array to the linear index | 
| 50 |  | -    lin_idx = LinearIndices(data) | 
| 51 |  | -    reduced_size = ntuple((d)->ifelse(d==half_dim, cld(size(data,d),2), size(data,d)), N) | 
| 52 |  | -    reduced_length = prod(reduced_size) | 
| 53 |  | -    # converts a linear index in a reduced array to an ND-index, but using the reduced size | 
| 54 |  | -    nd_idx = CartesianIndices(reduced_size) | 
| 55 |  | - | 
| 56 |  | -    ## COV_EXCL_START | 
| 57 |  | -    function kernel(data::AbstractArray{T, N}) where {T, N} | 
| 58 |  | -        offset_in = blockDim().x * (blockIdx().x - 1i32) | 
| 59 |  | - | 
| 60 |  | -        index_in = offset_in + threadIdx().x | 
| 61 |  | - | 
| 62 |  | -        @inbounds if index_in <= reduced_length | 
| 63 |  | -            idx = Tuple(nd_idx[index_in]) | 
| 64 |  | -            index_in = lin_idx[idx...] | 
| 65 |  | -            idx = ifelse.(rev_dims, ref .- idx, idx) | 
| 66 |  | -            index_out =  lin_idx[idx...] | 
| 67 |  | - | 
| 68 |  | -            if index_in < index_out | 
| 69 |  | -                temp = data[index_out] | 
| 70 |  | -                data[index_out] = data[index_in] | 
| 71 |  | -                data[index_in] = temp | 
| 72 |  | -            end | 
| 73 |  | -        end | 
| 74 |  | - | 
| 75 |  | -        return | 
| 76 |  | -    end | 
| 77 |  | -    ## COV_EXCL_STOP | 
| 78 |  | - | 
| 79 |  | -    # NOTE: we launch slightly more than half the number of elements in the array as threads. | 
| 80 |  | -    # The last non-singleton dimension along which to reverse is used to define how the array is split. | 
| 81 |  | -    # Only the middle row in case of an odd array dimension could cause trouble, but this is prevented by | 
| 82 |  | -    # ignoring the threads that cross the mid-point | 
| 83 |  | - | 
| 84 |  | -    nthreads = 256 | 
| 85 |  | -    nblocks = cld(prod(reduced_size), nthreads) | 
| 86 |  | - | 
| 87 |  | -    @cuda threads=nthreads blocks=nblocks kernel(data) | 
| 88 |  | -end | 
|  | 3 | +# # the kernel works by treating the array as 1d. after reversing by dimension x an element at | 
|  | 4 | +# # pos [i1, i2, i3, ... , i{x},            ..., i{n}] will be at | 
|  | 5 | +# # pos [i1, i2, i3, ... , d{x} - i{x} + 1, ..., i{n}] where d{x} is the size of dimension x | 
|  | 6 | + | 
|  | 7 | +# # out-of-place version, copying a single value per thread from input to output | 
|  | 8 | +# function _reverse(input::AnyCuArray{T, N}, output::AnyCuArray{T, N}; | 
|  | 9 | +#                   dims=1:ndims(input)) where {T, N} | 
|  | 10 | +#     @assert size(input) == size(output) | 
|  | 11 | +#     rev_dims = ntuple((d)-> d in dims && size(input, d) > 1, N) | 
|  | 12 | +#     ref = size(input) .+ 1 | 
|  | 13 | +#     # converts an ND-index in the data array to the linear index | 
|  | 14 | +#     lin_idx = LinearIndices(input) | 
|  | 15 | +#     # converts a linear index in the input array to an ND-index | 
|  | 16 | +#     nd_idx = CartesianIndices(input) | 
|  | 17 | + | 
|  | 18 | +#     ## COV_EXCL_START | 
|  | 19 | +#     function kernel(input::AbstractArray{T, N}, output::AbstractArray{T, N}) where {T, N} | 
|  | 20 | +#         offset_in = blockDim().x * (blockIdx().x - 1i32) | 
|  | 21 | +#         index_in = offset_in + threadIdx().x | 
|  | 22 | + | 
|  | 23 | +#         @inbounds if index_in <= length(input) | 
|  | 24 | +#             idx = Tuple(nd_idx[index_in]) | 
|  | 25 | +#             idx = ifelse.(rev_dims, ref .- idx, idx) | 
|  | 26 | +#             index_out =  lin_idx[idx...] | 
|  | 27 | +#             output[index_out] = input[index_in] | 
|  | 28 | +#         end | 
|  | 29 | + | 
|  | 30 | +#         return | 
|  | 31 | +#     end | 
|  | 32 | +#     ## COV_EXCL_STOP | 
|  | 33 | + | 
|  | 34 | +#     nthreads = 256 | 
|  | 35 | +#     nblocks = cld(length(input), nthreads) | 
|  | 36 | + | 
|  | 37 | +#     @cuda threads=nthreads blocks=nblocks kernel(input, output) | 
|  | 38 | +# end | 
|  | 39 | + | 
|  | 40 | +# # in-place version, swapping elements on half the number of threads | 
|  | 41 | +# function _reverse!(data::AnyCuArray{T, N}; dims=1:ndims(data)) where {T, N} | 
|  | 42 | +#     rev_dims = ntuple((d)-> d in dims && size(data, d) > 1, N) | 
|  | 43 | +#     half_dim = findlast(rev_dims) | 
|  | 44 | +#     if isnothing(half_dim) | 
|  | 45 | +#         # no reverse operation needed at all in this case. | 
|  | 46 | +#         return | 
|  | 47 | +#     end | 
|  | 48 | +#     ref = size(data) .+ 1 | 
|  | 49 | +#     # converts an ND-index in the data array to the linear index | 
|  | 50 | +#     lin_idx = LinearIndices(data) | 
|  | 51 | +#     reduced_size = ntuple((d)->ifelse(d==half_dim, cld(size(data,d),2), size(data,d)), N) | 
|  | 52 | +#     reduced_length = prod(reduced_size) | 
|  | 53 | +#     # converts a linear index in a reduced array to an ND-index, but using the reduced size | 
|  | 54 | +#     nd_idx = CartesianIndices(reduced_size) | 
|  | 55 | + | 
|  | 56 | +#     ## COV_EXCL_START | 
|  | 57 | +#     function kernel(data::AbstractArray{T, N}) where {T, N} | 
|  | 58 | +#         offset_in = blockDim().x * (blockIdx().x - 1i32) | 
|  | 59 | + | 
|  | 60 | +#         index_in = offset_in + threadIdx().x | 
|  | 61 | + | 
|  | 62 | +#         @inbounds if index_in <= reduced_length | 
|  | 63 | +#             idx = Tuple(nd_idx[index_in]) | 
|  | 64 | +#             index_in = lin_idx[idx...] | 
|  | 65 | +#             idx = ifelse.(rev_dims, ref .- idx, idx) | 
|  | 66 | +#             index_out =  lin_idx[idx...] | 
|  | 67 | + | 
|  | 68 | +#             if index_in < index_out | 
|  | 69 | +#                 temp = data[index_out] | 
|  | 70 | +#                 data[index_out] = data[index_in] | 
|  | 71 | +#                 data[index_in] = temp | 
|  | 72 | +#             end | 
|  | 73 | +#         end | 
|  | 74 | + | 
|  | 75 | +#         return | 
|  | 76 | +#     end | 
|  | 77 | +#     ## COV_EXCL_STOP | 
|  | 78 | + | 
|  | 79 | +#     # NOTE: we launch slightly more than half the number of elements in the array as threads. | 
|  | 80 | +#     # The last non-singleton dimension along which to reverse is used to define how the array is split. | 
|  | 81 | +#     # Only the middle row in case of an odd array dimension could cause trouble, but this is prevented by | 
|  | 82 | +#     # ignoring the threads that cross the mid-point | 
|  | 83 | + | 
|  | 84 | +#     nthreads = 256 | 
|  | 85 | +#     nblocks = cld(prod(reduced_size), nthreads) | 
|  | 86 | + | 
|  | 87 | +#     @cuda threads=nthreads blocks=nblocks kernel(data) | 
|  | 88 | +# end | 
| 89 | 89 | 
 | 
| 90 | 90 | 
 | 
| 91 | 91 | # n-dimensional API | 
| 92 | 92 | 
 | 
| 93 |  | -function Base.reverse!(data::AnyCuArray{T, N}; dims=:) where {T, N} | 
| 94 |  | -    if isa(dims, Colon) | 
| 95 |  | -        dims = 1:ndims(data) | 
| 96 |  | -    end | 
| 97 |  | -    if !applicable(iterate, dims) | 
| 98 |  | -        throw(ArgumentError("dimension $dims is not an iterable")) | 
| 99 |  | -    end | 
| 100 |  | -    if !all(1 .≤ dims .≤ ndims(data)) | 
| 101 |  | -        throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(data))")) | 
| 102 |  | -    end | 
| 103 |  | - | 
| 104 |  | -    _reverse!(data; dims=dims) | 
| 105 |  | - | 
| 106 |  | -    return data | 
| 107 |  | -end | 
| 108 |  | - | 
| 109 |  | -# out-of-place | 
| 110 |  | -function Base.reverse(input::AnyCuArray{T, N}; dims=:) where {T, N} | 
| 111 |  | -    if isa(dims, Colon) | 
| 112 |  | -        dims = 1:ndims(input) | 
| 113 |  | -    end | 
| 114 |  | -    if !applicable(iterate, dims) | 
| 115 |  | -        throw(ArgumentError("dimension $dims is not an iterable")) | 
| 116 |  | -    end | 
| 117 |  | -    if !all(1 .≤ dims .≤ ndims(input)) | 
| 118 |  | -        throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(input))")) | 
| 119 |  | -    end | 
| 120 |  | - | 
| 121 |  | -    if all(size(input)[[dims...]].==1) | 
| 122 |  | -        # no reverse operation needed at all in this case. | 
| 123 |  | -        return copy(input) | 
| 124 |  | -    else | 
| 125 |  | -        output = similar(input) | 
| 126 |  | -        _reverse(input, output; dims=dims) | 
| 127 |  | -        return output | 
| 128 |  | -    end | 
| 129 |  | -end | 
|  | 93 | +# function Base.reverse!(data::AnyCuArray{T, N}; dims=:) where {T, N} | 
|  | 94 | +#     if isa(dims, Colon) | 
|  | 95 | +#         dims = 1:ndims(data) | 
|  | 96 | +#     end | 
|  | 97 | +#     if !applicable(iterate, dims) | 
|  | 98 | +#         throw(ArgumentError("dimension $dims is not an iterable")) | 
|  | 99 | +#     end | 
|  | 100 | +#     if !all(1 .≤ dims .≤ ndims(data)) | 
|  | 101 | +#         throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(data))")) | 
|  | 102 | +#     end | 
|  | 103 | + | 
|  | 104 | +#     _reverse!(data; dims=dims) | 
|  | 105 | + | 
|  | 106 | +#     return data | 
|  | 107 | +# end | 
|  | 108 | + | 
|  | 109 | +# # out-of-place | 
|  | 110 | +# function Base.reverse(input::AnyCuArray{T, N}; dims=:) where {T, N} | 
|  | 111 | +#     if isa(dims, Colon) | 
|  | 112 | +#         dims = 1:ndims(input) | 
|  | 113 | +#     end | 
|  | 114 | +#     if !applicable(iterate, dims) | 
|  | 115 | +#         throw(ArgumentError("dimension $dims is not an iterable")) | 
|  | 116 | +#     end | 
|  | 117 | +#     if !all(1 .≤ dims .≤ ndims(input)) | 
|  | 118 | +#         throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(input))")) | 
|  | 119 | +#     end | 
|  | 120 | + | 
|  | 121 | +#     if all(size(input)[[dims...]].==1) | 
|  | 122 | +#         # no reverse operation needed at all in this case. | 
|  | 123 | +#         return copy(input) | 
|  | 124 | +#     else | 
|  | 125 | +#         output = similar(input) | 
|  | 126 | +#         _reverse(input, output; dims=dims) | 
|  | 127 | +#         return output | 
|  | 128 | +#     end | 
|  | 129 | +# end | 
| 130 | 130 | 
 | 
| 131 | 131 | 
 | 
| 132 | 132 | # 1-dimensional API | 
| 133 | 133 | 
 | 
| 134 |  | -# in-place | 
| 135 |  | -Base.@propagate_inbounds function Base.reverse!(data::AnyCuVector{T}, start::Integer, | 
| 136 |  | -                                                stop::Integer=length(data)) where {T} | 
| 137 |  | -    _reverse!(view(data, start:stop)) | 
| 138 |  | -    return data | 
| 139 |  | -end | 
|  | 134 | +# # in-place | 
|  | 135 | +# Base.@propagate_inbounds function Base.reverse!(data::AnyCuVector{T}, start::Integer, | 
|  | 136 | +#                                                 stop::Integer=length(data)) where {T} | 
|  | 137 | +#     _reverse!(view(data, start:stop)) | 
|  | 138 | +#     return data | 
|  | 139 | +# end | 
| 140 | 140 | 
 | 
| 141 |  | -Base.reverse!(data::AnyCuVector{T}) where {T} = @inbounds reverse!(data, 1, length(data)) | 
|  | 141 | +# Base.reverse!(data::AnyCuVector{T}) where {T} = @inbounds reverse!(data, 1, length(data)) | 
| 142 | 142 | 
 | 
| 143 |  | -# out-of-place | 
| 144 |  | -Base.@propagate_inbounds function Base.reverse(input::AnyCuVector{T}, start::Integer, | 
| 145 |  | -                                               stop::Integer=length(input)) where {T} | 
| 146 |  | -    output = similar(input) | 
|  | 143 | +# # out-of-place | 
|  | 144 | +# Base.@propagate_inbounds function Base.reverse(input::AnyCuVector{T}, start::Integer, | 
|  | 145 | +#                                                stop::Integer=length(input)) where {T} | 
|  | 146 | +#     output = similar(input) | 
| 147 | 147 | 
 | 
| 148 |  | -    start > 1 && copyto!(output, 1, input, 1, start-1) | 
| 149 |  | -    _reverse(view(input, start:stop), view(output, start:stop)) | 
| 150 |  | -    stop < length(input) && copyto!(output, stop+1, input, stop+1) | 
|  | 148 | +#     start > 1 && copyto!(output, 1, input, 1, start-1) | 
|  | 149 | +#     _reverse(view(input, start:stop), view(output, start:stop)) | 
|  | 150 | +#     stop < length(input) && copyto!(output, stop+1, input, stop+1) | 
| 151 | 151 | 
 | 
| 152 |  | -    return output | 
| 153 |  | -end | 
|  | 152 | +#     return output | 
|  | 153 | +# end | 
| 154 | 154 | 
 | 
| 155 |  | -Base.reverse(data::AnyCuVector{T}) where {T} = @inbounds reverse(data, 1, length(data)) | 
|  | 155 | +# Base.reverse(data::AnyCuVector{T}) where {T} = @inbounds reverse(data, 1, length(data)) | 
0 commit comments