Add direct constructors for sparse arrays#684
Add direct constructors for sparse arrays #684 — nHackel wants to merge 12 commits into JuliaGPU:master from
Conversation
General GPUSparseMatrix defaults to COO, which defaults to sparse
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl
index 3bd3571..b76a237 100644
--- a/lib/JLArrays/src/JLArrays.jl
+++ b/lib/JLArrays/src/JLArrays.jl
@@ -150,7 +150,7 @@ mutable struct JLSparseMatrixCSC{Tv, Ti} <: GPUArrays.AbstractGPUSparseMatrixCSC
new{Tv, Ti}(colPtr, rowVal, nzVal, dims, length(nzVal))
end
end
-function GPUSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
+function GPUSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2, <:Integer}) where {Tv, Ti <: Integer}
return JLSparseMatrixCSC(colPtr, rowVal, nzVal, dims)
end
function JLSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
@@ -184,7 +184,7 @@ end
function JLSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
return JLSparseMatrixCSR{Tv, Ti}(rowPtr, colVal, nzVal, dims)
end
-function GPUSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
+function GPUSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2, <:Integer}) where {Tv, Ti <: Integer}
return JLSparseMatrixCSR(rowPtr, colVal, nzVal, dims)
end
function SparseArrays.SparseMatrixCSC(x::JLSparseMatrixCSR)
diff --git a/src/host/sparse.jl b/src/host/sparse.jl
index f12cfed..7be3e11 100644
--- a/src/host/sparse.jl
+++ b/src/host/sparse.jl
@@ -14,9 +14,9 @@ abstract type AbstractGPUSparseMatrixBSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, T
function GPUSparseMatrixCOO end
function GPUSparseMatrixCSC end
function GPUSparseMatrixCSR end
-function GPUSparseMatrixBSR end
+function GPUSparseMatrixBSR end
-function compute_sparse_pointers(indices::AbstractGPUVector{T}, n::Integer) where T
+function compute_sparse_pointers(indices::AbstractGPUVector{T}, n::Integer) where {T}
ptr = similar(indices, T, n + 1)
fill!(ptr, zero(T))
@@ -27,34 +27,34 @@ function compute_sparse_pointers(indices::AbstractGPUVector{T}, n::Integer) wher
end
Atomix.@atomic ptr[indices[idx] + 1] += one(T)
end
-
+
backend = get_backend(indices)
kernel! = count_indices(backend)
- kernel!(indices, ptr, ndrange=length(indices))
+ kernel!(indices, ptr, ndrange = length(indices))
return cumsum(ptr)
end
function GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, dims::NTuple{2, <:Integer}; format = default_sparse_format(I))
- m, n = dims
+ m, n = dims
if format == :coo
- GPUSparseMatrix(Val(:coo), I, J, V, dims)
+ GPUSparseMatrix(Val(:coo), I, J, V, dims)
elseif format == :csc
# Create composite key for sorting
key = J .* (m + 1) .+ I
perm = sortperm(key)
-
+
ptr = compute_sparse_pointers(J[perm], n)
return GPUSparseMatrix(Val(:csc), ptr, I[perm], V[perm], dims)
elseif format == :csr
# Create composite key for sorting
key = I .* (n + 1) .+ J
perm = sortperm(key)
-
+
ptr = compute_sparse_pointers(I[perm], m)
return GPUSparseMatrix(Val(:csr), ptr, J[perm], V[perm], dims)
else
throw(ArgumentError("Conversion to sparse format $format is not implemented"))
- end
+ end
end
diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl
index 146801e..ecfee75 100644
--- a/test/testsuite/sparse.jl
+++ b/test/testsuite/sparse.jl
@@ -154,11 +154,11 @@ end
# Helper function to derive direct matrix formats:
# Create colptr, rowval, nzval for m x n matrix with 3 values per column
-function csc_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer}=Int32) where {ET}
+function csc_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer} = Int32) where {ET}
# Fixed, deterministic 3 nnz per column; random nz values
colptr = Vector{I}(undef, n + 1)
rowval = Vector{I}()
- nzval = Vector{ET}()
+ nzval = Vector{ET}()
colptr[1] = I(1)
nnz_acc = 0
@@ -172,29 +172,29 @@ function csc_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer}=Int32) where
end
return colptr, rowval, nzval
end
-function csr_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer}=Int32) where {ET}
+function csr_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer} = Int32) where {ET}
# Build CSC for (n, m), then interpret as CSR for (m, n)
- colptr_nm, rowval_nm, nzval_nm = csc_vectors(n, m, ET; I=I)
+ colptr_nm, rowval_nm, nzval_nm = csc_vectors(n, m, ET; I = I)
rowptr = colptr_nm
colind = rowval_nm
- nzval = nzval_nm
+ nzval = nzval_nm
return rowptr, colind, nzval
end
# Construct appropriate sparse arrays
-function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSC}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer}=Int32) where {ET}
- colptr, rowval, nzval = csc_vectors(m, n, ET; I=I)
+function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSC}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer} = Int32) where {ET}
+ colptr, rowval, nzval = csc_vectors(m, n, ET; I = I)
dense_AT = GPUArrays.dense_array_type(AT)
d_colptr = dense_AT(colptr)
d_rowval = dense_AT(rowval)
- d_nzval = dense_AT(nzval)
+ d_nzval = dense_AT(nzval)
return GPUSparseMatrixCSC(d_colptr, d_rowval, d_nzval, (m, n))
end
-function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSR}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer}=Int32) where {ET}
- rowptr, colind, nzval = csr_vectors(m, n, ET; I=I)
+function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSR}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer} = Int32) where {ET}
+ rowptr, colind, nzval = csr_vectors(m, n, ET; I = I)
dense_AT = GPUArrays.dense_array_type(AT)
d_rowptr = dense_AT(rowptr)
d_colind = dense_AT(colind)
- d_nzval = dense_AT(nzval)
+ d_nzval = dense_AT(nzval)
return GPUSparseMatrixCSR(d_rowptr, d_colind, d_nzval, (m, n))
end
function direct_vector_construction(AT::Type{<:GPUArrays.AbstractGPUSparseMatrix}, eltypes)
@@ -205,6 +205,7 @@ function direct_vector_construction(AT::Type{<:GPUArrays.AbstractGPUSparseMatrix
@test x isa AT{ET}
@test size(x) == (m, n)
end
+ return
end
function direct_vector_construction(AT, eltypes)
# NOP |
General GPUSparseMatrix defaults to COO, which defaults to sparse
7f75c5e to
2301530
Compare
|
Sorry for neglecting this, I'll try to take a look today |
|
So one question I have is whether COO is really the best default format, or whether we should allow users/developers to set the format somehow with a |
|
@kshyatt that's a good question. From a user perspective, I think a changeable default matrix would make using the default constructor difficult unless the user can overwrite the default. Otherwise since the inputs are interpreted differently between the formats, a user would need to check the default format and construct the appropriate inputs. At which point they could also just call the appropriate "direct" constructor anyways. So maybe we have a default of: function GPUSparseMatrix(args...; kwargs..., format = :coo)
if format == :coo
GPUSparseMatrixCOO(args...; kwargs...)
elseif format == :csc
# ...
else
throw(ArgumentError("Unexpected format specifier $format"))
end
end
# Optionally in this setting CUDA could overwrite like this
GPUSparseMatrix(i::CuVector, args...; kwargs..., format = :csc) = ...
or with a default option from a backend via a function: function GPUSparseMatrix(gpu::GPUArray, args...; kwargs..., format = default_sparse_format(typeof(gpu)))
if format == :coo
GPUSparseMatrixCOO(args...; kwargs...)
elseif format == :csc
# ...
else
throw(ArgumentError("Unexpected format specifier $format"))
end
end
or we just drop the |
|
I think |
|
Should a default constructor always expect COO/normal function GPUSparseMatrix(I, J, V; format = default_sparse_format(I))
if format == :coo
GPUSparseMatrixCOO(I, J, V)
elseif format == :csc
# calculate colptr and use in function call
GPUSparseMatrixCSC(...)
elseif format == :csr
# calculate rowptr and use in function call
GPUSparseMatrixCSR(...)
# ...
end
end
or should it just pass along arguments as in the version in my previous comment? I think CUDA has a similar interface for |
|
Well I guess we could also pass a |
|
I can do that, my main worry/question is what we should do with the inputs for the default setting. If we just pass along the arguments, a user can run into this issue: colptr = ...
rowval = ...
vals = ...
GPUSparseMatrix(colptr, rowval, vals; format = :csc) # result should be as expected by user
# Would most likely result in transpose of desired matrix
# Since CSR expects arguments (rowptr, colval, vals)
# In this case the user chose that, in the case of a backend specific default, the user doesn't necessarily
# know the default backend
GPUSparseMatrix(colptr, rowval, vals; format = :csr)
If we always expect arguments to be in COO format, we could try to compute the appropriate indices: GPUSparseMatrix(I, J, V; format = default_sparse_format(I)) = GPUSparseMatrix(format, I, J, V)
GPUSparseMatrix(format::Symbol, args...) = GPUSparseMatrix(Val(format), args...)
function GPUSparseMatrix(::Val{:csc}, I, J, V)
colptr = # based on I
return GPUSparseMatrixCSC(colptr, J, V)
end
function GPUSparseMatrix(::Val{:csr}, I, J, V)
rowptr = # based on J
return GPUSparseMatrixCSR(rowptr, I, V)
end
or we drop the GPUSparseMatrix and just provide |
|
I agree users could be tricked here... but on the other hand at some point they have to use the constructor responsibly. It's also the case that they can inadvertently create a transposed matrix with |
|
Maybe the default could indeed be COO but then if the user passes a |
|
Then etc? |
|
That sounds like a good middle-ground between the options! I'll try that out as soon as I can |
|
@kshyatt do we want to support a "smart" default , i.e. function GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; format = default_sparse_format(I))
if format == :coo
GPUSparseMatrix(Val(:coo), I, J, V)
elseif format == :csc
# calculate colptr and use in function call
GPUSparseMatrix(Val(:csc), ...)
elseif format == :csr
# calculate rowptr and use in function call
GPUSparseMatrix(Val(:csr), ...)
# ...
end
endor a simple default: GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector) = GPUSparseMatrix(Val(:coo), I, J, V)and in either case have: GPUSparseMatrix(::Val{:coo}, I, J, V) = GPUSparseMatrixCOO(I, J, V)
GPUSparseMatrix(::Val{:csc}, colPtr, rowVal, nzVal) = GPUSparseMatrixCSC(colPtr, rowVal, nzVal)
GPUSparseMatrix(::Val{:csr}, rowPtr, colVal, nzVal) = GPUSparseMatrixCSR(rowPtr, colVal, nzVal)
GPUSparseMatrix(::Val{:bsr}, ...) = GPUSparseMatrixBSR(...)
I'm not sure if the work of implementing the smart default in a GPU compatible manner and the maintenance of it is proportional to the benefit/usage? |
|
Sorry, this fell off my radar again... I like the smart default idea! Would you be up to trying that? |
|
@kshyatt I've implemented an initial version of the "smart" default for conversion from :coo -> :csc and :csr. To get this to work I've had to use Atomix as a dependency (or use KernelAbstractions.Atomix). I didn't figure out a nice way to write tests for this though, since JLArrays doesn't implement any sorting or accumulation at the moment. I've tried extending JLArrays with AcceleratedKernels like it was done in AMDGPU.jl: # in JLArrays.jl
import AcceleratedKernels as AK
# ...
## AcceleratedKernels, sorting and accumulation
Base.sort!(x::AnyJLArray; kwargs...) = (AK.sort!(x; kwargs...); return x)
Base.sortperm!(ix::AnyJLArray, x::AnyJLArray; kwargs...) = (AK.sortperm!(ix, x; kwargs...); return ix)
Base.sortperm(x::AnyJLArray; kwargs...) = sortperm!(JLArray(1:length(x)), x; kwargs...)
Base.accumulate!(op, B::AnyJLArray, A::AnyJLArray; init=zero(eltype(A)), kwargs...) =
AK.accumulate!(op, B, A, JLBackend(); init, kwargs...)
Base.accumulate(op, A::AnyJLArray; init=zero(eltype(A)), kwargs...) =
AK.accumulate(op, A, JLBackend(); init, kwargs...)
Base.cumsum(src::AnyJLArray; kwargs...) = AK.cumsum(src, JLBackend(); kwargs...)
Base.cumprod(src::AnyJLArray; kwargs...) = AK.cumprod(src, JLBackend(); kwargs...)but this results in the AK kernels throwing an error stating that With CUDA we now would have this behaviour: julia> using CUDA, GPUArrays
julia> I = [1, 1, 2, 2, 3, 3, 3, 4]
julia> J = [1, 2, 2, 4, 3, 4, 5, 6]
julia> V = [10, 20, 30, 40, 50, 60, 70, 80]
julia> shape = (4, 6)
julia> GPUSparseMatrix(cu(I), cu(J), cu(V), shape; format = :csr)
4×6 CUDA.CUSPARSE.CuSparseMatrixCSR{Int64, Int64} with 8 stored entries:
10 20 ⋅ ⋅ ⋅ ⋅
⋅ 30 ⋅ 40 ⋅ ⋅
⋅ ⋅ 50 60 70 ⋅
⋅ ⋅ ⋅ ⋅ ⋅ 80
julia> GPUSparseMatrix(cu(I), cu(J), cu(V), shape; format = :csc)
4×6 CUDA.CUSPARSE.CuSparseMatrixCSC{Int64, Int64} with 8 stored entries:
10 20 ⋅ ⋅ ⋅ ⋅
⋅ 30 ⋅ 40 ⋅ ⋅
⋅ ⋅ 50 60 70 ⋅
⋅ ⋅ ⋅ ⋅ ⋅ 80
At the moment the "smart" function is also missing some checks:
Once we have an idea on how to test this with JLArrays, I could add those checks and their respective tests. Then the kernel itself could be marked with |
|
I've also dropped the |
|
Hi, just dropping by to say a few things:
using GPUArrays
using LinearAlgebra
using SparseArrays
check_common_backend(args::Vararg{Any,N}) where {N} = only(unique(map(get_backend, args)))
mutable struct GenericGPUSparseMatrixCOO{
T<:Number,Ti<:Integer,V<:DenseVector{T},Vi<:DenseVector{Ti}
} <: GPUArrays.AbstractGPUSparseMatrix{T,Ti}
rowVal::Vi
colVal::Vi
nzVal::V
dims::NTuple{2,Int}
nnz::Ti
function GenericGPUSparseMatrixCOO(rowVal, colVal, nzVal, dims, nnz)
check_common_backend(rowVal, colVal, nzVal)
return new{
eltype(nzVal),
promote_type(eltype(rowVal), eltype(colVal)),
typeof(nzVal),
promote_type(typeof(rowVal), typeof(colVal)),
}(
rowVal, colVal, nzVal, dims, nnz
)
end
end
## AbstractArray interface
Base.size(A::GenericGPUSparseMatrixCOO) = A.dims
function Base.getindex(A::GenericGPUSparseMatrixCOO{T}, i::Integer, j::Integer) where {T}
(; rowVal, colVal, nzVal) = A
for k in eachindex(rowVal, colVal, nzVal)
if rowVal[k] == i && colVal[k] == j
return nzVal[k]
end
end
return zero(T)
end
## AbstractSparseArray interface
SparseArrays.nnz(A::GenericGPUSparseMatrixCOO) = A.nnz
SparseArrays.nonzeros(A::GenericGPUSparseMatrixCOO) = A.nzVal
## KernelAbstractions interface
function KernelAbstractions.get_backend(A::GenericGPUSparseMatrixCOO)
return check_common_backend(A.rowVal, A.colVal, A.nzVal)
end
## GPUArrays interface
# from the new sparse stuff
GPUArrays.dense_array_type(::GenericGPUSparseMatrixCOO{T,Ti,V,Vi}) where {T,Ti,V,Vi} = V
GPUArrays.sparse_array_type(::GenericGPUSparseMatrixCOO) = GenericGPUSparseMatrixCOO
GPUArrays.csc_type(::GenericGPUSparseMatrixCOO) = GenericGPUSparseMatrixCSC
GPUArrays.csr_type(::GenericGPUSparseMatrixCOO) = GenericGPUSparseMatrixCSR
GPUArrays.coo_type(::GenericGPUSparseMatrixCOO) = GenericGPUSparseMatrixCOO
# translation to device types? not sure it is needed?
# from this PR
function GPUArrays.GPUSparseMatrixCOO(rowVal, colVal, nzVal, dims, nnz)
return GenericGPUSparseMatrixCOO(rowVal, colVal, nzVal, dims, nnz)
end
Related: |
|
I'm happy with any solution that ends up with me having a direct/format specific constructor that is either supported directly in the backends or even just has a generic fallback 😅 We could reduce this PR to its original setup which just adds the generic functions, maybe keep a PR for the default constructor and then see how we can get a generic implementation going. At a quick glance, it seems like GenericSparseArrays.jl has a lot/most tooling for this already. Or we add GenericSparseArrays as a dependency to GPUArrays or we roll a new implementation |
|
My take is this PR has been chilling for long enough, so let's try to shape up what @nHackel already has and we can address further suggestions/a possible package extension in a subsequent PR. For JLArrays sorting/accumulation, you could do something ugly/hacky for now and just pipe the underlying (CPU) array to the existing Base method? |
This PR adds direct constructors for the sparse matrix types defined in GPUArrays.jl or rather adds a function for backends to generically define methods for, see also #677.
Changes:
There currently is no COO and BSR implementation for JLArrays. I could add one as well, at least for the BSR I'd need to take a look at the format first or adapt from maybe CUDA.jl