diff --git a/Project.toml b/Project.toml
index 9c96fe55..265f39bb 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ version = "11.4.0"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
+Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
 GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
 KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
@@ -24,6 +25,7 @@ JLD2Ext = "JLD2"
 
 [compat]
 Adapt = "4.0"
+Atomix = "1"
 GPUArraysCore = "= 0.2.0"
 JLD2 = "0.4, 0.5, 0.6"
 KernelAbstractions = "0.9.28, 0.10"
diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl
index 14a4d296..3bd35712 100644
--- a/lib/JLArrays/src/JLArrays.jl
+++ b/lib/JLArrays/src/JLArrays.jl
@@ -13,7 +13,7 @@ using GPUArrays
 using Adapt
 using SparseArrays, LinearAlgebra
 
-import GPUArrays: dense_array_type
+import GPUArrays: dense_array_type, GPUSparseMatrixCSC, GPUSparseMatrixCSR
 import KernelAbstractions
 import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config
 
@@ -150,6 +150,9 @@ mutable struct JLSparseMatrixCSC{Tv, Ti} <: GPUArrays.AbstractGPUSparseMatrixCSC
         new{Tv, Ti}(colPtr, rowVal, nzVal, dims, length(nzVal))
     end
 end
+function GPUSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
+    return JLSparseMatrixCSC(colPtr, rowVal, nzVal, dims)
+end
 function JLSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
     return JLSparseMatrixCSC{Tv, Ti}(colPtr, rowVal, nzVal, dims)
 end
@@ -181,6 +184,9 @@ end
 function JLSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
     return JLSparseMatrixCSR{Tv, Ti}(rowPtr, colVal, nzVal, dims)
 end
+function GPUSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
+    return JLSparseMatrixCSR(rowPtr, colVal, nzVal, dims)
+end
 function SparseArrays.SparseMatrixCSC(x::JLSparseMatrixCSR)
     x_transpose = SparseMatrixCSC(size(x, 2), size(x, 1), Array(x.rowPtr), Array(x.colVal), Array(x.nzVal))
     return SparseMatrixCSC(transpose(x_transpose))
diff --git a/src/GPUArrays.jl b/src/GPUArrays.jl
index a35c1ff0..8afa88e8 100644
--- a/src/GPUArrays.jl
+++ b/src/GPUArrays.jl
@@ -16,6 +16,7 @@ using Reexport
 @reexport using GPUArraysCore
 
 using KernelAbstractions
+using Atomix
 
 # device functionality
 include("device/abstractarray.jl")
diff --git a/src/host/sparse.jl b/src/host/sparse.jl
index c5608b9d..f12cfed1 100644
--- a/src/host/sparse.jl
+++ b/src/host/sparse.jl
@@ -1,5 +1,6 @@
 using LinearAlgebra
 using LinearAlgebra: BlasFloat
+export GPUSparseMatrix, GPUSparseMatrixCSC, GPUSparseMatrixCSR, GPUSparseMatrixCOO, GPUSparseMatrixBSR
 
 abstract type AbstractGPUSparseArray{Tv, Ti, N} <: AbstractSparseArray{Tv, Ti, N} end
 const AbstractGPUSparseVector{Tv, Ti} = AbstractGPUSparseArray{Tv, Ti, 1}
@@ -10,6 +11,59 @@ abstract type AbstractGPUSparseMatrixCSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, T
 abstract type AbstractGPUSparseMatrixCOO{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end
 abstract type AbstractGPUSparseMatrixBSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end
 
+function GPUSparseMatrixCOO end
+function GPUSparseMatrixCSC end
+function GPUSparseMatrixCSR end
+function GPUSparseMatrixBSR end
+
+function compute_sparse_pointers(indices::AbstractGPUVector{T}, n::Integer) where T
+    ptr = similar(indices, T, n + 1)
+    fill!(ptr, zero(T))
+
+    @kernel function count_indices(@Const(indices), ptr)
+        idx = @index(Global, Linear)
+        if idx == 1
+            ptr[idx] = one(T)
+        end
+        Atomix.@atomic ptr[indices[idx] + 1] += one(T)
+    end
+
+    backend = get_backend(indices)
+    kernel! = count_indices(backend)
+    kernel!(indices, ptr, ndrange=length(indices))
+    return cumsum(ptr)
+end
+
+function GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, dims::NTuple{2, <:Integer}; format = default_sparse_format(I))
+    m, n = dims
+    if format == :coo
+        GPUSparseMatrix(Val(:coo), I, J, V, dims)
+    elseif format == :csc
+        # Create composite key for sorting
+        key = J .* (m + 1) .+ I
+        perm = sortperm(key)
+
+        ptr = compute_sparse_pointers(J[perm], n)
+        return GPUSparseMatrix(Val(:csc), ptr, I[perm], V[perm], dims)
+    elseif format == :csr
+        # Create composite key for sorting
+        key = I .* (n + 1) .+ J
+        perm = sortperm(key)
+
+        ptr = compute_sparse_pointers(I[perm], m)
+        return GPUSparseMatrix(Val(:csr), ptr, J[perm], V[perm], dims)
+    else
+        throw(ArgumentError("Conversion to sparse format $format is not implemented"))
+    end
+end
+
+
+GPUSparseMatrix(::Val{:coo}, I, J, V, dims) = GPUSparseMatrixCOO(I, J, V, dims)
+GPUSparseMatrix(::Val{:csc}, colPtr, rowVal, nzVal, dims) = GPUSparseMatrixCSC(colPtr, rowVal, nzVal, dims)
+GPUSparseMatrix(::Val{:csr}, rowPtr, colVal, nzVal, dims) = GPUSparseMatrixCSR(rowPtr, colVal, nzVal, dims)
+#GPUSparseMatrix(::Val{:bsr}, ...) = GPUSparseMatrixBSR(...)
+
+
 const AbstractGPUSparseVecOrMat = Union{AbstractGPUSparseVector,AbstractGPUSparseMatrix}
 
 SparseArrays.nnz(g::T) where {T<:AbstractGPUSparseArray} = g.nnz
diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl
index a31abe6d..146801e6 100644
--- a/test/testsuite/sparse.jl
+++ b/test/testsuite/sparse.jl
@@ -9,6 +9,7 @@ elseif sparse_AT <: AbstractSparseMatrix
 
     matrix(sparse_AT, eltypes)
     matrix_construction(sparse_AT, eltypes)
+    direct_vector_construction(sparse_AT, eltypes)
    broadcasting_matrix(sparse_AT, eltypes)
     mapreduce_matrix(sparse_AT, eltypes)
     linalg(sparse_AT, eltypes)
@@ -151,6 +152,64 @@ function matrix_construction(AT, eltypes)
     end
 end
 
+# Helper function to derive direct matrix formats:
+# Create colptr, rowval, nzval for m x n matrix with 3 values per column
+function csc_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer}=Int32) where {ET}
+    # Fixed, deterministic 3 nnz per column; random nz values
+    colptr = Vector{I}(undef, n + 1)
+    rowval = Vector{I}()
+    nzval = Vector{ET}()
+
+    colptr[1] = I(1)
+    nnz_acc = 0
+    for j in 1:n
+        # Magic numbers
+        rows_j = sort(unique(mod.(j .+ (1, 7, 13), m) .+ 1))
+        append!(rowval, I.(rows_j))
+        append!(nzval, rand(ET, length(rows_j)))
+        nnz_acc += length(rows_j)
+        colptr[j + 1] = I(nnz_acc + 1)
+    end
+    return colptr, rowval, nzval
+end
+function csr_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer}=Int32) where {ET}
+    # Build CSC for (n, m), then interpret as CSR for (m, n)
+    colptr_nm, rowval_nm, nzval_nm = csc_vectors(n, m, ET; I=I)
+    rowptr = colptr_nm
+    colind = rowval_nm
+    nzval = nzval_nm
+    return rowptr, colind, nzval
+end
+# Construct appropriate sparse arrays
+function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSC}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer}=Int32) where {ET}
+    colptr, rowval, nzval = csc_vectors(m, n, ET; I=I)
+    dense_AT = GPUArrays.dense_array_type(AT)
+    d_colptr = dense_AT(colptr)
+    d_rowval = dense_AT(rowval)
+    d_nzval = dense_AT(nzval)
+    return GPUSparseMatrixCSC(d_colptr, d_rowval, d_nzval, (m, n))
+end
+function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSR}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer}=Int32) where {ET}
+    rowptr, colind, nzval = csr_vectors(m, n, ET; I=I)
+    dense_AT = GPUArrays.dense_array_type(AT)
+    d_rowptr = dense_AT(rowptr)
+    d_colind = dense_AT(colind)
+    d_nzval = dense_AT(nzval)
+    return GPUSparseMatrixCSR(d_rowptr, d_colind, d_nzval, (m, n))
+end
+function direct_vector_construction(AT::Type{<:GPUArrays.AbstractGPUSparseMatrix}, eltypes)
+    for ET in eltypes
+        m = 25
+        n = 35
+        x = construct_sparse_matrix(AT, ET, m, n)
+        @test x isa AT{ET}
+        @test size(x) == (m, n)
+    end
+end
+function direct_vector_construction(AT, eltypes)
+    # NOP
+end
+
 function broadcasting_vector(AT, eltypes)
     dense_AT = GPUArrays.dense_array_type(AT)
     for ET in eltypes