From 817326f3af6c8ddd87b47b99f009b509560a839f Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 15:31:21 +0100 Subject: [PATCH 01/11] Add direct constructors for CSC, CSR and COO matrices General GPUSparseMatrix defaults to COO, which defaults to sparse --- src/host/sparse.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index c5608b9d..c8110f2b 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -1,5 +1,6 @@ using LinearAlgebra using LinearAlgebra: BlasFloat +export GPUSparseMatrix, GPUSparseMatrixCSC, GPUSparseMatrixCSR, GPUSparseMatrixCOO abstract type AbstractGPUSparseArray{Tv, Ti, N} <: AbstractSparseArray{Tv, Ti, N} end const AbstractGPUSparseVector{Tv, Ti} = AbstractGPUSparseArray{Tv, Ti, 1} @@ -10,6 +11,12 @@ abstract type AbstractGPUSparseMatrixCSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, T abstract type AbstractGPUSparseMatrixCOO{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end abstract type AbstractGPUSparseMatrixBSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end +GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = GPUSparseMatrixCOO(I, J, V, args...; kwargs...) +GPUSparseMatrixCOO(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = sparse(I, J, V, args...; kwargs...) 
+function GPUSparseMatrixCSC end +function GPUSparseMatrixCSR end + + const AbstractGPUSparseVecOrMat = Union{AbstractGPUSparseVector,AbstractGPUSparseMatrix} SparseArrays.nnz(g::T) where {T<:AbstractGPUSparseArray} = g.nnz From 12919bd14f268b3755efcb0281672f5f86a71144 Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 16:01:33 +0100 Subject: [PATCH 02/11] Add JLArrays sparse constructors for CSC and CSR --- lib/JLArrays/src/JLArrays.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index 14a4d296..3bd35712 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -13,7 +13,7 @@ using GPUArrays using Adapt using SparseArrays, LinearAlgebra -import GPUArrays: dense_array_type +import GPUArrays: dense_array_type, GPUSparseMatrixCSC, GPUSparseMatrixCSR import KernelAbstractions import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config @@ -150,6 +150,9 @@ mutable struct JLSparseMatrixCSC{Tv, Ti} <: GPUArrays.AbstractGPUSparseMatrixCSC new{Tv, Ti}(colPtr, rowVal, nzVal, dims, length(nzVal)) end end +function GPUSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} + return JLSparseMatrixCSC(colPtr, rowVal, nzVal, dims) +end function JLSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} return JLSparseMatrixCSC{Tv, Ti}(colPtr, rowVal, nzVal, dims) end @@ -181,6 +184,9 @@ end function JLSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} return JLSparseMatrixCSR{Tv, Ti}(rowPtr, colVal, nzVal, dims) end +function GPUSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, 
dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} + return JLSparseMatrixCSR(rowPtr, colVal, nzVal, dims) +end function SparseArrays.SparseMatrixCSC(x::JLSparseMatrixCSR) x_transpose = SparseMatrixCSC(size(x, 2), size(x, 1), Array(x.rowPtr), Array(x.colVal), Array(x.nzVal)) return SparseMatrixCSC(transpose(x_transpose)) From d7491b2697cd777da62268538ae412e6019bb5fa Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 16:01:59 +0100 Subject: [PATCH 03/11] Add initial tests for direct sparse matrix constructors --- test/testsuite/sparse.jl | 59 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index a31abe6d..146801e6 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -9,6 +9,7 @@ elseif sparse_AT <: AbstractSparseMatrix matrix(sparse_AT, eltypes) matrix_construction(sparse_AT, eltypes) + direct_vector_construction(sparse_AT, eltypes) broadcasting_matrix(sparse_AT, eltypes) mapreduce_matrix(sparse_AT, eltypes) linalg(sparse_AT, eltypes) @@ -151,6 +152,64 @@ function matrix_construction(AT, eltypes) end end +# Helper function to derive direct matrix formats: +# Create colptr, rowval, nzval for m x n matrix with 3 values per column +function csc_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer}=Int32) where {ET} + # Fixed, deterministic 3 nnz per column; random nz values + colptr = Vector{I}(undef, n + 1) + rowval = Vector{I}() + nzval = Vector{ET}() + + colptr[1] = I(1) + nnz_acc = 0 + for j in 1:n + # Magic numbers + rows_j = sort(unique(mod.(j .+ (1, 7, 13), m) .+ 1)) + append!(rowval, I.(rows_j)) + append!(nzval, rand(ET, length(rows_j))) + nnz_acc += length(rows_j) + colptr[j + 1] = I(nnz_acc + 1) + end + return colptr, rowval, nzval +end +function csr_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer}=Int32) where {ET} + # Build CSC for (n, m), then interpret as CSR for (m, n) + colptr_nm, rowval_nm, nzval_nm = csc_vectors(n, m, ET; 
I=I) + rowptr = colptr_nm + colind = rowval_nm + nzval = nzval_nm + return rowptr, colind, nzval +end +# Construct appropriate sparse arrays +function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSC}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer}=Int32) where {ET} + colptr, rowval, nzval = csc_vectors(m, n, ET; I=I) + dense_AT = GPUArrays.dense_array_type(AT) + d_colptr = dense_AT(colptr) + d_rowval = dense_AT(rowval) + d_nzval = dense_AT(nzval) + return GPUSparseMatrixCSC(d_colptr, d_rowval, d_nzval, (m, n)) +end +function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSR}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer}=Int32) where {ET} + rowptr, colind, nzval = csr_vectors(m, n, ET; I=I) + dense_AT = GPUArrays.dense_array_type(AT) + d_rowptr = dense_AT(rowptr) + d_colind = dense_AT(colind) + d_nzval = dense_AT(nzval) + return GPUSparseMatrixCSR(d_rowptr, d_colind, d_nzval, (m, n)) +end +function direct_vector_construction(AT::Type{<:GPUArrays.AbstractGPUSparseMatrix}, eltypes) + for ET in eltypes + m = 25 + n = 35 + x = construct_sparse_matrix(AT, ET, m, n) + @test x isa AT{ET} + @test size(x) == (m, n) + end +end +function direct_vector_construction(AT, eltypes) + # NOP +end + function broadcasting_vector(AT, eltypes) dense_AT = GPUArrays.dense_array_type(AT) for ET in eltypes From 7f75c5ec0511d99ffa7772b4889d91b4355c9087 Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 16:16:16 +0100 Subject: [PATCH 04/11] Also export BSR function (no matching JLArray atm) --- src/host/sparse.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index c8110f2b..0ed43c65 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -1,6 +1,6 @@ using LinearAlgebra using LinearAlgebra: BlasFloat -export GPUSparseMatrix, GPUSparseMatrixCSC, GPUSparseMatrixCSR, GPUSparseMatrixCOO +export GPUSparseMatrix, GPUSparseMatrixCSC, GPUSparseMatrixCSR, GPUSparseMatrixCOO, 
GPUSparseMatrixBSR abstract type AbstractGPUSparseArray{Tv, Ti, N} <: AbstractSparseArray{Tv, Ti, N} end const AbstractGPUSparseVector{Tv, Ti} = AbstractGPUSparseArray{Tv, Ti, 1} @@ -15,6 +15,7 @@ GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector GPUSparseMatrixCOO(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = sparse(I, J, V, args...; kwargs...) function GPUSparseMatrixCSC end function GPUSparseMatrixCSR end +function GPUSparseMatrixBSR end const AbstractGPUSparseVecOrMat = Union{AbstractGPUSparseVector,AbstractGPUSparseMatrix} From bb73fc677f1d50897894f29b6637b8de443b71cd Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 15:31:21 +0100 Subject: [PATCH 05/11] Add direct constructors for CSC, CSR and COO matrices General GPUSparseMatrix defaults to COO, which defaults to sparse --- src/host/sparse.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index c5608b9d..c8110f2b 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -1,5 +1,6 @@ using LinearAlgebra using LinearAlgebra: BlasFloat +export GPUSparseMatrix, GPUSparseMatrixCSC, GPUSparseMatrixCSR, GPUSparseMatrixCOO abstract type AbstractGPUSparseArray{Tv, Ti, N} <: AbstractSparseArray{Tv, Ti, N} end const AbstractGPUSparseVector{Tv, Ti} = AbstractGPUSparseArray{Tv, Ti, 1} @@ -10,6 +11,12 @@ abstract type AbstractGPUSparseMatrixCSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, T abstract type AbstractGPUSparseMatrixCOO{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end abstract type AbstractGPUSparseMatrixBSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end +GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = GPUSparseMatrixCOO(I, J, V, args...; kwargs...) +GPUSparseMatrixCOO(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = sparse(I, J, V, args...; kwargs...) 
+function GPUSparseMatrixCSC end +function GPUSparseMatrixCSR end + + const AbstractGPUSparseVecOrMat = Union{AbstractGPUSparseVector,AbstractGPUSparseMatrix} SparseArrays.nnz(g::T) where {T<:AbstractGPUSparseArray} = g.nnz From 065e55edbc67409cdea4d1b17389ecd28edf282c Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 16:01:33 +0100 Subject: [PATCH 06/11] Add JLArrays sparse constructors for CSC and CSR --- lib/JLArrays/src/JLArrays.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index 14a4d296..3bd35712 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -13,7 +13,7 @@ using GPUArrays using Adapt using SparseArrays, LinearAlgebra -import GPUArrays: dense_array_type +import GPUArrays: dense_array_type, GPUSparseMatrixCSC, GPUSparseMatrixCSR import KernelAbstractions import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config @@ -150,6 +150,9 @@ mutable struct JLSparseMatrixCSC{Tv, Ti} <: GPUArrays.AbstractGPUSparseMatrixCSC new{Tv, Ti}(colPtr, rowVal, nzVal, dims, length(nzVal)) end end +function GPUSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} + return JLSparseMatrixCSC(colPtr, rowVal, nzVal, dims) +end function JLSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} return JLSparseMatrixCSC{Tv, Ti}(colPtr, rowVal, nzVal, dims) end @@ -181,6 +184,9 @@ end function JLSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} return JLSparseMatrixCSR{Tv, Ti}(rowPtr, colVal, nzVal, dims) end +function GPUSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, 
dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} + return JLSparseMatrixCSR(rowPtr, colVal, nzVal, dims) +end function SparseArrays.SparseMatrixCSC(x::JLSparseMatrixCSR) x_transpose = SparseMatrixCSC(size(x, 2), size(x, 1), Array(x.rowPtr), Array(x.colVal), Array(x.nzVal)) return SparseMatrixCSC(transpose(x_transpose)) From 21ddaafc2a8a1a18f8fd02477ab325794f467c04 Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 16:01:59 +0100 Subject: [PATCH 07/11] Add initial tests for direct sparse matrix constructors --- test/testsuite/sparse.jl | 59 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index a31abe6d..146801e6 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -9,6 +9,7 @@ elseif sparse_AT <: AbstractSparseMatrix matrix(sparse_AT, eltypes) matrix_construction(sparse_AT, eltypes) + direct_vector_construction(sparse_AT, eltypes) broadcasting_matrix(sparse_AT, eltypes) mapreduce_matrix(sparse_AT, eltypes) linalg(sparse_AT, eltypes) @@ -151,6 +152,64 @@ function matrix_construction(AT, eltypes) end end +# Helper function to derive direct matrix formats: +# Create colptr, rowval, nzval for m x n matrix with 3 values per column +function csc_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer}=Int32) where {ET} + # Fixed, deterministic 3 nnz per column; random nz values + colptr = Vector{I}(undef, n + 1) + rowval = Vector{I}() + nzval = Vector{ET}() + + colptr[1] = I(1) + nnz_acc = 0 + for j in 1:n + # Magic numbers + rows_j = sort(unique(mod.(j .+ (1, 7, 13), m) .+ 1)) + append!(rowval, I.(rows_j)) + append!(nzval, rand(ET, length(rows_j))) + nnz_acc += length(rows_j) + colptr[j + 1] = I(nnz_acc + 1) + end + return colptr, rowval, nzval +end +function csr_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer}=Int32) where {ET} + # Build CSC for (n, m), then interpret as CSR for (m, n) + colptr_nm, rowval_nm, nzval_nm = csc_vectors(n, m, ET; 
I=I) + rowptr = colptr_nm + colind = rowval_nm + nzval = nzval_nm + return rowptr, colind, nzval +end +# Construct appropriate sparse arrays +function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSC}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer}=Int32) where {ET} + colptr, rowval, nzval = csc_vectors(m, n, ET; I=I) + dense_AT = GPUArrays.dense_array_type(AT) + d_colptr = dense_AT(colptr) + d_rowval = dense_AT(rowval) + d_nzval = dense_AT(nzval) + return GPUSparseMatrixCSC(d_colptr, d_rowval, d_nzval, (m, n)) +end +function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSR}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer}=Int32) where {ET} + rowptr, colind, nzval = csr_vectors(m, n, ET; I=I) + dense_AT = GPUArrays.dense_array_type(AT) + d_rowptr = dense_AT(rowptr) + d_colind = dense_AT(colind) + d_nzval = dense_AT(nzval) + return GPUSparseMatrixCSR(d_rowptr, d_colind, d_nzval, (m, n)) +end +function direct_vector_construction(AT::Type{<:GPUArrays.AbstractGPUSparseMatrix}, eltypes) + for ET in eltypes + m = 25 + n = 35 + x = construct_sparse_matrix(AT, ET, m, n) + @test x isa AT{ET} + @test size(x) == (m, n) + end +end +function direct_vector_construction(AT, eltypes) + # NOP +end + function broadcasting_vector(AT, eltypes) dense_AT = GPUArrays.dense_array_type(AT) for ET in eltypes From 2301530e7f5c0f23356020e8fd451ed969c4b369 Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 16:16:16 +0100 Subject: [PATCH 08/11] Also export BSR function (no matching JLArray atm) --- src/host/sparse.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index c8110f2b..0ed43c65 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -1,6 +1,6 @@ using LinearAlgebra using LinearAlgebra: BlasFloat -export GPUSparseMatrix, GPUSparseMatrixCSC, GPUSparseMatrixCSR, GPUSparseMatrixCOO +export GPUSparseMatrix, GPUSparseMatrixCSC, GPUSparseMatrixCSR, GPUSparseMatrixCOO, 
GPUSparseMatrixBSR abstract type AbstractGPUSparseArray{Tv, Ti, N} <: AbstractSparseArray{Tv, Ti, N} end const AbstractGPUSparseVector{Tv, Ti} = AbstractGPUSparseArray{Tv, Ti, 1} @@ -15,6 +15,7 @@ GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector GPUSparseMatrixCOO(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = sparse(I, J, V, args...; kwargs...) function GPUSparseMatrixCSC end function GPUSparseMatrixCSR end +function GPUSparseMatrixBSR end const AbstractGPUSparseVecOrMat = Union{AbstractGPUSparseVector,AbstractGPUSparseMatrix} From 435f5f5178fe68bbb0ee8d6753eab9e19cd99d4b Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 13 Mar 2026 15:07:06 +0100 Subject: [PATCH 09/11] Add Atomix dependency for sparse default --- Project.toml | 2 ++ src/GPUArrays.jl | 1 + 2 files changed, 3 insertions(+) diff --git a/Project.toml b/Project.toml index 00977c2c..35621a57 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ version = "11.3.4" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" @@ -24,6 +25,7 @@ JLD2Ext = "JLD2" [compat] Adapt = "4.0" +Atomix = "1" GPUArraysCore = "= 0.2.0" JLD2 = "0.4, 0.5, 0.6" KernelAbstractions = "0.9.28, 0.10" diff --git a/src/GPUArrays.jl b/src/GPUArrays.jl index a35c1ff0..8afa88e8 100644 --- a/src/GPUArrays.jl +++ b/src/GPUArrays.jl @@ -16,6 +16,7 @@ using Reexport @reexport using GPUArraysCore using KernelAbstractions +using Atomix # device functionality include("device/abstractarray.jl") From df12698f35a189d531c4ab057f0b947c68c0d606 Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 13 Mar 2026 15:07:53 +0100 Subject: [PATCH 10/11] Add conversions from :coo to :csc and :csr --- src/host/sparse.jl | 50 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file 
changed, 48 insertions(+), 2 deletions(-) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index 0ed43c65..17456dff 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -11,11 +11,57 @@ abstract type AbstractGPUSparseMatrixCSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, T abstract type AbstractGPUSparseMatrixCOO{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end abstract type AbstractGPUSparseMatrixBSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end -GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = GPUSparseMatrixCOO(I, J, V, args...; kwargs...) GPUSparseMatrixCOO(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = sparse(I, J, V, args...; kwargs...) function GPUSparseMatrixCSC end function GPUSparseMatrixCSR end -function GPUSparseMatrixBSR end +function GPUSparseMatrixBSR end + +function compute_sparse_pointers(indices::AbstractGPUVector{T}, n::Integer) where T + ptr = similar(indices, T, n + 1) + fill!(ptr, zero(T)) + + @kernel function count_indices(@Const(indices), ptr) + idx = @index(Global, Linear) + if idx == 1 + ptr[idx] = one(T) + end + Atomix.@atomic ptr[indices[idx] + 1] += one(T) + end + + backend = get_backend(indices) + kernel! 
= count_indices(backend) + kernel!(indices, ptr, ndrange=length(indices)) + return cumsum(ptr) +end + +function GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, dims::NTuple{2, <:Integer}; format = default_sparse_format(I)) + m, n = dims + if format == :coo + GPUSparseMatrix(Val(:coo), I, J, V, dims) + elseif format == :csc + # Create composite key for sorting + key = J .* (m + 1) .+ I + perm = sortperm(key) + + ptr = compute_sparse_pointers(J[perm], n) + return GPUSparseMatrix(Val(:csc), ptr, I[perm], V[perm], dims) + elseif format == :csr + # Create composite key for sorting + key = I .* (n + 1) .+ J + perm = sortperm(key) + + ptr = compute_sparse_pointers(I[perm], m) + return GPUSparseMatrix(Val(:csr), ptr, J[perm], V[perm], dims) + else + throw(ArgumentError("Conversion to sparse format $format is not implemented")) + end +end + + +GPUSparseMatrix(::Val{:coo}, I, J, V, dims) = GPUSparseMatrixCOO(I, J, V, dims) +GPUSparseMatrix(::Val{:csc}, colPtr, rowVal, nzVal, dims) = GPUSparseMatrixCSC(colPtr, rowVal, nzVal, dims) +GPUSparseMatrix(::Val{:csr}, rowPtr, colVal, nzVal, dims) = GPUSparseMatrixCSR(rowPtr, colVal, nzVal, dims) +#GPUSparseMatrix(::Val{:bsr}, ...) = GPUSparseMatrixBSR(...) const AbstractGPUSparseVecOrMat = Union{AbstractGPUSparseVector,AbstractGPUSparseMatrix} From 63b3b1aed73dbe526e2a41138899b44d28e233dc Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 13 Mar 2026 15:35:25 +0100 Subject: [PATCH 11/11] Drop GPUSparseMatrix = sparse(...) 
to have consistent interface between formats for dims --- src/host/sparse.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index 17456dff..f12cfed1 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -11,7 +11,7 @@ abstract type AbstractGPUSparseMatrixCSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, T abstract type AbstractGPUSparseMatrixCOO{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end abstract type AbstractGPUSparseMatrixBSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end -GPUSparseMatrixCOO(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = sparse(I, J, V, args...; kwargs...) +function GPUSparseMatrixCOO end function GPUSparseMatrixCSC end function GPUSparseMatrixCSR end function GPUSparseMatrixBSR end