From 817326f3af6c8ddd87b47b99f009b509560a839f Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 15:31:21 +0100 Subject: [PATCH 01/11] Add direct constructors for CSC, CSR and COO matrices General GPUSparseMatrix defaults to COO, which defaults to sparse --- src/host/sparse.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index c5608b9d..c8110f2b 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -1,5 +1,6 @@ using LinearAlgebra using LinearAlgebra: BlasFloat +export GPUSparseMatrix, GPUSparseMatrixCSC, GPUSparseMatrixCSR, GPUSparseMatrixCOO abstract type AbstractGPUSparseArray{Tv, Ti, N} <: AbstractSparseArray{Tv, Ti, N} end const AbstractGPUSparseVector{Tv, Ti} = AbstractGPUSparseArray{Tv, Ti, 1} @@ -10,6 +11,12 @@ abstract type AbstractGPUSparseMatrixCSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, T abstract type AbstractGPUSparseMatrixCOO{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end abstract type AbstractGPUSparseMatrixBSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end +GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = GPUSparseMatrixCOO(I, J, V, args...; kwargs...) +GPUSparseMatrixCOO(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = sparse(I, J, V, args...; kwargs...) 
+function GPUSparseMatrixCSC end +function GPUSparseMatrixCSR end + + const AbstractGPUSparseVecOrMat = Union{AbstractGPUSparseVector,AbstractGPUSparseMatrix} SparseArrays.nnz(g::T) where {T<:AbstractGPUSparseArray} = g.nnz From 12919bd14f268b3755efcb0281672f5f86a71144 Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 16:01:33 +0100 Subject: [PATCH 02/11] Add JLArrays sparse constructors for CSC and CSR --- lib/JLArrays/src/JLArrays.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index 14a4d296..3bd35712 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -13,7 +13,7 @@ using GPUArrays using Adapt using SparseArrays, LinearAlgebra -import GPUArrays: dense_array_type +import GPUArrays: dense_array_type, GPUSparseMatrixCSC, GPUSparseMatrixCSR import KernelAbstractions import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config @@ -150,6 +150,9 @@ mutable struct JLSparseMatrixCSC{Tv, Ti} <: GPUArrays.AbstractGPUSparseMatrixCSC new{Tv, Ti}(colPtr, rowVal, nzVal, dims, length(nzVal)) end end +function GPUSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} + return JLSparseMatrixCSC(colPtr, rowVal, nzVal, dims) +end function JLSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} return JLSparseMatrixCSC{Tv, Ti}(colPtr, rowVal, nzVal, dims) end @@ -181,6 +184,9 @@ end function JLSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} return JLSparseMatrixCSR{Tv, Ti}(rowPtr, colVal, nzVal, dims) end +function GPUSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, 
dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} + return JLSparseMatrixCSR(rowPtr, colVal, nzVal, dims) +end function SparseArrays.SparseMatrixCSC(x::JLSparseMatrixCSR) x_transpose = SparseMatrixCSC(size(x, 2), size(x, 1), Array(x.rowPtr), Array(x.colVal), Array(x.nzVal)) return SparseMatrixCSC(transpose(x_transpose)) From d7491b2697cd777da62268538ae412e6019bb5fa Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 16:01:59 +0100 Subject: [PATCH 03/11] Add initial tests for direct sparse matrix constructors --- test/testsuite/sparse.jl | 59 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index a31abe6d..146801e6 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -9,6 +9,7 @@ elseif sparse_AT <: AbstractSparseMatrix matrix(sparse_AT, eltypes) matrix_construction(sparse_AT, eltypes) + direct_vector_construction(sparse_AT, eltypes) broadcasting_matrix(sparse_AT, eltypes) mapreduce_matrix(sparse_AT, eltypes) linalg(sparse_AT, eltypes) @@ -151,6 +152,64 @@ function matrix_construction(AT, eltypes) end end +# Helper function to derive direct matrix formats: +# Create colptr, rowval, nzval for m x n matrix with 3 values per column +function csc_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer}=Int32) where {ET} + # Fixed, deterministic 3 nnz per column; random nz values + colptr = Vector{I}(undef, n + 1) + rowval = Vector{I}() + nzval = Vector{ET}() + + colptr[1] = I(1) + nnz_acc = 0 + for j in 1:n + # Magic numbers + rows_j = sort(unique(mod.(j .+ (1, 7, 13), m) .+ 1)) + append!(rowval, I.(rows_j)) + append!(nzval, rand(ET, length(rows_j))) + nnz_acc += length(rows_j) + colptr[j + 1] = I(nnz_acc + 1) + end + return colptr, rowval, nzval +end +function csr_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer}=Int32) where {ET} + # Build CSC for (n, m), then interpret as CSR for (m, n) + colptr_nm, rowval_nm, nzval_nm = csc_vectors(n, m, ET; 
I=I) + rowptr = colptr_nm + colind = rowval_nm + nzval = nzval_nm + return rowptr, colind, nzval +end +# Construct appropriate sparse arrays +function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSC}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer}=Int32) where {ET} + colptr, rowval, nzval = csc_vectors(m, n, ET; I=I) + dense_AT = GPUArrays.dense_array_type(AT) + d_colptr = dense_AT(colptr) + d_rowval = dense_AT(rowval) + d_nzval = dense_AT(nzval) + return GPUSparseMatrixCSC(d_colptr, d_rowval, d_nzval, (m, n)) +end +function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSR}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer}=Int32) where {ET} + rowptr, colind, nzval = csr_vectors(m, n, ET; I=I) + dense_AT = GPUArrays.dense_array_type(AT) + d_rowptr = dense_AT(rowptr) + d_colind = dense_AT(colind) + d_nzval = dense_AT(nzval) + return GPUSparseMatrixCSR(d_rowptr, d_colind, d_nzval, (m, n)) +end +function direct_vector_construction(AT::Type{<:GPUArrays.AbstractGPUSparseMatrix}, eltypes) + for ET in eltypes + m = 25 + n = 35 + x = construct_sparse_matrix(AT, ET, m, n) + @test x isa AT{ET} + @test size(x) == (m, n) + end +end +function direct_vector_construction(AT, eltypes) + # NOP +end + function broadcasting_vector(AT, eltypes) dense_AT = GPUArrays.dense_array_type(AT) for ET in eltypes From 7f75c5ec0511d99ffa7772b4889d91b4355c9087 Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 16:16:16 +0100 Subject: [PATCH 04/11] Also export BSR function (no matching JLArray atm) --- src/host/sparse.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index c8110f2b..0ed43c65 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -1,6 +1,6 @@ using LinearAlgebra using LinearAlgebra: BlasFloat -export GPUSparseMatrix, GPUSparseMatrixCSC, GPUSparseMatrixCSR, GPUSparseMatrixCOO +export GPUSparseMatrix, GPUSparseMatrixCSC, GPUSparseMatrixCSR, GPUSparseMatrixCOO, 
GPUSparseMatrixBSR abstract type AbstractGPUSparseArray{Tv, Ti, N} <: AbstractSparseArray{Tv, Ti, N} end const AbstractGPUSparseVector{Tv, Ti} = AbstractGPUSparseArray{Tv, Ti, 1} @@ -15,6 +15,7 @@ GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector GPUSparseMatrixCOO(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = sparse(I, J, V, args...; kwargs...) function GPUSparseMatrixCSC end function GPUSparseMatrixCSR end +function GPUSparseMatrixBSR end const AbstractGPUSparseVecOrMat = Union{AbstractGPUSparseVector,AbstractGPUSparseMatrix} From bb73fc677f1d50897894f29b6637b8de443b71cd Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 15:31:21 +0100 Subject: [PATCH 05/11] Add direct constructors for CSC, CSR and COO matrices General GPUSparseMatrix defaults to COO, which defaults to sparse --- src/host/sparse.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index c5608b9d..c8110f2b 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -1,5 +1,6 @@ using LinearAlgebra using LinearAlgebra: BlasFloat +export GPUSparseMatrix, GPUSparseMatrixCSC, GPUSparseMatrixCSR, GPUSparseMatrixCOO abstract type AbstractGPUSparseArray{Tv, Ti, N} <: AbstractSparseArray{Tv, Ti, N} end const AbstractGPUSparseVector{Tv, Ti} = AbstractGPUSparseArray{Tv, Ti, 1} @@ -10,6 +11,12 @@ abstract type AbstractGPUSparseMatrixCSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, T abstract type AbstractGPUSparseMatrixCOO{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end abstract type AbstractGPUSparseMatrixBSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end +GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = GPUSparseMatrixCOO(I, J, V, args...; kwargs...) +GPUSparseMatrixCOO(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = sparse(I, J, V, args...; kwargs...) 
+function GPUSparseMatrixCSC end +function GPUSparseMatrixCSR end + + const AbstractGPUSparseVecOrMat = Union{AbstractGPUSparseVector,AbstractGPUSparseMatrix} SparseArrays.nnz(g::T) where {T<:AbstractGPUSparseArray} = g.nnz From 065e55edbc67409cdea4d1b17389ecd28edf282c Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 16:01:33 +0100 Subject: [PATCH 06/11] Add JLArrays sparse constructors for CSC and CSR --- lib/JLArrays/src/JLArrays.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index 14a4d296..3bd35712 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -13,7 +13,7 @@ using GPUArrays using Adapt using SparseArrays, LinearAlgebra -import GPUArrays: dense_array_type +import GPUArrays: dense_array_type, GPUSparseMatrixCSC, GPUSparseMatrixCSR import KernelAbstractions import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config @@ -150,6 +150,9 @@ mutable struct JLSparseMatrixCSC{Tv, Ti} <: GPUArrays.AbstractGPUSparseMatrixCSC new{Tv, Ti}(colPtr, rowVal, nzVal, dims, length(nzVal)) end end +function GPUSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} + return JLSparseMatrixCSC(colPtr, rowVal, nzVal, dims) +end function JLSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} return JLSparseMatrixCSC{Tv, Ti}(colPtr, rowVal, nzVal, dims) end @@ -181,6 +184,9 @@ end function JLSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} return JLSparseMatrixCSR{Tv, Ti}(rowPtr, colVal, nzVal, dims) end +function GPUSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, 
dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer} + return JLSparseMatrixCSR(rowPtr, colVal, nzVal, dims) +end function SparseArrays.SparseMatrixCSC(x::JLSparseMatrixCSR) x_transpose = SparseMatrixCSC(size(x, 2), size(x, 1), Array(x.rowPtr), Array(x.colVal), Array(x.nzVal)) return SparseMatrixCSC(transpose(x_transpose)) From 21ddaafc2a8a1a18f8fd02477ab325794f467c04 Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 16:01:59 +0100 Subject: [PATCH 07/11] Add initial tests for direct sparse matrix constructors --- test/testsuite/sparse.jl | 59 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index a31abe6d..146801e6 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -9,6 +9,7 @@ elseif sparse_AT <: AbstractSparseMatrix matrix(sparse_AT, eltypes) matrix_construction(sparse_AT, eltypes) + direct_vector_construction(sparse_AT, eltypes) broadcasting_matrix(sparse_AT, eltypes) mapreduce_matrix(sparse_AT, eltypes) linalg(sparse_AT, eltypes) @@ -151,6 +152,64 @@ function matrix_construction(AT, eltypes) end end +# Helper function to derive direct matrix formats: +# Create colptr, rowval, nzval for m x n matrix with 3 values per column +function csc_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer}=Int32) where {ET} + # Fixed, deterministic 3 nnz per column; random nz values + colptr = Vector{I}(undef, n + 1) + rowval = Vector{I}() + nzval = Vector{ET}() + + colptr[1] = I(1) + nnz_acc = 0 + for j in 1:n + # Magic numbers + rows_j = sort(unique(mod.(j .+ (1, 7, 13), m) .+ 1)) + append!(rowval, I.(rows_j)) + append!(nzval, rand(ET, length(rows_j))) + nnz_acc += length(rows_j) + colptr[j + 1] = I(nnz_acc + 1) + end + return colptr, rowval, nzval +end +function csr_vectors(m::Int, n::Int, ::Type{ET}; I::Type{<:Integer}=Int32) where {ET} + # Build CSC for (n, m), then interpret as CSR for (m, n) + colptr_nm, rowval_nm, nzval_nm = csc_vectors(n, m, ET; 
I=I) + rowptr = colptr_nm + colind = rowval_nm + nzval = nzval_nm + return rowptr, colind, nzval +end +# Construct appropriate sparse arrays +function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSC}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer}=Int32) where {ET} + colptr, rowval, nzval = csc_vectors(m, n, ET; I=I) + dense_AT = GPUArrays.dense_array_type(AT) + d_colptr = dense_AT(colptr) + d_rowval = dense_AT(rowval) + d_nzval = dense_AT(nzval) + return GPUSparseMatrixCSC(d_colptr, d_rowval, d_nzval, (m, n)) +end +function construct_sparse_matrix(AT::Type{<:GPUArrays.AbstractGPUSparseMatrixCSR}, ::Type{ET}, m::Int, n::Int; I::Type{<:Integer}=Int32) where {ET} + rowptr, colind, nzval = csr_vectors(m, n, ET; I=I) + dense_AT = GPUArrays.dense_array_type(AT) + d_rowptr = dense_AT(rowptr) + d_colind = dense_AT(colind) + d_nzval = dense_AT(nzval) + return GPUSparseMatrixCSR(d_rowptr, d_colind, d_nzval, (m, n)) +end +function direct_vector_construction(AT::Type{<:GPUArrays.AbstractGPUSparseMatrix}, eltypes) + for ET in eltypes + m = 25 + n = 35 + x = construct_sparse_matrix(AT, ET, m, n) + @test x isa AT{ET} + @test size(x) == (m, n) + end +end +function direct_vector_construction(AT, eltypes) + # NOP +end + function broadcasting_vector(AT, eltypes) dense_AT = GPUArrays.dense_array_type(AT) for ET in eltypes From 2301530e7f5c0f23356020e8fd451ed969c4b369 Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 23 Jan 2026 16:16:16 +0100 Subject: [PATCH 08/11] Also export BSR function (no matching JLArray atm) --- src/host/sparse.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index c8110f2b..0ed43c65 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -1,6 +1,6 @@ using LinearAlgebra using LinearAlgebra: BlasFloat -export GPUSparseMatrix, GPUSparseMatrixCSC, GPUSparseMatrixCSR, GPUSparseMatrixCOO +export GPUSparseMatrix, GPUSparseMatrixCSC, GPUSparseMatrixCSR, GPUSparseMatrixCOO, 
GPUSparseMatrixBSR abstract type AbstractGPUSparseArray{Tv, Ti, N} <: AbstractSparseArray{Tv, Ti, N} end const AbstractGPUSparseVector{Tv, Ti} = AbstractGPUSparseArray{Tv, Ti, 1} @@ -15,6 +15,7 @@ GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector GPUSparseMatrixCOO(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = sparse(I, J, V, args...; kwargs...) function GPUSparseMatrixCSC end function GPUSparseMatrixCSR end +function GPUSparseMatrixBSR end const AbstractGPUSparseVecOrMat = Union{AbstractGPUSparseVector,AbstractGPUSparseMatrix} From 435f5f5178fe68bbb0ee8d6753eab9e19cd99d4b Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 13 Mar 2026 15:07:06 +0100 Subject: [PATCH 09/11] Add Atomix dependency for sparse default --- Project.toml | 2 ++ src/GPUArrays.jl | 1 + 2 files changed, 3 insertions(+) diff --git a/Project.toml b/Project.toml index 00977c2c..35621a57 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ version = "11.3.4" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" @@ -24,6 +25,7 @@ JLD2Ext = "JLD2" [compat] Adapt = "4.0" +Atomix = "1" GPUArraysCore = "= 0.2.0" JLD2 = "0.4, 0.5, 0.6" KernelAbstractions = "0.9.28, 0.10" diff --git a/src/GPUArrays.jl b/src/GPUArrays.jl index a35c1ff0..8afa88e8 100644 --- a/src/GPUArrays.jl +++ b/src/GPUArrays.jl @@ -16,6 +16,7 @@ using Reexport @reexport using GPUArraysCore using KernelAbstractions +using Atomix # device functionality include("device/abstractarray.jl") From df12698f35a189d531c4ab057f0b947c68c0d606 Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 13 Mar 2026 15:07:53 +0100 Subject: [PATCH 10/11] Add conversions from :coo to :csc and :csr --- src/host/sparse.jl | 50 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file 
changed, 48 insertions(+), 2 deletions(-) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index 0ed43c65..17456dff 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -11,11 +11,57 @@ abstract type AbstractGPUSparseMatrixCSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, T abstract type AbstractGPUSparseMatrixCOO{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end abstract type AbstractGPUSparseMatrixBSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end -GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = GPUSparseMatrixCOO(I, J, V, args...; kwargs...) GPUSparseMatrixCOO(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = sparse(I, J, V, args...; kwargs...) function GPUSparseMatrixCSC end function GPUSparseMatrixCSR end -function GPUSparseMatrixBSR end +function GPUSparseMatrixBSR end + +function compute_sparse_pointers(indices::AbstractGPUVector{T}, n::Integer) where T + ptr = similar(indices, T, n + 1) + fill!(ptr, zero(T)) + + @kernel function count_indices(@Const(indices), ptr) + idx = @index(Global, Linear) + if idx == 1 + ptr[idx] = one(T) + end + Atomix.@atomic ptr[indices[idx] + 1] += one(T) + end + + backend = get_backend(indices) + kernel! 
= count_indices(backend) + kernel!(indices, ptr, ndrange=length(indices)) + return cumsum(ptr) +end + +function GPUSparseMatrix(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, dims::NTuple{2, <:Integer}; format = default_sparse_format(I)) + m, n = dims + if format == :coo + GPUSparseMatrix(Val(:coo), I, J, V, dims) + elseif format == :csc + # Create composite key for sorting + key = J .* (m + 1) .+ I + perm = sortperm(key) + + ptr = compute_sparse_pointers(J[perm], n) + return GPUSparseMatrix(Val(:csc), ptr, I[perm], V[perm], dims) + elseif format == :csr + # Create composite key for sorting + key = I .* (n + 1) .+ J + perm = sortperm(key) + + ptr = compute_sparse_pointers(I[perm], m) + return GPUSparseMatrix(Val(:csr), ptr, J[perm], V[perm], dims) + else + throw(ArgumentError("Conversion to sparse format $format is not implemented")) + end +end + + +GPUSparseMatrix(::Val{:coo}, I, J, V, dims) = GPUSparseMatrixCOO(I, J, V, dims) +GPUSparseMatrix(::Val{:csc}, colPtr, rowVal, nzVal, dims) = GPUSparseMatrixCSC(colPtr, rowVal, nzVal, dims) +GPUSparseMatrix(::Val{:csr}, rowPtr, colVal, nzVal, dims) = GPUSparseMatrixCSR(rowPtr, colVal, nzVal, dims) +#GPUSparseMatrix(::Val{:bsr}, ...) = GPUSparseMatrixBSR(...) const AbstractGPUSparseVecOrMat = Union{AbstractGPUSparseVector,AbstractGPUSparseMatrix} From 63b3b1aed73dbe526e2a41138899b44d28e233dc Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 13 Mar 2026 15:35:25 +0100 Subject: [PATCH 11/11] Drop GPUSparseMatrix = sparse(...) 
to have consistent interface between formats for dims --- src/host/sparse.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index 17456dff..f12cfed1 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -11,7 +11,7 @@ abstract type AbstractGPUSparseMatrixCSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, T abstract type AbstractGPUSparseMatrixCOO{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end abstract type AbstractGPUSparseMatrixBSR{Tv, Ti} <: AbstractGPUSparseArray{Tv, Ti, 2} end -GPUSparseMatrixCOO(I::AbstractGPUVector, J::AbstractGPUVector, V::AbstractGPUVector, args...; kwargs...) = sparse(I, J, V, args...; kwargs...) +function GPUSparseMatrixCOO end function GPUSparseMatrixCSC end function GPUSparseMatrixCSR end function GPUSparseMatrixBSR end