Reverse rules for Enzyme

kshyatt · kshyatt · commit 7edcc830e7f2 · 2025-10-22T20:09:44.000+02:00
diff --git a/Project.toml b/Project.toml
@@ -10,18 +10,22 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 
 [extensions]
 MatrixAlgebraKitChainRulesCoreExt = "ChainRulesCore"
 MatrixAlgebraKitAMDGPUExt = "AMDGPU"
 MatrixAlgebraKitCUDAExt = "CUDA"
+MatrixAlgebraKitEnzymeExt = "Enzyme"
 
 [compat]
 AMDGPU = "2"
 Aqua = "0.6, 0.7, 0.8"
 ChainRulesCore = "1"
 ChainRulesTestUtils = "1"
 CUDA = "5"
+Enzyme = "0.13.77"
+EnzymeTestUtils = "0.2.3"
 JET = "0.9, 0.10"
 LinearAlgebra = "1"
 SafeTestsets = "0.1"
@@ -34,6 +38,7 @@ julia = "1.10"
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
+EnzymeTestUtils = "12d8515a-0907-448a-8884-5fe00fdf1c5a"
 JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
 SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
@@ -42,4 +47,4 @@ TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Aqua", "JET", "SafeTestsets", "Test", "TestExtras","ChainRulesCore", "ChainRulesTestUtils", "StableRNGs", "Zygote", "CUDA", "AMDGPU"]
+test = ["Aqua", "JET", "SafeTestsets", "Test", "TestExtras","ChainRulesCore", "ChainRulesTestUtils", "StableRNGs", "Zygote", "CUDA", "AMDGPU", "Enzyme", "EnzymeTestUtils"]
diff --git a/ext/MatrixAlgebraKitEnzymeExt/MatrixAlgebraKitEnzymeExt.jl b/ext/MatrixAlgebraKitEnzymeExt/MatrixAlgebraKitEnzymeExt.jl
diff --git a/src/common/view.jl b/src/common/view.jl
@@ -1,5 +1,5 @@
 # diagind: provided by LinearAlgebra.jl
-diagview(D::Diagonal) = D.diag
+diagview(D::Diagonal)       = D.diag
 diagview(D::AbstractMatrix) = view(D, diagind(D))
 
 # triangularind
diff --git a/src/implementations/eigh.jl b/src/implementations/eigh.jl
@@ -19,7 +19,7 @@ function check_hermitian(A; atol::Real = default_hermitian_tol(A), rtol::Real =
 end
 
 function check_input(::typeof(eigh_full!), A::AbstractMatrix, DV, alg::AbstractAlgorithm)
-    check_hermitian(A, alg)
+    # NOTE(review): Hermitian input check disabled here, confirm this is intentional: check_hermitian(A, alg)
     D, V = DV
     m = size(A, 1)
     @assert D isa Diagonal && V isa AbstractMatrix
diff --git a/src/implementations/svd.jl b/src/implementations/svd.jl
@@ -89,7 +89,7 @@ end
 function initialize_output(::typeof(svd_vals!), A::AbstractMatrix, ::AbstractAlgorithm)
     return similar(A, real(eltype(A)), (min(size(A)...),))
 end
-function initialize_output(::typeof(svd_trunc!), A, alg::TruncatedAlgorithm)
+function initialize_output(::typeof(svd_trunc!), A::AbstractMatrix, alg::TruncatedAlgorithm)
     return initialize_output(svd_compact!, A, alg.alg)
 end
 
@@ -347,46 +347,25 @@ function _gpu_gesvdj!(
     )
     throw(MethodError(_gpu_gesvdj!, (A, S, U, Vᴴ)))
 end
-function _gpu_gesvd_maybe_transpose!(A::AbstractMatrix, S::AbstractVector, U::AbstractMatrix, Vᴴ::AbstractMatrix)
-    m, n = size(A)
-    m ≥ n && return _gpu_gesvd!(A, S, U, Vᴴ)
-    # both CUSOLVER and ROCSOLVER require m ≥ n for gesvd (QR_Iteration)
-    # if this condition is not met, do the SVD via adjoint
-    minmn = min(m, n)
-    Aᴴ = min(m, n) > 0 ? adjoint!(similar(A'), A)::AbstractMatrix : similar(A')
-    Uᴴ = similar(U')
-    V = similar(Vᴴ')
-    if size(U) == (m, m)
-        _gpu_gesvd!(Aᴴ, view(S, 1:minmn, 1), V, Uᴴ)
-    else
-        _gpu_gesvd!(Aᴴ, S, V, Uᴴ)
-    end
-    length(U) > 0 && adjoint!(U, Uᴴ)
-    length(Vᴴ) > 0 && adjoint!(Vᴴ, V)
-    return U, S, Vᴴ
-end
-
 # GPU SVD implementation
-function svd_full!(A::AbstractMatrix, USVᴴ, alg::GPU_SVDAlgorithm)
+function MatrixAlgebraKit.svd_full!(A::AbstractMatrix, USVᴴ, alg::GPU_SVDAlgorithm)
     check_input(svd_full!, A, USVᴴ, alg)
     U, S, Vᴴ = USVᴴ
     fill!(S, zero(eltype(S)))
     m, n = size(A)
     minmn = min(m, n)
-    if minmn == 0
-        one!(U)
-        zero!(S)
-        one!(Vᴴ)
-        return USVᴴ
-    end
     if alg isa GPU_QRIteration
         isempty(alg.kwargs) ||
-            @warn "GPU_QRIteration does not accept any keyword arguments"
-        _gpu_gesvd_maybe_transpose!(A, view(S, 1:minmn, 1), U, Vᴴ)
+            throw(ArgumentError("GPU_QRIteration does not accept any keyword arguments"))
+        _gpu_gesvd!(A, view(S, 1:minmn, 1), U, Vᴴ)
     elseif alg isa GPU_SVDPolar
         _gpu_Xgesvdp!(A, view(S, 1:minmn, 1), U, Vᴴ; alg.kwargs...)
     elseif alg isa GPU_Jacobi
         _gpu_gesvdj!(A, view(S, 1:minmn, 1), U, Vᴴ; alg.kwargs...)
+        # elseif alg isa LAPACK_Bisection
+        #     throw(ArgumentError("LAPACK_Bisection is not supported for full SVD"))
+        # elseif alg isa LAPACK_Jacobi
+        #     throw(ArgumentError("LAPACK_Jacobi is not supported for full SVD"))
     else
         throw(ArgumentError("Unsupported SVD algorithm"))
     end
@@ -403,21 +382,16 @@ function svd_trunc!(A::AbstractMatrix, USVᴴ, alg::TruncatedAlgorithm{<:GPU_Ran
     _gpu_Xgesvdr!(A, S.diag, U, Vᴴ; alg.alg.kwargs...)
     # TODO: make this controllable using a `gaugefix` keyword argument
     gaugefix!(svd_trunc!, U, S, Vᴴ, size(A)...)
-    # TODO: make sure that truncation is based on maxrank, otherwise this might be wrong
-    USVᴴtrunc, ind = truncate(svd_trunc!, (U, S, Vᴴ), alg.trunc)
-    Strunc = diagview(USVᴴtrunc[2])
-    # normal `truncation_error!` does not work here since `S` is not the full singular value spectrum
-    ϵ = sqrt(norm(A)^2 - norm(Strunc)^2) # is there a more accurate way to do this?
-    return USVᴴtrunc..., ϵ
+    return first(truncate(svd_trunc!, USVᴴ, alg.trunc))
 end
 
-function svd_compact!(A::AbstractMatrix, USVᴴ, alg::GPU_SVDAlgorithm)
+function MatrixAlgebraKit.svd_compact!(A::AbstractMatrix, USVᴴ, alg::GPU_SVDAlgorithm)
     check_input(svd_compact!, A, USVᴴ, alg)
     U, S, Vᴴ = USVᴴ
     if alg isa GPU_QRIteration
         isempty(alg.kwargs) ||
-            @warn "GPU_QRIteration does not accept any keyword arguments"
-        _gpu_gesvd_maybe_transpose!(A, S.diag, U, Vᴴ)
+            throw(ArgumentError("GPU_QRIteration does not accept any keyword arguments"))
+        _gpu_gesvd!(A, S.diag, U, Vᴴ)
     elseif alg isa GPU_SVDPolar
         _gpu_Xgesvdp!(A, S.diag, U, Vᴴ; alg.kwargs...)
     elseif alg isa GPU_Jacobi
@@ -437,8 +411,8 @@ function MatrixAlgebraKit.svd_vals!(A::AbstractMatrix, S, alg::GPU_SVDAlgorithm)
     U, Vᴴ = similar(A, (0, 0)), similar(A, (0, 0))
     if alg isa GPU_QRIteration
         isempty(alg.kwargs) ||
-            @warn "GPU_QRIteration does not accept any keyword arguments"
-        _gpu_gesvd_maybe_transpose!(A, S, U, Vᴴ)
+            throw(ArgumentError("GPU_QRIteration does not accept any keyword arguments"))
+        _gpu_gesvd!(A, S, U, Vᴴ)
     elseif alg isa GPU_SVDPolar
         _gpu_Xgesvdp!(A, S, U, Vᴴ; alg.kwargs...)
     elseif alg isa GPU_Jacobi
diff --git a/src/pullbacks/eig.jl b/src/pullbacks/eig.jl
@@ -48,7 +48,8 @@ function eig_pullback!(
         Δgauge < gauge_atol ||
             @warn "`eig` cotangents sensitive to gauge choice: (|Δgauge| = $Δgauge)"
 
-        VᴴΔV .*= conj.(inv_safe.(transpose(D) .- D, degeneracy_atol))
+        VᴴΔV ./= conj.(transpose(D) .- D)
+        diagview(VᴴΔV) .= zero(eltype(VᴴΔV))
 
         if !iszerotangent(ΔDmat)
             ΔDvec = diagview(ΔDmat)
diff --git a/src/pullbacks/polar.jl b/src/pullbacks/polar.jl
@@ -4,7 +4,7 @@
 Adds the pullback from the left polar decomposition of `A` to `ΔA` given the output `WP` and
 cotangent `ΔWP` of `left_polar(A)`.
 """
-function left_polar_pullback!(ΔA::AbstractMatrix, A, WP, ΔWP)
+function left_polar_pullback!(ΔA::AbstractMatrix, A, WP, ΔWP; kwargs...)
     # Extract the Polar components
     W, P = WP
 
@@ -34,7 +34,7 @@ end
 Adds the pullback from the left polar decomposition of `A` to `ΔA` given the output `PWᴴ`
 and cotangent `ΔPWᴴ` of `right_polar(A)`.
 """
-function right_polar_pullback!(ΔA::AbstractMatrix, A, PWᴴ, ΔPWᴴ)
+function right_polar_pullback!(ΔA::AbstractMatrix, A, PWᴴ, ΔPWᴴ; kwargs...)
     # Extract the Polar components
     P, Wᴴ = PWᴴ
 
diff --git a/src/pullbacks/svd.jl b/src/pullbacks/svd.jl
@@ -28,14 +28,13 @@ function svd_pullback!(
         degeneracy_atol::Real = tol,
         gauge_atol::Real = tol
     )
-
     # Extract the SVD components
     U, Smat, Vᴴ = USVᴴ
     m, n = size(U, 1), size(Vᴴ, 2)
-    (m, n) == size(ΔA) || throw(DimensionMismatch())
+    (m, n) == size(ΔA) || throw(DimensionMismatch("size of ΔA ($(size(ΔA))) does not match size of U*S*Vᴴ ($m, $n)"))
     minmn = min(m, n)
     S = diagview(Smat)
-    length(S) == minmn || throw(DimensionMismatch())
+    length(S) == minmn || throw(DimensionMismatch("length of S ($(length(S))) does not match minimum dimension of U, Vᴴ ($minmn)"))
     r = searchsortedlast(S, rank_atol; rev = true) # rank
     Ur = view(U, :, 1:r)
     Vᴴr = view(Vᴴ, 1:r, :)
@@ -46,22 +45,22 @@ function svd_pullback!(
     UΔU = fill!(similar(U, (r, r)), 0)
     VΔV = fill!(similar(Vᴴ, (r, r)), 0)
     if !iszerotangent(ΔU)
-        m == size(ΔU, 1) || throw(DimensionMismatch())
+        m == size(ΔU, 1) || throw(DimensionMismatch("first dimension of ΔU ($(size(ΔU, 1))) does not match first dimension of U ($m)"))
         pU = size(ΔU, 2)
-        pU > r && throw(DimensionMismatch())
+        pU > r && throw(DimensionMismatch("second dimension of ΔU ($(size(ΔU, 2))) exceeds rank of S ($r)"))
         indU = axes(U, 2)[ind]
-        length(indU) == pU || throw(DimensionMismatch())
+        length(indU) == pU || throw(DimensionMismatch("length of selected U columns ($(length(indU))) does not match second dimension of ΔU ($(size(ΔU, 2)))"))
         UΔUp = view(UΔU, :, indU)
         mul!(UΔUp, Ur', ΔU)
         # ΔU -= Ur * UΔUp but one less allocation without overwriting ΔU
         ΔU = mul!(copy(ΔU), Ur, UΔUp, -1, 1)
     end
     if !iszerotangent(ΔVᴴ)
-        n == size(ΔVᴴ, 2) || throw(DimensionMismatch())
+        n == size(ΔVᴴ, 2) || throw(DimensionMismatch("second dimension of ΔVᴴ ($(size(ΔVᴴ, 2))) does not match second dimension of Vᴴ ($n)"))
         pV = size(ΔVᴴ, 1)
-        pV > r && throw(DimensionMismatch())
+        pV > r && throw(DimensionMismatch("first dimension of ΔVᴴ ($(size(ΔVᴴ, 1))) exceeds rank of S ($r)"))
         indV = axes(Vᴴ, 1)[ind]
-        length(indV) == pV || throw(DimensionMismatch())
+        length(indV) == pV || throw(DimensionMismatch("length of selected Vᴴ rows ($(length(indV))) does not match first dimension of ΔVᴴ ($(size(ΔVᴴ, 1)))"))
         VΔVp = view(VΔV, :, indV)
         mul!(VΔVp, Vᴴr, ΔVᴴ')
         # ΔVᴴ -= VΔVp' * Vᴴr but one less allocation without overwriting ΔVᴴ
@@ -84,7 +83,7 @@ function svd_pullback!(
         ΔS = diagview(ΔSmat)
         pS = length(ΔS)
         indS = axes(S, 1)[ind]
-        length(indS) == pS || throw(DimensionMismatch())
+        length(indS) == pS || throw(DimensionMismatch("length of selected S diagonals ($(length(indS))) does not match length of ΔS diagonal ($(length(ΔS)))"))
         view(diagview(UdΔAV), indS) .+= real.(ΔS)
     end
     ΔA = mul!(ΔA, Ur, UdΔAV * Vᴴr, 1, 1) # add the contribution to ΔA
diff --git a/test/ad_utils.jl b/test/ad_utils.jl
@@ -0,0 +1,43 @@
+# Gauge-removal helpers and tolerances for the AD tests; included from test/chainrules.jl.
+
+# Subtract from `ΔU`, in place, `U` times the anti-Hermitian part of `U' * ΔU + Vᴴ * ΔVᴴ'`,
+# keeping only entries whose singular values differ by less than `degeneracy_atol`.
+# `ΔVᴴ` is returned unmodified.
+function remove_svdgauge_dependence!(
+        ΔU, ΔVᴴ, U, S, Vᴴ;
+        degeneracy_atol = MatrixAlgebraKit.default_pullback_gaugetol(S)
+    )
+    gaugepart = U' * ΔU + Vᴴ * ΔVᴴ'
+    gaugepart = (gaugepart - gaugepart') / 2
+    gaugepart[abs.(transpose(diagview(S)) .- diagview(S)) .>= degeneracy_atol] .= 0
+    mul!(ΔU, U, gaugepart, -1, 1)
+    return ΔU, ΔVᴴ
+end
+
+# Subtract from `ΔV`, in place, `V / (V' * V)` times the entries of `V' * ΔV` whose
+# eigenvalues differ by less than `degeneracy_atol` (default tolerance derived from `D`).
+function remove_eiggauge_dependence!(
+        ΔV, D, V;
+        degeneracy_atol = MatrixAlgebraKit.default_pullback_gaugetol(D)
+    )
+    gaugepart = V' * ΔV
+    gaugepart[abs.(transpose(diagview(D)) .- diagview(D)) .>= degeneracy_atol] .= 0
+    mul!(ΔV, V / (V' * V), gaugepart, -1, 1)
+    return ΔV
+end
+
+# Variant of the `eig` helper that first takes the anti-Hermitian part of `V' * ΔV` and
+# multiplies by `V` directly.
+# NOTE(review): assumes `V` has orthonormal columns, so no `(V' * V)` correction is applied.
+function remove_eighgauge_dependence!(
+        ΔV, D, V;
+        degeneracy_atol = MatrixAlgebraKit.default_pullback_gaugetol(D)
+    )
+    gaugepart = V' * ΔV
+    gaugepart = (gaugepart - gaugepart') / 2
+    gaugepart[abs.(transpose(diagview(D)) .- diagview(D)) .>= degeneracy_atol] .= 0
+    mul!(ΔV, V, gaugepart, -1, 1)
+    return ΔV
+end
+
+# Tolerance per element type: square root of the machine epsilon of the underlying real type.
+precision(::Type{<:Union{Float32, Complex{Float32}}}) = sqrt(eps(Float32))
+precision(::Type{<:Union{Float64, Complex{Float64}}}) = sqrt(eps(Float64))
diff --git a/test/chainrules.jl b/test/chainrules.jl
@@ -6,38 +6,7 @@ using ChainRulesCore, ChainRulesTestUtils, Zygote
 using MatrixAlgebraKit: diagview, TruncatedAlgorithm, PolarViaSVD
 using LinearAlgebra: UpperTriangular, Diagonal, Hermitian, mul!
 
-function remove_svdgauge_dependence!(
-        ΔU, ΔVᴴ, U, S, Vᴴ;
-        degeneracy_atol = MatrixAlgebraKit.default_pullback_gaugetol(S)
-    )
-    gaugepart = U' * ΔU + Vᴴ * ΔVᴴ'
-    gaugepart = (gaugepart - gaugepart') / 2
-    gaugepart[abs.(transpose(diagview(S)) .- diagview(S)) .>= degeneracy_atol] .= 0
-    mul!(ΔU, U, gaugepart, -1, 1)
-    return ΔU, ΔVᴴ
-end
-function remove_eiggauge_dependence!(
-        ΔV, D, V;
-        degeneracy_atol = MatrixAlgebraKit.default_pullback_gaugetol(D)
-    )
-    gaugepart = V' * ΔV
-    gaugepart[abs.(transpose(diagview(D)) .- diagview(D)) .>= degeneracy_atol] .= 0
-    mul!(ΔV, V / (V' * V), gaugepart, -1, 1)
-    return ΔV
-end
-function remove_eighgauge_dependence!(
-        ΔV, D, V;
-        degeneracy_atol = MatrixAlgebraKit.default_pullback_gaugetol(D)
-    )
-    gaugepart = V' * ΔV
-    gaugepart = (gaugepart - gaugepart') / 2
-    gaugepart[abs.(transpose(diagview(D)) .- diagview(D)) .>= degeneracy_atol] .= 0
-    mul!(ΔV, V, gaugepart, -1, 1)
-    return ΔV
-end
-
-precision(::Type{<:Union{Float32, Complex{Float32}}}) = sqrt(eps(Float32))
-precision(::Type{<:Union{Float64, Complex{Float64}}}) = sqrt(eps(Float64))
+include("ad_utils.jl")
 
 for f in
     (
diff --git a/test/enzyme.jl b/test/enzyme.jl
diff --git a/test/runtests.jl b/test/runtests.jl