|
| 1 | +module MatrixAlgebraKitMooncakeExt |
| 2 | + |
| 3 | +using Mooncake |
| 4 | +using Mooncake: DefaultCtx, CoDual, Dual, NoRData, rrule!!, frule!!, arrayify, @is_primitive |
| 5 | +using MatrixAlgebraKit |
| 6 | +using MatrixAlgebraKit: inv_safe, diagview |
| 7 | +using MatrixAlgebraKit: qr_pullback!, lq_pullback! |
| 8 | +using MatrixAlgebraKit: qr_null_pullback!, lq_null_pullback! |
| 9 | +using MatrixAlgebraKit: eig_pullback!, eigh_pullback! |
| 10 | +using MatrixAlgebraKit: left_polar_pullback!, right_polar_pullback! |
| 11 | +using LinearAlgebra |
| 12 | + |
| 13 | +# two-argument factorizations like LQ, QR, EIG |
| 14 | +for (f, pb, adj) in ( |
| 15 | + (qr_full!, qr_pullback!, :dqr_adjoint), |
| 16 | + (qr_compact!, qr_pullback!, :dqr_adjoint), |
| 17 | + (lq_full!, lq_pullback!, :dlq_adjoint), |
| 18 | + (lq_compact!, lq_pullback!, :dlq_adjoint), |
| 19 | + (eig_full!, eig_pullback!, :deig_adjoint), |
| 20 | + (eigh_full!, eigh_pullback!, :deigh_adjoint), |
| 21 | + (left_polar!, left_polar_pullback!, :dleft_polar_adjoint), |
| 22 | + (right_polar!, right_polar_pullback!, :dright_polar_adjoint), |
| 23 | + ) |
| 24 | + |
| 25 | + @eval begin |
| 26 | + @is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof($f), AbstractMatrix, Tuple{<:AbstractMatrix, <:AbstractMatrix}, MatrixAlgebraKit.AbstractAlgorithm} |
| 27 | + function Mooncake.rrule!!(::CoDual{typeof($f)}, A_dA::CoDual{<:AbstractMatrix}, args_dargs::CoDual, alg_dalg::CoDual{<:MatrixAlgebraKit.AbstractAlgorithm}; kwargs...) |
| 28 | + A, dA = arrayify(A_dA) |
| 29 | + dA .= zero(eltype(A)) |
| 30 | + args = Mooncake.primal(args_dargs) |
| 31 | + dargs = Mooncake.tangent(args_dargs) |
| 32 | + arg1, darg1 = arrayify(args[1], dargs[1]) |
| 33 | + arg2, darg2 = arrayify(args[2], dargs[2]) |
| 34 | + function $adj(::Mooncake.NoRData) |
| 35 | + dA = $pb(dA, A, (arg1, arg2), (darg1, darg2); kwargs...) |
| 36 | + return Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData() |
| 37 | + end |
| 38 | + args = $f(A, args, Mooncake.primal(alg_dalg); kwargs...) |
| 39 | + darg1 .= zero(eltype(arg1)) |
| 40 | + darg2 .= zero(eltype(arg2)) |
| 41 | + return Mooncake.CoDual(args, dargs), $adj |
| 42 | + end |
| 43 | + end |
| 44 | +end |
| 45 | + |
| 46 | +for (f, f_full, pb, adj) in ( |
| 47 | + (qr_null!, qr_full, qr_null_pullback!, :dqr_null_adjoint), |
| 48 | + (lq_null!, lq_full, lq_null_pullback!, :dlq_null_adjoint), |
| 49 | + ) |
| 50 | + @eval begin |
| 51 | + @is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof($f), AbstractMatrix, AbstractMatrix, MatrixAlgebraKit.AbstractAlgorithm} |
| 52 | + function Mooncake.rrule!!(f_df::CoDual{typeof($f)}, A_dA::CoDual{<:AbstractMatrix}, arg_darg::CoDual{<:AbstractMatrix}, alg_dalg::CoDual{<:MatrixAlgebraKit.AbstractAlgorithm}; kwargs...) |
| 53 | + A, dA = arrayify(A_dA) |
| 54 | + Ac = MatrixAlgebraKit.copy_input($f_full, A) |
| 55 | + arg, darg = arrayify(Mooncake.primal(arg_darg), Mooncake.tangent(arg_darg)) |
| 56 | + arg = $f(Ac, arg, Mooncake.primal(alg_dalg)) |
| 57 | + function $adj(::Mooncake.NoRData) |
| 58 | + dA .= zero(eltype(A)) |
| 59 | + $pb(dA, A, arg, darg; kwargs...) |
| 60 | + return Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData() |
| 61 | + end |
| 62 | + return arg_darg, $adj |
| 63 | + end |
| 64 | + end |
| 65 | +end |
| 66 | + |
| 67 | +@is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof(MatrixAlgebraKit.eig_vals!), AbstractMatrix, AbstractVector, MatrixAlgebraKit.AbstractAlgorithm} |
| 68 | +function Mooncake.rrule!!(::CoDual{<:typeof(MatrixAlgebraKit.eig_vals!)}, A_dA::CoDual, D_dD::CoDual, alg_dalg::CoDual; kwargs...) |
| 69 | + # compute primal |
| 70 | + D_ = Mooncake.primal(D_dD) |
| 71 | + dD_ = Mooncake.tangent(D_dD) |
| 72 | + A_ = Mooncake.primal(A_dA) |
| 73 | + dA_ = Mooncake.tangent(A_dA) |
| 74 | + A, dA = arrayify(A_, dA_) |
| 75 | + D, dD = arrayify(D_, dD_) |
| 76 | + dA .= zero(eltype(dA)) |
| 77 | + # update primal |
| 78 | + DV = eig_full(A, Mooncake.primal(alg_dalg); kwargs...) |
| 79 | + V = DV[2] |
| 80 | + dD .= zero(eltype(D)) |
| 81 | + function deig_vals_adjoint(::Mooncake.NoRData) |
| 82 | + PΔV = V' \ Diagonal(dD) |
| 83 | + if eltype(dA) <: Real |
| 84 | + ΔAc = PΔV * V' |
| 85 | + dA .+= real.(ΔAc) |
| 86 | + else |
| 87 | + mul!(dA, PΔV, V', 1, 0) |
| 88 | + end |
| 89 | + return Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData() |
| 90 | + end |
| 91 | + return Mooncake.CoDual(DV[1].diag, dD_), deig_vals_adjoint |
| 92 | +end |
| 93 | + |
| 94 | +@is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof(MatrixAlgebraKit.eigh_vals!), AbstractMatrix, AbstractVector, MatrixAlgebraKit.AbstractAlgorithm} |
| 95 | +function Mooncake.rrule!!(::CoDual{<:typeof(MatrixAlgebraKit.eigh_vals!)}, A_dA::CoDual, D_dD::CoDual, alg_dalg::CoDual; kwargs...) |
| 96 | + # compute primal |
| 97 | + D_ = Mooncake.primal(D_dD) |
| 98 | + dD_ = Mooncake.tangent(D_dD) |
| 99 | + A_ = Mooncake.primal(A_dA) |
| 100 | + dA_ = Mooncake.tangent(A_dA) |
| 101 | + A, dA = arrayify(A_, dA_) |
| 102 | + D, dD = arrayify(D_, dD_) |
| 103 | + DV = eigh_full(A, Mooncake.primal(alg_dalg); kwargs...) |
| 104 | + function deigh_vals_adjoint(::Mooncake.NoRData) |
| 105 | + mul!(dA, DV[2] * Diagonal(real(dD)), DV[2]', 1, 0) |
| 106 | + return Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData() |
| 107 | + end |
| 108 | + return Mooncake.CoDual(DV[1].diag, dD_), deigh_vals_adjoint |
| 109 | +end |
| 110 | + |
| 111 | + |
| 112 | +for (f, St) in ((svd_full!, :AbstractMatrix), (svd_compact!, :Diagonal)) |
| 113 | + @eval begin |
| 114 | + @is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof($f), AbstractMatrix, Tuple{<:AbstractMatrix, <:$St, <:AbstractMatrix}, MatrixAlgebraKit.AbstractAlgorithm} |
| 115 | + function Mooncake.rrule!!(::CoDual{typeof($f)}, A_dA::CoDual, USVᴴ_dUSVᴴ::CoDual, alg_dalg::CoDual; kwargs...) |
| 116 | + A, dA = arrayify(A_dA) |
| 117 | + USVᴴ = Mooncake.primal(USVᴴ_dUSVᴴ) |
| 118 | + dUSVᴴ = Mooncake.tangent(USVᴴ_dUSVᴴ) |
| 119 | + U, dU = arrayify(USVᴴ[1], dUSVᴴ[1]) |
| 120 | + S, dS = arrayify(USVᴴ[2], dUSVᴴ[2]) |
| 121 | + Vᴴ, dVᴴ = arrayify(USVᴴ[3], dUSVᴴ[3]) |
| 122 | + USVᴴ = $f(A, USVᴴ, Mooncake.primal(alg_dalg); kwargs...) |
| 123 | + function dsvd_adjoint(::Mooncake.NoRData) |
| 124 | + dA .= zero(eltype(A)) |
| 125 | + minmn = min(size(A)...) |
| 126 | + if size(U, 2) == size(Vᴴ, 1) == minmn # compact |
| 127 | + dA = MatrixAlgebraKit.svd_pullback!(dA, A, (U, S, Vᴴ), (dU, dS, dVᴴ)) |
| 128 | + else # full |
| 129 | + vU = view(U, :, 1:minmn) |
| 130 | + vS = Diagonal(diagview(S)[1:minmn]) |
| 131 | + vVᴴ = view(Vᴴ, 1:minmn, :) |
| 132 | + vdU = view(dU, :, 1:minmn) |
| 133 | + vdS = Diagonal(diagview(dS)[1:minmn]) |
| 134 | + vdVᴴ = view(dVᴴ, 1:minmn, :) |
| 135 | + dA = MatrixAlgebraKit.svd_pullback!(dA, A, (vU, vS, vVᴴ), (vdU, vdS, vdVᴴ)) |
| 136 | + end |
| 137 | + return Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData() |
| 138 | + end |
| 139 | + return Mooncake.CoDual(USVᴴ, dUSVᴴ), dsvd_adjoint |
| 140 | + end |
| 141 | + end |
| 142 | +end |
| 143 | + |
| 144 | +@is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof(MatrixAlgebraKit.svd_vals!), AbstractMatrix, AbstractVector, MatrixAlgebraKit.AbstractAlgorithm} |
| 145 | +function Mooncake.rrule!!(::CoDual{<:typeof(MatrixAlgebraKit.svd_vals!)}, A_dA::CoDual, S_dS::CoDual, alg_dalg::CoDual; kwargs...) |
| 146 | + # compute primal |
| 147 | + S_ = Mooncake.primal(S_dS) |
| 148 | + dS_ = Mooncake.tangent(S_dS) |
| 149 | + A_ = Mooncake.primal(A_dA) |
| 150 | + dA_ = Mooncake.tangent(A_dA) |
| 151 | + A, dA = arrayify(A_, dA_) |
| 152 | + S, dS = arrayify(S_, dS_) |
| 153 | + U, nS, Vᴴ = svd_compact(A, Mooncake.primal(alg_dalg); kwargs...) |
| 154 | + S .= diagview(nS) |
| 155 | + dS .= zero(eltype(S)) |
| 156 | + function dsvd_vals_adjoint(::Mooncake.NoRData) |
| 157 | + dA .= U * Diagonal(dS) * Vᴴ |
| 158 | + return Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData() |
| 159 | + end |
| 160 | + return S_dS, dsvd_vals_adjoint |
| 161 | +end |
| 162 | + |
| 163 | +end |
0 commit comments