|
| 1 | +module MatrixAlgebraKitMooncakeExt |
| 2 | + |
| 3 | +using Mooncake |
| 4 | +using Mooncake: DefaultCtx, CoDual, Dual, NoRData, rrule!!, frule!!, arrayify, @is_primitive |
| 5 | +using MatrixAlgebraKit |
| 6 | +using MatrixAlgebraKit: inv_safe, diagview, copy_input |
| 7 | +using MatrixAlgebraKit: qr_pullback!, lq_pullback! |
| 8 | +using MatrixAlgebraKit: qr_null_pullback!, lq_null_pullback! |
| 9 | +using MatrixAlgebraKit: eig_pullback!, eigh_pullback! |
| 10 | +using MatrixAlgebraKit: left_polar_pullback!, right_polar_pullback! |
| 11 | +using MatrixAlgebraKit: svd_pullback! |
| 12 | +using LinearAlgebra |
| 13 | + |
| 14 | + |
# Reverse-mode rule for `copy_input`.
# Forward pass: call `copy_input` on the primals and pair the result with a fresh zero
# tangent. Reverse pass: accumulate that tangent into the tangent of the copied argument.
@is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof(copy_input), Any, Any}
function Mooncake.rrule!!(::CoDual{typeof(copy_input)}, f_df::CoDual, A_dA::CoDual)
    f = Mooncake.primal(f_df)
    A = Mooncake.primal(A_dA)
    A_copy = copy_input(f, A)
    dA_copy = Mooncake.zero_tangent(A_copy)
    function copy_input_pb(::Mooncake.NoRData)
        # Whatever was accumulated on the copy flows back onto the original's tangent.
        dA = Mooncake.tangent(A_dA)
        Mooncake.increment!!(dA, dA_copy)
        # One rdata entry per argument: the function itself, `f`, and `A`.
        return Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData()
    end
    return CoDual(A_copy, dA_copy), copy_input_pb
end
| 25 | + |
# Reverse-mode rules for in-place factorizations whose output is a 2-tuple
# (QR, LQ, eig, eigh, polar). Each entry is
# (in-place primal function, matching pullback, name of the generated adjoint closure).
for (f, pb, adj) in (
        (qr_full!, qr_pullback!, :dqr_adjoint),
        (qr_compact!, qr_pullback!, :dqr_adjoint),
        (lq_full!, lq_pullback!, :dlq_adjoint),
        (lq_compact!, lq_pullback!, :dlq_adjoint),
        (eig_full!, eig_pullback!, :deig_adjoint),
        (eigh_full!, eigh_pullback!, :deigh_adjoint),
        (left_polar!, left_polar_pullback!, :dleft_polar_adjoint),
        (right_polar!, right_polar_pullback!, :dright_polar_adjoint),
    )

    @eval begin
        @is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof($f), Any, Tuple{<:Any, <:Any}, MatrixAlgebraKit.AbstractAlgorithm}
        function Mooncake.rrule!!(::CoDual{typeof($f)}, A_dA::CoDual, args_dargs::CoDual, alg_dalg::CoDual{<:MatrixAlgebraKit.AbstractAlgorithm}; kwargs...)
            A, dA = arrayify(A_dA)
            args = Mooncake.primal(args_dargs)
            dargs = Mooncake.tangent(args_dargs)
            arg1, darg1 = arrayify(args[1], dargs[1])
            arg2, darg2 = arrayify(args[2], dargs[2])
            # Snapshot everything the in-place call may overwrite so the reverse pass can
            # put the primal state back.
            Ac = copy(A)
            arg1c = copy(arg1)
            arg2c = copy(arg2)
            # Forward pass: the factorization writes its result into `args`.
            $f(A, args, Mooncake.primal(alg_dalg); kwargs...)
            function $adj(::Mooncake.NoRData)
                # Restore `A` *before* calling the pullback so that it receives the
                # original input rather than whatever the in-place call left behind
                # (same ordering as the `*_vals!` and SVD rules in this file).
                A .= Ac
                $pb(dA, A, (arg1, arg2), (darg1, darg2); kwargs...)
                # Put the output buffers back to their pre-call contents and clear the
                # output cotangents now that they have been consumed.
                arg1 .= arg1c
                arg2 .= arg2c
                MatrixAlgebraKit.zero!(darg1)
                MatrixAlgebraKit.zero!(darg2)
                return Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData()
            end
            return Mooncake.CoDual(args, dargs), $adj
        end
    end
end
| 63 | + |
# Reverse-mode rules for the in-place null-space routines.
# Each entry is (in-place primal function, matching pullback, name of the adjoint closure).
for (f, pb, adj) in (
        (qr_null!, qr_null_pullback!, :dqr_null_adjoint),
        (lq_null!, lq_null_pullback!, :dlq_null_adjoint),
    )
    @eval begin
        @is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof($f), Any, Any, MatrixAlgebraKit.AbstractAlgorithm}
        function Mooncake.rrule!!(f_df::CoDual{typeof($f)}, A_dA::CoDual, arg_darg::CoDual, alg_dalg::CoDual{<:MatrixAlgebraKit.AbstractAlgorithm}; kwargs...)
            A, dA = arrayify(A_dA)
            Ac = copy(A)
            arg, darg = arrayify(arg_darg)
            argc = copy(arg)
            # The primal runs on a copy of `A`, so `A` itself is not touched by the
            # forward pass.
            # NOTE(review): the original author flagged that it is unclear why this copy
            # is required; kept as-is.
            # The result is bound to a fresh, single-assignment name so the closure below
            # does not capture a reassigned variable (which would be boxed).
            N = $f(copy(A), arg, Mooncake.primal(alg_dalg))
            function $adj(::Mooncake.NoRData)
                $pb(dA, A, N, darg; kwargs...)
                # Restore primal state and clear the consumed output cotangent.
                A .= Ac
                N .= argc
                MatrixAlgebraKit.zero!(darg)
                return Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData()
            end
            return arg_darg, $adj
        end
    end
end
| 88 | + |
# Reverse-mode rule for `eig_vals!`.
# The pullback needs the eigenvectors as well as the eigenvalues, so the forward pass
# computes the full decomposition once and copies its diagonal into `D`. Taking `D` and
# `V` from the same factorization avoids a second decomposition and mirrors the
# `eigh_vals!` rule.
@is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof(MatrixAlgebraKit.eig_vals!), Any, Any, MatrixAlgebraKit.AbstractAlgorithm}
function Mooncake.rrule!!(::CoDual{<:typeof(MatrixAlgebraKit.eig_vals!)}, A_dA::CoDual, D_dD::CoDual, alg_dalg::CoDual; kwargs...)
    # unpack primals and tangents
    D_ = Mooncake.primal(D_dD)
    dD_ = Mooncake.tangent(D_dD)
    A_ = Mooncake.primal(A_dA)
    dA_ = Mooncake.tangent(A_dA)
    A, dA = arrayify(A_, dA_)
    D, dD = arrayify(D_, dD_)
    # snapshots used to restore primal state on the reverse pass
    Ac = copy(A)
    Dc = copy(D)
    # forward pass: one full decomposition supplies both the values and the vectors
    DV = eig_full(A, Mooncake.primal(alg_dalg); kwargs...)
    D .= diagview(DV[1])
    V = DV[2]
    function deig_vals_adjoint(::Mooncake.NoRData)
        A .= Ac
        # Only the eigenvalues carry a cotangent; the eigenvector slot is `nothing`.
        eig_pullback!(dA, A, (D, V), (dD, nothing); kwargs...)
        # Restore the output buffer and clear the consumed cotangent.
        D .= Dc
        MatrixAlgebraKit.zero!(dD)
        return Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData()
    end
    return D_dD, deig_vals_adjoint
end
| 113 | + |
# Reverse-mode rule for `eigh_vals!`.
# The forward pass computes the full decomposition (the pullback needs the eigenvectors)
# and copies the eigenvalues into the caller-supplied buffer `D`.
@is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof(MatrixAlgebraKit.eigh_vals!), Any, Any, MatrixAlgebraKit.AbstractAlgorithm}
function Mooncake.rrule!!(::CoDual{<:typeof(MatrixAlgebraKit.eigh_vals!)}, A_dA::CoDual, D_dD::CoDual, alg_dalg::CoDual; kwargs...)
    # unpack primals and tangents
    D_ = Mooncake.primal(D_dD)
    dD_ = Mooncake.tangent(D_dD)
    A_ = Mooncake.primal(A_dA)
    dA_ = Mooncake.tangent(A_dA)
    A, dA = arrayify(A_, dA_)
    Ac = copy(A)
    D, dD = arrayify(D_, dD_)
    Dc = copy(D)
    # forward pass
    DV = eigh_full(A, Mooncake.primal(alg_dalg); kwargs...)
    D .= diagview(DV[1])
    V = DV[2]
    function deigh_vals_adjoint(::Mooncake.NoRData)
        A .= Ac
        # Only the eigenvalues carry a cotangent; the eigenvector slot is `nothing`.
        eigh_pullback!(dA, A, (D, V), (dD, nothing); kwargs...)
        # Restore the output buffer and clear the consumed cotangent.
        D .= Dc
        MatrixAlgebraKit.zero!(dD)
        return Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData()
    end
    # Return the mutated argument itself, paired with its own tangent, rather than the
    # temporary diagonal of `DV[1]`: the primal of the returned CoDual then is the same
    # buffer that `D .= Dc` restores and that `dD_` belongs to (as in the `eig_vals!` and
    # `svd_vals!` rules).
    return D_dD, deigh_vals_adjoint
end
| 137 | + |
| 138 | + |
# Reverse-mode rules for the in-place SVD routines (full and compact).
for f in (svd_full!, svd_compact!)
    @eval begin
        @is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof($f), AbstractMatrix, Tuple{<:Any, <:Any, <:Any}, MatrixAlgebraKit.AbstractAlgorithm}
        function Mooncake.rrule!!(::CoDual{typeof($f)}, A_dA::CoDual, USVᴴ_dUSVᴴ::CoDual, alg_dalg::CoDual; kwargs...)
            # unpack primals and tangents
            factors = Mooncake.primal(USVᴴ_dUSVᴴ)
            dUSVᴴ = Mooncake.tangent(USVᴴ_dUSVᴴ)
            A, dA = arrayify(A_dA)
            U, dU = arrayify(factors[1], dUSVᴴ[1])
            S, dS = arrayify(factors[2], dUSVᴴ[2])
            Vᴴ, dVᴴ = arrayify(factors[3], dUSVᴴ[3])
            # snapshots used to restore primal state on the reverse pass
            Ac = copy(A)
            Uc = copy(U)
            Sc = copy(S)
            Vᴴc = copy(Vᴴ)
            # forward pass
            result = $f(A, factors, Mooncake.primal(alg_dalg); kwargs...)
            minmn = min(size(A)...)
            function dsvd_adjoint(::Mooncake.NoRData)
                A .= Ac
                # The compact factors go to the pullback as they are; for the full SVD
                # only the leading `minmn` columns/rows and diagonal entries are passed.
                primals, cotangents = if ($f == svd_compact!)
                    ((U, S, Vᴴ), (dU, dS, dVᴴ))
                else
                    (
                        (
                            view(U, :, 1:minmn),
                            Diagonal(diagview(S)[1:minmn]),
                            view(Vᴴ, 1:minmn, :),
                        ),
                        (
                            view(dU, :, 1:minmn),
                            Diagonal(diagview(dS)[1:minmn]),
                            view(dVᴴ, 1:minmn, :),
                        ),
                    )
                end
                svd_pullback!(dA, A, primals, cotangents)
                # Restore the output buffers and clear the consumed cotangents.
                U .= Uc
                S .= Sc
                Vᴴ .= Vᴴc
                MatrixAlgebraKit.zero!(dU)
                MatrixAlgebraKit.zero!(dS)
                MatrixAlgebraKit.zero!(dVᴴ)
                return Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData()
            end
            return Mooncake.CoDual(result, dUSVᴴ), dsvd_adjoint
        end
    end
end
| 180 | + |
# Reverse-mode rule for `svd_vals!`.
# The forward pass computes a compact SVD (the pullback needs `U` and `Vᴴ`) and copies the
# singular values into the caller-supplied buffer `S`.
@is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof(MatrixAlgebraKit.svd_vals!), Any, Any, MatrixAlgebraKit.AbstractAlgorithm}
function Mooncake.rrule!!(::CoDual{<:typeof(MatrixAlgebraKit.svd_vals!)}, A_dA::CoDual, S_dS::CoDual, alg_dalg::CoDual; kwargs...)
    # unpack primals and tangents
    S_ = Mooncake.primal(S_dS)
    dS_ = Mooncake.tangent(S_dS)
    A_ = Mooncake.primal(A_dA)
    dA_ = Mooncake.tangent(A_dA)
    A, dA = arrayify(A_, dA_)
    S, dS = arrayify(S_, dS_)
    # snapshots used to restore primal state on the reverse pass
    Ac = copy(A)
    Sc = copy(S)
    # forward pass
    U, nS, Vᴴ = svd_compact(A, Mooncake.primal(alg_dalg); kwargs...)
    S .= diagview(nS)
    function dsvd_vals_adjoint(::Mooncake.NoRData)
        A .= Ac
        # Only the singular values carry a cotangent; the `U`/`Vᴴ` slots are `nothing`.
        svd_pullback!(dA, A, (U, S, Vᴴ), (nothing, dS, nothing))
        # Restore the overwritten output buffer (as the other `*_vals!` rules do) and
        # clear the consumed cotangent.
        S .= Sc
        MatrixAlgebraKit.zero!(dS)
        return Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData(), Mooncake.NoRData()
    end
    return S_dS, dsvd_vals_adjoint
end
| 201 | + |
| 202 | +end |
0 commit comments