diff --git a/Copulas.jl b/Copulas.jl deleted file mode 160000 index 7c61c1498..000000000 --- a/Copulas.jl +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7c61c1498b7567bc025f8bacb8a7479fb0ec96fd diff --git a/Project.toml b/Project.toml index 9014ba5a7..69a061f28 100644 --- a/Project.toml +++ b/Project.toml @@ -9,7 +9,6 @@ Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" HCubature = "19dc6840-f33b-545b-b366-655c7e3ffd49" -InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LambertW = "984bce1d-4616-540c-a9ee-88d1112d94c9" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688" @@ -24,7 +23,7 @@ SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -WilliamsonTransforms = "48feb556-9bdd-43a2-8e10-96100ec25e22" +TaylorSeries = "6aa5eb33-94cf-58f4-a9d0-e4b2c4fc25ea" [weakdeps] Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" @@ -59,9 +58,9 @@ StableRNGs = "1" Statistics = "1" StatsBase = "0.33, 0.34" StatsFuns = "0.9, 1.3" +TaylorSeries = "0.20" Test = "1" TestItemRunner = "1" -WilliamsonTransforms = "0.2" julia = "1" [extras] diff --git a/README.md b/README.md index 9e7302c06..0c835fb04 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,6 @@ Build Status Aqua QA -
License: MIT ColPrac: Contributor's Guide on Collaborative Practices for Community Packages diff --git a/docs/src/bestiary/archimedean.md b/docs/src/bestiary/archimedean.md index 3fc0660e4..5b1b6fd2c 100644 --- a/docs/src/bestiary/archimedean.md +++ b/docs/src/bestiary/archimedean.md @@ -96,7 +96,7 @@ In this package, we implemented it through the [`WilliamsonGenerator`](@ref) cla `WilliamsonGenerator(X::UnivariateRandomVariable, d)`. -This function computes the Williamson d-transform of the provided random variable $X$ using the [`WilliamsonTransforms.jl`](https://github.com/lrnv/WilliamsonTransforms.jl) package. See [williamson1955multiply, mcneil2009](@cite) for the literature. +This function computes the Williamson d-transform of the provided random variable $X$. See [williamson1955multiply, mcneil2009](@cite) for the literature. !!! info "`max_monotony` of Williamson generators" The $d$-transform of a positive random variable is $d$-monotone but not $k$-monotone for any $k > d$. Its max monotony is therefore $d$. This has a few implications, one of the biggest is that the $d$-variate Archimedean copula that corresponds has no density. @@ -115,7 +115,7 @@ The Williamson d-transform is a bijective transformation[^1] from the set of pos This bijection is to be taken carefuly: the bijection is between random variables *with unit scales* and generators *with common value at 1*, sicne on both rescaling does not change the underlying copula. -This transformation is implemented through one method in the Generator interface that is worth talking a bit about : `williamson_dist(G::Generator, d)`. This function computes the inverse Williamson d-transform of the d-monotone archimedean generator ϕ, still using the [`WilliamsonTransforms.jl`](https://github.com/lrnv/WilliamsonTransforms.jl) package. See [williamson1955multiply, mcneil2009](@cite). 
+This transformation is implemented through one method in the Generator interface that is worth talking a bit about : `williamson_dist(G::Generator, d)`. This function computes the inverse Williamson d-transform of the d-monotone archimedean generator ϕ. See [williamson1955multiply, mcneil2009](@cite). To put it in a nutshell, for ``\phi`` a ``d``-monotone archimedean generator, the inverse Williamson-d-transform of ``\\phi`` is the cumulative distribution function ``F`` of a non-negative random variable ``R``, defined by : @@ -123,7 +123,7 @@ To put it in a nutshell, for ``\phi`` a ``d``-monotone archimedean generator, th F(x) = 𝒲_{d}^{-1}(\phi)(x) = 1 - \frac{(-x)^{d-1} \phi_+^{(d-1)}(x)}{k!} - \sum_{k=0}^{d-2} \frac{(-x)^k \phi^{(k)}(x)}{k!} ``` -The [`WilliamsonTransforms.jl`](https://github.com/lrnv/WilliamsonTransforms.jl) package implements this transformation (and its inverse, the Williamson d-transfrom) in all generality. It returns this cumulative distribution function in the form of the corresponding random variable `<:Distributions.ContinuousUnivariateDistribution` from `Distributions.jl`. You may then compute : +It returns this cumulative distribution function in the form of the corresponding random variable `<:Distributions.ContinuousUnivariateDistribution` from `Distributions.jl`. You may then compute : * The cdf via `Distributions.cdf` * The pdf via `Distributions.pdf` and the logpdf via `Distributions.logpdf` * Samples from the distribution via `rand(X,n)`. @@ -184,14 +184,14 @@ This is why `williamson_dist(G::Generator,d)` is such an important function in t ```@example using Copulas: williamson_dist, FrankGenerator -williamson_dist(FrankGenerator(7), Val{3}()) +williamson_dist(FrankGenerator(7), 3) ``` For the Frank Copula, as for many classic copulas, the distribution used is known. We pull some of them from `Distributions.jl` but implement a few more, as this Logarithmic one. 
Another useful example are negatively-dependent Clayton copulas: ```@example using Copulas: williamson_dist, ClaytonGenerator -williamson_dist(ClaytonGenerator(-0.2), Val{3}()) +williamson_dist(ClaytonGenerator(-0.2), 3) ``` for which the corresponding distribution is known but has no particular name, thus we implemented it under the `ClaytonWilliamsonDistribution` name. @@ -209,7 +209,7 @@ for which the corresponding distribution is known but has no particular name, th We use this fraily approach for several generators, since sometimes it is faster, including e.g. the Clayton one with positive dependence: ```@example using Copulas: williamson_dist, ClaytonGenerator - williamson_dist(ClaytonGenerator(10), Val{3}()) + williamson_dist(ClaytonGenerator(10), 3) ``` diff --git a/docs/src/bestiary/empirical.md b/docs/src/bestiary/empirical.md index ed914ef46..7b5e52a8d 100644 --- a/docs/src/bestiary/empirical.md +++ b/docs/src/bestiary/empirical.md @@ -216,7 +216,7 @@ Usage: - Build from data `u::d×n` (raw or pseudos): `Ĝ = EmpiricalGenerator(u; pseudo_values=true)` - Use directly in an Archimedean copula: `Ĉ = ArchimedeanCopula(d, Ĝ)` -- Access the fitted radial law: `R̂ = williamson_dist(Ĝ, Val{d}())` +- Access the fitted radial law: `R̂ = williamson_dist(Ĝ, d)` ```@docs; canonical=false EmpiricalGenerator diff --git a/docs/src/examples/archimedean_radial_estimation.md b/docs/src/examples/archimedean_radial_estimation.md index 675d92b47..d494ab787 100644 --- a/docs/src/examples/archimedean_radial_estimation.md +++ b/docs/src/examples/archimedean_radial_estimation.md @@ -205,7 +205,7 @@ R = Dirac(1.0) d, n = 3, 1000 u = spl_cop(R, d, n) Ghat = EmpiricalGenerator(u) -Rhat = Copulas.williamson_dist(Ghat, Val{d}()) +Rhat = Copulas.williamson_dist(Ghat, d) diagnose_plots(u, Rhat; R=R) ``` @@ -216,7 +216,7 @@ R = DiscreteNonParametric([1.0, 4.0, 8.0], fill(1/3, 3)) d, n = 2, 1000 u = spl_cop(R, d, n) Ghat = EmpiricalGenerator(u) -Rhat = Copulas.williamson_dist(Ghat, 
Val{d}()) +Rhat = Copulas.williamson_dist(Ghat, d) diagnose_plots(u, Rhat; R=R) ``` @@ -227,7 +227,7 @@ R = DiscreteNonParametric([1.0, 4.0, 8.0], fill(1/3, 3)) d, n = 3, 1000 u = spl_cop(R, d, n) Ghat = EmpiricalGenerator(u) -Rhat = Copulas.williamson_dist(Ghat, Val{d}()) +Rhat = Copulas.williamson_dist(Ghat, d) diagnose_plots(u, Rhat; R=R) ``` @@ -238,7 +238,7 @@ R = LogNormal(1, 3) d, n = 10, 1000 u = spl_cop(R, d, n) Ghat = EmpiricalGenerator(u) -Rhat = Copulas.williamson_dist(Ghat, Val{d}()) +Rhat = Copulas.williamson_dist(Ghat, d) diagnose_plots(u, Rhat; R=R, logged=true) ``` @@ -249,7 +249,7 @@ R = Pareto(1.0, 1/2) d, n = 10, 1000 u = spl_cop(R, d, n) Ghat = EmpiricalGenerator(u) -Rhat = Copulas.williamson_dist(Ghat, Val{d}()) +Rhat = Copulas.williamson_dist(Ghat, d) diagnose_plots(u, Rhat; R=R, logged=true) ``` diff --git a/docs/src/manual/fitting_interface.md b/docs/src/manual/fitting_interface.md index 509f6f75c..62318eed0 100644 --- a/docs/src/manual/fitting_interface.md +++ b/docs/src/manual/fitting_interface.md @@ -39,7 +39,7 @@ plot(Ĉ) ### Full Model (with metadata) ```@example fitting_interface -M = fit(CopulaModel, GumbelCopula, U; method=:default, summaries=true) +M = fit(CopulaModel, GumbelCopula, U; method=:default) M ``` @@ -48,7 +48,7 @@ Returns a `CopulaModel` with: * `result` (the fitted copula), `n`, `ll` (log-likelihood), * `method`, `converged`, `iterations`, `elapsed_sec`, * `vcov` (if available), -* `method_details` (a named tuple with method metadata and, if `summaries=true`, **pairwise summaries**: means, deviations, minima, and maxima of empirical τ/ρ/β/γ). +* `method_details` (a named tuple with method metadata). --- @@ -56,7 +56,7 @@ Returns a `CopulaModel` with: - fit operates on types, not on pre-constructed parameterized instances. Always pass a Copula or SklarDist *type* to `fit`, e.g. `fit(GumbelCopula, U)` or `fit(CopulaModel, SklarDist{ClaytonCopula,Tuple{Normal,LogNormal}}, X)`. 
If you already have a constructed instance `C0`, re-estimate its parameters by calling `fit(typeof(C0), U)`. -- Default method selection: each family exposes the list of available fitting strategies via `_available_fitting_methods(CT)`. When `method = :default` the first element of that tuple is used. Example: `Copulas._available_fitting_methods(MyCopula)`. +- Default method selection: each family exposes the list of available fitting strategies via `_available_fitting_methods(CT, d)`. When `method = :default` the first element of that tuple is used. Example: `Copulas._available_fitting_methods(MyCopula, d)`. - `CopulaModel` is the full result object returned by the fits performed via `Distributions.fit(::Type{CopulaModel}, ...)`. The light-weight shortcut `fit(MyCopula, U)` returns only a copula instance; use `fit(CopulaModel, ...)` to get diagnostics and metadata. @@ -80,26 +80,7 @@ The `CopulaModel{CT} <: StatsBase.StatisticalModel` type stores the result and s | `hqc(M)` | Hannan–Quinn criterion | -Quick access to the contained copula: `_copula_of(M)` (returns the copula even if `result` is a `SklarDist`). - - -### Pairwise summaries and `method_details` - -When you request `summaries=true` (default) the returned `CopulaModel` contains extra pre-computed pairwise statistics inside `M.method_details`. Typical keys are: - -- `:tau_mean`, `:tau_sd`, `:tau_min`, `:tau_max` - -- `:rho_mean`, `:rho_sd`, `:rho_min`, `:rho_max` -- `:beta_mean`, `:beta_sd`, `:beta_min`, `:beta_max` - -- `:gamma_mean`, `:gamma_sd`, `:gamma_min`, `:gamma_max` - -Access example: - -```@example fitting_interface -M = fit(CopulaModel, GumbelCopula, U; summaries=true) -M.method_details.tau_mean # average pairwise Kendall's tau -``` - -If `summaries=false` these keys will be absent and `method_details` will be smaller. +By default, the returned `CopulaModel` contains a lot of extra statistics, which you can see by printing the model in the REPL. 
### `vcov` and inference notes @@ -143,7 +124,7 @@ plot(Ŝ.result) The names and availiability of fitting methods depends on the model. You can check what is available with the following internal call : ```@example fitting_interface -Copulas._available_fitting_methods(ClaytonCopula) +Copulas._available_fitting_methods(ClaytonCopula, 3) ``` The first method in the list is the one used by default. @@ -168,7 +149,7 @@ When you add a new copula family, implement the following so the generic `fit` f 1. `_example(CT, d)` — return a representative instance (used to obtain default params and initial values). 2. `_unbound_params(CT, d, params)` — transform the family `NamedTuple` parameters to an unconstrained `Vector{Float64}` used by optimizers. 3. `_rebound_params(CT, d, α)` — invert `_unbound_params`, returning a `NamedTuple` suitable for `CT(d, ...)` construction. -4. `_available_fitting_methods(::Type{<:YourCopula})` — declare supported methods (examples: `:mle, :itau, :irho, :ibeta, ...`). +4. `_available_fitting_methods(::Type{<:YourCopula}, d::Int)` — declare supported methods (examples: `:mle, :itau, :irho, :ibeta, ...`). 5. `_fit(::Type{<:YourCopula}, U, ::Val{:mle})` (and other `Val{}` methods) — implement the method and return `(fitted_copula, meta::NamedTuple)`; include keys such as `:θ̂`, `:optimizer`, `:converged`, `:iterations` and optionally `:vcov`. Place this checklist and a minimal `_fit` skeleton in `docs/src/manual/developer_fitting.md` where contributors can copy/paste and adapt. diff --git a/docs/src/manual/intro.md b/docs/src/manual/intro.md index d9ebe4669..2ba46be7b 100644 --- a/docs/src/manual/intro.md +++ b/docs/src/manual/intro.md @@ -61,9 +61,9 @@ You may define a copula object in Julia by simply calling its constructor: ```@example 1 using Copulas -d = 4 # The dimension of the model +d = 3 # The dimension of the model θ = 7 # Parameter -C = ClaytonCopula(4,7) # A 4-dimensional clayton copula with parameter θ = 7. 
+C = ClaytonCopula(d,7) # A 3-dimensional Clayton copula with parameter θ = 7. ``` This object is a random vector, and behaves exactly as you would expect a random vector from `Distributions.jl` to behave: you may sample it with `rand(C,100)`, compute its pdf or cdf with `pdf(C,x)` and `cdf(C,x)`, etc: @@ -84,6 +84,18 @@ plot(C, :logpdf) See [the visualizations page](@ref viz_page) for details on the visualisations tools. It’s often useful to get an intuition by looking at scatter plots. +!!! example "Independence" + To give another example, the function + + $\Pi : \boldsymbol x \mapsto \prod_{i=1}^d x_i = \boldsymbol x^{\boldsymbol 1}$ is a copula, corresponding to independent random vectors. + + This copula can be constructed using the [`IndependentCopula(d)`](@ref IndependentCopula) syntax as follows: + + ```@example 1 + Π = IndependentCopula(d) # A 3-variate independence structure. + nothing # hide + ``` + One of the reasons that makes copulas so useful is the bijective map from the Sklar Theorem [sklar1959](@cite): !!! theorem "Sklar (1959)" @@ -95,25 +107,14 @@ One of the reasons that makes copulas so useful is the bijective map from the Sk This result allows to decompose the distribution of $\boldsymbol X$ into several components: the marginal distributions on one side, and the copula on the other side, which governs the dependence structure between the marginals. This object is central in our work, and therefore deserves a moment of attention. -!!! example "Independence" - The function - - $\Pi : \boldsymbol x \mapsto \prod_{i=1}^d x_i = \boldsymbol x^{\boldsymbol 1}$ is a copula, corresponding to independent random vectors. -The independence copula can be constructed using the [`IndependentCopula(d)`](@ref IndependentCopula) syntax as follows: - -```@example 1 -Π = IndependentCopula(d) # A 4-variate independence structure.
-nothing # hide -``` We can then leverage the Sklar theorem to construct multivariate random vectors from a copula-marginals specification. The implementation we have of this theorem allows building multivariate distributions by specifying separately their marginals and dependence structures as follows: ```@example 1 X₁, X₂, X₃ = Gamma(2,3), Pareto(), LogNormal(0,1) # Marginals -C = ClaytonCopula(3,0.7) # A 3-variate Clayton Copula with θ = 0.7 -D = SklarDist(C, (X₁,X₂,X₃)) # The final distribution +D = SklarDist(C, (X₁,X₂,X₃)) # The final distribution, using the previous copula C. plot(D, scale=:sklar) nothing # hide ``` @@ -132,7 +133,7 @@ Sklar's theorem can be used the other way around (from the marginal space to the !!! info "Independent random vectors" - `Distributions.jl` provides the [`product_distribution`](https://juliastats.org/Distributions.jl/stable/multivariate/#Product-distributions) function to create independent random vectors with given marginals. `product_distribution(args...)` is essentially equivalent to `SklarDist(Π, args)`, but our approach generalizes to other dependence structures. + `Distributions.jl` provides the [`product_distribution`](https://juliastats.org/Distributions.jl/stable/multivariate/#Product-distributions) function to create independent random vectors with given marginals. `product_distribution(args...)` is essentially equivalent to `SklarDist(IndependentCopula(d), args)`, but our approach generalizes to other dependence structures. Copulas are bounded functions with values in [0,1] since they correspond to probabilities. 
But their range can be bounded more precisely, and [lux2017](@cite) gives us: @@ -309,3 +310,5 @@ The documentation of this package aims to combine theoretical information and re Pages = [@__FILE__] Canonical = false ``` + + diff --git a/src/ArchimaxCopula.jl b/src/ArchimaxCopula.jl index 16f7a5dac..1c1aa2749 100644 --- a/src/ArchimaxCopula.jl +++ b/src/ArchimaxCopula.jl @@ -155,7 +155,7 @@ function _rebound_params(CT::Type{<:ArchimaxCopula{2, <:Generator, <:Tail}}, d, NamedTuple{all_names}(all_vals) end -_available_fitting_methods(::Type{<:ArchimaxCopula}) = (:mle,) +_available_fitting_methods(::Type{<:ArchimaxCopula}, d) = (:mle,) # Fast conditional distortion binding (bivariate) DistortionFromCop(C::ArchimaxCopula{2}, js::NTuple{1,Int}, uⱼₛ::NTuple{1,Float64}, ::Int) = BivArchimaxDistortion(C.gen, C.tail, Int8(js[1]), float(uⱼₛ[1])) @@ -177,7 +177,7 @@ end # --- log-PDF stable --- function Distributions._logpdf(C::ArchimaxCopula{2, TG, TT}, u) where {TG, TT} - T = promote_type(Float64, eltype(u)) + T = typeof(A(C.tail, one(ϕ(C.gen, one(eltype(u))))/2)) @assert length(u) == 2 u1, u2 = u (0.0 < u1 ≤ 1.0 && 0.0 < u2 ≤ 1.0) || return T(-Inf) @@ -201,7 +201,7 @@ function Distributions._logpdf(C::ArchimaxCopula{2, TG, TT}, u) where {TG, TT} s = S * A0 φp = ϕ⁽¹⁾(C.gen, s) # < 0 - φpp = ϕ⁽ᵏ⁾(C.gen, Val(2), s) # > 0 + φpp = ϕ⁽ᵏ⁾(C.gen, 2, s) # > 0 base = su*sv + (φp/φpp)*suv base > 0 || return T(-Inf) diff --git a/src/ArchimedeanCopula.jl b/src/ArchimedeanCopula.jl index 5f8abd459..5ee7e8ed1 100644 --- a/src/ArchimedeanCopula.jl +++ b/src/ArchimedeanCopula.jl @@ -65,12 +65,12 @@ function Distributions._logpdf(C::ArchimedeanCopula{d,TG}, u) where {d,TG} if !all(0 .< u .< 1) return eltype(u)(-Inf) end - return log(max(ϕ⁽ᵏ⁾(C.G, Val{d}(), sum(ϕ⁻¹.(C.G, u))) * prod(ϕ⁻¹⁽¹⁾.(C.G, u)), 0)) + return log(max(ϕ⁽ᵏ⁾(C.G, d, sum(ϕ⁻¹.(C.G, u))) * prod(ϕ⁻¹⁽¹⁾.(C.G, u)), 0)) end function Distributions._rand!(rng::Distributions.AbstractRNG, C::ArchimedeanCopula{d, TG}, 
x::AbstractVector{T}) where {T<:Real, d, TG} # By default, we use the Williamson sampling. Random.randexp!(rng,x) - r = rand(rng, williamson_dist(C.G, Val{d}())) + r = rand(rng, williamson_dist(C.G, d)) sx = sum(x) for i in 1:length(C) x[i] = ϕ(C.G,r * x[i]/sx) @@ -135,7 +135,7 @@ function rosenblatt(C::ArchimedeanCopula{d,TG}, u::AbstractMatrix{<:Real}) where if iszero(rⱼ) U[j,i] = zero(rⱼ) else - A, B = ϕ⁽ᵏ⁾(C.G, Val(j - 1), rⱼ), ϕ⁽ᵏ⁾(C.G, Val(j - 1), rⱼ₋₁) + A, B = ϕ⁽ᵏ⁾(C.G, j - 1, rⱼ), ϕ⁽ᵏ⁾(C.G, j - 1, rⱼ₋₁) U[j,i] = A / B end end @@ -155,8 +155,8 @@ function inverse_rosenblatt(C::ArchimedeanCopula{d,TG}, u::AbstractMatrix{<:Real elseif !isfinite(Cᵢⱼ) U[j,i] = zero(Cᵢⱼ) else - Dᵢⱼ = ϕ⁽ᵏ⁾(C.G, Val{j - 1}(), Cᵢⱼ) * u[j,i] - R = ϕ⁽ᵏ⁾⁻¹(C.G, Val{j - 1}(), Dᵢⱼ; start_at=Cᵢⱼ) + Dᵢⱼ = ϕ⁽ᵏ⁾(C.G, j - 1, Cᵢⱼ) * u[j,i] + R = ϕ⁽ᵏ⁾⁻¹(C.G, j - 1, Dᵢⱼ; start_at=Cᵢⱼ) U[j, i] = ϕ(C.G, R - Cᵢⱼ) Cᵢⱼ = R end @@ -172,10 +172,10 @@ function DistortionFromCop(C::ArchimedeanCopula, js::NTuple{p,Int}, uⱼₛ::NTu @inbounds for u in uⱼₛ sJ += ϕ⁻¹(C.G, u) end - return ArchimedeanDistortion(C.G, p, float(sJ), float(T(ϕ⁽ᵏ⁾(C.G, Val{p}(), sJ)))) + return ArchimedeanDistortion(C.G, p, float(sJ), float(T(ϕ⁽ᵏ⁾(C.G, p, sJ)))) end function ConditionalCopula(C::ArchimedeanCopula{D}, ::NTuple{p,Int}, uⱼₛ::NTuple{p,Float64}) where {D, p} - return ArchimedeanCopula(D - p, TiltedGenerator(C.G, Val{p}(), sum(ϕ⁻¹.(C.G, uⱼₛ)))) + return ArchimedeanCopula(D - p, TiltedGenerator(C.G, p, sum(ϕ⁻¹.(C.G, uⱼₛ)))) end SubsetCopula(C::ArchimedeanCopula{d,TG}, dims::NTuple{p, Int}) where {d,TG,p} = ArchimedeanCopula(length(dims), C.G) @@ -191,11 +191,11 @@ _example(::Type{<:ArchimedeanCopula{d,<:FrailtyGenerator} where {d}}, d) = throw _unbound_params(CT::Type{<:ArchimedeanCopula}, d, θ) = _unbound_params(generatorof(CT), d, θ) _rebound_params(CT::Type{<:ArchimedeanCopula}, d, α) = _rebound_params(generatorof(CT), d, α) -_available_fitting_methods(::Type{ArchimedeanCopula}) = (:gnz2011,) 
-_available_fitting_methods(::Type{<:ArchimedeanCopula{d,GT} where {d,GT<:Generator}}) = (:mle,) -_available_fitting_methods(::Type{<:ArchimedeanCopula{d,GT} where {d,GT<:UnivariateGenerator}}) = (:mle, :itau, :irho, :ibeta) -_available_fitting_methods(::Type{<:ArchimedeanCopula{d,<:WilliamsonGenerator{d2, TX}} where {d,d2, TX}}) = Tuple{}() # No fitting method. -_available_fitting_methods(::Type{<:ArchimedeanCopula{d,<:WilliamsonGenerator{d2, <:Distributions.DiscreteNonParametric}} where {d,d2}}) = (:gnz2011,) +_available_fitting_methods(::Type{ArchimedeanCopula}, d) = (:gnz2011,) +_available_fitting_methods(::Type{<:ArchimedeanCopula{d,GT} where {d,GT<:Generator}}, d) = (:mle,) +_available_fitting_methods(::Type{<:ArchimedeanCopula{d,GT} where {d,GT<:UnivariateGenerator}}, d) = (:mle, :itau, :irho, :ibeta) +_available_fitting_methods(::Type{<:ArchimedeanCopula{d,<:WilliamsonGenerator{d2, TX}} where {d,d2, TX}}, d) = Tuple{}() # No fitting method. +_available_fitting_methods(::Type{<:ArchimedeanCopula{d,<:WilliamsonGenerator{d2, <:Distributions.DiscreteNonParametric}} where {d,d2}}, d) = (:gnz2011,) function _fit(::Union{Type{ArchimedeanCopula},Type{<:ArchimedeanCopula{d,<:WilliamsonGenerator{d2, <:Distributions.DiscreteNonParametric}} where {d,d2}}}, U, ::Val{:gnz2011}) @@ -215,7 +215,7 @@ function _fit(CT::Type{<:ArchimedeanCopula{d, GT} where {d, GT<:UnivariateGenera θs = map(v -> invf(GT, clamp(v, -1, 1)), upper_triangle_flat) θ = clamp(Statistics.mean(θs), _θ_bounds(GT, d)...) - return CT(d, θ), (; θ̂=θ, eps) + return CT(d, θ), (; θ̂=(θ=θ,)) end function _fit(CT::Type{<:ArchimedeanCopula{d, GT} where {d, GT<:UnivariateGenerator}}, U, ::Val{:ibeta}) d = size(U,1); δ = 1e-8; GT = generatorof(CT) @@ -226,27 +226,26 @@ function _fit(CT::Type{<:ArchimedeanCopula{d, GT} where {d, GT<:UnivariateGenera βmin, βmax = fβ(a0), fβ(b0) if βmin > βmax; βmin, βmax = βmax, βmin; end θ = βobs ≤ βmin ? a0 : βobs ≥ βmax ? 
b0 : Roots.find_zero(θ -> fβ(θ)-βobs, (a0,b0), Roots.Brent(); xatol=1e-8, rtol=0) - return CT(d,θ), (; θ̂=θ) + return CT(d,θ), (; θ̂=(θ=θ,)) end function _fit(CT::Type{<:ArchimedeanCopula{d, GT} where {d, GT<:UnivariateGenerator}}, U, ::Val{:mle}; start::Union{Symbol,Real}=:itau, xtol::Real=1e-8) d = size(U,1) GT = generatorof(CT) lo, hi = _θ_bounds(GT, d) - θ₀ = [(lo+hi)/2] + θ₀ = [1.0] if start isa Real θ₀[1] = start elseif start ∈ (:itau, :irho) - try - θ₀[1] = only(Distributions.params(_fit(CT, U, Val{start}())[1])) - catch e - end + θ₀[1] = _fit(CT, U, Val{start}())[2].θ̂[1] + end + if θ₀[1] <= lo || θ₀[1] >= hi + θ₀[1] = Distributions.params(_example(CT, d))[1] end - θ₀[1] = clamp(θ₀[1], lo, hi) f(θ) = -Distributions.loglikelihood(CT(d, θ[1]), U) res = Optim.optimize(f, lo, hi, θ₀, Optim.Fminbox(Optim.LBFGS()), autodiff = :forward) - θ̂ = Optim.minimizer(res)[1] - return CT(d, θ̂), (; θ̂=θ̂, optimizer=:GradientDescent, + θ = Optim.minimizer(res)[1] + return CT(d, θ), (; θ̂=(θ=θ,), optimizer=Optim.summary(res), xtol=xtol, converged=Optim.converged(res), iterations=Optim.iterations(res)) end \ No newline at end of file diff --git a/src/Copula.jl b/src/Copula.jl index 431bb5b25..0d5721f5c 100644 --- a/src/Copula.jl +++ b/src/Copula.jl @@ -52,43 +52,15 @@ function β(C::Copula{d}) where {d} Cbar0 = Distributions.cdf(SurvivalCopula(C, Tuple(1:d)), u) return (2.0^(d-1) * C0 + Cbar0 - 1) / (2^(d-1) - 1) end -function γ(C::Copula{d}; nmc::Int=100_000, rng::Random.AbstractRNG=Random.MersenneTwister(123)) where {d} - d ≥ 2 || throw(ArgumentError("γ(C) requires d≥2")) - if d == 2 - f(t) = Distributions.cdf(C, [t, t]) + Distributions.cdf(C, [t, 1 - t]) - I, _ = QuadGK.quadgk(f, 0.0, 1.0; rtol=sqrt(eps())) - return -2 + 4I - end - @inline _A(u) = (minimum(u) + max(sum(u) - d + 1, 0.0)) / 2 - @inline _Abar(u) = (1 - maximum(u) + max(1 - sum(u), 0.0)) / 2 - @inline invfac(k::Integer) = exp(-SpecialFunctions.logfactorial(k)) - s = 0.0 - @inbounds for i in 0:d - s += (isodd(i) 
? -1.0 : 1.0) * binomial(d, i) * invfac(i + 1) - end - a_d = 1/(d + 1) + 0.5*invfac(d + 1) + 0.5*s - b_d = 2/3 + 4.0^(1 - d) / 3 - U = rand(rng, C, nmc) - m = 0.0 - @inbounds for j in 1:nmc - u = @view U[:, j] - m += _A(u) + _Abar(u) - end - m /= nmc - return (m - a_d) / (b_d - a_d) +function γ(C::Copula{d}) where {d} + _integrand(u) = (1 + minimum(u) - maximum(u) + max(abs(sum(u) - d/2) - (d - 2)/2, 0.0)) / 2 + I = Distributions.expectation(_integrand, C; nsamples=10^4) + a = 1/(d+1) + 1/factorial(d+1) # independence + b = (2 + 4.0^(1-d)) / 3 # comonotonicity + return (I - a) / (b - a) end - -function ι(C::Copula{d}; nmc::Int=100_000, rng::Random.AbstractRNG=Random.MersenneTwister(123)) where {d} - U = rand(rng, C, nmc) - s = 0.0 - @inbounds for j in 1:nmc - u = @view U[:, j] - lp = Distributions.logpdf(C, u) - isfinite(lp) || throw(DomainError(lp, "logpdf(C,u) non-finite.")) - s -= lp - end - H = s / nmc - return H +function ι(C::Copula{d}) where {d} + return Distributions.expectation(u -> -Distributions.logpdf(C, u), C; nsamples=10^4) end function λₗ(C::Copula{d}; ε::Float64 = 1e-10) where {d} g(e) = Distributions.cdf(C, fill(e, d)) / e @@ -128,51 +100,24 @@ function ρ(U::AbstractMatrix) return h * (2.0^d * μ - 1.0) end function γ(U::AbstractMatrix) - # Assumes pseudo-data given. 
Multivariate Gini’s gamma (Behboodian–Dolati–Úbeda, 2007) d, n = size(U) - if d == 2 - # Schechtman–Yitzhaki symmetric Gini over ranks (copular invariant) - r1 = StatsBase.tiedrank(@view U[1, :]) - r2 = StatsBase.tiedrank(@view U[2, :]) - m = n - h = m + 1 - acc = 0.0 - @inbounds @simd for k in 1:m - acc += abs(r1[k] + r2[k] - h) - abs(r1[k] - r2[k]) - end - return 2*acc / (m*h) - else - @inline _A(u) = (minimum(u) + max(sum(u) - d + 1, 0.0)) / 2 - @inline _Abar(u) = (1 - maximum(u) + max(1 - sum(u), 0.0)) / 2 - invfac(k::Integer) = exp(-SpecialFunctions.logfactorial(k)) - s = 0.0 - binomf(d,i) = exp(SpecialFunctions.loggamma(d+1) - SpecialFunctions.loggamma(i+1) - SpecialFunctions.loggamma(d-i+1)) - @inbounds for i in 0:d - s += (isodd(i) ? -1.0 : 1.0) * binomf(d,i) * invfac(i + 1) - end - a_d = 1/(d + 1) + 0.5*invfac(d + 1) + 0.5*s - b_d = 2/3 + 4.0^(1 - d) / 3 - m = 0.0 - @inbounds for j in 1:n - u = @view U[:, j] - m += _A(u) + _Abar(u) - end - m /= n - return (m - a_d) / (b_d - a_d) + I = zero(eltype(U)) + for j in 1:n + u = U[:,j] + I += (1 + minimum(u) - maximum(u) + max(abs(sum(u) - d/2) - (d - 2)/2, 0.0)) / 2 end + I /= n + a = 1/(d+1) + 1/factorial(d+1) + b = (2 + 4.0^(1-d)) / 3 + return (I - a) / (b - a) end function _λ(U::AbstractMatrix; t::Symbol=:upper, p::Union{Nothing,Real}=nothing) # Assumes pseudo-data given. Multivariate tail’s lambda (Schmidt, R. & Stadtmüller, U. 2006) - d, m = size(U) - m ≥ 4 || throw(ArgumentError("At least 4 observations are required")) - p === nothing && (p = 1/sqrt(m)) + p === nothing && (p = 1/sqrt(size(U, 2))) (0 < p < 1) || throw(ArgumentError("p must be in (0,1)")) - V = t === :upper ? (1 .- Float64.(U)) : Float64.(U) - cnt = 0 - @inbounds @views for j in 1:m - cnt += all(V[:, j] .<= p) # vista sin copiar gracias a @views - end - return clamp(cnt / (p*m), 0.0, 1.0) + in_tail = t=== :upper ? 
Base.Fix2(>=, 1-p) : Base.Fix2(<=, p) + prob = Statistics.mean(all(in_tail, U, dims=1)) + return clamp(prob/p, 0.0, 1.0) end λₗ(U::AbstractMatrix; p::Union{Nothing,Real}=nothing) = _λ(U; t=:lower, p=p) λᵤ(U::AbstractMatrix; p::Union{Nothing,Real}=nothing) = _λ(U; t=:upper, p=p) diff --git a/src/Copulas.jl b/src/Copulas.jl index 0000d3e6c..81f9b1202 100644 --- a/src/Copulas.jl +++ b/src/Copulas.jl @@ -2,7 +2,6 @@ module Copulas import Base import Random - import InteractiveUtils import SpecialFunctions import Roots import Distributions @@ -12,7 +11,6 @@ module Copulas import ForwardDiff import HCubature import MvNormalCDF - import WilliamsonTransforms import Combinatorics import LogExpFunctions import QuadGK @@ -22,6 +20,7 @@ module Copulas import LambertW import Optim import Printf + import TaylorSeries # Main code include("utils.jl") @@ -82,6 +81,7 @@ module Copulas include("EllipticalCopulas/TCopula.jl") # Archimedean copulas + include("WilliamsonTransforms.jl") include("Generator.jl") include("ArchimedeanCopula.jl") diff --git a/src/EllipticalCopulas/GaussianCopula.jl b/src/EllipticalCopulas/GaussianCopula.jl index c3e0ce49e..44e7f1470 100644 --- a/src/EllipticalCopulas/GaussianCopula.jl +++ b/src/EllipticalCopulas/GaussianCopula.jl @@ -82,7 +82,7 @@ N(::Type{T}) where T<: GaussianCopula = Distributions.MvNormal function _cdf(C::CT,u) where {CT<:GaussianCopula} x = StatsBase.quantile.(Distributions.Normal(), u) d = length(C) - return MvNormalCDF.mvnormcdf(C.Σ, fill(-Inf, d), x, m=10_0000d)[1] + return MvNormalCDF.mvnormcdf(C.Σ, fill(-Inf, d), x)[1] end function rosenblatt(C::GaussianCopula, u::AbstractMatrix{<:Real}) @@ -139,6 +139,6 @@ end function _fit(CT::Type{<:GaussianCopula}, u, ::Val{:mle}) dd = Distributions.fit(N(CT), StatsBase.quantile.(U(CT),u)) Σ = Matrix(dd.Σ) - return GaussianCopula(Σ), (;) + return GaussianCopula(Σ), (; θ̂ = (; Σ = Σ)) end -_available_fitting_methods(::Type{<:GaussianCopula}) = (:itau, :irho, :ibeta, :mle) \ No newline at end of 
file +_available_fitting_methods(::Type{<:GaussianCopula}, d) = (:mle, :itau, :irho, :ibeta) \ No newline at end of file diff --git a/src/EllipticalCopulas/TCopula.jl b/src/EllipticalCopulas/TCopula.jl index 41016a640..2044db595 100644 --- a/src/EllipticalCopulas/TCopula.jl +++ b/src/EllipticalCopulas/TCopula.jl @@ -97,4 +97,4 @@ function _rebound_params(::Type{<:TCopula}, d::Int, α::AbstractVector{T}) where return (; ν = ν, Σ = Σ) end -_available_fitting_methods(::Type{<:TCopula}) = (:mle,) \ No newline at end of file +_available_fitting_methods(::Type{<:TCopula}, d) = (:mle,) \ No newline at end of file diff --git a/src/ExtremeValueCopula.jl b/src/ExtremeValueCopula.jl index 3c4fa4086..f404bea83 100644 --- a/src/ExtremeValueCopula.jl +++ b/src/ExtremeValueCopula.jl @@ -137,9 +137,9 @@ _example(CT::Type{<:ExtremeValueCopula}, d) = CT(d; _rebound_params(CT, d, fill( _unbound_params(CT::Type{<:ExtremeValueCopula}, d, θ) = _unbound_params(tailof(CT), d, θ) _rebound_params(CT::Type{<:ExtremeValueCopula}, d, α) = _rebound_params(tailof(CT), d, α) -_available_fitting_methods(::Type{ExtremeValueCopula}) = (:ols, :cfg, :pickands) -_available_fitting_methods(CT::Type{<:ExtremeValueCopula}) = (:mle,) -_available_fitting_methods(CT::Type{<:ExtremeValueCopula{2,GT} where {GT<:UnivariateTail2}}) = (:mle, :itau, :irho, :ibeta, :iupper) +_available_fitting_methods(::Type{ExtremeValueCopula}, d) = (:ols, :cfg, :pickands) +_available_fitting_methods(CT::Type{<:ExtremeValueCopula}, d) = (:mle,) +_available_fitting_methods(CT::Type{<:ExtremeValueCopula{2,GT} where {GT<:UnivariateTail2}}, d) = (:mle, :itau, :irho, :ibeta, :iupper) # Fitting empírico (OLS, CFG, Pickands): function _fit(::Type{ExtremeValueCopula}, U, method::Union{Val{:ols}, Val{:cfg}, Val{:pickands}}; @@ -152,25 +152,28 @@ function _fit(CT::Type{<:ExtremeValueCopula{d, GT} where {d, GT<:UnivariateTail2 m isa Val{:irho} ? 
ρ⁻¹(CT, StatsBase.corspearman(U')[1,2]) : β⁻¹(CT, corblomqvist(U')[1,2]) θ = clamp(θ, _θ_bounds(tailof(CT), 2)...) - return CT(2, θ), (; θ̂=θ) + return CT(2, θ), (; θ̂=(θ=θ,)) end function _fit(CT::Type{<:ExtremeValueCopula{d, GT} where {d, GT<:UnivariateTail2}}, U, ::Val{:iupper}) θ = clamp(λᵤ⁻¹(CT, λᵤ(U)), _θ_bounds(tailof(CT), 2)...) - return CT(2, θ), (; θ̂=θ) + return CT(2, θ), (; θ̂=(θ=θ,)) end function _fit(CT::Type{<:ExtremeValueCopula{d, GT} where {d, GT<:UnivariateTail2}}, U, ::Val{:mle}; start::Union{Symbol,Real}=:itau, xtol::Real=1e-8) d = size(U,1) TT = tailof(CT) lo, hi = _θ_bounds(TT, d) - θ0 = start isa Real ? start : - start ∈ (:itau, :irho, :ibeta, :iupper) ? _fit(CT, U, Val{start}())[2].θ̂ : - only(Distributions.params(_example(CT, d))) - θ0 = clamp(θ0, lo, hi) + θ0_val = if start isa Real + start + else + initial_params = start ∈ (:itau, :irho, :ibeta, :iupper) ? _fit(CT, U, Val{start}())[2].θ̂ : only(Distributions.params(_example(CT, d))) + initial_params.θ + end + θ0_clamped = clamp(θ0_val, lo, hi) f(θ) = -Distributions.loglikelihood(CT(d, θ[1]), U) - res = Optim.optimize(f, lo, hi, [θ0], Optim.Fminbox(Optim.LBFGS()), autodiff = :forward) + res = Optim.optimize(f, lo, hi, [θ0_clamped], Optim.Fminbox(Optim.LBFGS()), autodiff = :forward) θ̂ = Optim.minimizer(res)[1] - return CT(d, θ̂), (; θ̂=θ̂, optimizer=:GradientDescent, + return CT(d, θ̂), (; θ̂=(;θ=θ̂), optimizer=:GradientDescent, xtol=xtol, converged=Optim.converged(res), iterations=Optim.iterations(res)) end diff --git a/src/Fitting.jl b/src/Fitting.jl index 309f8c9ca..692361cb8 100644 --- a/src/Fitting.jl +++ b/src/Fitting.jl @@ -1,4 +1,3 @@ - ############################################################################### ##### Fitting interface ##### User-facing function: @@ -18,8 +17,6 @@ ##### ############################################################################### - - """ CopulaModel{CT, TM, TD} <: StatsBase.StatisticalModel @@ -44,7 +41,7 @@ for statistical inference and model 
comparison. [`StatsBase.nobs`](@ref), [`StatsBase.coef`](@ref), [`StatsBase.coefnames`](@ref), [`StatsBase.vcov`](@ref), [`StatsBase.aic`](@ref), [`StatsBase.bic`](@ref), [`StatsBase.deviance`](@ref), etc. -See also [`Distributions.fit`](@ref) and [`_copula_of`](@ref). +See also [`Distributions.fit`](@ref). """ struct CopulaModel{CT, TM<:Union{Nothing,AbstractMatrix}, TD<:NamedTuple} <: StatsBase.StatisticalModel result :: CT @@ -65,7 +62,7 @@ struct CopulaModel{CT, TM<:Union{Nothing,AbstractMatrix}, TD<:NamedTuple} <: Sta end end -# Fallbacks that throw if the interface s not implemented correctly. +# Fallbacks that throw if the interface is not implemented correctly. """ Distributions.params(C::Copula) Distributions.params(S::SklarDist) @@ -83,24 +80,21 @@ _example(CT::Type{<:Copula}, d) = throw("You need to specify the `_example(CT::T _unbound_params(CT::Type{Copula}, d, θ) = throw("You need to specify the _unbound_param method, that takes the namedtuple returned by `Distributions.params(CT(d, θ))` and trasform it into a raw vector living in R^p.") _rebound_params(CT::Type{Copula}, d, α) = throw("You need to specify the _rebound_param method, that takes the output of _unbound_params and reconstruct the namedtuple that `Distributions.params(C)` would have returned.") function _fit(CT::Type{<:Copula}, U, ::Val{:mle}) - # @info "Running the MLE routine from the generic implementation" + # generic MLE routine (agnostic to vcov/inference) d = size(U,1) - function cop(α) - par = _rebound_params(CT, d, α) - return CT(d, par...) ####### Using a "," here forces the constructor to accept raw values, while a ";" passes named values. Not sure which is best. - end + cop(α) = CT(d, _rebound_params(CT, d, α)...) 
α₀ = _unbound_params(CT, d, Distributions.params(_example(CT, d))) - loss(C) = -Distributions.loglikelihood(C, U) res = try Optim.optimize(loss ∘ cop, α₀, Optim.LBFGS(); autodiff=:forward) catch err - # @warn "LBFGS with AD failed ($err), retrying with NelderMead" Optim.optimize(loss ∘ cop, α₀, Optim.NelderMead()) end θhat = _rebound_params(CT, d, Optim.minimizer(res)) - return CT(d, θhat...), - (; θ̂=θhat, optimizer = Optim.summary(res), converged = Optim.converged(res), iterations = Optim.iterations(res)) + return CT(d, θhat...), (; θ̂=θhat, + optimizer = Optim.summary(res), + converged = Optim.converged(res), + iterations = Optim.iterations(res)) end """ @@ -117,62 +111,61 @@ This is not intended for direct use by end–users. Use [`Distributions.fit(CopulaModel, ...)`] instead. """ function _fit(CT::Type{<:Copula}, U, method::Union{Val{:itau}, Val{:irho}, Val{:ibeta}}) - # @info "Running the itau/irho/ibeta routine from the generic implementation" + # generic rank-based routine (agnostic to vcov/inference) d = size(U,1) - cop(α) = CT(d, _rebound_params(CT, d, α)...) - α₀ = _unbound_params(CT, d, Distributions.params(_example(CT, d))) - @assert length(α₀) <= d*(d-1)/2 "Cannot use $method since there are too much parameters." - - fun = method isa Val{:itau} ? StatsBase.corkendall : method isa Val{:irho} ? StatsBase.corspearman : corblomqvist - est = fun(U') + α₀ = _unbound_params(CT, d, Distributions.params(_example(CT, d))) + @assert length(α₀) <= d*(d-1)÷2 "Cannot use $method since there are too much parameters." + fun = method isa Val{:itau} ? StatsBase.corkendall : + method isa Val{:irho} ? 
StatsBase.corspearman : corblomqvist + est = fun(U') loss(C) = sum(abs2, est .- fun(C)) - - res = Optim.optimize(loss ∘ cop, α₀, Optim.NelderMead()) + res = Optim.optimize(loss ∘ cop, α₀, Optim.NelderMead()) θhat = _rebound_params(CT, d, Optim.minimizer(res)) - return CT(d, θhat...), - (; θ̂=θhat, optimizer = Optim.summary(res), converged = Optim.converged(res), iterations = Optim.iterations(res)) + return CT(d, θhat...), (; θ̂=θhat, + optimizer = Optim.summary(res), + converged = Optim.converged(res), + iterations = Optim.iterations(res)) end + + """ Distributions.fit(CT::Type{<:Copula}, U; kwargs...) -> CT -Quick fit: devuelve solo la cópula ajustada (atajo de `Distributions.fit(CopulaModel, CT, U; summaries=false, kwargs...).result`). +Quick fit: devuelve solo la cópula ajustada (atajo de `Distributions.fit(CopulaModel, CT, U; kwargs...)`). """ @inline Distributions.fit(T::Type{<:Union{Copula, SklarDist}}, U, method; kwargs...) = Distributions.fit(T, U; method=method, kwargs...) @inline Distributions.fit(::Type{CopulaModel}, T::Type{<:Copula}, U, method; kwargs...) = Distributions.fit(CopulaModel, T, U; method=method, kwargs...) @inline Distributions.fit(::Type{CopulaModel}, T::Type{<:SklarDist}, U, method; kwargs...) = Distributions.fit(CopulaModel, T, U; copula_method=method, kwargs...) -@inline Distributions.fit(T::Type{<:Union{Copula, SklarDist}}, U; kwargs...) = Distributions.fit(CopulaModel, T, U; summaries=false, kwargs...).result +@inline Distributions.fit(T::Type{<:Union{Copula, SklarDist}}, U; kwargs...) = Distributions.fit(CopulaModel, T, U; quick_fit=true, kwargs...).result """ - _available_fitting_methods(::Type{<:Copula}) + _available_fitting_methods(::Type{<:Copula}, d::Int) -Return the tuple of fitting methods available for a given copula family. +Return the tuple of fitting methods available for a given copula family in a given dimension. 
This is used internally by [`Distributions.fit`](@ref) to check validity of the `method` argument and to select a default method when `method=:default`. # Example ```julia -_available_fitting_methods(GumbelCopula) +_available_fitting_methods(GumbelCopula, 3) # → (:mle, :itau, :irho, :ibeta) ``` """ -_available_fitting_methods(::Type{<:Copula}) = (:mle, :itau, :irho, :ibeta) -_available_fitting_methods(C::Copula) = _available_fitting_methods(typeof(C)) +_available_fitting_methods(::Type{<:Copula}, d) = (:mle, :itau, :irho, :ibeta) +_available_fitting_methods(C::Copula, d) = _available_fitting_methods(typeof(C), d) -function _find_method(CT, method) - avail = _available_fitting_methods(CT) +function _find_method(CT, d, method) + avail = _available_fitting_methods(CT, d) isempty(avail) && error("No fitting methods available for $CT.") - if method === :default - method = avail[1] - # @info "Choosing default method '$(method)' among $avail..." - elseif method ∉ avail - error("Method '$method' not available for $CT. Available: $(join(avail, ", ")).") - end + method === :default && return avail[1] + method ∉ avail && error("Method '$method' not available for $CT. Available: $(join(avail, ", ")).") return method end + """ - fit(CopulaModel, CT::Type{<:Copula}, U; method=:default, summaries=true, kwargs...) + fit(CopulaModel, CT::Type{<:Copula}, U; method=:default, kwargs...) Fit a copula of type `CT` to pseudo-observations `U`. @@ -182,8 +175,6 @@ Fit a copula of type `CT` to pseudo-observations `U`. margins and copula simultaneously. - `method::Symbol` — fitting method; defaults to the first available one (see [`_available_fitting_methods`](@ref)). -- `summaries::Bool` — whether to compute pairwise summary statistics - (Kendall's τ, Spearman's ρ, Blomqvist's β). - `kwargs...` — additional method-specific keyword arguments (e.g. `pseudo_values=true`, `grid=401` for extreme-value tails, etc.). 
@@ -201,15 +192,37 @@ println(M) C = fit(GumbelCopula, U; method=:itau) ``` """ -function Distributions.fit(::Type{CopulaModel}, CT::Type{<:Copula}, U; method = :default, summaries=true, kwargs...) +function Distributions.fit(::Type{CopulaModel}, CT::Type{<:Copula}, U; + method=:default, quick_fit=false, derived_measures=true, + vcov=true, vcov_method=nothing, kwargs...) d, n = size(U) - # Choose the fitting method: - method = _find_method(CT, method) - + method = _find_method(CT, d, method) t = @elapsed (rez = _fit(CT, U, Val{method}(); kwargs...)) C, meta = rez + quick_fit && return (result=C,) # as soon as possible. ll = Distributions.loglikelihood(C, U) - md = (; d, n, method, meta..., null_ll=0.0, elapsed_sec=t, _extra_pairwise_stats(U, !summaries)...) + + if vcov && C isa TCopula + vcov = false + @info "Setting vcov = false for TCopula since _beta_inc_inv derivative are not implemented" + end + if vcov && C isa tEVCopula + vcov = false + @info "Setting vcov = false for tEVCopula since _beta_inc_inv derivative are not implemented" + end + if vcov && C isa FGMCopula && method==:mle + vcov = false + @info "Setting vcov = false for FGMCopula with method=:mle since unimplemented right now" + end + + if vcov && haskey(meta, :θ̂) + vcov, vmeta = _vcov(CT, U, meta.θ̂; method=method, override=vcov_method) + meta = (; meta..., vcov, vmeta...) 
+ end + + md = (; d, n, method, meta..., null_ll=0.0, + elapsed_sec=t, derived_measures, U=U) + return CopulaModel(C, n, ll, method; vcov = get(md, :vcov, nothing), converged = get(md, :converged, true), @@ -218,76 +231,241 @@ function Distributions.fit(::Type{CopulaModel}, CT::Type{<:Copula}, U; method = method_details = md) end -_available_fitting_methods(::Type{SklarDist}) = (:ifm, :ecdf) +_available_fitting_methods(::Type{SklarDist}, d) = (:ifm, :ecdf) """ fit(CopulaModel, SklarDist{CT, TplMargins}, X; copula_method=:default, sklar_method=:default, - summaries=true, margins_kwargs=NamedTuple(), copula_kwargs=NamedTuple()) + margins_kwargs=NamedTuple(), copula_kwargs=NamedTuple()) Joint margin and copula adjustment (Sklar approach). `sklar_method ∈ (:ifm, :ecdf)` controls whether parametric CDFs (`:ifm`) or pseudo-observations (`:ecdf`) are used. """ -function Distributions.fit(::Type{CopulaModel},::Type{SklarDist{CT,TplMargins}}, X; copula_method = :default, sklar_method = :default, - summaries = true, margins_kwargs = NamedTuple(), copula_kwargs = NamedTuple()) where - {CT<:Copulas.Copula, TplMargins<:Tuple} - - sklar_method = _find_method(SklarDist, sklar_method) - copula_method = _find_method(CT, copula_method) +function Distributions.fit(::Type{CopulaModel}, ::Type{SklarDist{CT,TplMargins}}, X; quick_fit = false, + copula_method = :default, sklar_method = :default, margins_kwargs = NamedTuple(), + copula_kwargs = NamedTuple(), derived_measures = true, vcov = true, + vcov_method=nothing) where {CT<:Copulas.Copula, TplMargins<:Tuple} + # Get methods: d, n = size(X) - marg_types = TplMargins.parameters - (length(marg_types) == d) || throw(ArgumentError("SklarDist: #marginals $(length(marg_types)) ≠ d=$d")) - m = ntuple(i -> Distributions.fit(marg_types[i], @view X[i, :]; margins_kwargs...), d) - # Only one margins_kwargs while people mught want to pass diferent kwargs for diferent marginals... but OK for the moment. 
+ sklar_method = _find_method(SklarDist, d, sklar_method) + copula_method = _find_method(CT, d, copula_method) - + # Fit marginals: + m = ntuple(i -> Distributions.fit(TplMargins.parameters[i], @view X[i, :]; margins_kwargs...), d) + + # Make pseudo-observations U = similar(X) if sklar_method === :ifm for i in 1:d U[i,:] .= Distributions.cdf.(m[i], X[i,:]) end - elseif sklar_method === :ecdf + else # :ecdf then U .= pseudos(X) end - # Copula fit... with method specific - C, cmeta = _fit(CT, U, Val{copula_method}(); copula_kwargs...) - - S = SklarDist(C, m) - ll = Distributions.loglikelihood(S, X) + # Fit the copula + copM = Distributions.fit(CopulaModel, CT, U; quick_fit=quick_fit, + method=copula_method, derived_measures=derived_measures, + vcov=vcov, vcov_method=vcov_method, copula_kwargs...) + + S = SklarDist(copM.result, m) + quick_fit && return (result=S,) - null_ll = 0.0 - @inbounds for j in axes(X, 2) + # Marginal vcov: compute via θ-Hessian fallback only if vcov=true + Vm = Vector{Union{Nothing, Matrix{Float64}}}(undef, d) + if vcov for i in 1:d - null_ll += Distributions.logpdf.(m[i], X[i, j]) + p = length(Distributions.params(m[i])) + Vm[i] = nothing + Vg = _vcov_margin_generic(m[i], @view X[i, :]) + if Vg !== nothing && ndims(Vg) == 2 && size(Vg) == (p, p) && all(isfinite, Matrix(Vg)) + Vm[i] = Matrix{Float64}(Vg) + end end + else + fill!(Vm, nothing) end - return CopulaModel(S, n, ll, copula_method; - vcov = get(cmeta, :vcov, nothing), # vcov of the copula (if you compute it) - converged = get(cmeta, :converged, true), - iterations = get(cmeta, :iterations, 0), - elapsed_sec = get(cmeta, :elapsed_sec, NaN), - method_details = (; cmeta..., null_ll, sklar_method, margins = map(typeof, m), - has_summaries = summaries, d=d, n=n, _extra_pairwise_stats(U, !summaries)...)) + # Copula Vcov: + Vfull = StatsBase.vcov(copM) + + # total and null loglikelihood + ll = Distributions.loglikelihood(S, X) + null_ll = 
Distributions.loglikelihood(SklarDist(IndependentCopula(d), m), X) + return CopulaModel( + S, n, ll, copula_method; + vcov = Vfull, + converged = copM.converged, + iterations = copM.iterations, + elapsed_sec = copM.elapsed_sec, + method_details = (; + copM.method_details..., + vcov_copula = Vfull, + vcov_margins = Vm, + null_ll, + sklar_method, + margins = map(typeof, m), + d = d, n = n, + elapsed_sec = copM.elapsed_sec, + derived_measures, + # no raw X_margins stored to keep model lightweight + ) + ) +end +####### vcov functions... + +# objetive this functions: try get the vcov from marginals... +function _vcov_margin_generic(d::TD, x::AbstractVector) where {TD<:Distributions.UnivariateDistribution} + # Compute observed information directly on the parameter (θ) scale at current params. + p_nt = Distributions.params(d) + θ0 = p_nt isa NamedTuple ? Float64.(collect(values(p_nt))) : Float64.(collect(p_nt)) + + # Find the distribution constructor: + MyDist = TD.name.wrapper + # Observed information = - Hessian of log-likelihood at θ0 + H = ForwardDiff.hessian(θ -> Distributions.loglikelihood(MyDist(θ...), x), θ0) + # Small ridge for numerical stability + Vθ = inv(-H + 1e-8 .* LinearAlgebra.I) + Vθ = (Vθ + Vθ')/2 + return LinearAlgebra.Symmetric(Matrix{Float64}(Vθ)) +end + +function _vcov(CT::Type{<:Copula}, U::AbstractMatrix, θ::NamedTuple; method::Symbol, override::Union{Symbol,Nothing}=nothing) + vcovm = !isnothing(override) ? override : + method === :mle ? :hessian : + method === :itau ? :godambe : + method === :irho ? :godambe : + method === :ibeta ? :godambe : + method === :iupper ? :godambe : :jackknife + + if vcovm ∉ (:hessian, :godambe, :godambe_pairwise) + return _vcov(CT, U, θ, Val{vcovm}(), Val{method}()) # you can write new methods through this interface, as the jacknife method below. + end + + d, n = size(U) + α = _unbound_params(CT, d, θ) + cop(α) = CT(d, _rebound_params(CT,d,α)...) 
+ _upper_triangle(A) = [A[idx] for idx in CartesianIndices(A) if idx[1] < idx[2]] + + if vcovm === :hessian + ℓ(α) = Distributions.loglikelihood(cop(α), U) + H = ForwardDiff.hessian(ℓ, α) + Iα = .-H + if any(!isfinite, Iα) + @warn "vcov(:hessian): non-finite Fisher information; falling back" Iα + return _vcov(CT, U, θ, Val{:bootstrap}(), Val{method}()) + end + Iα = (Iα + Iα')/2 + p = size(Iα, 1) + I_p = Matrix{Float64}(LinearAlgebra.I, p, p) + λ = 1e-8 + Vα = nothing + @inbounds for _ in 1:8 + A = Iα + λ*I_p + ch = LinearAlgebra.cholesky(LinearAlgebra.Symmetric(A); check=false) + if ch.info == 0 # is p.d. + Vα = ch \ I_p # It is equivalent to inv(A), but stable, we could use pinv but I don't know how optimal it is... + break + end + λ *= 10 + end + if Vα === nothing || any(!isfinite, Vα) + @warn "vcov(:hessian): failed to stabilize Fisher; falling back" λ_final=λ + return _vcov(CT, U, θ, Val{:bootstrap}(), Val{method}()) + end + else + + pairwise_φ = method isa Val{:itau} ? StatsBase.corkendall : + method isa Val{:irho} ? StatsBase.corspearman : + method isa Val{:ibeta} ? corblomqvist : coruppertail + φ = method isa Val{:itau} ? τ : + method isa Val{:irho} ? ρ : + method isa Val{:ibeta} ? 
β : λᵤ + if vcovm === :godambe + q = 1 + ψ = α -> [φ(cop(α))] + ψ_emp = u ->[φ(u)] + else # then :godambe_pairwise + q = d*(d-1) ÷ 2 + ψ = α -> _upper_triangle(pairwise_φ(cop(α))) + ψ_emp = U -> _upper_triangle(pairwise_φ(U')) + end + + Dα = ForwardDiff.jacobian(ψ, α) + Dα = reshape(Dα, q, length(α)) + + # Ω bootstrap + B = clamp(Int(floor(sqrt(n))), 10, 200) + M = Matrix{Float64}(undef, B, q) + idx = Vector{Int}(undef, n) + rng = Random.default_rng() + + @inbounds for b in 1:B + for i in 1:n + idx[i] = rand(rng, 1:n) + end + Mb = @view U[:, idx] + M[b, :] = ψ_emp(Mb) + end + Ω = n * Statistics.cov(M; corrected=true) + DtD = Dα' * Dα + ϵI = 1e-10LinearAlgebra.I + Vα = inv(DtD + ϵI) * (Dα' * Ω * Dα) * inv(DtD + ϵI) / n + end + # Delta method Jacobian from α (unbounded) to θ (original params), flattened + J = ForwardDiff.jacobian(αv -> _flatten_params(_rebound_params(CT, d, αv))[2], α) + Vθ = J * Vα * J' + # <<<<<<< KEY CHANGE >>>>>>>>> + # Check for finiteness BEFORE calling eigen. + # If the matrix already contains Inf/NaN, the estimate was unstable. + # We activate the fallback to jackknife immediately. + if !all(isfinite, Vθ) + return _vcov(CT, U, θ, Val{:bootstrap}(), Val{method}()) + end + Vθ = (Vθ + Vθ')/2 + λ, Q = LinearAlgebra.eigen(Matrix(Vθ)) + λ_reg = map(x -> max(x, 1e-12), λ) + Vθ = LinearAlgebra.Symmetric(Q * LinearAlgebra.Diagonal(λ_reg) * Q') + # This final check is now a double security. 
+ any(!isfinite, Matrix(Vθ)) && return _vcov(CT, U, θ, Val{:jackknife}(), Val{method}()) + return Vθ, (; vcov_method=vcovm) end +function _vcov(CT::Type{<:Copula}, U::AbstractMatrix, θ::NamedTuple, ::Val{:jackknife}, ::Val{method}) where {method} + d, n = size(U) + θminus = zeros(n, length(θ)) + idx = Vector{Int}(undef, n-1) -function _uppertriangle_stats(mat) - # compute the mean and std of the upper triangular part of the matrix (diagonal excluded) - gen = [mat[idx] for idx in CartesianIndices(mat) if idx[1] < idx[2]] - return Statistics.mean(gen), length(gen) == 1 ? zero(gen[1]) : Statistics.std(gen), minimum(gen), maximum(gen) + for j in 1:n + k = 1; for t in 1:n; if t == j; continue; end; idx[k] = t; k += 1; end + Uminus = @view U[:, idx] + θminus[j, :] .= _flatten_params(_fit(CT, Uminus, Val{method}())[2].θ̂)[2] + end + + θbar = vec(Statistics.mean(θminus, dims=1)) + V = (n-1)/n * (LinearAlgebra.transpose(θminus .- θbar') * (θminus .- θbar')) ./ (n-1) + return V, (; vcov_method=:jackknife_obs) end -function _extra_pairwise_stats(U::AbstractMatrix, bypass::Bool) - bypass && return (;) - τm, τs, τmin, τmax = _uppertriangle_stats(StatsBase.corkendall(U')) - ρm, ρs, ρmin, ρmax = _uppertriangle_stats(StatsBase.corspearman(U')) - βm, βs, βmin, βmax = _uppertriangle_stats(corblomqvist(U')) - γm, γs, γmin, γmax = _uppertriangle_stats(corgini(U')) - return (; tau_mean=τm, tau_sd=τs, tau_min=τmin, tau_max=τmax, - rho_mean=ρm, rho_sd=ρs, rho_min=ρmin, rho_max=ρmax, - beta_mean=βm, beta_sd=βs, beta_min=βmin, beta_max=βmax, - gamma_mean=γm, gamma_sd=γs, gamma_min=γmin, gamma_max=γmax) +# Fallback fast: bootstrap refit (B < n) +function _vcov(CT::Type{<:Copula}, U::AbstractMatrix, θ::NamedTuple, ::Val{:bootstrap}, ::Val{method}) where {method} + d, n = size(U) + p = length(_flatten_params(θ)[2]) + B = clamp(Int(floor(sqrt(n))), 10, 200) + Θ = Matrix{Float64}(undef, B, p) + idx = Vector{Int}(undef, n) + rng = Random.default_rng() + @inbounds for b in 1:B + for i in 1:n + 
idx[i] = rand(rng, 1:n) + end + θminus = @view U[:, idx] + Θ[b, :] .= _flatten_params(_fit(CT, θminus, Val{method}())[2].θ̂)[2] + end + V = Statistics.cov(Θ; corrected=true) + return V, (; vcov_method=:bootstrap, B=B) end + + +##### StatsBase interfaces. """ nobs(M::CopulaModel) -> Int @@ -302,7 +480,7 @@ StatsBase.isfitted(::CopulaModel) = true Deviation of the fitted model (-2 * loglikelihood). """ StatsBase.deviance(M::CopulaModel) = -2 * M.ll -StatsBase.dof(M::CopulaModel) = StatsBase.dof(M.result) +StatsBase.dof(M::CopulaModel) = length(StatsBase.coef(M)) """ _copula_of(M::CopulaModel) @@ -316,15 +494,63 @@ _copula_of(M::CopulaModel) = M.result isa SklarDist ? M.result.C : M.result Vector with the estimated parameters of the copula. """ -StatsBase.coef(M::CopulaModel) = collect(values(Distributions.params(_copula_of(M)))) # why ? params of the marginals should also be taken into account. +StatsBase.coef(M::CopulaModel) = _flatten_params(M.method_details.θ̂)[2] + """ - coefnames(M::CopulaModel) -> Vector{String} +coefnames(M::CopulaModel) -> Vector{String} Names of the estimated copula parameters. """ -StatsBase.coefnames(M::CopulaModel) = string.(keys(Distributions.params(_copula_of(M)))) -StatsBase.dof(C::Copulas.Copula) = length(values(Distributions.params(C))) +StatsBase.coefnames(M::CopulaModel) = _flatten_params(M.method_details.θ̂)[1] + + +# Flatten a NamedTuple of parameters into a Vector{Float64}, +# consistent with the generic linearization used in show(). 
+function _flatten_params(params_nt::NamedTuple) + nm = String[] + θ = Any[] + sidx = ["₁", "₂", "₃", "₄", "₅", "₆", "₇", "₈", "₉"] + for (k, v) in pairs(params_nt) + if v isa Number + push!(nm, String(k)) + push!(θ, v) + elseif v isa AbstractMatrix + if maximum(size(v)) > 9 + @inbounds for j in 2:size(v,2), i in 1:j-1 + push!(nm, "$(k)_$(i)_$(j)") + push!(θ, v[i,j]) + end + else + @inbounds for j in 2:size(v,2), i in 1:j-1 + push!(nm, "$(k)$(sidx[i])$(sidx[j])") + push!(θ, v[i,j]) + end + end + elseif v isa AbstractVector + if length(v) > 9 + for i in eachindex(v) + push!(nm, "$(k)_$(i)") + push!(θ, v[i]) + end + else + for i in eachindex(v) + push!(nm, "$(k)$(sidx[i])") + push!(θ, v[i]) + end + end + else + try + push!(nm, String(k)) + push!(θ, v) + catch + end + end + end + return nm, [x for x in promote(θ...)] +end + + #(optional vcov) and vcov its very important... for inference """ @@ -378,4 +604,42 @@ function StatsBase.nullloglikelihood(M::CopulaModel) throw(ArgumentError("nullloglikelihood not available in method_details.")) end end -StatsBase.nulldeviance(M::CopulaModel) = -2 * StatsBase.nullloglikelihood(M) \ No newline at end of file +StatsBase.nulldeviance(M::CopulaModel) = -2 * StatsBase.nullloglikelihood(M) +""" + StatsBase.residuals(M::CopulaModel; transform=:uniform) + +Compute Rosenblatt residuals of a fitted copula model. + +# Arguments +- `transform = :uniform` → returns Rosenblatt residuals in [0,1]. +- `transform = :normal` → applies Φ⁻¹ to obtain pseudo-normal residuals. + +# Notes +The residuals should be i.i.d. Uniform(0,1) under a correctly specified model. +""" +StatsBase.residuals(M::CopulaModel; transform=:uniform) = begin + haskey(M.method_details, :U) || throw(ArgumentError("method_details must contain pseudo-observations :U")) + U = M.method_details[:U] + R = rosenblatt(_copula_of(M), U) + return transform === :normal ? 
Distributions.quantile.(Distributions.Normal(), R) : R +end +""" + StatsBase.predict(M::CopulaModel; newdata=nothing, what=:cdf, nsim=0) + +Predict or simulate from a fitted copula model. + +# Keyword arguments +- `newdata` — matrix of points in [0,1]^d at which to evaluate (`what=:cdf` or `:pdf`). +- `what` — one of `:cdf`, `:pdf`, or `:simulate`. +- `nsim` — number of samples to simulate if `what=:simulate`. + +# Returns +- Vector or matrix of predicted probabilities/densities, or simulated samples. +""" +function StatsBase.predict(M::CopulaModel; newdata=nothing, what=:cdf, nsim=0) + C = _copula_of(M) + return what === :simulate ? rand(C, nsim > 0 ? nsim : M.n) : + what === :cdf ? (newdata === nothing ? throw(ArgumentError("`newdata` required for `:cdf`")) : Distributions.cdf(C, newdata)) : + what === :pdf ? (newdata === nothing ? throw(ArgumentError("`newdata` required for `:pdf`")) : Distributions.pdf(C, newdata)) : + throw(ArgumentError("`what` must be one of :simulate, :cdf, or :pdf. Got `$what`.")) +end diff --git a/src/Generator.jl b/src/Generator.jl index 76b402351..021d7529f 100644 --- a/src/Generator.jl +++ b/src/Generator.jl @@ -24,9 +24,9 @@ More methods can be implemented for performance, althouhg there are implement de * `ϕ⁻¹( G::Generator, x)` gives the inverse function of the generator. * `ϕ⁽¹⁾(G::Generator, t)` gives the first derivative of the generator -* `ϕ⁽ᵏ⁾(G::Generator, ::Val{k}, t) where k` gives the kth derivative of the generator +* `ϕ⁽ᵏ⁾(G::Generator, k::Int, t)` gives the kth derivative of the generator * `ϕ⁻¹⁽¹⁾(G::Generator, t)` gives the first derivative of the inverse generator. -* `williamson_dist(G::Generator, ::Val{d}) where d` gives the Wiliamson d-transform of the generator, see [WilliamsonTransforms.jl](https://github.com/lrnv/WilliamsonTransforms.jl). +* `williamson_dist(G::Generator, d::Int)` gives the Wiliamson d-transform of the generator. References: * [mcneil2009](@cite) McNeil, A. J., & Nešlehová, J. (2009). 
Multivariate Archimedean copulas, d-monotone functions and ℓ 1-norm symmetric distributions. @@ -44,9 +44,9 @@ max_monotony(G::Generator) = throw("This generator does not have a defined max m ϕ⁻¹( G::Generator, x) = Roots.find_zero(t -> ϕ(G,t) - x, (0.0, Inf)) ϕ⁽¹⁾(G::Generator, t) = ForwardDiff.derivative(x -> ϕ(G,x), t) ϕ⁻¹⁽¹⁾(G::Generator, t) = ForwardDiff.derivative(x -> ϕ⁻¹(G, x), t) -ϕ⁽ᵏ⁾(G::Generator, ::Val{k}, t) where k = WilliamsonTransforms.taylor(ϕ(G), t, Val{k}())[end] * factorial(k) -ϕ⁽ᵏ⁾⁻¹(G::Generator, ::Val{k}, t; start_at=t) where {k} = Roots.find_zero(x -> ϕ⁽ᵏ⁾(G, Val{k}(), x) - t, start_at) -williamson_dist(G::Generator, ::Val{d}) where d = WilliamsonTransforms.𝒲₋₁(ϕ(G), Val{d}()) +ϕ⁽ᵏ⁾(G::Generator, k::Int, t) = taylor(ϕ(G), t, k)[end] * factorial(k) +ϕ⁽ᵏ⁾⁻¹(G::Generator, k::Int, t; start_at=t) = Roots.find_zero(x -> ϕ⁽ᵏ⁾(G, k, x) - t, start_at) +williamson_dist(G::Generator, d::Int) = 𝒲₋₁(ϕ(G), d) # TODO: Move the \phi^(1) to defer to \phi^(k=1), and implement \phi(k=1) in generators instead of \phi^(1) @@ -104,7 +104,7 @@ abstract type AbstractFrailtyGenerator<:Generator end frailty(::AbstractFrailtyGenerator) = throw("This generator was not defined as it should, you should provide its frailty") max_monotony(::AbstractFrailtyGenerator) = Inf ϕ(G::AbstractFrailtyGenerator, t) = Distributions.mgf(frailty(G), -t) -williamson_dist(G::AbstractFrailtyGenerator, ::Val{d}) where d = WilliamsonFromFrailty(frailty(G), Val{d}()) +williamson_dist(G::AbstractFrailtyGenerator, d::Int) = WilliamsonFromFrailty(frailty(G), d) struct FrailtyGenerator{TF}<:AbstractFrailtyGenerator F::TF @@ -138,7 +138,7 @@ Constructor WilliamsonGenerator(atoms::AbstractVector, weights::AbstractVector, d) i𝒲(atoms::AbstractVector, weights::AbstractVector, d) -The `WilliamsonGenerator` (alias `i𝒲`) allows to construct a d-monotonous archimedean generator from a positive random variable `X::Distributions.UnivariateDistribution`. 
The transformation, which is called the inverse Williamson transformation, is implemented in [WilliamsonTransforms.jl](https://www.github.com/lrnv/WilliamsonTransforms.jl). +The `WilliamsonGenerator` (alias `i𝒲`) allows to construct a d-monotonous archimedean generator from a positive random variable `X::Distributions.UnivariateDistribution`. The transformation, which is called the inverse Williamson transformation, is implemented fully generically in the package. For a univariate non-negative random variable ``X``, with cumulative distribution function ``F`` and an integer ``d\\ge 2``, the Williamson-d-transform of ``X`` is the real function supported on ``[0,\\infty[`` given by: @@ -165,8 +165,7 @@ Special case (finite-support discrete X) - If `X isa Distributions.DiscreteUnivariateDistribution` and `support(X)` is finite, or if you pass directly atoms and weights to the constructor, the produced generator is piecewise-polynomial `ϕ(t) = ∑_j w_j · (1 − t/r_j)_+^(d−1)` matching the Williamson transform of a discrete radial law. It has specialized methods. - For infinite-support discrete distributions or when the support is not accessible as a finite - iterable, the standard `WilliamsonGenerator` is constructed and will defer to - `WilliamsonTransforms.jl`. + iterable, the standard `WilliamsonGenerator` is constructed. References: * [williamson1955multiply](@cite) Williamson, R. E. (1956). Multiply monotone functions and their Laplace transforms. Duke Math. J. 23 189–207. 
MR0077581 @@ -174,19 +173,19 @@ References: """ struct WilliamsonGenerator{d, TX} <: Generator X::TX - function WilliamsonGenerator(X, D::Val{d}) where d + function WilliamsonGenerator(X, d::Int) if X isa Distributions.DiscreteUnivariateDistribution # If X has finite, positive support, build an empirical generator sp = collect(Distributions.support(X)) ws = Distributions.pdf.(X, sp) keep = ws .> 0 - return WilliamsonGenerator(sp[keep], ws[keep], D) + return WilliamsonGenerator(sp[keep], ws[keep], d) end # else: fall back to a regular Williamson generator # check that X is indeed a positively supported random variable... return new{d, typeof(X)}(X) end - function WilliamsonGenerator(r::AbstractVector, w::AbstractVector, ::Val{d}) where d + function WilliamsonGenerator(r::AbstractVector, w::AbstractVector, d::Int) length(r) == length(w) || throw(ArgumentError("length(r) != length(w)")) !isempty(r) || throw(ArgumentError("no atoms given")) all(isfinite, r) && all(>=(0), r) || throw(ArgumentError("atoms must be positive and finite")) @@ -201,15 +200,14 @@ struct WilliamsonGenerator{d, TX} <: Generator end end const i𝒲 = WilliamsonGenerator -WilliamsonGenerator(X, d::Int) = WilliamsonGenerator(X, Val(d)) -WilliamsonGenerator(r, w, d::Int) = WilliamsonGenerator(r, w, Val(d)) Distributions.params(G::WilliamsonGenerator) = (G.X,) max_monotony(::WilliamsonGenerator{d, TX}) where {d, TX} = d -williamson_dist(G::WilliamsonGenerator{d, TX}, ::Val{d}) where {d, TX} = G.X # if its the right dim. -ϕ(G::WilliamsonGenerator{d, TX}, t) where {d, TX} = WilliamsonTransforms.𝒲(G.X, Val{d}())(t) +ϕ(G::WilliamsonGenerator{d, TX}, t) where {d, TX} = 𝒲(G.X, d)(t) +williamson_dist(G::WilliamsonGenerator{D, TX}, d::Int) where {D, TX} = d==D ? G.X : 𝒲₋₁(ϕ(G), d) # if its the right dim. + # TODO: The following method for Kendall's tau is currently faulty and produces incorrect results. 
-# τ(G::WilliamsonGenerator) = 4*Distributions.expectation(Base.Fix1(ϕ, G), Copulas.williamson_dist(G, Val(2)))-1 # McNeil & Neshelova 2009 +# τ(G::WilliamsonGenerator) = 4*Distributions.expectation(Base.Fix1(ϕ, G), Copulas.williamson_dist(G, 2))-1 # McNeil & Neshelova 2009 # Investigate the correct formula for Kendall's tau for WilliamsonGenerator. Check if the expectation is being computed with respect to the correct measure and if the implementation matches the reference (McNeil & Nešlehová 2009). Fix this method when the correct approach is established. @@ -229,9 +227,6 @@ Returns - `Vector{Float64}` of length `n` with values in `(0,1)`. """ function _kendall_sample(u::AbstractMatrix) - - - d, n = size(u) # Apply ordinal ranks per margin to remove ties consistently with `pseudos` R = Matrix{Int}(undef, d, n) @@ -302,17 +297,27 @@ function EmpiricalGenerator(u::AbstractMatrix) eps = 1e-14 a, b = 0.0, max(r[k+1] - eps, 0.0) ga, gb = gk(a), gk(b) + # Ensure a valid bracket: gk is nonincreasing in y, target is x[k] + # Expand upper bound slightly if needed to include the target if !(ga + 1e-12 >= x[k] >= gb - 1e-12) + # Try with full [0, r[k+1]] first a, b = 0.0, r[k+1] + ga, gb = gk(a), gk(b) + end + if !(ga >= x[k] >= gb) + # As a last resort, project x[k] into [gb, ga] + xk = clamp(x[k], gb, ga) + r[k] = Roots.find_zero(y -> gk(y) - xk, (a, b); bisection=true) + else + r[k] = Roots.find_zero(y -> gk(y) - x[k], (a, b); bisection=true) end - r[k] = Roots.find_zero(y -> gk(y) - x[k], (a, b); verbose=false) r[k] = clamp(r[k], 0.0, r[k+1] - eps) end - return WilliamsonGenerator(r, w, Val(d)) + return WilliamsonGenerator(r, w, d) end # Optimized methods for discrete nonparametric Williamson generators (covers EmpiricalGenerator) -function ϕ(G::WilliamsonGenerator{d, TX}, t::Real) where {d, TX<:Distributions.DiscreteNonParametric} +function ϕ(G::WilliamsonGenerator{d, TX}, t) where {d, TX<:Distributions.DiscreteNonParametric} r = Distributions.support(G.X) w = 
Distributions.probs(G.X) Tt = promote_type(eltype(r), typeof(t)) @@ -327,7 +332,7 @@ function ϕ(G::WilliamsonGenerator{d, TX}, t::Real) where {d, TX<:Distributions. return S end -function ϕ⁽¹⁾(G::WilliamsonGenerator{d, TX}, t::Real) where {d, TX<:Distributions.DiscreteNonParametric} +function ϕ⁽¹⁾(G::WilliamsonGenerator{d, TX}, t) where {d, TX<:Distributions.DiscreteNonParametric} r = Distributions.support(G.X) w = Distributions.probs(G.X) Tt = promote_type(eltype(r), typeof(t)) @@ -342,7 +347,7 @@ function ϕ⁽¹⁾(G::WilliamsonGenerator{d, TX}, t::Real) where {d, TX<:Distri return - (d-1) * S end -function ϕ⁽ᵏ⁾(G::WilliamsonGenerator{d, TX}, ::Val{k}, t::Real) where {d, k, TX<:Distributions.DiscreteNonParametric} +function ϕ⁽ᵏ⁾(G::WilliamsonGenerator{d, TX}, k::Int, t) where {d, TX<:Distributions.DiscreteNonParametric} r = Distributions.support(G.X) w = Distributions.probs(G.X) Tt = promote_type(eltype(r), typeof(t)) @@ -359,7 +364,7 @@ function ϕ⁽ᵏ⁾(G::WilliamsonGenerator{d, TX}, ::Val{k}, t::Real) where {d, return S * (isodd(k) ? -1 : 1) * Base.factorial(d - 1) / Base.factorial(d - 1 - k) end -function ϕ⁻¹(G::WilliamsonGenerator{d, TX}, x::Real) where {d, TX<:Distributions.DiscreteNonParametric} +function ϕ⁻¹(G::WilliamsonGenerator{d, TX}, x) where {d, TX<:Distributions.DiscreteNonParametric} r = Distributions.support(G.X) Tx = promote_type(eltype(r), typeof(x)) x >= 1 && return zero(Tx) @@ -377,22 +382,22 @@ function ϕ⁻¹(G::WilliamsonGenerator{d, TX}, x::Real) where {d, TX<:Distribut return Tx(r[end]) end -function ϕ⁽ᵏ⁾⁻¹(G::WilliamsonGenerator{d, TX}, ::Val{p}, y; start_at=nothing) where {d, p, TX<:Distributions.DiscreteNonParametric} +function ϕ⁽ᵏ⁾⁻¹(G::WilliamsonGenerator{d, TX}, p::Int, y; start_at=nothing) where {d, TX<:Distributions.DiscreteNonParametric} r = Distributions.support(G.X) Ty = promote_type(eltype(r), typeof(y)) p == 0 && return ϕ⁻¹(G, y) sign = iseven(p) ? 
1 : -1 s_y = sign*y s_y <= 0 && return Ty(r[end]) - s_y >= sign*ϕ⁽ᵏ⁾(G, Val{p}(), 0) && return Ty(0) + s_y >= sign*ϕ⁽ᵏ⁾(G, p, 0) && return Ty(0) for k in eachindex(r) - ϕp_rk = sign * ϕ⁽ᵏ⁾(G, Val{p}(), r[k]) + ϕp_rk = sign * ϕ⁽ᵏ⁾(G, p, r[k]) if s_y > ϕp_rk - if s_y < sign * ϕ⁽ᵏ⁾(G, Val{p}(), prevfloat(r[k])) + if s_y < sign * ϕ⁽ᵏ⁾(G, p, prevfloat(r[k])) return Ty(prevfloat(r[k])) end a = (k==1 ? 0 : r[k-1]); b = r[k] - return Ty(Roots.find_zero(t -> ϕ⁽ᵏ⁾(G, Val{p}(), t) - y, (a, b); bisection=true)) + return Ty(Roots.find_zero(t -> ϕ⁽ᵏ⁾(G, p, t) - y, (a, b); bisection=true)) end end return Ty(r[end]) @@ -423,15 +428,15 @@ struct TiltedGenerator{TG, T, p} <: Generator G::TG sJ::T den::T - function TiltedGenerator(G::Generator, ::Val{p}, sJ::T) where {p,T<:Real} - den = ϕ⁽ᵏ⁾(G, Val{p}(), sJ) + function TiltedGenerator(G::Generator, p::Int, sJ::T) where {T<:Real} + den = ϕ⁽ᵏ⁾(G, p, sJ) return new{typeof(G), T, p}(G, sJ, den) end end max_monotony(G::TiltedGenerator{TG, T, p}) where {TG, T, p} = max(0, max_monotony(G.G) - p) -ϕ(G::TiltedGenerator{TG, T, p}, t::Real) where {TG, T, p} = ϕ⁽ᵏ⁾(G.G, Val{p}(), G.sJ + t) / G.den -ϕ⁻¹(G::TiltedGenerator{TG, T, p}, x::Real) where {TG, T, p} = ϕ⁽ᵏ⁾⁻¹(G.G, Val{p}(), x * G.den; start_at = G.sJ) - G.sJ -ϕ⁽ᵏ⁾(G::TiltedGenerator{TG, T, p}, ::Val{k}, t::Real) where {TG, T, p, k} = ϕ⁽ᵏ⁾(G.G, Val{k + p}(), G.sJ + t) / G.den -ϕ⁽ᵏ⁾⁻¹(G::TiltedGenerator{TG, T, p}, ::Val{k}, y::Real; start_at = G.sJ) where {TG, T, p, k} = ϕ⁽ᵏ⁾⁻¹(G.G, Val{k + p}(), y * G.den; start_at = start_at) - G.sJ -ϕ⁽¹⁾(G::TiltedGenerator{TG, T, p}, t) where {TG, T, p} = ϕ⁽ᵏ⁾(G, Val{1}(), t) +ϕ(G::TiltedGenerator{TG, T, p}, t) where {TG, T, p} = ϕ⁽ᵏ⁾(G.G, p, G.sJ + t) / G.den +ϕ⁻¹(G::TiltedGenerator{TG, T, p}, x) where {TG, T, p} = ϕ⁽ᵏ⁾⁻¹(G.G, p, x * G.den; start_at = G.sJ) - G.sJ +ϕ⁽ᵏ⁾(G::TiltedGenerator{TG, T, p}, k::Int, t) where {TG, T, p} = ϕ⁽ᵏ⁾(G.G, k + p, G.sJ + t) / G.den +ϕ⁽ᵏ⁾⁻¹(G::TiltedGenerator{TG, T, p}, k::Int, y; start_at = G.sJ) where {TG, T, p} = 
ϕ⁽ᵏ⁾⁻¹(G.G, k + p, y * G.den; start_at = start_at) - G.sJ +ϕ⁽¹⁾(G::TiltedGenerator{TG, T, p}, t) where {TG, T, p} = ϕ⁽ᵏ⁾(G, 1, t) Distributions.params(G::TiltedGenerator) = (Distributions.params(G.G)..., sJ = G.sJ) \ No newline at end of file diff --git a/src/Generator/AMHGenerator.jl b/src/Generator/AMHGenerator.jl index 5842355f7..00a5d166b 100644 --- a/src/Generator/AMHGenerator.jl +++ b/src/Generator/AMHGenerator.jl @@ -93,9 +93,9 @@ end ϕ( G::AMHGenerator, t) = (1-G.θ)/(exp(t)-G.θ) ϕ⁻¹(G::AMHGenerator, t) = log(G.θ + (1-G.θ)/t) ϕ⁽¹⁾(G::AMHGenerator, t) = -((1-G.θ) * exp(t)) / (exp(t) - G.θ)^2 -ϕ⁽ᵏ⁾(G::AMHGenerator, ::Val{k}, t) where k = (-1)^k * (1 - G.θ) / G.θ * PolyLog.reli(-k, G.θ * exp(-t)) +ϕ⁽ᵏ⁾(G::AMHGenerator, k::Int, t) = (-1)^k * (1 - G.θ) / G.θ * PolyLog.reli(-k, G.θ * exp(-t)) ϕ⁻¹⁽¹⁾(G::AMHGenerator, t) = (G.θ - 1) / (G.θ * (t - 1) * t + t) -williamson_dist(G::AMHGenerator, ::Val{d}) where d = G.θ >= 0 ? WilliamsonFromFrailty(1 + Distributions.Geometric(1-G.θ),Val{d}()) : WilliamsonTransforms.𝒲₋₁(t -> ϕ(G,t),Val{d}()) +williamson_dist(G::AMHGenerator, d::Int) = G.θ >= 0 ? WilliamsonFromFrailty(1 + Distributions.Geometric(1-G.θ),d) : 𝒲₋₁(ϕ(G),d) frailty(G::AMHGenerator) = G.θ >= 0 ? 
Distributions.Geometric(1-G.θ) : throw("No frailty exists for AMH when θ < 0") function _amh_tau(θ) if abs(θ) < 0.01 diff --git a/src/Generator/BB10Generator.jl b/src/Generator/BB10Generator.jl index 1c3530c88..b6f7790df 100644 --- a/src/Generator/BB10Generator.jl +++ b/src/Generator/BB10Generator.jl @@ -41,7 +41,7 @@ _rebound_params(::Type{<:BB10Generator}, d, α) = (; θ = exp(α[1]), δ = 1 / ( ϕ(G::BB10Generator, s) = begin θ, δ = G.θ, G.δ - exp( (1/θ) * (log1p(-δ) - log(expm1(s) + (1 - δ))) ) + exp( (1/θ) * (log1p(-δ) - log(exp(s) - δ))) end ϕ⁻¹(G::BB10Generator, t) = begin @@ -55,12 +55,26 @@ function ϕ⁽¹⁾(G::BB10Generator, s) ψ = ϕ(G, s) return -(1/θ) * es/(es - δ) * ψ end -function ϕ⁽ᵏ⁾(G::BB10Generator, ::Val{2}, s) - θ, δ = G.θ, G.δ - es = exp(s) - ψ = ϕ(G, s) # ya usa forma estable con log1p/expm1 - den = es - δ - return ψ * (es / (den^2)) * (es/θ^2 + δ/θ) +function ϕ⁽ᵏ⁾(G::BB10Generator, k::Int, s::Real) + if k==2 + θ, δ = G.θ, G.δ + es = exp(s) + ψ = ϕ(G, s) # stable with log1p/expm1 + den = es - δ + return ψ * (es / (den^2)) * (es/θ^2 + δ/θ) + end + return @invoke ϕ⁽ᵏ⁾(G::Generator, k, s) + # b = inv(G.θ) + # k == 0 && return ϕ(G, s) + # T = typeof(b) + # A = zeros(T, k + 1, k + 1) + # A[1, 1] = -b + # for i in 2:k, j in 1:i + # A[i, j] = (j ≤ i-1 ? j * A[i-1, j] : 0.0) - (j > 1 ? 
(b + j - 1) * A[i-1, j-1] : 0.0) + # end + # es = exp(s) + # acc = sum(A[k, j] * es^j * (es - G.δ)^(-b - j) for j in 1:k) + # return (1 - G.δ)^b * acc end ϕ⁻¹⁽¹⁾(G::BB10Generator, t) = begin @@ -79,8 +93,8 @@ function _cdf(C::ArchimedeanCopula{2,G}, u) where {G<:BB10Generator} end # --- log-density -function Distributions._logpdf(C::ArchimedeanCopula{2,G}, u) where {G<:BB10Generator} - T = promote_type(Float64, eltype(u)) +function Distributions._logpdf(C::ArchimedeanCopula{2,BB10Generator{TF}}, u) where {TF} + T = promote_type(TF, eltype(u)) (0.0 < u[1] ≤ 1.0 && 0.0 < u[2] ≤ 1.0) || return T(-Inf) θ, δ = C.G.θ, C.G.δ diff --git a/src/Generator/BB1Generator.jl b/src/Generator/BB1Generator.jl index 6f488510d..d7c8b80e9 100644 --- a/src/Generator/BB1Generator.jl +++ b/src/Generator/BB1Generator.jl @@ -47,16 +47,34 @@ function ϕ⁽¹⁾(G::BB1Generator, s) a, b, ls = inv(G.δ), inv(G.θ), log(s) return -(a*b) * exp((a-1)*ls - (b+1)*log1p(exp(a*ls))) end -function ϕ⁽ᵏ⁾(G::BB1Generator, ::Val{2}, s) # only d=2 case, other cases are not implemented. - a, b, ls = inv(G.δ), inv(G.θ), log(s) - spa = exp(a*ls) - return (a*b) * exp((a-2)*ls) * exp(-(b+2)*log1p(exp(a*ls))) * ( (1 + a*b)*spa - (a - 1) ) -end -function ϕ⁻¹⁽¹⁾(G::BB1Generator, t) - lt = log(t) - return -G.δ*G.θ * exp(-lt*(G.θ+1)) * exp((G.δ-1)*log(expm1(-lt*G.θ))) +function ϕ⁽ᵏ⁾(G::BB1Generator, k::Int, s::Real; tol::Float64=1e-9, maxiter::Int=10_000, miniter::Int=5) + if k==2 + a, b, ls = inv(G.δ), inv(G.θ), log(s) + spa = exp(a*ls) + return (a*b) * exp((a-2)*ls) * exp(-(b+2)*log1p(exp(a*ls))) * ( (1 + a*b)*spa - (a - 1) ) + end + return @invoke ϕ⁽ᵏ⁾(G::Generator, k, s) + + # a, b = inv(G.δ), inv(G.θ) + # k == 0 && return ϕ(G, s) + # ls = log(s); r = exp(a * ls); sk = exp(-k * ls) + # acc, rpow, coef = 0.0, 1.0, 1.0 + # @inbounds for m in 0:maxiter + # am = a * m + # ff = prod(am - j for j in 0:k-1) + # term = ((m & 1 == 1) ? 
-coef : coef) * ff * rpow + # acc_new = acc + term + # m ≥ miniter && abs(term) ≤ tol * (abs(acc_new) + eps()) && return sk * acc_new + # acc = acc_new + # m == maxiter && @warn "ϕ⁽ᵏ⁾(BB1): reached maxiter" k s G.θ G.δ + # rpow *= r + # coef *= (b + m) / (m + 1) + # end + # return sk * acc end + + # Frailty: M = S_{1/δ} * Gamma_{1/θ}^{δ} frailty(G::BB1Generator) = GammaStoppedPositiveStable(inv(G.δ), inv(G.θ)) # --- CDF and logpdf (d=2), numeric stable version --- @@ -69,8 +87,8 @@ function _cdf(C::ArchimedeanCopula{2,G}, u) where {G<:BB1Generator} return exp( - (1/θ) * log1p(sa) ) end -function Distributions._logpdf(C::ArchimedeanCopula{2,G}, u) where {G<:BB1Generator} - T = promote_type(Float64, eltype(u)) +function Distributions._logpdf(C::ArchimedeanCopula{2,BB1Generator{TF}}, u) where {TF} + T = promote_type(TF, eltype(u)) (0.0 < u[1] ≤ 1.0 && 0.0 < u[2] ≤ 1.0) || return T(-Inf) θ, δ = C.G.θ, C.G.δ diff --git a/src/Generator/BB2Generator.jl b/src/Generator/BB2Generator.jl index 73c33f796..e2524a6cc 100644 --- a/src/Generator/BB2Generator.jl +++ b/src/Generator/BB2Generator.jl @@ -42,13 +42,16 @@ function ϕ⁽¹⁾(G::BB2Generator, s) v = (1+1/θ) * log1p(u/δ) + log(θ) + log(δ) + u return -exp(-v) end -function ϕ⁽ᵏ⁾(G::BB2Generator, ::Val{2}, s) - θ, δ = G.θ, G.δ - logA = log1p(log1p(s)/δ) - inv1p = inv(1+s) - term1 = exp(-(1+1/θ) * logA) - term2 = ((1/θ) + 1) * exp(-(2+1/θ) * logA) / δ - return (1/(θ*δ)) * inv1p^2 * (term1 + term2) +function ϕ⁽ᵏ⁾(G::BB2Generator, k::Int, s) + if k == 2 + θ, δ = G.θ, G.δ + logA = log1p(log1p(s)/δ) + inv1p = inv(1+s) + term1 = exp(-(1+1/θ) * logA) + term2 = ((1/θ) + 1) * exp(-(2+1/θ) * logA) / δ + return (1/(θ*δ)) * inv1p^2 * (term1 + term2) + end + return @invoke ϕ⁽ᵏ⁾(G::Generator, k, s) end function ϕ⁻¹⁽¹⁾(G::BB2Generator, t) lt = log(t) @@ -56,13 +59,16 @@ function ϕ⁻¹⁽¹⁾(G::BB2Generator, t) B = G.δ * exp(-(1+G.θ)*lt) return - G.θ * B * exp(A) end -function ϕ⁽ᵏ⁾⁻¹(G::BB2Generator, ::Val{1}, x; start_at=x) - # compute the inverse of 
ϕ⁽¹⁾ - θ, δ = G.θ, G.δ - a = 1 + 1/θ # a > 0 - logv = -log(a) + (δ + (a - 1) * log(δ) - log(- θ * x)) / a - w = LambertW.lambertw(exp(logv)) - return expm1(a * w - δ) +function ϕ⁽ᵏ⁾⁻¹(G::BB2Generator, k::Int, x; start_at=x) + if k == 1 + # compute the inverse of ϕ⁽¹⁾ + θ, δ = G.θ, G.δ + a = 1 + 1/θ # a > 0 + logv = -log(a) + (δ + (a - 1) * log(δ) - log(- θ * x)) / a + w = LambertW.lambertw(exp(logv)) + return expm1(a * w - δ) + end + return @invoke ϕ⁽ᵏ⁾⁻¹(G::Generator, k, x; start_at=x) end # Frailty: M = S_{1/δ} * Gamma_{1/θ}^{δ} @@ -134,14 +140,12 @@ function ρ(G::Copulas.BB2Generator{T}; rtol=1e-7, atol=1e-9) where {T} end -function λᵤ(C::BB2Generator{T}; tsmall::Float64=1e-10) where {T} - G = C.G +function λᵤ(G::BB2Generator{T}; tsmall::Float64=1e-10) where {T} r = ϕ⁽¹⁾(G, 2tsmall) / ϕ⁽¹⁾(G, tsmall) return 2 - 2*r end -function λₗ(C::BB2Generator{T}; tlarge::Float64=1e6) where {T} - G = C.G +function λₗ(G::BB2Generator{T}; tlarge::Float64=1e6) where {T} r = ϕ⁽¹⁾(G, 2tlarge) / ϕ⁽¹⁾(G, tlarge) return 2*r end \ No newline at end of file diff --git a/src/Generator/BB3Generator.jl b/src/Generator/BB3Generator.jl index b91f3725e..cdfa5b759 100644 --- a/src/Generator/BB3Generator.jl +++ b/src/Generator/BB3Generator.jl @@ -44,23 +44,48 @@ function ϕ⁽¹⁾(G::BB3Generator, s) B = exp((pw-1)*log(A)) return -(pw*a) * B * inv(1+s) * ϕ(G,s) end - -function ϕ⁽ᵏ⁾(G::BB3Generator, ::Val{2}, s) - a = inv(G.δ); pw = inv(G.θ) - A = a * log1p(s); inv1p = inv(1+s) - B = exp((pw-1)*log(A)) - C = exp((pw-2)*log(A)) - φ = ϕ(G,s) - K = (pw*a) * B * inv1p - K′ = (pw*a) * inv1p^2 * ((pw-1)*a*C - B) - return φ * (K^2 - K′) +function ϕ⁽ᵏ⁾(G::BB3Generator, k::Int, s) + + if k==2 # old version. 
+ a = inv(G.δ); pw = inv(G.θ) + A = a * log1p(s); inv1p = inv(1+s) + B = exp((pw-1)*log(A)) + C = exp((pw-2)*log(A)) + φ = ϕ(G,s) + K = (pw*a) * B * inv1p + K′ = (pw*a) * inv1p^2 * ((pw-1)*a*C - B) + return φ * (K^2 - K′) + end + return @invoke ϕ⁽ᵏ⁾(G::Generator, k, s) + + # T = promote_type(typeof(s), typeof(G.θ), typeof(G.δ)) + # θ, δ, r = T(G.θ), T(G.δ), one(T) / T(G.θ) + # t, a = log1p(T(s)), δ^(-r) + # ϕ0 = exp(-a * t^r) + # k == 0 && return ϕ0 + # fall = one(T) + # x = [begin fall *= (r - (j-1)); -a * fall * t^(r - j) end for j in 1:k] # Derivates of h(t) = -a·t^r: h⁽ʲ⁾(t) = -a·(r)ⱼ·t^(r-j) + # B = zeros(T, k + 1); B[1] = one(T) + # for n in 1:k + # B[n + 1] = sum(binomial(n-1, j-1) * x[j] * B[n - j + 1] for j in 1:n) # Bell Bₘ via recurrency: Bₙ = Σⱼ C(n-1,j-1)·xⱼ·Bₙ₋ⱼ + # end + # row = [one(T)] + # for n in 1:k + # row = [sum((m-1 ≥ 0 ? row[m] : zero(T)) + (m ≤ n-1 ? -T(n-1) * row[m+1] : zero(T)) for _ in 1:1) for m in 0:n] # Stirling numberss(k,m) via recurrency: s(n,m) = s(n-1,m-1) - (n-1)·s(n-1,m) + # end + # acc = sum(row[m+1] * B[m+1] for m in 1:k) # Sum final: Σₘ s(k,m)·Bₘ + # return ϕ0 * (one(T) + T(s))^(-k) * acc end ϕ⁻¹⁽¹⁾(G::BB3Generator, t) = -(G.δ*G.θ) * inv(t) * exp(G.δ * exp(G.θ * log(-log(t)))) * (-log(t))^(G.θ - 1) function _f_for_BB3_ϕ⁽¹⁾⁻¹(lt, a, δ, lny) t = exp(lt) return (a-1)*lt - δ*t - exp(a*lt) - lny end -function ϕ⁽ᵏ⁾⁻¹(G::BB3Generator, ::Val{1}, x; start_at=x) +function ϕ⁽ᵏ⁾⁻¹(G::BB3Generator, k::Int, x; start_at=x) + if k != 1 + # Only k==1 is implemented here, fall back to generic otherwise. 
+ return @invoke ϕ⁽ᵏ⁾⁻¹(G::Generator, k, x; start_at=start_at) + end # compute the inverse of ϕ⁽¹⁾ θ, δ = G.θ, G.δ a = 1/θ diff --git a/src/Generator/BB6Generator.jl b/src/Generator/BB6Generator.jl index c7d327d1c..59901280c 100644 --- a/src/Generator/BB6Generator.jl +++ b/src/Generator/BB6Generator.jl @@ -51,15 +51,37 @@ function ϕ⁽¹⁾(G::BB6Generator, s) H = 1 - E return -(a*b) * s^(b-1) * E * H^(a-1) end - -function ϕ⁽ᵏ⁾(G::BB6Generator, ::Val{2}, s) - a = inv(G.θ); b = inv(G.δ) - r = s^b - E = exp(-r) - H = 1 - E - term = (b - 1) * s^(b - 2) - b * s^(2b - 2) + (a - 1) * b * s^(2b - 2) * (E / H) - return -a * b * E * H^(a - 1) * term +function ϕ⁽ᵏ⁾(G::BB6Generator, k::Int, s; tol::Float64=1e-9, maxm::Int=10_000) + + if k==2 + a = inv(G.θ); b = inv(G.δ) + r = s^b + E = exp(-r) + H = 1 - E + term = (b - 1) * s^(b - 2) - b * s^(2b - 2) + (a - 1) * b * s^(2b - 2) * (E / H) + return -a * b * E * H^(a - 1) * term + end + return @invoke ϕ⁽ᵏ⁾(G::Generator, k, s) + + # a, b = inv(G.δ), inv(G.θ) + # k == 0 && return ϕ(G, s) + # sa = s^a + # acc, cm = 0.0, 1.0 + # @inbounds for m in 1:maxm + # cm = (m == 1) ? 
b : cm * (b - (m - 1)) / m + # abs(cm) < eps() && break + # xs = [(-m) * prod(a - j for j in 0:r-1) * s^(a - r) for r in 1:k] + # B = ones(Float64, k + 1) + # for n in 1:k + # B[n + 1] = sum(binomial(n - 1, j - 1) * xs[j] * B[n - j + 1] for j in 1:n) + # end + # term = (-1)^(m + 1) * cm * exp(-m * sa) * B[end] + # acc += term + # abs(term) ≤ tol * (abs(acc) + eps()) && break + # end + # return acc end + function ϕ⁻¹⁽¹⁾(G::BB6Generator, u::Real) θ, δ = G.θ, G.δ h = 1 - (1 - u)^θ # ∈ (0,1] @@ -81,8 +103,8 @@ function _cdf(C::ArchimedeanCopula{2,G}, u) where {G<:BB6Generator} end # ------------------ log-PDF (d = 2) ------------------ -function Distributions._logpdf(C::ArchimedeanCopula{2,G}, u) where {G<:BB6Generator} - Tret = promote_type(Float64, eltype(u)) +function Distributions._logpdf(C::ArchimedeanCopula{2,BB6Generator{TF}}, u) where {TF} + Tret = promote_type(TF, eltype(u)) (0.0 < u[1] ≤ 1.0 && 0.0 < u[2] ≤ 1.0) || return Tret(-Inf) θ, δ = C.G.θ, C.G.δ diff --git a/src/Generator/BB7Generator.jl b/src/Generator/BB7Generator.jl index 746772e31..f2935e5b3 100644 --- a/src/Generator/BB7Generator.jl +++ b/src/Generator/BB7Generator.jl @@ -52,13 +52,32 @@ function ϕ⁽¹⁾(G::BB7Generator, s) return -(1/(G.θ*G.δ)) * (1 - exp(-inv(G.δ)*log1p(s)))^(inv(G.θ)-1) * (1+s)^(-inv(G.δ)-1) end -function ϕ⁽ᵏ⁾(G::BB7Generator, ::Val{2}, s) - θ, δ = G.θ, G.δ - invθ, invδ = inv(θ), inv(δ) - a = exp(-invδ * log1p(s)) # (1+s)^(-1/δ) - fac = exp(-(invδ + 2) * log1p(s)) # a/(1+s)^2 = (1+s)^(-1/δ - 2) - return (invθ*invδ) * fac * (1 - a)^(invθ - 2) * - ( (1 + invδ) - (1 + invθ*invδ)*a ) +function ϕ⁽ᵏ⁾(G::BB7Generator, k::Int, s::Real; tol::Float64=1e-10, maxiter::Int=20_000, miniter::Int=5) + if k==2 + θ, δ = G.θ, G.δ + invθ, invδ = inv(θ), inv(δ) + a = exp(-invδ * log1p(s)) # (1+s)^(-1/δ) + fac = exp(-(invδ + 2) * log1p(s)) # a/(1+s)^2 = (1+s)^(-1/δ - 2) + return (invθ*invδ) * fac * (1 - a)^(invθ - 2) * + ( (1 + invδ) - (1 + invθ*invδ)*a ) + end + return @invoke ϕ⁽ᵏ⁾(G::Generator, k, s) 
+ + # b, p = inv(G.θ), -inv(G.δ) + # k == 0 && return ϕ(G, s) + # log1ps = log1p(s) + # acc, cm = 0.0, 1.0 + # @inbounds for m in 1:maxiter + # cm = (m == 1) ? b : cm * (b - m + 1) / m + # abs(cm) < eps() && break + # pm = m * p + # ff = prod(pm - j for j in 0:k-1) + # term = (-1)^(m + 1) * cm * ff * exp((pm - k) * log1ps) + # acc += term + # m ≥ miniter && abs(term) ≤ tol * (abs(acc) + eps()) && break + # m == maxiter && @warn "ϕ⁽ᵏ⁾(BB7): reached maxiter" k s G.θ G.δ + # end + # return acc end ϕ⁻¹⁽¹⁾(G::BB7Generator, u) = begin @@ -89,8 +108,8 @@ function _cdf(C::ArchimedeanCopula{2,G}, u) where {G<:BB7Generator} return 1 - exp( (1/θ)*log1p(-t) ) end -function Distributions._logpdf(C::ArchimedeanCopula{2,G}, u) where {G<:BB7Generator} - Tret = promote_type(Float64, eltype(u)) +function Distributions._logpdf(C::ArchimedeanCopula{2,BB7Generator{TF}}, u) where {TF} + Tret = promote_type(TF, eltype(u)) u1, u2 = u (0.0 < u1 ≤ 1.0 && 0.0 < u2 ≤ 1.0) || return Tret(-Inf) diff --git a/src/Generator/BB8Generator.jl b/src/Generator/BB8Generator.jl index 0d87b3382..2a9ff59ec 100644 --- a/src/Generator/BB8Generator.jl +++ b/src/Generator/BB8Generator.jl @@ -46,14 +46,31 @@ _rebound_params(::Type{<:BB8Generator}, d, α) = (; ϑ = 1 + exp(α[1]), δ = 1 ϕ(G::BB8Generator, s) = (1/G.δ) * (1 - (1 - _η(G)*exp(-s))^(inv(G.ϑ))) ϕ⁻¹(G::BB8Generator, t) = -log((1 - (1 - G.δ*t)^G.ϑ)/_η(G)) ϕ⁽¹⁾(G::BB8Generator, s) = -(_η(G)/(G.δ*G.ϑ)) * exp(-s) * (1 - _η(G)*exp(-s))^(inv(G.ϑ)-1) - -function ϕ⁽ᵏ⁾(G::BB8Generator, ::Val{2}, s) - δ, ϑ = G.δ, G.ϑ - α, β = inv(δ), inv(ϑ) - ηv = _η(G) - u = exp(-s) - b = 1 - ηv*u - return (α*β*ηv) * u * b^(β - 2) * (1 - β*ηv*u) +function ϕ⁽ᵏ⁾(G::BB8Generator, k::Int, s::Real; tol::Float64=1e-10, maxiter::Int=20_000, miniter::Int=5) + if k==2 + δ, ϑ = G.δ, G.ϑ + α, β = inv(δ), inv(ϑ) + ηv = _η(G) + u = exp(-s) + b = 1 - ηv*u + return (α*β*ηv) * u * b^(β - 2) * (1 - β*ηv*u) + end + return @invoke ϕ⁽ᵏ⁾(G::Generator, k, s) + # δ, b, η = G.δ, inv(G.ϑ), 1 - G.δ + # k 
== 0 && return ϕ(G, s) + # acc, cm, η_pow, exp_term = 0.0, 1.0, 1.0, 1.0 + # exp_s_neg = exp(-s) + # @inbounds for m in 1:maxiter + # cm = (m == 1) ? b : cm * (b - m + 1) / m + # η_pow *= η + # exp_term *= exp_s_neg + # abs(cm) < eps() && break + # term = (-1)^(m + 1) * cm * η_pow * (-m)^k * exp_term + # acc += term + # m ≥ miniter && abs(term) ≤ tol * (abs(acc) + eps()) && break + # m == maxiter && @warn "ϕ⁽ᵏ⁾(BB8): reached maxiter" k s G.ϑ G.δ + # end + # return acc / δ end ϕ⁻¹⁽¹⁾(G::BB8Generator, t) = -G.ϑ*G.δ * (1 - G.δ*t)^(G.ϑ - 1) / (1 - (1 - G.δ*t)^G.ϑ) @@ -68,8 +85,8 @@ function _cdf(C::ArchimedeanCopula{2,G}, u) where {G<:BB8Generator} return (1/δ) * (1 - t) end -function Distributions._logpdf(C::ArchimedeanCopula{2,G}, u) where {G<:BB8Generator} - Tret = promote_type(Float64, eltype(u)) +function Distributions._logpdf(C::ArchimedeanCopula{2,BB8Generator{TF}}, u) where {TF} + Tret = promote_type(TF, eltype(u)) u1, u2 = u (0.0 < u1 ≤ 1.0 && 0.0 < u2 ≤ 1.0) || return Tret(-Inf) diff --git a/src/Generator/BB9Generator.jl b/src/Generator/BB9Generator.jl index ac885b637..234028233 100644 --- a/src/Generator/BB9Generator.jl +++ b/src/Generator/BB9Generator.jl @@ -46,13 +46,24 @@ function ϕ⁽¹⁾(G::BB9Generator, s) a = inv(G.θ); c = G.δ^(-G.θ) ϕ(G,s) * ( -a * (s + c)^(a-1) ) end -function ϕ⁽ᵏ⁾(G::BB9Generator, ::Val{2}, s) - a = inv(G.θ); c = G.δ^(-G.θ) - φ = ϕ(G,s) - t = s + c - φ * ( a^2 * t^(2a-2) - a*(a-1) * t^(a-2) ) +function ϕ⁽ᵏ⁾(G::BB9Generator, k::Int, s::Real) + if k==2 + a = inv(G.θ); c = G.δ^(-G.θ) + φ = ϕ(G,s) + t = s + c + φ * ( a^2 * t^(2a-2) - a*(a-1) * t^(a-2) ) + end + return @invoke ϕ⁽ᵏ⁾(G::Generator, k, s) + # k == 0 && return ϕ(G, s) + # a, c = inv(G.θ), G.δ^(-G.θ) + # T = promote_type(typeof(a), typeof(s)) + # xs = [-prod(a - i for i in 0:j-1) * (s + c)^(a - j) for j in 1:k] + # B = ones(T, k + 1) + # for n in 1:k + # B[n + 1] = sum(binomial(n - 1, j - 1) * xs[j] * B[n - j + 1] for j in 1:n) + # end + # return ϕ(G, s) * B[end] end - 
ϕ⁻¹⁽¹⁾(G::BB9Generator, t) = -G.θ * (inv(G.δ) - log(t))^(G.θ - 1) / t frailty(G::BB9Generator) = TiltedPositiveStable(inv(G.θ), G.δ^(-G.θ)) @@ -65,8 +76,8 @@ function _cdf(C::ArchimedeanCopula{2,G}, u) where {G<:BB9Generator} return exp(inv(δ) - A) end -function Distributions._logpdf(C::ArchimedeanCopula{2,G}, u) where {G<:BB9Generator} - T = promote_type(Float64, eltype(u)) +function Distributions._logpdf(C::ArchimedeanCopula{2,BB9Generator{TF}}, u) where {TF} + T = promote_type(TF, eltype(u)) (0.0 < u[1] ≤ 1.0 && 0.0 < u[2] ≤ 1.0) || return T(-Inf) θ, δ = C.G.θ, C.G.δ diff --git a/src/Generator/ClaytonGenerator.jl b/src/Generator/ClaytonGenerator.jl index 105352d07..9ab4eaa10 100644 --- a/src/Generator/ClaytonGenerator.jl +++ b/src/Generator/ClaytonGenerator.jl @@ -54,12 +54,12 @@ max_monotony(G::ClaytonGenerator) = G.θ >= 0 ? Inf : (1 - 1/G.θ) ϕ⁻¹(G::ClaytonGenerator, t) = (t^(-G.θ)-1)/G.θ ϕ⁽¹⁾(G::ClaytonGenerator, t) = (1+G.θ*t) ≤ 0 ? 0 : - (1+G.θ*t)^(-1/G.θ -1) ϕ⁻¹⁽¹⁾(G::ClaytonGenerator, t) = -t^(-G.θ-1) -ϕ⁽ᵏ⁾(G::ClaytonGenerator, ::Val{k}, t) where k = (1+G.θ*t) ≤ 0 ? 0 : (1 + G.θ * t)^(-1/G.θ - k) * prod(-1-ℓ*G.θ for ℓ in 0:k-1; init=1) -ϕ⁽ᵏ⁾⁻¹(G::ClaytonGenerator, ::Val{k}, t; start_at=t) where k = ((t / prod(-1-ℓ*G.θ for ℓ in 0:k-1; init=1))^(1/(-1/G.θ - k)) -1)/G.θ +ϕ⁽ᵏ⁾(G::ClaytonGenerator, k::Int, t) = (1+G.θ*t) ≤ 0 ? 0 : (1 + G.θ * t)^(-1/G.θ - k) * prod(-1-ℓ*G.θ for ℓ in 0:k-1; init=1) +ϕ⁽ᵏ⁾⁻¹(G::ClaytonGenerator, k::Int, t; start_at=t) = ((t / prod(-1-ℓ*G.θ for ℓ in 0:k-1; init=1))^(1/(-1/G.θ - k)) -1)/G.θ τ(G::ClaytonGenerator) = ifelse(isfinite(G.θ), G.θ/(G.θ+2), 1) τ⁻¹(::Type{<:ClaytonGenerator},τ) = ifelse(τ == 1,Inf,2τ/(1-τ)) -williamson_dist(G::ClaytonGenerator, ::Val{d}) where d = G.θ >= 0 ? WilliamsonFromFrailty(Distributions.Gamma(1/G.θ,G.θ), Val{d}()) : ClaytonWilliamsonDistribution(G.θ,d) +williamson_dist(G::ClaytonGenerator, d::Int) = G.θ >= 0 ? 
WilliamsonFromFrailty(Distributions.Gamma(1/G.θ,G.θ), d) : ClaytonWilliamsonDistribution(G.θ,d) frailty(G::ClaytonGenerator) = G.θ >= 0 ? Distributions.Gamma(1/G.θ, G.θ) : throw(ArgumentError("Clayton frailty is only defined for θ ≥ 0 (positive dependence). Got θ = $(G.θ).")) function Distributions._rand!(rng::Distributions.AbstractRNG, C::ClaytonCopula, A::DenseMatrix{<:Real}) @@ -82,7 +82,7 @@ function Distributions._logpdf(C::ClaytonCopula{d,TG}, u) where {d,TG<:ClaytonGe return log(θ + 1) * (d - 1) - (θ + 1) * S2 + (-1 / θ - d) * log(S1 - d + 1) end ### only for test... -@inline function _C_clayton(u::Float64, v::Float64, θ::Float64) +@inline function _C_clayton(u::Float64, v::Float64, θ::Real) s = u^(-θ) + v^(-θ) - 1 if θ < 0 return (s <= 0) ? 0.0 : s^(-1/θ) # soporte recortado para θ<0 @@ -90,7 +90,7 @@ end return s^(-1/θ) # para θ>0 siempre s≥1 end end -# Spearman (vía CDF) — con integrando seguro +# Spearman (vía CDF) — with _safett Integral function ρ(G::ClaytonGenerator; rtol=1e-8, atol=1e-10) θ = float(G.θ) θ ≤ -1 && throw(ArgumentError("Para Clayton: θ > -1.")) @@ -101,21 +101,21 @@ function ρ(G::ClaytonGenerator; rtol=1e-8, atol=1e-10) return 12I - 3 end -# Inversa ρ → θ para Clayton (sin recortar a [0,1]) +# Inverse ρ → θ for Clayton (without trimming to [0,1]) function ρ⁻¹(::Type{<:ClaytonGenerator}, ρ̂; atol=1e-10) _ρ = float(ρ̂) if isapprox(_ρ, 0.0; atol=1e-14) return 0.0 end - # Semillas: aproximamos τ ≈ (2/3)ρ y θ ≈ 2τ/(1-τ) + # Seeds: we approximate τ ≈ (2/3)ρ and θ ≈ 2τ/(1-τ) τ0 = clamp((2/3)*_ρ, -0.99, 0.99) θ0 = 2*τ0/(1 - τ0) θ0 = clamp(θ0, -1 + sqrt(eps(Float64)), 1e6) - θ1 = θ0 + (_ρ > 0 ? 0.25 : -0.25) # segunda semilla hacia el lado correcto + θ1 = θ0 + (_ρ > 0 ? 
0.25 : -0.25) # second seed towards the right side f(θ) = ρ(ClaytonGenerator(θ)) - _ρ - # Secante con dos semillas; no requiere bracketing + # Two-seeded blotter; no bracketing required θ = Roots.find_zero(f, (θ0, θ1), Roots.Order2(); xatol=atol) return θ end \ No newline at end of file diff --git a/src/Generator/FrankGenerator.jl b/src/Generator/FrankGenerator.jl index 9d25fb699..01c325823 100644 --- a/src/Generator/FrankGenerator.jl +++ b/src/Generator/FrankGenerator.jl @@ -49,11 +49,11 @@ _θ_bounds(::Type{<:FrankGenerator}, d) = d==2 ? (-Inf, Inf) : (0, Inf) ϕ(G::FrankGenerator, t) = G.θ > 0 ? -LogExpFunctions.log1mexp(LogExpFunctions.log1mexp(-G.θ)-t)/G.θ : -log1p(exp(-t) * expm1(-G.θ))/G.θ ϕ⁽¹⁾(G::FrankGenerator, t) = (1 - 1 / (1 + exp(-t)*expm1(-G.θ))) / G.θ ϕ⁻¹⁽¹⁾(G::FrankGenerator, t) = G.θ / (-expm1(G.θ * t)) -function ϕ⁽ᵏ⁾(G::FrankGenerator, ::Val{k}, t) where k +function ϕ⁽ᵏ⁾(G::FrankGenerator, k::Int, t) return (-1)^k * (1 / G.θ) * PolyLog.reli(-(k - 1), -expm1(-G.θ) * exp(-t)) end ϕ⁻¹(G::FrankGenerator, t) = G.θ > 0 ? LogExpFunctions.log1mexp(-G.θ) - LogExpFunctions.log1mexp(-t*G.θ) : -log(expm1(-t*G.θ)/expm1(-G.θ)) -williamson_dist(G::FrankGenerator, ::Val{d}) where d = G.θ > 0 ? WilliamsonFromFrailty(Logarithmic(-G.θ), Val{d}()) : WilliamsonTransforms.𝒲₋₁(t -> ϕ(G,t),Val{d}()) +williamson_dist(G::FrankGenerator, d::Int) = G.θ > 0 ? WilliamsonFromFrailty(Logarithmic(-G.θ), d) : 𝒲₋₁(ϕ(G), d) frailty(G::FrankGenerator) = G.θ > 0 ? 
Logarithmic(-G.θ) : throw("The frank copula has no frailty when θ < 0") Debye(x, k::Int=1) = k / x^k * QuadGK.quadgk(t -> t^k/expm1(t), 0, x)[1] diff --git a/src/Generator/GumbelBarnettGenerator.jl b/src/Generator/GumbelBarnettGenerator.jl index 6e4235400..a57d82f8d 100644 --- a/src/Generator/GumbelBarnettGenerator.jl +++ b/src/Generator/GumbelBarnettGenerator.jl @@ -87,7 +87,7 @@ end ϕ⁽¹⁾(G::GumbelBarnettGenerator, t) = -exp((1 - exp(t)) / G.θ) * exp(t) / G.θ ϕ⁻¹(G::GumbelBarnettGenerator, t) = log1p(-G.θ * log(t)) ϕ⁻¹⁽¹⁾(G::GumbelBarnettGenerator, t) = -G.θ / (t - G.θ * t * log(t)) -function ϕ⁽ᵏ⁾(G::GumbelBarnettGenerator, ::Val{k}, t) where k +function ϕ⁽ᵏ⁾(G::GumbelBarnettGenerator, k::Int, t) α = 1/G.θ C = -α*exp(t) R = C * exp(α + C) @@ -99,48 +99,6 @@ end # See this htread ;: https://discourse.julialang.org/t/solving-for-transcendental-equation/131229/16 -# function lower_bound_on_leftmost_root(::Val{n}) where n -# n==2 && return - 1.0 -# n==3 && return - 1 / 0.380 -# n==4 && return - 1 / 0.216 -# n==5 && return - 1 / 0.145 -# n==6 && return - 1 / 0.106 -# n==7 && return - 1 / 0.082 -# n==8 && return - 1 / 0.066 -# n==9 && return - 1 / 0.055 -# n==10 && return - 1 / 0.046 -# C2 = binomial(n, 2) -# C3 = binomial(n, 3) -# C4 = binomial(n, 4) -# term1 = C2 / n -# discr = C2^2 - (2n / (n - 1)) * (C3 + 3*C4) -# term2 = (n - 1)/n * sqrt(discr) -# lower_bound = (term1 + term2) -# return -lower_bound -# end -# leftmost_critical_point(::Val{n}) where n = lower_bound_on_leftmost_root(Val{n+1}()) -# starting_point(::Val{n}) where n = leftmost_critical_point(Val{n+1}()) -# last_summit(::Val{n}) where n = _fₙ(leftmost_critical_point(Val{n}()), Val{n}()) - -# function _fₙ(x, ::Val{n}, s2::NTuple{n, Int}=ntuple(i->stirlings2(n, i), n)) where n -# n==1 && return exp(x) -# return x*evalpoly(x, s2) * exp(x) -# end -# function _inv_fₙ(x, ::Val{n}) where n -# n==1 && return log(x) -# x₀ = starting_point(Val{n}()) -# return find_zero( -# let s2 = ntuple(i->stirlings2(n, i), n) 
-# t -> _fₙ(t, Val{n}(), s2)-x -# end, -# x₀ -# ) -# end -# function ϕ⁽ᵏ⁾⁻¹(G::GumbelBarnettGenerator, ::Val{k}, t; start_at=t) where k -# @show k, t -# return log(- G.θ * _inv_fₙ(t * exp(-1/G.θ), Val{k}())) -# end - function _gumbelbarnett_tau(θ) iszero(θ) && return θ r, _ = QuadGK.quadgk(x -> (1-θ*log(x)) * log1p(-θ*log(x)) * x, 0, 1) diff --git a/src/Generator/GumbelGenerator.jl b/src/Generator/GumbelGenerator.jl index b9cb0da3b..de9c53398 100644 --- a/src/Generator/GumbelGenerator.jl +++ b/src/Generator/GumbelGenerator.jl @@ -43,6 +43,7 @@ Distributions.params(G::GumbelGenerator) = (θ = G.θ,) _unbound_params(::Type{<:GumbelGenerator}, d, θ) = [log(θ.θ - 1)] # θ ≥ 1 _rebound_params(::Type{<:GumbelGenerator}, d, α) = (; θ = 1 + exp(α[1])) _θ_bounds(::Type{<:GumbelGenerator}, d) = (1, Inf) +_available_fitting_methods(::Type{<:ArchimedeanCopula{d,<:GumbelGenerator} where {d}}, d) = (:mle, :itau, :ibeta) # disable :irho because taking ages. ϕ( G::GumbelGenerator, t) = exp(-exp(log(t)/G.θ)) ϕ⁻¹(G::GumbelGenerator, t) = exp(log(-log(t))*G.θ) @@ -56,7 +57,7 @@ end # The folliwng function got commented because it does WORSE in term of runtime than the # corredponsing generic :) -function ϕ⁽ᵏ⁾(G::GumbelGenerator, ::Val{d}, t) where d +function ϕ⁽ᵏ⁾(G::GumbelGenerator, d::Int, t) α = 1 / G.θ return eltype(t)(ϕ(G, t) * t^(-d) * sum( α^j * Float64(BigCombinatorics.Stirling1(d, j)) * sum(Float64(BigCombinatorics.Stirling2(j, k)) * (-t^α)^k for k in 1:j) for j in 1:d @@ -76,8 +77,8 @@ function _cdf(C::ArchimedeanCopula{2,G}, u) where {G<:GumbelGenerator} lx₁, lx₂ = log(x₁), log(x₂) return 1 - LogExpFunctions.cexpexp(LogExpFunctions.logaddexp(θ * lx₁, θ * lx₂) / θ) end -function Distributions._logpdf(C::ArchimedeanCopula{2,G}, u) where {G<:GumbelGenerator} - T = promote_type(Float64, eltype(u)) +function Distributions._logpdf(C::ArchimedeanCopula{2,GumbelGenerator{TF}}, u) where {TF} + T = promote_type(TF, eltype(u)) !all(0 .< u .<= 1) && return T(-Inf) # if not in range 
return -Inf θ = C.G.θ diff --git a/src/Generator/InvGaussianGenerator.jl b/src/Generator/InvGaussianGenerator.jl index 122035d3c..49e5e700d 100644 --- a/src/Generator/InvGaussianGenerator.jl +++ b/src/Generator/InvGaussianGenerator.jl @@ -57,7 +57,7 @@ function ϕ⁽¹⁾(G::InvGaussianGenerator, t) end end -function ϕ⁽ᵏ⁾(G::InvGaussianGenerator, ::Val{k}, t) where {k} +function ϕ⁽ᵏ⁾(G::InvGaussianGenerator, k::Int, t) k == 0 && return ϕ(G, t) k == 1 && return ϕ⁽¹⁾(G, t) # Closed-form via Faà di Bruno: ϕ^{(k)} = ϕ * Y_k(f', f'', ..., f^{(k)}) diff --git a/src/Generator/JoeGenerator.jl b/src/Generator/JoeGenerator.jl index aa0fbdb75..4362afc14 100644 --- a/src/Generator/JoeGenerator.jl +++ b/src/Generator/JoeGenerator.jl @@ -47,7 +47,7 @@ _θ_bounds(::Type{<:JoeGenerator}, d) = (1, Inf) ϕ( G::JoeGenerator, t) = 1-(-expm1(-t))^(1/G.θ) ϕ⁻¹(G::JoeGenerator, t) = -log1p(-(1-t)^G.θ) ϕ⁽¹⁾(G::JoeGenerator, t) = (-expm1(-t))^(1/G.θ) / (G.θ - G.θ * exp(t)) -function ϕ⁽ᵏ⁾(G::JoeGenerator, ::Val{d}, t) where d +function ϕ⁽ᵏ⁾(G::JoeGenerator, d::Int, t) # TODO: test if this ϕ⁽ᵏ⁾ is really more 'efficient' than the default one, # as we already saw that for the Gumbel is wasn't the case. α = 1 / G.θ diff --git a/src/MiscellaneousCopulas/BernsteinCopula.jl b/src/MiscellaneousCopulas/BernsteinCopula.jl index 7b13be7ea..a35113f6e 100644 --- a/src/MiscellaneousCopulas/BernsteinCopula.jl +++ b/src/MiscellaneousCopulas/BernsteinCopula.jl @@ -184,7 +184,7 @@ end # Fitting colocated. StatsBase.dof(::BernsteinCopula) = 0 -_available_fitting_methods(::Type{<:BernsteinCopula}) = (:bernstein,) +_available_fitting_methods(::Type{<:BernsteinCopula}, d) = (:bernstein,) """ _fit(::Type{<:BernsteinCopula}, U, ::Val{:bernstein}; m::Union{Int,Tuple,Nothing}=nothing, pseudo_values::Bool=true, kwargs...) 
-> (C, meta) diff --git a/src/MiscellaneousCopulas/BetaCopula.jl b/src/MiscellaneousCopulas/BetaCopula.jl index f1f823bdc..203f76450 100644 --- a/src/MiscellaneousCopulas/BetaCopula.jl +++ b/src/MiscellaneousCopulas/BetaCopula.jl @@ -124,7 +124,7 @@ end # Fitting collocated StatsBase.dof(::BetaCopula) = 0 -_available_fitting_methods(::Type{<:BetaCopula}) = (:beta,) +_available_fitting_methods(::Type{<:BetaCopula}, d) = (:beta,) """ _fit(::Type{<:BetaCopula}, U, ::Val{:beta}; kwargs...) -> (C, meta) diff --git a/src/MiscellaneousCopulas/CheckerboardCopula.jl b/src/MiscellaneousCopulas/CheckerboardCopula.jl index 7ff6c69ad..9e1960c8e 100644 --- a/src/MiscellaneousCopulas/CheckerboardCopula.jl +++ b/src/MiscellaneousCopulas/CheckerboardCopula.jl @@ -157,7 +157,7 @@ end # Fit API: mirror constructor for the moment until we get a better API ? # Fitting plug-in (empírico) para CheckerboardCopula — mismo patrón que BetaCopula StatsBase.dof(::CheckerboardCopula) = 0 -_available_fitting_methods(::Type{<:CheckerboardCopula}) = (:exact,) +_available_fitting_methods(::Type{<:CheckerboardCopula}, d) = (:exact,) """ _fit(::Type{<:CheckerboardCopula}, U, ::Val{:exact}; m=nothing, pseudo_values::Bool=true, kwargs...) -> (C, meta) diff --git a/src/MiscellaneousCopulas/EmpiricalCopula.jl b/src/MiscellaneousCopulas/EmpiricalCopula.jl index 3b15b3dcf..eb990d75a 100644 --- a/src/MiscellaneousCopulas/EmpiricalCopula.jl +++ b/src/MiscellaneousCopulas/EmpiricalCopula.jl @@ -58,7 +58,7 @@ end # Fitting colocated. StatsBase.dof(::EmpiricalCopula) = 0 -_available_fitting_methods(::Type{<:EmpiricalCopula}) = (:deheuvels,) +_available_fitting_methods(::Type{<:EmpiricalCopula}, d) = (:deheuvels,) """ _fit(::Type{<:EmpiricalCopula}, U, ::Val{:deheuvels}; pseudo_values::Bool=true, kwargs...) 
-> (C, meta) diff --git a/src/MiscellaneousCopulas/FGMCopula.jl b/src/MiscellaneousCopulas/FGMCopula.jl index 247b89de9..e2a95d5af 100644 --- a/src/MiscellaneousCopulas/FGMCopula.jl +++ b/src/MiscellaneousCopulas/FGMCopula.jl @@ -30,10 +30,16 @@ struct FGMCopula{d, Tθ, Tf} <: Copula{d} θ::Tθ fᵢ::Tf function FGMCopula(d, θ) - vθ = θ isa Vector ? promote(θ...,1.0)[1:end-1] : [promote(θ,1.0)[1]] - if all(θ .== 0) - return IndependentCopula(d) + if (θ isa NTuple) || (θ isa Vector) + vθ = collect(promote(θ..., 1.0))[1:end-1] + else + vθ = [promote(θ, 1.0)[1]] end + + all(vθ .== 0) && return IndependentCopula(d) + d==2 && vθ[1]==1 && return MCopula(2) + d==2 && vθ[1]==-1 && return WCopula(2) + # Check first restrictions on parameters any(abs.(vθ) .> 1) && throw(ArgumentError("Each component of the parameter vector must satisfy that |θᵢ| ≤ 1")) length(vθ) != 2^d - d - 1 && throw(ArgumentError("Number of parameters (θ) must match the dimension ($d): 2ᵈ-d-1")) @@ -41,7 +47,7 @@ struct FGMCopula{d, Tθ, Tf} <: Copula{d} # Last check: for epsilon in Base.product(fill([-1, 1], d)...) if 1 + _fgm_red(vθ, epsilon) < 0 - throw(ArgumentError("Invalid parameters. The parameters do not meet the condition to be an FGM copula")) + throw(ArgumentError("Invalid parameters θ = $vθ. The parameters do not meet the condition to be an FGM copula")) end end @@ -52,72 +58,6 @@ struct FGMCopula{d, Tθ, Tf} <: Copula{d} end FGMCopula{D, T1, T2}(d, θ) where {D, T1, T2} = FGMCopula(d, θ) end -Base.eltype(C::FGMCopula) = eltype(C.θ) - -# Fitting/params interface -Distributions.params(C::FGMCopula) = (θ = collect(C.θ),) -_example(::Type{<:FGMCopula}, d) = FGMCopula(d, fill(0.1, 2^d - d - 1)) -_available_fitting_methods(::Type{<:FGMCopula{2}}) = (:mle, :itau, :irho, :ibeta) -_available_fitting_methods(::Type{<:FGMCopula}) = (:mle,) - -# Compute the maximal λ so that all FGM constraints are strictly satisfied -function _max_lambda(β, d) - λmax = 1.0 - for epsilon in Base.product(fill([-1, 1], d)...) 
- red = _fgm_red(β, epsilon) - if red != 0 - λmax = min(λmax, 1 / abs(red)) - end - end - # Also ensure |θᵢ| < 1 for all i - for b in β - if b != 0 - λmax = min(λmax, 1 / abs(b)) - end - end - # Stay strictly inside the polytope - return 0.999 * λmax -end - -function _rebound_params(::Type{<:FGMCopula}, d, α) - if d == 2 - # Only one parameter, strictly invertible - return (; θ = tanh.(α)) - end - # For d >= 3, use a safe directional mapping (not fully surjective, but stays in the interior) - β = α - normβ = LinearAlgebra.norm(β) - if normβ == 0 - θ = zeros(length(β)) - else - direction = β / normβ - # Find the maximal λ in this direction, then stay well inside - λmax = _max_lambda(direction, d) - r = exp(normβ) / (1 + exp(normβ)) - λ = 0.95 * λmax * r # 0.95 to stay strictly inside - θ = λ * direction - end - return (; θ = θ) -end - -function _unbound_params(::Type{<:FGMCopula}, d, θ) - θvec = collect(θ.θ) - if d == 2 - # Only one parameter, strictly invertible - return atanh.(θvec) - end - # For d >= 3, use the fast directional mapping, but ensure safety - normθ = LinearAlgebra.norm(θvec) - if normθ == 0 - return zeros(length(θvec)) - end - direction = θvec / normθ - λmax = _max_lambda(direction, d) - # Clamp r to (0, 1-eps()) to avoid Inf/NaN - r = clamp(normθ / λmax, 0.0, 1.0 - eps()) - normβ = log(r / (1 - r)) - return direction * normβ -end function _fgm_red(θ, v) # This function implements the reduction over combinations of the fgm copula. # It is non-alocative thus performant :) @@ -130,6 +70,27 @@ function _fgm_red(θ, v) end return rez end +Base.eltype(C::FGMCopula) = eltype(C.θ) + +# Fitting/params interface +Distributions.params(C::FGMCopula) = (θ = collect(C.θ),) +_example(::Type{<:FGMCopula}, d) = FGMCopula(d, fill(0.5 / (2^d - d - 1), 2^d - d - 1)) +_available_fitting_methods(::Type{<:FGMCopula}, d) = d==2 ? 
(:mle, :itau, :irho, :ibeta) : (:mle,) +function _rebound_params(::Type{<:FGMCopula}, d, α) + d==2 && return (; θ = tanh.(α)) + throw("Cannot do that when d > 2") +end +function _unbound_params(::Type{<:FGMCopula}, d, θ) + d == 2 && return atanh.(collect(θ.θ)) + throw("Cannot do that when d > 2") +end + + + + + + + _cdf(fgm::FGMCopula, u::Vector{T}) where {T} = prod(u) * (1 + _fgm_red(fgm.θ, 1 .-u)) Distributions._logpdf(fgm::FGMCopula, u) = log1p(_fgm_red(fgm.θ, 1 .-2u)) function Distributions._rand!(rng::Distributions.AbstractRNG, fgm::FGMCopula{d, Tθ, Tf}, x::AbstractVector{T}) where {d,Tθ, Tf, T <: Real} @@ -188,3 +149,67 @@ end DistortionFromCop(C::FGMCopula{2}, js::NTuple{1,Int}, uⱼₛ::NTuple{1,Float64}, ::Int) = BivFGMDistortion(float(C.θ[1]), Int8(js[1]), float(uⱼₛ[1])) + + +function _fit(CT::Type{<:FGMCopula}, U, ::Val{:mle}) + d = size(U,1) + + # → 1. Easy case: d == 2, parameter mapping is bijective. + if d == 2 + # generic rank-based routine (agnostic to vcov/inference) + res = Optim.optimize( + α -> -Distributions.loglikelihood(FGMCopula(2, tanh(α[1])), U), + [0.1], + Optim.LBFGS(); + autodiff=:forward + ) + θ = tanh(Optim.minimizer(res)[1]) + return CT(d, θ), (; θ̂=(θ=θ,), + optimizer = Optim.summary(res), + converged = Optim.converged(res), + iterations = Optim.iterations(res)) + end + + # → 2. General FGM (d > 2) with log-barrier or soft barrier + # Construct helper functions + cop(θ) = FGMCopula(d, θ) + θ₀ = Distributions.params(_example(CT, d))[:θ] # starting point in θ-space + + # Log-barrier penalty: ensures all inequalities 1 + _fgm_red(θ, ε) > 0 + function barrier_penalty(θ; μ=1e-3, soft=false) + total = 0.0 + for ε in Base.product(fill([-1,1], d)...) 
+ v = 1 + _fgm_red(θ, ε) + if soft + # Softplus barrier: smooth penalty, finite outside feasible region + total += log1p(exp(-10*v)) / 10 # mild smoothness + else + if v <= 0 + return Inf # hard barrier: outside feasible set + end + total -= μ * log(v) + end + end + return μ * total + end + + # Negative log-likelihood + barrier + function loss(θ) + try + C = cop(θ) + return -Distributions.loglikelihood(C, U) + barrier_penalty(θ) + catch + # If FGMCopula constructor fails (invalid params), return large penalty + return 1e10 + end + end + + # Optimise in θ-space directly (no need for unbound/rebound) + res = Optim.optimize(loss, θ₀, Optim.LBFGS(); autodiff=:forward) + θhat = Optim.minimizer(res) + return FGMCopula(d, θhat), + (; θ̂ = (θ = θhat,), + optimizer = Optim.summary(res), + converged = Optim.converged(res), + iterations = Optim.iterations(res)) +end \ No newline at end of file diff --git a/src/MiscellaneousCopulas/IndependentCopula.jl b/src/MiscellaneousCopulas/IndependentCopula.jl index 3b7bb3f6b..3e1da7ddd 100644 --- a/src/MiscellaneousCopulas/IndependentCopula.jl +++ b/src/MiscellaneousCopulas/IndependentCopula.jl @@ -31,6 +31,8 @@ inverse_rosenblatt(::IndependentCopula{d}, u::AbstractMatrix{<:Real}) where {d} τ(::IndependentCopula) = 0 ρ(::IndependentCopula) = 0 +γ(::IndependentCopula) = 0 +ι(::IndependentCopula) = 0 StatsBase.corkendall(::IndependentCopula{d}) where d = one(zeros(d,d)) StatsBase.corspearman(::IndependentCopula{d}) where d = one(zeros(d,d)) diff --git a/src/MiscellaneousCopulas/MCopula.jl b/src/MiscellaneousCopulas/MCopula.jl index 6c266b188..c6c5bfea2 100644 --- a/src/MiscellaneousCopulas/MCopula.jl +++ b/src/MiscellaneousCopulas/MCopula.jl @@ -23,6 +23,8 @@ function Distributions._rand!(rng::Distributions.AbstractRNG, ::MCopula{d}, x::A end τ(::MCopula) = 1 ρ(::MCopula) = 1 +γ(::MCopula) = 1 +ι(::MCopula) = -Inf StatsBase.corkendall(::MCopula{d}) where d = ones(d,d) StatsBase.corspearman(::MCopula{d}) where d = ones(d,d) diff --git 
a/src/MiscellaneousCopulas/SurvivalCopula.jl b/src/MiscellaneousCopulas/SurvivalCopula.jl index 33cabb956..9814ee53b 100644 --- a/src/MiscellaneousCopulas/SurvivalCopula.jl +++ b/src/MiscellaneousCopulas/SurvivalCopula.jl @@ -31,14 +31,14 @@ References: """ struct SurvivalCopula{d,CT,flips} <: Copula{d} C::CT - function SurvivalCopula{d,CT,flips}(C::CT) where {d,CT,flips} + function SurvivalCopula{d,CT,flips}(C::Copula{d}) where {d,CT,flips} if length(flips) == 0 return C end if typeof(C) == IndependentCopula return C end - return new{d,CT,flips}(C) + return new{d,typeof(C),flips}(C) end SurvivalCopula(C::CT, flips::Tuple) where {d, CT<:Copula{d}} = SurvivalCopula{d,CT,flips}(C) SurvivalCopula(C::CT, flips) where {d, CT<:Copula{d}} = SurvivalCopula(C, tuple(flips...)) @@ -94,7 +94,7 @@ function _fit(::Type{<:SurvivalCopula{d,subCT,flips}}, U, m::Val{:mle}; kwargs.. return SurvivalCopula{d,subCT,flips}(C), meta end -_available_fitting_methods(::Type{<:SurvivalCopula{d,subCT,flips}}) where {d, subCT, flips} = _available_fitting_methods(subCT) +_available_fitting_methods(::Type{<:SurvivalCopula{D,subCT,flips}}, d) where {D, subCT, flips} = _available_fitting_methods(subCT, d) _example(CT::Type{<:SurvivalCopula{D,subCT,flips}}, d) where {D, subCT, flips} = SurvivalCopula(_example(subCT, d), flips) diff --git a/src/SklarDist.jl b/src/SklarDist.jl index 1217982ea..7dd5c678a 100644 --- a/src/SklarDist.jl +++ b/src/SklarDist.jl @@ -59,7 +59,8 @@ Base.eltype(S::SklarDist{CT,TplMargins}) where {CT,TplMargins} = Base.eltype(S.C Distributions.cdf(S::SklarDist{CT,TplMargins},x) where {CT,TplMargins} = Distributions.cdf(S.C, collect(Distributions.cdf.(S.m,x))) function Distributions._rand!(rng::Distributions.AbstractRNG, S::SklarDist{CT,TplMargins}, x::AbstractVector{T}) where {CT,TplMargins,T} Random.rand!(rng,S.C,x) - x .= Distributions.quantile.(S.m,x) + clamp!(x, nextfloat(T(0)), prevfloat(T(1))) + x .= Distributions.quantile.(S.m,x) end function 
Distributions._logpdf(S::SklarDist{CT,TplMargins},u) where {CT,TplMargins} sum(Distributions.logpdf(S.m[i],u[i]) for i in eachindex(u)) + Distributions.logpdf(S.C,clamp.(Distributions.cdf.(S.m,u),0,1)) diff --git a/src/Subsetting.jl b/src/Subsetting.jl index c54e8eafb..195d34002 100644 --- a/src/Subsetting.jl +++ b/src/Subsetting.jl @@ -33,7 +33,7 @@ function SubsetCopula(CS::SubsetCopula{d,CT}, dims2::NTuple{p, Int}) where {d,CT @assert 2 <= p <= d return SubsetCopula(CS.C, ntuple(i -> CS.dims[dims2[i]], p)) end -_available_fitting_methods(::Type{<:SubsetCopula}) = Tuple{}() # cannot be fitted. +_available_fitting_methods(::Type{<:SubsetCopula}, d) = Tuple{}() # cannot be fitted. Base.eltype(C::SubsetCopula{d,CT}) where {d,CT} = Base.eltype(C.C) function Distributions._rand!(rng::Distributions.AbstractRNG, C::SubsetCopula{d,CT}, x::AbstractVector{T}) where {T<:Real, d,CT} u = Random.rand(rng,C.C) @@ -94,11 +94,16 @@ subsetdims(C::Union{Copula, SklarDist}, dims) = subsetdims(C, Tuple(collect(Int, # Pairwise dependence metrics, leveraging subsetting: function _as_biv(f::F, C::Copula{d}) where {F, d} - K = ones(d,d) + first_val = f(SubsetCopula(C, (1,2))) + K = ones(eltype(first_val),d,d) + K[1,2] = first_val + K[2,1] = first_val for i in 1:d for j in i+1:d - K[i,j] = f(SubsetCopula(C, (i,j))) - K[j,i] = K[i,j] + if (i,j) != (1,2) + K[i,j] = f(SubsetCopula(C, (i,j))) + K[j,i] = K[i,j] + end end end return K @@ -107,6 +112,6 @@ StatsBase.corkendall(C::Copula) = _as_biv(τ, C) StatsBase.corspearman(C::Copula) = _as_biv(ρ, C) corblomqvist(C::Copula) = _as_biv(β, C) corgini(C::Copula) = _as_biv(γ, C) -corentropy(C::Copula) = _as_biv(ι, C) +corentropy(C::Copula) = _as_biv(ι, C) - LinearAlgebra.I coruppertail(C::Copula) = _as_biv(λᵤ, C) corlowertail(C::Copula) = _as_biv(λₗ, C) diff --git a/src/Tail/EmpiricalEVTail.jl b/src/Tail/EmpiricalEVTail.jl index 9838bc19f..d86c86c30 100644 --- a/src/Tail/EmpiricalEVTail.jl +++ b/src/Tail/EmpiricalEVTail.jl @@ -168,7 +168,7 @@ end # 
Fitting plug-in (empírico) para EmpiricalEVCopula StatsBase.dof(::EmpiricalEVCopula) = 0 -_available_fitting_methods(::Type{<:EmpiricalEVCopula}) = (:ols, :cfg, :pickands) +_available_fitting_methods(::Type{<:EmpiricalEVCopula}, d) = (:ols, :cfg, :pickands) """ _fit(::Type{<:EmpiricalEVCopula}, U, method::Union{Val{:ols}, Val{:cfg}, Val{:pickands}}; grid::Int=401, eps::Real=1e-3, pseudo_values::Bool=true, kwargs...) -> (C, meta) diff --git a/src/UnivariateDistribution/Distortions/ArchimedeanDistortion.jl b/src/UnivariateDistribution/Distortions/ArchimedeanDistortion.jl index 80d92a955..96a67a8dd 100644 --- a/src/UnivariateDistribution/Distortions/ArchimedeanDistortion.jl +++ b/src/UnivariateDistribution/Distortions/ArchimedeanDistortion.jl @@ -8,10 +8,10 @@ struct ArchimedeanDistortion{TG, T, p} <: Distortion ArchimedeanDistortion(G::TG, p::Int, sJ::T, den::T) where {T<:Real, TG} = new{TG, T, p}(G, sJ, den) end function Distributions.cdf(D::ArchimedeanDistortion{TG, T, p}, u::Real) where {TG, T, p} - return ϕ⁽ᵏ⁾(D.G, Val{p}(), D.sJ + ϕ⁻¹(D.G, float(u))) / D.den + return ϕ⁽ᵏ⁾(D.G, p, D.sJ + ϕ⁻¹(D.G, float(u))) / D.den end function Distributions.quantile(D::ArchimedeanDistortion{TG, T, p}, α::Real) where {TG, T, p} - y = ϕ⁽ᵏ⁾⁻¹(D.G, Val{p}(), α * D.den; start_at = D.sJ) + y = ϕ⁽ᵏ⁾⁻¹(D.G, p, α * D.den; start_at = D.sJ) return ϕ(D.G, y - D.sJ) end ## ConditionalCopula moved next to ArchimedeanCopula definition diff --git a/src/UnivariateDistribution/Frailties/Logarithmic.jl b/src/UnivariateDistribution/Frailties/Logarithmic.jl index 9eef403c8..9a9ef3423 100644 --- a/src/UnivariateDistribution/Frailties/Logarithmic.jl +++ b/src/UnivariateDistribution/Frailties/Logarithmic.jl @@ -9,7 +9,7 @@ struct Logarithmic{T<:Real} <: Distributions.DiscreteUnivariateDistribution end Logarithmic{T}(h) where T = Logarithmic(T(h)) end -Base.eltype(::Logarithmic{T}) where T = promote_type(T,Float64) +Base.eltype(::Logarithmic{T}) where T = T function 
Distributions.logpdf(d::Logarithmic{T}, x::Real) where T insupport(d, x) ? x*log1p(-d.α) - log(x) - log(-log(d.α)) : log(zero(T)) end diff --git a/src/UnivariateDistribution/Radials/WilliamsonFromFrailty.jl b/src/UnivariateDistribution/Radials/WilliamsonFromFrailty.jl index 7c0679c85..a075dbe53 100644 --- a/src/UnivariateDistribution/Radials/WilliamsonFromFrailty.jl +++ b/src/UnivariateDistribution/Radials/WilliamsonFromFrailty.jl @@ -1,9 +1,8 @@ struct WilliamsonFromFrailty{TF,d} <: Distributions.ContinuousUnivariateDistribution frailty_dist::TF - function WilliamsonFromFrailty(frailty_dist,::Val{d}) where d - return new{typeof(frailty_dist),d}(frailty_dist) + function WilliamsonFromFrailty(frailty_dist, d::Int) + return new{typeof(frailty_dist), d}(frailty_dist) end - WilliamsonFromFrailty(frailty_dist,d) = WilliamsonFromFrailty(frailty_dist,Val{d}()) end function Distributions.rand(rng::Distributions.AbstractRNG, D::WilliamsonFromFrailty{TF,d}) where {TF,d} f = rand(rng,D.frailty_dist) diff --git a/src/WilliamsonTransforms.jl b/src/WilliamsonTransforms.jl new file mode 100644 index 000000000..3fbf474cc --- /dev/null +++ b/src/WilliamsonTransforms.jl @@ -0,0 +1,106 @@ +""" + 𝒲(X,d)(x) + +Computes the Williamson d-transform of the random variable X, taken at point x. + +For a univariate non-negative random variable ``X``, with cumulative distribution function ``F`` and an integer ``d\\ge 2``, the Williamson-d-transform of ``X`` is the real function supported on ``[0,\\infty[`` given by: + +```math +\\phi(t) = 𝒲_{d}(X)(t) = \\int_{t}^{\\infty} \\left(1 - \\frac{t}{x}\\right)^{d-1} dF(x) = \\mathbb E\\left( (1 - \\frac{t}{X})^{d-1}_+\\right) \\mathbb 1_{t > 0} + \\left(1 - F(0)\\right)\\mathbb 1_{t <0} +``` + +This function has several properties: + - We have that ``\\phi(0) = 1`` and ``\\phi(Inf) = 0`` + - ``\\phi`` is ``d-2`` times derivable, and the signs of its derivatives alternates : ``\\forall k \\in 0,...,d-2, (-1)^k \\phi^{(k)} \\ge 0``. 
+ - ``\\phi^{(d-2)}`` is convex. + +These properties makes this function what is called an *archimedean generator*, able to generate *archimedean copulas* in dimensions up to ``d``. + +References: +- Williamson, R. E. (1956). Multiply monotone functions and their Laplace transforms. Duke Math. J. 23 189–207. MR0077581 +- McNeil, Alexander J., and Johanna Nešlehová. "Multivariate Archimedean copulas, d-monotone functions and ℓ 1-norm symmetric distributions." (2009): 3059-3097. +""" +struct 𝒲{TX, d} + X::TX + function 𝒲(X::TX, d::Int) where {TX<:Distributions.UnivariateDistribution} + @assert Base.minimum(X) ≥ 0 && Base.maximum(X) ≤ Inf + @assert d ≥ 2 && isinteger(d) + return new{typeof(X), d}(X) + end +end +function (ϕ::𝒲{TX, d})(x) where {TX,d} + x <= 0 && return 1 - Distributions.cdf(ϕ.X,0) + return Distributions.expectation(y -> (1 - x/y)^(d-1) * (y > x), ϕ.X) +end +function (ϕ::𝒲{TX, d})(x::TaylorSeries.Taylor1{TF}) where {TX,d, TF} + x <= 0 && return 1 - Distributions.cdf(ϕ.X,0) + zero(x) + x₀ = x.coeffs[1] + p = length(x.coeffs) + function f(i, y) + y < x₀ && return zero(y) + xᵢ = TaylorSeries.Taylor1(x.coeffs[1:i]) + r = (1 - x/y)^(d-1) + return r.coeffs[i] + end + return TaylorSeries.Taylor1([Distributions.expectation(y -> f(i, y), ϕ.X) for i in 1:p]) +end + +""" + 𝒲₋₁(ϕ,d) + + +Computes the inverse Williamson d-transform of the d-monotone archimedean generator ϕ. + +A ``d``-monotone archimedean generator is a function ``\\phi`` on ``\\mathbb R_+`` that has these three properties: +- ``\\phi(0) = 1`` and ``\\phi(Inf) = 0`` +- ``\\phi`` is ``d-2`` times derivable, and the signs of its derivatives alternates : ``\\forall k \\in 0,...,d-2, (-1)^k \\phi^{(k)} \\ge 0``. +- ``\\phi^{(d-2)}`` is convex. 
+ +For such a function ``\\phi``, the inverse Williamson-d-transform of ``\\phi`` is the cumulative distribution function ``F`` of a non-negative random variable ``X``, defined by : + +```math +F(x) = 𝒲_{d}^{-1}(\\phi)(x) = 1 - \\frac{(-x)^{d-1} \\phi_+^{(d-1)}(x)}{k!} - \\sum_{k=0}^{d-2} \\frac{(-x)^k \\phi^{(k)}(x)}{k!} +``` + +We return this cumulative distribution function in the form of the corresponding random variable `<:Distributions.ContinuousUnivariateDistribution` from `Distributions.jl`. You may then compute : + - The cdf via `Distributions.cdf` + - The pdf via `Distributions.pdf` and the logpdf via `Distributions.logpdf` + - Samples from the distribution via `rand(X,n)` + +References: + - Williamson, R. E. (1956). Multiply monotone functions and their Laplace transforms. Duke Math. J. 23 189–207. MR0077581 + - McNeil, Alexander J., and Johanna Nešlehová. "Multivariate Archimedean copulas, d-monotone functions and ℓ 1-norm symmetric distributions." (2009): 3059-3097. +""" +struct 𝒲₋₁{Tϕ, d} <: Distributions.ContinuousUnivariateDistribution + # Woul dprobably be much more efficient if it took the generator and not the function itself. + ϕ::Tϕ + function 𝒲₋₁(ϕ, d::Int) + @assert ϕ(0.0) == 1.0 + @assert ϕ(float(Inf)) == 0.0 + @assert isinteger(d) + return new{typeof(ϕ),d}(ϕ) + end +end +function Distributions.cdf(dist::𝒲₋₁{Tϕ, d}, x) where {Tϕ, d} + x ≤ 0 && return zero(x) + rez, x_pow = zero(x), one(x) + c = taylor(dist.ϕ, x, d-1) + for k in 1:d + rez += iszero(c[k]) ? 0 : x_pow * c[k] + x_pow *= -x + end + return isnan(rez) ? 
one(x) : 1 - rez +end + +Distributions.logpdf(dist::𝒲₋₁{Tϕ, d}, x) where {Tϕ, d} = log(max(0, taylor(x -> Distributions.cdf(dist,x), x, 1)[end])) +_quantile(dist::𝒲₋₁, p) = Roots.find_zero(x -> (Distributions.cdf(dist, x) - p), (0.0, Inf)) +Distributions.rand(rng::Distributions.AbstractRNG, dist::𝒲₋₁) = _quantile(dist, rand(rng)) +Base.minimum(::𝒲₋₁) = 0.0 +Base.maximum(::𝒲₋₁) = Inf +function Distributions.quantile(dist::𝒲₋₁, p::Real) + # Validate that p is in the range [0, 1] + @assert 0 <= p <= 1 + return _quantile(dist, p) +end + + diff --git a/src/show.jl b/src/show.jl index 64f0c9794..7845d5826 100644 --- a/src/show.jl +++ b/src/show.jl @@ -49,220 +49,235 @@ end function Base.show(io::IO, C::CheckerboardCopula{d}) where {d} print(io, "CheckerboardCopula{", d, "} ⟨m=", C.m, "⟩") end +function _fmt_copula_family(C) + fam = String(nameof(typeof(C))) + fam = endswith(fam, "Copula") ? fam[1:end-6] : fam + return string(fam, " d=", length(C)) +end +""" +Small horizontal rule for section separation. +""" +_hr(io) = println(io, "────────────────────────────────────────────────────────────────────────────────") + +""" +Pretty p-value formatting: show very small values as inequalities. +""" +_pstr(p) = p < 1e-16 ? "<1e-16" : Printf.@sprintf("%.4g", p) + +""" +Key-value aligned printing for header lines. +""" +function _kv(io, key::AbstractString, val) + Printf.@printf(io, "%-22s %s\n", key * ":", val) +end + +""" +Render a section header with optional suffix, surrounded by horizontal rules. +""" +function _section(io, title::AbstractString; suffix::Union{Nothing,AbstractString}=nothing) + _hr(io) + if suffix === nothing + println(io, "[ ", title, " ]") + else + println(io, "[ ", title, " ] ", suffix) + end + _hr(io) +end + +""" +Print a standardized parameter section with optional covariance matrix and vcov method note. 
+""" +function _print_param_section(io, title::AbstractString, nm::Vector{String}, θ::Vector{Float64}; + V::Union{Nothing,AbstractMatrix}=nothing, + vcov_method::Union{Nothing,AbstractString,Symbol}=nothing) + suf = vcov_method === nothing ? nothing : string("(vcov=", String(vcov_method), ")") + _section(io, title; suffix=suf) + _print_param_table(io, nm, θ; V=V) +end + +""" +Print dependence metrics if available/supported by the copula C. +""" +function _print_dependence_metrics(io, C; derived_measures::Bool=true) + _section(io, "Dependence metrics") + if !derived_measures + println(io, "(suppressed)") + return + end + _has(f) = isdefined(Copulas, f) && hasmethod(getfield(Copulas, f), Tuple{typeof(C)}) + shown_any = false + try + if _has(:τ); _kv(io, "Kendall τ", Printf.@sprintf("%.4f", Copulas.τ(C))); shown_any = true; end + if _has(:ρ); _kv(io, "Spearman ρ", Printf.@sprintf("%.4f", Copulas.ρ(C))); shown_any = true; end + if _has(:β); _kv(io, "Blomqvist β",Printf.@sprintf("%.4f", Copulas.β(C))); shown_any = true; end + if _has(:γ); _kv(io, "Gini γ", Printf.@sprintf("%.4f", Copulas.γ(C))); shown_any = true; end + if _has(:λᵤ); _kv(io, "Upper λᵤ", Printf.@sprintf("%.4f", Copulas.λᵤ(C))); shown_any = true; end + if _has(:λₗ); _kv(io, "Lower λₗ", Printf.@sprintf("%.4f", Copulas.λₗ(C))); shown_any = true; end + if _has(:ι); _kv(io, "Entropy ι", Printf.@sprintf("%.4f", Copulas.ι(C))); shown_any = true; end + catch + # proceed without failing show + end + if !shown_any + println(io, "(none available)") + end +end +function _print_param_table(io, nm::Vector{String}, θ::Vector{Float64}; V::Union{Nothing, AbstractMatrix}=nothing) + if V === nothing || isempty(θ) + Printf.@printf(io, "%-10s %10s\n", "Parameter", "Estimate") + @inbounds for (j, name) in pairs(nm) + Printf.@printf(io, "%-10s %10.4f\n", String(name), θ[j]) + end + return + end + se = sqrt.(LinearAlgebra.diag(V)) + z = θ ./ se + p = 2 .* Distributions.ccdf.(Distributions.Normal(), abs.(z)) + lo, hi = (θ .- 
1.959963984540054 .* se, θ .+ 1.959963984540054 .* se) + Printf.@printf(io, "%-10s %10s %9s %9s %8s %10s %10s\n", + "Parameter","Estimate","Std.Err","z-value","p-val","95% Lo","95% Hi") + @inbounds for j in eachindex(θ) + Printf.@printf(io, "%-10s %10.4f %9.4f %9.3f %8s %10.4f %10.4f\n", + String(nm[j]), θ[j], se[j], z[j], _pstr(p[j]), lo[j], hi[j]) + end +end + +function _margin_param_names(mi) + T = typeof(mi) + return if T <: Distributions.Gamma; ("α","θ") + elseif T <: Distributions.Beta; ("α","β") + elseif T <: Distributions.LogNormal; ("μ","σ") + elseif T <: Distributions.Normal; ("μ","σ") + elseif T <: Distributions.Exponential; ("θ",) + elseif T <: Distributions.Weibull; ("k","λ") + elseif T <: Distributions.Pareto; ("α","θ") + else + k = length(Distributions.params(mi)); ntuple(j->"θ$(j)", k) + end +end + function Base.show(io::IO, M::CopulaModel) R = M.result - # Header: family/margins without helper functions + # Split: [ CopulaModel: ... ] vs [ Fit metrics ] if R isa SklarDist - # Build copula family label - famC = String(nameof(typeof(R.C))) - famC = endswith(famC, "Copula") ? famC[1:end-6] : famC - famC = string(famC, " d=", length(R.C)) - # Margins label + famC = _fmt_copula_family(R.C) mnames = map(mi -> String(nameof(typeof(mi))), R.m) margins_lbl = "(" * join(mnames, ", ") * ")" - println(io, "SklarDist{Copula=", famC, ", Margins=", margins_lbl, "} fitted via ", M.method) + _section(io, "CopulaModel: SklarDist"; suffix="(Copula=" * famC * ", Margins=" * margins_lbl * ")") else - fam = String(nameof(typeof(R))) - fam = endswith(fam, "Copula") ? 
fam[1:end-6] : fam - fam = string(fam, " d=", length(R)) - println(io, fam, " fitted via ", M.method) + _section(io, "CopulaModel: " * _fmt_copula_family(R)) end - - n = StatsBase.nobs(M) - ll = M.ll - Printf.@printf(io, "Number of observations: %9d\n", n) - - ll0 = get(M.method_details, :null_ll, NaN) - if isfinite(ll0) - Printf.@printf(io, "Null Loglikelihood: %12.4f\n", ll0) + if R isa SklarDist + famC = _fmt_copula_family(R.C) + mnames = map(mi -> String(nameof(typeof(mi))), R.m) + margins_lbl = "(" * join(mnames, ", ") * ")" + skm = get(M.method_details, :sklar_method, nothing) + _kv(io, "Copula", famC) + _kv(io, "Margins", margins_lbl) + if skm === nothing + _kv(io, "Methods", "copula=" * String(M.method)) + else + _kv(io, "Methods", "copula=" * String(M.method) * ", sklar=" * String(skm)) + end + else + _kv(io, "Method", String(M.method)) end - Printf.@printf(io, "Loglikelihood: %12.4f\n", ll) + _kv(io, "Number of observations", Printf.@sprintf("%d", StatsBase.nobs(M))) - # Para el test LR usa g.l. de la CÓPULA si es SklarDist - kcop = (R isa SklarDist) ? StatsBase.dof(_copula_of(M)) : StatsBase.dof(M) + _section(io, "Fit metrics") + ll = M.ll + ll0 = get(M.method_details, :null_ll, NaN) + if isfinite(ll0); _kv(io, "Null Loglikelihood", Printf.@sprintf("%12.4f", ll0)); end + _kv(io, "Loglikelihood", Printf.@sprintf("%12.4f", ll)) + kcop = StatsBase.dof(M) if isfinite(ll0) && kcop > 0 LR = 2*(ll - ll0) p = Distributions.ccdf(Distributions.Chisq(kcop), LR) - Printf.@printf(io, "LR Test (vs indep. copula): %.2f ~ χ²(%d) => p = %.4g\n", LR, kcop, p) + _kv(io, "LR (vs indep.)", Printf.@sprintf("%.2f ~ χ²(%d) ⇒ p = %s", LR, kcop, _pstr(p))) end - aic = StatsBase.aic(M); bic = StatsBase.bic(M) - Printf.@printf(io, "AIC: %.3f BIC: %.3f\n", aic, bic) + _kv(io, "AIC", Printf.@sprintf("%.3f", aic)) + _kv(io, "BIC", Printf.@sprintf("%.3f", bic)) if isfinite(M.elapsed_sec) || M.iterations != 0 || M.converged != true conv = M.converged ? 
"true" : "false" + _kv(io, "Converged", conv) + _kv(io, "Iterations", string(M.iterations)) tsec = isfinite(M.elapsed_sec) ? Printf.@sprintf("%.3fs", M.elapsed_sec) : "NA" - println(io, "Converged: $(conv) Iterations: $(M.iterations) Elapsed: $(tsec)") + _kv(io, "Elapsed", tsec) end - # Branches: SklarDist → sections; empirical → summary; else → coefficient table if R isa SklarDist - # [ Copula ] section - C = _copula_of(M) - θ = StatsBase.coef(M) + # [ Dependence metrics ] section + C = M.result isa SklarDist ? M.result.C : M.result + _print_dependence_metrics(io, C; derived_measures=get(M.method_details, :derived_measures, true)) + + # [ Copula parameters ] section + θ = StatsBase.coef(M) nm = StatsBase.coefnames(M) - V = StatsBase.vcov(M) - lvl = 95 - println(io, "──────────────────────────────────────────────────────────") - println(io, "[ Copula ]") - println(io, "──────────────────────────────────────────────────────────") - fam = String(nameof(typeof(C))); fam = endswith(fam, "Copula") ? 
fam[1:end-6] : fam; fam = string(fam, " d=", length(C)) - Printf.@printf(io, "%-16s %-9s %10s %10s %12s\n", "Family","Param","Estimate","Std.Err","$lvl% CI") - if V === nothing || isempty(θ) - @inbounds for j in eachindex(θ) - Printf.@printf(io, "%-16s %-9s %10.3g %10s %12s\n", fam, String(nm[j]), θ[j], "—", "—") - end - else - se = sqrt.(LinearAlgebra.diag(V)) - lo, hi = StatsBase.confint(M; level=0.95) - @inbounds for j in eachindex(θ) - Printf.@printf(io, "%-16s %-9s %10.3g %10.3g [%0.3g, %0.3g]\n", fam, String(nm[j]), θ[j], se[j], lo[j], hi[j]) - end - end - if isdefined(Copulas, :τ) && hasmethod(Copulas.τ, Tuple{typeof(C)}) - τth = Copulas.τ(C) - Printf.@printf(io, "%-16s %-9s %10.3g %10s %12s\n", "Kendall", "τ(θ)", τth, "—", "—") - end + md = M.method_details + Vcop = get(md, :vcov_copula, nothing) + vcovm = get(md, :vcov_method, nothing) + _print_param_section(io, "Copula parameters", nm, θ; V=Vcop, vcov_method=vcovm) # [ Marginals ] section - S = R::SklarDist - println(io, "──────────────────────────────────────────────────────────") - println(io, "[ Marginals ]") - println(io, "──────────────────────────────────────────────────────────") - Printf.@printf(io, "%-6s %-12s %-7s %10s %10s %12s\n", "Margin","Dist","Param","Estimate","Std.Err","$lvl% CI") - for (i, mi) in enumerate(S.m) - pname = String(nameof(typeof(mi))) - θi = Distributions.params(mi) - # Inline param name mapping - T = typeof(mi) - names = if T <: Distributions.Gamma; ("α","θ") - elseif T <: Distributions.Beta; ("α","β") - elseif T <: Distributions.LogNormal; ("μ","σ") - elseif T <: Distributions.Normal; ("μ","σ") - elseif T <: Distributions.Exponential; ("θ",) - elseif T <: Distributions.Weibull; ("k","λ") - elseif T <: Distributions.Pareto; ("α","θ") - else - k = length(θi); ntuple(j->"θ$(j)", k) - end - @inbounds for j in eachindex(θi) - lab = (j == 1) ? 
"#$(i)" : "" - Printf.@printf(io, "%-6s %-12s %-7s %10.3g %10s %12s\n", lab, pname, names[j], θi[j], "—", "—") - end - end - elseif StatsBase.dof(M) == 0 || M.method == :emp - # Empirical summary - md = M.method_details - kind = get(md, :emp_kind, :unspecified) - d = get(md, :d, missing) - n = get(md, :n, missing) - pv = get(md, :pseudo_values, missing) + _print_marginals_section(io, R::SklarDist, get(M.method_details, :vcov_margins, nothing)) + else + # Copula-only fits: dependence metrics and parameters + C0 = M.result isa SklarDist ? M.result.C : M.result + _print_dependence_metrics(io, C0; derived_measures=get(M.method_details, :derived_measures, true)) + θ = StatsBase.coef(M) + nm = StatsBase.coefnames(M) + vcovm = get(M.method_details, :vcov_method, nothing) + _print_param_section(io, "Copula parameters", nm, θ; V=StatsBase.vcov(M), vcov_method=vcovm) - hdr = "d=$(d), n=$(n)" * (pv === missing ? "" : ", pseudo_values=$(pv)") - extra = "" - if kind === :bernstein - m = get(md, :m, nothing) - extra = m === nothing ? "" : ", m=$(m)" - elseif kind === :exact - m = get(md, :m, nothing) - extra = m === nothing ? "" : ", m=$(m)" - elseif kind === :ev_tail - method = get(md, :method, :unspecified) - grid = get(md, :grid, missing) - eps = get(md, :eps, missing) - extra = ", method=$(method), grid=$(grid), eps=$(eps)" - end + end +end - println(io, "Empirical summary ($kind)") - println(io, hdr * extra) +""" +Print the Marginals section for a SklarDist using precomputed Vm if available. 
+""" +function _print_marginals_section(io, S::SklarDist, Vm) + _section(io, "Marginals") + Printf.@printf(io, "%-6s %-10s %-6s %10s %9s %s\n", + "Margin","Dist","Param","Estimate","Std.Err","95% CI") - # Estadísticos clásicos - has_tau = all(haskey.(Ref(md), (:tau_mean, :tau_sd, :tau_min, :tau_max))) - has_rho = all(haskey.(Ref(md), (:rho_mean, :rho_sd, :rho_min, :rho_max))) - has_beta = all(haskey.(Ref(md), (:beta_mean, :beta_sd, :beta_min, :beta_max))) - has_gamma = all(haskey.(Ref(md), (:gamma_mean, :gamma_sd, :gamma_min, :gamma_max))) + crit = 1.959963984540054 - if d === missing || d == 2 - println(io, "────────────────────────────") - Printf.@printf(io, "%-10s %18s\n", "Stat", "Value") - println(io, "────────────────────────────") - if has_tau; Printf.@printf(io, "%-10s %18.3f\n", "tau", md[:tau_mean]); end - if has_rho; Printf.@printf(io, "%-10s %18.3f\n", "rho", md[:rho_mean]); end - if has_beta; Printf.@printf(io, "%-10s %18.3f\n", "beta", md[:beta_mean]); end - if has_gamma; Printf.@printf(io, "%-10s %18.3f\n", "gamma", md[:gamma_mean]); end - println(io, "────────────────────────────") - else - println(io, "───────────────────────────────────────────────────────") - Printf.@printf(io, "%-10s %10s %10s %10s %10s\n", "Stat", "Mean", "SD", "Min", "Max") - println(io, "───────────────────────────────────────────────────────") - if has_tau - Printf.@printf(io, "%-10s %10.3f %10.3f %10.3f %10.3f\n", - "tau", md[:tau_mean], md[:tau_sd], md[:tau_min], md[:tau_max]) - end - if has_rho - Printf.@printf(io, "%-10s %10.3f %10.3f %10.3f %10.3f\n", - "rho", md[:rho_mean], md[:rho_sd], md[:rho_min], md[:rho_max]) - end - if has_beta - Printf.@printf(io, "%-10s %10.3f %10.3f %10.3f %10.3f\n", - "beta", md[:beta_mean], md[:beta_sd], md[:beta_min], md[:beta_max]) - end - if has_gamma - Printf.@printf(io, "%-10s %10.3f %10.3f %10.3f %10.3f\n", - "gamma", md[:gamma_mean], md[:gamma_sd], md[:gamma_min], md[:gamma_max]) - end - println(io, 
"───────────────────────────────────────────────────────") - end - else - # Coefficient table - params = Distributions.params(_copula_of(M)) + _valid_cov(V, p) = V !== nothing && + ndims(V) == 2 && + size(V) == (p, p) && + all(isfinite, Matrix(V)) && + all(LinearAlgebra.diag(Matrix(V)) .>= 0.0) - # Linearize the parameters: - θ = Float64[] - nm = String[] - for (k, v) in pairs(params) - if isa(v, Number) - push!(θ, float(v)) - push!(nm, String(k)) - elseif isa(v, AbstractMatrix) - for i in axes(v, 1), j in axes(v, 2) - push!(θ, float(v[i, j])) - push!(nm, "$(k)_$(i)_$(j)") - end - elseif isa(v, AbstractVector) - for i in eachindex(v) - push!(θ, float(v[i])) - push!(nm, "$(k)_$(i)") - end - else - try - push!(θ, float(v)) - push!(nm, String(k)) - catch - end + for (i, mi) in enumerate(S.m) + pname = String(nameof(typeof(mi))) + θi_nt = Distributions.params(mi) + names = _margin_param_names(mi) + vals = Float64.(collect(θi_nt)) + p = length(vals) + + # Use only the precomputed covariance from fitting, if available and valid + Vi = nothing + if Vm isa Vector && 1 <= i <= length(Vm) + Vh = Vm[i] + if _valid_cov(Vh, p) + Vi = Vh end end - V = StatsBase.vcov(M) - if V === nothing || isempty(θ) - println(io, "────────────────────────────────────────") - Printf.@printf(io, "%-14s %12s\n", "Parameter", "Estimate") - println(io, "────────────────────────────────────────") - @inbounds for (j, name) in pairs(nm) - Printf.@printf(io, "%-14s %12.6g\n", String(name), θ[j]) - end - println(io, "────────────────────────────────────────") - else - se = sqrt.(LinearAlgebra.diag(V)) - z = θ ./ se - p = 2 .* Distributions.ccdf(Distributions.Normal(), abs.(z)) - lo, hi = StatsBase.confint(M; level=0.95) - println(io, "────────────────────────────────────────────────────────────────────────────────────────") - Printf.@printf(io, "%-14s %12s %12s %9s %10s %12s %12s\n", "Parameter","Estimate","Std.Err","z-value","Pr(>|z|)","95% Lo","95% Hi") - println(io, 
"────────────────────────────────────────────────────────────────────────────────────────") - @inbounds for j in eachindex(θ) - Printf.@printf(io, "%-14s %12.6g %12.6g %9.3f %10.3g %12.6g %12.6g\n", String(nm[j]), θ[j], se[j], z[j], p[j], lo[j], hi[j]) + dV = (Vi !== nothing) ? LinearAlgebra.diag(Matrix(Vi)) : fill(NaN, p) + se = sqrt.(max.(dV, 0.0)) + @inbounds for j in 1:p + lab = (j == 1) ? "#$(i)" : "" + distcol = (j == 1) ? pname : "" + est_str = Printf.@sprintf("%.4f", vals[j]) + se_str = isfinite(se[j]) ? Printf.@sprintf("%.4f", se[j]) : "—" + if isfinite(se[j]) + ci_str = Printf.@sprintf("[%.4f, %.4f]", vals[j] - crit*se[j], vals[j] + crit*se[j]) + else + ci_str = "—" end - println(io, "────────────────────────────────────────────────────────────────────────────────────────") + Printf.@printf(io, "%-6s %-10s %-6s %10s %9s %s\n", + lab, distcol, names[j], est_str, se_str, ci_str) end end end diff --git a/src/utils.jl b/src/utils.jl index ddc8cb5f0..012f8c239 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -12,7 +12,31 @@ _invmono(f; tol=1e-8, θmax=1e6, a=0.0, b=1.0) = begin Roots.find_zero(f, (a,b), Roots.Brent(); atol=tol, rtol=tol) end +""" + taylor(f::F, x₀, d::Int) where {F} + +Compute the Taylor series expansion of the function `f` around the point `x₀` up to order `d`, and gives you back the derivatives as a vector of length d+1. (first value is f(x₀)). +# Arguments +- `f`: A function to be expanded. +- `x₀`: The point around which to expand the Taylor series. +- `d`: The order up to which the Taylor series is computed. + +# Returns +A tuple with value ``(f(x₀), f'(x₀),...,f^{(d)}(x₀))``. 
+""" +function taylor(f::F, x₀, d::Int) where {F} + rez = f(x₀ + TaylorSeries.Taylor1(eltype(x₀), d)).coeffs + p = length(rez) + # The length of rez is no longer always equal to d+1 since updates in TaylorSeries.jl, so we enforce it: + p == d+1 && return rez + if p < d+1 + v = zeros(d+1) + v[1:p] .= rez + return v + end + return rez[1:d+1] +end """ @@ -124,26 +148,29 @@ function corentropy(X::AbstractMatrix{<:Real}; k::Int=5, p::Real=Inf, leafsize:: end Ucol = [Cnan[j] ? Float64[] : collect(@view X[:, j]) for j in 1:n] H = zeros(Float64, n, n) + H = zeros(Float64, n, n) Ub = Array{Float64}(undef, 2, m) @inbounds for j in 2:n if Cnan[j] - H[:, j] .= NaN; H[j, :] .= NaN; H[j, j] = 0.0 + H[:, j] .= NaN + H[j, :] .= NaN + H[j, j] = 0.0 continue end uj = Ucol[j] for i in 1:j-1 if Cnan[i] - H[i, j] = H[j, i] = NaN + H[i, j] = NaN + H[j, i] = NaN continue end ui = Ucol[i] Ub[1, :] .= ui; Ub[2, :] .= uj - entropy = ι(Ub; k=k, p=p, leafsize=leafsize) - H[i, j] = entropy - H[j, i] = entropy + H[i, j] = ι(Ub; k=k, p=p, leafsize=leafsize) end end return H + return H end function _cortail(X::AbstractMatrix{<:Real}; t = :lower, method = :SchmidtStadtmueller, p = nothing) # We expect the number of dimension to be the second axes here, diff --git a/test/ArchimedeanCopulas.jl b/test/ArchimedeanCopulas.jl index 31c3e9f5d..2cb0b4b66 100644 --- a/test/ArchimedeanCopulas.jl +++ b/test/ArchimedeanCopulas.jl @@ -208,7 +208,7 @@ end @test Copulas.ρ⁻¹(AMHCopula, -0.2246) ≈ -0.8 atol=1.0e-3 end -@testitem "Testing empirical tail values of certain copula samples" tags=[:ArchimedeanCopula, :ClaytonCopula, :GumbelCopula, :AMHCopula, :FrankCopula] begin +@testitem "Testing empirical tail values of certain copula samples" tags=[:ArchimedeanCopula, :ClaytonCopula, :GumbelCopula, :AMHCopula, :FrankCopula, :Heavy] begin # [GenericTests integration]: Probably too stochastic and slow for generic; relies on large random samples and fragile tail estimates. 
# Keep as targeted property tests here; if needed, add a lighter tail-coherency smoke test generically.
     using StableRNGs
 
@@ -232,28 +232,28 @@
 
     # Gumbel
     rng = StableRNG(123)
-    x = rand(rng,GumbelCopula(3,2.), 100_000)
-    @test_broken tail(x[:,1], x[:,2], "r") ≈ 2-2^(1/2) atol=1.0e-1
+    x = rand(rng,GumbelCopula(3,2.), 40_000)
+    @test_broken tail(x[:,1], x[:,2], "r") ≈ 2-2^(1/2) atol=1.0e-1
     @test tail(x[:,1], x[:,2], "l", 0.00001) ≈ 0.
     @test tail(x[:,1], x[:,3], "l", 0.00001) ≈ 0.
 
     # Clayton
     rng = StableRNG(123)
-    x = rand(rng,ClaytonCopula(3,1.), 100_000)
+    x = rand(rng,ClaytonCopula(3,1.), 40_000)
     @test_broken tail(x[:,1], x[:,2], "l") ≈ 2.0^(-1) atol=1.0e-1
     @test_broken tail(x[:,1], x[:,3], "l") ≈ 2.0^(-1) atol=1.0e-1
     @test tail(x[:,1], x[:,2], "r", 0.0001) ≈ 0
 
     # AMH
     rng = StableRNG(123)
-    x = rand(rng,AMHCopula(3,0.8), 100_000)
+    x = rand(rng,AMHCopula(3,0.8), 40_000)
     @test tail(x[:,1], x[:,2], "l", 0.0001) ≈ 0
     @test tail(x[:,1], x[:,2], "r", 0.0001) ≈ 0
 
     # Frank
     rng = StableRNG(123)
-    x = rand(rng,FrankCopula(3,0.8), 100_000)
+    x = rand(rng,FrankCopula(3,0.8), 40_000)
     @test tail(x[:,1], x[:,2], "l", 0.0001) ≈ 0
     @test tail(x[:,1], x[:,2], "r", 0.0001) ≈ 0
 end
@@ -278,7 +278,7 @@
     @test check_rnd(InvGaussianCopula, 0, 1/2, 10)
 end
 
-@testitem "Test of ρ ∘ ρ⁻¹ = Id" tags=[:ArchimedeanCopula, :ClaytonCopula, :GumbelCopula, :AMHCopula, :FrankCopula, :GumbelBarnettCopula, :InvGaussianCopula, :OneRing] begin
+@testitem "Test of ρ ∘ ρ⁻¹ = Id" tags=[:ArchimedeanCopula, :ClaytonCopula, :GumbelCopula, :AMHCopula, :FrankCopula, :GumbelBarnettCopula, :InvGaussianCopula] begin # [GenericTests integration]: Not yet. ρ⁻¹ is not uniformly available/accurate; keep here as broken placeholders until APIs solidify.
using Random using InteractiveUtils diff --git a/test/ConditionalDistribution.jl b/test/ConditionalDistribution.jl index ffb10ca76..83640cbf6 100644 --- a/test/ConditionalDistribution.jl +++ b/test/ConditionalDistribution.jl @@ -112,7 +112,7 @@ end # Compare to MVNormal conditioning on z-scale I = Tuple(setdiff(1:d, J)) dI = length(I) - for _ in 1:5 + for _ in 1:3 uI = rand(rng, dI)./5 .+ 2/5 zI = quantile.(Normal(), uI) zJ = quantile.(Normal(), collect(uJ)) @@ -154,8 +154,8 @@ end SJ = sum(Copulas.ϕ⁻¹(C.G, v) for v in uJ) SI = sum(Copulas.ϕ⁻¹(C.G, u) for u in uI) S_full = SJ + SI - num = Copulas.ϕ⁽ᵏ⁾(C.G, Val{p}(), S_full) - den = Copulas.ϕ⁽ᵏ⁾(C.G, Val{p}(), SJ) + num = Copulas.ϕ⁽ᵏ⁾(C.G, p, S_full) + den = Copulas.ϕ⁽ᵏ⁾(C.G, p, SJ) expected = num / den # Evaluate model got = cdf(CC, collect(uI)) @@ -198,7 +198,7 @@ end J = Tuple(reverse(collect(js))) Y = condition(X, J, xⱼₛ) - for _ in 1:5 + for _ in 1:3 t = randn(rng, 2) A, r = mvnormcdf(Y_mock, fill(-Inf, 2), t) B = cdf(Y, t) diff --git a/test/ExtremeValueCopulas.jl b/test/ExtremeValueCopulas.jl index 7e8fb3ba6..aa4ee796b 100644 --- a/test/ExtremeValueCopulas.jl +++ b/test/ExtremeValueCopulas.jl @@ -118,9 +118,9 @@ end end -@testitem "Extreme Galambos density test" tags=[:ExtremeValueCopula, :GalambosCopula] begin +@testitem "Extreme Galambos density test" tags=[:ExtremeValueCopula, :GalambosCopula, :Heavy] begin # [GenericTests integration]: No. This is a trivial smoke test to catch crashes at extreme params; keep as minimal targeted test. 
- rand(GalambosCopula(2, 19.7), 1000) - rand(GalambosCopula(2, 210.0), 1000) + rand(GalambosCopula(2, 19.7), 400) + rand(GalambosCopula(2, 210.0), 400) @test true end \ No newline at end of file diff --git a/test/FittingTest.jl b/test/FittingTest.jl index 7f6d8cd7f..55a08fa44 100644 --- a/test/FittingTest.jl +++ b/test/FittingTest.jl @@ -1,99 +1,188 @@ +@testitem "Fitting + vcov + StatsBase interfaces" tags=[:fitting, :vcov, :statsbase] begin + using Test, Random, Distributions, Copulas, StableRNGs, LinearAlgebra, Statistics, StatsBase + rng = StableRNG(2025) + function _flatten_params(p::NamedTuple) + if haskey(p, :Σ) + Σ = p.Σ + return [Σ[i, j] for i in 1:size(Σ,1)-1 for j in (i+1):size(Σ,2)] + end + vals = Any[] + for v in values(p) + if isa(v, Number) + push!(vals, Float64(v)) + else + append!(vals, vec(Float64.(v))) + end + end + return vals + end + reps = [ + # Elliptical + (GaussianCopula, 2, :mle), + (GaussianCopula, 3, :mle), + # Archimedean one parameter + (ClaytonCopula, 2, :mle), + (GumbelCopula, 2, :itau), + (FrankCopula, 2, :mle), + (JoeCopula, 2, :itau), -@testitem "Fitting smoke test" tags=[:Fitting] begin - using Test - using Random - using Distributions - using Copulas - using StableRNGs - rng = StableRNG(123) + # Archimedean two params + (BB6Copula, 2, :mle), + (BB7Copula, 2, :mle), + + # Bivariate Extreme Value + (GalambosCopula, 2, :mle), + (HuslerReissCopula, 2, :mle), + ] + + # helper + function psd_ok(V; tol=1e-7) + vals = eigvals(Symmetric(Matrix(V))) + minimum(vals) >= -tol + end + + n = 500 # maybe this size is large? + + for (CT, d, method) in reps + @info "Testing: $CT, d=$d, method=$method..." 
+ C0 = Copulas._example(CT, d) + true_θ = _flatten_params(Distributions.params(C0)) + U = rand(rng, C0, n) + M = fit(CopulaModel, CT, U; method=method, vcov=true, derived_measures=false) + + @testset "Core Fitting & Inference" begin + estimated_θ = StatsBase.coef(M) + @test estimated_θ ≈ true_θ atol=0.5 + + @test isa(StatsBase.vcov(M), AbstractMatrix) + @test size(StatsBase.vcov(M)) == (StatsBase.dof(M), StatsBase.dof(M)) + @test psd_ok(StatsBase.vcov(M)) + + se = StatsBase.stderror(M) + @test length(se) == StatsBase.dof(M) + lo, hi = StatsBase.confint(M; level=0.95) + @test length(lo) == length(hi) == StatsBase.dof(M) + end + + @testset "Information Criteria" begin + k = StatsBase.dof(M) + ll = M.ll + @test isfinite(StatsBase.aic(M)) + @test isfinite(StatsBase.bic(M)) + @test isfinite(Copulas.aicc(M)) + @test isfinite(Copulas.hqc(M)) + @test aic(M) ≈ 2*k - 2*ll + @test bic(M) ≈ k*log(n) - 2*ll + end + + @testset "Residuals API" begin + R_unif = StatsBase.residuals(M) + @test size(R_unif) == (d, n) + @test all(0 .<= R_unif .<= 1) + R_norm = StatsBase.residuals(M, transform=:normal) + @test size(R_norm) == (d, n) + @test abs(mean(R_norm)) < 0.2 + @test 0.8 < std(R_norm) < 1.2 + end + + @testset "Predict API" begin + sim_data = StatsBase.predict(M, what=:simulate, nsim=100) + @test size(sim_data) == (d, 100) + @test all(0 .<= sim_data .<= 1) + newdata = rand(rng, d, 50) + preds_cdf = StatsBase.predict(M, newdata=newdata, what=:cdf) + @test length(preds_cdf) == 50 + @test all(0 .<= preds_cdf .<= 1) + preds_pdf = StatsBase.predict(M, newdata=newdata, what=:pdf) + @test length(preds_pdf) == 50 + @test all(preds_pdf .>= 0) + end + end - using Copulas: ClaytonGenerator, WilliamsonGenerator, GumbelGenerator, GalambosTail, MixedTail, ExtremeValueCopula, FrailtyGenerator # to avoid typing "Copulas." in front. 
- - # Structured manifest of test cases - # Each entry: (Type, dims::String) - # - dims: string of digits among "2","3","4"; remove a digit to skip that dimension - cases = [ - # No parameters - (IndependentCopula, "234"), - (MCopula, "234"), - (WCopula, "2"), - - # Empirical/misc (all d) - (BernsteinCopula, "234"), - (BetaCopula, "234"), - (CheckerboardCopula, "234"), - (EmpiricalCopula, "234"), - (ArchimedeanCopula, "234"), - (ExtremeValueCopula, "2"), - - - # # Elliptical (all d) - (GaussianCopula, "234"), - # (TCopula, "234"), # takes a loooooot of time. - - # Archimedean families wiht one parameters - (AMHCopula, "234"), - (ClaytonCopula, "234"), - (FrankCopula, "234"), - (GumbelBarnettCopula, "234"), - (GumbelCopula, "234"), - (InvGaussianCopula, "234"), - (JoeCopula, "234"), - - # Archimedeans families with two parameters. - (BB1Copula, "234"), - (BB3Copula, "234"), - (BB6Copula, "234"), - (BB7Copula, "234"), - (BB8Copula, "234"), - (BB9Copula, "234"), - (BB10Copula, "234"), - - # Bivariate-only miscellaneous - (FGMCopula, "2"), - (PlackettCopula, "2"), - (RafteryCopula, "2"), - - # # Bivariate EV families - (GalambosCopula, "2"), - (HuslerReissCopula, "2"), - (LogCopula, "2"), - (MixedCopula, "2"), - (CuadrasAugeCopula, "2"), - (BC2Copula, "2"), - (tEVCopula, "2"), - (MOCopula, "2"), - (AsymLogCopula, "2"), - (AsymGalambosCopula, "2"), - (AsymMixedCopula, "2"), - - # # Archimax (bivariate only) - (ArchimaxCopula{2, GumbelGenerator, MixedTail}, "2"), - (BB4Copula, "2"), - (BB5Copula, "2"), + @testset "API Error Handling" begin + dummy_copula = IndependentCopula(2) + M_dummy = Copulas.CopulaModel(dummy_copula, 10, 0.0, :dummy) + @test_throws ArgumentError StatsBase.residuals(M_dummy) + @test_throws ArgumentError StatsBase.predict(M_dummy, what=:foo) + end +end + +@testitem "Dependence Metrics" tags=[:metrics] begin + using Test, Random, Distributions, Copulas, StableRNGs, LinearAlgebra, Statistics, StatsBase, SpecialFunctions, HCubature, QuadGK + + rng = 
StableRNG(123) + n_samples = 2000 + test_copulas = [ + (d=3, copula=GumbelCopula(2, 3.5), description="3D Gumbel with upper tail dependence"), + (d=3, copula=ClaytonCopula(2, 4.0), description="Clayton 3D with lower tail dependence"), + (d=4, copula=GumbelCopula(2, 3.5), description="Gumbel 4D with lower tail dependence"), + (d=4, copula=ClaytonCopula(2, 4.0), description="Clayton 4D with lower tail dependence"), + (d=2, copula=GalambosCopula(2, 4.0), description="2D Galambos with lower tail dependence"), + (d=2, copula=HuslerReissCopula(2, 4.0), description="Husler Reiss 2D with lower tail dependence"), + (d=2, copula=LogCopula(2, 4.0), description="2D Logistic with lower tail dependency") ] - for d in (2, 3, 4) - U = rand(rng, d, 100) - for (CT, dims) in cases - occursin(string(d), dims) || continue - avail = Copulas._available_fitting_methods(CT) - if isempty(avail) - @warn "Empty method list for $CT" - continue - end - @testset "CT=$CT, d=$d" begin - for m in avail - @testset "CT=$CT, d=$d, method=$m" begin - @info "CT=$CT, d=$d, method=$m..." - fitres = fit(CopulaModel, CT, U; method=m) - @test length(Copulas._copula_of(fitres)) == d - @test isa(fitres, CopulaModel) - end - end + @testset "Multivariate Metrics (Copula vs. 
Data)" begin + for tc in test_copulas + C = tc.copula + d = tc.d + U = rand(rng, C, n_samples) + + @testset "$(tc.description)" begin + # Spearman's ρ + true_rho = Copulas.ρ(C) + emp_rho = Copulas.ρ(U) + @test emp_rho ≈ true_rho atol=0.1 + + # Kendall's τ + true_tau = Copulas.τ(C) + emp_tau = Copulas.τ(U) + @test emp_tau ≈ true_tau atol=0.1 + + # Blomqvist's β + true_beta = Copulas.β(C) + emp_beta = Copulas.β(U) + @test emp_beta ≈ true_beta atol=0.1 + + # Gini's γ + true_gamma = Copulas.γ(C) + emp_gamma = Copulas.γ(U) + @test emp_gamma ≈ true_gamma atol=0.15 + + # Copula Entropy ι + true_entropy = Copulas.ι(C) + emp_entropy = Copulas.ι(U) + + @test true_entropy ≈ emp_entropy atol=0.15 end end end -end + @testset "Pairwise Metrics (on Data Matrix)" begin + for tc in test_copulas + d = tc.d + d == 2 || continue + + C = tc.copula + U = rand(rng, C, n_samples) + X = U' + + @testset "$(tc.description)" begin + # corblomqvist + B = Copulas.corblomqvist(X) + @test B[1,2] ≈ Copulas.β(C) atol=0.1 + + # corgini + G = Copulas.corgini(X) + @test B[1,2] ≈ Copulas.γ(C) atol=0.1 + + # corentropy + H = Copulas.corentropy(X) + @test size(H) == (d,d) + @test H[1,1] == 0.0 + @test isfinite(H[1,2]) + end + end + end +end \ No newline at end of file diff --git a/test/GenericTests.jl b/test/GenericTests.jl index 864cd0e52..5e5b14678 100644 --- a/test/GenericTests.jl +++ b/test/GenericTests.jl @@ -1,9 +1,9 @@ @testmodule M begin using Copulas - using HypothesisTests, Distributions, Random, WilliamsonTransforms + using HypothesisTests, Distributions, Random using InteractiveUtils using ForwardDiff - using StatsBase: corkendall + using StatsBase using StableRNGs using HCubature using Test @@ -96,7 +96,7 @@ is_bivariate(C::CT) where CT = (length(C) == 2) has_subsetdims(C::CT) where CT = (length(C) >= 3) can_check_pdf_positivity(C::CT) where CT = can_pdf(C) && !((CT<:GumbelCopula) && (C.G.θ >= 19)) - kendall_coherency_enabled(C::CT) where CT = !(CT<:Union{MOCopula, 
Copulas.ExtremeValueCopula{2, <:Copulas.EmpiricalEVTail}})
+    dep_coherency_enabled(C::CT) where CT = !(CT<:Union{MOCopula, Copulas.ExtremeValueCopula{2, <:Copulas.EmpiricalEVTail}})
     can_check_biv_conditioning_ad(C::CT) where CT = is_bivariate(C) && can_ad(C)
     can_check_highdim_conditioning_ad(C::CT) where CT = (length(C) > 2) && can_ad(C)
     has_uniform_margins(C::CT) where CT = !(CT<:EmpiricalCopula)
@@ -104,9 +104,9 @@
     is_extremevalue(C::CT) where CT = (CT <: Copulas.ExtremeValueCopula)
     is_archimax(C::CT) where CT = (CT <: Copulas.ArchimaxCopula)
-    can_be_fitted(C::CT) where CT = length(Copulas._available_fitting_methods(CT)) > 0
+    can_be_fitted(C::CT, d) where CT = length(Copulas._available_fitting_methods(CT, d)) > 0
     has_parameters(C::CT) where CT = !(CT <: Union{IndependentCopula, MCopula, WCopula})
-    has_unbounded_params(C::CT) where CT = has_parameters(C) && :mle ∈ Copulas._available_fitting_methods(CT) && (length(Distributions.params(C)) > 0) && !(CT<:EmpiricalEVCopula)
+    has_unbounded_params(C::CT, d) where CT = has_parameters(C) && :mle ∈ Copulas._available_fitting_methods(CT, d) && (length(Distributions.params(C)) > 0) && !(CT<:EmpiricalEVCopula) && !(d>2 && CT<:FGMCopula)
     unbounding_is_a_bijection(C::Copulas.Copula{d}) where d = !(typeof(C)<:FGMCopula && d>2)
 
     function check(C::Copulas.Copula{d}) where d
@@ -120,7 +120,7 @@
 
         spl1 = rand(rng, C)
         spl10 = rand(rng, C, 10)
-        spl1000 = rand(rng, C, 1000)
+        spl1000 = rand(rng, C, 800)
         splZ10 = rand(rng, Z, 10)
 
         @testset "Basics" begin
@@ -171,7 +171,56 @@
             @test (all(r10 .>= 0) && all(isfinite.(r10)))
         end
 
-        @testif kendall_coherency_enabled(C) "Corkendall coeherency" begin
+
+        # This test takes more than 5 hours to run
+        # This is clearly unacceptable, but moreover we don't know which copula takes the most time
+        # sadly ;)
+
+        # @testif dep_coherency_enabled(C) "Dependence metrics coherency" begin
+        #     # Empirical vs theoretical for available metrics, mirroring Kendall’s pattern
+        #     metrics = (
+        #         ("tau", Copulas.τ, 
StatsBase.corkendall, 0.10, -1, 1), + # ("rho", Copulas.ρ, StatsBase.corspearman, 0.10, -1 , 1), + # ("beta", Copulas.β, Copulas.corblomqvist, 0.10, -1 , 1), + # ("gamma", Copulas.γ, Copulas.corgini, 0.15, -1 , 1), + # ("iota", Copulas.ι, Copulas.corentropy, 0.15, -Inf , 0) + # ) + # for (name, f, corf, tol, lb, ub) in metrics + # @testset "$name" begin + # thf = f(C) + # thcorf = corf(C) + # empf = f(spl1000) + + # @test isapprox(empf, thf; atol=tol) + # @test lb ≤ thf ≤ ub + # @test lb ≤ empf ≤ ub + # @test all(lb .≤ thcorf .≤ ub) + + # if which(f, (CT,)) != which(f, (Copulas.Copula{d},)) + # thf_gen = @invoke f(C::Copulas.Copula{d}) + # # Allow tiny numerical discrepancies + # @test isapprox(thf, thf_gen; atol= (C isa GaussianCopula ? 0.1 : 0.001)) + # end + # if d == 2 + # @test isapprox(thf, thcorf[1,2]; atol=0.1) + # else + # @test all(lb .<= thcorf .<= ub) + # end + # if check_rosenblatt(C) + # U = rosenblatt(C, spl1000) + # empfu = f(U) + # empcorfu = corf(U') + # @test isapprox(empfu, 0.0; atol=tol+0.05) + # for i in 1:(d - 1) + # for j in (i + 1):d + # @test empcorfu[i,j] ≈ 0.0 atol = 0.15 + # end + # end + # end + # end + # end + + @testif dep_coherency_enabled(C) "Corkendall coeherency" begin K = corkendall(spl1000') Kth = corkendall(C) @test all(-1 .<= Kth .<= 1) @@ -181,18 +230,18 @@ @testif can_integrate_pdf(C) "Testing pdf integration" begin # 1) ∫_{[0,1]^d} pdf = 1 (hcubature if d≤3; si no, MC) - v, r, _ = integrate_pdf_rect(rng, C, zeros(d), ones(d), 10_000, 10_000) + v, r, _ = integrate_pdf_rect(rng, C, zeros(d), ones(d), 3_000, 3_000) @test isapprox(v, 1; atol=max(5*sqrt(r), 1e-3)) # 2) ∫_{[0,0.5]^d} pdf = C(0.5,…,0.5) b = ones(d)/2 - v2, r2, _ = integrate_pdf_rect(rng, C, zeros(d), b, 10_000, 10_000) + v2, r2, _ = integrate_pdf_rect(rng, C, zeros(d), b, 3_000, 3_000) @test isapprox(v2, cdf(C, b); atol=max(10*sqrt(r2), 1e-3)) # 3) random rectangle, compare with measure (cdf based) a = rand(rng, d) b = a .+ rand(rng, d) .* (1 .- a) - v3, r3, _ = 
integrate_pdf_rect(rng, C, a, b, 10_000, 10_000) + v3, r3, _ = integrate_pdf_rect(rng, C, a, b, 3_000, 3_000) @test (isapprox(v3, Copulas.measure(C, a, b); atol=max(20*sqrt(r3), 1e-3)) || max(v3, Copulas.measure(C, a, b)) < eps(Float64)) # wide tolerence, should pass. end @@ -339,10 +388,10 @@ @testif is_archimedean_with_generator(C) "ArchimedeanCopula specific tests" begin # Only test things if there are specilized versions of the functions. spe_ϕ1 = which(Copulas.ϕ⁽¹⁾, (typeof(C.G), Float64)) != which(Copulas.ϕ⁽¹⁾, (Copulas.Generator, Float64)) - spe_ϕk = which(Copulas.ϕ⁽ᵏ⁾, (typeof(C.G), Val{1}, Float64)) != which(Copulas.ϕ⁽ᵏ⁾, (Copulas.Generator, Val{1}, Float64)) + spe_ϕk = which(Copulas.ϕ⁽ᵏ⁾, (typeof(C.G), Int, Float64)) != which(Copulas.ϕ⁽ᵏ⁾, (Copulas.Generator, Int, Float64)) spe_ϕinv = which(Copulas.ϕ⁻¹, (typeof(C.G), Float64)) != which(Copulas.ϕ⁻¹, (Copulas.Generator, Float64)) spe_ϕinv1 = which(Copulas.ϕ⁻¹⁽¹⁾, (typeof(C.G), Float64)) != which(Copulas.ϕ⁻¹⁽¹⁾, (Copulas.Generator, Float64)) - spe_ϕkinv = which(Copulas.ϕ⁽ᵏ⁾⁻¹, (typeof(C.G), Val{1}, Float64)) != which(Copulas.ϕ⁽ᵏ⁾⁻¹, (Copulas.Generator, Val{1}, Float64)) + spe_ϕkinv = which(Copulas.ϕ⁽ᵏ⁾⁻¹, (typeof(C.G), Int, Float64)) != which(Copulas.ϕ⁽ᵏ⁾⁻¹, (Copulas.Generator, Int, Float64)) mm = Copulas.max_monotony(C.G) @@ -361,14 +410,14 @@ end @testif spe_ϕk "Check d(ϕ) == ϕ⁽ᵏ⁾(k=1)" begin - @test ForwardDiff.derivative(x -> Copulas.ϕ(C.G, x), 0.1) ≈ Copulas.ϕ⁽ᵏ⁾(C.G, Val{1}(), 0.1) + @test ForwardDiff.derivative(x -> Copulas.ϕ(C.G, x), 0.1) ≈ Copulas.ϕ⁽ᵏ⁾(C.G, 1, 0.1) end @testif (spe_ϕ1 || spe_ϕk) "Check ϕ⁽¹⁾ == ϕ⁽ᵏ⁾(k=1)" begin - @test Copulas.ϕ⁽¹⁾(C.G, 0.1) ≈ Copulas.ϕ⁽ᵏ⁾(C.G, Val{1}(), 0.1) + @test Copulas.ϕ⁽¹⁾(C.G, 0.1) ≈ Copulas.ϕ⁽ᵏ⁾(C.G, 1, 0.1) end @testif (spe_ϕ1 || spe_ϕk) "Check d(ϕ⁽¹⁾) == ϕ⁽ᵏ⁾(k=2)" begin - @test ForwardDiff.derivative(x -> Copulas.ϕ⁽¹⁾(C.G, x), 0.1) ≈ Copulas.ϕ⁽ᵏ⁾(C.G, Val{2}(), 0.1) + @test ForwardDiff.derivative(x -> Copulas.ϕ⁽¹⁾(C.G, x), 0.1) ≈ Copulas.ϕ⁽ᵏ⁾(C.G, 2, 0.1) 
end @testif spe_ϕinv1 "Check d(ϕ⁻¹) == ϕ⁻¹⁽¹⁾" begin @@ -377,13 +426,13 @@ @testif spe_ϕkinv "Check ϕ⁽ᵏ⁾⁻¹ ∘ ϕ⁽ᵏ⁾ == Id for k in 1:d-2" begin for k in 1:d-2 - @test Copulas.ϕ⁽ᵏ⁾⁻¹(C.G,Val{k}(), Copulas.ϕ⁽ᵏ⁾(C.G, Val{k}(), 0.1)) ≈ 0.1 + @test Copulas.ϕ⁽ᵏ⁾⁻¹(C.G,k, Copulas.ϕ⁽ᵏ⁾(C.G, k, 0.1)) ≈ 0.1 end end # For generators that are only d-monotonous, this does not need to be true. @testif (spe_ϕkinv && (mm > d)) "Check ϕ⁽ᵏ⁾⁻¹ ∘ ϕ⁽ᵏ⁾ == Id for k=d-1" begin - @test Copulas.ϕ⁽ᵏ⁾⁻¹(C.G,Val{d-1}(), Copulas.ϕ⁽ᵏ⁾(C.G, Val{d-1}(), 0.1)) ≈ 0.1 + @test Copulas.ϕ⁽ᵏ⁾⁻¹(C.G,d-1, Copulas.ϕ⁽ᵏ⁾(C.G, d-1, 0.1)) ≈ 0.1 end @testif can_τinv "Check τ ∘ τ⁻¹ == Id" begin @@ -410,12 +459,12 @@ @testset "Kendall-Radial coherency test" begin # On radial-level: R1 = dropdims(sum(Copulas.ϕ⁻¹.(C.G,spl1000),dims=1),dims=1) - R2 = rand(rng,Copulas.williamson_dist(C.G, Val{d}()),1000) + R2 = rand(rng,Copulas.williamson_dist(C.G, d),1000) @test pvalue(ApproximateTwoSampleKSTest(R1,R2)) > 0.005 # On kendall-level: U1 = Distributions.cdf(C, spl1000) - U2 = Copulas.ϕ.(Ref(C.G), rand(rng,Copulas.williamson_dist(C.G, Val{d}()),1000)) + U2 = Copulas.ϕ.(Ref(C.G), rand(rng,Copulas.williamson_dist(C.G, d),1000)) @test pvalue(ApproximateTwoSampleKSTest(U1, U2)) > 0.005 end end @@ -487,14 +536,14 @@ @test isapprox(exp(lp), c_h; rtol=1e-6, atol=1e-8) end - for r in _archimax_mc_rectangles_cdf(C; N=20_000, seed=321) + for r in _archimax_mc_rectangles_cdf(C; N=8_000, seed=321) @test abs(r.p_hat - r.p_th) ≤ max(5*r.se, 2e-3) end end - @testif can_be_fitted(C) "Fitting interface" begin + @testif can_be_fitted(C, d) "Fitting interface" begin - @testif has_unbounded_params(C) "Unbouding and rebounding params" begin + @testif has_unbounded_params(C, d) "Unbouding and rebounding params" begin # First on the _example copula. 
θ₀ = Distributions.params(Copulas._example(CT, d)) θ₁ = Copulas._rebound_params(CT, d, Copulas._unbound_params(CT, d, θ₀)) @@ -512,17 +561,17 @@ @test Copulas._unbound_params(CT, d, Distributions.params(CT(d, θ₀...))) == Copulas._unbound_params(CT, d, θ₀) end - for m in Copulas._available_fitting_methods(CT) + for m in Copulas._available_fitting_methods(CT, d) @testset "Fitting CT for $(m)" begin r1 = fit(CopulaModel, CT, spl1000, m) r2 = fit(CT, spl1000, m) newCT = typeof(r2) @test typeof(r1.result) == newCT - if !(newCT<:ArchimedeanCopula{d, <:WilliamsonGenerator}) && !(newCT<:PlackettCopula) && has_parameters(r2) && has_unbounded_params(r2) && !(CT<:RafteryCopula && d==3 && m==:itau) + if !(newCT<:ArchimedeanCopula{d, <:WilliamsonGenerator}) && !(newCT<:PlackettCopula) && has_parameters(r2) && has_unbounded_params(r2, d) && !(CT<:RafteryCopula && d==3 && m==:itau) α1 = Copulas._unbound_params(typeof(r1.result), d, Distributions.params(r1.result)) α2 = Copulas._unbound_params(typeof(r2), d, Distributions.params(r2)) - @test α1 ≈ α2 atol=1e-3 + @test α1 ≈ α2 atol= (CT<:GaussianCopula ? 1e-2 : 1e-5) end # Can we check that the copula returned by the sklar fit is the same as the copula returned by the copula fit alone ? @@ -534,10 +583,10 @@ r4 = fit(SklarDist{CT, NTuple{d, Normal}}, splZ10) newCT = typeof(r4.C) @test typeof(r3.result.C) == newCT - if !(newCT<:ArchimedeanCopula{d, <:WilliamsonGenerator}) && !(newCT<:PlackettCopula) && has_parameters(r4.C) && has_unbounded_params(r4.C) + if !(newCT<:ArchimedeanCopula{d, <:WilliamsonGenerator}) && !(newCT<:PlackettCopula) && has_parameters(r4.C) && has_unbounded_params(r4.C, d) α1 = Copulas._unbound_params(typeof(r3.result.C), d, Distributions.params(r3.result.C)) α2 = Copulas._unbound_params(typeof(r4.C), d, Distributions.params(r4.C)) - @test α1 ≈ α2 atol=1e-3 + @test α1 ≈ α2 atol= (CT<:GaussianCopula ? 
1e-2 : 1e-5) end end end @@ -559,7 +608,7 @@ H = ForwardDiff.hessian(f, [u1, u2]) # ∂²/∂u1∂u2 max(H[1,2], 0.0) # numerical clip end - function _archimax_mc_rectangles_cdf(C; N::Int=300_000, seed::Integer=123, + function _archimax_mc_rectangles_cdf(C; N::Int=120_000, seed::Integer=123, rects::Tuple{Vararg{Tuple{<:Real,<:Real}}}=((0.5,0.5),(0.3,0.7),(0.8,0.2))) rng = StableRNG(seed) U = rand(rng, C, N) diff --git a/test/SklarDist.jl b/test/SklarDist.jl index ab0582b8d..b27331879 100644 --- a/test/SklarDist.jl +++ b/test/SklarDist.jl @@ -9,7 +9,6 @@ u = rand(rng,MyD,1000) rand!(rng, MyD,u) fit(SklarDist{ClaytonCopula,Tuple{LogNormal,Pareto,Beta}},u) - fit(SklarDist{GaussianCopula,Tuple{LogNormal,Pareto,Beta}},u) @test 1==1 # loglikelyhood(MyD,u) end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 46fb7b62e..9174d9e5d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,4 +4,4 @@ using TestItemRunner # @run_package_tests filter=ti->(:GumbelBarnettCopula in ti.tags || :ArchimedeanCopula in ti.tags || :FrankCopula in ti.tags) # you can add verbose=true here -@run_package_tests \ No newline at end of file +@run_package_tests