From 7a750b154acca6a41a84c08c5f2bd1137bbae511 Mon Sep 17 00:00:00 2001 From: Pat Alt <55311242+pat-alt@users.noreply.github.com> Date: Thu, 7 Sep 2023 10:53:16 +0200 Subject: [PATCH 1/2] done --- Project.toml | 2 +- src/conformal_models/conformal_models.jl | 1 - src/conformal_models/plotting.jl | 322 ----------------------- test/classification.jl | 30 --- test/regression.jl | 18 -- test/runtests.jl | 2 - test/utils.jl | 3 - 7 files changed, 1 insertion(+), 377 deletions(-) delete mode 100644 src/conformal_models/plotting.jl delete mode 100644 test/utils.jl diff --git a/Project.toml b/Project.toml index c5434b2..0fa116f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ConformalPrediction" uuid = "98bfc277-1877-43dc-819b-a3e38c30242f" authors = ["Patrick Altmeyer"] -version = "0.1.9" +version = "0.1.10" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" diff --git a/src/conformal_models/conformal_models.jl b/src/conformal_models/conformal_models.jl index 7899d07..22bcf0b 100644 --- a/src/conformal_models/conformal_models.jl +++ b/src/conformal_models/conformal_models.jl @@ -17,7 +17,6 @@ const ConformalModel = Union{ include("utils.jl") include("heuristics.jl") -include("plotting.jl") # Main API call to wrap model: """ diff --git a/src/conformal_models/plotting.jl b/src/conformal_models/plotting.jl deleted file mode 100644 index 5e4f55d..0000000 --- a/src/conformal_models/plotting.jl +++ /dev/null @@ -1,322 +0,0 @@ -using CategoricalArrays -using LinearAlgebra -using NaturalSort -using Plots - -""" - generate_lims(x1, x2, xlims, ylims) - -Small helper function then generates the `xlims` and `ylims` for the plot. -""" -function generate_lims(x1, x2, xlims, ylims, zoom) - if isnothing(xlims) - xlims = (minimum(x1), maximum(x1)) .+ (zoom, -zoom) - else - xlims = xlims .+ (zoom, -zoom) - end - if isnothing(ylims) - ylims = (minimum(x2), maximum(x2)) .+ (zoom, -zoom) - else - ylims = ylims .+ (zoom, -zoom) - end - return xlims, ylims -end - -""" - get_names(X) -Helper function to get variables names of `X`. -""" -function get_names(X) - try - global _names = MMI.schema(X).names - catch - X = MMI.table(X) - global _names = MMI.schema(X).names - end - return _names -end - -@doc raw""" - Plots.contourf(conf_model::ConformalModel,fitresult,X,y;kwargs...) - -A `Plots.jl` recipe/method extension that can be used to visualize the conformal predictions of a fitted conformal classifier with exactly two input variable. Data (`X`,`y`) are plotted as dots and overlaid with predictions sets. `y` is used to indicate the ground-truth labels of samples by colour. Samples are visualized in a two-dimensional feature space, so it is expected that `X` ``\in \mathcal{R}^2``. By default, a contour is used to visualize the softmax output of the conformal classifier for the target label, where `target` indicates can be used to define the index of the target label. Transparent regions indicate that the prediction set does not include the `target` label. - -## Target - -In the binary case, `target` defaults to `2`, indexing the second label: assuming the labels are `[0,1]` then the softmax output for `1` is shown. In the multi-class cases, `target` defaults to the first class: for example, if the labels are `["🐶", "🐱", "🐭"]` (in that order) then the contour indicates the softmax output for `"🐶"`. - -## Set Size - -If `plot_set_size` is set to `true`, then the contour instead visualises the the set size. - -## Univariate and Higher Dimensional Inputs - -For univariate of multiple inputs (>2), this function is not applicable. See [`Plots.areaplot(conf_model::ConformalProbabilisticSet, fitresult, X, y; kwargs...)`](@ref) for an alternative way to visualize prediction for any conformal classifier. - -""" -function Plots.contourf( - conf_model::ConformalProbabilisticSet, - fitresult, - X, - y; - target::Union{Nothing,Real}=nothing, - ntest=50, - zoom=-1, - xlims=nothing, - ylims=nothing, - plot_set_size=false, - plot_classification_loss=false, - plot_set_loss=false, - temp=nothing, - κ=0, - loss_matrix=UniformScaling(1.0), - kwargs..., -) - - # Setup: - X = permutedims(MMI.matrix(X)) - - @assert size(X, 1) == 2 "Can only create contour plot for conformal classifier with exactly two input variables." - - x1 = X[1, :] - x2 = X[2, :] - - # Plot limits: - xlims, ylims = generate_lims(x1, x2, xlims, ylims, zoom) - - # Surface range: - x1range = range(xlims[1]; stop=xlims[2], length=ntest) - x2range = range(ylims[1]; stop=ylims[2], length=ntest) - - # Target - if !isnothing(target) - @assert target in levels(y) "Specified target does not match any of the labels." - end - if length(unique(y)) > 1 - if isnothing(target) - @info "No target label supplied, using first." - end - target = isnothing(target) ? levels(y)[1] : target - if plot_set_size - _default_title = "Set size" - elseif plot_set_loss - _default_title = "Smooth set loss" - elseif plot_classification_loss - _default_title = "ℒ(C,$(target))" - else - _default_title = "p̂(y=$(target))" - end - else - if plot_set_size - _default_title = "Set size" - elseif plot_set_loss - _default_title = "Smooth set loss" - elseif plot_classification_loss - _default_title = "ℒ(C,$(target-1))" - else - _default_title = "p̂(y=$(target-1))" - end - end - title = !@isdefined(title) ? _default_title : title - - # Predictions - Z = [] - for x2 in x2range, x1 in x1range - p̂ = predict(conf_model, fitresult, table([x1 x2]))[1] - if plot_set_size - z = ismissing(p̂) ? 0 : sum(pdf.(p̂, p̂.decoder.classes) .> 0) - elseif plot_classification_loss - _target = categorical([target]; levels=levels(y)) - z = ConformalPrediction.classification_loss( - conf_model, fitresult, [x1 x2], _target; temp=temp, loss_matrix=loss_matrix - ) - elseif plot_set_loss - z = ConformalPrediction.smooth_size_loss( - conf_model, fitresult, [x1 x2]; κ=κ, temp=temp - ) - else - z = ismissing(p̂) ? [missing for i in 1:length(levels(y))] : pdf.(p̂, levels(y)) - z = replace(z, 0 => missing) - end - push!(Z, z) - end - Z = reduce(hcat, Z) - Z = Z[findall(levels(y) .== target)[1][1], :] - - # Contour: - if plot_set_size - _n = length(unique(y)) - clim = (0, _n) - plt = contourf( - x1range, - x2range, - Z; - title=title, - xlims=xlims, - ylims=ylims, - c=cgrad(:blues, _n + 1; categorical=true), - clim=clim, - kwargs..., - ) - else - plt = contourf( - x1range, - x2range, - Z; - title=title, - xlims=xlims, - ylims=ylims, - c=cgrad(:blues), - linewidth=0, - kwargs..., - ) - end - - # Samples: - y = typeof(y) <: CategoricalArrays.CategoricalArray ? y : Int.(y) - return scatter!(plt, x1, x2; group=y, kwargs...) -end - -""" - Plots.areaplot( - conf_model::ConformalProbabilisticSet, fitresult, X, y; - input_var::Union{Nothing,Int,Symbol}=nothing, - kwargs... - ) - -A `Plots.jl` recipe/method extension that can be used to visualize the conformal predictions of any fitted conformal classifier. Using a stacked area chart, this function plots the softmax output(s) contained the the conformal predictions set on the vertical axis against an input variable `X` on the horizontal axis. In the case of multiple input variables, the `input_var` argument can be used to specify the desired input variable. -""" -function Plots.areaplot( - conf_model::ConformalProbabilisticSet, - fitresult, - X, - y; - input_var::Union{Nothing,Int,Symbol}=nothing, - kwargs..., -) - - # Setup: - Xraw = deepcopy(X) - _names = get_names(Xraw) - X = permutedims(MMI.matrix(X)) - - # Dimensions: - if size(X, 1) > 1 - if isnothing(input_var) - @info "Multiple inputs no input variable (`input_var`) specified: defaulting to first variable." - idx = 1 - else - if typeof(input_var) == Int - idx = input_var - else - @assert input_var ∈ _names "$(input_var) is not among the variable names of `X`." - idx = findall(_names .== input_var)[1] - end - end - x = X[idx, :] - else - idx = 1 - x = X - end - - # Predictions: - ŷ = predict(conf_model, fitresult, Xraw) - nout = length(levels(y)) - ŷ = - map(_y -> ismissing(_y) ? [0 for i in 1:nout] : pdf.(_y, levels(y)), ŷ) |> _y -> reduce(hcat, _y) - ŷ = permutedims(ŷ) - - return areaplot(x, ŷ; kwargs...) -end - -""" - Plots.plot( - conf_model::ConformalInterval, fitresult, X, y; - kwrgs... - ) - -A `Plots.jl` recipe/method extension that can be used to visualize the conformal predictions of a fitted conformal regressor. Data (`X`,`y`) are plotted as dots and overlaid with predictions intervals. `y` is plotted on the vertical axis against a single variable `X` on the horizontal axis. A shaded area indicates the prediction interval. The line in the center of the interval is the midpoint of the interval and can be interpreted as the point estimate of the conformal regressor. In case `X` is multi-dimensional, `input_var` can be used to specify the input variable of interest that will be used for the horizontal axis. If unspecified, the first variable will be plotting by default. -""" -function Plots.plot( - conf_model::ConformalInterval, - fitresult, - X, - y; - input_var::Union{Nothing,Int,Symbol}=nothing, - xlims::Union{Nothing,Tuple}=nothing, - ylims::Union{Nothing,Tuple}=nothing, - zoom::Real=-0.5, - train_lab::Union{Nothing,String}=nothing, - test_lab::Union{Nothing,String}=nothing, - ymid_lw::Int=1, - kwargs..., -) - - # Setup - title = !@isdefined(title) ? "" : title - train_lab = isnothing(train_lab) ? "Observed" : train_lab - test_lab = isnothing(test_lab) ? "Predicted" : test_lab - - Xraw = deepcopy(X) - _names = get_names(Xraw) - X = permutedims(MMI.matrix(X)) - - # Dimensions: - if size(X, 1) > 1 - if isnothing(input_var) - @info "Multivariate input for regression with no input variable (`input_var`) specified: defaulting to first variable." - idx = 1 - else - if typeof(input_var) == Int - idx = input_var - else - @assert input_var ∈ _names "$(input_var) is not among the variable names of `X`." - idx = findall(_names .== input_var)[1] - end - end - x = X[idx, :] - else - idx = 1 - x = X - end - - # Plot limits: - xlims, ylims = generate_lims(x, y, xlims, ylims, zoom) - - # Plot training data: - plt = scatter( - vec(x), vec(y); label=train_lab, xlim=xlims, ylim=ylims, title=title, kwargs... - ) - - # Plot predictions: - ŷ = predict(conf_model, fitresult, Xraw) - lb, ub = eachcol(reduce(vcat, map(y -> permutedims(collect(y)), ŷ))) - ymid = (lb .+ ub) ./ 2 - yerror = (ub .- lb) ./ 2 - xplot = vec(x) - _idx = sortperm(xplot) - return plot!( - plt, - xplot[_idx], - ymid[_idx]; - label=test_lab, - ribbon=(yerror, yerror), - lw=ymid_lw, - kwargs..., - ) -end - -""" - Plots.bar(conf_model::ConformalModel, fitresult, X; label="", xtickfontsize=6, kwrgs...) - -A `Plots.jl` recipe/method extension that can be used to visualize the set size distribution of a conformal predictor. In the regression case, prediction interval widths are stratified into discrete bins. It can be useful to plot the distribution of set sizes in order to visually asses how adaptive a conformal predictor is. For more adaptive predictors the distribution of set sizes is typically spread out more widely, which reflects that “the procedure is effectively distinguishing between easy and hard inputs”. This is desirable: when for a given sample it is difficult to make predictions, this should be reflected in the set size (or interval width in the regression case). Since ‘difficult’ lies on some spectrum that ranges from ‘very easy’ to ‘very difficult’ the set size should very across the spectrum of ‘empty set’ to ‘all labels included’. -""" -function Plots.bar( - conf_model::ConformalModel, fitresult, X; label="", xtickfontsize=6, kwrgs... -) - ŷ = predict(conf_model, fitresult, X) - idx = size_indicator(ŷ) - x = sort(levels(idx); lt=natural) - y = [sum(idx .== _x) for _x in x] - return Plots.bar(x, y; label=label, xtickfontsize=xtickfontsize, kwrgs...) -end diff --git a/test/classification.jl b/test/classification.jl index 32b28a7..d4e448b 100644 --- a/test/classification.jl +++ b/test/classification.jl @@ -51,36 +51,6 @@ conformal_models = merge(values(available_models[:classification])...) @test !isnothing(conf_model.scores) predict(mach, selectrows(X, test)) - # Plotting: - @test isplot(bar(mach.model, mach.fitresult, X)) - @test isplot(areaplot(mach.model, mach.fitresult, X, y)) - @test isplot( - areaplot(mach.model, mach.fitresult, X, y; input_var=1) - ) - @test isplot( - areaplot(mach.model, mach.fitresult, X, y; input_var=:x1) - ) - if data_set[:specs][1] != 2 - @test_throws AssertionError contourf( - mach.model, mach.fitresult, X, y - ) - else - @test isplot(contourf(mach.model, mach.fitresult, X, y)) - @test isplot( - contourf( - mach.model, - mach.fitresult, - X, - y; - zoom=-1, - plot_set_size=true, - ), - ) - @test isplot( - contourf(mach.model, mach.fitresult, X, y; target=1) - ) - end - # Evaluation: # Evaluation takes some time, so only testing for one method. if _method == :simple_inductive && data_set[:specs][1] > 1 diff --git a/test/regression.jl b/test/regression.jl index 15f8f2c..f41a24e 100644 --- a/test/regression.jl +++ b/test/regression.jl @@ -49,24 +49,6 @@ conformal_models = merge(values(available_models[:regression])...) @test !isnothing(conf_model.scores) predict(mach, selectrows(X, test)) - # Plotting: - @test isplot(plot(mach.model, mach.fitresult, X, y)) - @test isplot( - plot( - mach.model, - mach.fitresult, - X, - y; - input_var=1, - xlims=(-1, 1), - ylims=(-1, 1), - ), - ) - @test isplot( - plot(mach.model, mach.fitresult, X, y; input_var=:x1) - ) - @test isplot(bar(mach.model, mach.fitresult, X)) - # Evaluation: # Evaluation takes some time, so only testing for one method. if _method == :simple_inductive diff --git a/test/runtests.jl b/test/runtests.jl index 7a901ab..58b7716 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,8 +7,6 @@ using Test # Doctests: doctest(ConformalPrediction) -include("utils.jl") - # Test suite: @testset "ConformalPrediction.jl" begin include("classification.jl") diff --git a/test/utils.jl b/test/utils.jl deleted file mode 100644 index 1c3a400..0000000 --- a/test/utils.jl +++ /dev/null @@ -1,3 +0,0 @@ -using Plots - -isplot(plt) = typeof(plt) <: Plots.Plot From b9e249820e267a2f2cf641b33b44fc385c100f1c Mon Sep 17 00:00:00 2001 From: Pat Alt <55311242+pat-alt@users.noreply.github.com> Date: Thu, 7 Sep 2023 16:16:16 +0200 Subject: [PATCH 2/2] actually removed Plots dep --- Project.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 0fa116f..e6d805d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ConformalPrediction" uuid = "98bfc277-1877-43dc-819b-a3e38c30242f" authors = ["Patrick Altmeyer"] -version = "0.1.10" +version = "0.1.11" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" @@ -16,7 +16,6 @@ MLJFlux = "094fc8d1-fd35-5302-93ea-dabda2abf845" MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" NaturalSort = "c020b1a1-e9b0-503a-9c33-f039bfc54a85" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" @@ -34,7 +33,6 @@ MLJFlux = "0.2.10" MLJModelInterface = "1" MLUtils = "0.4.2" NaturalSort = "1" -Plots = "1" ProgressMeter = "1" StatsBase = "0.33, 0.34.0" Tables = "1"