Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fields and parallelize with nothing #119

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
TaijaBase = "10284c91-9f28-4c9a-abbf-ee43576dfff6"

[compat]
Aqua = "0.8"
Expand All @@ -39,6 +40,7 @@ ProgressMeter = "1"
Random = "1.7, 1.8, 1.9, 1.10"
StatsBase = "0.33, 0.34.0"
Tables = "1"
TaijaBase = "1"
Test = "1.7, 1.8, 1.9, 1.10"
julia = "1.7, 1.8, 1.9, 1.10"

Expand Down
2 changes: 2 additions & 0 deletions src/ConformalPrediction.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
module ConformalPrediction

using TaijaBase

# Conformal Models:
include("conformal_models/conformal_models.jl")
export ConformalModel
Expand Down
13 changes: 3 additions & 10 deletions src/conformal_models/conformal_models.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,26 +50,19 @@ function conformal_model(
return conf_model
end

# Inductive Models:
include("inductive/inductive_models.jl")

# Regression Models:
include("inductive_regression.jl")
include("transductive_regression.jl")

# Classification Models
include("inductive_classification.jl")
include("transductive_classification.jl")

# Training:
include("ConformalTraining/ConformalTraining.jl")
using .ConformalTraining

# Type unions:
# `InductiveModel` is the union of the four inductive conformal model types listed
# below, so that it can be used as a single dispatch target.
const InductiveModel = Union{
SimpleInductiveRegressor,
SimpleInductiveClassifier,
AdaptiveInductiveClassifier,
ConformalQuantileRegressor,
}

const TransductiveModel = Union{
NaiveRegressor,
JackknifeRegressor,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,50 +1,36 @@
"""
score(conf_model::ConformalProbabilisticSet, fitresult, X, y=nothing)

Generic score method for the [`ConformalProbabilisticSet`](@ref). It computes nonconformity scores using the heuristic function `h` and the softmax probabilities of the true class. Method is dispatched for different Conformal Probabilistic Sets and atomic models.
"""
function score(conf_model::ConformalProbabilisticSet, fitresult, X, y=nothing)
    # Forward to the method that also dispatches on the wrapped atomic model.
    atomic = conf_model.model
    return score(conf_model, atomic, fitresult, X, y)
end

"""
split_data(conf_model::ConformalProbabilisticSet, X, y)

Splits the data into a proper training and calibration set.
"""
function split_data(conf_model::ConformalProbabilisticSet, X, y)
    # Partition the indices of `y` according to the model's training ratio:
    idx_train, idx_cal = partition(eachindex(y), conf_model.train_ratio)

    # Features are sliced with `selectrows`, targets by plain indexing:
    Xtrain, Xcal = selectrows(X, idx_train), selectrows(X, idx_cal)
    ytrain, ycal = y[idx_train], y[idx_cal]

    return Xtrain, ytrain, Xcal, ycal
end

# Simple
"The `SimpleInductiveClassifier` is the simplest approach to Inductive Conformal Classification. Contrary to the [`NaiveClassifier`](@ref) it computes nonconformity scores using a designated calibration dataset."
mutable struct SimpleInductiveClassifier{Model<:Supervised} <: ConformalProbabilisticSet
# Atomic supervised model wrapped by this conformal model; `MMI.fit` trains it.
model::Model
# Coverage setting; the keyword constructor defaults it to 0.95.
coverage::AbstractFloat
# `nothing` at construction; `MMI.fit` stores a `Dict` with keys `:calibration` and `:all`.
scores::Union{Nothing,Dict{Any,Any}}
# Heuristic function for nonconformity scores (constructor default: `minus_softmax`).
heuristic::Function
# Optional parallelizer, `nothing` by default.
# NOTE(review): where it is consumed is not visible in this section — confirm.
parallelizer::Union{Nothing,AbstractParallelizer}
# Ratio handed to `partition` in `split_data` to separate training from calibration rows.
train_ratio::AbstractFloat
end

# Keyword constructor for `SimpleInductiveClassifier`.
#
# Wraps the atomic `model` and forwards the keyword settings to the positional
# constructor. `scores` always starts as `nothing`.
function SimpleInductiveClassifier(
    model::Supervised;
    coverage::AbstractFloat=0.95,
    heuristic::Function=minus_softmax,
    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
    train_ratio::AbstractFloat=0.5,
)
    # The struct has six fields, so `parallelizer` must be forwarded as well; a
    # five-argument call would not match the positional constructor.
    return SimpleInductiveClassifier(
        model, coverage, nothing, heuristic, parallelizer, train_ratio
    )
end

"""
@doc raw"""
score(conf_model::SimpleInductiveClassifier, ::Type{<:Supervised}, fitresult, X, y::Union{Nothing,AbstractArray}=nothing)

Score method for the [`SimpleInductiveClassifier`](@ref) dispatched for any `<:Supervised` model.
Score method for the [`SimpleInductiveClassifier`](@ref) dispatched for any `<:Supervised` model. For the [`SimpleInductiveClassifier`](@ref) nonconformity scores are computed as follows:

``
S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\text{calibration}}
``

A typical choice for the heuristic function is ``h(\hat\mu(X_i), Y_i)=1-\hat\mu(X_i)_{Y_i}`` where ``\hat\mu(X_i)_{Y_i}`` denotes the softmax output of the true class and ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}``. The simple approach only takes the softmax probability of the true label into account.
"""
function score(
conf_model::SimpleInductiveClassifier, atomic::Supervised, fitresult, X, y=nothing
Expand All @@ -61,34 +47,6 @@ function score(
end
end

@doc raw"""
    MMI.fit(conf_model::SimpleInductiveClassifier, verbosity, X, y)

Fit the atomic model on the proper training split and store the nonconformity scores of the calibration split in `conf_model.scores`. For the [`SimpleInductiveClassifier`](@ref) these scores are defined as:

``
S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\text{calibration}}
``

A typical choice for the heuristic function is ``h(\hat\mu(X_i), Y_i)=1-\hat\mu(X_i)_{Y_i}`` where ``\hat\mu(X_i)_{Y_i}`` denotes the softmax output of the true class and ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}``. The simple approach only takes the softmax probability of the true label into account.
"""
function MMI.fit(conf_model::SimpleInductiveClassifier, verbosity, X, y)
    # Hold out part of the data for calibration:
    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)

    # Fit the atomic model on the proper training set only:
    atomic = conf_model.model
    train_args = MMI.reformat(atomic, Xtrain, ytrain)
    fitresult, cache, report = MMI.fit(atomic, verbosity, train_args...)

    # Score the calibration set and keep the results on the conformal model:
    cal_scores, scores = score(conf_model, fitresult, Xcal, ycal)
    conf_model.scores = Dict(:calibration => cal_scores, :all => scores)

    return (fitresult, cache, report)
end

@doc raw"""
MMI.predict(conf_model::SimpleInductiveClassifier, fitresult, Xnew)

Expand Down Expand Up @@ -127,42 +85,20 @@ mutable struct AdaptiveInductiveClassifier{Model<:Supervised} <: ConformalProbab
coverage::AbstractFloat
scores::Union{Nothing,Dict{Any,Any}}
heuristic::Function
parallelizer::Union{Nothing,AbstractParallelizer}
train_ratio::AbstractFloat
end

# Keyword constructor for `AdaptiveInductiveClassifier`.
#
# Wraps the atomic `model` and forwards the keyword settings to the positional
# constructor. `scores` always starts as `nothing`.
function AdaptiveInductiveClassifier(
    model::Supervised;
    coverage::AbstractFloat=0.95,
    heuristic::Function=minus_softmax,
    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
    train_ratio::AbstractFloat=0.5,
)
    # The struct declares a `parallelizer` field between `heuristic` and
    # `train_ratio`; it has to be passed through instead of being dropped.
    return AdaptiveInductiveClassifier(
        model, coverage, nothing, heuristic, parallelizer, train_ratio
    )
end

@doc raw"""
    MMI.fit(conf_model::AdaptiveInductiveClassifier, verbosity, X, y)

Fit the atomic model on the proper training split and store the nonconformity scores of the calibration split in `conf_model.scores`. For the [`AdaptiveInductiveClassifier`](@ref) nonconformity scores are computed by cumulatively summing the ranked scores of each label in descending order until reaching the true label ``Y_i``:

``
S_i^{\text{CAL}} = s(X_i,Y_i) = \sum_{j=1}^k  \hat\mu(X_i)_{\pi_j} \ \text{where } \ Y_i=\pi_k,  i \in \mathcal{D}_{\text{calibration}}
``

Returns the tuple `(fitresult, cache, report)` produced by fitting the atomic model.
"""
function MMI.fit(conf_model::AdaptiveInductiveClassifier, verbosity, X, y)

    # Data Splitting:
    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)

    # Training (the call is closed properly; no constructor code belongs in here):
    fitresult, cache, report = MMI.fit(
        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
    )

    # Nonconformity Scores:
    cal_scores, scores = score(conf_model, fitresult, Xcal, ycal)
    conf_model.scores = Dict(:calibration => cal_scores, :all => scores)

    return (fitresult, cache, report)
end

"""
Expand Down
64 changes: 64 additions & 0 deletions src/conformal_models/inductive/inductive_models.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Included first; presumably these files define the model types named in the union below.
include("classification.jl")
include("regression.jl")

# Type union of the inductive conformal models. The shared methods in this file
# (`split_data`, `score`, `fit_atomic`, `MMI.fit`) dispatch on it.
const InductiveModel = Union{
SimpleInductiveRegressor,
SimpleInductiveClassifier,
AdaptiveInductiveClassifier,
ConformalQuantileRegressor,
}

"""
split_data(conf_model::InductiveModel, X, y)

Splits the data into a proper training and calibration set for inductive models.
"""
function split_data(conf_model::InductiveModel, X, y)
    # Partition the indices of `y` according to the model's training ratio:
    idx_train, idx_cal = partition(eachindex(y), conf_model.train_ratio)

    # Features are sliced with `selectrows`, targets by plain indexing:
    Xtrain, Xcal = selectrows(X, idx_train), selectrows(X, idx_cal)
    ytrain, ycal = y[idx_train], y[idx_cal]

    return Xtrain, ytrain, Xcal, ycal
end

"""
score(conf_model::InductiveModel, fitresult, X, y=nothing)

Generic score method for the [`InductiveModel`](@ref). It forwards to the `score` method that is dispatched on both the conformal model and its atomic model (`conf_model.model`), which computes the nonconformity scores using the heuristic function `h`.
"""
function score(conf_model::InductiveModel, fitresult, X, y=nothing)
    # Forward to the method that also dispatches on the wrapped atomic model.
    atomic = conf_model.model
    return score(conf_model, atomic, fitresult, X, y)
end

"""
fit_atomic(conf_model::InductiveModel, verbosity, X, y)

Fits the atomic model for the [`InductiveModel`](@ref). In the case of inductive models, the atomic model is fit once on the proper training data.
"""
function fit_atomic(conf_model::InductiveModel, verbosity, X, y)
    # Pass the data through the atomic model's `MMI.reformat` and fit it once.
    atomic = conf_model.model
    args = MMI.reformat(atomic, X, y)
    fitresult, cache, report = MMI.fit(atomic, verbosity, args...)
    return fitresult, cache, report
end

@doc raw"""
    MMI.fit(conf_model::InductiveModel, verbosity, X, y)

Fit an [`InductiveModel`](@ref). The data is split into a proper training set and a calibration set, the atomic model is fitted on the training set through `fit_atomic`, and the nonconformity scores of the calibration set are stored in `conf_model.scores` under the keys `:calibration` and `:all`. Returns the tuple `(fitresult, cache, report)` of the atomic fit.
"""
function MMI.fit(conf_model::InductiveModel, verbosity, X, y)
    # Hold out part of the data for calibration:
    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)

    # Fit the atomic model once on the proper training set:
    fitresult, cache, report = fit_atomic(conf_model, verbosity, Xtrain, ytrain)

    # Score the calibration set and keep the results on the conformal model:
    cal_scores, scores = score(conf_model, fitresult, Xcal, ycal)
    conf_model.scores = Dict(:calibration => cal_scores, :all => scores)

    return (fitresult, cache, report)
end
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,26 @@ using MLJLinearModels: MLJLinearModels
mutable struct SimpleInductiveRegressor{Model<:Supervised} <: ConformalInterval
    # Atomic supervised model wrapped by this conformal model.
    model::Model
    # Coverage setting; the keyword constructor defaults it to 0.95.
    coverage::AbstractFloat
    # Declared exactly once (a duplicate field name is an error). `nothing` at
    # construction; the generic inductive `MMI.fit` stores a `Dict` with keys
    # `:calibration` and `:all`, hence the `Dict` type rather than an array.
    scores::Union{Nothing,Dict{Any,Any}}
    # Heuristic used to compute nonconformity scores (default: `absolute_error`).
    heuristic::Function
    # Optional parallelizer, `nothing` by default.
    parallelizer::Union{Nothing,AbstractParallelizer}
    # Ratio handed to `partition` to separate training from calibration rows.
    train_ratio::AbstractFloat
end

# Keyword constructor for `SimpleInductiveRegressor`.
#
# Wraps the atomic `model` and forwards the keyword settings to the positional
# constructor. `scores` always starts as `nothing`.
function SimpleInductiveRegressor(
    model::Supervised;
    coverage::AbstractFloat=0.95,
    heuristic::Function=absolute_error,
    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
    train_ratio::AbstractFloat=0.5,
)
    # The struct has a `parallelizer` field, so it must be forwarded; a
    # five-argument call would not match the positional constructor.
    return SimpleInductiveRegressor(
        model, coverage, nothing, heuristic, parallelizer, train_ratio
    )
end

@doc raw"""
MMI.fit(conf_model::SimpleInductiveRegressor, verbosity, X, y)
score(conf_model::SimpleInductiveRegressor, atomic::Supervised, fitresult, X, y=nothing)

For the [`SimpleInductiveRegressor`](@ref) nonconformity scores are computed as follows:

Expand All @@ -29,27 +33,16 @@ S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\tex

A typical choice for the heuristic function is ``h(\hat\mu(X_i),Y_i)=|Y_i-\hat\mu(X_i)|`` where ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}``.
"""
function MMI.fit(conf_model::SimpleInductiveRegressor, verbosity, X, y)

# Data Splitting:
train, calibration = partition(eachindex(y), conf_model.train_ratio)
Xtrain = selectrows(X, train)
ytrain = y[train]
Xcal = selectrows(X, calibration)
ycal = y[calibration]

# Training:
fitresult, cache, report = MMI.fit(
conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
)

# Nonconformity Scores:
ŷ = reformat_mlj_prediction(
MMI.predict(conf_model.model, fitresult, MMI.reformat(conf_model.model, Xcal)...)
)
conf_model.scores = @.(conf_model.heuristic(ycal, ŷ))

return (fitresult, cache, report)
function score(
    conf_model::SimpleInductiveRegressor, atomic::Supervised, fitresult, X, y=nothing
)
    # Predictions of the atomic model for the rows of `X`:
    Xfmt = MMI.reformat(atomic, X)
    ŷ = reformat_mlj_prediction(MMI.predict(atomic, fitresult, Xfmt...))

    # NOTE(review): the heuristic is broadcast over `y` before `y` is checked for
    # `nothing`; confirm that the configured heuristic accepts `nothing` here.
    scores = @.(conf_model.heuristic(y, ŷ))

    # Without labels only the scores are returned; with labels the same vector
    # is returned twice as a tuple.
    return isnothing(y) ? scores : (scores, scores)
end

# Prediction
Expand Down Expand Up @@ -84,6 +77,7 @@ mutable struct ConformalQuantileRegressor{Model<:QuantileModel} <: ConformalInte
coverage::AbstractFloat
scores::Union{Nothing,AbstractArray}
heuristic::Function
parallelizer::Union{Nothing,AbstractParallelizer}
train_ratio::AbstractFloat
end

Expand All @@ -93,11 +87,19 @@ function ConformalQuantileRegressor(
heuristic::Function=function f(y, ŷ_lb, ŷ_ub)
return reduce((x, y) -> max.(x, y), [ŷ_lb - y, y - ŷ_ub])
end,
parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
train_ratio::AbstractFloat=0.5,
)
return ConformalQuantileRegressor(model, coverage, nothing, heuristic, train_ratio)
return ConformalQuantileRegressor(
model, coverage, nothing, heuristic, parallelizer, train_ratio
)
end

# function fit_atomic(conf_model::ConformalQuantileRegressor, verbosity, X, y)
# fitresult, cache, report = MMI.fit(conf_model.model, verbosity, MMI.reformat(conf_model.model, X, y)...)
# return fitresult, cache, report
# end

@doc raw"""
MMI.fit(conf_model::ConformalQuantileRegressor, verbosity, X, y)

Expand All @@ -114,13 +116,7 @@ A typical choice for the heuristic function is ``h(\hat\mu_{\alpha_{lo}}(X_i), \
function MMI.fit(conf_model::ConformalQuantileRegressor, verbosity, X, y)

# Data Splitting:
train, calibration = partition(eachindex(y), conf_model.train_ratio)
Xtrain = selectrows(X, train)
ytrain = y[train]
Xtrain, ytrain = MMI.reformat(conf_model.model, Xtrain, ytrain)
Xcal = selectrows(X, calibration)
ycal = y[calibration]
Xcal, ycal = MMI.reformat(conf_model.model, Xcal, ycal)
Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)

# Training:
fitresult, cache, report, y_pred = ([], [], [], [])
Expand Down
Loading
Loading