Skip to content

Commit

Permalink
Merge pull request #536 from alan-turing-institute/dev
Browse files Browse the repository at this point in the history
For a 0.18.1 release
  • Loading branch information
ablaom authored Apr 19, 2021
2 parents 8c87e4c + 7781275 commit 773508f
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 101 deletions.
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MLJBase"
uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
authors = ["Anthony D. Blaom <[email protected]>"]
version = "0.18.0"
version = "0.18.1"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
Expand Down Expand Up @@ -33,15 +33,15 @@ ComputationalResources = "^0.3"
Distributions = "0.22, 0.23, 0.24"
InvertedIndices = "^1"
LossFunctions = "0.5, 0.6"
MLJModelInterface = "^0.4.1"
MLJModelInterface = "^0.4.1, 1.0"
MLJScientificTypes = "^0.4.1"
Missings = "^0.4"
OrderedCollections = "^1.1"
Parameters = "^0.12"
PrettyTables = "^0.8,^0.9,^0.10,^0.11"
ProgressMeter = "^1.3"
StatisticalTraits = "^0.1.1, 1.0"
StatsBase = "^0.32,^0.33"
StatisticalTraits = "^0.1.1"
Tables = "^0.2,^1.0"
julia = "1"

Expand Down
2 changes: 1 addition & 1 deletion src/composition/models/pipelines.jl
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ not_unsupervised_alert(v) =
pipe_argument_error(v) =
pipe_alert("Encountered `$v` where a "*
"model instance, model type, function, "*
"or key-word assignement was expected. ")
"or key-word assignment was expected. ")

function super_type(prediction_type::Symbol)
if prediction_type == :deterministic
Expand Down
33 changes: 15 additions & 18 deletions src/univariate_finite/methods.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,13 @@ CategoricalArrays.isordered(u::UnivariateFiniteArray) = isordered(classes(u))

## DISPLAY

_round_prob(p) = p
_round_prob(p::Union{Float32,Float64}) = round(p, sigdigits=3)

function Base.show(stream::IO, d::UnivariateFinite)
pairs = Distributions.params(d).probs
x1, p1 = pairs[1]
str = "UnivariateFinite{$(d.scitype)}($x1=>$(round(p1, sigdigits=3))"
for pair in pairs[2:end]
str *= ", $(pair[1])=>$(round(pair[2], sigdigits=3))"
end
str *= ")"
print(stream, str)
arg_str = join(["$(pr[1])=>$(_round_prob(pr[2]))" for pr in pairs], ", ")
print(stream, "UnivariateFinite{$(d.scitype)}($arg_str)")
end

show_prefix(u::UnivariateFiniteArray{S,V,R,P,1}) where {S,V,R,P} =
Expand Down Expand Up @@ -142,7 +140,7 @@ function Base.isapprox(d1::UnivariateFiniteArray,
for c in support1
c in support2 || return false
isapprox(pdf.(d1, c), pdf.(d2, c); kwargs...) ||
return false
return false
end
return true
end
Expand Down Expand Up @@ -270,10 +268,11 @@ end
"""
_cumulative(d::UnivariateFinite)
Return the cumulative probability vector `[0, ..., 1]` for the
distribution `d`, using only classes in the support of `d`, ordered
according to the categorical elements used at instantiation of
`d`. Used only to implement random sampling from `d`.
Return the cumulative probability vector `C` for the distribution `d`,
using only classes in the support of `d`, ordered according to the
categorical elements used at instantiation of `d`. Used only to
implement random sampling from `d`. We have `C[1] == 0` and `C[end] ==
1`, assuming the probabilities have been normalized.
"""
function _cumulative(d::UnivariateFinite{S,V,R,P}) where {S,V,R,P<:Real}
Expand All @@ -283,8 +282,7 @@ function _cumulative(d::UnivariateFinite{S,V,R,P}) where {S,V,R,P<:Real}
K = length(p)
p_cumulative = Array{P}(undef, K + 1)
p_cumulative[1] = zero(P)
p_cumulative[K + 1] = one(P)
for i in 2:K
for i in 2:K + 1
p_cumulative[i] = p_cumulative[i-1] + p[i-1]
end
return p_cumulative
Expand All @@ -294,13 +292,12 @@ end
_rand(rng, p_cumulative, R)
Randomly sample the distribution with discrete support `R(1):R(n)`
which has cumulative probability vector `p_cumulative=[0, ..., 1]` (of
length `n+1`). Does not check the first and last elements of
`p_cumulative` but does not use them either.
which has cumulative probability vector `p_cumulative` (see
[`_cumulative`](@ref)).
"""
function _rand(rng, p_cumulative, R)
real_sample = rand(rng)
real_sample = rand(rng)*p_cumulative[end]
K = R(length(p_cumulative))
index = K
for i in R(2):R(K)
Expand Down
51 changes: 26 additions & 25 deletions src/univariate_finite/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@ const UnivariateFiniteSuper = Dist.Distribution{Dist.Univariate,NonEuclidean}
# V - type of class labels (eg, Char in `categorical(['a', 'b'])`)
# P - raw probability type
# S - scitype of samples
# L - raw type of labels, eg `Symbol` or `String`

# Note that the keys of `prob_given_ref` need not exhaust all the
# refs of all classes but will be ordered (LittleDicts preserve order)
struct UnivariateFinite{S,V,R,P<:Real} <: UnivariateFiniteSuper
struct UnivariateFinite{S,V,R,P} <: UnivariateFiniteSuper
scitype::Type{S}
decoder::CategoricalDecoder{V,R}
prob_given_ref::LittleDict{R,P,Vector{R}, Vector{P}}
Expand All @@ -35,9 +34,11 @@ UnivariateFinite(a...; kwargs...) = MMI.UnivariateFinite(a...; kwargs...)

## CHECKS AND ERROR MESSAGES

const Prob{P} = Union{P, AbstractArray{P}} where P <: Real
# checks that scalar probabilities lie in [0, 1] and checks that
# vector probabilities sum to one have now been dropped, except where
# `augment=true` is specified.

prob_error = ArgumentError("Probabilities must have `Real` type. ")
const Prob{P} = Union{P, AbstractArray{P}} where P

_err_01() = throw(DomainError("Probabilities must be in [0,1]."))
_err_sum_1() = throw(DomainError(
Expand Down Expand Up @@ -65,7 +66,7 @@ function _check_pool(pool)
end
_check_probs_01(probs) =
all(0 .<= probs .<= 1) || _err_01()
_check_probs_sum(probs::Vector{<:Prob{P}}) where P<:Real =
_check_probs_sum(probs::Vector{<:Prob{P}}) where P =
all(x -> x ≈ one(P), sum(probs)) || _err_sum_1()
_check_probs(probs) = (_check_probs_01(probs); _check_probs_sum(probs))
_check_augmentable(support, probs) = _check_probs_01(probs) &&
Expand Down Expand Up @@ -124,8 +125,8 @@ function MMI.UnivariateFinite(
# this constructor ignores kwargs

probs = values(prob_given_class) |> collect
_check_probs_01.(probs)
_check_probs_sum(probs)
# _check_probs_01.(probs)
# _check_probs_sum(probs)

# retrieve decoder and classes from element
class1 = first(keys(prob_given_class))
Expand All @@ -134,20 +135,20 @@ function MMI.UnivariateFinite(

# `LittleDict`s preserve order of keys, which we need for rand():

support = keys(prob_given_class) |> collect |> sort
_support = keys(prob_given_class) |> collect |> sort

issubset(support, parent_classes) ||
issubset(_support, parent_classes) ||
error("Categorical elements are not from the same pool. ")

pairs = [int(c) => prob_given_class[c]
for c in support]
for c in _support]

probs1 = first(values(prob_given_class))
S = scitype(class1)
if probs1 isa Real
return UnivariateFinite(S, parent_decoder, LittleDict(pairs...))
else
if probs1 isa AbstractArray
return UnivariateFiniteArray(S, parent_decoder, LittleDict(pairs...))
else
return UnivariateFinite(S, parent_decoder, LittleDict(pairs...))
end
end

Expand Down Expand Up @@ -188,14 +189,15 @@ end
## CONSTRUCTORS - FROM ARRAYS

# example: _get_on_last(A, 4) = A[:, :, 4] if A has 3 dims:
_get_on_last(probs::AbstractArray{<:Any,N}, i) where N = probs[fill(:,N-1)..., i]
_get_on_last(probs::AbstractArray{<:Any,N}, i) where N =
probs[fill(:,N-1)..., i]

# 1. Univariate Finite from a vector of classes or raw labels and
# array of probs; first, a dispatcher:
function MMI.UnivariateFinite(
::FI,
support::AbstractVector,
probs::Union{AbstractArray,Real};
probs;
kwargs...)

if support isa AbstractArray{<:CategoricalValue}
Expand All @@ -215,13 +217,12 @@ function MMI.UnivariateFinite(
kwargs...)
end

# The core method, ultimately called by 1.0, 1.1, 1.2, 1.3 below, or
# directly from the dispatcher 1. above
# The core method, ultimately called by 1.0, 1.1, 1.2, 1.3 below.
function _UnivariateFinite(support::AbstractVector{CategoricalValue{V,R}},
probs::AbstractArray{P},
N;
augment=false,
kwargs...) where {V,R,P<:Real}
kwargs...) where {V,R,P}

unique(support) == support ||
error("Non-unique vector of classes specified")
Expand All @@ -246,15 +247,15 @@ function _UnivariateFinite(support::AbstractVector{CategoricalValue{V,R}},
end

# 1.0 support does not consist of categorical elements:
function _UnivariateFinite(support::AbstractVector{L},
probs::AbstractArray{P},
function _UnivariateFinite(support,
probs::AbstractArray,
N;
augment=false,
pool=nothing,
ordered=false) where {L,P<:Real}
ordered=false)

# If we got here, then L<:CategoricalValue is not true, ie L is a
# raw label type
# If we got here, then the vector `support` is not
# `AbstractVector{<:CategoricalValue}`

if pool === nothing || ismissing(pool)
if pool === nothing
Expand Down Expand Up @@ -315,13 +316,13 @@ _UnivariateFinite(::Val{true},
# 1.3 corner case, probs a scalar:
_UnivariateFinite(::Val{true},
support::AbstractVector,
probs::Real;
probs;
kwargs...) =
UnivariateFinite(support, [probs,]; kwargs...)[1]

# 2. probability only; unspecified support:
function MMI.UnivariateFinite(::FI,
probs::AbstractArray{<:Real,N};
probs::AbstractArray{<:Any,N};
pool=nothing,
augment=false,
kwargs...) where N
Expand Down
44 changes: 0 additions & 44 deletions test/openml.jl

This file was deleted.

3 changes: 0 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,3 @@ end
@test include("hyperparam/one_dimensional_range_methods.jl")
end

# @testset "openml" begin
# @test include("openml.jl")
# end
4 changes: 2 additions & 2 deletions test/univariate_finite/arrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ c = 3
probs = rand(rng, n)
supp = ["class1", "class2"]

@test_throws DomainError UnivariateFinite(supp, probs, pool=missing)
# @test_throws DomainError UnivariateFinite(supp, probs, pool=missing)
u = UnivariateFinite(supp, probs, pool=missing, augment=true)
@test length(u) == n
@test size(u) == (n,)
@test pdf.(u, "class2") ≈ probs

# autosupport:
@test_throws DomainError UnivariateFinite(probs, pool=missing)
# @test_throws DomainError UnivariateFinite(probs, pool=missing)
u = UnivariateFinite(probs, pool=missing, augment=true)
@test length(u) == n
@test size(u) == (n,)
Expand Down
19 changes: 14 additions & 5 deletions test/univariate_finite/methods.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ V = categorical(collect("asqfasqffqsaaaa"))
a, s, q, f = v[1], v[2], v[3], v[4]
A, S, Q, F = V[1], V[2], V[3], V[4]

@testset "UnivariateFinite constructor" begin
@testset "set 1" begin

# ordered (OrderedFactor)
dict = Dict(s=>0.1, q=> 0.2, f=> 0.7)
Expand Down Expand Up @@ -53,13 +53,22 @@ A, S, Q, F = V[1], V[2], V[3], V[4]
@test samples == [rand(rng, d) for i in 1:N]

N = 10000
samples = rand(rng, d, N);
samples = rand(StableRNG(123), d, N);
@test Set(samples) == Set(support(d))
freq = Distributions.countmap(samples)
@test isapprox(freq[f]/N, 0.7, atol=0.05)
@test isapprox(freq[s]/N, 0.1, atol=0.05)
@test isapprox(freq[q]/N, 0.2, atol=0.05)

# test unnormalized case gives same answer:
dd = UnivariateFinite(support(d), [70, 20, 10])
samples = rand(StableRNG(123), dd, N);
@test Set(samples) == Set(support(d))
ffreq = Distributions.countmap(samples)
@test isapprox(freq[f]/N, ffreq[f]/N)
@test isapprox(freq[s]/N, ffreq[s]/N)
@test isapprox(freq[q]/N, ffreq[q]/N)

#
# unordered (Multiclass):
dict = Dict(S=>0.1, Q=> 0.2, F=> 0.7)
Expand Down Expand Up @@ -133,9 +142,9 @@ end
@test pdf(d, 'q') ≈ 0.2
@test_logs((:warn, r"No "),
UnivariateFinite(['f', 'q', 's'], [0.7, 0.2, 0.1]))
@test_throws(MethodError,
UnivariateFinite(['f', 'q', 's'], ["junk", 0.2, 0.1],
pool=missing))
# @test_throws(MethodError,
# UnivariateFinite(['f', 'q', 's'], ["junk", 0.2, 0.1],
# pool=missing))
d = UnivariateFinite(['f', 'q', 's'], [0.7, 0.2, 0.1], pool=missing)
@test pdf(d, 'f') ≈ 0.7
@test pdf(d, 's') ≈ 0.1
Expand Down

0 comments on commit 773508f

Please sign in to comment.