Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make cut close last interval on the right #409

Merged
2 commits merged on Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions src/extras.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ function fill_refs!(refs::AbstractArray, X::AbstractArray,

if ismissing(x)
refs[i] = 0
elseif extend === true && x == upper
elseif x == upper
refs[i] = n-1
elseif extend !== true && !(lower <= x < upper)
elseif extend !== true && !(lower <= x <= upper)
extend === missing ||
throw(ArgumentError("value $x (at index $i) does not fall inside the breaks: " *
"adapt them manually, or pass extend=true or extend=missing"))
Expand Down Expand Up @@ -41,17 +41,15 @@ Cut a numeric array into intervals at values `breaks`
and return an ordered `CategoricalArray` indicating
the interval into which each entry falls. Intervals are of the form `[lower, upper)`,
i.e. the lower bound is included and the upper bound is excluded, except
if `extend=true` the last interval, which is then closed on both ends,
i.e. `[lower, upper]`.
the last interval, which is closed on both ends, i.e. `[lower, upper]`.

If `x` accepts missing values (i.e. `eltype(x) >: Missing`) the returned array will
also accept them.

# Keyword arguments
* `extend::Union{Bool, Missing}=false`: when `false`, an error is raised if some values
in `x` fall outside of the breaks; when `true`, breaks are automatically added to include
all values in `x`, and the upper bound is included in the last interval; when `missing`,
values outside of the breaks generate `missing` entries.
all values in `x`; when `missing`, values outside of the breaks generate `missing` entries.
* `labels::Union{AbstractVector, Function}`: a vector of strings, characters
or numbers giving the names to use for
the intervals; or a function `f(from, to, i; leftclosed, rightclosed)` that generates
Expand Down Expand Up @@ -200,7 +198,7 @@ function _cut(x::AbstractArray{T, N}, breaks::AbstractVector,
end
levs[end] = labels(from[end], to[end], n-1,
leftclosed=breaks[end-1] != breaks[end],
rightclosed=coalesce(extend, false))
rightclosed=true)
else
length(labels) == n-1 ||
throw(ArgumentError("labels must be of length $(n-1), but got length $(length(labels))"))
Expand Down
52 changes: 31 additions & 21 deletions test/15_extras.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,37 @@ const ≅ = isequal

@testset "cut($(Union{Int, T})[...])" for T in (Union{}, Missing)
x = @inferred cut(Vector{Union{Int, T}}([2, 3, 5]), [1, 3, 6])
@test x == ["[1, 3)", "[3, 6)", "[3, 6)"]
@test x == ["[1, 3)", "[3, 6]", "[3, 6]"]
@test isa(x, CategoricalVector{Union{String, T}})
@test isordered(x)
@test levels(x) == ["[1, 3)", "[3, 6)"]
@test levels(x) == ["[1, 3)", "[3, 6]"]

@test cut(Vector{Union{T, Int}}([2, 3, 5]), [2, 5], extend=false) ==
["[2, 5]", "[2, 5]", "[2, 5]"]

err = @test_throws ArgumentError cut(Vector{Union{T, Int}}([2, 3, 5]), [3, 6])
@test err.value.msg == "value 2 (at index 1) does not fall inside the breaks: adapt them manually, or pass extend=true or extend=missing"


err = @test_throws ArgumentError cut(Vector{Union{T, Int}}([2, 3, 5]), [2, 5])
@test err.value.msg == "value 5 (at index 3) does not fall inside the breaks: adapt them manually, or pass extend=true or extend=missing"

if T === Missing
x = @inferred cut(Vector{Union{T, Int}}([2, 3, 5]), [2, 5], extend=missing)
else
x = cut(Vector{Union{T, Int}}([2, 3, 5]), [2, 5], extend=missing)
end
@test x ≅ ["[2, 5)", "[2, 5)", missing]
@test x ≅ ["[2, 5]", "[2, 5]", "[2, 5]"]
@test isa(x, CategoricalVector{Union{String, Missing}})
@test isordered(x)
@test levels(x) == ["[2, 5)"]
@test levels(x) == ["[2, 5]"]

if T === Missing
x = @inferred cut(Vector{Union{T, Int}}([2, 3, 6]), [2, 5], extend=missing)
else
x = cut(Vector{Union{T, Int}}([2, 3, 6]), [2, 5], extend=missing)
end
@test x ≅ ["[2, 5]", "[2, 5]", missing]
@test isa(x, CategoricalVector{Union{String, Missing}})
@test isordered(x)
@test levels(x) == ["[2, 5]"]

x = @inferred cut(Vector{Union{T, Int}}([2, 3, 5]), [3, 6], extend=true)
@test x == ["[2, 3)", "[3, 6]", "[3, 6]"]
Expand All @@ -40,10 +50,10 @@ const ≅ = isequal
@test levels(x) == ["[2, 3)", "[3, 6]"]

x = @inferred cut(Vector{Union{T, Int}}([1, 2, 4]), [1, 3, 6])
@test x == ["[1, 3)", "[1, 3)", "[3, 6)"]
@test x == ["[1, 3)", "[1, 3)", "[3, 6]"]
@test isa(x, CategoricalVector{Union{String, T}})
@test isordered(x)
@test levels(x) == ["[1, 3)", "[3, 6)"]
@test levels(x) == ["[1, 3)", "[3, 6]"]

x = @inferred cut(Vector{Union{T, Int}}([1, 2, 4]), [3, 6], extend=true)
@test x == ["[1, 3)", "[1, 3)", "[3, 6]"]
Expand All @@ -67,10 +77,10 @@ const ≅ = isequal
breaks = [18, 25, 35, 60, 100]
x = @inferred cut(Vector{Union{T, Int}}(ages), breaks)
@test x == ["[18, 25)", "[18, 25)", "[25, 35)", "[25, 35)", "[18, 25)", "[18, 25)",
"[35, 60)", "[25, 35)", "[60, 100)", "[35, 60)", "[35, 60)", "[25, 35)"]
"[35, 60)", "[25, 35)", "[60, 100]", "[35, 60)", "[35, 60)", "[25, 35)"]
@test isa(x, CategoricalVector{Union{String, T}})
@test isordered(x)
@test levels(x) == ["[18, 25)", "[25, 35)", "[35, 60)", "[60, 100)"]
@test levels(x) == ["[18, 25)", "[25, 35)", "[35, 60)", "[60, 100]"]

breaks = [1, 6, 3] # Unsorted breaks
labels = ["b", "a"] # Differs from lexical ordering
Expand All @@ -83,10 +93,10 @@ const ≅ = isequal
@test levels(x) == ["b", "a"]

x = @inferred cut(Matrix{Union{Float64, T}}([-1.1 3.0; 1.456 10.394]), [-2.134, 3.0, 12.5])
@test x == ["[-2.134, 3.0)" "[3.0, 12.5)"; "[-2.134, 3.0)" "[3.0, 12.5)"]
@test x == ["[-2.134, 3.0)" "[3.0, 12.5]"; "[-2.134, 3.0)" "[3.0, 12.5]"]
@test isa(x, CategoricalMatrix{Union{String, T}})
@test isordered(x)
@test levels(x) == ["[-2.134, 3.0)", "[3.0, 12.5)"]
@test levels(x) == ["[-2.134, 3.0)", "[3.0, 12.5]"]

labels = 0:2:8
x = @inferred cut(Vector{Union{T, Int}}(1:8), 0:2:10, labels=labels)
Expand Down Expand Up @@ -179,7 +189,7 @@ end
@test_throws ArgumentError cut(1:10, [1, 5, 5, 11])
y = cut(1:10, [1, 5, 5, 11], allowempty=true)
@test y == cut(1:10, [1, 5, 11])
@test levels(y) == ["[1, 5)", "(5, 5)", "[5, 11)"]
@test levels(y) == ["[1, 5)", "(5, 5)", "[5, 11]"]

@test_throws ArgumentError cut(1:10, [1, 5, 5, 5, 11])
@test_throws ArgumentError cut(1:10, [1, 5, 5, 11],
Expand All @@ -191,29 +201,29 @@ end

@test_throws ArgumentError cut(1:10, [1, 5, 5, 11], labels=string.(1:3))
y = cut(1:10, [1, 5, 5, 11], allowempty=true, labels=string.(1:3))
@test y == recode(cut(1:10, [1, 5, 11]), "[1, 5)" => "1", "[5, 11)" => "3")
@test y == recode(cut(1:10, [1, 5, 11]), "[1, 5)" => "1", "[5, 11]" => "3")
@test levels(y) == string.(1:3)

@test_throws ArgumentError cut(1:10, [1, 5, 5, 5, 11], labels=string.(1:4))
y = cut(1:10, [1, 5, 5, 5, 11], allowempty=true, labels=string.(1:4))
@test y == recode(cut(1:10, [1, 5, 11]), "[1, 5)" => "1", "[5, 11)" => "4")
@test y == recode(cut(1:10, [1, 5, 11]), "[1, 5)" => "1", "[5, 11]" => "4")
@test levels(y) == string.(1:4)

@test_throws ArgumentError cut(1:10, [1, 5, 5, 5, 5, 11], labels=string.(1:5))
y = cut(1:10, [1, 5, 5, 5, 5, 11], allowempty=true, labels=string.(1:5))
@test y == recode(cut(1:10, [1, 5, 11]), "[1, 5)" => "1", "[5, 11)" => "5")
@test y == recode(cut(1:10, [1, 5, 11]), "[1, 5)" => "1", "[5, 11]" => "5")
@test levels(y) == string.(1:5)

@test_throws ArgumentError cut(1:10, [1, 3, 3, 5, 5, 11], labels=string.(1:5))
y = cut(1:10, [1, 3, 3, 5, 5, 11], allowempty=true, labels=string.(1:5))
@test y == recode(cut(1:10, [1, 3, 5, 11]),
"[1, 3)" => "1", "[3, 5)" => "3", "[5, 11)" => "5")
"[1, 3)" => "1", "[3, 5)" => "3", "[5, 11]" => "5")
@test levels(y) == string.(1:5)

@test_throws ArgumentError cut(1:10, [1, 3, 3, 3, 5, 5, 5, 11], labels=string.(1:7))
y = cut(1:10, [1, 3, 3, 3, 5, 5, 5, 11], allowempty=true, labels=string.(1:7))
@test y == recode(cut(1:10, [1, 3, 5, 11]),
"[1, 3)" => "1", "[3, 5)" => "4", "[5, 11)" => "7")
"[1, 3)" => "1", "[3, 5)" => "4", "[5, 11]" => "7")
@test levels(y) == string.(1:7)

@test_throws ArgumentError cut(1:10, [1, 3, 5, 5, 11],
Expand Down Expand Up @@ -255,9 +265,9 @@ end
end

@testset "cut with extend=missing" begin
x = @inferred cut([-0.0, 0.0, 1.0, 2.0, 3.0, 4.0], [-0.0, 0.0, 3.0],
x = @inferred cut([-0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0], [-0.0, 0.0, 3.0],
labels=[-0.0, 0.0], extend=missing)
@test x ≅ [-0.0, 0.0, 0.0, 0.0, missing, missing]
@test x ≅ [-0.0, 0.0, 0.0, 0.0, 0.0, missing, missing]
@test x isa CategoricalArray{Union{Missing, Float64},1,UInt32}
@test isordered(x)
@test levels(x) == [-0.0, 0.0]
Expand Down
Loading