From b77ec5d3bffe7d58c9d1448fba430fa8846d134a Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Mon, 28 Oct 2024 09:11:57 +0100 Subject: [PATCH 01/12] add mlj docstring --- src/mlj_interface.jl | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/src/mlj_interface.jl b/src/mlj_interface.jl index 878a21d..8521302 100644 --- a/src/mlj_interface.jl +++ b/src/mlj_interface.jl @@ -24,30 +24,6 @@ function MaxnetBinaryClassifier(; ) end -""" - MaxnetBinaryClassifier - - A model type for fitting a maxnet model using `MLJ`. - - Use `MaxnetBinaryClassifier()` to create an instance with default parameters, or use keyword arguments to specify parameters. - - The keywords `link`, and `clamp` are passed to [`Maxnet.predict`](@ref), while all other keywords are passed to [`maxnet`](@ref). - See the documentation of these functions for the meaning of these parameters and their defaults. - - # Example - ```jldoctest - using Maxnet, MLJBase - p_a, env = Maxnet.bradypus() - - mach = machine(MaxnetBinaryClassifier(features = "lqp"), env, categorical(p_a)) - fit!(mach) - yhat = MLJBase.predict(mach, env) - # output - ``` - -""" -MaxnetBinaryClassifier - MMI.metadata_pkg( MaxnetBinaryClassifier; name = "Maxnet", @@ -67,6 +43,26 @@ MMI.metadata_model( reports_feature_importances=false ) +""" +$(MMI.doc_header(MaxnetBinaryClassifier)) + +The keywords `link`, and `clamp` are passed to [`Maxnet.predict`](@ref), while all other keywords are passed to [`maxnet`](@ref). +See the documentation of these functions for the meaning of these parameters and their defaults. + +# Example +```jldoctest +using Maxnet, MLJBase +p_a, env = Maxnet.bradypus() + +mach = machine(MaxnetBinaryClassifier(features = "lqp"), env, categorical(p_a)) +fit!(mach) +yhat = MLJBase.predict(mach, env) +# output +``` + +""" +MaxnetBinaryClassifier + function MMI.fit(m::MaxnetBinaryClassifier, verbosity::Int, X, y) # convert categorical to boolean y_boolean = Bool.(MMI.int(y) .- 1) From e43b31377810d833ea0bc4a7fd17f80f010dc24e Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Mon, 28 Oct 2024 09:45:24 +0100 Subject: [PATCH 02/12] test with MLJTestInterface --- Project.toml | 5 ++++- test/runtests.jl | 12 +++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 034d148..c184514 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,7 @@ GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6" Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" Lasso = "b4fcebef-c861-5a0f-a7e2-ba9dc32b180a" MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" +MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" @@ -26,6 +27,7 @@ Interpolations = "0.15" Lasso = "0.7" MLJBase = "1" MLJModelInterface = "1" +MLJTestInterface = "0.2.6" Statistics = "1" StatsAPI = "1" StatsBase = "0.34" @@ -36,7 +38,8 @@ julia = "1.9" [extras] DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["DelimitedFiles", "MLJBase", "Test"] +test = ["DelimitedFiles", "MLJBase", "MLJTestInterface", "Test"] diff --git a/test/runtests.jl b/test/runtests.jl index ed8e29a..b43c57b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -using Maxnet, Test, Statistics, CategoricalArrays +using Maxnet, Test, Statistics, CategoricalArrays, MLJTestInterface p_a, env = Maxnet.bradypus() # Make the levels in ecoreg string to make sure that that works @@ -85,6 +85,16 @@ end m = maxnet(p_a, env; features = "lq", addsamplestobackground = false) @testset "MLJ" begin + data = MLJTestInterface.make_binary() + failures, summary = MLJTestInterface.test( + [MaxnetBinaryClassifier], + data...; + mod=@__MODULE__, + verbosity=0, # bump to debug + throw=false, # set to true to debug + ) + @test isempty(failures) + using MLJBase mn = Maxnet.MaxnetBinaryClassifier From c7b49b7bfc335503c07ffdd0580b783481c718f0 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Mon, 28 Oct 2024 09:48:08 +0100 Subject: [PATCH 03/12] throw a helpful error if input data only has one class --- src/mlj_interface.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/mlj_interface.jl b/src/mlj_interface.jl index 8521302..b9ca24b 100644 --- a/src/mlj_interface.jl +++ b/src/mlj_interface.jl @@ -67,6 +67,13 @@ function MMI.fit(m::MaxnetBinaryClassifier, verbosity::Int, X, y) # convert categorical to boolean y_boolean = Bool.(MMI.int(y) .- 1) + allequal(y_boolean) && error( + """ + Only class $(CategoricalArrays.get(first(y))) was found in the data. + Provide data with two classes that represent background and presence samples. + """ + ) + fitresult = maxnet( y_boolean, X; m.features, regularization_multiplier = m.regularization_multiplier, From c49e30379cf35962e2a958e713575f011bc7acf3 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Mon, 28 Oct 2024 09:51:08 +0100 Subject: [PATCH 04/12] mljtestinterface is not a dep (oops) --- Project.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Project.toml b/Project.toml index c184514..aa30d6f 100644 --- a/Project.toml +++ b/Project.toml @@ -11,7 +11,6 @@ GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6" Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" Lasso = "b4fcebef-c861-5a0f-a7e2-ba9dc32b180a" MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" -MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" @@ -27,7 +26,6 @@ Interpolations = "0.15" Lasso = "0.7" MLJBase = "1" MLJModelInterface = "1" -MLJTestInterface = "0.2.6" Statistics = "1" StatsAPI = "1" StatsBase = "0.34" From 93882430984a2981716e6accd07fb9c5d519f756 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Tue, 26 Nov 2024 09:28:13 +0100 Subject: [PATCH 05/12] move allequal error to main function --- src/maxnet_function.jl | 5 +++++ src/mlj_interface.jl | 7 ------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/maxnet_function.jl b/src/maxnet_function.jl index 9e87636..04f0f98 100644 --- a/src/maxnet_function.jl +++ b/src/maxnet_function.jl @@ -49,6 +49,11 @@ function maxnet( n_knots::Int = 50, kw...) + if allunique(presences) + pa = first(presences) ? "presences" : "absences" + ArgumentError("All data points are $pa. Maxnet will only work with at least some presences and some absences.") + end + _maxnet( presences, predictors, diff --git a/src/mlj_interface.jl b/src/mlj_interface.jl index b9ca24b..8521302 100644 --- a/src/mlj_interface.jl +++ b/src/mlj_interface.jl @@ -67,13 +67,6 @@ function MMI.fit(m::MaxnetBinaryClassifier, verbosity::Int, X, y) # convert categorical to boolean y_boolean = Bool.(MMI.int(y) .- 1) - allequal(y_boolean) && error( - """ - Only class $(CategoricalArrays.get(first(y))) was found in the data. - Provide data with two classes that represent background and presence samples. - """ - ) - fitresult = maxnet( y_boolean, X; m.features, regularization_multiplier = m.regularization_multiplier, From bd87fe9866bd7c86734cc1294b4ba2a4bd588055 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Tue, 26 Nov 2024 10:39:52 +0100 Subject: [PATCH 06/12] fix allequal error --- src/maxnet_function.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/maxnet_function.jl b/src/maxnet_function.jl index 04f0f98..fd414ac 100644 --- a/src/maxnet_function.jl +++ b/src/maxnet_function.jl @@ -49,9 +49,9 @@ function maxnet( n_knots::Int = 50, kw...) - if allunique(presences) + if allequal(presences) pa = first(presences) ? "presences" : "absences" - ArgumentError("All data points are $pa. Maxnet will only work with at least some presences and some absences.") + throw(ArgumentError("All data points are $pa. Maxnet will only work with at least some presences and some absences.")) end _maxnet( From c443eb4c6b5ac0b0836e5e835cf5708d2266ff9e Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Tue, 26 Nov 2024 10:40:07 +0100 Subject: [PATCH 07/12] fix tests --- test/runtests.jl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index b43c57b..ac6939b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,7 @@ -using Maxnet, Test, Statistics, CategoricalArrays, MLJTestInterface +using Maxnet, Statistics, CategoricalArrays, MLJTestInterface +using Test +# read in Bradypus data p_a, env = Maxnet.bradypus() # Make the levels in ecoreg string to make sure that that works env = merge(env, (; ecoreg = recode(env.ecoreg, (l => string(l) for l in levels(env.ecoreg))...))) @@ -82,7 +84,6 @@ end m = maxnet(p_a, env; features = "lq", addsamplestobackground = false) @test m_w.entropy > m.entropy end -m = maxnet(p_a, env; features = "lq", addsamplestobackground = false) @testset "MLJ" begin data = MLJTestInterface.make_binary() @@ -93,8 +94,10 @@ m = maxnet(p_a, env; features = "lq", addsamplestobackground = false) verbosity=0, # bump to debug throw=false, # set to true to debug ) - @test isempty(failures) - + # The test that only has absences is expected to fail, all other tests should pass + @test length(failures) == 1 + @test first(failures).exception == ArgumentError("All data points are absences. Maxnet will only work with at least some presences and some absences.") + using MLJBase mn = Maxnet.MaxnetBinaryClassifier From ae399904376eb0b39ac54b33ff255fc2e45da772 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Tue, 26 Nov 2024 14:51:54 +0100 Subject: [PATCH 08/12] add MLJBase as docs dep --- docs/Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/Project.toml b/docs/Project.toml index 491d2dd..3f5ef3c 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,3 +1,4 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" Maxnet = "81f79f80-22f2-4e41-ab86-00c11cf0f26f" From a7a15f83bf8ec801ed5802530a8178c597c4da50 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Tue, 26 Nov 2024 15:10:56 +0100 Subject: [PATCH 09/12] fix mlj doctest --- src/mlj_interface.jl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/mlj_interface.jl b/src/mlj_interface.jl index 8521302..68f701e 100644 --- a/src/mlj_interface.jl +++ b/src/mlj_interface.jl @@ -46,18 +46,25 @@ MMI.metadata_model( """ $(MMI.doc_header(MaxnetBinaryClassifier)) -The keywords `link`, and `clamp` are passed to [`Maxnet.predict`](@ref), while all other keywords are passed to [`maxnet`](@ref). +The keywords `link`, and `clamp` are passed to [`predict`](@ref), while all other keywords are passed to [`maxnet`](@ref). See the documentation of these functions for the meaning of these parameters and their defaults. # Example ```jldoctest -using Maxnet, MLJBase +using MLJBase p_a, env = Maxnet.bradypus() -mach = machine(MaxnetBinaryClassifier(features = "lqp"), env, categorical(p_a)) -fit!(mach) +mach = machine(MaxnetBinaryClassifier(features = "lqp"), env, categorical(p_a), scitype_check_level = 0) +fit!(mach, verbosity = 0) yhat = MLJBase.predict(mach, env) +yhat[1:5] # output +5-element UnivariateFiniteVector{Multiclass{2}, Bool, UInt32, Float64}: + UnivariateFinite{Multiclass{2}}(false=>0.812, true=>0.188) + UnivariateFinite{Multiclass{2}}(false=>0.811, true=>0.189) + UnivariateFinite{Multiclass{2}}(false=>0.501, true=>0.499) + UnivariateFinite{Multiclass{2}}(false=>0.811, true=>0.189) + UnivariateFinite{Multiclass{2}}(false=>0.752, true=>0.248) ``` """ From 585cbe9da609bc348e296e518d50100439a9db27 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Tue, 26 Nov 2024 15:50:48 +0100 Subject: [PATCH 10/12] attempt fix of multiclass printing --- docs/make.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index 234e065..cc3684e 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,7 +1,7 @@ using Maxnet using Documenter -DocMeta.setdocmeta!(Maxnet, :DocTestSetup, :(using Maxnet); recursive=true) +DocMeta.setdocmeta!(Maxnet, :DocTestSetup, :(using Maxnet; using MLJBase: Multiclass); recursive=true) makedocs(; modules=[Maxnet], From 6e38ccc4b45f7248818301b044ec18beeded5957 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Tue, 26 Nov 2024 16:09:43 +0100 Subject: [PATCH 11/12] use @example instead of jldoctest --- docs/make.jl | 2 +- src/mlj_interface.jl | 11 ++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index cc3684e..234e065 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,7 +1,7 @@ using Maxnet using Documenter -DocMeta.setdocmeta!(Maxnet, :DocTestSetup, :(using Maxnet; using MLJBase: Multiclass); recursive=true) +DocMeta.setdocmeta!(Maxnet, :DocTestSetup, :(using Maxnet); recursive=true) makedocs(; modules=[Maxnet], diff --git a/src/mlj_interface.jl b/src/mlj_interface.jl index 68f701e..e081801 100644 --- a/src/mlj_interface.jl +++ b/src/mlj_interface.jl @@ -50,21 +50,14 @@ The keywords `link`, and `clamp` are passed to [`predict`](@ref), while all othe See the documentation of these functions for the meaning of these parameters and their defaults. # Example -```jldoctest +```@example using MLJBase p_a, env = Maxnet.bradypus() mach = machine(MaxnetBinaryClassifier(features = "lqp"), env, categorical(p_a), scitype_check_level = 0) fit!(mach, verbosity = 0) yhat = MLJBase.predict(mach, env) -yhat[1:5] -# output -5-element UnivariateFiniteVector{Multiclass{2}, Bool, UInt32, Float64}: - UnivariateFinite{Multiclass{2}}(false=>0.812, true=>0.188) - UnivariateFinite{Multiclass{2}}(false=>0.811, true=>0.189) - UnivariateFinite{Multiclass{2}}(false=>0.501, true=>0.499) - UnivariateFinite{Multiclass{2}}(false=>0.811, true=>0.189) - UnivariateFinite{Multiclass{2}}(false=>0.752, true=>0.248) + ``` """ From b2b09be2c64e816a7021ca7d9815cde9590a8a7e Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Mon, 2 Dec 2024 10:22:23 +0100 Subject: [PATCH 12/12] test for no failures in mlj interface test --- test/runtests.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index ac6939b..e776b16 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -94,9 +94,7 @@ end verbosity=0, # bump to debug throw=false, # set to true to debug ) - # The test that only has absences is expected to fail, all other tests should pass - @test length(failures) == 1 - @test first(failures).exception == ArgumentError("All data points are absences. Maxnet will only work with at least some presences and some absences.") + @test isempty(failures) using MLJBase mn = Maxnet.MaxnetBinaryClassifier