From 0de3aec7063f49a56d8f23252a42a4af896c599e Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Sun, 14 Apr 2024 13:13:53 +1200
Subject: [PATCH 01/13] update @update doc-string

---
 src/registry/src/update.jl | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/registry/src/update.jl b/src/registry/src/update.jl
index 0000b361..5d823a84 100644
--- a/src/registry/src/update.jl
+++ b/src/registry/src/update.jl
@@ -63,6 +63,15 @@ To register all the models in GreatNewPackage with MLJ:
 the updated metadata available to users of the next MLJModels tagged release.
 
+Once a new MLJModels version is released, you must make the following updates at MLJ.jl:
+
+- Ensure `GreatNewPackage` is in the [extras] and [targets] sections of the Project.toml
+  for MLJ.jl (for inclusion in integration tests)
+
+- Add an entry for the new model(s) in MLJ/docs/ModelDescriptors.toml (for inclusion in
+  the MLJ Model Browser)
+
+These last two actions do not require tagging a new MLJ.jl release.
 
 """
 macro update(ex)

From 17578022018f08d31835a13021c52ee930605455 Mon Sep 17 00:00:00 2001
From: tiemvanderdeure
Date: Tue, 30 Apr 2024 15:43:09 +0200
Subject: [PATCH 02/13] change Float64 to AbstractFloat in fitresult

---
 src/builtins/Transformers.jl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/builtins/Transformers.jl b/src/builtins/Transformers.jl
index c1ddd62e..e14938a0 100644
--- a/src/builtins/Transformers.jl
+++ b/src/builtins/Transformers.jl
@@ -581,7 +581,7 @@ function MMI.fit(transformer::Standardizer, verbosity::Int, X)
     is_invertible = !transformer.count && !transformer.ordered_factor
 
     # initialize fitresult:
-    fitresult_given_feature = LittleDict{Symbol,Tuple{Float64,Float64}}()
+    fitresult_given_feature = LittleDict{Symbol,Tuple{AbstractFloat,AbstractFloat}}()
 
     # special univariate case:
     if is_univariate
@@ -631,7 +631,6 @@ function MMI.fit(transformer::Standardizer, verbosity::Int, X)
             )
         end
     end
-    fitresult_given_feature = Dict{Symbol,Tuple{Float64,Float64}}()
 
     isempty(cols_to_fit) && verbosity > -1 &&
         @warn "No features to standardize."

From ac82a225e7f7f651e00f71ffd86063ccf4c23b83 Mon Sep 17 00:00:00 2001
From: tiemvanderdeure
Date: Wed, 1 May 2024 13:12:21 +0200
Subject: [PATCH 03/13] update codecov action to v3

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 9b5d87fb..7bff42a8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -42,6 +42,6 @@ jobs:
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
       - uses: julia-actions/julia-processcoverage@v1
-      - uses: codecov/codecov-action@v1
+      - uses: codecov/codecov-action@v3
        with:
          file: lcov.info

From 66303c491ed58379af3a214d1b06e24c12295289 Mon Sep 17 00:00:00 2001
From: tiemvanderdeure
Date: Wed, 1 May 2024 13:19:11 +0200
Subject: [PATCH 04/13] use eps of the type of std(v)

---
 src/builtins/Transformers.jl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/builtins/Transformers.jl b/src/builtins/Transformers.jl
index e14938a0..b72ce890 100644
--- a/src/builtins/Transformers.jl
+++ b/src/builtins/Transformers.jl
@@ -503,9 +503,10 @@ mutable struct UnivariateStandardizer <: Unsupervised end
 
 function MMI.fit(transformer::UnivariateStandardizer, verbosity::Int,
                  v::AbstractVector{T}) where T<:Real
-    std(v) > eps(Float64) ||
+    stdv = std(v)
+    stdv > eps(typeof(stdv)) ||
         @warn "Extremely small standard deviation encountered in standardization."
-    fitresult = (mean(v), std(v))
+    fitresult = (mean(v), stdv)
     cache = nothing
     report = NamedTuple()
     return fitresult, cache, report

From 65172090203e26b3f1379d53d3614c0545532ab5 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Thu, 9 May 2024 09:44:13 +1200
Subject: [PATCH 05/13] fix urls broken by MLJ transfer

---
 README.md                            | 18 +++++++++---------
 src/builtins/Constant.jl             |  2 +-
 src/builtins/ThresholdPredictors.jl  |  2 +-
 src/builtins/Transformers.jl         |  2 +-
 test/builtins/ThresholdPredictors.jl |  2 +-
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index d19d17d9..060a0ace 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,13 @@
 # MLJModels
 
-[![Build Status](https://github.com/alan-turing-institute/MLJModels.jl/workflows/CI/badge.svg)](https://github.com/alan-turing-institute/MLJModels.jl/actions)
+[![Build Status](https://github.com/JuliaAI/MLJModels.jl/workflows/CI/badge.svg)](https://github.com/JuliaAI/MLJModels.jl/actions)
 
 Repository of the "built-in" models available for use in the
-[MLJ](https://github.com/alan-turing-institute/MLJ.jl) MLJ machine
+[MLJ](https://github.com/JuliaAI/MLJ.jl) machine
 learning framework; and the home of the MLJ model registry.
 
 For instructions on integrating a new model with MLJ visit
-[here](https://alan-turing-institute.github.io/MLJ.jl/dev/adding_models_for_general_use/)
+[here](https://JuliaAI.github.io/MLJ.jl/dev/adding_models_for_general_use/)
 
 ### Contents
 
@@ -19,7 +19,7 @@ For instructions on integrating a new model with MLJ visit
 ## Who is this repo for?
 
 General users of the MLJ machine learning platform should refer to
-[MLJ home page](https://alan-turing-institute.github.io/MLJ.jl/dev/)
+[MLJ home page](https://JuliaAI.github.io/MLJ.jl/dev/)
 for usage and installation instructions. MLJModels is a dependency of
 MLJ that the general user can ignore.
 
 model interfaces, whether they be:
 
   [`EvoTrees.jl`](https://github.com/Evovest/EvoTrees.jl/blob/master/src/MLJ.jl); or
 
 - implemented in a separate **interface package**, such as
-  [MLJDecisionTreeInterface.jl](https://github.com/alan-turing-institute/MLJDecisionTreeInterface.jl).
+  [MLJDecisionTreeInterface.jl](https://github.com/JuliaAI/MLJDecisionTreeInterface.jl).
 
 It is also a place for developers to add models (mostly transformers)
 such as `OneHotEncoder`, that are exported for "built-in" use in
 
@@ -63,16 +63,16 @@ MLJModels contains:
   models that can be called from MLJ using `@load`. Package developers
   can register new models by implementing the MLJ interface in their
   package and following [these
-  instructions](https://alan-turing-institute.github.io/MLJ.jl/dev/adding_models_for_general_use/).
+  instructions](https://JuliaAI.github.io/MLJ.jl/dev/adding_models_for_general_use/).
 
 ## Instructions for updating the MLJ model registry
 
 Generally model registration is performed by administrators. If you
 have an interface you would like registered, open an issue
-[here](https://github.com/alan-turing-institute/MLJ.jl/issues).
+[here](https://github.com/JuliaAI/MLJ.jl/issues).
 
 **Administrator instructions.** These are given in the
 `MLJModels.@update` document string.
After registering the model, make a PR to MLJ -updating [this dictionary of model descriptors](https://github.com/alan-turing-institute/MLJ.jl/blob/dev/docs/ModelDescriptors.toml) -to ensure the new models appear in the right places in MLJ's [Model Browser](https://alan-turing-institute.github.io/MLJ.jl/dev/model_browser/#Model-Browser) +updating [this dictionary of model descriptors](https://github.com/JuliaAI/MLJ.jl/blob/dev/docs/ModelDescriptors.toml) +to ensure the new models appear in the right places in MLJ's [Model Browser](https://JuliaAI.github.io/MLJ.jl/dev/model_browser/#Model-Browser) diff --git a/src/builtins/Constant.jl b/src/builtins/Constant.jl index 1cb7b10b..886d1644 100644 --- a/src/builtins/Constant.jl +++ b/src/builtins/Constant.jl @@ -99,7 +99,7 @@ metadata_pkg.( DeterministicConstantRegressor, DeterministicConstantClassifier), name = "MLJModels", uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7", - url = "https://github.com/alan-turing-institute/MLJModels.jl", + url = "https://github.com/JuliaAI/MLJModels.jl", julia = true, license = "MIT", is_wrapper = false) diff --git a/src/builtins/ThresholdPredictors.jl b/src/builtins/ThresholdPredictors.jl index a64e08bf..6614f1c7 100644 --- a/src/builtins/ThresholdPredictors.jl +++ b/src/builtins/ThresholdPredictors.jl @@ -327,7 +327,7 @@ MMI.package_name(::Type{<:ThresholdUnion}) = "MLJModels" MMI.package_uuid(::Type{<:ThresholdUnion}) = "" MMI.is_wrapper(::Type{<:ThresholdUnion}) = true MMI.package_url(::Type{<:ThresholdUnion}) = - "https://github.com/alan-turing-institute/MLJModels.jl" + "https://github.com/JuliaAI/MLJModels.jl" for New in THRESHOLD_TYPE_EXS New_str = string(New) diff --git a/src/builtins/Transformers.jl b/src/builtins/Transformers.jl index b72ce890..f8fcb916 100644 --- a/src/builtins/Transformers.jl +++ b/src/builtins/Transformers.jl @@ -1087,7 +1087,7 @@ metadata_pkg.( UnivariateTimeTypeToContinuous, InteractionTransformer), package_name = "MLJModels", package_uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7", - package_url = "https://github.com/alan-turing-institute/MLJModels.jl", + package_url = "https://github.com/JuliaAI/MLJModels.jl", is_pure_julia = true, package_license = "MIT") diff --git a/test/builtins/ThresholdPredictors.jl b/test/builtins/ThresholdPredictors.jl index 16a81f25..51f2aaf9 100644 --- a/test/builtins/ThresholdPredictors.jl +++ b/test/builtins/ThresholdPredictors.jl @@ -306,7 +306,7 @@ end # predict(mach2, X) @testset "serialization for atomic models with non-persistent fitresults" begin - # https://github.com/alan-turing-institute/MLJ.jl/issues/1099 + # https://github.com/JuliaAI/MLJ.jl/issues/1099 X, y = (; x = rand(8)), categorical(collect("OXXXXOOX"), ordered=true) deterministic_classifier = BinaryThresholdPredictor( EphemeralClassifier(), From 320a29cbded365ada09183cbb5cba3dd00dd0721 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 31 May 2024 12:02:14 +1200 Subject: [PATCH 06/13] bump 0.16.18 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 81420ba9..ec55839c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJModels" uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7" authors = ["Anthony D. Blaom "] -version = "0.16.17" +version = "0.16.18" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" From 62490207572657d794e92bc9191e24ef494ccb51 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Fri, 31 May 2024 12:06:52 +1200 Subject: [PATCH 07/13] bump compat MLJModelInterface = "1.10" --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index ec55839c..afaba46a 100644 --- a/Project.toml +++ b/Project.toml @@ -37,7 +37,7 @@ Distributions = "0.25" InteractiveUtils = "<0.0.1, 1" LinearAlgebra = "<0.0.1, 1" Markdown = "<0.0.1, 1" -MLJModelInterface = "1.4" +MLJModelInterface = "1.10" OrderedCollections = "1.1" Parameters = "0.12" Pkg = "<0.0.1, 1" From f900755db49a18ede63ea7a45b30905183b255a8 Mon Sep 17 00:00:00 2001 From: OkonSamuel Date: Sun, 2 Jun 2024 20:59:47 +0100 Subject: [PATCH 08/13] move FeatureSelector model to FeatureSelection.jl pkg --- src/builtins/Transformers.jl | 198 ++++------------------------------ test/builtins/Transformers.jl | 77 ------------- test/model_search.jl | 1 - 3 files changed, 21 insertions(+), 255 deletions(-) diff --git a/src/builtins/Transformers.jl b/src/builtins/Transformers.jl index f8fcb916..fd00d43d 100644 --- a/src/builtins/Transformers.jl +++ b/src/builtins/Transformers.jl @@ -183,90 +183,7 @@ function MMI.fitted_params(::FillImputer, fr) filler_given_feature=filler_given_feature) end - -# # FOR FEATURE (COLUMN) SELECTION - -mutable struct FeatureSelector <: Unsupervised - # features to be selected; empty means all - features::Union{Vector{Symbol}, Function} - ignore::Bool # features to be ignored -end - -# keyword constructor -function FeatureSelector( - ; - features::Union{AbstractVector{Symbol}, Function}=Symbol[], - ignore::Bool=false -) - transformer = FeatureSelector(features, ignore) - message = MMI.clean!(transformer) - isempty(message) || throw(ArgumentError(message)) - return transformer -end - -function MMI.clean!(transformer::FeatureSelector) - err = "" - if ( - typeof(transformer.features) <: AbstractVector{Symbol} && - isempty(transformer.features) && - transformer.ignore - ) - err *= "Features to be ignored must be specified in features field." 
- end - return err -end - -function MMI.fit(transformer::FeatureSelector, verbosity::Int, X) - all_features = Tables.schema(X).names - - if transformer.features isa AbstractVector{Symbol} - if isempty(transformer.features) - features = collect(all_features) - else - features = if transformer.ignore - !issubset(transformer.features, all_features) && verbosity > -1 && - @warn("Excluding non-existent feature(s).") - filter!(all_features |> collect) do ftr - !(ftr in transformer.features) - end - else - issubset(transformer.features, all_features) || - throw(ArgumentError("Attempting to select non-existent feature(s).")) - transformer.features |> collect - end - end - else - features = if transformer.ignore - filter!(all_features |> collect) do ftr - !(transformer.features(ftr)) - end - else - filter!(all_features |> collect) do ftr - transformer.features(ftr) - end - end - isempty(features) && throw( - ArgumentError("No feature(s) selected.\n The specified Bool-valued"* - " callable with the `ignore` option set to `$(transformer.ignore)` "* - "resulted in an empty feature set for selection") - ) - end - - fitresult = features - report = NamedTuple() - return fitresult, nothing, report -end - -MMI.fitted_params(::FeatureSelector, fitresult) = (features_to_keep=fitresult,) - -function MMI.transform(::FeatureSelector, features, X) - all(e -> e in Tables.schema(X).names, features) || - throw(ArgumentError("Supplied frame does not admit previously selected features.")) - return MMI.selectcols(X, features) -end - - -# # UNIVARIATE DISCRETIZER +## UNIVARIATE DISCRETIZER # helper function: reftype(::CategoricalArray{<:Any,<:Any,R}) where R = R @@ -1027,9 +944,14 @@ function MMI.transform(transformer::ContinuousEncoder, fitresult, X) features_to_keep, hot_encoder, hot_fitresult = values(fitresult) # dump unseen or untransformable features: - selector = FeatureSelector(features=features_to_keep) - selector_fitresult, _, _ = MMI.fit(selector, 0, X) - X0 = transform(selector, selector_fitresult, X) + if !issubset(features_to_keep, MMI.schema(X).names) + throw( + ArgumentError( + "Supplied frame does not admit previously selected features." + ) + ) + end + X0 = MMI.selectcols(X, features_to_keep) # one-hot encode: X1 = transform(hot_encoder, hot_fitresult, X0) @@ -1080,11 +1002,18 @@ end # # METADATA FOR ALL BUILT-IN TRANSFORMERS metadata_pkg.( - (FeatureSelector, UnivariateStandardizer, - UnivariateDiscretizer, Standardizer, - UnivariateBoxCoxTransformer, UnivariateFillImputer, - OneHotEncoder, FillImputer, ContinuousEncoder, - UnivariateTimeTypeToContinuous, InteractionTransformer), + ( + UnivariateStandardizer, + UnivariateDiscretizer, + Standardizer, + UnivariateBoxCoxTransformer, + UnivariateFillImputer, + OneHotEncoder, + FillImputer, + ContinuousEncoder, + UnivariateTimeTypeToContinuous, + InteractionTransformer + ), package_name = "MLJModels", package_uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7", package_url = "https://github.com/JuliaAI/MLJModels.jl", @@ -1106,11 +1035,6 @@ metadata_model(FillImputer, output_scitype = Table, load_path = "MLJModels.FillImputer") -metadata_model(FeatureSelector, - input_scitype = Table, - output_scitype = Table, - load_path = "MLJModels.FeatureSelector") - metadata_model(UnivariateDiscretizer, input_scitype = AbstractVector{<:Continuous}, output_scitype = AbstractVector{<:OrderedFactor}, @@ -1371,86 +1295,6 @@ See also [`UnivariateFillImputer`](@ref). 
""" FillImputer -""" -$(MLJModelInterface.doc_header(FeatureSelector)) - -Use this model to select features (columns) of a table, usually as -part of a model `Pipeline`. - - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, X) - -where - -- `X`: any table of input features, where "table" is in the sense of Tables.jl - -Train the machine using `fit!(mach, rows=...)`. - - -# Hyper-parameters - -- `features`: one of the following, with the behavior indicated: - - - `[]` (empty, the default): filter out all features (columns) which - were not encountered in training - - - non-empty vector of feature names (symbols): keep only the - specified features (`ignore=false`) or keep only unspecified - features (`ignore=true`) - - - function or other callable: keep a feature if the callable returns - `true` on its name. For example, specifying - `FeatureSelector(features = name -> name in [:x1, :x3], ignore = - true)` has the same effect as `FeatureSelector(features = [:x1, - :x3], ignore = true)`, namely to select all features, with the - exception of `:x1` and `:x3`. - -- `ignore`: whether to ignore or keep specified `features`, as - explained above - - -# Operations - -- `transform(mach, Xnew)`: select features from the table `Xnew` as - specified by the model, taking features seen during training into - account, if relevant - - -# Fitted parameters - -The fields of `fitted_params(mach)` are: - -- `features_to_keep`: the features that will be selected - - -# Example - -``` -using MLJ - -X = (ordinal1 = [1, 2, 3], - ordinal2 = coerce(["x", "y", "x"], OrderedFactor), - ordinal3 = [10.0, 20.0, 30.0], - ordinal4 = [-20.0, -30.0, -40.0], - nominal = coerce(["Your father", "he", "is"], Multiclass)); - -selector = FeatureSelector(features=[:ordinal3, ], ignore=true); - -julia> transform(fit!(machine(selector, X)), X) -(ordinal1 = [1, 2, 3], - ordinal2 = CategoricalValue{Symbol,UInt32}["x", "y", "x"], - ordinal4 = [-20.0, -30.0, -40.0], - nominal = CategoricalValue{String,UInt32}["Your father", "he", "is"],) - -``` -""" -FeatureSelector - - """ $(MLJModelInterface.doc_header(Standardizer)) diff --git a/test/builtins/Transformers.jl b/test/builtins/Transformers.jl index 6244c796..df6397c0 100644 --- a/test/builtins/Transformers.jl +++ b/test/builtins/Transformers.jl @@ -13,83 +13,6 @@ import MLJBase _get(x) = CategoricalArrays.DataAPI.unwrap(x) -#### FEATURE SELECTOR #### - -@testset "Feat Selector" begin - N = 100 - X = (Zn = rand(N), - Crim = rand(N), - x3 = categorical(rand("YN", N)), - x4 = categorical(rand("YN", N))) - - # Test feature selection with `features=Symbol[]` - namesX = Tables.schema(X).names |> collect - selector = FeatureSelector() - f, = MLJBase.fit(selector, 1, X) - @test f == namesX - Xt = MLJBase.transform(selector, f, MLJBase.selectrows(X, 1:2)) - @test Set(Tables.schema(Xt).names) == Set(namesX) - @test length(Xt.Zn) == 2 - - # Test on selecting features if `features` keyword is defined - selector = FeatureSelector(features=[:Zn, :Crim]) - f, = MLJBase.fit(selector, 1, X) - @test MLJBase.transform(selector, f, MLJBase.selectrows(X, 1:2)) == - MLJBase.select(X, 1:2, [:Zn, :Crim]) - - # test on ignoring a feature, even if it's listed in the `features` - selector.ignore = true - f, = MLJBase.fit(selector, 1, X) - Xnew = MLJBase.transform(selector, f, X) - @test MLJBase.transform(selector, f, MLJBase.selectrows(X, 1:2)) == - MLJBase.select(X, 1:2, [:x3, :x4]) - - # test error about features selected or excluded in fit. 
- selector = FeatureSelector(features=[:x1, :mickey_mouse]) - @test_throws( - ArgumentError, - MLJBase.fit(selector, 1, X) - ) - selector.ignore = true - @test_logs( - (:warn, r"Excluding non-existent"), - MLJBase.fit(selector, 1, X) - ) - - # features must be specified if ignore=true - @test_throws ArgumentError FeatureSelector(ignore=true) - - # test logs for no features selected when using Bool-Callable function interface: - selector = FeatureSelector(features= x-> x == (:x1)) - @test_throws( - ArgumentError, - MLJBase.fit(selector, 1, X) - ) - selector.ignore = true - selector.features = x-> x in [:Zn, :Crim, :x3, :x4] - @test_throws( - ArgumentError, - MLJBase.fit(selector, 1, X) - ) - - # Test model Metadata - infos = MLJModels.info_dict(selector) - @test infos[:input_scitype] == MLJBase.Table - @test infos[:output_scitype] == MLJBase.Table -end - - -# To be added with FeatureSelectorRule X = (n1=["a", "b", "a"], n2=["g", "g", "g"], n3=[7, 8, 9], -# n4 =UInt8[3,5,10], o1=[4.5, 3.6, 4.0], ) -# MLJBase.schema(X) -# Xc = coerce(X, :n1=>Multiclass, :n2=>Multiclass) - -# t = Discretizer(features=[:o1, :n3, :n2, :n1]) -# @test Xt.features == [:o1, :n3, :n2, :n1] -# @test Xt.is_ordinal == [true, false, false, false] -# @test Xt.A == [512 1 1 1; 1 2 1 2; 256 3 1 1] - - #### UNIVARIATE DISCRETIZATION #### @testset "U-Discr" begin diff --git a/test/model_search.jl b/test/model_search.jl index 9295bd98..87b03dda 100644 --- a/test/model_search.jl +++ b/test/model_search.jl @@ -47,7 +47,6 @@ end DeterministicConstantRegressor, ConstantClassifier, ConstantRegressor, - FeatureSelector, OneHotEncoder, Standardizer, UnivariateBoxCoxTransformer, From af1ab32c50b1f5337c69fbb04d0320bc5e489a11 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 3 Jun 2024 11:32:43 +1200 Subject: [PATCH 09/13] steps to include wrappers in registry and as option in search --- src/MLJModels.jl | 10 +- src/builtins/ThresholdPredictors.jl | 10 +- src/metadata.jl | 10 +- src/model_search.jl | 43 +- src/registry/Metadata.toml | 1437 ++++++++++++++++++++++++--- src/registry/Models.toml | 7 +- src/registry/Project.toml | 5 + src/registry/src/Registry.jl | 2 +- src/registry/src/update.jl | 33 +- 9 files changed, 1391 insertions(+), 166 deletions(-) diff --git a/src/MLJModels.jl b/src/MLJModels.jl index 639df9f3..474532c9 100755 --- a/src/MLJModels.jl +++ b/src/MLJModels.jl @@ -59,6 +59,11 @@ nonmissing = nonmissingtype include("utilities.jl") +# load built-in models: +include("builtins/Constant.jl") +include("builtins/Transformers.jl") +include("builtins/ThresholdPredictors.jl") + Handle = NamedTuple{(:name, :pkg), Tuple{String,String}} (::Type{Handle})(name,string) = NamedTuple{(:name, :pkg)}((name, string)) @@ -79,11 +84,6 @@ include("loading.jl") include("registry/src/Registry.jl") using .Registry -# load built-in models: -include("builtins/Constant.jl") -include("builtins/Transformers.jl") -include("builtins/ThresholdPredictors.jl") - # finalize: include("init.jl") diff --git a/src/builtins/ThresholdPredictors.jl b/src/builtins/ThresholdPredictors.jl index 6614f1c7..18e6da9f 100644 --- a/src/builtins/ThresholdPredictors.jl +++ b/src/builtins/ThresholdPredictors.jl @@ -328,14 +328,8 @@ MMI.package_uuid(::Type{<:ThresholdUnion}) = "" MMI.is_wrapper(::Type{<:ThresholdUnion}) = true MMI.package_url(::Type{<:ThresholdUnion}) = "https://github.com/JuliaAI/MLJModels.jl" - -for New in THRESHOLD_TYPE_EXS - New_str = string(New) - quote - MMI.load_path(::Type{<:$New{M}}) where M = "MLJModels."*$New_str - end |> eval -end - 
+MMI.load_path(::Type{<:ThresholdUnion}) = "MLJModels.BinaryThresholdPredictor"
+MMI.constructor(::Type{<:ThresholdUnion}) = BinaryThresholdPredictor
 
 for trait in [:supports_weights,
               :supports_class_weights,
diff --git a/src/metadata.jl b/src/metadata.jl
index 7d5c81f8..99477caf 100644
--- a/src/metadata.jl
+++ b/src/metadata.jl
@@ -26,7 +26,14 @@ function decode_dic(s::String)
     if s[1] == ':'
         return Symbol(s[2:end])
     elseif s[1] == '`' && s[2] != '`' # to exclude strings starting with ```
-        return eval(Meta.parse(s[2:end-1]))
+        ex = Meta.parse(s[2:end-1])
+        # We need a `try` here because the `constructor` trait generally returns a
+        # function not in the namespace, as the package defining it has not been loaded.
+        return try
+            eval(ex)
+        catch
+            ex
+        end
     else
         return s
     end
@@ -150,4 +157,3 @@ function model_traits_in_registry(info_given_handle)
     first_entry = info_given_handle[Handle("ConstantRegressor")]
     return keys(first_entry) |> collect
 end
-
diff --git a/src/model_search.jl b/src/model_search.jl
index 46cacff3..b85e9e98 100644
--- a/src/model_search.jl
+++ b/src/model_search.jl
@@ -327,31 +327,27 @@ matching(realmodel::Model, args...) = matching(info(realmodel), args...)
 
 """
-    models()
+    models(; wrappers=false)
 
-List all models in the MLJ registry. Here and below *model* means the
-registry metadata entry for a genuine model type (a proxy for types
-whose defining code may not be loaded).
+List all models in the MLJ registry. Here and below *model* means the registry metadata
+entry for a genuine model type (a proxy for types whose defining code may not be
+loaded). To include wrappers and other composite models, such as `TunedModel` and `Stack`,
+specify `wrappers=true`.
 
-    models(filters..)
+    models(filters...; wrappers=false)
 
 List all models `m` for which `filter(m)` is true, for each `filter` in `filters`.
 
-    models(matching(X, y))
+    models(matching(X, y); wrappers=false)
 
 List all supervised models compatible with training data `X`, `y`.
 
-    models(matching(X))
+    models(matching(X); wrappers=false)
 
 List all unsupervised models compatible with training data `X`.
 
-Excluded in the listings are the built-in model-wraps, like `EnsembleModel`,
-`TunedModel`, and `IteratedModel`.
-
-
-
 ### Example
 
 If
@@ -364,7 +360,8 @@ predictions.
 
 See also: [`localmodels`](@ref).
 
 """
-function models(conditions...)
+function models(conditions...; wrappers=false)
+    wrappers || (conditions = (conditions..., m-> !m.is_wrapper))
     unsorted = filter(info.(keys(INFO_GIVEN_HANDLE))) do model
         all(c(model) for c in conditions)
     end
@@ -372,30 +369,30 @@ end
 
 """
-    models(needle::Union{AbstractString,Regex})
+    models(needle::Union{AbstractString,Regex}; wrappers=false)
 
 List all models whose `name` or `docstring` matches a given `needle`.
 """
-function models(needle::Union{AbstractString,Regex})
+function models(needle::Union{AbstractString,Regex}; kwargs...)
     f = model -> occursin(needle, model.name) ||
         occursin(needle, model.docstring)
-    return models(f)
+    return models(f; kwargs...)
end # get the model types in top-level of given module's namespace: -function localmodeltypes(modl; toplevel=false) +function localmodeltypes(modl; toplevel=false, wrappers=false) ft = finaltypes(Model) return filter!(ft) do M name = MLJModelInterface.name(M) test1 = !toplevel || isdefined(modl, Symbol(name)) - !MLJModelInterface.is_wrapper(M) && test1 + (!MLJModelInterface.is_wrapper(M) || wrappers) && test1 end end """ - localmodels(; modl=Main) - localmodels(filters...; modl=Main) - localmodels(needle::Union{AbstractString,Regex}; modl=Main) + localmodels(; modl=Main, wrappers=false) + localmodels(filters...; modl=Main, wrappers=false) + localmodels(needle::Union{AbstractString,Regex}; modl=Main, wrappers=false) List all models currently available to the user from the module `modl` without importing a package, and which additional pass through the @@ -412,8 +409,8 @@ examples: See also [`models`](@ref), [`load_path`](@ref). """ -function localmodels(args...; modl=Main, toplevel=false) - modeltypes = localmodeltypes(modl, toplevel=toplevel) +function localmodels(args...; modl=Main, kwargs...) + modeltypes = localmodeltypes(modl; kwargs...) handles = map(modeltypes) do M Handle(MMI.name(M), MMI.package_name(M)) end diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index 842e25ce..5a35aef2 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -32,6 +32,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.GaussianMixtureImputer] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" @@ -66,6 +67,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.RandomForestClassifier] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" @@ -100,6 +102,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.RandomForestImputer] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" @@ -134,6 +137,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.PerceptronClassifier] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" @@ -168,6 +172,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.AutoEncoder] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" @@ -202,6 +207,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.DecisionTreeRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, 
AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" @@ -236,6 +242,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.PegasosClassifier] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" @@ -270,6 +277,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.NeuralNetworkRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" @@ -304,6 +312,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.KMeansClusterer] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -338,6 +347,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.MultitargetGaussianMixtureRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" @@ -372,6 +382,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.GaussianMixtureRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" @@ -406,6 +417,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.MultitargetNeuralNetworkRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" @@ -440,6 +452,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.DecisionTreeClassifier] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" @@ -474,6 +487,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.GeneralImputer] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" @@ -508,6 +522,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.NeuralNetworkClassifier] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" @@ -542,6 +557,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = 
"`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.SimpleImputer] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" @@ -576,6 +592,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.GaussianMixtureClusterer] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" @@ -610,6 +627,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.KernelPerceptronClassifier] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" @@ -644,6 +662,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [BetaML.KMedoidsClusterer] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -678,6 +697,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [CatBoost.CatBoostRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -712,6 +732,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [CatBoost.CatBoostClassifier] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -746,6 +767,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [NearestNeighborModels.KNNClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -780,6 +802,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [NearestNeighborModels.MultitargetKNNClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -814,6 +837,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [NearestNeighborModels.MultitargetKNNRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -848,6 +872,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [NearestNeighborModels.KNNRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -882,6 +907,7 @@ 
":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ProbabilisticSGDClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -916,6 +942,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeCVClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -950,6 +977,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LogisticClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -984,6 +1012,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RandomForestRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" @@ -1010,14 +1039,15 @@ ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :ccp_alpha, :max_samples)`" -":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Float64\", \"Union{Nothing, Float64, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :ccp_alpha, :max_samples, :monotonic_cst)`" +":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ElasticNetCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1052,6 
+1082,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.PerceptronClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1086,6 +1117,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskLassoRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1120,6 +1152,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LinearRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1154,6 +1187,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HDBSCAN] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1188,6 +1222,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.DBSCAN] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1222,6 +1257,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1256,6 +1292,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoLarsICRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1275,21 +1312,22 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nLassoLarsICRegressor\n```\n\nA model type for constructing a Lasso model with LARS using BIC or AIC for model selection, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoLarsICRegressor = @load LassoLarsICRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoLarsICRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoLarsICRegressor(criterion=...)`.\n# Hyper-parameters\n\n- `criterion = aic`\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `normalize = false`\n\n- `precompute = auto`\n\n- `max_iter = 500`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `positive = false`\n\n" +":docstring" = "```\nLassoLarsICRegressor\n```\n\nA model type for constructing a Lasso model with LARS using BIC or AIC for model selection, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoLarsICRegressor = @load LassoLarsICRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoLarsICRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoLarsICRegressor(criterion=...)`.\n# Hyper-parameters\n\n- `criterion = aic`\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `precompute = auto`\n\n- `max_iter = 500`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `positive = false`\n\n" ":name" = "LassoLarsICRegressor" ":human_name" = "Lasso model with LARS using BIC or AIC for model selection" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:criterion, :fit_intercept, :verbose, :normalize, :precompute, :max_iter, :eps, :copy_X, :positive)`" -":hyperparameter_types" = "`(\"String\", \"Bool\", \"Union{Bool, Int64}\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:criterion, :fit_intercept, :verbose, :precompute, :max_iter, :eps, :copy_X, :positive)`" +":hyperparameter_types" = "`(\"String\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ARDRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1309,14 +1347,14 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nARDRegressor\n```\n\nA model type for constructing a Bayesian ARD regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nARDRegressor = @load ARDRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = ARDRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`ARDRegressor(n_iter=...)`.\n# Hyper-parameters\n\n- `n_iter = 300`\n\n- `tol = 0.001`\n\n- `alpha_1 = 1.0e-6`\n\n- `alpha_2 = 1.0e-6`\n\n- `lambda_1 = 1.0e-6`\n\n- `lambda_2 = 1.0e-6`\n\n- `compute_score = false`\n\n- `threshold_lambda = 10000.0`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `verbose = false`\n\n" +":docstring" = "```\nARDRegressor\n```\n\nA model type for constructing a Bayesian ARD regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nARDRegressor = @load ARDRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = ARDRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`ARDRegressor(max_iter=...)`.\n# Hyper-parameters\n\n- `max_iter = 300`\n\n- `tol = 0.001`\n\n- `alpha_1 = 1.0e-6`\n\n- `alpha_2 = 1.0e-6`\n\n- `lambda_1 = 1.0e-6`\n\n- `lambda_2 = 1.0e-6`\n\n- `compute_score = false`\n\n- `threshold_lambda = 10000.0`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `verbose = false`\n\n" ":name" = "ARDRegressor" ":human_name" = "Bayesian ARD regressor" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:n_iter, :tol, :alpha_1, :alpha_2, :lambda_1, :lambda_2, :compute_score, :threshold_lambda, :fit_intercept, :copy_X, :verbose)`" +":hyperparameters" = "`(:max_iter, :tol, :alpha_1, :alpha_2, :lambda_1, :lambda_2, :compute_score, :threshold_lambda, :fit_intercept, :copy_X, :verbose)`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Bool\", \"Bool\", \"Bool\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" @@ -1324,6 +1362,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMNuRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1358,6 +1397,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1392,6 +1432,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SGDRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1426,6 +1467,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ComplementNBClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" @@ -1460,6 +1502,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HuberRegressor] 
":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1494,6 +1537,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMNuClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1528,6 +1572,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GradientBoostingClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1562,6 +1607,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GaussianProcessRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1596,6 +1642,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMLinearRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1630,6 +1677,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LarsRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1649,21 +1697,22 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nLarsRegressor\n```\n\nA model type for constructing a least angle regressor (LARS), based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLarsRegressor = @load LarsRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LarsRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LarsRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `normalize = false`\n\n- `precompute = auto`\n\n- `n_nonzero_coefs = 500`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `fit_path = true`\n\n" +":docstring" = "```\nLarsRegressor\n```\n\nA model type for constructing a least angle regressor (LARS), based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLarsRegressor = @load LarsRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LarsRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`LarsRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `precompute = auto`\n\n- `n_nonzero_coefs = 500`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `fit_path = true`\n\n" ":name" = "LarsRegressor" ":human_name" = "least angle regressor (LARS)" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:fit_intercept, :verbose, :normalize, :precompute, :n_nonzero_coefs, :eps, :copy_X, :fit_path)`" -":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:fit_intercept, :verbose, :precompute, :n_nonzero_coefs, :eps, :copy_X, :fit_path)`" +":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MeanShift] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1698,6 +1747,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HistGradientBoostingClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1724,14 +1774,15 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:loss, :learning_rate, :max_iter, :max_leaf_nodes, :max_depth, :min_samples_leaf, :l2_regularization, :max_bins, :categorical_features, :monotonic_cst, :warm_start, :early_stopping, :scoring, :validation_fraction, :n_iter_no_change, :tol, :random_state, :class_weight)`" -":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:loss, :learning_rate, :max_iter, :max_leaf_nodes, :max_depth, :min_samples_leaf, :l2_regularization, :max_bins, :categorical_features, :monotonic_cst, :interaction_cst, :warm_start, :early_stopping, :scoring, :validation_fraction, :n_iter_no_change, :tol, :random_state, :class_weight)`" +":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Float64\", 
\"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Any\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AdaBoostRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1766,6 +1817,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AffinityPropagation] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1800,6 +1852,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskLassoCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1834,6 +1887,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.OrthogonalMatchingPursuitRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1853,21 +1907,22 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nOrthogonalMatchingPursuitRegressor\n```\n\nA model type for constructing a orthogonal matching pursuit regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nOrthogonalMatchingPursuitRegressor = @load OrthogonalMatchingPursuitRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = OrthogonalMatchingPursuitRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`OrthogonalMatchingPursuitRegressor(n_nonzero_coefs=...)`.\n# Hyper-parameters\n\n- `n_nonzero_coefs = nothing`\n\n- `tol = nothing`\n\n- `fit_intercept = true`\n\n- `normalize = false`\n\n- `precompute = auto`\n\n" +":docstring" = "```\nOrthogonalMatchingPursuitRegressor\n```\n\nA model type for constructing a orthogonal matching pursuit regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nOrthogonalMatchingPursuitRegressor = @load OrthogonalMatchingPursuitRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = OrthogonalMatchingPursuitRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`OrthogonalMatchingPursuitRegressor(n_nonzero_coefs=...)`.\n# Hyper-parameters\n\n- `n_nonzero_coefs = nothing`\n\n- `tol = nothing`\n\n- `fit_intercept = true`\n\n- `precompute = auto`\n\n" ":name" = "OrthogonalMatchingPursuitRegressor" ":human_name" = "orthogonal matching pursuit regressor" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:n_nonzero_coefs, :tol, :fit_intercept, :normalize, :precompute)`" -":hyperparameter_types" = "`(\"Union{Nothing, Int64}\", \"Union{Nothing, Float64}\", \"Bool\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:n_nonzero_coefs, :tol, :fit_intercept, :precompute)`" +":hyperparameter_types" = "`(\"Union{Nothing, Int64}\", \"Union{Nothing, Float64}\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1902,6 +1957,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.PassiveAggressiveClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1936,6 +1992,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1970,6 +2027,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BernoulliNBClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" @@ -2004,6 +2062,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GaussianNBClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2038,6 +2097,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ExtraTreesClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2064,14 +2124,15 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :class_weight)`" 
-":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :class_weight, :ccp_alpha, :max_samples, :monotonic_cst)`" +":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.KMeans] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2106,6 +2167,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskElasticNetCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2140,6 +2202,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoLarsCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2159,21 +2222,22 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nLassoLarsCVRegressor\n```\n\nA model type for constructing a Lasso model fit with least angle regression (LARS) with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoLarsCVRegressor = @load LassoLarsCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoLarsCVRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoLarsCVRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `max_iter = 500`\n\n- `normalize = false`\n\n- `precompute = auto`\n\n- `cv = 5`\n\n- `max_n_alphas = 1000`\n\n- `n_jobs = nothing`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `positive = false`\n\n" +":docstring" = "```\nLassoLarsCVRegressor\n```\n\nA model type for constructing a Lasso model fit with least angle regression (LARS) with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoLarsCVRegressor = @load LassoLarsCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoLarsCVRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoLarsCVRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `max_iter = 500`\n\n- `precompute = auto`\n\n- `cv = 5`\n\n- `max_n_alphas = 1000`\n\n- `n_jobs = nothing`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `positive = false`\n\n" ":name" = "LassoLarsCVRegressor" ":human_name" = "Lasso model fit with least angle regression (LARS) with built-in cross-validation" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:fit_intercept, :verbose, :max_iter, :normalize, :precompute, :cv, :max_n_alphas, :n_jobs, :eps, :copy_X, :positive)`" -":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", \"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:fit_intercept, :verbose, :max_iter, :precompute, :cv, :max_n_alphas, :n_jobs, :eps, :copy_X, :positive)`" +":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", \"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.OrthogonalMatchingPursuitCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2193,21 +2257,22 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nOrthogonalMatchingPursuitCVRegressor\n```\n\nA model type for constructing a orthogonal ,atching pursuit (OMP) model with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nOrthogonalMatchingPursuitCVRegressor = @load OrthogonalMatchingPursuitCVRegressor 
pkg=MLJScikitLearnInterface\n```\n\nDo `model = OrthogonalMatchingPursuitCVRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`OrthogonalMatchingPursuitCVRegressor(copy=...)`.\n# Hyper-parameters\n\n- `copy = true`\n\n- `fit_intercept = true`\n\n- `normalize = false`\n\n- `max_iter = nothing`\n\n- `cv = 5`\n\n- `n_jobs = 1`\n\n- `verbose = false`\n\n" +":docstring" = "```\nOrthogonalMatchingPursuitCVRegressor\n```\n\nA model type for constructing an orthogonal matching pursuit (OMP) model with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nOrthogonalMatchingPursuitCVRegressor = @load OrthogonalMatchingPursuitCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = OrthogonalMatchingPursuitCVRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`OrthogonalMatchingPursuitCVRegressor(copy=...)`.\n# Hyper-parameters\n\n- `copy = true`\n\n- `fit_intercept = true`\n\n- `max_iter = nothing`\n\n- `cv = 5`\n\n- `n_jobs = 1`\n\n- `verbose = false`\n\n" ":name" = "OrthogonalMatchingPursuitCVRegressor" ":human_name" = "orthogonal matching pursuit (OMP) model with built-in cross-validation" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:copy, :fit_intercept, :normalize, :max_iter, :cv, :n_jobs, :verbose)`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Union{Nothing, Int64}\", \"Union{Bool, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:copy, :fit_intercept, :max_iter, :cv, :n_jobs, :verbose)`" +":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Union{Nothing, Int64}\", \"Union{Bool, Int64}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AdaBoostClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2242,6 +2307,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.PassiveAggressiveRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2276,6 +2342,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BayesianRidgeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2295,14 +2362,14 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nBayesianRidgeRegressor\n```\n\nA model type for constructing a Bayesian ridge regressor, based 
on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nBayesianRidgeRegressor = @load BayesianRidgeRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = BayesianRidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`BayesianRidgeRegressor(n_iter=...)`.\n# Hyper-parameters\n\n- `n_iter = 300`\n\n- `tol = 0.001`\n\n- `alpha_1 = 1.0e-6`\n\n- `alpha_2 = 1.0e-6`\n\n- `lambda_1 = 1.0e-6`\n\n- `lambda_2 = 1.0e-6`\n\n- `compute_score = false`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `verbose = false`\n\n" +":docstring" = "```\nBayesianRidgeRegressor\n```\n\nA model type for constructing a Bayesian ridge regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nBayesianRidgeRegressor = @load BayesianRidgeRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = BayesianRidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`BayesianRidgeRegressor(max_iter=...)`.\n# Hyper-parameters\n\n- `max_iter = 300`\n\n- `tol = 0.001`\n\n- `alpha_1 = 1.0e-6`\n\n- `alpha_2 = 1.0e-6`\n\n- `lambda_1 = 1.0e-6`\n\n- `lambda_2 = 1.0e-6`\n\n- `compute_score = false`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `verbose = false`\n\n" ":name" = "BayesianRidgeRegressor" ":human_name" = "Bayesian ridge regressor" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:n_iter, :tol, :alpha_1, :alpha_2, :lambda_1, :lambda_2, :compute_score, :fit_intercept, :copy_X, :verbose)`" +":hyperparameters" = "`(:max_iter, :tol, :alpha_1, :alpha_2, :lambda_1, :lambda_2, :compute_score, :fit_intercept, :copy_X, :verbose)`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" @@ -2310,6 +2377,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RANSACRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2344,6 +2412,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BaggingClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2378,6 +2447,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GaussianProcessClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2412,6 +2482,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" 
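The `BayesianRidgeRegressor` entry above records the upstream rename of the `n_iter` hyper-parameter to `max_iter`. A minimal usage sketch of the updated construction call, assuming MLJ and MLJScikitLearnInterface are installed in the active environment (model, keyword names, and default values are taken from the registry entry above):

```julia
using MLJ  # provides @load

# Load the model type registered above:
BayesianRidgeRegressor = @load BayesianRidgeRegressor pkg=MLJScikitLearnInterface

# `max_iter` replaces the old `n_iter` keyword; 300 and 0.001 are the
# defaults listed in the docstring above:
model = BayesianRidgeRegressor(max_iter=300, tol=0.001)
```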
[MLJScikitLearnInterface.OPTICS] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2446,6 +2517,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.KNeighborsRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2480,6 +2552,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HistGradientBoostingRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2506,14 +2579,15 @@ ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:loss, :quantile, :learning_rate, :max_iter, :max_leaf_nodes, :max_depth, :min_samples_leaf, :l2_regularization, :max_bins, :categorical_features, :monotonic_cst, :warm_start, :early_stopping, :scoring, :validation_fraction, :n_iter_no_change, :tol, :random_state)`" -":hyperparameter_types" = "`(\"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:loss, :quantile, :learning_rate, :max_iter, :max_leaf_nodes, :max_depth, :min_samples_leaf, :l2_regularization, :max_bins, :categorical_features, :monotonic_cst, :interaction_cst, :warm_start, :early_stopping, :scoring, :validation_fraction, :n_iter_no_change, :tol, :random_state)`" +":hyperparameter_types" = "`(\"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Any\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MiniBatchKMeans] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2548,6 +2622,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2582,6 +2657,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" 
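The `HistGradientBoostingRegressor` entry above now lists `interaction_cst` (typed `Any` in the registry) among the hyper-parameters. A minimal sketch of setting it at construction time, under the same installation assumptions as the previous example; the particular constraint value is illustrative only:

```julia
using MLJ

HistGradientBoostingRegressor =
    @load HistGradientBoostingRegressor pkg=MLJScikitLearnInterface

# scikit-learn accepts, e.g., groups of 0-based feature indices that are
# allowed to interact; this value is purely illustrative:
model = HistGradientBoostingRegressor(interaction_cst=[[0, 1], [2]])
```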
[MLJScikitLearnInterface.DummyRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2616,6 +2692,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BisectingKMeans] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2650,6 +2727,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoLarsRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2669,21 +2747,22 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nLassoLarsRegressor\n```\n\nA model type for constructing a Lasso model fit with least angle regression (LARS), based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoLarsRegressor = @load LassoLarsRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoLarsRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoLarsRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `normalize = false`\n\n- `precompute = auto`\n\n- `max_iter = 500`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `fit_path = true`\n\n- `positive = false`\n\n" +":docstring" = "```\nLassoLarsRegressor\n```\n\nA model type for constructing a Lasso model fit with least angle regression (LARS), based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoLarsRegressor = @load LassoLarsRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoLarsRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoLarsRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `precompute = auto`\n\n- `max_iter = 500`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `fit_path = true`\n\n- `positive = false`\n\n" ":name" = "LassoLarsRegressor" ":human_name" = "Lasso model fit with least angle regression (LARS)" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:alpha, :fit_intercept, :verbose, :normalize, :precompute, :max_iter, :eps, :copy_X, :fit_path, :positive)`" -":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Bool, Int64}\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:alpha, :fit_intercept, :verbose, :precompute, :max_iter, :eps, :copy_X, :fit_path, :positive)`" +":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LarsCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2703,21 +2782,22 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nLarsCVRegressor\n```\n\nA model type for constructing a least angle regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLarsCVRegressor = @load LarsCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LarsCVRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LarsCVRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `max_iter = 500`\n\n- `normalize = false`\n\n- `precompute = auto`\n\n- `cv = 5`\n\n- `max_n_alphas = 1000`\n\n- `n_jobs = nothing`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n" +":docstring" = "```\nLarsCVRegressor\n```\n\nA model type for constructing a least angle regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLarsCVRegressor = @load LarsCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LarsCVRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`LarsCVRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `max_iter = 500`\n\n- `precompute = auto`\n\n- `cv = 5`\n\n- `max_n_alphas = 1000`\n\n- `n_jobs = nothing`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n" ":name" = "LarsCVRegressor" ":human_name" = "least angle regressor with built-in cross-validation" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:fit_intercept, :verbose, :max_iter, :normalize, :precompute, :cv, :max_n_alphas, :n_jobs, :eps, :copy_X)`" -":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", \"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:fit_intercept, :verbose, :max_iter, :precompute, :cv, :max_n_alphas, :n_jobs, :eps, :copy_X)`" +":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", \"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.KNeighborsClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2752,6 +2832,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMLinearClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2786,6 +2867,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.FeatureAgglomeration] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2812,7 +2894,7 @@ ":prediction_type" = ":unknown" ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] -":hyperparameters" = "`(:n_clusters, :memory, :connectivity, :affinity, :compute_full_tree, :linkage, :distance_threshold)`" +":hyperparameters" = "`(:n_clusters, :memory, :connectivity, :metric, :compute_full_tree, :linkage, :distance_threshold)`" ":hyperparameter_types" = "`(\"Int64\", \"Any\", \"Any\", \"Any\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64}\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" @@ -2820,6 +2902,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.DummyClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2854,6 
+2937,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BaggingRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2888,6 +2972,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BayesianQDA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2922,6 +3007,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BayesianLDA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2956,6 +3042,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SGDClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2990,6 +3077,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.TheilSenRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3024,6 +3112,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SpectralClustering] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3058,6 +3147,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.Birch] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3092,6 +3182,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AgglomerativeClustering] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3118,7 +3209,7 @@ ":prediction_type" = ":unknown" ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params"] -":hyperparameters" = "`(:n_clusters, :affinity, :memory, :connectivity, :compute_full_tree, :linkage, :distance_threshold)`" +":hyperparameters" = "`(:n_clusters, :metric, :memory, :connectivity, :compute_full_tree, :linkage, :distance_threshold)`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Any\", \"Any\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64}\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" @@ -3126,6 +3217,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ElasticNetRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3160,6 +3252,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" 
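In the `FeatureAgglomeration` and `AgglomerativeClustering` entries above, the `affinity` hyper-parameter is renamed `metric`, tracking the corresponding scikit-learn deprecation. A minimal sketch of the updated keyword, under the same installation assumptions as the previous examples:

```julia
using MLJ

AgglomerativeClustering = @load AgglomerativeClustering pkg=MLJScikitLearnInterface

# `metric` replaces the removed `affinity` keyword; per the registry entry
# above it takes a `String` for this model:
model = AgglomerativeClustering(n_clusters=3, metric="euclidean")
```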
[MLJScikitLearnInterface.RandomForestClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" @@ -3186,14 +3279,15 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :class_weight, :ccp_alpha, :max_samples)`" -":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Union{Nothing, Float64, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :class_weight, :ccp_alpha, :max_samples, :monotonic_cst)`" +":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LogisticCVClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3228,6 +3322,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskElasticNetRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3262,6 +3357,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ExtraTreesRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3288,14 +3384,15 @@ ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, 
:min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start)`" -":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :ccp_alpha, :max_samples, :monotonic_cst)`" +":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3330,6 +3427,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultinomialNBClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" @@ -3364,6 +3462,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GradientBoostingRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3398,6 +3497,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3432,6 +3532,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.ABODDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -3466,6 +3567,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.DNNDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -3500,6 +3602,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = 
"`()`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.LOFDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -3534,6 +3637,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.KNNDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -3568,6 +3672,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.COFDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -3602,6 +3707,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [SIRUS.StableRulesClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" @@ -3636,6 +3742,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [SIRUS.StableRulesRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" @@ -3670,6 +3777,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [SIRUS.StableForestClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" @@ -3704,6 +3812,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [SIRUS.StableForestRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" @@ -3738,6 +3847,77 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJIteration.DeterministicIteratedModel] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJIteration" +":package_license" = "MIT" +":load_path" = "MLJIteration.IteratedModel" +":package_uuid" = "614be32b-d00c-4edb-bd02-1eb411ab5e55" +":package_url" = "https://github.com/JuliaAI/MLJIteration.jl" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nIteratedModel(model=nothing,\n controls=Any[IterationControl.Step(1), EarlyStopping.Patience(5), EarlyStopping.GL(2.0), EarlyStopping.TimeLimit(Dates.Millisecond(108000)), EarlyStopping.InvalidValue()],\n 
retrain=false,\n              resampling=Holdout(),\n              measure=nothing,\n              weights=nothing,\n              class_weights=nothing,\n              operation=predict,\n              verbosity=1,\n              check_measure=true,\n              iteration_parameter=nothing,\n              cache=true)\n```\n\nWrap the specified `model <: Supervised` in the specified iteration `controls`. Training a machine bound to the wrapper iterates a corresponding machine bound to `model`. Here `model` should support iteration.\n\nTo list all controls, do `MLJIteration.CONTROLS`. Controls are summarized at [https://alan-turing-institute.github.io/MLJ.jl/dev/controlling_iterative_models/](https://alan-turing-institute.github.io/MLJ.jl/dev/controlling_iterative_models/) but query individual doc-strings for details and advanced options. For creating your own controls, refer to the documentation just cited.\n\nTo make out-of-sample losses available to the controls, the machine bound to `model` is only trained on part of the data, as iteration proceeds. See details on training below. Specify `retrain=true` to ensure the model is retrained on *all* available data, using the same number of iterations, once controlled iteration has stopped.\n\nSpecify `resampling=nothing` if all data is to be used for controlled iteration, with each out-of-sample loss replaced by the most recent training loss, assuming this is made available by the model (`supports_training_losses(model) == true`). Otherwise, `resampling` must have type `Holdout` (eg, `Holdout(fraction_train=0.8, rng=123)`).\n\nAssuming `retrain=true` or `resampling=nothing`, `iterated_model` behaves exactly like the original `model` but with the iteration parameter automatically selected. If `retrain=false` (default) and `resampling` is not `nothing`, then `iterated_model` behaves like the original model trained on a subset of the provided data.\n\nControlled iteration can be continued with new `fit!` calls (warm restart) by mutating a control, or by mutating the iteration parameter of `model`, which is otherwise ignored.\n\n### Training\n\nGiven an instance `iterated_model` of `IteratedModel`, calling `fit!(mach)` on a machine `mach = machine(iterated_model, data...)` performs the following actions:\n\n * Assuming `resampling !== nothing`, the `data` is split into *train* and *test* sets, according to the specified `resampling` strategy, which must have type `Holdout`.\n * A clone of the wrapped model, `iterated_model.model`, is bound to the train data in an internal machine, `train_mach`. If `resampling === nothing`, all data is used instead. This machine is the object to which controls are applied. For example, `Callback(fitted_params |> print)` will print the value of `fitted_params(train_mach)`.\n * The iteration parameter of the clone is set to `0`.\n * The specified `controls` are repeatedly applied to `train_mach` in sequence, until one of the controls triggers a stop. Loss-based controls (eg, `Patience()`, `GL()`, `Threshold(0.001)`) use an out-of-sample loss, obtained by applying `measure` to predictions and the test target values. (Specifically, these predictions are those returned by `operation(train_mach)`.) If `resampling === nothing` then the most recent training loss is used instead. 
Some controls require *both* out-of-sample and training losses (eg, `PQ()`).\n * Once a stop has been triggered, a clone of `model` is bound to all `data` in a machine called `mach_production` below, unless `retrain == false` or `resampling === nothing`, in which case `mach_production` coincides with `train_mach`.\n\n### Prediction\n\nCalling `predict(mach, Xnew)` returns `predict(mach_production, Xnew)`. Similar statements hold for `predict_mean`, `predict_mode`, `predict_median`.\n\n### Controls\n\nA control is permitted to mutate the fields (hyper-parameters) of `train_mach.model` (the clone of `model`). For example, to mutate a learning rate one might use the control\n\n```\nCallback(mach -> mach.model.eta = 1.05*mach.model.eta)\n```\n\nHowever, unless `model` supports warm restarts with respect to changes in that parameter, this will trigger retraining of `train_mach` from scratch, with a different training outcome, which is not recommended.\n\n### Warm restarts\n\nIf `iterated_model` is mutated and `fit!(mach)` is called again, then a warm restart is attempted if the only parameters to change are `model` or `controls` or both.\n\nSpecifically, `train_mach.model` is mutated to match the current value of `iterated_model.model` and the iteration parameter of the latter is updated to the last value used in the preceding `fit!(mach)` call. Then repeated application of the (updated) controls begins anew.\n" +":name" = "DeterministicIteratedModel" +":human_name" = "deterministic iterated model" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [] +":hyperparameters" = "`(:model, :controls, :resampling, :measure, :weights, :class_weights, :operation, :retrain, :check_measure, :iteration_parameter, :cache)`" +":hyperparameter_types" = "`(\"MLJModelInterface.Deterministic\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Vector{<:Real}}\", \"Union{Nothing, Dict{Any, <:Real}}\", \"Any\", \"Bool\", \"Bool\", \"Union{Nothing, Expr, Symbol}\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`IteratedModel`" + +[MLJIteration.ProbabilisticIteratedModel] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJIteration" +":package_license" = "MIT" +":load_path" = "MLJIteration.IteratedModel" +":package_uuid" = "614be32b-d00c-4edb-bd02-1eb411ab5e55" +":package_url" = "https://github.com/JuliaAI/MLJIteration.jl" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nIteratedModel(model=nothing,\n              controls=Any[IterationControl.Step(1), EarlyStopping.Patience(5), EarlyStopping.GL(2.0), EarlyStopping.TimeLimit(Dates.Millisecond(108000)), EarlyStopping.InvalidValue()],\n              retrain=false,\n              
resampling=Holdout(),\n              measure=nothing,\n              weights=nothing,\n              class_weights=nothing,\n              operation=predict,\n              verbosity=1,\n              check_measure=true,\n              iteration_parameter=nothing,\n              cache=true)\n```\n\nWrap the specified `model <: Supervised` in the specified iteration `controls`. Training a machine bound to the wrapper iterates a corresponding machine bound to `model`. Here `model` should support iteration.\n\nTo list all controls, do `MLJIteration.CONTROLS`. Controls are summarized at [https://alan-turing-institute.github.io/MLJ.jl/dev/controlling_iterative_models/](https://alan-turing-institute.github.io/MLJ.jl/dev/controlling_iterative_models/) but query individual doc-strings for details and advanced options. For creating your own controls, refer to the documentation just cited.\n\nTo make out-of-sample losses available to the controls, the machine bound to `model` is only trained on part of the data, as iteration proceeds. See details on training below. Specify `retrain=true` to ensure the model is retrained on *all* available data, using the same number of iterations, once controlled iteration has stopped.\n\nSpecify `resampling=nothing` if all data is to be used for controlled iteration, with each out-of-sample loss replaced by the most recent training loss, assuming this is made available by the model (`supports_training_losses(model) == true`). Otherwise, `resampling` must have type `Holdout` (eg, `Holdout(fraction_train=0.8, rng=123)`).\n\nAssuming `retrain=true` or `resampling=nothing`, `iterated_model` behaves exactly like the original `model` but with the iteration parameter automatically selected. If `retrain=false` (default) and `resampling` is not `nothing`, then `iterated_model` behaves like the original model trained on a subset of the provided data.\n\nControlled iteration can be continued with new `fit!` calls (warm restart) by mutating a control, or by mutating the iteration parameter of `model`, which is otherwise ignored.\n\n### Training\n\nGiven an instance `iterated_model` of `IteratedModel`, calling `fit!(mach)` on a machine `mach = machine(iterated_model, data...)` performs the following actions:\n\n * Assuming `resampling !== nothing`, the `data` is split into *train* and *test* sets, according to the specified `resampling` strategy, which must have type `Holdout`.\n * A clone of the wrapped model, `iterated_model.model`, is bound to the train data in an internal machine, `train_mach`. If `resampling === nothing`, all data is used instead. This machine is the object to which controls are applied. For example, `Callback(fitted_params |> print)` will print the value of `fitted_params(train_mach)`.\n * The iteration parameter of the clone is set to `0`.\n * The specified `controls` are repeatedly applied to `train_mach` in sequence, until one of the controls triggers a stop. Loss-based controls (eg, `Patience()`, `GL()`, `Threshold(0.001)`) use an out-of-sample loss, obtained by applying `measure` to predictions and the test target values. (Specifically, these predictions are those returned by `operation(train_mach)`.) If `resampling === nothing` then the most recent training loss is used instead. 
Some controls require *both* out-of-sample and training losses (eg, `PQ()`).\n * Once a stop has been triggered, a clone of `model` is bound to all `data` in a machine called `mach_production` below, unless `retrain == false` or `resampling === nothing`, in which case `mach_production` coincides with `train_mach`.\n\n### Prediction\n\nCalling `predict(mach, Xnew)` returns `predict(mach_production, Xnew)`. Similar statements hold for `predict_mean`, `predict_mode`, `predict_median`.\n\n### Controls\n\nA control is permitted to mutate the fields (hyper-parameters) of `train_mach.model` (the clone of `model`). For example, to mutate a learning rate one might use the control\n\n```\nCallback(mach -> mach.model.eta = 1.05*mach.model.eta)\n```\n\nHowever, unless `model` supports warm restarts with respect to changes in that parameter, this will trigger retraining of `train_mach` from scratch, with a different training outcome, which is not recommended.\n\n### Warm restarts\n\nIf `iterated_model` is mutated and `fit!(mach)` is called again, then a warm restart is attempted if the only parameters to change are `model` or `controls` or both.\n\nSpecifically, `train_mach.model` is mutated to match the current value of `iterated_model.model` and the iteration parameter of the latter is updated to the last value used in the preceding `fit!(mach)` call. Then repeated application of the (updated) controls begins anew.\n" +":name" = "ProbabilisticIteratedModel" +":human_name" = "probabilistic iterated model" +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [] +":hyperparameters" = "`(:model, :controls, :resampling, :measure, :weights, :class_weights, :operation, :retrain, :check_measure, :iteration_parameter, :cache)`" +":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, Dict{Any, <:Real}}\", \"Any\", \"Bool\", \"Bool\", \"Union{Nothing, Expr, Symbol}\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`IteratedModel`" [PartialLeastSquaresRegressor.KPLSRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3772,6 +3952,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [PartialLeastSquaresRegressor.PLSRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3806,6 +3987,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [PartitionedLS.PartLS] ":input_scitype" = "`Union{ScientificTypesBase.Table{AbstractVector{ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -3825,7 +4007,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nPartLS\n```\n\nA model type for fitting a partitioned least squares model to data. 
Both an MLJ and native interfacew are provided.\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using\n\nPartLS = @load PartLS pkg=PartitionedLS\n\nConstruct an instance with default hyper-parameters using the syntax `model = PartLS()`. Provide keyword arguments to override hyper-parameter defaults, as in `model = PartLS(P=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any matrix or table with `Continuous` element scitype. Check column scitypes of a table `X` with `schema(X)`.\n\nTrain the machine using `fit!(mach)`.\n\n## Hyper-parameters\n\n * `Optimizer`: the optimization algorithm to use. It can be `Opt`, `Alt` or `BnB` (names exported by `PartitionedLS.jl`).\n * `P`: the partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element `P_{k, i} = 1` if feature `i` belongs to partition `k`.\n * `η`: the regularization parameter. It controls the strength of the regularization.\n * `ϵ`: the tolerance parameter. It is used to determine when the Alt optimization algorithm has converged. Only used by the `Alt` algorithm.\n * `T`: the maximum number of iterations. It is used to determine when to stop the Alt optimization algorithm has converged. Only used by the `Alt` algorithm.\n * `rng`: the random number generator to use.\n\n * If `nothing`, the global random number generator `rand` is used.\n * If an integer, the global number generator `rand` is used after seeding it with the given integer.\n * If an object of type `AbstractRNG`, the given random number generator is used.\n\n## Operations\n\n * `predict(mach, Xnew)`: return the predictions of the model on new data `Xnew`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `α`: the values of the α variables. For each partition `k`, it holds the values of the α variables are such that $\\sum_{i \\in P_k} \\alpha_{k} = 1$.\n * `β`: the values of the β variables. For each partition `k`, `β_k` is the coefficient that multiplies the features in the k-th partition.\n * `t`: the intercept term of the model.\n * `P`: the partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element `P_{k, i} = 1` if feature `i` belongs to partition `k`.\n\n## Examples\n\n```julia\nPartLS = @load PartLS pkg=PartitionedLS\n\nX = [[1. 2. 3.];\n [3. 3. 4.];\n [8. 1. 3.];\n [5. 3. 1.]]\n\ny = [1.;\n 1.;\n 2.;\n 3.]\n\nP = [[1 0];\n [1 0];\n [0 1]]\n\n\nmodel = PartLS(P=P)\nmach = machine(model, X, y) |> fit!\n\n# predictions on the training set:\npredict(mach, X)\n\n```\n\n# Native Interface\n\n```\nusing PartitionedLS\n\nX = [[1. 2. 3.];\n [3. 3. 4.];\n [8. 1. 3.];\n [5. 3. 1.]]\n\ny = [1.;\n 1.;\n 2.;\n 3.]\n\nP = [[1 0];\n [1 0];\n [0 1]]\n\n\n# fit using the optimal algorithm\nresult = fit(Opt, X, y, P, η = 0.0)\ny_hat = predict(result.model, X)\n```\n\nFor other `fit` keyword options, refer to the \"Hyper-parameters\" section for the MLJ interface.\n" +":docstring" = "```\nPartLS\n```\n\nA model type for fitting a partitioned least squares model to data. Both an MLJ and native interface are provided.\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using\n\n```\nPartLS = @load PartLS pkg=PartitionedLS\n```\n\nConstruct an instance with default hyper-parameters using the syntax `model = PartLS()`. 
Provide keyword arguments to override hyper-parameter defaults, as in `model = PartLS(P=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any matrix or table with `Continuous` element scitype. Check column scitypes of a table `X` with `schema(X)`.\n * `y`: any vector with `Continuous` element scitype. Check scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach)`.\n\n## Hyper-parameters\n\n * `Optimizer`: the optimization algorithm to use. It can be `Opt`, `Alt` or `BnB` (names exported by `PartitionedLS.jl`).\n * `P`: the partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element `P_{k, i} = 1` if feature `i` belongs to partition `k`.\n * `η`: the regularization parameter. It controls the strength of the regularization.\n * `ϵ`: the tolerance parameter. It is used to determine when the Alt optimization algorithm has converged. Only used by the `Alt` algorithm.\n * `T`: the maximum number of iterations. It is used to determine when to stop the `Alt` optimization algorithm. Only used by the `Alt` algorithm.\n * `rng`: the random number generator to use.\n\n * If `nothing`, the global random number generator `rand` is used.\n * If an integer, the global random number generator `rand` is used after seeding it with the given integer.\n * If an object of type `AbstractRNG`, the given random number generator is used.\n\n## Operations\n\n * `predict(mach, Xnew)`: return the predictions of the model on new data `Xnew`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `α`: the values of the α variables. For each partition `k`, the values of the α variables are such that $\\sum_{i \\in P_k} \\alpha_{k} = 1$.\n * `β`: the values of the β variables. For each partition `k`, `β_k` is the coefficient that multiplies the features in the k-th partition.\n * `t`: the intercept term of the model.\n * `P`: the partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element `P_{k, i} = 1` if feature `i` belongs to partition `k`.\n\n## Examples\n\n```julia\nPartLS = @load PartLS pkg=PartitionedLS\n\nX = [[1. 2. 3.];\n [3. 3. 4.];\n [8. 1. 3.];\n [5. 3. 1.]]\n\ny = [1.;\n 1.;\n 2.;\n 3.]\n\nP = [[1 0];\n [1 0];\n [0 1]]\n\n\nmodel = PartLS(P=P)\nmach = machine(model, X, y) |> fit!\n\n# predictions on the training set:\npredict(mach, X)\n\n```\n\n# Native Interface\n\n```\nusing PartitionedLS\n\nX = [[1. 2. 3.];\n [3. 3. 4.];\n [8. 1. 3.];\n [5. 3.
1.]]\n\ny = [1.;\n 1.;\n 2.;\n 3.]\n\nP = [[1 0];\n [1 0];\n [0 1]]\n\n\n# fit using the optimal algorithm\nresult = fit(Opt, X, y, P, η = 0.0)\ny_hat = predict(result.model, X)\n```\n\nFor other `fit` keyword options, refer to the \"Hyper-parameters\" section for the MLJ interface.\n" ":name" = "PartLS" ":human_name" = "part ls" ":is_supervised" = "`true`" @@ -3840,6 +4022,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJLinearModels.QuantileRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3874,6 +4057,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJLinearModels.LogisticClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3908,6 +4092,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJLinearModels.MultinomialClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3942,6 +4127,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJLinearModels.LADRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3976,6 +4162,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJLinearModels.RidgeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4010,6 +4197,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJLinearModels.RobustRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4044,6 +4232,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJLinearModels.ElasticNetRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4078,6 +4267,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJLinearModels.LinearRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4112,6 +4302,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJLinearModels.LassoRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4146,6 +4337,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJLinearModels.HuberRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4180,6 +4372,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [ParallelKMeans.KMeans] ":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4214,6 +4407,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [NaiveBayes.GaussianNBClassifier] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -4247,41 +4441,568 @@ ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[NaiveBayes.MultinomialNBClassifier] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractMatrix{<:ScientificTypesBase.Count}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractMatrix{<:ScientificTypesBase.Count}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`true`" +":package_name" = "NaiveBayes" +":package_license" = "unknown" +":load_path" = "MLJNaiveBayesInterface.MultinomialNBClassifier" +":package_uuid" = "9bbee03b-0db5-5f46-924f-b5c9c21b8c60" +":package_url" = "https://github.com/dfdx/NaiveBayes.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nMultinomialNBClassifier\n```\n\nA model type for constructing a multinomial naive Bayes classifier, based on [NaiveBayes.jl](https://github.com/dfdx/NaiveBayes.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultinomialNBClassifier = @load MultinomialNBClassifier pkg=NaiveBayes\n```\n\nDo `model = MultinomialNBClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultinomialNBClassifier(alpha=...)`.\n\nThe [multinomial naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Multinomial_naive_Bayes) is often applied when input features consist of a counts (scitype `Count`) and when observations for a fixed target class are generated from a multinomial distribution with fixed probability vector, but whose sample length varies from observation to observation. 
For example, features might represent word counts in text documents being classified by sentiment.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Count`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `alpha=1`: Lindstone smoothing in estimation of multinomial probability vectors from training histograms (default corresponds to Laplacian smoothing).\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n * `predict_mode(mach, Xnew)`: Return the mode of above predictions.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `c_counts`: A dictionary containing the observed count of each input class.\n * `x_counts`: A dictionary containing the categorical counts of each input class.\n * `x_totals`: The sum of each count (input feature), ungrouped.\n * `n_obs`: The total number of observations in the training data.\n\n# Examples\n\n```\nusing MLJ\nimport TextAnalysis\n\nCountTransformer = @load CountTransformer pkg=MLJText\nMultinomialNBClassifier = @load MultinomialNBClassifier pkg=NaiveBayes\n\ntokenized_docs = TextAnalysis.tokenize.([\n \"I am very mad. You never listen.\",\n \"You seem to be having trouble? Can I help you?\",\n \"Our boss is mad at me. I hope he dies.\",\n \"His boss wants to help me. She is nice.\",\n \"Thank you for your help. It is nice working with you.\",\n \"Never do that again! I am so mad. 
\",\n])\n\nsentiment = [\n \"negative\",\n \"positive\",\n \"negative\",\n \"positive\",\n \"positive\",\n \"negative\",\n]\n\nmach1 = machine(CountTransformer(), tokenized_docs) |> fit!\n\n# matrix of counts:\nX = transform(mach1, tokenized_docs)\n\n# to ensure scitype(y) <: AbstractVector{<:OrderedFactor}:\ny = coerce(sentiment, OrderedFactor)\n\nclassifier = MultinomialNBClassifier()\nmach2 = machine(classifier, X, y)\nfit!(mach2, rows=1:4)\n\n# probabilistic predictions:\ny_prob = predict(mach2, rows=5:6) # distributions\npdf.(y_prob, \"positive\") # probabilities for \"positive\"\nlog_loss(y_prob, y[5:6])\n\n# point predictions:\nyhat = mode.(y_prob) # or `predict_mode(mach2, rows=5:6)`\n```\n\nSee also [`GaussianNBClassifier`](@ref)\n" +":name" = "MultinomialNBClassifier" +":human_name" = "multinomial naive Bayes classifier" +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [":fit", ":fitted_params", ":predict"] +":hyperparameters" = "`(:alpha,)`" +":hyperparameter_types" = "`(\"Int64\",)`" +":hyperparameter_ranges" = "`(nothing,)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJBase.DeterministicStack] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "MIT" +":load_path" = "MLJBase.Stack" +":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +":package_url" = "https://github.com/JuliaAI/MLJBase.jl" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nUnion{Types...}\n```\n\nA type union is an abstract type which includes all instances of any of its argument types. 
The empty union [`Union{}`](@ref) is the bottom type of Julia.\n\n# Examples\n\n```jldoctest\njulia> IntOrString = Union{Int,AbstractString}\nUnion{Int64, AbstractString}\n\njulia> 1 isa IntOrString\ntrue\n\njulia> \"Hello!\" isa IntOrString\ntrue\n\njulia> 1.0 isa IntOrString\nfalse\n```\n" +":name" = "DeterministicStack" +":human_name" = "deterministic stack" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [] +":hyperparameters" = "`(:models, :metalearner, :resampling, :measures, :cache, :acceleration)`" +":hyperparameter_types" = "`(\"Vector{MLJModelInterface.Supervised}\", \"MLJModelInterface.Deterministic\", \"Any\", \"Union{Nothing, AbstractVector}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`MLJBase.Stack`" + +[MLJBase.ProbabilisticStack] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "MIT" +":load_path" = "MLJBase.Stack" +":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +":package_url" = "https://github.com/JuliaAI/MLJBase.jl" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nUnion{Types...}\n```\n\nA type union is an abstract type which includes all instances of any of its argument types. 
The empty union [`Union{}`](@ref) is the bottom type of Julia.\n\n# Examples\n\n```jldoctest\njulia> IntOrString = Union{Int,AbstractString}\nUnion{Int64, AbstractString}\n\njulia> 1 isa IntOrString\ntrue\n\njulia> \"Hello!\" isa IntOrString\ntrue\n\njulia> 1.0 isa IntOrString\nfalse\n```\n" +":name" = "ProbabilisticStack" +":human_name" = "probabilistic stack" +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [] +":hyperparameters" = "`(:models, :metalearner, :resampling, :measures, :cache, :acceleration)`" +":hyperparameter_types" = "`(\"Vector{MLJModelInterface.Supervised}\", \"MLJModelInterface.Probabilistic\", \"Any\", \"Union{Nothing, AbstractVector}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`MLJBase.Stack`" + +[MLJBase.Resampler] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`true`" +":package_name" = "MLJBase" +":package_license" = "unknown" +":load_path" = "MLJBase.Resampler" +":package_uuid" = "unknown" +":package_url" = "unknown" +":is_wrapper" = "`true`" +":supports_weights" = "`missing`" +":supports_class_weights" = "`missing`" +":supports_online" = "`false`" +":docstring" = "```\nresampler = Resampler(\n model=ConstantRegressor(),\n resampling=CV(),\n measure=nothing,\n weights=nothing,\n class_weights=nothing\n operation=predict,\n repeats = 1,\n acceleration=default_resource(),\n check_measure=true,\n per_observation=true,\n logger=nothing,\n compact=false,\n)\n```\n\n*Private method.* Use at own risk.\n\nResampling model wrapper, used internally by the `fit` method of `TunedModel` instances and `IteratedModel` instances. See [`evaluate!](@ref) for meaning of the options. Not intended for use by general user, who will ordinarily use [`evaluate!`](@ref) directly.\n\nGiven a machine `mach = machine(resampler, args...)` one obtains a performance evaluation of the specified `model`, performed according to the prescribed `resampling` strategy and other parameters, using data `args...`, by calling `fit!(mach)` followed by `evaluate(mach)`.\n\nOn subsequent calls to `fit!(mach)` new train/test pairs of row indices are only regenerated if `resampling`, `repeats` or `cache` fields of `resampler` have changed. The evolution of an RNG field of `resampler` does *not* constitute a change (`==` for `MLJType` objects is not sensitive to such changes; see [`is_same_except`](@ref)).\n\nIf there is single train/test pair, then warm-restart behavior of the wrapped model `resampler.model` will extend to warm-restart behaviour of the wrapper `resampler`, with respect to mutations of the wrapped model.\n\nThe sample `weights` are passed to the specified performance measures that support weights for evaluation. 
These weights are not to be confused with any weights bound to a `Resampler` instance in a machine, used for training the wrapped `model` when supported.\n\nThe sample `class_weights` are passed to the specified performance measures that support per-class weights for evaluation. These weights are not to be confused with any weights bound to a `Resampler` instance in a machine, used for training the wrapped `model` when supported.\n" +":name" = "Resampler" +":human_name" = "resampler" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`Any`" +":implemented_methods" = [":clean!", ":evaluate", ":fit", ":fitted_params", ":update"] +":hyperparameters" = "`(:model, :resampling, :measure, :weights, :class_weights, :operation, :acceleration, :check_measure, :repeats, :cache, :per_observation, :logger, :compact)`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict{<:Any, <:Real}}\", \"Any\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Int64\", \"Bool\", \"Bool\", \"Any\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`MLJBase.Resampler`" + +[MLJBase.TransformedTargetModelInterval] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "MIT" +":load_path" = "MLJBase.TransformedTargetModel" +":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +":package_url" = "https://github.com/JuliaAI/MLJBase.jl" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` is one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`. In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported.
Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(z))\n```\n" +":name" = "TransformedTargetModelInterval" +":human_name" = "transformed target model interval" +":is_supervised" = "`true`" +":prediction_type" = ":interval" +":abstract_type" = "`MLJModelInterface.Interval`" +":implemented_methods" = [] +":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`" +":hyperparameter_types" = "`(\"MLJModelInterface.Interval\", \"Any\", \"Any\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`TransformedTargetModel`" + +[MLJBase.TransformedTargetModelDeterministicUnsupervisedDetector] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":predict_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "MIT" +":load_path" = "MLJBase.TransformedTargetModel" +":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +":package_url" = "https://github.com/JuliaAI/MLJBase.jl" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` is one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method.
If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`. In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(z))\n```\n" +":name" = "TransformedTargetModelDeterministicUnsupervisedDetector" +":human_name" = "transformed target model deterministic unsupervised detector" +":is_supervised" = "`false`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.DeterministicUnsupervisedDetector`" +":implemented_methods" = [] +":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`" +":hyperparameter_types" = "`(\"MLJModelInterface.DeterministicUnsupervisedDetector\", \"Any\", \"Any\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`TransformedTargetModel`" + +[MLJBase.TransformedTargetModelProbabilistic] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "MIT" +":load_path" = "MLJBase.TransformedTargetModel" +":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +":package_url" = "https://github.com/JuliaAI/MLJBase.jl" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` is one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`.
In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(z))\n```\n" +":name" = "TransformedTargetModelProbabilistic" +":human_name" = "transformed target model probabilistic" +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [] +":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`" +":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Any\", \"Any\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`TransformedTargetModel`" + +[MLJBase.StaticPipeline] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "unknown" +":load_path" = "MLJBase.Pipeline" +":package_uuid" = "unknown" +":package_url" = "unknown" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nPipeline(component1, component2, ... , componentk; options...)\nPipeline(name1=component1, name2=component2, ..., namek=componentk; options...)\ncomponent1 |> component2 |> ... |> componentk\n```\n\nCreate an instance of a composite model type which sequentially composes the specified components in order. This means `component1` receives inputs, whose output is passed to `component2`, and so forth. A \"component\" is either a `Model` instance, a model type (converted immediately to its default instance) or any callable object.
Here the \"output\" of a model is what `predict` returns if it is `Supervised`, or what `transform` returns if it is `Unsupervised`.\n\nNames for the component fields are automatically generated unless explicitly specified, as in\n\n```\nPipeline(encoder=ContinuousEncoder(drop_last=false),\n stand=Standardizer())\n```\n\nThe `Pipeline` constructor accepts keyword `options` discussed further below.\n\nOrdinary functions (and other callables) may be inserted in the pipeline as shown in the following example:\n\n```\nPipeline(X->coerce(X, :age=>Continuous), OneHotEncoder, ConstantClassifier)\n```\n\n### Syntactic sugar\n\nThe `|>` operator is overloaded to construct pipelines out of models, callables, and existing pipelines:\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels add=true\nPCA = @load PCA pkg=MultivariateStats add=true\n\npipe1 = MLJBase.table |> ContinuousEncoder |> Standardizer\npipe2 = PCA |> LinearRegressor\npipe1 |> pipe2\n```\n\nAt most one of the components may be a supervised model, but this model can appear in any position. A pipeline with a `Supervised` component is itself `Supervised` and implements the `predict` operation. It is otherwise `Unsupervised` (possibly `Static`) and implements `transform`.\n\n### Special operations\n\nIf all the `components` are invertible unsupervised models (ie, implement `inverse_transform`) then `inverse_transform` is implemented for the pipeline. If there are no supervised models, then `predict` is nevertheless implemented, assuming the last component is a model that implements it (some clustering models). Similarly, calling `transform` on a supervised pipeline calls `transform` on the supervised component.\n\n### Optional key-word arguments\n\n * `prediction_type` - prediction type of the pipeline; possible values: `:deterministic`, `:probabilistic`, `:interval` (default=`:deterministic` if not inferable)\n * `operation` - operation applied to the supervised component model, when present; possible values: `predict`, `predict_mean`, `predict_median`, `predict_mode` (default=`predict`)\n * `cache` - whether the internal machines created for component models should cache model-specific representations of data (see [`machine`](@ref)) (default=`true`)\n\n!!! 
warning\n Set `cache=false` to guarantee data anonymization.\n\n\nTo build more complicated non-branching pipelines, refer to the MLJ manual sections on composing models.\n" +":name" = "StaticPipeline" +":human_name" = "static pipeline" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Static`" +":implemented_methods" = [] +":hyperparameters" = "`(:named_components, :cache)`" +":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`Pipeline`" + +[MLJBase.TransformedTargetModelProbabilisticSupervisedDetector] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "MIT" +":load_path" = "MLJBase.TransformedTargetModel" +":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +":package_url" = "https://github.com/JuliaAI/MLJBase.jl" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` is one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`. In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported.
Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(z))\n```\n" +":name" = "TransformedTargetModelProbabilisticSupervisedDetector" +":human_name" = "transformed target model probabilistic supervised detector" +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.ProbabilisticSupervisedDetector`" +":implemented_methods" = [] +":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`" +":hyperparameter_types" = "`(\"MLJModelInterface.ProbabilisticSupervisedDetector\", \"Any\", \"Any\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`TransformedTargetModel`" + +[MLJBase.ProbabilisticPipeline] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "unknown" +":load_path" = "MLJBase.Pipeline" +":package_uuid" = "unknown" +":package_url" = "unknown" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nPipeline(component1, component2, ... , componentk; options...)\nPipeline(name1=component1, name2=component2, ..., namek=componentk; options...)\ncomponent1 |> component2 |> ... |> componentk\n```\n\nCreate an instance of a composite model type which sequentially composes the specified components in order. This means `component1` receives inputs, whose output is passed to `component2`, and so forth. A \"component\" is either a `Model` instance, a model type (converted immediately to its default instance) or any callable object.
Here the \"output\" of a model is what `predict` returns if it is `Supervised`, or what `transform` returns if it is `Unsupervised`.\n\nNames for the component fields are automatically generated unless explicitly specified, as in\n\n```\nPipeline(encoder=ContinuousEncoder(drop_last=false),\n stand=Standardizer())\n```\n\nThe `Pipeline` constructor accepts keyword `options` discussed further below.\n\nOrdinary functions (and other callables) may be inserted in the pipeline as shown in the following example:\n\n```\nPipeline(X->coerce(X, :age=>Continuous), OneHotEncoder, ConstantClassifier)\n```\n\n### Syntactic sugar\n\nThe `|>` operator is overloaded to construct pipelines out of models, callables, and existing pipelines:\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels add=true\nPCA = @load PCA pkg=MultivariateStats add=true\n\npipe1 = MLJBase.table |> ContinuousEncoder |> Standardizer\npipe2 = PCA |> LinearRegressor\npipe1 |> pipe2\n```\n\nAt most one of the components may be a supervised model, but this model can appear in any position. A pipeline with a `Supervised` component is itself `Supervised` and implements the `predict` operation. It is otherwise `Unsupervised` (possibly `Static`) and implements `transform`.\n\n### Special operations\n\nIf all the `components` are invertible unsupervised models (ie, implement `inverse_transform`) then `inverse_transform` is implemented for the pipeline. If there are no supervised models, then `predict` is nevertheless implemented, assuming the last component is a model that implements it (some clustering models). Similarly, calling `transform` on a supervised pipeline calls `transform` on the supervised component.\n\n### Optional key-word arguments\n\n * `prediction_type` - prediction type of the pipeline; possible values: `:deterministic`, `:probabilistic`, `:interval` (default=`:deterministic` if not inferable)\n * `operation` - operation applied to the supervised component model, when present; possible values: `predict`, `predict_mean`, `predict_median`, `predict_mode` (default=`predict`)\n * `cache` - whether the internal machines created for component models should cache model-specific representations of data (see [`machine`](@ref)) (default=`true`)\n\n!!! 
warning\n Set `cache=false` to guarantee data anonymization.\n\n\nTo build more complicated non-branching pipelines, refer to the MLJ manual sections on composing models.\n" +":name" = "ProbabilisticPipeline" +":human_name" = "probabilistic pipeline" +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [] +":hyperparameters" = "`(:named_components, :cache)`" +":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`Pipeline`" + +[MLJBase.DeterministicPipeline] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "unknown" +":load_path" = "MLJBase.Pipeline" +":package_uuid" = "unknown" +":package_url" = "unknown" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nPipeline(component1, component2, ... , componentk; options...)\nPipeline(name1=component1, name2=component2, ..., namek=componentk; options...)\ncomponent1 |> component2 |> ... |> componentk\n```\n\nCreate an instance of a composite model type which sequentially composes the specified components in order. This means `component1` receives inputs, whose output is passed to `component2`, and so forth. A \"component\" is either a `Model` instance, a model type (converted immediately to its default instance) or any callable object. Here the \"output\" of a model is what `predict` returns if it is `Supervised`, or what `transform` returns if it is `Unsupervised`.\n\nNames for the component fields are automatically generated unless explicitly specified, as in\n\n```\nPipeline(encoder=ContinuousEncoder(drop_last=false),\n stand=Standardizer())\n```\n\nThe `Pipeline` constructor accepts keyword `options` discussed further below.\n\nOrdinary functions (and other callables) may be inserted in the pipeline as shown in the following example:\n\n```\nPipeline(X->coerce(X, :age=>Continuous), OneHotEncoder, ConstantClassifier)\n```\n\n### Syntactic sugar\n\nThe `|>` operator is overloaded to construct pipelines out of models, callables, and existing pipelines:\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels add=true\nPCA = @load PCA pkg=MultivariateStats add=true\n\npipe1 = MLJBase.table |> ContinuousEncoder |> Standardizer\npipe2 = PCA |> LinearRegressor\npipe1 |> pipe2\n```\n\nAt most one of the components may be a supervised model, but this model can appear in any position. A pipeline with a `Supervised` component is itself `Supervised` and implements the `predict` operation. 
It is otherwise `Unsupervised` (possibly `Static`) and implements `transform`.\n\n### Special operations\n\nIf all the `components` are invertible unsupervised models (ie, implement `inverse_transform`) then `inverse_transform` is implemented for the pipeline. If there are no supervised models, then `predict` is nevertheless implemented, assuming the last component is a model that implements it (some clustering models). Similarly, calling `transform` on a supervised pipeline calls `transform` on the supervised component.\n\n### Optional key-word arguments\n\n * `prediction_type` - prediction type of the pipeline; possible values: `:deterministic`, `:probabilistic`, `:interval` (default=`:deterministic` if not inferable)\n * `operation` - operation applied to the supervised component model, when present; possible values: `predict`, `predict_mean`, `predict_median`, `predict_mode` (default=`predict`)\n * `cache` - whether the internal machines created for component models should cache model-specific representations of data (see [`machine`](@ref)) (default=`true`)\n\n!!! warning\n Set `cache=false` to guarantee data anonymization.\n\n\nTo build more complicated non-branching pipelines, refer to the MLJ manual sections on composing models.\n" +":name" = "DeterministicPipeline" +":human_name" = "deterministic pipeline" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [] +":hyperparameters" = "`(:named_components, :cache)`" +":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`Pipeline`" + +[MLJBase.TransformedTargetModelDeterministic] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "MIT" +":load_path" = "MLJBase.TransformedTargetModel" +":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +":package_url" = "https://github.com/JuliaAI/MLJBase.jl" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` is one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`.
In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(z))\n```\n" +":name" = "TransformedTargetModelDeterministic" +":human_name" = "transformed target model deterministic" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [] +":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`" +":hyperparameter_types" = "`(\"MLJModelInterface.Deterministic\", \"Any\", \"Any\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`TransformedTargetModel`" + +[MLJBase.IntervalPipeline] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "unknown" +":load_path" = "MLJBase.Pipeline" +":package_uuid" = "unknown" +":package_url" = "unknown" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nPipeline(component1, component2, ... , componentk; options...)\nPipeline(name1=component1, name2=component2, ..., namek=componentk; options...)\ncomponent1 |> component2 |> ... |> componentk\n```\n\nCreate an instance of a composite model type which sequentially composes the specified components in order. This means `component1` receives inputs, whose output is passed to `component2`, and so forth. A \"component\" is either a `Model` instance, a model type (converted immediately to its default instance) or any callable object.
Here the \"output\" of a model is what `predict` returns if it is `Supervised`, or what `transform` returns if it is `Unsupervised`.\n\nNames for the component fields are automatically generated unless explicitly specified, as in\n\n```\nPipeline(encoder=ContinuousEncoder(drop_last=false),\n stand=Standardizer())\n```\n\nThe `Pipeline` constructor accepts keyword `options` discussed further below.\n\nOrdinary functions (and other callables) may be inserted in the pipeline as shown in the following example:\n\n```\nPipeline(X->coerce(X, :age=>Continuous), OneHotEncoder, ConstantClassifier)\n```\n\n### Syntactic sugar\n\nThe `|>` operator is overloaded to construct pipelines out of models, callables, and existing pipelines:\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels add=true\nPCA = @load PCA pkg=MultivariateStats add=true\n\npipe1 = MLJBase.table |> ContinuousEncoder |> Standardizer\npipe2 = PCA |> LinearRegressor\npipe1 |> pipe2\n```\n\nAt most one of the components may be a supervised model, but this model can appear in any position. A pipeline with a `Supervised` component is itself `Supervised` and implements the `predict` operation. It is otherwise `Unsupervised` (possibly `Static`) and implements `transform`.\n\n### Special operations\n\nIf all the `components` are invertible unsupervised models (ie, implement `inverse_transform`) then `inverse_transform` is implemented for the pipeline. If there are no supervised models, then `predict` is nevertheless implemented, assuming the last component is a model that implements it (some clustering models). Similarly, calling `transform` on a supervised pipeline calls `transform` on the supervised component.\n\n### Optional key-word arguments\n\n * `prediction_type` - prediction type of the pipeline; possible values: `:deterministic`, `:probabilistic`, `:interval` (default=`:deterministic` if not inferable)\n * `operation` - operation applied to the supervised component model, when present; possible values: `predict`, `predict_mean`, `predict_median`, `predict_mode` (default=`predict`)\n * `cache` - whether the internal machines created for component models should cache model-specific representations of data (see [`machine`](@ref)) (default=`true`)\n\n!!! 
warning\n Set `cache=false` to guarantee data anonymization.\n\n\nTo build more complicated non-branching pipelines, refer to the MLJ manual sections on composing models.\n" +":name" = "IntervalPipeline" +":human_name" = "interval pipeline" +":is_supervised" = "`true`" +":prediction_type" = ":interval" +":abstract_type" = "`MLJModelInterface.Interval`" +":implemented_methods" = [] +":hyperparameters" = "`(:named_components, :cache)`" +":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`Pipeline`" + +[MLJBase.UnsupervisedPipeline] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "unknown" +":load_path" = "MLJBase.Pipeline" +":package_uuid" = "unknown" +":package_url" = "unknown" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nPipeline(component1, component2, ... , componentk; options...)\nPipeline(name1=component1, name2=component2, ..., namek=componentk; options...)\ncomponent1 |> component2 |> ... |> componentk\n```\n\nCreate an instance of a composite model type which sequentially composes the specified components in order. This means `component1` receives inputs, whose output is passed to `component2`, and so forth. A \"component\" is either a `Model` instance, a model type (converted immediately to its default instance) or any callable object. Here the \"output\" of a model is what `predict` returns if it is `Supervised`, or what `transform` returns if it is `Unsupervised`.\n\nNames for the component fields are automatically generated unless explicitly specified, as in\n\n```\nPipeline(encoder=ContinuousEncoder(drop_last=false),\n stand=Standardizer())\n```\n\nThe `Pipeline` constructor accepts keyword `options` discussed further below.\n\nOrdinary functions (and other callables) may be inserted in the pipeline as shown in the following example:\n\n```\nPipeline(X->coerce(X, :age=>Continuous), OneHotEncoder, ConstantClassifier)\n```\n\n### Syntactic sugar\n\nThe `|>` operator is overloaded to construct pipelines out of models, callables, and existing pipelines:\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels add=true\nPCA = @load PCA pkg=MultivariateStats add=true\n\npipe1 = MLJBase.table |> ContinuousEncoder |> Standardizer\npipe2 = PCA |> LinearRegressor\npipe1 |> pipe2\n```\n\nAt most one of the components may be a supervised model, but this model can appear in any position. A pipeline with a `Supervised` component is itself `Supervised` and implements the `predict` operation. 
It is otherwise `Unsupervised` (possibly `Static`) and implements `transform`.\n\n### Special operations\n\nIf all the `components` are invertible unsupervised models (ie, implement `inverse_transform`) then `inverse_transform` is implemented for the pipeline. If there are no supervised models, then `predict` is nevertheless implemented, assuming the last component is a model that implements it (some clustering models). Similarly, calling `transform` on a supervised pipeline calls `transform` on the supervised component.\n\n### Optional key-word arguments\n\n * `prediction_type` - prediction type of the pipeline; possible values: `:deterministic`, `:probabilistic`, `:interval` (default=`:deterministic` if not inferable)\n * `operation` - operation applied to the supervised component model, when present; possible values: `predict`, `predict_mean`, `predict_median`, `predict_mode` (default=`predict`)\n * `cache` - whether the internal machines created for component models should cache model-specific representations of data (see [`machine`](@ref)) (default=`true`)\n\n!!! warning\n Set `cache=false` to guarantee data anonymization.\n\n\nTo build more complicated non-branching pipelines, refer to the MLJ manual sections on composing models.\n" +":name" = "UnsupervisedPipeline" +":human_name" = "unsupervised pipeline" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [] +":hyperparameters" = "`(:named_components, :cache)`" +":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`Pipeline`" + +[MLJBase.TransformedTargetModelProbabilisticUnsupervisedDetector] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "MIT" +":load_path" = "MLJBase.TransformedTargetModel" +":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +":package_url" = "https://github.com/JuliaAI/MLJBase.jl" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a 
transformation of the target variable.\n\nHere `transformer` is one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`. In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the target, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(z))\n```\n"
+":name" = "TransformedTargetModelProbabilisticUnsupervisedDetector"
+":human_name" = "transformed target model probabilistic unsupervised detector"
+":is_supervised" = "`false`"
+":prediction_type" = ":probabilistic"
+":abstract_type" = "`MLJModelInterface.ProbabilisticUnsupervisedDetector`"
+":implemented_methods" = []
+":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`"
+":hyperparameter_types" = "`(\"MLJModelInterface.ProbabilisticUnsupervisedDetector\", \"Any\", \"Any\", \"Any\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`"
+":constructor" = "`TransformedTargetModel`"

-[NaiveBayes.MultinomialNBClassifier]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractMatrix{<:ScientificTypesBase.Count}}`"
-":output_scitype" = "`ScientificTypesBase.Unknown`"
-":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`"
-":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractMatrix{<:ScientificTypesBase.Count}}, AbstractVector{<:ScientificTypesBase.Finite}}`"
-":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`"
-":transform_scitype" = "`ScientificTypesBase.Unknown`"
+[MLJBase.TransformedTargetModelDeterministicSupervisedDetector]
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}`" +":predict_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`true`" -":package_name" = "NaiveBayes" -":package_license" = "unknown" -":load_path" = "MLJNaiveBayesInterface.MultinomialNBClassifier" -":package_uuid" = "9bbee03b-0db5-5f46-924f-b5c9c21b8c60" -":package_url" = "https://github.com/dfdx/NaiveBayes.jl" -":is_wrapper" = "`false`" +":is_pure_julia" = "`false`" +":package_name" = "MLJBase" +":package_license" = "MIT" +":load_path" = "MLJBase.TransformedTargetModel" +":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +":package_url" = "https://github.com/JuliaAI/MLJBase.jl" +":is_wrapper" = "`true`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nMultinomialNBClassifier\n```\n\nA model type for constructing a multinomial naive Bayes classifier, based on [NaiveBayes.jl](https://github.com/dfdx/NaiveBayes.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultinomialNBClassifier = @load MultinomialNBClassifier pkg=NaiveBayes\n```\n\nDo `model = MultinomialNBClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultinomialNBClassifier(alpha=...)`.\n\nThe [multinomial naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Multinomial_naive_Bayes) is often applied when input features consist of a counts (scitype `Count`) and when observations for a fixed target class are generated from a multinomial distribution with fixed probability vector, but whose sample length varies from observation to observation. 
For example, features might represent word counts in text documents being classified by sentiment.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Count`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `alpha=1`: Lindstone smoothing in estimation of multinomial probability vectors from training histograms (default corresponds to Laplacian smoothing).\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n * `predict_mode(mach, Xnew)`: Return the mode of above predictions.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `c_counts`: A dictionary containing the observed count of each input class.\n * `x_counts`: A dictionary containing the categorical counts of each input class.\n * `x_totals`: The sum of each count (input feature), ungrouped.\n * `n_obs`: The total number of observations in the training data.\n\n# Examples\n\n```\nusing MLJ\nimport TextAnalysis\n\nCountTransformer = @load CountTransformer pkg=MLJText\nMultinomialNBClassifier = @load MultinomialNBClassifier pkg=NaiveBayes\n\ntokenized_docs = TextAnalysis.tokenize.([\n \"I am very mad. You never listen.\",\n \"You seem to be having trouble? Can I help you?\",\n \"Our boss is mad at me. I hope he dies.\",\n \"His boss wants to help me. She is nice.\",\n \"Thank you for your help. It is nice working with you.\",\n \"Never do that again! I am so mad. \",\n])\n\nsentiment = [\n \"negative\",\n \"positive\",\n \"negative\",\n \"positive\",\n \"positive\",\n \"negative\",\n]\n\nmach1 = machine(CountTransformer(), tokenized_docs) |> fit!\n\n# matrix of counts:\nX = transform(mach1, tokenized_docs)\n\n# to ensure scitype(y) <: AbstractVector{<:OrderedFactor}:\ny = coerce(sentiment, OrderedFactor)\n\nclassifier = MultinomialNBClassifier()\nmach2 = machine(classifier, X, y)\nfit!(mach2, rows=1:4)\n\n# probabilistic predictions:\ny_prob = predict(mach2, rows=5:6) # distributions\npdf.(y_prob, \"positive\") # probabilities for \"positive\"\nlog_loss(y_prob, y[5:6])\n\n# point predictions:\nyhat = mode.(y_prob) # or `predict_mode(mach2, rows=5:6)`\n```\n\nSee also [`GaussianNBClassifier`](@ref)\n"
+":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` is one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`. In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the target, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(z))\n```\n"
+":name" = "TransformedTargetModelDeterministicSupervisedDetector"
+":human_name" = "transformed target model deterministic supervised detector"
+":is_supervised" = "`true`"
+":prediction_type" = ":deterministic"
+":abstract_type" = "`MLJModelInterface.DeterministicSupervisedDetector`"
+":implemented_methods" = []
+":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`"
+":hyperparameter_types" = "`(\"MLJModelInterface.DeterministicSupervisedDetector\", \"Any\", \"Any\", \"Any\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`"
+":constructor" = "`TransformedTargetModel`"

[MultivariateStats.LDA]
":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
@@ -4316,6 +5037,7 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[MultivariateStats.MultitargetLinearRegressor]
":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
@@ -4350,6 +5072,7 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[MultivariateStats.BayesianSubspaceLDA]
":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
@@ -4384,6 +5107,7 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[MultivariateStats.FactorAnalysis]
":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
@@ -4418,6 +5142,7 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[MultivariateStats.LinearRegressor]
":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
@@ -4452,6 +5177,7 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = 
"`()`" +":constructor" = "`nothing`" [MultivariateStats.ICA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4486,6 +5212,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MultivariateStats.PPCA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4520,6 +5247,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MultivariateStats.RidgeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4554,6 +5282,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MultivariateStats.KernelPCA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4588,6 +5317,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MultivariateStats.MultitargetRidgeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4622,6 +5352,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MultivariateStats.SubspaceLDA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4656,6 +5387,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MultivariateStats.BayesianLDA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4690,6 +5422,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MultivariateStats.PCA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4724,6 +5457,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [DecisionTree.AdaBoostStumpClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" @@ -4758,6 +5492,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [DecisionTree.DecisionTreeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" @@ -4792,6 +5527,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [DecisionTree.DecisionTreeClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" @@ -4826,6 +5562,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" 
[DecisionTree.RandomForestRegressor]
":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`"
@@ -4860,6 +5597,7 @@
":reports_feature_importances" = "`true`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[DecisionTree.RandomForestClassifier]
":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`"
@@ -4894,6 +5632,217 @@
":reports_feature_importances" = "`true`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"
+
+[MLJBalancing.BalancedModelDeterministic]
+":input_scitype" = "`ScientificTypesBase.Unknown`"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_pure_julia" = "`false`"
+":package_name" = "MLJBalancing"
+":package_license" = "MIT"
+":load_path" = "MLJBalancing.BalancedModel"
+":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586"
+":package_url" = "https://github.com/JuliaAI/MLJBalancing.jl"
+":is_wrapper" = "`true`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```\nBalancedModel(; model=nothing, balancer1=balancer_model1, balancer2=balancer_model2, ...)\nBalancedModel(model; balancer1=balancer_model1, balancer2=balancer_model2, ...)\n```\n\nGiven a classification model and one or more balancer models that all implement the `MLJModelInterface`, `BalancedModel` wraps an arbitrary number of balancing models and a classifier together in a sequential pipeline.\n\n# Operation\n\n * During training, data is first passed to `balancer1` and the result is passed to `balancer2` and so on; the result from the final balancer is then passed to the classifier for training.\n * During prediction, the balancers have no effect.\n\n# Arguments\n\n * `model::Supervised`: A classification model that implements the `MLJModelInterface`.\n * `balancer1::Static=...`: The first balancer model to pass the data to. This keyword argument can have any name.\n * `balancer2::Static=...`: The second balancer model to pass the data to. This keyword argument can have any name.\n * and so on for an arbitrary number of balancers.\n\n# Returns\n\n * An instance of type `ProbabilisticBalancedModel` or `DeterministicBalancedModel`, depending on the prediction type of `model`.\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# generate data\nX, y = Imbalance.generate_imbalanced_data(1000, 5; class_probs=[0.2, 0.3, 0.5])\n\n# prepare classification and balancing models\nSMOTENC = @load SMOTENC pkg=Imbalance verbosity=0\nTomekUndersampler = @load TomekUndersampler pkg=Imbalance verbosity=0\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\noversampler = SMOTENC(k=5, ratios=1.0, rng=42)\nundersampler = TomekUndersampler(min_ratios=0.5, rng=42)\nlogistic_model = LogisticClassifier()\n\n# wrap them in a BalancedModel\nbalanced_model = BalancedModel(model=logistic_model, balancer1=oversampler, balancer2=undersampler)\n\n# now this behaves as a unified model that can be trained, validated, fine-tuned, etc.\nmach = machine(balanced_model, X, y)\nfit!(mach)\n```\n"
+":name" = "BalancedModelDeterministic"
+":human_name" = "balanced model deterministic"
+":is_supervised" = "`true`"
+":prediction_type" = ":deterministic"
+":abstract_type" = "`MLJModelInterface.Deterministic`"
+":implemented_methods" = [":getproperty", ":setproperty!"]
+":hyperparameters" = "`(:balancers, :model)`"
+":hyperparameter_types" = "`(\"Any\", \"MLJModelInterface.Deterministic\")`"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`"
+":constructor" = "`BalancedModel`"
+
+[MLJBalancing.BalancedBaggingClassifier]
+":input_scitype" = "`ScientificTypesBase.Unknown`"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, AbstractVector{<:ScientificTypesBase.Finite}}`"
+":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_pure_julia" = "`true`"
+":package_name" = "MLJBalancing"
+":package_license" = "unknown"
+":load_path" = "MLJBalancing.BalancedBaggingClassifier"
+":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586"
+":package_url" = "https://github.com/JuliaAI/MLJBalancing.jl"
+":is_wrapper" = "`true`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```\nBalancedBaggingClassifier\n```\n\nA model type for constructing a balanced bagging classifier, based on [MLJBalancing.jl](https://github.com/JuliaAI/MLJBalancing).\n\nFrom MLJ, the type can be imported using\n\n`BalancedBaggingClassifier = @load BalancedBaggingClassifier pkg=MLJBalancing`\n\nConstruct an instance with default hyper-parameters using the syntax `bagging_model = BalancedBaggingClassifier(model=...)`.\n\nGiven a probabilistic classifier, `BalancedBaggingClassifier` performs bagging by undersampling only majority data in each bag so that it includes as many samples as the minority data. 
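(Editorial aside: the per-bag undersampling just described can be sketched in a few lines of Julia. This is a conceptual illustration only — not MLJBalancing's implementation — and the helper name is made up.)

```julia
using Random, StatsBase

# Conceptual sketch only (NOT MLJBalancing's code): indices for one bag,
# keeping every minority-class observation plus an equal-sized random
# subsample of the majority class.
function balanced_bag_indices(y, rng=Random.default_rng())
    counts = countmap(y)            # class label => frequency
    _, minority = findmin(counts)   # label with fewest observations
    _, majority = findmax(counts)   # label with most observations
    min_idx = findall(==(minority), y)
    maj_idx = findall(==(majority), y)
    keep = sample(rng, maj_idx, length(min_idx); replace=false)
    return shuffle(rng, vcat(min_idx, keep))
end
```
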
This scheme, with an AdaBoost classifier whose output scores are averaged, was proposed in Xu-Ying Liu, Jianxin Wu, & Zhi-Hua Zhou (2009). Exploratory Undersampling for Class-Imbalance Learning. IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics), 39(2), 539–550.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: input features of a form supported by the `model` being wrapped (typically a table, e.g., `DataFrame`, with `Continuous` columns will be supported, as a minimum)\n * `y`: the binary target, which can be any `AbstractVector` where `length(unique(y)) == 2`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `model::Probabilistic`: The classifier to use to train on each bag.\n * `T::Integer=0`: The number of bags to be used in the ensemble. If not given, will be set as the ratio between the frequency of the majority and minority classes. Can be later found in `report(mach)`.\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if Julia `VERSION>=1.7`. Otherwise, uses `MersenneTwister`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: return the mode of each prediction above\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# Load base classifier and BalancedBaggingClassifier\nBalancedBaggingClassifier = @load BalancedBaggingClassifier pkg=MLJBalancing\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\n# Construct the base classifier and use it to construct a BalancedBaggingClassifier\nlogistic_model = LogisticClassifier()\nmodel = BalancedBaggingClassifier(model=logistic_model, T=5)\n\n# Load the data and train the BalancedBaggingClassifier\nX, y = Imbalance.generate_imbalanced_data(100, 5; num_vals_per_category = [3, 2],\n class_probs = [0.9, 0.1],\n type = \"ColTable\",\n rng=42)\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇ 16 (19.0%)\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 84 (100.0%)\n\nmach = machine(model, X, y) |> fit!\n\n# Predict using the trained model\n\nyhat = predict(mach, X) # probabilistic predictions\npredict_mode(mach, X) # point predictions\n```\n"
+":name" = "BalancedBaggingClassifier"
+":human_name" = "balanced bagging classifier"
+":is_supervised" = "`true`"
+":prediction_type" = ":probabilistic"
+":abstract_type" = "`MLJModelInterface.Probabilistic`"
+":implemented_methods" = []
+":hyperparameters" = "`(:model, :T, :rng)`"
+":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Integer\", \"Union{Integer, Random.AbstractRNG}\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`"
+":constructor" = "`MLJBalancing.BalancedBaggingClassifier`"
+
+[MLJBalancing.BalancedModelInterval]
+":input_scitype" = "`ScientificTypesBase.Unknown`"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_pure_julia" = "`false`"
+":package_name" = "MLJBalancing"
+":package_license" = "MIT"
+":load_path" = "MLJBalancing.BalancedModel"
+":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586"
+":package_url" = "https://github.com/JuliaAI/MLJBalancing.jl"
+":is_wrapper" = "`true`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```\nBalancedModel(; model=nothing, balancer1=balancer_model1, balancer2=balancer_model2, ...)\nBalancedModel(model; balancer1=balancer_model1, balancer2=balancer_model2, ...)\n```\n\nGiven a classification model and one or more balancer models that all implement the `MLJModelInterface`, `BalancedModel` wraps an arbitrary number of balancing models and a classifier together in a sequential pipeline.\n\n# Operation\n\n * During training, data is first passed to `balancer1` and the result is passed to `balancer2` and so on; the result from the final balancer is then passed to the classifier for training.\n * During prediction, the balancers have no effect.\n\n# Arguments\n\n * `model::Supervised`: A classification model that implements the `MLJModelInterface`.\n * `balancer1::Static=...`: The first balancer model to pass the data to. This keyword argument can have any name.\n * `balancer2::Static=...`: The second balancer model to pass the data to. This keyword argument can have any name.\n * and so on for an arbitrary number of balancers.\n\n# Returns\n\n * An instance of type `ProbabilisticBalancedModel` or `DeterministicBalancedModel`, depending on the prediction type of `model`.\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# generate data\nX, y = Imbalance.generate_imbalanced_data(1000, 5; class_probs=[0.2, 0.3, 0.5])\n\n# prepare classification and balancing models\nSMOTENC = @load SMOTENC pkg=Imbalance verbosity=0\nTomekUndersampler = @load TomekUndersampler pkg=Imbalance verbosity=0\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\noversampler = SMOTENC(k=5, ratios=1.0, rng=42)\nundersampler = TomekUndersampler(min_ratios=0.5, rng=42)\nlogistic_model = LogisticClassifier()\n\n# wrap them in a BalancedModel\nbalanced_model = BalancedModel(model=logistic_model, balancer1=oversampler, balancer2=undersampler)\n\n# now this behaves as a unified model that can be trained, validated, fine-tuned, etc.\nmach = machine(balanced_model, X, y)\nfit!(mach)\n```\n"
+":name" = "BalancedModelInterval"
+":human_name" = "balanced model interval"
+":is_supervised" = "`true`"
+":prediction_type" = ":interval"
+":abstract_type" = "`MLJModelInterface.Interval`"
+":implemented_methods" = [":getproperty", ":setproperty!"]
+":hyperparameters" = "`(:balancers, :model)`"
+":hyperparameter_types" = "`(\"Any\", \"MLJModelInterface.Interval\")`"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`"
+":constructor" = "`BalancedModel`"
+
+[MLJBalancing.BalancedModelProbabilistic]
+":input_scitype" = "`ScientificTypesBase.Unknown`"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_pure_julia" = "`false`"
+":package_name" = "MLJBalancing"
+":package_license" = "MIT"
+":load_path" = "MLJBalancing.BalancedModel"
+":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586"
+":package_url" = "https://github.com/JuliaAI/MLJBalancing.jl"
+":is_wrapper" = "`true`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```\nBalancedModel(; model=nothing, balancer1=balancer_model1, balancer2=balancer_model2, ...)\nBalancedModel(model; balancer1=balancer_model1, balancer2=balancer_model2, ...)\n```\n\nGiven a classification model and one or more balancer models that all implement the `MLJModelInterface`, `BalancedModel` wraps an arbitrary number of balancing models and a classifier together in a sequential pipeline.\n\n# Operation\n\n * During training, data is first passed to `balancer1` and the result is passed to `balancer2` and so on; the result from the final balancer is then passed to the classifier for training.\n * During prediction, the balancers have no effect.\n\n# Arguments\n\n * `model::Supervised`: A classification model that implements the `MLJModelInterface`.\n * `balancer1::Static=...`: The first balancer model to pass the data to. This keyword argument can have any name.\n * `balancer2::Static=...`: The second balancer model to pass the data to. This keyword argument can have any name.\n * and so on for an arbitrary number of balancers.\n\n# Returns\n\n * An instance of type `ProbabilisticBalancedModel` or `DeterministicBalancedModel`, depending on the prediction type of `model`.\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# generate data\nX, y = Imbalance.generate_imbalanced_data(1000, 5; class_probs=[0.2, 0.3, 0.5])\n\n# prepare classification and balancing models\nSMOTENC = @load SMOTENC pkg=Imbalance verbosity=0\nTomekUndersampler = @load TomekUndersampler pkg=Imbalance verbosity=0\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\noversampler = SMOTENC(k=5, ratios=1.0, rng=42)\nundersampler = TomekUndersampler(min_ratios=0.5, rng=42)\nlogistic_model = LogisticClassifier()\n\n# wrap them in a BalancedModel\nbalanced_model = BalancedModel(model=logistic_model, balancer1=oversampler, balancer2=undersampler)\n\n# now this behaves as a unified model that can be trained, validated, fine-tuned, etc.\nmach = machine(balanced_model, X, y)\nfit!(mach)\n```\n"
+":name" = "BalancedModelProbabilistic"
+":human_name" = "balanced model probabilistic"
+":is_supervised" = "`true`"
+":prediction_type" = ":probabilistic"
+":abstract_type" = "`MLJModelInterface.Probabilistic`"
+":implemented_methods" = [":getproperty", ":propertynames", ":setproperty!"]
+":hyperparameters" = "`(:balancers, :model)`"
+":hyperparameter_types" = "`(\"Any\", \"MLJModelInterface.Probabilistic\")`"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, 
:transform, :inverse_transform)`"
+":constructor" = "`BalancedModel`"
+
+[MLJTuning.DeterministicTunedModel]
+":input_scitype" = "`ScientificTypesBase.Unknown`"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_pure_julia" = "`false`"
+":package_name" = "MLJTuning"
+":package_license" = "MIT"
+":load_path" = "MLJTuning.TunedModel"
+":package_uuid" = "03970b2e-30c4-11ea-3135-d1576263f10f"
+":package_url" = "https://github.com/alan-turing-institute/MLJTuning.jl"
+":is_wrapper" = "`true`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```\ntuned_model = TunedModel(; model=,\n tuning=RandomSearch(),\n resampling=Holdout(),\n range=nothing,\n measure=nothing,\n n=default_n(tuning, range),\n operation=nothing,\n other_options...)\n```\n\nConstruct a model wrapper for hyper-parameter optimization of a supervised learner, specifying the `tuning` strategy and `model` whose hyper-parameters are to be mutated.\n\n```\ntuned_model = TunedModel(; models=,\n resampling=Holdout(),\n measure=nothing,\n n=length(models),\n operation=nothing,\n other_options...)\n```\n\nConstruct a wrapper for multiple `models`, for selection of an optimal one (equivalent to specifying `tuning=Explicit()` and `range=models` above). Elements of the iterator `models` need not have a common type, but they must all be `Deterministic` or all be `Probabilistic` *and this is not checked* but inferred from the first element generated.\n\nSee below for a complete list of options.\n\n### Training\n\nCalling `fit!(mach)` on a machine `mach=machine(tuned_model, X, y)` or `mach=machine(tuned_model, X, y, w)` will:\n\n * Instigate a search, over clones of `model`, with the hyperparameter mutations specified by `range`, for a model optimizing the specified `measure`, using performance evaluations carried out using the specified `tuning` strategy and `resampling` strategy. In the case `models` is explicitly listed, the search is instead over the models generated by the iterator `models`.\n * Fit an internal machine, based on the optimal model `fitted_params(mach).best_model`, wrapping the optimal `model` object in *all* the provided data `X`, `y`(, `w`). Calling `predict(mach, Xnew)` then returns predictions on `Xnew` of this internal machine. The final train can be suppressed by setting `train_best=false`.\n\n### Search space\n\nThe `range` objects supported depend on the `tuning` strategy specified. Query the `strategy` docstring for details. To optimize over an explicit list `v` of models of the same type, use `strategy=Explicit()` and specify `model=v[1]` and `range=v`.\n\nThe number of models searched is specified by `n`. If unspecified, then `MLJTuning.default_n(tuning, range)` is used. When `n` is increased and `fit!(mach)` called again, the old search history is re-instated and the search continues where it left off.\n\n### Measures (metrics)\n\nIf more than one `measure` is specified, then only the first is optimized (unless `strategy` is multi-objective) but the performance against every measure specified will be computed and reported in `report(mach).best_performance` and other relevant attributes of the generated report. 
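(Editorial aside: the docstring above stops short of a complete usage example, so here is a minimal end-to-end sketch. It assumes DecisionTree.jl is installed and uses only the keyword options documented here.)

```julia
using MLJ

X, y = make_regression(100, 5)  # synthetic table + Continuous target

Tree = @load DecisionTreeRegressor pkg=DecisionTree verbosity=0
tree = Tree()

# grid-search one hyper-parameter, estimating performance by 3-fold CV:
r = range(tree, :min_samples_split, lower=2, upper=50)
tuned_tree = TunedModel(
    model=tree,
    tuning=Grid(),
    resampling=CV(nfolds=3),
    range=r,
    measure=rms,
)

mach = machine(tuned_tree, X, y) |> fit!
fitted_params(mach).best_model  # optimal model found by the search
```
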
Options exist to pass per-observation weights or class weights to measures; see below.\n\n*Important.* If a custom measure `my_measure` is used, and the measure is a score rather than a loss, be sure to check that `MLJ.orientation(my_measure) == :score` to ensure maximization of the measure, rather than minimization. Override an incorrect value with `MLJ.orientation(::typeof(my_measure)) = :score`.\n\n### Accessing the fitted parameters and other training (tuning) outcomes\n\nA Plots.jl plot of performance estimates is returned by `plot(mach)` or `heatmap(mach)`.\n\nOnce a tuning machine `mach` has been trained as above, then `fitted_params(mach)` has these keys/values:\n\n| key | value |\n| --------------------:| ---------------------------------------:|\n| `best_model` | optimal model instance |\n| `best_fitted_params` | learned parameters of the optimal model |\n\nThe named tuple `report(mach)` includes these keys/values:\n\n| key | value |\n| --------------------:| ------------------------------------------------------------------:|\n| `best_model` | optimal model instance |\n| `best_history_entry` | corresponding entry in the history, including performance estimate |\n| `best_report` | report generated by fitting the optimal model to all data |\n| `history` | tuning strategy-specific history of all evaluations |\n\nplus other key/value pairs specific to the `tuning` strategy.\n\nEach element of `history` is a property-accessible object with these properties:\n\n| key | value |\n| -------------:| -----------------------------------------------------------------:|\n| `measure` | vector of measures (metrics) |\n| `measurement` | vector of measurements, one per measure |\n| `per_fold` | vector of vectors of unaggregated per-fold measurements |\n| `evaluation` | full `PerformanceEvaluation`/`CompactPerformanceEvaluation` object |\n\n### Complete list of key-word options\n\n * `model`: `Supervised` model prototype that is cloned and mutated to generate models for evaluation\n * `models`: Alternatively, an iterator of MLJ models to be explicitly evaluated. These may have varying types.\n * `tuning=RandomSearch()`: tuning strategy to be applied (eg, `Grid()`). See the [Tuning Models](https://alan-turing-institute.github.io/MLJ.jl/dev/tuning_models/#Tuning-Models) section of the MLJ manual for a complete list of options.\n * `resampling=Holdout()`: resampling strategy (eg, `Holdout()`, `CV()`, `StratifiedCV()`) to be applied in performance evaluations\n * `measure`: measure or measures to be applied in performance evaluations; only the first used in optimization (unless the strategy is multi-objective) but all reported to the history\n * `weights`: per-observation weights to be passed to the measure(s) in performance evaluations, where supported. Check support with `supports_weights(measure)`.\n * `class_weights`: class weights to be passed to the measure(s) in performance evaluations, where supported. Check support with `supports_class_weights(measure)`.\n * `repeats=1`: for generating train/test sets multiple times in resampling (\"Monte Carlo\" resampling); see [`evaluate!`](@ref) for details\n * `operation`/`operations` - One of `predict`, `predict_mean`, `predict_mode`, `predict_median`, or `predict_joint`, or a vector of these of the same length as `measure`/`measures`. Automatically inferred if left unspecified.\n * `range`: range object; tuning strategy documentation describes supported types\n * `selection_heuristic`: the rule determining how the best model is decided. According to the default heuristic, `NaiveSelection()`, `measure` (or the first element of `measure`) is evaluated for each resample and these per-fold measurements are aggregated. The model with the lowest (resp. highest) aggregate is chosen if the measure is a `:loss` (resp. a `:score`).\n * `n`: number of iterations (ie, models to be evaluated); set by tuning strategy if left unspecified\n * `train_best=true`: whether to train the optimal model\n * `acceleration=default_resource()`: mode of parallelization for tuning strategies that support this\n * `acceleration_resampling=CPU1()`: mode of parallelization for resampling\n * `check_measure=true`: whether to check `measure` is compatible with the specified `model` and `operation`\n * `cache=true`: whether to cache model-specific representations of user-supplied data; set to `false` to conserve memory. Speed gains likely limited to the case `resampling isa Holdout`.\n * `compact_history=true`: whether to write [`CompactPerformanceEvaluation`](@ref) or regular [`PerformanceEvaluation`](@ref) objects to the history (accessed via the `:evaluation` key); the compact form excludes some fields to conserve memory.\n"
+":name" = "DeterministicTunedModel"
+":human_name" = "deterministic tuned model"
+":is_supervised" = "`true`"
+":prediction_type" = ":deterministic"
+":abstract_type" = "`MLJModelInterface.Deterministic`"
+":implemented_methods" = []
+":hyperparameters" = "`(:model, :tuning, :resampling, :measure, :weights, :class_weights, :operation, :range, :selection_heuristic, :train_best, :repeats, :n, :acceleration, :acceleration_resampling, :check_measure, :cache, :compact_history, :logger)`"
+":hyperparameter_types" = "`(\"Union{MLJModelInterface.Deterministic, MLJModelInterface.DeterministicSupervisedDetector, MLJModelInterface.DeterministicUnsupervisedDetector}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict}\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"ComputationalResources.AbstractResource\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Bool\", \"Bool\", \"Any\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":iteration_parameter" = ":n"
+":supports_training_losses" = "`true`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+":constructor" = "`TunedModel`"
+
+[MLJTuning.ProbabilisticTunedModel]
+":input_scitype" = "`ScientificTypesBase.Unknown`"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_pure_julia" = "`false`"
+":package_name" = "MLJTuning"
+":package_license" = "MIT"
+":load_path" = "MLJTuning.TunedModel"
+":package_uuid" = "03970b2e-30c4-11ea-3135-d1576263f10f"
+":package_url" = "https://github.com/alan-turing-institute/MLJTuning.jl"
+":is_wrapper" = "`true`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```\ntuned_model = TunedModel(; model=,\n tuning=RandomSearch(),\n resampling=Holdout(),\n range=nothing,\n measure=nothing,\n n=default_n(tuning, range),\n operation=nothing,\n other_options...)\n```\n\nConstruct a model wrapper for hyper-parameter optimization of a supervised learner, specifying the `tuning` strategy and `model` whose hyper-parameters are to be mutated.\n\n```\ntuned_model = TunedModel(; models=,\n resampling=Holdout(),\n measure=nothing,\n n=length(models),\n operation=nothing,\n other_options...)\n```\n\nConstruct a wrapper for multiple `models`, for selection of an optimal one (equivalent to specifying `tuning=Explicit()` and `range=models` above). Elements of the iterator `models` need not have a common type, but they must all be `Deterministic` or all be `Probabilistic` *and this is not checked* but inferred from the first element generated.\n\nSee below for a complete list of options.\n\n### Training\n\nCalling `fit!(mach)` on a machine `mach=machine(tuned_model, X, y)` or `mach=machine(tuned_model, X, y, w)` will:\n\n * Instigate a search, over clones of `model`, with the hyperparameter mutations specified by `range`, for a model optimizing the specified `measure`, using performance evaluations carried out using the specified `tuning` strategy and `resampling` strategy. In the case `models` is explicitly listed, the search is instead over the models generated by the iterator `models`.\n * Fit an internal machine, based on the optimal model `fitted_params(mach).best_model`, wrapping the optimal `model` object in *all* the provided data `X`, `y`(, `w`). Calling `predict(mach, Xnew)` then returns predictions on `Xnew` of this internal machine. The final train can be suppressed by setting `train_best=false`.\n\n### Search space\n\nThe `range` objects supported depend on the `tuning` strategy specified. Query the `strategy` docstring for details. To optimize over an explicit list `v` of models of the same type, use `strategy=Explicit()` and specify `model=v[1]` and `range=v`.\n\nThe number of models searched is specified by `n`. If unspecified, then `MLJTuning.default_n(tuning, range)` is used. When `n` is increased and `fit!(mach)` called again, the old search history is re-instated and the search continues where it left off.\n\n### Measures (metrics)\n\nIf more than one `measure` is specified, then only the first is optimized (unless `strategy` is multi-objective) but the performance against every measure specified will be computed and reported in `report(mach).best_performance` and other relevant attributes of the generated report. Options exist to pass per-observation weights or class weights to measures; see below.\n\n*Important.* If a custom measure `my_measure` is used, and the measure is a score rather than a loss, be sure to check that `MLJ.orientation(my_measure) == :score` to ensure maximization of the measure, rather than minimization. Override an incorrect value with `MLJ.orientation(::typeof(my_measure)) = :score`.\n\n### Accessing the fitted parameters and other training (tuning) outcomes\n\nA Plots.jl plot of performance estimates is returned by `plot(mach)` or `heatmap(mach)`.\n\nOnce a tuning machine `mach` has been trained as above, then `fitted_params(mach)` has these keys/values:\n\n| key | value |\n| --------------------:| ---------------------------------------:|\n| `best_model` | optimal model instance |\n| `best_fitted_params` | learned parameters of the optimal model |\n\nThe named tuple `report(mach)` includes these keys/values:\n\n| key | value |\n| --------------------:| ------------------------------------------------------------------:|\n| `best_model` | optimal model instance |\n| `best_history_entry` | corresponding entry in the history, including performance estimate |\n| `best_report` | report generated by fitting the optimal model to all data |\n| `history` | tuning strategy-specific history of all evaluations |\n\nplus other key/value pairs specific to the `tuning` strategy.\n\nEach element of `history` is a property-accessible object with these properties:\n\n| key | value |\n| -------------:| -----------------------------------------------------------------:|\n| `measure` | vector of measures (metrics) |\n| `measurement` | vector of measurements, one per measure |\n| `per_fold` | vector of vectors of unaggregated per-fold measurements |\n| `evaluation` | full `PerformanceEvaluation`/`CompactPerformanceEvaluation` object |\n\n### Complete list of key-word options\n\n * `model`: `Supervised` model prototype that is cloned and mutated to generate models for evaluation\n * `models`: Alternatively, an iterator of MLJ models to be explicitly evaluated. These may have varying types.\n * `tuning=RandomSearch()`: tuning strategy to be applied (eg, `Grid()`). See the [Tuning Models](https://alan-turing-institute.github.io/MLJ.jl/dev/tuning_models/#Tuning-Models) section of the MLJ manual for a complete list of options.\n * `resampling=Holdout()`: resampling strategy (eg, `Holdout()`, `CV()`, `StratifiedCV()`) to be applied in performance evaluations\n * `measure`: measure or measures to be applied in performance evaluations; only the first used in optimization (unless the strategy is multi-objective) but all reported to the history\n * `weights`: per-observation weights to be passed to the measure(s) in performance evaluations, where supported. Check support with `supports_weights(measure)`.\n * `class_weights`: class weights to be passed to the measure(s) in performance evaluations, where supported. Check support with `supports_class_weights(measure)`.\n * `repeats=1`: for generating train/test sets multiple times in resampling (\"Monte Carlo\" resampling); see [`evaluate!`](@ref) for details\n * `operation`/`operations` - One of `predict`, `predict_mean`, `predict_mode`, `predict_median`, or `predict_joint`, or a vector of these of the same length as `measure`/`measures`. Automatically inferred if left unspecified.\n * `range`: range object; tuning strategy documentation describes supported types\n * `selection_heuristic`: the rule determining how the best model is decided. According to the default heuristic, `NaiveSelection()`, `measure` (or the first element of `measure`) is evaluated for each resample and these per-fold measurements are aggregated. The model with the lowest (resp. highest) aggregate is chosen if the measure is a `:loss` (resp. a `:score`).\n * `n`: number of iterations (ie, models to be evaluated); set by tuning strategy if left unspecified\n * `train_best=true`: whether to train the optimal model\n * `acceleration=default_resource()`: mode of parallelization for tuning strategies that support this\n * `acceleration_resampling=CPU1()`: mode of parallelization for resampling\n * `check_measure=true`: whether to check `measure` is compatible with the specified `model` and `operation`\n * `cache=true`: whether to cache model-specific representations of user-supplied data; set to `false` to conserve memory. Speed gains likely limited to the case `resampling isa Holdout`.\n * `compact_history=true`: whether to write [`CompactPerformanceEvaluation`](@ref) or regular [`PerformanceEvaluation`](@ref) objects to the history (accessed via the `:evaluation` key); the compact form excludes some fields to conserve memory.\n"
+":name" = "ProbabilisticTunedModel"
+":human_name" = "probabilistic tuned model"
+":is_supervised" = "`true`"
+":prediction_type" = ":probabilistic"
+":abstract_type" = "`MLJModelInterface.Probabilistic`"
+":implemented_methods" = []
+":hyperparameters" = "`(:model, :tuning, :resampling, :measure, :weights, :class_weights, :operation, :range, :selection_heuristic, :train_best, :repeats, :n, :acceleration, :acceleration_resampling, :check_measure, :cache, :compact_history, :logger)`"
+":hyperparameter_types" = "`(\"Union{MLJModelInterface.Probabilistic, MLJModelInterface.ProbabilisticSupervisedDetector, MLJModelInterface.ProbabilisticUnsupervisedDetector}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict}\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"ComputationalResources.AbstractResource\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Bool\", \"Bool\", \"Any\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":iteration_parameter" = ":n"
+":supports_training_losses" = "`true`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+":constructor" = "`TunedModel`"

[Imbalance.RandomOversampler]
":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`"
@@ -4928,6 +5877,7 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[Imbalance.SMOTENC]
":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`"
@@ -4962,6 +5912,7 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[Imbalance.TomekUndersampler]
":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`"
@@ -4996,6 +5947,7 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[Imbalance.ClusterUndersampler]
":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" @@ -5030,6 +5982,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [Imbalance.SMOTE] ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" @@ -5064,6 +6017,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [Imbalance.SMOTEN] ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" @@ -5098,6 +6052,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [Imbalance.ROSE] ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" @@ -5132,6 +6087,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [Imbalance.RandomUndersampler] ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" @@ -5166,6 +6122,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [Imbalance.ENNUndersampler] ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" @@ -5200,6 +6157,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [Imbalance.BorderlineSMOTE1] ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" @@ -5234,6 +6192,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [Imbalance.RandomWalkOversampler] ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" @@ -5268,6 +6227,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [Clustering.HierarchicalClustering] ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" @@ -5302,6 +6262,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`(:predict,)`" +":constructor" = "`nothing`" [Clustering.DBSCAN] ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" @@ -5336,6 +6297,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`(:predict,)`" +":constructor" = "`nothing`" [Clustering.KMeans] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5370,6 +6332,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" 
= "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [Clustering.KMedoids] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5404,6 +6367,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [EvoLinear.EvoSplineRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -5438,6 +6402,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [EvoLinear.EvoLinearRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -5472,6 +6437,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [XGBoost.XGBoostCount] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5506,6 +6472,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [XGBoost.XGBoostRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5540,6 +6507,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [XGBoost.XGBoostClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5574,6 +6542,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [LightGBM.LGBMClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5608,6 +6577,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [LightGBM.LGBMRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5642,6 +6612,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [SymbolicRegression.MultitargetSRRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5661,21 +6632,22 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nMultitargetSRRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetSRRegressor(binary_operators=...)`.\n\nMulti-target Symbolic Regression regressor (`MultitargetSRRegressor`) conducts several searches for expressions that predict each target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype\n\n`Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n\n * `y` is the target, which can be any table of target variables whose element scitype is `Continuous`; check the scitype with `schema(y)`. Units in columns of `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whoose element scitype is `Count` or `Continuous`. The same weights are used for all targets.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of lists of trained models. The models chosen from each of these lists is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. 
This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::Options`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `node_type::Type{N}=Node`: The type of node to use for the search. For example, `Node` or `GraphNode`.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to each variable. By default, this is 1.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. 
Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `0` which means no limit.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `output_file`: What file to store equations to, as a backup.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. 
If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. 
By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multithreading`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to selection expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity. 
`T` is equal to the element type of the passed data.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Vector{Int}}`: The complexity of each expression in each Pareto frontier.\n * `losses::Vector{Vector{L}}`: The loss of each expression in each Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{Vector{L}}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\nX = (a=rand(100), b=rand(100), c=rand(100))\nY = (y1=(@. cos(X.c) * 2.1 - 0.9), y2=(@. X.a * X.b + X.c))\nmodel = MultitargetSRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, Y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equations used:\nr = report(mach)\nfor (output_index, (eq, i)) in enumerate(zip(r.equation_strings, r.best_idx))\n println(\"Equation used for \", output_index, \": \", eq[i])\nend\n```\n\nSee also [`SRRegressor`](@ref).\n" +":docstring" = "```\nMultitargetSRRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetSRRegressor(binary_operators=...)`.\n\nMulti-target Symbolic Regression regressor (`MultitargetSRRegressor`) conducts several searches for expressions that predict each target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. 
This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any table of target variables whose element scitype is `Continuous`; check the scitype with `schema(y)`. Units in columns of `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. The same weights are used for all targets.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of lists of trained models. The models chosen from each of these lists are defined by the `selection_method` keyword argument (a function), which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. 
Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::Node{T}`, `dataset::Dataset{T}`, and `options::Options`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to each variable. By default, this is 1.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. 
Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is \"BFGS\", but \"NelderMead\" is also supported.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `output_file`: What file to store equations to, as a backup.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `enable_autodiff`: Whether to enable automatic differentiation functionality. 
This is turned off by default. If turned on, this will be turned off if one of the operators does not have well-defined gradients.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`), and you are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. 
By default, this is set to `Sys.free_memory() / numprocs`.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select an expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity. `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Vector{Int}}`: The complexity of each expression in each Pareto frontier.\n * `losses::Vector{Vector{L}}`: The loss of each expression in each Pareto frontier, according to the loss function specified in the model. 
The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{Vector{L}}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\nX = (a=rand(100), b=rand(100), c=rand(100))\nY = (y1=(@. cos(X.c) * 2.1 - 0.9), y2=(@. X.a * X.b + X.c))\nmodel = MultitargetSRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, Y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equations used:\nr = report(mach)\nfor (output_index, (eq, i)) in enumerate(zip(r.equation_strings, r.best_idx))\n println(\"Equation used for \", output_index, \": \", eq[i])\nend\n```\n\nSee also [`SRRegressor`](@ref).\n" ":name" = "MultitargetSRRegressor" ":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [] -":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :dimensional_constraint_penalty, :alpha, :maxsize, :maxdepth, :turbo, :bumper, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :node_type, :populations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :population_size, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :print_precision, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :use_recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :nested_constraints, :deterministic, :define_helper_functions, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :runtests, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Type\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.MutationWeightsModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", 
\"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :dimensional_constraint_penalty, :alpha, :maxsize, :maxdepth, :turbo, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :populations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :population_size, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :print_precision, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_options, :val_recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :enable_autodiff, :nested_constraints, :deterministic, :define_helper_functions, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :runtests, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", 
\"Integer\", \"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [SymbolicRegression.SRRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5695,21 +6667,22 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(binary_operators=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. 
Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`. Units in `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whoose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::Options`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. 
If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `node_type::Type{N}=Node`: The type of node to use for the search. For example, `Node` or `GraphNode`.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to each variable. By default, this is 1.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. 
If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `0` which means no limit.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `output_file`: What file to store equations to, as a backup.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. 
The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multithreading`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. 
Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to selection expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. 
A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nWith units and variable names:\n\n```julia\nusing MLJ\nusing DynamicQuantities\nSRegressor = @load SRRegressor pkg=SymbolicRegression\n\nX = (; x1=rand(32) .* us\"km/h\", x2=rand(32) .* us\"km\")\ny = @. X.x2 / X.x1 + 0.5us\"h\"\nmodel = SRRegressor(binary_operators=[+, -, *, /])\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n"
+":docstring" = "```\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(binary_operators=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`. Units in `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. 
All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::Node{T}`, `dataset::Dataset{T}`, and `options::Options`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. 
By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to each variable. By default, this is 1.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is \"BFGS\", but \"NelderMead\" is also supported.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `output_file`: What file to store equations to, as a backup.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` should be passed to these options. 
See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `enable_autodiff`: Whether to enable automatic differentiation functionality. This is turned off by default. If turned on, this will be turned off if one of the operators does not have well-defined gradients.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. 
If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`), and you are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select an expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. 
Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nWith units and variable names:\n\n```julia\nusing MLJ\nusing DynamicQuantities\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n\nX = (; x1=rand(32) .* us\"km/h\", x2=rand(32) .* us\"km\")\ny = @. 
X.x2 / X.x1 + 0.5us\"h\"\nmodel = SRRegressor(binary_operators=[+, -, *, /])\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n" ":name" = "SRRegressor" ":human_name" = "Symbolic Regression via Evolutionary Search" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [] -":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :dimensional_constraint_penalty, :alpha, :maxsize, :maxdepth, :turbo, :bumper, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :node_type, :populations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :population_size, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :print_precision, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :use_recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :nested_constraints, :deterministic, :define_helper_functions, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :runtests, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Type\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.MutationWeightsModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :dimensional_constraint_penalty, :alpha, :maxsize, :maxdepth, :turbo, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :populations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :population_size, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :print_precision, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_options, :val_recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :enable_autodiff, :nested_constraints, :deterministic, :define_helper_functions, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :runtests, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJText.TfidfTransformer] ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" @@ -5744,6 +6717,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJText.CountTransformer] ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" @@ -5778,6 +6752,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJText.BM25Transformer] ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" @@ -5812,6 +6787,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [EvoTrees.EvoTreeClassifier] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -5846,6 +6822,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [EvoTrees.EvoTreeGaussian] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -5880,6 +6857,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [EvoTrees.EvoTreeMLE] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -5914,6 +6892,7 @@ 
":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [EvoTrees.EvoTreeRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -5948,6 +6927,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [EvoTrees.EvoTreeCount] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -5982,6 +6962,7 @@ ":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJModels.ConstantClassifier] ":input_scitype" = "`ScientificTypesBase.Table`" @@ -5996,7 +6977,7 @@ ":package_license" = "MIT" ":load_path" = "MLJModels.ConstantClassifier" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" @@ -6016,6 +6997,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJModels.Standardizer] ":input_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -6030,12 +7012,12 @@ ":package_license" = "MIT" ":load_path" = "MLJModels.Standardizer" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nStandardizer\n```\n\nA model type for constructing a standardizer, based on [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStandardizer = @load Standardizer pkg=MLJModels\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table or any abstract vector with `Continuous` element scitype (any abstract float vector). 
Only features in a table with `Continuous` scitype can be standardized; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n * function or other callable: standardize a feature if the callable returns `true` on its name. For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the corresponding untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names │ scitypes │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count │\n│ ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous │\n│ ordinal4 │ Continuous │\n│ nominal │ Multiclass{3} │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n" +":docstring" = "```\nStandardizer\n```\n\nA model type for constructing a 
standardizer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStandardizer = @load Standardizer pkg=MLJModels\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table or any abstract vector with `Continuous` element scitype (any abstract float vector). Only features in a table with `Continuous` scitype can be standardized; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n * function or other callable: standardize a feature if the callable returns `true` on its name. For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the corresponding untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names │ scitypes │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count │\n│ 
ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous │\n│ ordinal4 │ Continuous │\n│ nominal │ Multiclass{3} │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n"
":name" = "Standardizer"
":human_name" = "standardizer"
":is_supervised" = "`false`"
@@ -6050,6 +7032,42 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"
+
+[MLJModels.ThresholdSupervisedDetector]
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}`"
+":predict_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_pure_julia" = "`false`"
+":package_name" = "MLJModels"
+":package_license" = "unknown"
+":load_path" = "MLJModels.BinaryThresholdPredictor"
+":package_uuid" = ""
+":package_url" = "https://github.com/JuliaAI/MLJModels.jl"
+":is_wrapper" = "`true`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```\nBinaryThresholdPredictor(model; threshold=0.5)\n```\n\nWrap the `Probabilistic` model, `model`, assumed to support binary classification, as a `Deterministic` model, by applying the specified `threshold` to the positive class probability. In addition to conventional supervised classifiers, it can also be applied to outlier detection models that predict normalized scores - in the form of appropriate `UnivariateFinite` distributions - that is, models that subtype `AbstractProbabilisticUnsupervisedDetector` or `AbstractProbabilisticSupervisedDetector`.\n\nBy convention the positive class is the second class returned by `levels(y)`, where `y` is the target.\n\nIf `threshold=0.5` then calling `predict` on the wrapped model is equivalent to calling `predict_mode` on the atomic model.\n\n# Example\n\nBelow is an application to the well-known Pima Indian diabetes dataset, including optimization of the `threshold` parameter, with high balanced accuracy as the objective. 
The target class distribution is 500 positives to 268 negatives.\n\nLoading the data:\n\n```julia\nusing MLJ, Random\nrng = Xoshiro(123)\n\ndiabetes = OpenML.load(43582)\noutcome, X = unpack(diabetes, ==(:Outcome), rng=rng);\ny = coerce(Int.(outcome), OrderedFactor);\n```\n\nChoosing a probabilistic classifier:\n\n```julia\nEvoTreesClassifier = @load EvoTreesClassifier\nprob_predictor = EvoTreesClassifier()\n```\n\nWrapping in `BinaryThresholdPredictor` to get a deterministic classifier with `threshold` as a new hyperparameter:\n\n```julia\npoint_predictor = BinaryThresholdPredictor(prob_predictor, threshold=0.6)\nmach = machine(point_predictor, X, y) |> fit!\npredict(mach, X)[1:3] # [0, 0, 0]\n```\n\nEstimating performance:\n\n```julia\nbalanced = BalancedAccuracy(adjusted=true)\ne = evaluate!(mach, resampling=CV(nfolds=6), measures=[balanced, accuracy])\ne.measurement[1] # 0.405 ± 0.089\n```\n\nWrapping in tuning strategy to learn `threshold` that maximizes balanced accuracy:\n\n```julia\nr = range(point_predictor, :threshold, lower=0.1, upper=0.9)\ntuned_point_predictor = TunedModel(\n    point_predictor,\n    tuning=RandomSearch(rng=rng),\n    resampling=CV(nfolds=6),\n    range = r,\n    measure=balanced,\n    n=30,\n)\nmach2 = machine(tuned_point_predictor, X, y) |> fit!\noptimized_point_predictor = report(mach2).best_model\noptimized_point_predictor.threshold # 0.260\npredict(mach2, X)[1:3] # [1, 1, 0]\n```\n\nEstimating the performance of the auto-thresholding model (nested resampling here):\n\n```julia\ne = evaluate!(mach2, resampling=CV(nfolds=6), measure=[balanced, accuracy])\ne.measurement[1] # 0.477 ± 0.110\n```\n"
+":name" = "ThresholdSupervisedDetector"
+":human_name" = "threshold supervised detector"
+":is_supervised" = "`true`"
+":prediction_type" = ":deterministic"
+":abstract_type" = "`MLJModelInterface.DeterministicSupervisedDetector`"
+":implemented_methods" = []
+":hyperparameters" = "`(:model, :threshold)`"
+":hyperparameter_types" = "`(\"MLJModelInterface.ProbabilisticSupervisedDetector\", \"Float64\")`"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+":constructor" = "`BinaryThresholdPredictor`"

[MLJModels.DeterministicConstantClassifier]
":input_scitype" = "`ScientificTypesBase.Table`"
@@ -6064,12 +7082,12 @@
":package_license" = "MIT"
":load_path" = "MLJModels.DeterministicConstantClassifier"
":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
-":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl"
+":package_url" = "https://github.com/JuliaAI/MLJModels.jl"
":is_wrapper" = "`false`"
":supports_weights" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = "```\nDeterministicConstantClassifier\n```\n\nA model type for constructing a deterministic constant classifier, based on\n[MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantClassifier = @load DeterministicConstantClassifier pkg=MLJModels\n```\n\nDo `model = DeterministicConstantClassifier()` to construct an instance with default hyper-parameters. 
" +":docstring" = "```\nDeterministicConstantClassifier\n```\n\nA model type for constructing a deterministic constant classifier, based on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantClassifier = @load DeterministicConstantClassifier pkg=MLJModels\n```\n\nDo `model = DeterministicConstantClassifier()` to construct an instance with default hyper-parameters. " ":name" = "DeterministicConstantClassifier" ":human_name" = "deterministic constant classifier" ":is_supervised" = "`true`" @@ -6084,6 +7102,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJModels.UnivariateTimeTypeToContinuous] ":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" @@ -6098,12 +7117,12 @@ ":package_license" = "MIT" ":load_path" = "MLJModels.UnivariateTimeTypeToContinuous" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=MLJModels\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. 
If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142\n```\n" +":docstring" = "```\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=MLJModels\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. 
If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142\n```\n" ":name" = "UnivariateTimeTypeToContinuous" ":human_name" = "single variable transformer that creates continuous representations of temporally typed data" ":is_supervised" = "`false`" @@ -6118,6 +7137,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJModels.OneHotEncoder] ":input_scitype" = "`ScientificTypesBase.Table`" @@ -6132,12 +7152,12 @@ ":package_license" = "MIT" ":load_path" = "MLJModels.OneHotEncoder" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nOneHotEncoder\n```\n\nA model type for constructing a one-hot encoder, based on [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneHotEncoder = @load OneHotEncoder pkg=MLJModels\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (column names). If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). 
This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=false`: when `true`, `OrderedFactor` features are universally excluded\n * `drop_last=true`: whether to drop the column corresponding to the final class of encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade__A │ Continuous │\n│ grade__B │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Count │\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n" +":docstring" = "```\nOneHotEncoder\n```\n\nA model type for constructing a one-hot encoder, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneHotEncoder = @load OneHotEncoder pkg=MLJModels\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (column names). 
If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=false`: when `true`, `OrderedFactor` features are universally excluded\n * `drop_last=true`: whether to drop the column corresponding to the final class of encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade__A │ Continuous │\n│ grade__B │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Count │\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n" ":name" = "OneHotEncoder" ":human_name" = "one-hot encoder" ":is_supervised" = "`false`" @@ -6152,6 +7172,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJModels.ContinuousEncoder] ":input_scitype" = "`ScientificTypesBase.Table`" @@ -6166,12 +7187,12 @@ ":package_license" = "MIT" ":load_path" = "MLJModels.ContinuousEncoder" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContinuousEncoder = @load ContinuousEncoder pkg=MLJModels\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (columns) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `ordered_factors=false` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping columns) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but two just features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n" +":docstring" = "```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, 
based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContinuousEncoder = @load ContinuousEncoder pkg=MLJModels\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (columns) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `one_hot_ordered_factors=true` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping columns) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n     grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n     height=[1.85, 1.67, 1.5, 1.67],\n     n_devices=[3, 2, 4, 3],\n     comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n"
":name" = "ContinuousEncoder"
":human_name" = "continuous encoder"
":is_supervised" = "`false`"
@@ -6186,6 +7207,7 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[MLJModels.UnivariateBoxCoxTransformer]
":input_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
@@ -6200,12 +7222,12 @@
":package_license" = "MIT"
":load_path" = "MLJModels.UnivariateBoxCoxTransformer"
":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
-":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl"
+":package_url" = "https://github.com/JuliaAI/MLJModels.jl"
":is_wrapper" = "`false`"
":supports_weights" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = "```\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=MLJModels\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. 
This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `z` whose transformation learned by `mach` is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```\nusing MLJ\nusing UnicodePlots\nusing Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia> histogram(x)\n ┌ ┐\n [ 0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n" +":docstring" = "```\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=MLJModels\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. 
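(Aside: a minimal sketch of the shifted Box-Cox map this docstring goes on to define, written as plain Julia with a hypothetical `boxcox` helper; `λ` and `c` are the exponent and shift discussed below:)

```julia
# Hypothetical sketch of the (shifted) Box-Cox map; λ == 0 falls back to log.
boxcox(x; λ=0.5, c=0.0) = λ == 0 ? log(x + c) : ((x + c)^λ - 1)/λ

boxcox.(randn(5).^2; λ=0.25) # tends to symmetrize a right-skewed sample
```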
This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `z` whose transformation learned by `mach` is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```\nusing MLJ\nusing UnicodePlots\nusing Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia> histogram(x)\n ┌ ┐\n [ 0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n" ":name" = "UnivariateBoxCoxTransformer" ":human_name" = "single variable Box-Cox transformer" ":is_supervised" = "`false`" @@ -6220,6 +7242,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJModels.InteractionTransformer] ":input_scitype" = "`Tuple{ScientificTypesBase.Table}`" @@ -6234,12 +7257,12 @@ ":package_license" = "MIT" ":load_path" = "MLJModels.InteractionTransformer" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, based on [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl), and 
implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load InteractionTransformer pkg=MLJModels\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interations generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n    A = [1, 2, 3],\n    B = [4, 5, 6],\n    C = [7, 8, 9],\n    D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n" +":docstring" = "```\nInteractionTransformer\n```\n\nA model type for constructing an interaction transformer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load InteractionTransformer pkg=MLJModels\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. 
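(Aside: a rough sketch of how order-2 interaction terms arise as pairwise column products, in plain Julia with a hypothetical `pairwise_interactions` helper operating on a NamedTuple of columns; the actual transformer also handles general Tables.jl tables and higher orders:)

```julia
# Hypothetical sketch: append order-2 interaction columns, named following the
# transformer's convention, (:A, :B) -> :A_B, with values A .* B.
function pairwise_interactions(X::NamedTuple)
    ks = collect(keys(X))
    pairs = [Symbol(a, :_, b) => X[a] .* X[b]
             for (i, a) in enumerate(ks) for b in ks[i+1:end]]
    merge(X, (; pairs...))
end

pairwise_interactions((A = [1, 2, 3], B = [4, 5, 6]))
# (A = [1, 2, 3], B = [4, 5, 6], A_B = [4, 10, 18])
```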
If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interaction generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n    A = [1, 2, 3],\n    B = [4, 5, 6],\n    C = [7, 8, 9],\n    D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n"
":name" = "InteractionTransformer"
":human_name" = "interaction transformer"
":is_supervised" = "`false`"
@@ -6254,6 +7277,7 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[MLJModels.ConstantRegressor]
":input_scitype" = "`ScientificTypesBase.Table`"
@@ -6268,7 +7292,7 @@
":package_license" = "MIT"
":load_path" = "MLJModels.ConstantRegressor"
":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
-":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl"
+":package_url" = "https://github.com/JuliaAI/MLJModels.jl"
":is_wrapper" = "`false`"
":supports_weights" = "`false`"
":supports_class_weights" = "`false`"
@@ -6288,6 +7312,7 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[MLJModels.FeatureSelector]
":input_scitype" = "`ScientificTypesBase.Table`"
@@ -6302,12 +7327,12 @@
":package_license" = "MIT"
":load_path" = "MLJModels.FeatureSelector"
":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
-":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl"
+":package_url" = "https://github.com/JuliaAI/MLJModels.jl"
":is_wrapper" = "`false`"
":supports_weights" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = "```\nFeatureSelector\n```\n\nA model type for constructing a feature selector, based on [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFeatureSelector = @load FeatureSelector pkg=MLJModels\n```\n\nDo `model = FeatureSelector()` to construct an instance with default hyper-parameters. 
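(Aside: conceptually the transform is name-based column filtering. A minimal sketch in plain Julia with a hypothetical `select_features` helper, assuming the table is a NamedTuple of columns:)

```julia
# Hypothetical sketch: keep the named columns, or drop them when ignore=true.
function select_features(X::NamedTuple, features; ignore=false)
    keep = ignore ? setdiff(keys(X), features) : features
    NamedTuple{Tuple(keep)}(X)
end

X = (x1 = [1, 2], x2 = [3, 4], x3 = [5, 6])
select_features(X, [:x2]; ignore=true) # (x1 = [1, 2], x3 = [5, 6])
```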
Provide keyword arguments to override hyper-parameter defaults, as in `FeatureSelector(features=...)`.\n\nUse this model to select features (columns) of a table, usually as part of a model `Pipeline`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features, where \"table\" is in the sense of Tables.jl\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated:\n\n * `[]` (empty, the default): filter out all features (columns) which were not encountered in training\n * non-empty vector of feature names (symbols): keep only the specified features (`ignore=false`) or keep only unspecified features (`ignore=true`)\n * function or other callable: keep a feature if the callable returns `true` on its name. For example, specifying `FeatureSelector(features = name -> name in [:x1, :x3], ignore = true)` has the same effect as `FeatureSelector(features = [:x1, :x3], ignore = true)`, namely to select all features, with the exception of `:x1` and `:x3`.\n * `ignore`: whether to ignore or keep specified `features`, as explained above\n\n# Operations\n\n * `transform(mach, Xnew)`: select features from the table `Xnew` as specified by the model, taking features seen during training into account, if relevant\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: the features that will be selected\n\n# Example\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([\"x\", \"y\", \"x\"], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\nselector = FeatureSelector(features=[:ordinal3, ], ignore=true);\n\njulia> transform(fit!(machine(selector, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[\"x\", \"y\", \"x\"],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\n```\n" +":docstring" = "```\nFeatureSelector\n```\n\nA model type for constructing a feature selector, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFeatureSelector = @load FeatureSelector pkg=MLJModels\n```\n\nDo `model = FeatureSelector()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FeatureSelector(features=...)`.\n\nUse this model to select features (columns) of a table, usually as part of a model `Pipeline`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features, where \"table\" is in the sense of Tables.jl\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated:\n\n * `[]` (empty, the default): filter out all features (columns) which were not encountered in training\n * non-empty vector of feature names (symbols): keep only the specified features (`ignore=false`) or keep only unspecified features (`ignore=true`)\n * function or other callable: keep a feature if the callable returns `true` on its name. 
For example, specifying `FeatureSelector(features = name -> name in [:x1, :x3], ignore = true)` has the same effect as `FeatureSelector(features = [:x1, :x3], ignore = true)`, namely to select all features, with the exception of `:x1` and `:x3`.\n * `ignore`: whether to ignore or keep specified `features`, as explained above\n\n# Operations\n\n * `transform(mach, Xnew)`: select features from the table `Xnew` as specified by the model, taking features seen during training into account, if relevant\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: the features that will be selected\n\n# Example\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([\"x\", \"y\", \"x\"], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\nselector = FeatureSelector(features=[:ordinal3, ], ignore=true);\n\njulia> transform(fit!(machine(selector, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[\"x\", \"y\", \"x\"],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\n```\n" ":name" = "FeatureSelector" ":human_name" = "feature selector" ":is_supervised" = "`false`" @@ -6322,6 +7347,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJModels.UnivariateDiscretizer] ":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -6336,12 +7362,12 @@ ":package_license" = "MIT" ":load_path" = "MLJModels.UnivariateDiscretizer" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=MLJModels\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. 
Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitesult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n" +":docstring" = "```\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=MLJModels\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. 
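(Aside: a rough sketch of quantile binning in this spirit, in plain Julia using the `Statistics` stdlib and a hypothetical `quantile_bin` helper; it ignores the odd/even-quantile bookkeeping, which the next sentence makes precise:)

```julia
using Statistics

# Hypothetical sketch: learn interior quantile cut points from training data,
# then bin new values by how many cut points they exceed.
function quantile_bin(train::AbstractVector{<:Real}, n_classes::Int)
    cuts = quantile(train, (1:n_classes-1) ./ n_classes) # interior cut points
    x -> 1 + searchsortedlast(cuts, x)                   # bin index in 1:n_classes
end

binner = quantile_bin(randn(1000), 4)
binner(0.0) # typically 2 or 3 for standard-normal training data
```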
Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitresult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n"
":name" = "UnivariateDiscretizer"
":human_name" = "single variable discretizer"
":is_supervised" = "`false`"
@@ -6356,6 +7382,42 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"
+
+[MLJModels.BinaryThresholdPredictor]
+":input_scitype" = "`ScientificTypesBase.Unknown`"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_pure_julia" = "`false`"
+":package_name" = "MLJModels"
+":package_license" = "unknown"
+":load_path" = "MLJModels.BinaryThresholdPredictor"
+":package_uuid" = ""
+":package_url" = "https://github.com/JuliaAI/MLJModels.jl"
+":is_wrapper" = "`true`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```\nBinaryThresholdPredictor(model; threshold=0.5)\n```\n\nWrap the `Probabilistic` model, `model`, assumed to support binary classification, as a `Deterministic` model, by applying the specified `threshold` to the positive class probability. 
In addition to conventional supervised classifiers, it can also be applied to outlier detection models that predict normalized scores - in the form of appropriate `UnivariateFinite` distributions - that is, models that subtype `AbstractProbabilisticUnsupervisedDetector` or `AbstractProbabilisticSupervisedDetector`.\n\nBy convention the positive class is the second class returned by `levels(y)`, where `y` is the target.\n\nIf `threshold=0.5` then calling `predict` on the wrapped model is equivalent to calling `predict_mode` on the atomic model.\n\n# Example\n\nBelow is an application to the well-known Pima Indian diabetes dataset, including optimization of the `threshold` parameter, with a high balanced accuracy the objective. The target class distribution is 500 positives to 268 negatives.\n\nLoading the data:\n\n```julia\nusing MLJ, Random\nrng = Xoshiro(123)\n\ndiabetes = OpenML.load(43582)\noutcome, X = unpack(diabetes, ==(:Outcome), rng=rng);\ny = coerce(Int.(outcome), OrderedFactor);\n```\n\nChoosing a probabilistic classifier:\n\n```julia\nEvoTreesClassifier = @load EvoTreesClassifier\nprob_predictor = EvoTreesClassifier()\n```\n\nWrapping in `BinaryThresholdPredictor` to get a deterministic classifier with `threshold` as a new hyperparameter:\n\n```julia\npoint_predictor = BinaryThresholdPredictor(prob_predictor, threshold=0.6)\nmach = machine(point_predictor, X, y) |> fit!\npredict(mach, X)[1:3] # [0, 0, 0]\n```\n\nEstimating performance:\n\n```julia\nbalanced = BalancedAccuracy(adjusted=true)\ne = evaluate!(mach, resampling=CV(nfolds=6), measures=[balanced, accuracy])\ne.measurement[1] # 0.405 ± 0.089\n```\n\nWrapping in tuning strategy to learn `threshold` that maximizes balanced accuracy:\n\n```julia\nr = range(point_predictor, :threshold, lower=0.1, upper=0.9)\ntuned_point_predictor = TunedModel(\n    point_predictor,\n    tuning=RandomSearch(rng=rng),\n    resampling=CV(nfolds=6),\n    range = r,\n    measure=balanced,\n    n=30,\n)\nmach2 = machine(tuned_point_predictor, X, y) |> fit!\noptimized_point_predictor = report(mach2).best_model\noptimized_point_predictor.threshold # 0.260\npredict(mach2, X)[1:3] # [1, 1, 0]\n```\n\nEstimating the performance of the auto-thresholding model (nested resampling here):\n\n```julia\ne = evaluate!(mach2, resampling=CV(nfolds=6), measure=[balanced, accuracy])\ne.measurement[1] # 0.477 ± 0.110\n```\n"
+":name" = "BinaryThresholdPredictor"
+":human_name" = "binary threshold predictor"
+":is_supervised" = "`true`"
+":prediction_type" = ":deterministic"
+":abstract_type" = "`MLJModelInterface.Deterministic`"
+":implemented_methods" = []
+":hyperparameters" = "`(:model, :threshold)`"
+":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Float64\")`"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+":constructor" = "`BinaryThresholdPredictor`"

[MLJModels.FillImputer]
":input_scitype" = "`ScientificTypesBase.Table`"
@@ -6370,12 +7432,12 @@
":package_license" = "MIT"
":load_path" = "MLJModels.FillImputer"
":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
-":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl"
+":package_url" = "https://github.com/JuliaAI/MLJModels.jl"
":is_wrapper" = "`false`"
":supports_weights" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = 
"```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=MLJModels\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (columns) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (it's fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\nschema(X)\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\n(filler = 2.0,)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n" +":docstring" = "```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing 
the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=MLJModels\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (columns) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (its fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n"
":name" = "FillImputer"
":human_name" = "fill imputer"
":is_supervised" = "`false`"
":prediction_type" = ":unknown"
":abstract_type" = "`MLJModelInterface.Unsupervised`"
":implemented_methods" = []
":hyperparameters" = "`(:features, :continuous_fill, :count_fill, :finite_fill)`"
":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Function\", \"Function\", \"Function\")`"
":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`"
":iteration_parameter" = "`nothing`"
":supports_training_losses" = "`false`"
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"
+
+[MLJModels.ThresholdUnsupervisedDetector]
+":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":predict_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJModels" +":package_license" = "unknown" +":load_path" = "MLJModels.BinaryThresholdPredictor" +":package_uuid" = "" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nBinaryThresholdPredictor(model; threshold=0.5)\n```\n\nWrap the `Probabilistic` model, `model`, assumed to support binary classification, as a `Deterministic` model, by applying the specified `threshold` to the positive class probability. In addition to conventional supervised classifiers, it can also be applied to outlier detection models that predict normalized scores - in the form of appropriate `UnivariateFinite` distributions - that is, models that subtype `AbstractProbabilisticUnsupervisedDetector` or `AbstractProbabilisticSupervisedDetector`.\n\nBy convention the positive class is the second class returned by `levels(y)`, where `y` is the target.\n\nIf `threshold=0.5` then calling `predict` on the wrapped model is equivalent to calling `predict_mode` on the atomic model.\n\n# Example\n\nBelow is an application to the well-known Pima Indian diabetes dataset, including optimization of the `threshold` parameter, with a high balanced accuracy the objective. 
The target class distribution is 500 positives to 268 negatives.\n\nLoading the data:\n\n```julia\nusing MLJ, Random\nrng = Xoshiro(123)\n\ndiabetes = OpenML.load(43582)\noutcome, X = unpack(diabetes, ==(:Outcome), rng=rng);\ny = coerce(Int.(outcome), OrderedFactor);\n```\n\nChoosing a probabilistic classifier:\n\n```julia\nEvoTreesClassifier = @load EvoTreesClassifier\nprob_predictor = EvoTreesClassifier()\n```\n\nWrapping in `BinaryThresholdPredictor` to get a deterministic classifier with `threshold` as a new hyperparameter:\n\n```julia\npoint_predictor = BinaryThresholdPredictor(prob_predictor, threshold=0.6)\nmach = machine(point_predictor, X, y) |> fit!\npredict(mach, X)[1:3] # [0, 0, 0]\n```\n\nEstimating performance:\n\n```julia\nbalanced = BalancedAccuracy(adjusted=true)\ne = evaluate!(mach, resampling=CV(nfolds=6), measures=[balanced, accuracy])\ne.measurement[1] # 0.405 ± 0.089\n```\n\nWrapping in a tuning strategy to learn the `threshold` that maximizes balanced accuracy:\n\n```julia\nr = range(point_predictor, :threshold, lower=0.1, upper=0.9)\ntuned_point_predictor = TunedModel(\n point_predictor,\n tuning=RandomSearch(rng=rng),\n resampling=CV(nfolds=6),\n range = r,\n measure=balanced,\n n=30,\n)\nmach2 = machine(tuned_point_predictor, X, y) |> fit!\noptimized_point_predictor = report(mach2).best_model\noptimized_point_predictor.threshold # 0.260\npredict(mach2, X)[1:3] # [1, 1, 0]\n```\n\nEstimating the performance of the auto-thresholding model (nested resampling here):\n\n```julia\ne = evaluate!(mach2, resampling=CV(nfolds=6), measure=[balanced, accuracy])\ne.measurement[1] # 0.477 ± 0.110\n```\n"
+":name" = "ThresholdUnsupervisedDetector"
+":human_name" = "threshold unsupervised detector"
+":is_supervised" = "`false`"
+":prediction_type" = ":deterministic"
+":abstract_type" = "`MLJModelInterface.DeterministicUnsupervisedDetector`"
+":implemented_methods" = []
+":hyperparameters" = "`(:model, :threshold)`"
+":hyperparameter_types" = "`(\"MLJModelInterface.ProbabilisticUnsupervisedDetector\", \"Float64\")`"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+":constructor" = "`BinaryThresholdPredictor`"

[MLJModels.DeterministicConstantRegressor]
":input_scitype" = "`ScientificTypesBase.Table`"
@@ -6404,12 +7502,12 @@
":package_license" = "MIT"
":load_path" = "MLJModels.DeterministicConstantRegressor"
":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
-":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl"
+":package_url" = "https://github.com/JuliaAI/MLJModels.jl"
":is_wrapper" = "`false`"
":supports_weights" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = "```\nDeterministicConstantRegressor\n```\n\nA model type for constructing a deterministic constant regressor, based on\n[MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantRegressor = @load DeterministicConstantRegressor pkg=MLJModels\n```\n\nDo `model = DeterministicConstantRegressor()` to construct an instance with default hyper-parameters. 
" +":docstring" = "```\nDeterministicConstantRegressor\n```\n\nA model type for constructing a deterministic constant regressor, based on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantRegressor = @load DeterministicConstantRegressor pkg=MLJModels\n```\n\nDo `model = DeterministicConstantRegressor()` to construct an instance with default hyper-parameters. " ":name" = "DeterministicConstantRegressor" ":human_name" = "deterministic constant regressor" ":is_supervised" = "`true`" @@ -6424,6 +7522,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJModels.UnivariateStandardizer] ":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" @@ -6438,7 +7537,7 @@ ":package_license" = "MIT" ":load_path" = "MLJModels.UnivariateStandardizer" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" @@ -6458,6 +7557,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJModels.UnivariateFillImputer] ":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" @@ -6472,12 +7572,12 @@ ":package_license" = "MIT" ":load_path" = "MLJModels.UnivariateFillImputer" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":package_url" = "https://github.com/alan-turing-institute/MLJModels.jl" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateFillImputer = @load UnivariateFillImputer pkg=MLJModels\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to imputing `missing` values in a vector with a fixed value learned from the non-missing values of training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n 2.0\n 2.0\n 101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing tabular data, use [`FillImputer`](@ref).\n" +":docstring" = "```\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateFillImputer = @load UnivariateFillImputer pkg=MLJModels\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to impute `missing` values in a vector with a fixed value learned from the non-missing values of the training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n 2.0\n 2.0\n 101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing tabular data, use [`FillImputer`](@ref).\n"
":name" = "UnivariateFillImputer"
":human_name" = "single variable fill imputer"
":is_supervised" = "`false`"
":prediction_type" = ":unknown"
":abstract_type" = "`MLJModelInterface.Unsupervised`"
":implemented_methods" = []
":hyperparameters" = "`(:continuous_fill, :count_fill, :finite_fill)`"
":hyperparameter_types" = "`(\"Function\", \"Function\", \"Function\")`"
":hyperparameter_ranges" = "`(nothing, nothing, nothing)`"
":iteration_parameter" = "`nothing`"
":supports_training_losses" = "`false`"
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[OutlierDetectionPython.MCDDetector]
":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
@@ -6526,6 +7627,7 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[OutlierDetectionPython.COPODDetector]
":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
@@ -6560,6 +7662,7 @@
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
+":constructor" = "`nothing`"

[OutlierDetectionPython.HBOSDetector]
":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
@@ 
-6594,6 +7697,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.IForestDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -6628,6 +7732,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.SOSDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -6662,6 +7767,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.ABODDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -6696,6 +7802,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.LOFDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -6730,6 +7837,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.PCADetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -6764,6 +7872,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.INNEDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -6798,6 +7907,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.OCSVMDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -6832,6 +7942,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.ECODDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -6866,6 +7977,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.SODDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -6900,6 +8012,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.LODADetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -6934,6 +8047,7 @@ 
":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.KDEDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -6968,6 +8082,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.CDDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -7002,6 +8117,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.KNNDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -7036,6 +8152,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.GMMDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -7070,6 +8187,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.COFDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -7104,6 +8222,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.CBLOFDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -7138,6 +8257,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.LOCIDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -7172,6 +8292,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.LMDDDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -7206,6 +8327,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OutlierDetectionPython.RODDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -7240,6 +8362,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [OneRule.OneRuleClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" @@ -7274,6 +8397,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" 
+":constructor" = "`nothing`" [SelfOrganizingMaps.SelfOrganizingMap] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -7308,6 +8432,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [LIBSVM.ProbabilisticNuSVC] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -7342,6 +8467,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [LIBSVM.EpsilonSVR] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -7376,6 +8502,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [LIBSVM.LinearSVC] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -7410,6 +8537,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [LIBSVM.ProbabilisticSVC] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -7444,6 +8572,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [LIBSVM.NuSVR] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -7478,6 +8607,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [LIBSVM.NuSVC] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -7512,6 +8642,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [LIBSVM.SVC] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -7546,6 +8677,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [LIBSVM.OneClassSVM] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -7580,6 +8712,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [TSVD.TSVDTransformer] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -7614,6 +8747,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [GLM.LinearBinaryClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" @@ -7648,6 +8782,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [GLM.LinearCountRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" @@ -7682,6 +8817,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" 
":reporting_operations" = "`()`" +":constructor" = "`nothing`" [GLM.LinearRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" @@ -7716,6 +8852,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJFlux.MultitargetNeuralNetworkRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -7750,6 +8887,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`(:optimiser, :builder)`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJFlux.NeuralNetworkClassifier] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -7784,6 +8922,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`(:optimiser, :builder)`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJFlux.ImageClassifier] ":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Image}`" @@ -7818,6 +8957,7 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`(:optimiser, :builder)`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MLJFlux.NeuralNetworkRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -7852,3 +8992,74 @@ ":reports_feature_importances" = "`false`" ":deep_properties" = "`(:optimiser, :builder)`" ":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJEnsembles.DeterministicEnsembleModel] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "MLJEnsembles" +":package_license" = "unknown" +":load_path" = "MLJEnsembles.EnsembleModel" +":package_uuid" = "50ed68f4-41fd-4504-931a-ed422449fee0" +":package_url" = "https://github.com/JuliaAI/MLJEnsembles.jl" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nEnsembleModel(model,\n atomic_weights=Float64[],\n bagging_fraction=0.8,\n n=100,\n rng=GLOBAL_RNG,\n acceleration=CPU1(),\n out_of_bag_measure=[])\n```\n\nCreate a model for training an ensemble of `n` clones of `model`, with optional bagging. Ensembling is useful if `fit!(machine(atom, data...))` does not create identical models on repeated calls (ie, is a stochastic model, such as a decision tree with randomized node selection criteria), or if `bagging_fraction` is set to a value less than 1.0, or both.\n\nHere the atomic `model` must support targets with scitype `AbstractVector{<:Finite}` (single-target classifiers) or `AbstractVector{<:Continuous}` (single-target regressors).\n\nIf `rng` is an integer, then `MersenneTwister(rng)` is the random number generator used for bagging. 
Otherwise some `AbstractRNG` object is expected.\n\nThe atomic predictions are optionally weighted according to the vector `atomic_weights` (to allow for external optimization) except in the case that `model` is a `Deterministic` classifier, in which case `atomic_weights` are ignored.\n\nThe ensemble model is `Deterministic` or `Probabilistic`, according to the corresponding supertype of `atom`. In the case of deterministic classifiers (`target_scitype(atom) <: AbstractVector{<:Finite}`), the predictions are majority votes, and for regressors (`target_scitype(atom) <: AbstractVector{<:Continuous}`) they are ordinary averages. Probabilistic predictions are obtained by averaging the atomic probability distribution/mass functions; in particular, for regressors, the ensemble prediction on each input pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl package, where `D` is the type of predicted distribution for `atom`.\n\nSpecify `acceleration=CPUProcesses()` for distributed computing, or `CPUThreads()` for multithreading.\n\nIf a single measure or non-empty vector of measures is specified by `out_of_bag_measure`, then out-of-bag estimates of performance are written to the training report (call `report` on the trained machine wrapping the ensemble model).\n\n*Important:* If per-observation or class weights `w` (not to be confused with atomic weights) are specified when constructing a machine for the ensemble model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is used by any measures specified in `out_of_bag_measure` that support them.\n"
+":name" = "DeterministicEnsembleModel"
+":human_name" = "deterministic ensemble model"
+":is_supervised" = "`true`"
+":prediction_type" = ":deterministic"
+":abstract_type" = "`MLJModelInterface.Deterministic`"
+":implemented_methods" = []
+":hyperparameters" = "`(:model, :atomic_weights, :bagging_fraction, :rng, :n, :acceleration, :out_of_bag_measure)`"
+":hyperparameter_types" = "`(\"MLJModelInterface.Deterministic\", \"Vector{Float64}\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Int64\", \"ComputationalResources.AbstractResource\", \"Any\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+":constructor" = "`EnsembleModel`"
+
+[MLJEnsembles.ProbabilisticEnsembleModel]
+":input_scitype" = "`ScientificTypesBase.Unknown`"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_pure_julia" = "`false`"
+":package_name" = "MLJEnsembles"
+":package_license" = "unknown"
+":load_path" = "MLJEnsembles.EnsembleModel"
+":package_uuid" = "50ed68f4-41fd-4504-931a-ed422449fee0"
+":package_url" = "https://github.com/JuliaAI/MLJEnsembles.jl"
+":is_wrapper" = "`true`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```\nEnsembleModel(model,\n atomic_weights=Float64[],\n bagging_fraction=0.8,\n n=100,\n rng=GLOBAL_RNG,\n acceleration=CPU1(),\n out_of_bag_measure=[])\n```\n\nCreate a model for training an ensemble of `n` 
clones of `model`, with optional bagging. Ensembling is useful if `fit!(machine(atom, data...))` does not create identical models on repeated calls (ie, is a stochastic model, such as a decision tree with randomized node selection criteria), or if `bagging_fraction` is set to a value less than 1.0, or both.\n\nHere the atomic `model` must support targets with scitype `AbstractVector{<:Finite}` (single-target classifiers) or `AbstractVector{<:Continuous}` (single-target regressors).\n\nIf `rng` is an integer, then `MersenneTwister(rng)` is the random number generator used for bagging. Otherwise some `AbstractRNG` object is expected.\n\nThe atomic predictions are optionally weighted according to the vector `atomic_weights` (to allow for external optimization) except in the case that `model` is a `Deterministic` classifier, in which case `atomic_weights` are ignored.\n\nThe ensemble model is `Deterministic` or `Probabilistic`, according to the corresponding supertype of `atom`. In the case of deterministic classifiers (`target_scitype(atom) <: AbstractVector{<:Finite}`), the predictions are majority votes, and for regressors (`target_scitype(atom) <: AbstractVector{<:Continuous}`) they are ordinary averages. Probabilistic predictions are obtained by averaging the atomic probability distribution/mass functions; in particular, for regressors, the ensemble prediction on each input pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl package, where `D` is the type of predicted distribution for `atom`.\n\nSpecify `acceleration=CPUProcesses()` for distributed computing, or `CPUThreads()` for multithreading.\n\nIf a single measure or non-empty vector of measures is specified by `out_of_bag_measure`, then out-of-bag estimates of performance are written to the training report (call `report` on the trained machine wrapping the ensemble model).\n\n*Important:* If per-observation or class weights `w` (not to be confused with atomic weights) are specified when constructing a machine for the ensemble model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is used by any measures specified in `out_of_bag_measure` that support them.\n"
+":name" = "ProbabilisticEnsembleModel"
+":human_name" = "probabilistic ensemble model"
+":is_supervised" = "`true`"
+":prediction_type" = ":probabilistic"
+":abstract_type" = "`MLJModelInterface.Probabilistic`"
+":implemented_methods" = []
+":hyperparameters" = "`(:model, :atomic_weights, :bagging_fraction, :rng, :n, :acceleration, :out_of_bag_measure)`"
+":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Vector{Float64}\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Int64\", \"ComputationalResources.AbstractResource\", \"Any\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+":constructor" = "`EnsembleModel`"
diff --git a/src/registry/Models.toml b/src/registry/Models.toml
index 17d54a78..cabe4514 100644
--- a/src/registry/Models.toml
+++ b/src/registry/Models.toml
@@ -4,13 +4,17 @@ NearestNeighborModels = ["KNNClassifier", "MultitargetKNNClassifier", "Multitarg
 MLJScikitLearnInterface = ["ProbabilisticSGDClassifier", "RidgeCVClassifier", "LogisticClassifier", "RandomForestRegressor", "ElasticNetCVRegressor", "PerceptronClassifier", "MultiTaskLassoRegressor", "LinearRegressor", "HDBSCAN",
"DBSCAN", "RidgeRegressor", "LassoLarsICRegressor", "ARDRegressor", "SVMNuRegressor", "RidgeClassifier", "SGDRegressor", "ComplementNBClassifier", "HuberRegressor", "SVMNuClassifier", "GradientBoostingClassifier", "GaussianProcessRegressor", "SVMLinearRegressor", "LarsRegressor", "MeanShift", "HistGradientBoostingClassifier", "AdaBoostRegressor", "AffinityPropagation", "MultiTaskLassoCVRegressor", "OrthogonalMatchingPursuitRegressor", "RidgeCVRegressor", "PassiveAggressiveClassifier", "SVMRegressor", "BernoulliNBClassifier", "GaussianNBClassifier", "ExtraTreesClassifier", "KMeans", "MultiTaskElasticNetCVRegressor", "LassoLarsCVRegressor", "OrthogonalMatchingPursuitCVRegressor", "AdaBoostClassifier", "PassiveAggressiveRegressor", "BayesianRidgeRegressor", "RANSACRegressor", "BaggingClassifier", "GaussianProcessClassifier", "OPTICS", "KNeighborsRegressor", "HistGradientBoostingRegressor", "MiniBatchKMeans", "LassoCVRegressor", "DummyRegressor", "BisectingKMeans", "LassoLarsRegressor", "LarsCVRegressor", "KNeighborsClassifier", "SVMLinearClassifier", "FeatureAgglomeration", "DummyClassifier", "BaggingRegressor", "BayesianQDA", "BayesianLDA", "SGDClassifier", "TheilSenRegressor", "SpectralClustering", "Birch", "AgglomerativeClustering", "ElasticNetRegressor", "RandomForestClassifier", "LogisticCVClassifier", "MultiTaskElasticNetRegressor", "ExtraTreesRegressor", "LassoRegressor", "MultinomialNBClassifier", "GradientBoostingRegressor", "SVMClassifier"] OutlierDetectionNeighbors = ["ABODDetector", "DNNDetector", "LOFDetector", "KNNDetector", "COFDetector"] SIRUS = ["StableRulesClassifier", "StableRulesRegressor", "StableForestClassifier", "StableForestRegressor"] +MLJIteration = ["DeterministicIteratedModel", "ProbabilisticIteratedModel"] PartialLeastSquaresRegressor = ["KPLSRegressor", "PLSRegressor"] PartitionedLS = ["PartLS"] MLJLinearModels = ["QuantileRegressor", "LogisticClassifier", "MultinomialClassifier", "LADRegressor", "RidgeRegressor", "RobustRegressor", "ElasticNetRegressor", "LinearRegressor", "LassoRegressor", "HuberRegressor"] ParallelKMeans = ["KMeans"] NaiveBayes = ["GaussianNBClassifier", "MultinomialNBClassifier"] +MLJBase = ["DeterministicStack", "ProbabilisticStack", "Resampler", "TransformedTargetModelInterval", "TransformedTargetModelDeterministicUnsupervisedDetector", "TransformedTargetModelProbabilistic", "StaticPipeline", "TransformedTargetModelProbabilisticSupervisedDetector", "ProbabilisticPipeline", "DeterministicPipeline", "TransformedTargetModelDeterministic", "IntervalPipeline", "UnsupervisedPipeline", "TransformedTargetModelProbabilisticUnsupervisedDetector", "TransformedTargetModelDeterministicSupervisedDetector"] MultivariateStats = ["LDA", "MultitargetLinearRegressor", "BayesianSubspaceLDA", "FactorAnalysis", "LinearRegressor", "ICA", "PPCA", "RidgeRegressor", "KernelPCA", "MultitargetRidgeRegressor", "SubspaceLDA", "BayesianLDA", "PCA"] DecisionTree = ["AdaBoostStumpClassifier", "DecisionTreeRegressor", "DecisionTreeClassifier", "RandomForestRegressor", "RandomForestClassifier"] +MLJBalancing = ["BalancedModelDeterministic", "BalancedBaggingClassifier", "BalancedModelInterval", "BalancedModelProbabilistic"] +MLJTuning = ["DeterministicTunedModel", "ProbabilisticTunedModel"] Imbalance = ["RandomOversampler", "SMOTENC", "TomekUndersampler", "ClusterUndersampler", "SMOTE", "SMOTEN", "ROSE", "RandomUndersampler", "ENNUndersampler", "BorderlineSMOTE1", "RandomWalkOversampler"] Clustering = ["HierarchicalClustering", "DBSCAN", "KMeans", "KMedoids"] EvoLinear = 
["EvoSplineRegressor", "EvoLinearRegressor"] @@ -19,7 +23,7 @@ LightGBM = ["LGBMClassifier", "LGBMRegressor"] SymbolicRegression = ["MultitargetSRRegressor", "SRRegressor"] MLJText = ["TfidfTransformer", "CountTransformer", "BM25Transformer"] EvoTrees = ["EvoTreeClassifier", "EvoTreeGaussian", "EvoTreeMLE", "EvoTreeRegressor", "EvoTreeCount"] -MLJModels = ["ConstantClassifier", "Standardizer", "DeterministicConstantClassifier", "UnivariateTimeTypeToContinuous", "OneHotEncoder", "ContinuousEncoder", "UnivariateBoxCoxTransformer", "InteractionTransformer", "ConstantRegressor", "FeatureSelector", "UnivariateDiscretizer", "FillImputer", "DeterministicConstantRegressor", "UnivariateStandardizer", "UnivariateFillImputer"] +MLJModels = ["ConstantClassifier", "Standardizer", "ThresholdSupervisedDetector", "DeterministicConstantClassifier", "UnivariateTimeTypeToContinuous", "OneHotEncoder", "ContinuousEncoder", "UnivariateBoxCoxTransformer", "InteractionTransformer", "ConstantRegressor", "FeatureSelector", "UnivariateDiscretizer", "BinaryThresholdPredictor", "FillImputer", "ThresholdUnsupervisedDetector", "DeterministicConstantRegressor", "UnivariateStandardizer", "UnivariateFillImputer"] OutlierDetectionPython = ["MCDDetector", "COPODDetector", "HBOSDetector", "IForestDetector", "SOSDetector", "ABODDetector", "LOFDetector", "PCADetector", "INNEDetector", "OCSVMDetector", "ECODDetector", "SODDetector", "LODADetector", "KDEDetector", "CDDetector", "KNNDetector", "GMMDetector", "COFDetector", "CBLOFDetector", "LOCIDetector", "LMDDDetector", "RODDetector"] OneRule = ["OneRuleClassifier"] SelfOrganizingMaps = ["SelfOrganizingMap"] @@ -27,3 +31,4 @@ LIBSVM = ["ProbabilisticNuSVC", "EpsilonSVR", "LinearSVC", "ProbabilisticSVC", " TSVD = ["TSVDTransformer"] GLM = ["LinearBinaryClassifier", "LinearCountRegressor", "LinearRegressor"] MLJFlux = ["MultitargetNeuralNetworkRegressor", "NeuralNetworkClassifier", "ImageClassifier", "NeuralNetworkRegressor"] +MLJEnsembles = ["DeterministicEnsembleModel", "ProbabilisticEnsembleModel"] diff --git a/src/registry/Project.toml b/src/registry/Project.toml index 2ab87eba..0120f262 100644 --- a/src/registry/Project.toml +++ b/src/registry/Project.toml @@ -10,10 +10,13 @@ MLJBalancing = "45f359ea-796d-4f51-95a5-deb1a414c586" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJClusteringInterface = "d354fa79-ed1c-40d4-88ef-b8c7bd1568af" MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661" +MLJEnsembles = "50ed68f4-41fd-4504-931a-ed422449fee0" MLJFlux = "094fc8d1-fd35-5302-93ea-dabda2abf845" MLJGLMInterface = "caf8df21-4939-456d-ac9c-5fefbfb04c0c" +MLJIteration = "614be32b-d00c-4edb-bd02-1eb411ab5e55" MLJLIBSVMInterface = "61c7150f-6c77-4bb1-949c-13197eac2a52" MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692" +MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7" MLJMultivariateStatsInterface = "1b6a4a23-ba22-4f51-9698-8599985d3728" MLJNaiveBayesInterface = "33e4bacb-b9e2-458e-9a13-5d9a90b235fa" @@ -21,6 +24,7 @@ MLJScikitLearnInterface = "5ae90465-5518-4432-b9d2-8a1def2f0cab" MLJTSVDInterface = "7fa162e1-0e29-41ca-a6fa-c000ca4e7e7e" MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd" MLJText = "5e27fcf9-6bac-46ba-8580-b5712f3d6387" +MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f" MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91" NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36" OneRule = "90484964-6d6a-4979-af09-8657dbed84ff" @@ -32,6 +36,7 @@ PartitionedLS = 
"19f41c5e-8610-11e9-2f2a-0d67e7c5027f" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" SIRUS = "cdeec39e-fb35-4959-aadb-a1dd5dede958" SelfOrganizingMaps = "ba4b7379-301a-4be0-bee6-171e4e152787" +StatisticalTraits = "64bff920-2084-43da-a3e6-9bb72801c0c9" SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb" [extras] diff --git a/src/registry/src/Registry.jl b/src/registry/src/Registry.jl index 2901bd14..7f812653 100644 --- a/src/registry/src/Registry.jl +++ b/src/registry/src/Registry.jl @@ -1,4 +1,4 @@ -module Registry +module Registry using Pkg import Pkg.TOML diff --git a/src/registry/src/update.jl b/src/registry/src/update.jl index 5d823a84..41b8e1fb 100644 --- a/src/registry/src/update.jl +++ b/src/registry/src/update.jl @@ -10,9 +10,10 @@ function finaltypes(T::Type) end const project_toml = joinpath(srcdir, "../Project.toml") -const PACKAGES = map(Symbol, - keys(TOML.parsefile(project_toml)["deps"])|>collect) -push!(PACKAGES, :MLJModels) +const PACKAGES = map( + Symbol, + keys(TOML.parsefile(project_toml)["deps"]) |> collect, +) filter!(PACKAGES) do pkg !(pkg in (:InteractiveUtils, :Pkg, :MLJModelInterface, :MLJTestIntegration)) end @@ -92,16 +93,17 @@ function _update(mod, test_env_only) end using Pkg Pkg.activate($environment_path) - @info "resolving registry environment..." + @info "resolving Model Registry environment..." Pkg.resolve() end program2 = quote + warnings = "" - @info "Instantiating registry environment..." + @info "Instantiating Model Registry environment..." Pkg.instantiate() - @info "Loading registered packages..." + @info "Loading packages from the Model Registry..." import MLJModels using Pkg.TOML @@ -113,7 +115,7 @@ function _update(mod, test_env_only) modeltypes = MLJModels.Registry.finaltypes(MLJModels.Model) filter!(modeltypes) do T - !isabstracttype(T) && !MLJModels.MLJModelInterface.is_wrapper(T) + !isabstracttype(T) end # generate and write to file the model metadata: @@ -125,12 +127,15 @@ function _update(mod, test_env_only) pkg = _info[:package_name] path = _info[:load_path] api_pkg = split(path, '.') |> first - pkg in ["unknown",] && - @warn "$M `package_name` or `load_path` is \"unknown\")" + pkg in ["unknown",] && begin + global warnings *= "$M `package_name` or `load_path` is \"unknown\")\n" + end modelname = _info[:name] - api_pkg in api_packages || - error("Bad `load_path` trait for $M: "* - "$api_pkg not a registered package. ") + api_pkg in api_packages || begin + global warnings *= "Bad `load_path` trait for $M: "* + "`$api_pkg` not a registered package.\n" + end + haskey(meta_given_package, pkg) || (meta_given_package[pkg] = Dict()) haskey(meta_given_package, modelname) && @@ -154,6 +159,8 @@ function _update(mod, test_env_only) TOML.print(file, models_given_pkg) end + isempty(warnings) || @warn warnings + :(println("Local Metadata.toml updated.")) end @@ -162,7 +169,7 @@ function _update(mod, test_env_only) test_env_only || mod.eval(program2) println("\n You can check the registry by running "* - "`MLJModels.check_registry() but may need to force "* + "`MLJModels.check_registry()` but may need to force "* "recompilation of MLJModels.\n\n"* "You can safely ignore \"conflicting import\" warnings. ") From 60a7e481e12e957579d34db02338f6d26d7849f8 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Mon, 3 Jun 2024 15:20:08 +1200 Subject: [PATCH 10/13] base registry on constructors rather than model types --- src/registry/Metadata.toml | 1812 ++++++++++-------------------- src/registry/Models.toml | 26 +- src/registry/src/Registry.jl | 5 +- src/registry/src/constructors.jl | 31 + src/registry/src/update.jl | 20 +- 5 files changed, 649 insertions(+), 1245 deletions(-) create mode 100644 src/registry/src/constructors.jl diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index 5a35aef2..e6ecbc84 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -279,41 +279,6 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[BetaML.NeuralNetworkRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":predict_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" -":package_license" = "MIT" -":load_path" = "BetaML.Bmlj.NeuralNetworkRegressor" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" -":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```julia\nmutable struct NeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. 
See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A call back function to provide information during training [def: `fitting_info`]\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be be a *n-records* vector.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load NeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkRegressor\n\njulia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM());\nNeuralNetworkRegressor(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], \n loss = BetaML.Utils.squared_cost, \n dloss = BetaML.Utils.dsquared_cost, \n epochs = 100, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0 30.7726\n 21.6 28.0811\n 34.7 31.3194\n ⋮ \n 23.9 30.9032\n 22.0 29.49\n 11.9 27.2438\n```\n" -":name" = "NeuralNetworkRegressor" -":human_name" = "neural network regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = 
"`MLJModelInterface.Deterministic`" -":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`" -":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - [BetaML.KMeansClusterer] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -349,6 +314,41 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" +[BetaML.NeuralNetworkRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":predict_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`true`" +":package_name" = "BetaML" +":package_license" = "MIT" +":load_path" = "BetaML.Bmlj.NeuralNetworkRegressor" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":package_url" = "https://github.com/sylvaticus/BetaML.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```julia\nmutable struct NeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. 
passes through the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A callback function to provide information during training [def: `fitting_info`]\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [default: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be an *n-records* vector.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load NeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkRegressor\n\njulia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM());\nNeuralNetworkRegressor(\n  layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], \n  loss = BetaML.Utils.squared_cost, \n  dloss = BetaML.Utils.dsquared_cost, \n  epochs = 100, \n  batch_size = 32, \n  opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n  shuffle = true, \n  descr = \"\", \n  cb = BetaML.Nn.fitting_info, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0  30.7726\n 21.6  28.0811\n 34.7  31.3194\n  ⋮    \n 23.9  30.9032\n 22.0  29.49\n 
11.9 27.2438\n```\n" +":name" = "NeuralNetworkRegressor" +":human_name" = "neural network regressor" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [":fit", ":predict"] +":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`" +":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + [BetaML.MultitargetGaussianMixtureRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" @@ -1924,37 +1924,37 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[MLJScikitLearnInterface.RidgeCVRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[MLJScikitLearnInterface.BernoulliNBClassifier] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_pure_julia" = "`false`" ":package_name" = "MLJScikitLearnInterface" ":package_license" = "BSD" -":load_path" = "MLJScikitLearnInterface.RidgeCVRegressor" +":load_path" = "MLJScikitLearnInterface.BernoulliNBClassifier" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nRidgeCVRegressor\n```\n\nA model type for constructing a ridge regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nRidgeCVRegressor = @load RidgeCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = RidgeCVRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`RidgeCVRegressor(alphas=...)`.\n# Hyper-parameters\n\n- `alphas = (0.1, 1.0, 10.0)`\n\n- `fit_intercept = true`\n\n- `scoring = nothing`\n\n- `cv = 5`\n\n- `gcv_mode = nothing`\n\n- `store_cv_values = false`\n\n"
-":name" = "RidgeCVRegressor"
-":human_name" = "ridge regressor with built-in cross-validation"
+":docstring" = "```\nBernoulliNBClassifier\n```\n\nA model type for constructing a Bernoulli naive Bayes classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBernoulliNBClassifier = @load BernoulliNBClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = BernoulliNBClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BernoulliNBClassifier(alpha=...)`.\n\nBernoulli naive Bayes classifier. It is suitable for classification with binary features; features will be binarized based on the `binarize` keyword (unless it's `nothing`, in which case the features are assumed to be binary).\n"
+":name" = "BernoulliNBClassifier"
+":human_name" = "Bernoulli naive Bayes classifier"
 ":is_supervised" = "`true`"
-":prediction_type" = ":deterministic"
-":abstract_type" = "`MLJModelInterface.Deterministic`"
-":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"]
-":hyperparameters" = "`(:alphas, :fit_intercept, :scoring, :cv, :gcv_mode, :store_cv_values)`"
-":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, String}\", \"Bool\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`"
+":prediction_type" = ":probabilistic"
+":abstract_type" = "`MLJModelInterface.Probabilistic`"
+":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"]
+":hyperparameters" = "`(:alpha, :binarize, :fit_prior, :class_prior)`"
+":hyperparameter_types" = "`(\"Float64\", \"Union{Nothing, Float64}\", \"Bool\", \"Union{Nothing, AbstractVector}\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`"
 ":iteration_parameter" = "`nothing`"
 ":supports_training_losses" = "`false`"
-":reports_feature_importances" = "`true`"
+":reports_feature_importances" = "`false`"
 ":deep_properties" = "`()`"
 ":reporting_operations" = "`()`"
 ":constructor" = "`nothing`"
@@ -1994,7 +1994,7 @@
 ":reporting_operations" = "`()`"
 ":constructor" = "`nothing`"

-[MLJScikitLearnInterface.SVMRegressor]
+[MLJScikitLearnInterface.RidgeCVRegressor]
 ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
 ":output_scitype" = "`ScientificTypesBase.Unknown`"
 ":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
@@ -2005,58 +2005,58 @@
 ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`"
 ":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
 ":transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":is_pure_julia" = "`false`"
 ":package_name" = "MLJScikitLearnInterface"
 ":package_license" = "BSD"
-":load_path" = "MLJScikitLearnInterface.SVMRegressor"
+":load_path" = "MLJScikitLearnInterface.RidgeCVRegressor"
 ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324"
 ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl"
 ":is_wrapper" = "`false`"
 ":supports_weights" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "```\nSVMRegressor\n```\n\nA model type for constructing a epsilon-support vector regressor, based 
on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMRegressor = @load SVMRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMRegressor(kernel=...)`.\n# Hyper-parameters\n\n- `kernel = rbf`\n\n- `degree = 3`\n\n- `gamma = scale`\n\n- `coef0 = 0.0`\n\n- `tol = 0.001`\n\n- `C = 1.0`\n\n- `epsilon = 0.1`\n\n- `shrinking = true`\n\n- `cache_size = 200`\n\n- `max_iter = -1`\n\n" -":name" = "SVMRegressor" -":human_name" = "epsilon-support vector regressor" +":docstring" = "```\nRidgeCVRegressor\n```\n\nA model type for constructing a ridge regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nRidgeCVRegressor = @load RidgeCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = RidgeCVRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`RidgeCVRegressor(alphas=...)`.\n# Hyper-parameters\n\n- `alphas = (0.1, 1.0, 10.0)`\n\n- `fit_intercept = true`\n\n- `scoring = nothing`\n\n- `cv = 5`\n\n- `gcv_mode = nothing`\n\n- `store_cv_values = false`\n\n" +":name" = "RidgeCVRegressor" +":human_name" = "ridge regressor with built-in cross-validation" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:kernel, :degree, :gamma, :coef0, :tol, :C, :epsilon, :shrinking, :cache_size, :max_iter)`" -":hyperparameter_types" = "`(\"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Any\", \"Int64\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] +":hyperparameters" = "`(:alphas, :fit_intercept, :scoring, :cv, :gcv_mode, :store_cv_values)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, String}\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[MLJScikitLearnInterface.BernoulliNBClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" +[MLJScikitLearnInterface.SVMRegressor] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" 
+":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_pure_julia" = "`false`" ":package_name" = "MLJScikitLearnInterface" ":package_license" = "BSD" -":load_path" = "MLJScikitLearnInterface.BernoulliNBClassifier" +":load_path" = "MLJScikitLearnInterface.SVMRegressor" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nBernoulliNBClassifier\n```\n\nA model type for constructing a Bernoulli naive Bayes classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBernoulliNBClassifier = @load BernoulliNBClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = BernoulliNBClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BernoulliNBClassifier(alpha=...)`.\n\nBinomial naive bayes classifier. It is suitable for classification with binary features; features will be binarized based on the `binarize` keyword (unless it's `nothing` in which case the features are assumed to be binary).\n" -":name" = "BernoulliNBClassifier" -":human_name" = "Bernoulli naive Bayes classifier" +":docstring" = "```\nSVMRegressor\n```\n\nA model type for constructing a epsilon-support vector regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMRegressor = @load SVMRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMRegressor(kernel=...)`.\n# Hyper-parameters\n\n- `kernel = rbf`\n\n- `degree = 3`\n\n- `gamma = scale`\n\n- `coef0 = 0.0`\n\n- `tol = 0.001`\n\n- `C = 1.0`\n\n- `epsilon = 0.1`\n\n- `shrinking = true`\n\n- `cache_size = 200`\n\n- `max_iter = -1`\n\n" +":name" = "SVMRegressor" +":human_name" = "epsilon-support vector regressor" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:alpha, :binarize, :fit_prior, :class_prior)`" -":hyperparameter_types" = "`(\"Float64\", \"Union{Nothing, Float64}\", \"Bool\", \"Union{Nothing, AbstractVector}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:kernel, :degree, :gamma, :coef0, :tol, :C, :epsilon, :shrinking, :cache_size, :max_iter)`" +":hyperparameter_types" = "`(\"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Any\", \"Int64\", \"Int64\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -2379,34 +2379,34 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[MLJScikitLearnInterface.RANSACRegressor] +[MLJScikitLearnInterface.GaussianProcessClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_pure_julia" = "`false`" ":package_name" = "MLJScikitLearnInterface" ":package_license" = "BSD" -":load_path" = "MLJScikitLearnInterface.RANSACRegressor" +":load_path" = "MLJScikitLearnInterface.GaussianProcessClassifier" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nRANSACRegressor\n```\n\nA model type for constructing a ransac regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nRANSACRegressor = @load RANSACRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = RANSACRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`RANSACRegressor(estimator=...)`.\n# Hyper-parameters\n\n- `estimator = nothing`\n\n- `min_samples = 5`\n\n- `residual_threshold = nothing`\n\n- `is_data_valid = nothing`\n\n- `is_model_valid = nothing`\n\n- `max_trials = 100`\n\n- `max_skips = 9223372036854775807`\n\n- `stop_n_inliers = 9223372036854775807`\n\n- `stop_score = Inf`\n\n- `stop_probability = 0.99`\n\n- `loss = absolute_error`\n\n- `random_state = nothing`\n\n" -":name" = "RANSACRegressor" -":human_name" = "ransac regressor" +":docstring" = "```\nGaussianProcessClassifier\n```\n\nA model type for constructing a Gaussian process classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nGaussianProcessClassifier = @load GaussianProcessClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = GaussianProcessClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`GaussianProcessClassifier(kernel=...)`.\n# Hyper-parameters\n\n- `kernel = nothing`\n\n- `optimizer = fmin_l_bfgs_b`\n\n- `n_restarts_optimizer = 0`\n\n- `copy_X_train = true`\n\n- `random_state = nothing`\n\n- `max_iter_predict = 100`\n\n- `warm_start = false`\n\n- `multi_class = one_vs_rest`\n\n" +":name" = "GaussianProcessClassifier" +":human_name" = "Gaussian process classifier" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:estimator, :min_samples, :residual_threshold, :is_data_valid, :is_model_valid, :max_trials, :max_skips, :stop_n_inliers, :stop_score, :stop_probability, :loss, :random_state)`" -":hyperparameter_types" = "`(\"Any\", \"Union{Float64, Int64}\", \"Union{Nothing, Float64}\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Function, String}\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:kernel, :optimizer, :n_restarts_optimizer, :copy_X_train, :random_state, :max_iter_predict, :warm_start, :multi_class)`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Int64\", \"Bool\", \"String\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -2449,34 +2449,34 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[MLJScikitLearnInterface.GaussianProcessClassifier] +[MLJScikitLearnInterface.OPTICS] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" +":target_scitype" = 
"`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_pure_julia" = "`false`" ":package_name" = "MLJScikitLearnInterface" ":package_license" = "BSD" -":load_path" = "MLJScikitLearnInterface.GaussianProcessClassifier" +":load_path" = "MLJScikitLearnInterface.OPTICS" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nGaussianProcessClassifier\n```\n\nA model type for constructing a Gaussian process classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nGaussianProcessClassifier = @load GaussianProcessClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = GaussianProcessClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`GaussianProcessClassifier(kernel=...)`.\n# Hyper-parameters\n\n- `kernel = nothing`\n\n- `optimizer = fmin_l_bfgs_b`\n\n- `n_restarts_optimizer = 0`\n\n- `copy_X_train = true`\n\n- `random_state = nothing`\n\n- `max_iter_predict = 100`\n\n- `warm_start = false`\n\n- `multi_class = one_vs_rest`\n\n" -":name" = "GaussianProcessClassifier" -":human_name" = "Gaussian process classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:kernel, :optimizer, :n_restarts_optimizer, :copy_X_train, :random_state, :max_iter_predict, :warm_start, :multi_class)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Int64\", \"Bool\", \"String\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":docstring" = "```\nOPTICS\n```\n\nA model type for constructing a optics, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOPTICS = @load OPTICS pkg=MLJScikitLearnInterface\n```\n\nDo `model = OPTICS()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OPTICS(min_samples=...)`.\n\nOPTICS (Ordering Points To Identify the Clustering Structure), closely related to [`DBSCAN'](@ref), finds core sample of high density and expands clusters from them. Unlike DBSCAN, keeps cluster hierarchy for a variable neighborhood radius. 
It is better suited for use on large datasets than the current sklearn implementation of DBSCAN.\n"
+":name" = "OPTICS"
+":human_name" = "optics"
+":is_supervised" = "`false`"
+":prediction_type" = ":unknown"
+":abstract_type" = "`MLJModelInterface.Unsupervised`"
+":implemented_methods" = [":clean!", ":fit", ":fitted_params"]
+":hyperparameters" = "`(:min_samples, :max_eps, :metric, :p, :cluster_method, :eps, :xi, :predecessor_correction, :min_cluster_size, :algorithm, :leaf_size, :n_jobs)`"
+":hyperparameter_types" = "`(\"Union{Float64, Int64}\", \"Float64\", \"String\", \"Int64\", \"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Bool\", \"Union{Nothing, Float64, Int64}\", \"String\", \"Int64\", \"Union{Nothing, Int64}\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
 ":iteration_parameter" = "`nothing`"
 ":supports_training_losses" = "`false`"
 ":reports_feature_importances" = "`false`"
 ":deep_properties" = "`()`"
 ":reporting_operations" = "`()`"
 ":constructor" = "`nothing`"
@@ -2484,33 +2484,33 @@
 ":reporting_operations" = "`()`"
 ":constructor" = "`nothing`"

-[MLJScikitLearnInterface.OPTICS]
+[MLJScikitLearnInterface.RANSACRegressor]
 ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
 ":output_scitype" = "`ScientificTypesBase.Unknown`"
-":target_scitype" = "`ScientificTypesBase.Unknown`"
-":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`"
+":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
 ":transform_scitype" = "`ScientificTypesBase.Unknown`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":is_pure_julia" = "`false`"
 ":package_name" = "MLJScikitLearnInterface"
 ":package_license" = "BSD"
-":load_path" = "MLJScikitLearnInterface.OPTICS"
+":load_path" = "MLJScikitLearnInterface.RANSACRegressor"
 ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324"
 ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl"
 ":is_wrapper" = "`false`"
 ":supports_weights" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "```\nOPTICS\n```\n\nA model type for constructing a optics, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOPTICS = @load OPTICS pkg=MLJScikitLearnInterface\n```\n\nDo `model = OPTICS()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OPTICS(min_samples=...)`.\n\nOPTICS (Ordering Points To Identify the Clustering Structure), closely related to [`DBSCAN'](@ref), finds core sample of high density and expands clusters from them. Unlike DBSCAN, keeps cluster hierarchy for a variable neighborhood radius. 
Better suited for usage on large datasets than the current sklearn implementation of DBSCAN.\n" -":name" = "OPTICS" -":human_name" = "optics" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params"] -":hyperparameters" = "`(:min_samples, :max_eps, :metric, :p, :cluster_method, :eps, :xi, :predecessor_correction, :min_cluster_size, :algorithm, :leaf_size, :n_jobs)`" -":hyperparameter_types" = "`(\"Union{Float64, Int64}\", \"Float64\", \"String\", \"Int64\", \"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Bool\", \"Union{Nothing, Float64, Int64}\", \"String\", \"Int64\", \"Union{Nothing, Int64}\")`" +":docstring" = "```\nRANSACRegressor\n```\n\nA model type for constructing a ransac regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nRANSACRegressor = @load RANSACRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = RANSACRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`RANSACRegressor(estimator=...)`.\n# Hyper-parameters\n\n- `estimator = nothing`\n\n- `min_samples = 5`\n\n- `residual_threshold = nothing`\n\n- `is_data_valid = nothing`\n\n- `is_model_valid = nothing`\n\n- `max_trials = 100`\n\n- `max_skips = 9223372036854775807`\n\n- `stop_n_inliers = 9223372036854775807`\n\n- `stop_score = Inf`\n\n- `stop_probability = 0.99`\n\n- `loss = absolute_error`\n\n- `random_state = nothing`\n\n" +":name" = "RANSACRegressor" +":human_name" = "ransac regressor" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":hyperparameters" = "`(:estimator, :min_samples, :residual_threshold, :is_data_valid, :is_model_valid, :max_trials, :max_skips, :stop_n_inliers, :stop_score, :stop_probability, :loss, :random_state)`" +":hyperparameter_types" = "`(\"Any\", \"Union{Float64, Int64}\", \"Union{Nothing, Float64}\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Function, String}\", \"Any\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" @@ -3744,34 +3744,34 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[SIRUS.StableRulesRegressor] +[SIRUS.StableForestClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, 
AbstractVector{<:ScientificTypesBase.Finite}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_pure_julia" = "`true`" ":package_name" = "SIRUS" ":package_license" = "MIT" -":load_path" = "SIRUS.StableForestRegressor" +":load_path" = "SIRUS.StableForestClassifier" ":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" ":package_url" = "https://github.com/rikhuijzer/SIRUS.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nStableRulesRegressor\n```\n\nA model type for constructing a stable rules regressor, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableRulesRegressor = @load StableRulesRegressor pkg=SIRUS\n```\n\nDo `model = StableRulesRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `StableRulesRegressor(rng=...)`.\n\n`StableRulesRegressor` implements the explainable rule-based regression model based on a random forest.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least thousand trees to for a better rule selection, and in turn better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reduce overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n * `max_rules::Int=10`: This is the most important hyperparameter after `lambda`. The more rules, the more accurate the model should be. If this is not the case, tune `lambda` first. However, more rules will also decrease model interpretability. So, it is important to find a good balance here. In most cases, 10 to 40 rules should provide reasonable accuracy while remaining interpretable.\n * `lambda::Float64=1.0`: The weights of the final rules are determined via a regularized regression over each rule as a binary feature. This hyperparameter specifies the strength of the ridge (L2) regularizer. SIRUS is very sensitive to the choice of this hyperparameter. Ensure that you try the full range from 10^-4 to 10^4 (e.g., 0.001, 0.01, ..., 100). When trying the range, one good check is to verify that an increase in `max_rules` increases performance. 
If this is not the case, then try a different value for `lambda`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableRules` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n"
-":name" = "StableRulesRegressor"
-":human_name" = "stable rules regressor"
+":docstring" = "```\nStableForestClassifier\n```\n\nA model type for constructing a stable forest classifier, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableForestClassifier = @load StableForestClassifier pkg=SIRUS\n```\n\nDo `model = StableForestClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `StableForestClassifier(rng=...)`.\n\n`StableForestClassifier` implements the random forest classifier with a stabilized forest structure (Bénard et al., [2021](http://proceedings.mlr.press/v130/benard21a.html)). This stabilization increases stability when extracting rules. The impact on the predictive accuracy compared to standard random forests should be relatively small.\n\n!!! note\n    Just like normal random forests, this model is not easily explainable. If you are interested in an explainable model, use the `StableRulesClassifier` or `StableRulesRegressor`.\n\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least a thousand trees for a better rule selection, and in turn better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reduce overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. 
The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableForest` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n" +":name" = "StableForestClassifier" +":human_name" = "stable forest classifier" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":predict"] -":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf, :max_rules, :lambda)`" -":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf)`" +":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -3779,34 +3779,34 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[SIRUS.StableForestClassifier] +[SIRUS.StableRulesRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_pure_julia" = "`true`" ":package_name" = "SIRUS" ":package_license" = "MIT" -":load_path" = "SIRUS.StableForestClassifier" +":load_path" = "SIRUS.StableForestRegressor" ":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" ":package_url" = "https://github.com/rikhuijzer/SIRUS.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nStableForestClassifier\n```\n\nA model type for constructing a stable forest classifier, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableForestClassifier = @load StableForestClassifier pkg=SIRUS\n```\n\nDo `model = StableForestClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `StableForestClassifier(rng=...)`.\n\n`StableForestClassifier` implements the random forest classifier with a stabilized forest structure (Bénard et al., [2021](http://proceedings.mlr.press/v130/benard21a.html)). This stabilization increases stability when extracting rules. The impact on the predictive accuracy compared to standard random forests should be relatively small.\n\n!!! note\n    Just like normal random forests, this model is not easily explainable. If you are interested in an explainable model, use the `StableRulesClassifier` or `StableRulesRegressor`.\n\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least thousand trees to for a better rule selection, and in turn better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reduce overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableForest` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n"
-":name" = "StableForestClassifier"
-":human_name" = "stable forest classifier"
+":docstring" = "```\nStableRulesRegressor\n```\n\nA model type for constructing a stable rules regressor, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableRulesRegressor = @load StableRulesRegressor pkg=SIRUS\n```\n\nDo `model = StableRulesRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `StableRulesRegressor(rng=...)`.\n\n`StableRulesRegressor` implements the explainable rule-based regression model based on a random forest.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. 
Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least a thousand trees for a better rule selection, and in turn better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reduce overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n * `max_rules::Int=10`: This is the most important hyperparameter after `lambda`. The more rules, the more accurate the model should be. If this is not the case, tune `lambda` first. However, more rules will also decrease model interpretability. So, it is important to find a good balance here. In most cases, 10 to 40 rules should provide reasonable accuracy while remaining interpretable.\n * `lambda::Float64=1.0`: The weights of the final rules are determined via a regularized regression over each rule as a binary feature. This hyperparameter specifies the strength of the ridge (L2) regularizer. SIRUS is very sensitive to the choice of this hyperparameter. Ensure that you try the full range from 10^-4 to 10^4 (e.g., 0.001, 0.01, ..., 100). When trying the range, one good check is to verify that an increase in `max_rules` increases performance. If this is not the case, then try a different value for `lambda`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableRules` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n"
+":name" = "StableRulesRegressor"
+":human_name" = "stable rules regressor"
 ":is_supervised" = "`true`"
-":prediction_type" = ":probabilistic"
-":abstract_type" = "`MLJModelInterface.Probabilistic`"
+":prediction_type" = ":deterministic"
+":abstract_type" = "`MLJModelInterface.Deterministic`"
 ":implemented_methods" = [":clean!", ":fit", ":predict"]
-":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf)`"
-":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`"
+":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf, :max_rules, :lambda)`"
+":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Float64\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
 ":iteration_parameter" = "`nothing`"
 ":supports_training_losses" = "`false`"
 ":reports_feature_importances" = "`false`"
 ":deep_properties" = "`()`"
 ":reporting_operations" = "`()`"
 ":constructor" = "`nothing`"
@@ -3849,42 +3849,7 @@
 ":reporting_operations" = "`()`"
 ":constructor" = "`nothing`"

-[MLJIteration.DeterministicIteratedModel]
-":input_scitype" = "`ScientificTypesBase.Unknown`"
-":output_scitype" = "`ScientificTypesBase.Unknown`"
-":target_scitype" = "`ScientificTypesBase.Unknown`"
-":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`ScientificTypesBase.Unknown`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":is_pure_julia" = 
"`false`" -":package_name" = "MLJIteration" -":package_license" = "MIT" -":load_path" = "MLJIteration.IteratedModel" -":package_uuid" = "614be32b-d00c-4edb-bd02-1eb411ab5e55" -":package_url" = "https://github.com/JuliaAI/MLJIteration.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nIteratedModel(model=nothing,\n controls=Any[IterationControl.Step(1), EarlyStopping.Patience(5), EarlyStopping.GL(2.0), EarlyStopping.TimeLimit(Dates.Millisecond(108000)), EarlyStopping.InvalidValue()],\n retrain=false,\n resampling=Holdout(),\n measure=nothing,\n weights=nothing,\n class_weights=nothing,\n operation=predict,\n verbosity=1,\n check_measure=true,\n iteration_parameter=nothing,\n cache=true)\n```\n\nWrap the specified `model <: Supervised` in the specified iteration `controls`. Training a machine bound to the wrapper iterates a corresonding machine bound to `model`. Here `model` should support iteration.\n\nTo list all controls, do `MLJIteration.CONTROLS`. Controls are summarized at [https://alan-turing-institute.github.io/MLJ.jl/dev/getting_started/](https://alan-turing-institute.github.io/MLJ.jl/dev/controlling_iterative_models/) but query individual doc-strings for details and advanced options. For creating your own controls, refer to the documentation just cited.\n\nTo make out-of-sample losses available to the controls, the machine bound to `model` is only trained on part of the data, as iteration proceeds. See details on training below. Specify `retrain=true` to ensure the model is retrained on *all* available data, using the same number of iterations, once controlled iteration has stopped.\n\nSpecify `resampling=nothing` if all data is to be used for controlled iteration, with each out-of-sample loss replaced by the most recent training loss, assuming this is made available by the model (`supports_training_losses(model) == true`). Otherwise, `resampling` must have type `Holdout` (eg, `Holdout(fraction_train=0.8, rng=123)`).\n\nAssuming `retrain=true` or `resampling=nothing`, `iterated_model` behaves exactly like the original `model` but with the iteration parameter automatically selected. If `retrain=false` (default) and `resampling` is not `nothing`, then `iterated_model` behaves like the original model trained on a subset of the provided data.\n\nControlled iteration can be continued with new `fit!` calls (warm restart) by mutating a control, or by mutating the iteration parameter of `model`, which is otherwise ignored.\n\n### Training\n\nGiven an instance `iterated_model` of `IteratedModel`, calling `fit!(mach)` on a machine `mach = machine(iterated_model, data...)` performs the following actions:\n\n * Assuming `resampling !== nothing`, the `data` is split into *train* and *test* sets, according to the specified `resampling` strategy, which must have type `Holdout`.\n * A clone of the wrapped model, `iterated_model.model`, is bound to the train data in an internal machine, `train_mach`. If `resampling === nothing`, all data is used instead. This machine is the object to which controls are applied. For example, `Callback(fitted_params |> print)` will print the value of `fitted_params(train_mach)`.\n * The iteration parameter of the clone is set to `0`.\n * The specified `controls` are repeatedly applied to `train_mach` in sequence, until one of the controls triggers a stop. 
Loss-based controls (eg, `Patience()`, `GL()`, `Threshold(0.001)`) use an out-of-sample loss, obtained by applying `measure` to predictions and the test target values. (Specifically, these predictions are those returned by `operation(train_mach)`.) If `resampling === nothing` then the most recent training loss is used instead. Some controls require *both* out-of-sample and training losses (eg, `PQ()`).\n * Once a stop has been triggered, a clone of `model` is bound to all `data` in a machine called `mach_production` below, unless `retrain == false` or `resampling === nothing`, in which case `mach_production` coincides with `train_mach`.\n\n### Prediction\n\nCalling `predict(mach, Xnew)` returns `predict(mach_production, Xnew)`. Similar similar statements hold for `predict_mean`, `predict_mode`, `predict_median`.\n\n### Controls\n\nA control is permitted to mutate the fields (hyper-parameters) of `train_mach.model` (the clone of `model`). For example, to mutate a learning rate one might use the control\n\n```\nCallback(mach -> mach.model.eta = 1.05*mach.model.eta)\n```\n\nHowever, unless `model` supports warm restarts with respect to changes in that parameter, this will trigger retraining of `train_mach` from scratch, with a different training outcome, which is not recommended.\n\n### Warm restarts\n\nIf `iterated_model` is mutated and `fit!(mach)` is called again, then a warm restart is attempted if the only parameters to change are `model` or `controls` or both.\n\nSpecifically, `train_mach.model` is mutated to match the current value of `iterated_model.model` and the iteration parameter of the latter is updated to the last value used in the preceding `fit!(mach)` call. Then repeated application of the (updated) controls begin anew.\n" -":name" = "DeterministicIteratedModel" -":human_name" = "deterministic iterated model" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :controls, :resampling, :measure, :weights, :class_weights, :operation, :retrain, :check_measure, :iteration_parameter, :cache)`" -":hyperparameter_types" = "`(\"MLJModelInterface.Deterministic\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Vector{<:Real}}\", \"Union{Nothing, Dict{Any, <:Real}}\", \"Any\", \"Bool\", \"Bool\", \"Union{Nothing, Expr, Symbol}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`IteratedModel`" - -[MLJIteration.ProbabilisticIteratedModel] +[MLJIteration.IteratedModel] ":input_scitype" = "`ScientificTypesBase.Unknown`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" @@ -3902,8 +3867,8 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nIteratedModel(model=nothing,\n controls=Any[IterationControl.Step(1), EarlyStopping.Patience(5), EarlyStopping.GL(2.0), EarlyStopping.TimeLimit(Dates.Millisecond(108000)), EarlyStopping.InvalidValue()],\n retrain=false,\n resampling=Holdout(),\n measure=nothing,\n weights=nothing,\n class_weights=nothing,\n operation=predict,\n verbosity=1,\n check_measure=true,\n iteration_parameter=nothing,\n 
cache=true)\n```\n\nWrap the specified `model <: Supervised` in the specified iteration `controls`. Training a machine bound to the wrapper iterates a corresonding machine bound to `model`. Here `model` should support iteration.\n\nTo list all controls, do `MLJIteration.CONTROLS`. Controls are summarized at [https://alan-turing-institute.github.io/MLJ.jl/dev/getting_started/](https://alan-turing-institute.github.io/MLJ.jl/dev/controlling_iterative_models/) but query individual doc-strings for details and advanced options. For creating your own controls, refer to the documentation just cited.\n\nTo make out-of-sample losses available to the controls, the machine bound to `model` is only trained on part of the data, as iteration proceeds. See details on training below. Specify `retrain=true` to ensure the model is retrained on *all* available data, using the same number of iterations, once controlled iteration has stopped.\n\nSpecify `resampling=nothing` if all data is to be used for controlled iteration, with each out-of-sample loss replaced by the most recent training loss, assuming this is made available by the model (`supports_training_losses(model) == true`). Otherwise, `resampling` must have type `Holdout` (eg, `Holdout(fraction_train=0.8, rng=123)`).\n\nAssuming `retrain=true` or `resampling=nothing`, `iterated_model` behaves exactly like the original `model` but with the iteration parameter automatically selected. If `retrain=false` (default) and `resampling` is not `nothing`, then `iterated_model` behaves like the original model trained on a subset of the provided data.\n\nControlled iteration can be continued with new `fit!` calls (warm restart) by mutating a control, or by mutating the iteration parameter of `model`, which is otherwise ignored.\n\n### Training\n\nGiven an instance `iterated_model` of `IteratedModel`, calling `fit!(mach)` on a machine `mach = machine(iterated_model, data...)` performs the following actions:\n\n * Assuming `resampling !== nothing`, the `data` is split into *train* and *test* sets, according to the specified `resampling` strategy, which must have type `Holdout`.\n * A clone of the wrapped model, `iterated_model.model`, is bound to the train data in an internal machine, `train_mach`. If `resampling === nothing`, all data is used instead. This machine is the object to which controls are applied. For example, `Callback(fitted_params |> print)` will print the value of `fitted_params(train_mach)`.\n * The iteration parameter of the clone is set to `0`.\n * The specified `controls` are repeatedly applied to `train_mach` in sequence, until one of the controls triggers a stop. Loss-based controls (eg, `Patience()`, `GL()`, `Threshold(0.001)`) use an out-of-sample loss, obtained by applying `measure` to predictions and the test target values. (Specifically, these predictions are those returned by `operation(train_mach)`.) If `resampling === nothing` then the most recent training loss is used instead. Some controls require *both* out-of-sample and training losses (eg, `PQ()`).\n * Once a stop has been triggered, a clone of `model` is bound to all `data` in a machine called `mach_production` below, unless `retrain == false` or `resampling === nothing`, in which case `mach_production` coincides with `train_mach`.\n\n### Prediction\n\nCalling `predict(mach, Xnew)` returns `predict(mach_production, Xnew)`. 
Similar similar statements hold for `predict_mean`, `predict_mode`, `predict_median`.\n\n### Controls\n\nA control is permitted to mutate the fields (hyper-parameters) of `train_mach.model` (the clone of `model`). For example, to mutate a learning rate one might use the control\n\n```\nCallback(mach -> mach.model.eta = 1.05*mach.model.eta)\n```\n\nHowever, unless `model` supports warm restarts with respect to changes in that parameter, this will trigger retraining of `train_mach` from scratch, with a different training outcome, which is not recommended.\n\n### Warm restarts\n\nIf `iterated_model` is mutated and `fit!(mach)` is called again, then a warm restart is attempted if the only parameters to change are `model` or `controls` or both.\n\nSpecifically, `train_mach.model` is mutated to match the current value of `iterated_model.model` and the iteration parameter of the latter is updated to the last value used in the preceding `fit!(mach)` call. Then repeated application of the (updated) controls begin anew.\n"
-":name" = "ProbabilisticIteratedModel"
+":docstring" = "```\nIteratedModel(model;\n controls=MLJIteration.DEFAULT_CONTROLS,\n resampling=Holdout(),\n measure=nothing,\n retrain=false,\n advanced_options...,\n)\n```\n\nWrap the specified supervised `model` in the specified iteration `controls`. Here `model` should support iteration, which is true if `iteration_parameter(model)` is different from `nothing`.\n\nAvailable controls: Step(), Info(), Warn(), Error(), Callback(), WithLossDo(), WithTrainingLossesDo(), WithNumberDo(), Data(), Disjunction(), GL(), InvalidValue(), Never(), NotANumber(), NumberLimit(), NumberSinceBest(), PQ(), Patience(), Threshold(), TimeLimit(), Warmup(), WithIterationsDo(), WithEvaluationDo(), WithFittedParamsDo(), WithReportDo(), WithMachineDo(), WithModelDo(), CycleLearningRate() and Save().\n\n!!! important\n To make out-of-sample losses available to the controls, the wrapped `model` is only trained on part of the data, as iteration proceeds. The user may want to force retraining on all data after controlled iteration has finished by specifying `retrain=true`. See also \"Training\", and the `retrain` option, under \"Extended help\" below.\n\n\n# Extended help\n\n# Options\n\n * `controls=Any[IterationControl.Step(1), EarlyStopping.Patience(5), EarlyStopping.GL(2.0), EarlyStopping.TimeLimit(Dates.Millisecond(108000)), EarlyStopping.InvalidValue()]`: Controls are summarized at [https://JuliaAI.github.io/MLJ.jl/dev/controlling_iterative_models/](https://JuliaAI.github.io/MLJ.jl/dev/controlling_iterative_models/) but query individual doc-strings for details and advanced options. For creating your own controls, refer to the documentation just cited.\n * `resampling=Holdout(fraction_train=0.7)`: The default resampling holds back 30% of data for computing an out-of-sample estimate of performance (the \"loss\") for loss-based controls such as `WithLossDo`. Specify `resampling=nothing` if all data is to be used for controlled iteration, with each out-of-sample loss replaced by the most recent training loss, assuming this is made available by the model (`supports_training_losses(model) == true`). If the model does not report a training loss, you can use `resampling=InSample()` instead. 
Otherwise, `resampling` must have type `Holdout` or be a vector with one element of the form `(train_indices, test_indices)`.\n * `measure=nothing`: StatisticalMeasures.jl compatible measure for estimating model performance (the \"loss\", but the orientation is immaterial - i.e., this could be a score). Inferred by default. Ignored if `resampling=nothing`.\n * `retrain=false`: If `retrain=true` or `resampling=nothing`, `iterated_model` behaves exactly like the original `model` but with the iteration parameter automatically selected (\"learned\"). That is, the model is retrained on *all* available data, using the same number of iterations, once controlled iteration has stopped. This is typically desired if wrapping the iterated model further, or when inserting in a pipeline or other composite model. If `retrain=false` (default) and `resampling isa Holdout`, then `iterated_model` behaves like the original model trained on a subset of the provided data.\n * `weights=nothing`: per-observation weights to be passed to `measure` where supported; if unspecified, these are understood to be uniform.\n * `class_weights=nothing`: class-weights to be passed to `measure` where supported; if unspecified, these are understood to be uniform.\n * `operation=nothing`: Operation, such as `predict` or `predict_mode`, for computing target values, or proxy target values, for consumption by `measure`; automatically inferred by default.\n * `check_measure=true`: Specify `false` to override checks on `measure` for compatibility with the training data.\n * `iteration_parameter=nothing`: A symbol, such as `:epochs`, naming the iteration parameter of `model`; inferred by default. Note that the actual value of the iteration parameter in the supplied `model` is ignored; only the value of an internal clone is mutated during training the wrapped model.\n * `cache=true`: Whether or not model-specific representations of data are cached in between iteration parameter increments; specify `cache=false` to prioritize memory over speed.\n\n# Training\n\nTraining an instance `iterated_model` of `IteratedModel` on some `data` (by binding to a machine and calling `fit!`, for example) performs the following actions:\n\n * Assuming `resampling !== nothing`, the `data` is split into *train* and *test* sets, according to the specified `resampling` strategy.\n * A clone of the wrapped model, `model`, is bound to the train data in an internal machine, `train_mach`. If `resampling === nothing`, all data is used instead. This machine is the object to which controls are applied. For example, `Callback(fitted_params |> print)` will print the value of `fitted_params(train_mach)`.\n * The iteration parameter of the clone is set to `0`.\n * The specified `controls` are repeatedly applied to `train_mach` in sequence, until one of the controls triggers a stop. Loss-based controls (eg, `Patience()`, `GL()`, `Threshold(0.001)`) use an out-of-sample loss, obtained by applying `measure` to predictions and the test target values. (Specifically, these predictions are those returned by `operation(train_mach)`.) If `resampling === nothing` then the most recent training loss is used instead. 
Some controls require *both* out-of-sample and training losses (eg, `PQ()`).\n * Once a stop has been triggered, a clone of `model` is bound to all `data` in a machine called `mach_production` below, unless `retrain == false` (true by default) or `resampling === nothing`, in which case `mach_production` coincides with `train_mach`.\n\n# Prediction\n\nCalling `predict(mach, Xnew)` in the example above returns `predict(mach_production, Xnew)`. Similar statements hold for `predict_mean`, `predict_mode`, `predict_median`.\n\n# Controls that mutate parameters\n\nA control is permitted to mutate the fields (hyper-parameters) of `train_mach.model` (the clone of `model`). For example, to mutate a learning rate one might use the control\n\n```\nCallback(mach -> mach.model.eta = 1.05*mach.model.eta)\n```\n\nHowever, unless `model` supports warm restarts with respect to changes in that parameter, this will trigger retraining of `train_mach` from scratch, with a different training outcome, which is not recommended.\n\n# Warm restarts\n\nIn the following example, the second `fit!` call will not restart training of the internal `train_mach`, assuming `model` supports warm restarts:\n\n```julia\niterated_model = IteratedModel(\n model,\n controls = [Step(1), NumberLimit(100)],\n)\nmach = machine(iterated_model, X, y)\nfit!(mach) # train for 100 iterations\niterated_model.controls = [Step(1), NumberLimit(50)]\nfit!(mach) # train for an *extra* 50 iterations\n```\n\nMore generally, if `iterated_model` is mutated and `fit!(mach)` is called again, then a warm restart is attempted if the only parameters to change are `model` or `controls` or both.\n\nSpecifically, `train_mach.model` is mutated to match the current value of `iterated_model.model` and the iteration parameter of the latter is updated to the last value used in the preceding `fit!(mach)` call. Then repeated application of the (updated) controls begins anew.\n"
+":name" = "IteratedModel"
":human_name" = "probabilistic iterated model"
":is_supervised" = "`true`"
":prediction_type" = ":probabilistic"
":abstract_type" = "`MLJModelInterface.Probabilistic`"
":implemented_methods" = []
":hyperparameters" = "`(:model, :controls, :resampling, :measure, :weights, :class_weights, :operation, :retrain, :check_measure, :iteration_parameter, :cache)`"
":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Vector{<:Real}}\", \"Union{Nothing, Dict{Any, <:Real}}\", \"Any\", \"Bool\", \"Bool\", \"Union{Nothing, Expr, Symbol}\", \"Bool\")`"
":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
":iteration_parameter" = "`nothing`"
":supports_training_losses" = "`false`"
":reports_feature_importances" = "`false`"
":deep_properties" = "`()`"
":reporting_operations" = "`()`"
":constructor" = "`nothing`"
@@ -4479,75 +4444,40 @@
-[MLJBase.DeterministicStack]
+[MLJBase.Pipeline]
":input_scitype" = "`ScientificTypesBase.Unknown`"
":output_scitype" = "`ScientificTypesBase.Unknown`"
":target_scitype" = "`ScientificTypesBase.Unknown`"
-":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`ScientificTypesBase.Unknown`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":is_pure_julia" = "`false`"
-":package_name" = "MLJBase"
-":package_license" = "MIT"
-":load_path" = "MLJBase.Stack"
-":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
-":package_url" = "https://github.com/JuliaAI/MLJBase.jl"
-":is_wrapper" = "`true`"
-":supports_weights" = "`false`"
-":supports_class_weights" = "`false`"
-":supports_online" = "`false`"
-":docstring" = "```\nUnion{Types...}\n```\n\nA type union is an abstract type which includes all instances of any of its argument types. 
The empty union [`Union{}`](@ref) is the bottom type of Julia.\n\n# Examples\n\n```jldoctest\njulia> IntOrString = Union{Int,AbstractString}\nUnion{Int64, AbstractString}\n\njulia> 1 isa IntOrString\ntrue\n\njulia> \"Hello!\" isa IntOrString\ntrue\n\njulia> 1.0 isa IntOrString\nfalse\n```\n" -":name" = "DeterministicStack" -":human_name" = "deterministic stack" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:models, :metalearner, :resampling, :measures, :cache, :acceleration)`" -":hyperparameter_types" = "`(\"Vector{MLJModelInterface.Supervised}\", \"MLJModelInterface.Deterministic\", \"Any\", \"Union{Nothing, AbstractVector}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`MLJBase.Stack`" - -[MLJBase.ProbabilisticStack] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":fit_data_scitype" = "`Tuple{}`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_pure_julia" = "`false`" ":package_name" = "MLJBase" -":package_license" = "MIT" -":load_path" = "MLJBase.Stack" -":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" -":package_url" = "https://github.com/JuliaAI/MLJBase.jl" +":package_license" = "unknown" +":load_path" = "MLJBase.Pipeline" +":package_uuid" = "unknown" +":package_url" = "unknown" ":is_wrapper" = "`true`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nUnion{Types...}\n```\n\nA type union is an abstract type which includes all instances of any of its argument types. The empty union [`Union{}`](@ref) is the bottom type of Julia.\n\n# Examples\n\n```jldoctest\njulia> IntOrString = Union{Int,AbstractString}\nUnion{Int64, AbstractString}\n\njulia> 1 isa IntOrString\ntrue\n\njulia> \"Hello!\" isa IntOrString\ntrue\n\njulia> 1.0 isa IntOrString\nfalse\n```\n" -":name" = "ProbabilisticStack" -":human_name" = "probabilistic stack" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":docstring" = "```\nPipeline(component1, component2, ... , componentk; options...)\nPipeline(name1=component1, name2=component2, ..., namek=componentk; options...)\ncomponent1 |> component2 |> ... |> componentk\n```\n\nCreate an instance of a composite model type which sequentially composes the specified components in order. This means `component1` receives inputs, whose output is passed to `component2`, and so forth. A \"component\" is either a `Model` instance, a model type (converted immediately to its default instance) or any callable object. 
Here the \"output\" of a model is what `predict` returns if it is `Supervised`, or what `transform` returns if it is `Unsupervised`.\n\nNames for the component fields are automatically generated unless explicitly specified, as in\n\n```\nPipeline(encoder=ContinuousEncoder(drop_last=false),\n stand=Standardizer())\n```\n\nThe `Pipeline` constructor accepts keyword `options` discussed further below.\n\nOrdinary functions (and other callables) may be inserted in the pipeline as shown in the following example:\n\n```\nPipeline(X->coerce(X, :age=>Continuous), OneHotEncoder, ConstantClassifier)\n```\n\n### Syntactic sugar\n\nThe `|>` operator is overloaded to construct pipelines out of models, callables, and existing pipelines:\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels add=true\nPCA = @load PCA pkg=MultivariateStats add=true\n\npipe1 = MLJBase.table |> ContinuousEncoder |> Standardizer\npipe2 = PCA |> LinearRegressor\npipe1 |> pipe2\n```\n\nAt most one of the components may be a supervised model, but this model can appear in any position. A pipeline with a `Supervised` component is itself `Supervised` and implements the `predict` operation. It is otherwise `Unsupervised` (possibly `Static`) and implements `transform`.\n\n### Special operations\n\nIf all the `components` are invertible unsupervised models (ie, implement `inverse_transform`) then `inverse_transform` is implemented for the pipeline. If there are no supervised models, then `predict` is nevertheless implemented, assuming the last component is a model that implements it (some clustering models). Similarly, calling `transform` on a supervised pipeline calls `transform` on the supervised component.\n\n### Optional key-word arguments\n\n * `prediction_type` - prediction type of the pipeline; possible values: `:deterministic`, `:probabilistic`, `:interval` (default=`:deterministic` if not inferable)\n * `operation` - operation applied to the supervised component model, when present; possible values: `predict`, `predict_mean`, `predict_median`, `predict_mode` (default=`predict`)\n * `cache` - whether the internal machines created for component models should cache model-specific representations of data (see [`machine`](@ref)) (default=`true`)\n\n!!! 
warning\n Set `cache=false` to guarantee data anonymization.\n\n\nTo build more complicated non-branching pipelines, refer to the MLJ manual sections on composing models.\n" +":name" = "Pipeline" +":human_name" = "static pipeline" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Static`" ":implemented_methods" = [] -":hyperparameters" = "`(:models, :metalearner, :resampling, :measures, :cache, :acceleration)`" -":hyperparameter_types" = "`(\"Vector{MLJModelInterface.Supervised}\", \"MLJModelInterface.Probabilistic\", \"Any\", \"Union{Nothing, AbstractVector}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:named_components, :cache)`" +":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`MLJBase.Stack`" +":constructor" = "`Pipeline`" [MLJBase.Resampler] ":input_scitype" = "`ScientificTypesBase.Unknown`" @@ -4567,7 +4497,7 @@ ":supports_weights" = "`missing`" ":supports_class_weights" = "`missing`" ":supports_online" = "`false`" -":docstring" = "```\nresampler = Resampler(\n model=ConstantRegressor(),\n resampling=CV(),\n measure=nothing,\n weights=nothing,\n class_weights=nothing\n operation=predict,\n repeats = 1,\n acceleration=default_resource(),\n check_measure=true,\n per_observation=true,\n logger=nothing,\n compact=false,\n)\n```\n\n*Private method.* Use at own risk.\n\nResampling model wrapper, used internally by the `fit` method of `TunedModel` instances and `IteratedModel` instances. See [`evaluate!](@ref) for meaning of the options. Not intended for use by general user, who will ordinarily use [`evaluate!`](@ref) directly.\n\nGiven a machine `mach = machine(resampler, args...)` one obtains a performance evaluation of the specified `model`, performed according to the prescribed `resampling` strategy and other parameters, using data `args...`, by calling `fit!(mach)` followed by `evaluate(mach)`.\n\nOn subsequent calls to `fit!(mach)` new train/test pairs of row indices are only regenerated if `resampling`, `repeats` or `cache` fields of `resampler` have changed. The evolution of an RNG field of `resampler` does *not* constitute a change (`==` for `MLJType` objects is not sensitive to such changes; see [`is_same_except`](@ref)).\n\nIf there is single train/test pair, then warm-restart behavior of the wrapped model `resampler.model` will extend to warm-restart behaviour of the wrapper `resampler`, with respect to mutations of the wrapped model.\n\nThe sample `weights` are passed to the specified performance measures that support weights for evaluation. These weights are not to be confused with any weights bound to a `Resampler` instance in a machine, used for training the wrapped `model` when supported.\n\nThe sample `class_weights` are passed to the specified performance measures that support per-class weights for evaluation. 
These weights are not to be confused with any weights bound to a `Resampler` instance in a machine, used for training the wrapped `model` when supported.\n" +":docstring" = "```\nresampler = Resampler(\n model=ConstantRegressor(),\n resampling=CV(),\n measure=nothing,\n weights=nothing,\n class_weights=nothing\n operation=predict,\n repeats = 1,\n acceleration=default_resource(),\n check_measure=true,\n per_observation=true,\n logger=nothing,\n compact=false,\n)\n```\n\n*Private method.* Use at own risk.\n\nResampling model wrapper, used internally by the `fit` method of `TunedModel` instances and `IteratedModel` instances. See [`evaluate!`](@ref) for meaning of the options. Not intended for use by general user, who will ordinarily use [`evaluate!`](@ref) directly.\n\nGiven a machine `mach = machine(resampler, args...)` one obtains a performance evaluation of the specified `model`, performed according to the prescribed `resampling` strategy and other parameters, using data `args...`, by calling `fit!(mach)` followed by `evaluate(mach)`.\n\nOn subsequent calls to `fit!(mach)` new train/test pairs of row indices are only regenerated if `resampling`, `repeats` or `cache` fields of `resampler` have changed. The evolution of an RNG field of `resampler` does *not* constitute a change (`==` for `MLJType` objects is not sensitive to such changes; see [`is_same_except`](@ref)).\n\nIf there is single train/test pair, then warm-restart behavior of the wrapped model `resampler.model` will extend to warm-restart behaviour of the wrapper `resampler`, with respect to mutations of the wrapped model.\n\nThe sample `weights` are passed to the specified performance measures that support weights for evaluation. These weights are not to be confused with any weights bound to a `Resampler` instance in a machine, used for training the wrapped `model` when supported.\n\nThe sample `class_weights` are passed to the specified performance measures that support per-class weights for evaluation. These weights are not to be confused with any weights bound to a `Resampler` instance in a machine, used for training the wrapped `model` when supported.\n" ":name" = "Resampler" ":human_name" = "resampler" ":is_supervised" = "`false`" @@ -4584,7 +4514,7 @@ ":reporting_operations" = "`()`" ":constructor" = "`MLJBase.Resampler`" -[MLJBase.TransformedTargetModelInterval] +[MLJBase.Stack] ":input_scitype" = "`ScientificTypesBase.Unknown`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" @@ -4595,66 +4525,31 @@ ":is_pure_julia" = "`false`" ":package_name" = "MLJBase" ":package_license" = "MIT" -":load_path" = "MLJBase.TransformedTargetModel" +":load_path" = "MLJBase.Stack" ":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" ":package_url" = "https://github.com/JuliaAI/MLJBase.jl" ":is_wrapper" = "`true`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. 
If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`. In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(y))\n```\n" -":name" = "TransformedTargetModelInterval" -":human_name" = "transformed target model interval" +":docstring" = "```\nUnion{Types...}\n```\n\nA type union is an abstract type which includes all instances of any of its argument types. The empty union [`Union{}`](@ref) is the bottom type of Julia.\n\n# Examples\n\n```jldoctest\njulia> IntOrString = Union{Int,AbstractString}\nUnion{Int64, AbstractString}\n\njulia> 1 isa IntOrString\ntrue\n\njulia> \"Hello!\" isa IntOrString\ntrue\n\njulia> 1.0 isa IntOrString\nfalse\n```\n" +":name" = "Stack" +":human_name" = "probabilistic stack" ":is_supervised" = "`true`" -":prediction_type" = ":interval" -":abstract_type" = "`MLJModelInterface.Interval`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`" -":hyperparameter_types" = "`(\"MLJModelInterface.Interval\", \"Any\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`TransformedTargetModel`" - -[MLJBase.TransformedTargetModelDeterministicUnsupervisedDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBase" -":package_license" = "MIT" -":load_path" = 
"MLJBase.TransformedTargetModel" -":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" -":package_url" = "https://github.com/JuliaAI/MLJBase.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`. In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(y))\n```\n" -":name" = "TransformedTargetModelDeterministicUnsupervisedDetector" -":human_name" = "transformed target model deterministic unsupervised detector" -":is_supervised" = "`false`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.DeterministicUnsupervisedDetector`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [] -":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`" -":hyperparameter_types" = "`(\"MLJModelInterface.DeterministicUnsupervisedDetector\", \"Any\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:models, :metalearner, :resampling, :measures, :cache, :acceleration)`" +":hyperparameter_types" = "`(\"Vector{MLJModelInterface.Supervised}\", \"MLJModelInterface.Probabilistic\", \"Any\", \"Union{Nothing, AbstractVector}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`TransformedTargetModel`" +":constructor" = "`MLJBase.Stack`" -[MLJBase.TransformedTargetModelProbabilistic] +[MLJBase.TransformedTargetModel] ":input_scitype" = "`ScientificTypesBase.Unknown`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" @@ -4673,7 +4568,7 @@ 
":supports_class_weights" = "`false`" ":supports_online" = "`false`" ":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`. In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(y))\n```\n" -":name" = "TransformedTargetModelProbabilistic" +":name" = "TransformedTargetModel" ":human_name" = "transformed target model probabilistic" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" @@ -4689,390 +4584,75 @@ ":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" ":constructor" = "`TransformedTargetModel`" -[MLJBase.StaticPipeline] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" +[MultivariateStats.LDA] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBase" -":package_license" = "unknown" -":load_path" = "MLJBase.Pipeline" -":package_uuid" = "unknown" -":package_url" = "unknown" -":is_wrapper" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "MultivariateStats" +":package_license" = "MIT" +":load_path" = "MLJMultivariateStatsInterface.LDA" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":is_wrapper" = "`false`" 
":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nPipeline(component1, component2, ... , componentk; options...)\nPipeline(name1=component1, name2=component2, ..., namek=componentk; options...)\ncomponent1 |> component2 |> ... |> componentk\n```\n\nCreate an instance of a composite model type which sequentially composes the specified components in order. This means `component1` receives inputs, whose output is passed to `component2`, and so forth. A \"component\" is either a `Model` instance, a model type (converted immediately to its default instance) or any callable object. Here the \"output\" of a model is what `predict` returns if it is `Supervised`, or what `transform` returns if it is `Unsupervised`.\n\nNames for the component fields are automatically generated unless explicitly specified, as in\n\n```\nPipeline(encoder=ContinuousEncoder(drop_last=false),\n stand=Standardizer())\n```\n\nThe `Pipeline` constructor accepts keyword `options` discussed further below.\n\nOrdinary functions (and other callables) may be inserted in the pipeline as shown in the following example:\n\n```\nPipeline(X->coerce(X, :age=>Continuous), OneHotEncoder, ConstantClassifier)\n```\n\n### Syntactic sugar\n\nThe `|>` operator is overloaded to construct pipelines out of models, callables, and existing pipelines:\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels add=true\nPCA = @load PCA pkg=MultivariateStats add=true\n\npipe1 = MLJBase.table |> ContinuousEncoder |> Standardizer\npipe2 = PCA |> LinearRegressor\npipe1 |> pipe2\n```\n\nAt most one of the components may be a supervised model, but this model can appear in any position. A pipeline with a `Supervised` component is itself `Supervised` and implements the `predict` operation. It is otherwise `Unsupervised` (possibly `Static`) and implements `transform`.\n\n### Special operations\n\nIf all the `components` are invertible unsupervised models (ie, implement `inverse_transform`) then `inverse_transform` is implemented for the pipeline. If there are no supervised models, then `predict` is nevertheless implemented, assuming the last component is a model that implements it (some clustering models). Similarly, calling `transform` on a supervised pipeline calls `transform` on the supervised component.\n\n### Optional key-word arguments\n\n * `prediction_type` - prediction type of the pipeline; possible values: `:deterministic`, `:probabilistic`, `:interval` (default=`:deterministic` if not inferable)\n * `operation` - operation applied to the supervised component model, when present; possible values: `predict`, `predict_mean`, `predict_median`, `predict_mode` (default=`predict`)\n * `cache` - whether the internal machines created for component models should cache model-specific representations of data (see [`machine`](@ref)) (default=`true`)\n\n!!! 
warning\n Set `cache=false` to guarantee data anonymization.\n\n\nTo build more complicated non-branching pipelines, refer to the MLJ manual sections on composing models.\n" -":name" = "StaticPipeline" -":human_name" = "static pipeline" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" -":implemented_methods" = [] -":hyperparameters" = "`(:named_components, :cache)`" -":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":docstring" = "```\nLDA\n```\n\nA model type for constructing a linear discriminant analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLDA = @load LDA pkg=MultivariateStats\n```\n\nDo `model = LDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LDA(method=...)`.\n\n[Multiclass linear discriminant analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) learns a projection in a space of features to a lower dimensional space, in a way that attempts to preserve as much as possible the degree to which the classes of a discrete target variable can be discriminated. This can be used either for dimension reduction of the features (see `transform` below) or for probabilistic classification of the target (see `predict` below).\n\nIn the case of prediction, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: The solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. 
This can be useful if using the standard covariance estimator.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. 
(See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nLDA = @load LDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = LDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n" +":name" = "LDA" +":human_name" = "linear discriminant analysis model" +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :dist)`" +":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Distances.SemiMetric\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`Pipeline`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" -[MLJBase.TransformedTargetModelProbabilisticSupervisedDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +[MultivariateStats.MultitargetLinearRegressor] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBase" +":is_pure_julia" = "`true`" +":package_name" = "MultivariateStats" ":package_license" = "MIT" -":load_path" = "MLJBase.TransformedTargetModel" -":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" -":package_url" = "https://github.com/JuliaAI/MLJBase.jl" -":is_wrapper" = "`true`" +":load_path" = "MLJMultivariateStatsInterface.MultitargetLinearRegressor" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":package_url" = 
"https://github.com/JuliaStats/MultivariateStats.jl" +":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`. In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(y))\n```\n" -":name" = "TransformedTargetModelProbabilisticSupervisedDetector" -":human_name" = "transformed target model probabilistic supervised detector" +":docstring" = "```\nMultitargetLinearRegressor\n```\n\nA model type for constructing a multitarget linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetLinearRegressor(bias=...)`.\n\n`MultitargetLinearRegressor` assumes the target variable is vector-valued with continuous components. It trains a linear prediction function using the least squares algorithm. 
Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 9; n_targets = 2) # a table and a table (synthetic data)\n\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 9)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n" +":name" = "MultitargetLinearRegressor" +":human_name" = "multitarget linear regressor" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.ProbabilisticSupervisedDetector`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`" -":hyperparameter_types" = "`(\"MLJModelInterface.ProbabilisticSupervisedDetector\", \"Any\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":hyperparameters" = "`(:bias,)`" +":hyperparameter_types" = "`(\"Bool\",)`" +":hyperparameter_ranges" = "`(nothing,)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`TransformedTargetModel`" - -[MLJBase.ProbabilisticPipeline] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBase" -":package_license" = "unknown" -":load_path" = "MLJBase.Pipeline" -":package_uuid" = "unknown" -":package_url" = "unknown" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nPipeline(component1, component2, ... , componentk; options...)\nPipeline(name1=component1, name2=component2, ..., namek=componentk; options...)\ncomponent1 |> component2 |> ... 
|> componentk\n```\n\nCreate an instance of a composite model type which sequentially composes the specified components in order. This means `component1` receives inputs, whose output is passed to `component2`, and so forth. A \"component\" is either a `Model` instance, a model type (converted immediately to its default instance) or any callable object. Here the \"output\" of a model is what `predict` returns if it is `Supervised`, or what `transform` returns if it is `Unsupervised`.\n\nNames for the component fields are automatically generated unless explicitly specified, as in\n\n```\nPipeline(encoder=ContinuousEncoder(drop_last=false),\n stand=Standardizer())\n```\n\nThe `Pipeline` constructor accepts keyword `options` discussed further below.\n\nOrdinary functions (and other callables) may be inserted in the pipeline as shown in the following example:\n\n```\nPipeline(X->coerce(X, :age=>Continuous), OneHotEncoder, ConstantClassifier)\n```\n\n### Syntactic sugar\n\nThe `|>` operator is overloaded to construct pipelines out of models, callables, and existing pipelines:\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels add=true\nPCA = @load PCA pkg=MultivariateStats add=true\n\npipe1 = MLJBase.table |> ContinuousEncoder |> Standardizer\npipe2 = PCA |> LinearRegressor\npipe1 |> pipe2\n```\n\nAt most one of the components may be a supervised model, but this model can appear in any position. A pipeline with a `Supervised` component is itself `Supervised` and implements the `predict` operation. It is otherwise `Unsupervised` (possibly `Static`) and implements `transform`.\n\n### Special operations\n\nIf all the `components` are invertible unsupervised models (ie, implement `inverse_transform`) then `inverse_transform` is implemented for the pipeline. If there are no supervised models, then `predict` is nevertheless implemented, assuming the last component is a model that implements it (some clustering models). Similarly, calling `transform` on a supervised pipeline calls `transform` on the supervised component.\n\n### Optional key-word arguments\n\n * `prediction_type` - prediction type of the pipeline; possible values: `:deterministic`, `:probabilistic`, `:interval` (default=`:deterministic` if not inferable)\n * `operation` - operation applied to the supervised component model, when present; possible values: `predict`, `predict_mean`, `predict_median`, `predict_mode` (default=`predict`)\n * `cache` - whether the internal machines created for component models should cache model-specific representations of data (see [`machine`](@ref)) (default=`true`)\n\n!!! 
warning\n Set `cache=false` to guarantee data anonymization.\n\n\nTo build more complicated non-branching pipelines, refer to the MLJ manual sections on composing models.\n" -":name" = "ProbabilisticPipeline" -":human_name" = "probabilistic pipeline" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [] -":hyperparameters" = "`(:named_components, :cache)`" -":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`Pipeline`" - -[MLJBase.DeterministicPipeline] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBase" -":package_license" = "unknown" -":load_path" = "MLJBase.Pipeline" -":package_uuid" = "unknown" -":package_url" = "unknown" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nPipeline(component1, component2, ... , componentk; options...)\nPipeline(name1=component1, name2=component2, ..., namek=componentk; options...)\ncomponent1 |> component2 |> ... |> componentk\n```\n\nCreate an instance of a composite model type which sequentially composes the specified components in order. This means `component1` receives inputs, whose output is passed to `component2`, and so forth. A \"component\" is either a `Model` instance, a model type (converted immediately to its default instance) or any callable object. Here the \"output\" of a model is what `predict` returns if it is `Supervised`, or what `transform` returns if it is `Unsupervised`.\n\nNames for the component fields are automatically generated unless explicitly specified, as in\n\n```\nPipeline(encoder=ContinuousEncoder(drop_last=false),\n stand=Standardizer())\n```\n\nThe `Pipeline` constructor accepts keyword `options` discussed further below.\n\nOrdinary functions (and other callables) may be inserted in the pipeline as shown in the following example:\n\n```\nPipeline(X->coerce(X, :age=>Continuous), OneHotEncoder, ConstantClassifier)\n```\n\n### Syntactic sugar\n\nThe `|>` operator is overloaded to construct pipelines out of models, callables, and existing pipelines:\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels add=true\nPCA = @load PCA pkg=MultivariateStats add=true\n\npipe1 = MLJBase.table |> ContinuousEncoder |> Standardizer\npipe2 = PCA |> LinearRegressor\npipe1 |> pipe2\n```\n\nAt most one of the components may be a supervised model, but this model can appear in any position. A pipeline with a `Supervised` component is itself `Supervised` and implements the `predict` operation. 
It is otherwise `Unsupervised` (possibly `Static`) and implements `transform`.\n\n### Special operations\n\nIf all the `components` are invertible unsupervised models (ie, implement `inverse_transform`) then `inverse_transform` is implemented for the pipeline. If there are no supervised models, then `predict` is nevertheless implemented, assuming the last component is a model that implements it (some clustering models). Similarly, calling `transform` on a supervised pipeline calls `transform` on the supervised component.\n\n### Optional key-word arguments\n\n * `prediction_type` - prediction type of the pipeline; possible values: `:deterministic`, `:probabilistic`, `:interval` (default=`:deterministic` if not inferable)\n * `operation` - operation applied to the supervised component model, when present; possible values: `predict`, `predict_mean`, `predict_median`, `predict_mode` (default=`predict`)\n * `cache` - whether the internal machines created for component models should cache model-specific representations of data (see [`machine`](@ref)) (default=`true`)\n\n!!! warning\n Set `cache=false` to guarantee data anonymization.\n\n\nTo build more complicated non-branching pipelines, refer to the MLJ manual sections on composing models.\n" -":name" = "DeterministicPipeline" -":human_name" = "deterministic pipeline" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:named_components, :cache)`" -":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`Pipeline`" - -[MLJBase.TransformedTargetModelDeterministic] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBase" -":package_license" = "MIT" -":load_path" = "MLJBase.TransformedTargetModel" -":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" -":package_url" = "https://github.com/JuliaAI/MLJBase.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` is one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`.
In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(z))\n```\n" -":name" = "TransformedTargetModelDeterministic" -":human_name" = "transformed target model deterministic" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`" -":hyperparameter_types" = "`(\"MLJModelInterface.Deterministic\", \"Any\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`TransformedTargetModel`" - -[MLJBase.IntervalPipeline] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBase" -":package_license" = "unknown" -":load_path" = "MLJBase.Pipeline" -":package_uuid" = "unknown" -":package_url" = "unknown" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nPipeline(component1, component2, ... , componentk; options...)\nPipeline(name1=component1, name2=component2, ..., namek=componentk; options...)\ncomponent1 |> component2 |> ... |> componentk\n```\n\nCreate an instance of a composite model type which sequentially composes the specified components in order. This means `component1` receives inputs, whose output is passed to `component2`, and so forth. A \"component\" is either a `Model` instance, a model type (converted immediately to its default instance) or any callable object.
Here the \"output\" of a model is what `predict` returns if it is `Supervised`, or what `transform` returns if it is `Unsupervised`.\n\nNames for the component fields are automatically generated unless explicitly specified, as in\n\n```\nPipeline(encoder=ContinuousEncoder(drop_last=false),\n stand=Standardizer())\n```\n\nThe `Pipeline` constructor accepts keyword `options` discussed further below.\n\nOrdinary functions (and other callables) may be inserted in the pipeline as shown in the following example:\n\n```\nPipeline(X->coerce(X, :age=>Continuous), OneHotEncoder, ConstantClassifier)\n```\n\n### Syntactic sugar\n\nThe `|>` operator is overloaded to construct pipelines out of models, callables, and existing pipelines:\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels add=true\nPCA = @load PCA pkg=MultivariateStats add=true\n\npipe1 = MLJBase.table |> ContinuousEncoder |> Standardizer\npipe2 = PCA |> LinearRegressor\npipe1 |> pipe2\n```\n\nAt most one of the components may be a supervised model, but this model can appear in any position. A pipeline with a `Supervised` component is itself `Supervised` and implements the `predict` operation. It is otherwise `Unsupervised` (possibly `Static`) and implements `transform`.\n\n### Special operations\n\nIf all the `components` are invertible unsupervised models (ie, implement `inverse_transform`) then `inverse_transform` is implemented for the pipeline. If there are no supervised models, then `predict` is nevertheless implemented, assuming the last component is a model that implements it (some clustering models). Similarly, calling `transform` on a supervised pipeline calls `transform` on the supervised component.\n\n### Optional key-word arguments\n\n * `prediction_type` - prediction type of the pipeline; possible values: `:deterministic`, `:probabilistic`, `:interval` (default=`:deterministic` if not inferable)\n * `operation` - operation applied to the supervised component model, when present; possible values: `predict`, `predict_mean`, `predict_median`, `predict_mode` (default=`predict`)\n * `cache` - whether the internal machines created for component models should cache model-specific representations of data (see [`machine`](@ref)) (default=`true`)\n\n!!! 
warning\n Set `cache=false` to guarantee data anonymization.\n\n\nTo build more complicated non-branching pipelines, refer to the MLJ manual sections on composing models.\n" -":name" = "IntervalPipeline" -":human_name" = "interval pipeline" -":is_supervised" = "`true`" -":prediction_type" = ":interval" -":abstract_type" = "`MLJModelInterface.Interval`" -":implemented_methods" = [] -":hyperparameters" = "`(:named_components, :cache)`" -":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`Pipeline`" - -[MLJBase.UnsupervisedPipeline] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBase" -":package_license" = "unknown" -":load_path" = "MLJBase.Pipeline" -":package_uuid" = "unknown" -":package_url" = "unknown" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nPipeline(component1, component2, ... , componentk; options...)\nPipeline(name1=component1, name2=component2, ..., namek=componentk; options...)\ncomponent1 |> component2 |> ... |> componentk\n```\n\nCreate an instance of a composite model type which sequentially composes the specified components in order. This means `component1` receives inputs, whose output is passed to `component2`, and so forth. A \"component\" is either a `Model` instance, a model type (converted immediately to its default instance) or any callable object. Here the \"output\" of a model is what `predict` returns if it is `Supervised`, or what `transform` returns if it is `Unsupervised`.\n\nNames for the component fields are automatically generated unless explicitly specified, as in\n\n```\nPipeline(encoder=ContinuousEncoder(drop_last=false),\n stand=Standardizer())\n```\n\nThe `Pipeline` constructor accepts keyword `options` discussed further below.\n\nOrdinary functions (and other callables) may be inserted in the pipeline as shown in the following example:\n\n```\nPipeline(X->coerce(X, :age=>Continuous), OneHotEncoder, ConstantClassifier)\n```\n\n### Syntactic sugar\n\nThe `|>` operator is overloaded to construct pipelines out of models, callables, and existing pipelines:\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels add=true\nPCA = @load PCA pkg=MultivariateStats add=true\n\npipe1 = MLJBase.table |> ContinuousEncoder |> Standardizer\npipe2 = PCA |> LinearRegressor\npipe1 |> pipe2\n```\n\nAt most one of the components may be a supervised model, but this model can appear in any position. A pipeline with a `Supervised` component is itself `Supervised` and implements the `predict` operation. 
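A hedged sketch of the invertibility behaviour described under "Special operations" below, using two built-in univariate transformers that both implement `inverse_transform` (the data and the choice of transformers are illustrative assumptions):

```julia
using MLJ

v = abs.(randn(100)) .+ 0.1  # strictly positive, as Box-Cox requires
pipe = UnivariateBoxCoxTransformer() |> UnivariateStandardizer()

mach = machine(pipe, v) |> fit!
w = transform(mach, v)
v_recovered = inverse_transform(mach, w)  # approximately recovers v
```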
It is otherwise `Unsupervised` (possibly `Static`) and implements `transform`.\n\n### Special operations\n\nIf all the `components` are invertible unsupervised models (ie, implement `inverse_transform`) then `inverse_transform` is implemented for the pipeline. If there are no supervised models, then `predict` is nevertheless implemented, assuming the last component is a model that implements it (some clustering models). Similarly, calling `transform` on a supervised pipeline calls `transform` on the supervised component.\n\n### Optional key-word arguments\n\n * `prediction_type` - prediction type of the pipeline; possible values: `:deterministic`, `:probabilistic`, `:interval` (default=`:deterministic` if not inferable)\n * `operation` - operation applied to the supervised component model, when present; possible values: `predict`, `predict_mean`, `predict_median`, `predict_mode` (default=`predict`)\n * `cache` - whether the internal machines created for component models should cache model-specific representations of data (see [`machine`](@ref)) (default=`true`)\n\n!!! warning\n Set `cache=false` to guarantee data anonymization.\n\n\nTo build more complicated non-branching pipelines, refer to the MLJ manual sections on composing models.\n" -":name" = "UnsupervisedPipeline" -":human_name" = "unsupervised pipeline" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [] -":hyperparameters" = "`(:named_components, :cache)`" -":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`Pipeline`" - -[MLJBase.TransformedTargetModelProbabilisticUnsupervisedDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBase" -":package_license" = "MIT" -":load_path" = "MLJBase.TransformedTargetModel" -":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" -":package_url" = "https://github.com/JuliaAI/MLJBase.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a 
transformation of the target variable.\n\nHere `transformer` is one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`. In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(z))\n```\n" -":name" = "TransformedTargetModelProbabilisticUnsupervisedDetector" -":human_name" = "transformed target model probabilistic unsupervised detector" -":is_supervised" = "`false`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.ProbabilisticUnsupervisedDetector`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`" -":hyperparameter_types" = "`(\"MLJModelInterface.ProbabilisticUnsupervisedDetector\", \"Any\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`TransformedTargetModel`" - -[MLJBase.TransformedTargetModelDeterministicSupervisedDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}`" -":predict_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBase" -":package_license" = "MIT" -":load_path" = "MLJBase.TransformedTargetModel" -":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" -":package_url" = "https://github.com/JuliaAI/MLJBase.jl" -":is_wrapper" = "`true`"
-":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`. In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(y))\n```\n" -":name" = "TransformedTargetModelDeterministicSupervisedDetector" -":human_name" = "transformed target model deterministic supervised detector" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.DeterministicSupervisedDetector`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`" -":hyperparameter_types" = "`(\"MLJModelInterface.DeterministicSupervisedDetector\", \"Any\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`TransformedTargetModel`" - -[MultivariateStats.LDA] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" -":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.LDA" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = 
"https://github.com/JuliaStats/MultivariateStats.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nLDA\n```\n\nA model type for constructing a linear discriminant analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLDA = @load LDA pkg=MultivariateStats\n```\n\nDo `model = LDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LDA(method=...)`.\n\n[Multiclass linear discriminant analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) learns a projection in a space of features to a lower dimensional space, in a way that attempts to preserve as much as possible the degree to which the classes of a discrete target variable can be discriminated. This can be used either for dimension reduction of the features (see `transform` below) or for probabilistic classification of the target (see `predict` below).\n\nIn the case of prediction, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: The solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. 
This can be useful if using the standard covariance estimator.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. 
(See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nLDA = @load LDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = LDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n" -":name" = "LDA" -":human_name" = "linear discriminant analysis model" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] -":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :dist)`" -":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Distances.SemiMetric\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MultivariateStats.MultitargetLinearRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" -":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.MultitargetLinearRegressor" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nMultitargetLinearRegressor\n```\n\nA model type for constructing a multitarget linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetLinearRegressor(bias=...)`.\n\n`MultitargetLinearRegressor` assumes the target variable is vector-valued with continuous components. It trains a linear prediction function using the least squares algorithm. 
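To make the training step concrete, the following is a hedged sketch of multitarget least squares with a bias column, written against base Julia rather than the package's internals:

```julia
using LinearAlgebra

n, p, q = 100, 9, 2
X = randn(n, p)
B = randn(p, q)
Y = X * B .+ 0.1 .* randn(n, q)  # two continuous targets

Xb = hcat(X, ones(n))  # append a bias (intercept) column
coefs = Xb \ Y         # least-squares solve; size (p + 1, q)
intercept = coefs[end, :]
```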
Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 9; n_targets = 2) # a table and a table (synthetic data)\n\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 9)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n" -":name" = "MultitargetLinearRegressor" -":human_name" = "multitarget linear regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:bias,)`" -":hyperparameter_types" = "`(\"Bool\",)`" -":hyperparameter_ranges" = "`(nothing,)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" [MultivariateStats.BayesianSubspaceLDA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5634,41 +5214,6 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[MLJBalancing.BalancedModelDeterministic] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBalancing" -":package_license" = "MIT" -":load_path" = "MLJBalancing.BalancedModel" -":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586" -":package_url" = "https://github.com/JuliaAI/MLJBalancing.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nBalancedModel(; model=nothing, balancer1=balancer_model1, balancer2=balancer_model2, ...)\nBalancedModel(model; balancer1=balancer_model1, balancer2=balancer_model2, ...)\n```\n\nGiven a classification model, and one or more balancer models that all implement the `MLJModelInterface`, `BalancedModel` allows constructing 
a sequential pipeline that wraps an arbitrary number of balancing models and a classifier together.\n\n# Operation\n\n * During training, data is first passed to `balancer1` and the result is passed to `balancer2` and so on, the result from the final balancer is then passed to the classifier for training.\n * During prediction, the balancers have no effect.\n\n# Arguments\n\n * `model::Supervised`: A classification model that implements the `MLJModelInterface`.\n * `balancer1::Static=...`: The first balancer model to pass the data to. This keyword argument can have any name.\n * `balancer2::Static=...`: The second balancer model to pass the data to. This keyword argument can have any name.\n * and so on for an arbitrary number of balancers.\n\n# Returns\n\n * An instance of type ProbabilisticBalancedModel or DeterministicBalancedModel, depending on the prediction type of model.\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# generate data\nX, y = Imbalance.generate_imbalanced_data(1000, 5; class_probs=[0.2, 0.3, 0.5])\n\n# prepare classification and balancing models\nSMOTENC = @load SMOTENC pkg=Imbalance verbosity=0\nTomekUndersampler = @load TomekUndersampler pkg=Imbalance verbosity=0\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\noversampler = SMOTENC(k=5, ratios=1.0, rng=42)\nundersampler = TomekUndersampler(min_ratios=0.5, rng=42)\nlogistic_model = LogisticClassifier()\n\n# wrap them in a BalancedModel\nbalanced_model = BalancedModel(model=logistic_model, balancer1=oversampler, balancer2=undersampler)\n\n# now this behaves as a unified model that can be trained, validated, fine-tuned, etc.\nmach = machine(balanced_model, X, y)\nfit!(mach)\n```\n" -":name" = "BalancedModelDeterministic" -":human_name" = "balanced model deterministic" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":getproperty", ":setproperty!"] -":hyperparameters" = "`(:balancers, :model)`" -":hyperparameter_types" = "`(\"Any\", \"MLJModelInterface.Deterministic\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`BalancedModel`" - @@ -5687,129 +5232,24 @@ [MLJBalancing.BalancedBaggingClassifier] ":input_scitype" = "`ScientificTypesBase.Unknown`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_pure_julia" = "`false`" ":package_name" = "MLJBalancing" ":package_license" = "MIT" ":load_path" = "MLJBalancing.BalancedBaggingClassifier" ":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586" ":package_url" = "https://github.com/JuliaAI/MLJBalancing.jl" ":is_wrapper" = "`true`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nBalancedBaggingClassifier\n```\n\nA model type for constructing a balanced bagging classifier, based on [MLJBalancing.jl](https://github.com/JuliaAI/MLJBalancing).\n\nFrom MLJ, the type can be imported using\n\n`BalancedBaggingClassifier = @load BalancedBaggingClassifier pkg=MLJBalancing`\n\nConstruct an instance with default hyper-parameters using the syntax `bagging_model = BalancedBaggingClassifier(model=...)`\n\nGiven a probabilistic classifier, `BalancedBaggingClassifier` performs bagging by undersampling only majority data in each bag so that it includes as many samples as the minority data, as sketched below.
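The per-bag undersampling idea can be sketched as follows; this is an illustrative reconstruction under stated assumptions, not MLJBalancing's actual implementation, and the helper name `balanced_bag_rows` is hypothetical:

```julia
using Random

# Row indices for one bag: all minority rows plus an equal-sized random
# subsample of the majority rows (hypothetical helper).
function balanced_bag_rows(y, majority, minority; rng=Random.default_rng())
    maj = findall(==(majority), y)
    mino = findall(==(minority), y)
    vcat(shuffle(rng, maj)[1:length(mino)], mino)
end

y = [fill(0, 84); fill(1, 16)]
rows = balanced_bag_rows(y, 0, 1)  # 32 rows, 16 from each class
```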
This approach is proposed, with an AdaBoost classifier whose output scores are averaged, in the paper Xu-Ying Liu, Jianxin Wu, & Zhi-Hua Zhou (2009). Exploratory Undersampling for Class-Imbalance Learning. IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics), 39 (2), 539–550.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: input features of a form supported by the `model` being wrapped (typically a table, e.g., `DataFrame`, with `Continuous` columns will be supported, as a minimum)\n * `y`: the binary target, which can be any `AbstractVector` where `length(unique(y)) == 2`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `model::Probabilistic`: The classifier to use to train on each bag.\n * `T::Integer=0`: The number of bags to be used in the ensemble. If not given, will be set as the ratio between the frequency of the majority and minority classes. Can be later found in `report(mach)`.\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if Julia `VERSION>=1.7`. Otherwise, uses `MersenneTwister`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given\n\nfeatures `Xnew` having the same scitype as `X` above. Predictions are probabilistic, but uncalibrated.\n\n * `predict_mode(mach, Xnew)`: return the mode of each prediction above\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# Load base classifier and BalancedBaggingClassifier\nBalancedBaggingClassifier = @load BalancedBaggingClassifier pkg=MLJBalancing\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\n# Construct the base classifier and use it to construct a BalancedBaggingClassifier\nlogistic_model = LogisticClassifier()\nmodel = BalancedBaggingClassifier(model=logistic_model, T=5)\n\n# Load the data and train the BalancedBaggingClassifier\nX, y = Imbalance.generate_imbalanced_data(100, 5; num_vals_per_category = [3, 2],\n class_probs = [0.9, 0.1],\n type = \"ColTable\",\n rng=42)\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇ 16 (19.0%)\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 84 (100.0%)\n\nmach = machine(model, X, y) |> fit!\n\n# Predict using the trained model\n\nyhat = predict(mach, X) # probabilistic predictions\npredict_mode(mach, X) # point predictions\n```\n" -":name" = "BalancedBaggingClassifier" -":human_name" = "balanced bagging classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :T, :rng)`" -":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Integer\", \"Union{Integer, Random.AbstractRNG}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`MLJBalancing.BalancedBaggingClassifier`" - -[MLJBalancing.BalancedModelInterval] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown,
ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBalancing" -":package_license" = "MIT" -":load_path" = "MLJBalancing.BalancedModel" -":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586" -":package_url" = "https://github.com/JuliaAI/MLJBalancing.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nBalancedModel(; model=nothing, balancer1=balancer_model1, balancer2=balancer_model2, ...)\nBalancedModel(model; balancer1=balancer_model1, balancer2=balancer_model2, ...)\n```\n\nGiven a classification model, and one or more balancer models that all implement the `MLJModelInterface`, `BalancedModel` allows constructing a sequential pipeline that wraps an arbitrary number of balancing models and a classifier together in a sequential pipeline.\n\n# Operation\n\n * During training, data is first passed to `balancer1` and the result is passed to `balancer2` and so on, the result from the final balancer is then passed to the classifier for training.\n * During prediction, the balancers have no effect.\n\n# Arguments\n\n * `model::Supervised`: A classification model that implements the `MLJModelInterface`.\n * `balancer1::Static=...`: The first balancer model to pass the data to. This keyword argument can have any name.\n * `balancer2::Static=...`: The second balancer model to pass the data to. This keyword argument can have any name.\n * and so on for an arbitrary number of balancers.\n\n# Returns\n\n * An instance of type ProbabilisticBalancedModel or DeterministicBalancedModel, depending on the prediction type of model.\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# generate data\nX, y = Imbalance.generate_imbalanced_data(1000, 5; class_probs=[0.2, 0.3, 0.5])\n\n# prepare classification and balancing models\nSMOTENC = @load SMOTENC pkg=Imbalance verbosity=0\nTomekUndersampler = @load TomekUndersampler pkg=Imbalance verbosity=0\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\noversampler = SMOTENC(k=5, ratios=1.0, rng=42)\nundersampler = TomekUndersampler(min_ratios=0.5, rng=42)\nlogistic_model = LogisticClassifier()\n\n# wrap them in a BalancedModel\nbalanced_model = BalancedModel(model=logistic_model, balancer1=oversampler, balancer2=undersampler)\n\n# now this behaves as a unified model that can be trained, validated, fine-tuned, etc.\nmach = machine(balanced_model, X, y)\nfit!(mach)\n```\n" -":name" = "BalancedModelInterval" -":human_name" = "balanced model interval" -":is_supervised" = "`true`" -":prediction_type" = ":interval" -":abstract_type" = "`MLJModelInterface.Interval`" -":implemented_methods" = [":getproperty", ":setproperty!"] -":hyperparameters" = "`(:balancers, :model)`" -":hyperparameter_types" = "`(\"Any\", \"MLJModelInterface.Interval\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`BalancedModel`" - -[MLJBalancing.BalancedModelProbabilistic] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = 
"`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBalancing" -":package_license" = "MIT" -":load_path" = "MLJBalancing.BalancedModel" -":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586" -":package_url" = "https://github.com/JuliaAI/MLJBalancing.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nBalancedModel(; model=nothing, balancer1=balancer_model1, balancer2=balancer_model2, ...)\nBalancedModel(model; balancer1=balancer_model1, balancer2=balancer_model2, ...)\n```\n\nGiven a classification model, and one or more balancer models that all implement the `MLJModelInterface`, `BalancedModel` allows constructing a sequential pipeline that wraps an arbitrary number of balancing models and a classifier together in a sequential pipeline.\n\n# Operation\n\n * During training, data is first passed to `balancer1` and the result is passed to `balancer2` and so on, the result from the final balancer is then passed to the classifier for training.\n * During prediction, the balancers have no effect.\n\n# Arguments\n\n * `model::Supervised`: A classification model that implements the `MLJModelInterface`.\n * `balancer1::Static=...`: The first balancer model to pass the data to. This keyword argument can have any name.\n * `balancer2::Static=...`: The second balancer model to pass the data to. This keyword argument can have any name.\n * and so on for an arbitrary number of balancers.\n\n# Returns\n\n * An instance of type ProbabilisticBalancedModel or DeterministicBalancedModel, depending on the prediction type of model.\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# generate data\nX, y = Imbalance.generate_imbalanced_data(1000, 5; class_probs=[0.2, 0.3, 0.5])\n\n# prepare classification and balancing models\nSMOTENC = @load SMOTENC pkg=Imbalance verbosity=0\nTomekUndersampler = @load TomekUndersampler pkg=Imbalance verbosity=0\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\noversampler = SMOTENC(k=5, ratios=1.0, rng=42)\nundersampler = TomekUndersampler(min_ratios=0.5, rng=42)\nlogistic_model = LogisticClassifier()\n\n# wrap them in a BalancedModel\nbalanced_model = BalancedModel(model=logistic_model, balancer1=oversampler, balancer2=undersampler)\n\n# now this behaves as a unified model that can be trained, validated, fine-tuned, etc.\nmach = machine(balanced_model, X, y)\nfit!(mach)\n```\n" -":name" = "BalancedModelProbabilistic" -":human_name" = "balanced model probabilistic" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":getproperty", ":propertynames", ":setproperty!"] -":hyperparameters" = "`(:balancers, :model)`" -":hyperparameter_types" = "`(\"Any\", \"MLJModelInterface.Probabilistic\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, 
:transform, :inverse_transform)`" -":constructor" = "`BalancedModel`" - -[MLJTuning.DeterministicTunedModel] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJTuning" -":package_license" = "MIT" -":load_path" = "MLJTuning.TunedModel" -":package_uuid" = "03970b2e-30c4-11ea-3135-d1576263f10f" -":package_url" = "https://github.com/alan-turing-institute/MLJTuning.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\ntuned_model = TunedModel(; model=,\n                         tuning=RandomSearch(),\n                         resampling=Holdout(),\n                         range=nothing,\n                         measure=nothing,\n                         n=default_n(tuning, range),\n                         operation=nothing,\n                         other_options...)\n```\n\nConstruct a model wrapper for hyper-parameter optimization of a supervised learner, specifying the `tuning` strategy and `model` whose hyper-parameters are to be mutated.\n\n```\ntuned_model = TunedModel(; models=,\n                         resampling=Holdout(),\n                         measure=nothing,\n                         n=length(models),\n                         operation=nothing,\n                         other_options...)\n```\n\nConstruct a wrapper for multiple `models`, for selection of an optimal one (equivalent to specifying `tuning=Explicit()` and `range=models` above). Elements of the iterator `models` need not have a common type, but they must all be `Deterministic` or all be `Probabilistic` *and this is not checked* but inferred from the first element generated.\n\nSee below for a complete list of options.\n\n### Training\n\nCalling `fit!(mach)` on a machine `mach=machine(tuned_model, X, y)` or `mach=machine(tuned_model, X, y, w)` will:\n\n * Instigate a search, over clones of `model`, with the hyperparameter mutations specified by `range`, for a model optimizing the specified `measure`, using performance evaluations carried out using the specified `tuning` strategy and `resampling` strategy. In the case `models` is explicitly listed, the search is instead over the models generated by the iterator `models`.\n * Fit an internal machine, based on the optimal model `fitted_params(mach).best_model`, wrapping the optimal `model` object in *all* the provided data `X`, `y`(, `w`). Calling `predict(mach, Xnew)` then returns predictions on `Xnew` of this internal machine. The final train can be suppressed by setting `train_best=false`.\n\n### Search space\n\nThe `range` objects supported depend on the `tuning` strategy specified. Query the `strategy` docstring for details. To optimize over an explicit list `v` of models of the same type, use `strategy=Explicit()` and specify `model=v[1]` and `range=v`.\n\nThe number of models searched is specified by `n`. If unspecified, then `MLJTuning.default_n(tuning, range)` is used. When `n` is increased and `fit!(mach)` called again, the old search history is re-instated and the search continues where it left off.\n\n### Measures (metrics)\n\nIf more than one `measure` is specified, then only the first is optimized (unless `strategy` is multi-objective) but the performance against every measure specified will be computed and reported in `report(mach).best_performance` and other relevant attributes of the generated report.
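To ground the options above, here is a minimal self-contained tuning sketch; the model choice and the range over `max_depth` are illustrative assumptions:

```julia
using MLJ  # assumes MLJ and MLJDecisionTreeInterface are installed

Tree = @load DecisionTreeClassifier pkg=DecisionTree verbosity=0
tree = Tree()
r = range(tree, :max_depth, lower=1, upper=6)

tuned = TunedModel(model=tree, tuning=Grid(), resampling=CV(nfolds=3),
                   range=r, measure=log_loss)

X, y = @load_iris
mach = machine(tuned, X, y) |> fit!
fitted_params(mach).best_model  # the optimal tree
```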
Options exist to pass per-observation weights or class weights to measures; see below.\n\n*Important.* If a custom measure, `my_measure`, is used, and the measure is a score, rather than a loss, be sure to check that `MLJ.orientation(my_measure) == :score` to ensure maximization of the measure, rather than minimization. Override an incorrect value with `MLJ.orientation(::typeof(my_measure)) = :score`.\n\n### Accessing the fitted parameters and other training (tuning) outcomes\n\nA Plots.jl plot of performance estimates is returned by `plot(mach)` or `heatmap(mach)`.\n\nOnce a tuning machine `mach` has been trained as above, then `fitted_params(mach)` has these keys/values:\n\n| key                  | value                                    |\n| --------------------:| ---------------------------------------:|\n| `best_model`         | optimal model instance                   |\n| `best_fitted_params` | learned parameters of the optimal model  |\n\nThe named tuple `report(mach)` includes these keys/values:\n\n| key                  | value                                                               |\n| --------------------:| ------------------------------------------------------------------:|\n| `best_model`         | optimal model instance                                              |\n| `best_history_entry` | corresponding entry in the history, including performance estimate  |\n| `best_report`        | report generated by fitting the optimal model to all data           |\n| `history`            | tuning strategy-specific history of all evaluations                 |\n\nplus other key/value pairs specific to the `tuning` strategy.\n\nEach element of `history` is a property-accessible object with these properties:\n\n| key           | value                                                              |\n| -------------:| -----------------------------------------------------------------:|\n| `measure`     | vector of measures (metrics)                                       |\n| `measurement` | vector of measurements, one per measure                            |\n| `per_fold`    | vector of vectors of unaggregated per-fold measurements            |\n| `evaluation`  | full `PerformanceEvaluation`/`CompactPerformanceEvaluation` object |\n\n### Complete list of key-word options\n\n * `model`: `Supervised` model prototype that is cloned and mutated to generate models for evaluation\n * `models`: Alternatively, an iterator of MLJ models to be explicitly evaluated. These may have varying types.\n * `tuning=RandomSearch()`: tuning strategy to be applied (eg, `Grid()`). See the [Tuning Models](https://alan-turing-institute.github.io/MLJ.jl/dev/tuning_models/#Tuning-Models) section of the MLJ manual for a complete list of options.\n * `resampling=Holdout()`: resampling strategy (eg, `Holdout()`, `CV()`, `StratifiedCV()`) to be applied in performance evaluations\n * `measure`: measure or measures to be applied in performance evaluations; only the first used in optimization (unless the strategy is multi-objective) but all reported to the history\n * `weights`: per-observation weights to be passed the measure(s) in performance evaluations, where supported. Check support with `supports_weights(measure)`.\n * `class_weights`: class weights to be passed the measure(s) in performance evaluations, where supported. Check support with `supports_class_weights(measure)`.\n * `repeats=1`: for generating train/test sets multiple times in resampling (\"Monte Carlo\" resampling); see [`evaluate!`](@ref) for details\n * `operation`/`operations` - One of `predict`, `predict_mean`, `predict_mode`, `predict_median`, or `predict_joint`, or a vector of these of the same length as `measure`/`measures`. Automatically inferred if left unspecified.\n * `range`: range object; tuning strategy documentation describes supported types\n * `selection_heuristic`: the rule determining how the best model is decided.
According to the default heuristic, `NaiveSelection()`, `measure` (or the first element of `measure`) is evaluated for each resample and these per-fold measurements are aggregated. The model with the lowest (resp. highest) aggregate is chosen if the measure is a `:loss` (resp. a `:score`).\n * `n`: number of iterations (ie, models to be evaluated); set by tuning strategy if left unspecified\n * `train_best=true`: whether to train the optimal model\n * `acceleration=default_resource()`: mode of parallelization for tuning strategies that support this\n * `acceleration_resampling=CPU1()`: mode of parallelization for resampling\n * `check_measure=true`: whether to check `measure` is compatible with the specified `model` and `operation`\n * `cache=true`: whether to cache model-specific representations of user-supplied data; set to `false` to conserve memory. Speed gains likely limited to the case `resampling isa Holdout`.\n * `compact_history=true`: whether to write [`CompactPerformanceEvaluation`](@ref) or regular [`PerformanceEvaluation`](@ref) objects to the history (accessed via the `:evaluation` key); the compact form excludes some fields to conserve memory.\n" -":name" = "DeterministicTunedModel" -":human_name" = "deterministic tuned model" +":docstring" = "```\nBalancedBaggingClassifier\n```\n\nA model type for constructing a balanced bagging classifier, based on [MLJBalancing.jl](https://github.com/JuliaAI/MLJBalancing).\n\nFrom MLJ, the type can be imported using\n\n`BalancedBaggingClassifier = @load BalancedBaggingClassifier pkg=MLJBalancing`\n\nConstruct an instance with default hyper-parameters using the syntax `bagging_model = BalancedBaggingClassifier(model=...)`\n\nGiven a probabilistic classifier, `BalancedBaggingClassifier` performs bagging by undersampling only majority data in each bag so that it includes as many samples as the minority data. This approach is proposed, with an AdaBoost classifier whose output scores are averaged, in the paper Xu-Ying Liu, Jianxin Wu, & Zhi-Hua Zhou (2009). Exploratory Undersampling for Class-Imbalance Learning. IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics), 39 (2), 539–550.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: input features of a form supported by the `model` being wrapped (typically a table, e.g., `DataFrame`, with `Continuous` columns will be supported, as a minimum)\n * `y`: the binary target, which can be any `AbstractVector` where `length(unique(y)) == 2`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `model::Probabilistic`: The classifier to use to train on each bag.\n * `T::Integer=0`: The number of bags to be used in the ensemble. If not given, will be set as the ratio between the frequency of the majority and minority classes. Can be later found in `report(mach)`.\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if Julia `VERSION>=1.7`. Otherwise, uses `MersenneTwister`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given\n\nfeatures `Xnew` having the same scitype as `X` above.
Predictions are probabilistic, but uncalibrated.\n\n * `predict_mode(mach, Xnew)`: return the mode of each prediction above\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# Load base classifier and BalancedBaggingClassifier\nBalancedBaggingClassifier = @load BalancedBaggingClassifier pkg=MLJBalancing\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\n# Construct the base classifier and use it to construct a BalancedBaggingClassifier\nlogistic_model = LogisticClassifier()\nmodel = BalancedBaggingClassifier(model=logistic_model, T=5)\n\n# Load the data and train the BalancedBaggingClassifier\nX, y = Imbalance.generate_imbalanced_data(100, 5; num_vals_per_category = [3, 2],\n class_probs = [0.9, 0.1],\n type = \"ColTable\",\n rng=42)\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇ 16 (19.0%)\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 84 (100.0%)\n\nmach = machine(model, X, y) |> fit!\n\n# Predict using the trained model\n\nyhat = predict(mach, X) # probabilistic predictions\npredict_mode(mach, X) # point predictions\n```\n" +":name" = "BalancedBaggingClassifier" +":human_name" = "balanced bagging classifier" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [] -":hyperparameters" = "`(:model, :tuning, :resampling, :measure, :weights, :class_weights, :operation, :range, :selection_heuristic, :train_best, :repeats, :n, :acceleration, :acceleration_resampling, :check_measure, :cache, :compact_history, :logger)`" -":hyperparameter_types" = "`(\"Union{MLJModelInterface.Deterministic, MLJModelInterface.DeterministicSupervisedDetector, MLJModelInterface.DeterministicUnsupervisedDetector}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict}\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"ComputationalResources.AbstractResource\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Bool\", \"Bool\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":n" -":supports_training_losses" = "`true`" +":hyperparameters" = "`(:model, :T, :rng)`" +":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Integer\", \"Union{Integer, Random.AbstractRNG}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`TunedModel`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`MLJBalancing.BalancedBaggingClassifier`" -[MLJTuning.ProbabilisticTunedModel] +[MLJBalancing.BalancedModel] ":input_scitype" = "`ScientificTypesBase.Unknown`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" @@ -5818,31 +5258,31 @@ ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_pure_julia" = "`false`" -":package_name" = "MLJTuning" +":package_name" = "MLJBalancing" ":package_license" = 
"MIT" -":load_path" = "MLJTuning.TunedModel" -":package_uuid" = "03970b2e-30c4-11ea-3135-d1576263f10f" -":package_url" = "https://github.com/alan-turing-institute/MLJTuning.jl" +":load_path" = "MLJBalancing.BalancedModel" +":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586" +":package_url" = "https://github.com/JuliaAI/MLJBalancing.jl" ":is_wrapper" = "`true`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\ntuned_model = TunedModel(; model=,\n tuning=RandomSearch(),\n resampling=Holdout(),\n range=nothing,\n measure=nothing,\n n=default_n(tuning, range),\n operation=nothing,\n other_options...)\n```\n\nConstruct a model wrapper for hyper-parameter optimization of a supervised learner, specifying the `tuning` strategy and `model` whose hyper-parameters are to be mutated.\n\n```\ntuned_model = TunedModel(; models=,\n resampling=Holdout(),\n measure=nothing,\n n=length(models),\n operation=nothing,\n other_options...)\n```\n\nConstruct a wrapper for multiple `models`, for selection of an optimal one (equivalent to specifying `tuning=Explicit()` and `range=models` above). Elements of the iterator `models` need not have a common type, but they must all be `Deterministic` or all be `Probabilistic` *and this is not checked* but inferred from the first element generated.\n\nSee below for a complete list of options.\n\n### Training\n\nCalling `fit!(mach)` on a machine `mach=machine(tuned_model, X, y)` or `mach=machine(tuned_model, X, y, w)` will:\n\n * Instigate a search, over clones of `model`, with the hyperparameter mutations specified by `range`, for a model optimizing the specified `measure`, using performance evaluations carried out using the specified `tuning` strategy and `resampling` strategy. In the case `models` is explictly listed, the search is instead over the models generated by the iterator `models`.\n * Fit an internal machine, based on the optimal model `fitted_params(mach).best_model`, wrapping the optimal `model` object in *all* the provided data `X`, `y`(, `w`). Calling `predict(mach, Xnew)` then returns predictions on `Xnew` of this internal machine. The final train can be supressed by setting `train_best=false`.\n\n### Search space\n\nThe `range` objects supported depend on the `tuning` strategy specified. Query the `strategy` docstring for details. To optimize over an explicit list `v` of models of the same type, use `strategy=Explicit()` and specify `model=v[1]` and `range=v`.\n\nThe number of models searched is specified by `n`. If unspecified, then `MLJTuning.default_n(tuning, range)` is used. When `n` is increased and `fit!(mach)` called again, the old search history is re-instated and the search continues where it left off.\n\n### Measures (metrics)\n\nIf more than one `measure` is specified, then only the first is optimized (unless `strategy` is multi-objective) but the performance against every measure specified will be computed and reported in `report(mach).best_performance` and other relevant attributes of the generated report. Options exist to pass per-observation weights or class weights to measures; see below.\n\n*Important.* If a custom measure, `my_measure` is used, and the measure is a score, rather than a loss, be sure to check that `MLJ.orientation(my_measure) == :score` to ensure maximization of the measure, rather than minimization. 
Override an incorrect value with `MLJ.orientation(::typeof(my_measure)) = :score`.\n\n### Accessing the fitted parameters and other training (tuning) outcomes\n\nA Plots.jl plot of performance estimates is returned by `plot(mach)` or `heatmap(mach)`.\n\nOnce a tuning machine `mach` has bee trained as above, then `fitted_params(mach)` has these keys/values:\n\n| key | value |\n| --------------------:| ---------------------------------------:|\n| `best_model` | optimal model instance |\n| `best_fitted_params` | learned parameters of the optimal model |\n\nThe named tuple `report(mach)` includes these keys/values:\n\n| key | value |\n| --------------------:| ------------------------------------------------------------------:|\n| `best_model` | optimal model instance |\n| `best_history_entry` | corresponding entry in the history, including performance estimate |\n| `best_report` | report generated by fitting the optimal model to all data |\n| `history` | tuning strategy-specific history of all evaluations |\n\nplus other key/value pairs specific to the `tuning` strategy.\n\nEach element of `history` is a property-accessible object with these properties:\n\n| key | value |\n| -------------:| -----------------------------------------------------------------:|\n| `measure` | vector of measures (metrics) |\n| `measurement` | vector of measurements, one per measure |\n| `per_fold` | vector of vectors of unaggregated per-fold measurements |\n| `evaluation` | full `PerformanceEvaluation`/`CompactPerformaceEvaluation` object |\n\n### Complete list of key-word options\n\n * `model`: `Supervised` model prototype that is cloned and mutated to generate models for evaluation\n * `models`: Alternatively, an iterator of MLJ models to be explicitly evaluated. These may have varying types.\n * `tuning=RandomSearch()`: tuning strategy to be applied (eg, `Grid()`). See the [Tuning Models](https://alan-turing-institute.github.io/MLJ.jl/dev/tuning_models/#Tuning-Models) section of the MLJ manual for a complete list of options.\n * `resampling=Holdout()`: resampling strategy (eg, `Holdout()`, `CV()`), `StratifiedCV()`) to be applied in performance evaluations\n * `measure`: measure or measures to be applied in performance evaluations; only the first used in optimization (unless the strategy is multi-objective) but all reported to the history\n * `weights`: per-observation weights to be passed the measure(s) in performance evaluations, where supported. Check support with `supports_weights(measure)`.\n * `class_weights`: class weights to be passed the measure(s) in performance evaluations, where supported. Check support with `supports_class_weights(measure)`.\n * `repeats=1`: for generating train/test sets multiple times in resampling (\"Monte Carlo\" resampling); see [`evaluate!`](@ref) for details\n * `operation`/`operations` - One of `predict`, `predict_mean`, `predict_mode`, `predict_median`, or `predict_joint`, or a vector of these of the same length as `measure`/`measures`. Automatically inferred if left unspecified.\n * `range`: range object; tuning strategy documentation describes supported types\n * `selection_heuristic`: the rule determining how the best model is decided. According to the default heuristic, `NaiveSelection()`, `measure` (or the first element of `measure`) is evaluated for each resample and these per-fold measurements are aggregrated. The model with the lowest (resp. highest) aggregate is chosen if the measure is a `:loss` (resp. 
a `:score`).\n * `n`: number of iterations (ie, models to be evaluated); set by tuning strategy if left unspecified\n * `train_best=true`: whether to train the optimal model\n * `acceleration=default_resource()`: mode of parallelization for tuning strategies that support this\n * `acceleration_resampling=CPU1()`: mode of parallelization for resampling\n * `check_measure=true`: whether to check `measure` is compatible with the specified `model` and `operation`)\n * `cache=true`: whether to cache model-specific representations of user-suplied data; set to `false` to conserve memory. Speed gains likely limited to the case `resampling isa Holdout`.\n * `compact_history=true`: whether to write `CompactPerformanceEvaluation`](@ref) or regular [`PerformanceEvaluation`](@ref) objects to the history (accessed via the `:evaluation` key); the compact form excludes some fields to conserve memory.\n"
-":name" = "ProbabilisticTunedModel"
-":human_name" = "probabilistic tuned model"
+":docstring" = "```\nBalancedModel(; model=nothing, balancer1=balancer_model1, balancer2=balancer_model2, ...)\nBalancedModel(model; balancer1=balancer_model1, balancer2=balancer_model2, ...)\n```\n\nGiven a classification model, and one or more balancer models that all implement the `MLJModelInterface`, `BalancedModel` allows constructing a sequential pipeline that wraps an arbitrary number of balancing models and a classifier together.\n\n# Operation\n\n * During training, data is first passed to `balancer1` and the result is passed to `balancer2` and so on; the result from the final balancer is then passed to the classifier for training.\n * During prediction, the balancers have no effect.\n\n# Arguments\n\n * `model::Supervised`: A classification model that implements the `MLJModelInterface`.\n * `balancer1::Static=...`: The first balancer model to pass the data to. This keyword argument can have any name.\n * `balancer2::Static=...`: The second balancer model to pass the data to. 
This keyword argument can have any name.\n * and so on for an arbitrary number of balancers.\n\n# Returns\n\n * An instance of type ProbabilisticBalancedModel or DeterministicBalancedModel, depending on the prediction type of model.\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# generate data\nX, y = Imbalance.generate_imbalanced_data(1000, 5; class_probs=[0.2, 0.3, 0.5])\n\n# prepare classification and balancing models\nSMOTENC = @load SMOTENC pkg=Imbalance verbosity=0\nTomekUndersampler = @load TomekUndersampler pkg=Imbalance verbosity=0\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\noversampler = SMOTENC(k=5, ratios=1.0, rng=42)\nundersampler = TomekUndersampler(min_ratios=0.5, rng=42)\nlogistic_model = LogisticClassifier()\n\n# wrap them in a BalancedModel\nbalanced_model = BalancedModel(model=logistic_model, balancer1=oversampler, balancer2=undersampler)\n\n# now this behaves as a unified model that can be trained, validated, fine-tuned, etc.\nmach = machine(balanced_model, X, y)\nfit!(mach)\n```\n" +":name" = "BalancedModel" +":human_name" = "balanced model probabilistic" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :tuning, :resampling, :measure, :weights, :class_weights, :operation, :range, :selection_heuristic, :train_best, :repeats, :n, :acceleration, :acceleration_resampling, :check_measure, :cache, :compact_history, :logger)`" -":hyperparameter_types" = "`(\"Union{MLJModelInterface.Probabilistic, MLJModelInterface.ProbabilisticSupervisedDetector, MLJModelInterface.ProbabilisticUnsupervisedDetector}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict}\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"ComputationalResources.AbstractResource\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Bool\", \"Bool\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":n" -":supports_training_losses" = "`true`" +":implemented_methods" = [":getproperty", ":propertynames", ":setproperty!"] +":hyperparameters" = "`(:balancers, :model)`" +":hyperparameter_types" = "`(\"Any\", \"MLJModelInterface.Probabilistic\")`" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`TunedModel`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":constructor" = "`BalancedModel`" [Imbalance.RandomOversampler] ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" @@ -6229,6 +5669,41 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" +[MLJTuning.TunedModel] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" 
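+# Illustrative sketch only, not generated metadata: typical use of the `TunedModel`
+# wrapper documented in the `:docstring` field below. Assumes MLJ and
+# MLJDecisionTreeInterface are installed; `X`, `y` are placeholder training data:
+#
+#   using MLJ
+#   Tree = @load DecisionTreeClassifier pkg=DecisionTree
+#   tree = Tree()
+#   r = range(tree, :max_depth, lower=1, upper=10)
+#   tuned_tree = TunedModel(model=tree, tuning=Grid(), resampling=CV(nfolds=3),
+#                           range=r, measure=log_loss)
+#   mach = machine(tuned_tree, X, y)
+#   fit!(mach)
+#   fitted_params(mach).best_model  # the optimal tree found by the grid search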
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_pure_julia" = "`false`"
+":package_name" = "MLJTuning"
+":package_license" = "MIT"
+":load_path" = "MLJTuning.TunedModel"
+":package_uuid" = "03970b2e-30c4-11ea-3135-d1576263f10f"
+":package_url" = "https://github.com/JuliaAI/MLJTuning.jl"
+":is_wrapper" = "`true`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```\ntuned_model = TunedModel(; model=,\n tuning=RandomSearch(),\n resampling=Holdout(),\n range=nothing,\n measure=nothing,\n n=default_n(tuning, range),\n operation=nothing,\n other_options...)\n```\n\nConstruct a model wrapper for hyper-parameter optimization of a supervised learner, specifying the `tuning` strategy and `model` whose hyper-parameters are to be mutated.\n\n```\ntuned_model = TunedModel(; models=,\n resampling=Holdout(),\n measure=nothing,\n n=length(models),\n operation=nothing,\n other_options...)\n```\n\nConstruct a wrapper for multiple `models`, for selection of an optimal one (equivalent to specifying `tuning=Explicit()` and `range=models` above). Elements of the iterator `models` need not have a common type, but they must all be `Deterministic` or all be `Probabilistic` *and this is not checked* but inferred from the first element generated.\n\nSee below for a complete list of options.\n\n### Training\n\nCalling `fit!(mach)` on a machine `mach=machine(tuned_model, X, y)` or `mach=machine(tuned_model, X, y, w)` will:\n\n * Instigate a search, over clones of `model`, with the hyperparameter mutations specified by `range`, for a model optimizing the specified `measure`, using performance evaluations carried out using the specified `tuning` strategy and `resampling` strategy. In the case `models` is explicitly listed, the search is instead over the models generated by the iterator `models`.\n * Fit an internal machine, based on the optimal model `fitted_params(mach).best_model`, wrapping the optimal `model` object in *all* the provided data `X`, `y`(, `w`). Calling `predict(mach, Xnew)` then returns predictions on `Xnew` of this internal machine. The final train can be suppressed by setting `train_best=false`.\n\n### Search space\n\nThe `range` objects supported depend on the `tuning` strategy specified. Query the `strategy` docstring for details. To optimize over an explicit list `v` of models of the same type, use `strategy=Explicit()` and specify `model=v[1]` and `range=v`.\n\nThe number of models searched is specified by `n`. If unspecified, then `MLJTuning.default_n(tuning, range)` is used. When `n` is increased and `fit!(mach)` called again, the old search history is re-instated and the search continues where it left off.\n\n### Measures (metrics)\n\nIf more than one `measure` is specified, then only the first is optimized (unless `strategy` is multi-objective) but the performance against every measure specified will be computed and reported in `report(mach).best_performance` and other relevant attributes of the generated report. Options exist to pass per-observation weights or class weights to measures; see below.\n\n*Important.* If a custom measure, `my_measure`, is used, and the measure is a score, rather than a loss, be sure to check that `MLJ.orientation(my_measure) == :score` to ensure maximization of the measure, rather than minimization. 
Override an incorrect value with `MLJ.orientation(::typeof(my_measure)) = :score`.\n\n### Accessing the fitted parameters and other training (tuning) outcomes\n\nA Plots.jl plot of performance estimates is returned by `plot(mach)` or `heatmap(mach)`.\n\nOnce a tuning machine `mach` has been trained as above, then `fitted_params(mach)` has these keys/values:\n\n| key | value |\n| --------------------:| ---------------------------------------:|\n| `best_model` | optimal model instance |\n| `best_fitted_params` | learned parameters of the optimal model |\n\nThe named tuple `report(mach)` includes these keys/values:\n\n| key | value |\n| --------------------:| ------------------------------------------------------------------:|\n| `best_model` | optimal model instance |\n| `best_history_entry` | corresponding entry in the history, including performance estimate |\n| `best_report` | report generated by fitting the optimal model to all data |\n| `history` | tuning strategy-specific history of all evaluations |\n\nplus other key/value pairs specific to the `tuning` strategy.\n\nEach element of `history` is a property-accessible object with these properties:\n\n| key | value |\n| -------------:| -----------------------------------------------------------------:|\n| `measure` | vector of measures (metrics) |\n| `measurement` | vector of measurements, one per measure |\n| `per_fold` | vector of vectors of unaggregated per-fold measurements |\n| `evaluation` | full `PerformanceEvaluation`/`CompactPerformanceEvaluation` object |\n\n### Complete list of keyword options\n\n * `model`: `Supervised` model prototype that is cloned and mutated to generate models for evaluation\n * `models`: Alternatively, an iterator of MLJ models to be explicitly evaluated. These may have varying types.\n * `tuning=RandomSearch()`: tuning strategy to be applied (eg, `Grid()`). See the [Tuning Models](https://JuliaAI.github.io/MLJ.jl/dev/tuning_models/#Tuning-Models) section of the MLJ manual for a complete list of options.\n * `resampling=Holdout()`: resampling strategy (eg, `Holdout()`, `CV()`, `StratifiedCV()`) to be applied in performance evaluations\n * `measure`: measure or measures to be applied in performance evaluations; only the first used in optimization (unless the strategy is multi-objective) but all reported to the history\n * `weights`: per-observation weights to be passed to the measure(s) in performance evaluations, where supported. Check support with `supports_weights(measure)`.\n * `class_weights`: class weights to be passed to the measure(s) in performance evaluations, where supported. Check support with `supports_class_weights(measure)`.\n * `repeats=1`: for generating train/test sets multiple times in resampling (\"Monte Carlo\" resampling); see [`evaluate!`](@ref) for details\n * `operation`/`operations` - One of `predict`, `predict_mean`, `predict_mode`, `predict_median`, or `predict_joint`, or a vector of these of the same length as `measure`/`measures`. Automatically inferred if left unspecified.\n * `range`: range object; tuning strategy documentation describes supported types\n * `selection_heuristic`: the rule determining how the best model is decided. According to the default heuristic, `NaiveSelection()`, `measure` (or the first element of `measure`) is evaluated for each resample and these per-fold measurements are aggregated. The model with the lowest (resp. highest) aggregate is chosen if the measure is a `:loss` (resp. 
a `:score`).\n * `n`: number of iterations (ie, models to be evaluated); set by tuning strategy if left unspecified\n * `train_best=true`: whether to train the optimal model\n * `acceleration=default_resource()`: mode of parallelization for tuning strategies that support this\n * `acceleration_resampling=CPU1()`: mode of parallelization for resampling\n * `check_measure=true`: whether to check `measure` is compatible with the specified `model` and `operation`\n * `cache=true`: whether to cache model-specific representations of user-supplied data; set to `false` to conserve memory. Speed gains likely limited to the case `resampling isa Holdout`.\n * `compact_history=true`: whether to write [`CompactPerformanceEvaluation`](@ref) or regular [`PerformanceEvaluation`](@ref) objects to the history (accessed via the `:evaluation` key); the compact form excludes some fields to conserve memory.\n"
+":name" = "TunedModel"
+":human_name" = "probabilistic tuned model"
+":is_supervised" = "`true`"
+":prediction_type" = ":probabilistic"
+":abstract_type" = "`MLJModelInterface.Probabilistic`"
+":implemented_methods" = []
+":hyperparameters" = "`(:model, :tuning, :resampling, :measure, :weights, :class_weights, :operation, :range, :selection_heuristic, :train_best, :repeats, :n, :acceleration, :acceleration_resampling, :check_measure, :cache, :compact_history, :logger)`"
+":hyperparameter_types" = "`(\"Union{MLJModelInterface.Probabilistic, MLJModelInterface.ProbabilisticSupervisedDetector, MLJModelInterface.ProbabilisticUnsupervisedDetector}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict}\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"ComputationalResources.AbstractResource\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Bool\", \"Bool\", \"Any\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":iteration_parameter" = ":n"
+":supports_training_losses" = "`true`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+":constructor" = "`TunedModel`"
+
[Clustering.HierarchicalClustering]
":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`"
":output_scitype" = "`ScientificTypesBase.Unknown`"
@@ -6377,62 +5852,237 @@
":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
":transform_scitype" = "`ScientificTypesBase.Unknown`"
":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":is_pure_julia" = "`true`"
-":package_name" = "EvoLinear"
-":package_license" = "MIT"
-":load_path" = "EvoLinear.EvoSplineRegressor"
-":package_uuid" = "ab853011-1780-437f-b4b5-5de6f4777246"
-":package_url" = "https://github.com/jeremiedb/EvoLinear.jl"
+":is_pure_julia" = "`true`"
+":package_name" = "EvoLinear"
+":package_license" = "MIT"
+":load_path" = "EvoLinear.EvoSplineRegressor"
+":package_uuid" = "ab853011-1780-437f-b4b5-5de6f4777246"
+":package_url" = "https://github.com/jeremiedb/EvoLinear.jl"
":is_wrapper" = "`false`"
":supports_weights" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
":docstring" = "```\nEvoSplineRegressor(; kwargs...)\n```\n\nA model type for constructing an EvoSplineRegressor, based on [EvoLinear.jl](https://github.com/jeremiedb/EvoLinear.jl), and implementing both an internal API and 
the MLJ model interface.\n\n# Keyword arguments\n\n * `loss=:mse`: loss function to be minimised. Can be one of:\n\n * `:mse`\n * `:logistic`\n * `:poisson`\n * `:gamma`\n * `:tweedie`\n * `nrounds=10`: maximum number of training rounds.\n * `eta=1`: Learning rate. Typically in the range `[1e-2, 1]`.\n * `L1=0`: Regularization penalty applied by shrinking to 0 weight update if update is < L1. No penalty if update > L1. Results in sparse feature selection. Typically in the `[0, 1]` range on normalized features.\n * `L2=0`: Regularization penalty applied to the square of the weight update value. Restricts large parameter values. Typically in the `[0, 1]` range on normalized features.\n * `rng=123`: random seed. Not used at the moment.\n * `updater=:all`: training method. Only `:all` is supported at the moment. Gradients for each feature are computed simultaneously, then bias is updated based on all features update.\n * `device=:cpu`: Only `:cpu` is supported at the moment.\n\n# Internal API\n\nDo `config = EvoSplineRegressor()` to construct a hyper-parameter struct with default hyper-parameters. Provide keyword arguments as listed above to override defaults, for example:\n\n```julia\nEvoSplineRegressor(loss=:logistic, L1=1e-3, L2=1e-2, nrounds=100)\n```\n\n## Training model\n\nA model is built using [`fit`](@ref):\n\n```julia\nconfig = EvoSplineRegressor()\nm = fit(config; x, y, w)\n```\n\n## Inference\n\nThe fitted result is an `EvoLinearModel`, which acts as a prediction function when passed a features matrix as argument. \n\n```julia\npreds = m(x)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoSplineRegressor = @load EvoSplineRegressor pkg=EvoLinear\n```\n\nDo `model = EvoSplineRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoSplineRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where: \n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given\n\nfeatures `Xnew` having the same scitype as `X` above. 
Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: the `SplineModel` object returned by the EvoSplineRegressor fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:coef`: Vector of coefficients (βs) associated with each of the features.\n * `:bias`: Value of the bias.\n * `:names`: Names of each of the features.\n"
+":name" = "EvoSplineRegressor"
+":human_name" = "evo spline regressor"
+":is_supervised" = "`true`"
+":prediction_type" = ":deterministic"
+":abstract_type" = "`MLJModelInterface.Deterministic`"
+":implemented_methods" = [":fit", ":predict", ":update"]
+":hyperparameters" = "`(:nrounds, :opt, :batchsize, :act, :eta, :L2, :knots, :rng, :device)`"
+":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Symbol\", \"Any\", \"Any\", \"Union{Nothing, Dict}\", \"Any\", \"Symbol\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":iteration_parameter" = ":nrounds"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+":constructor" = "`nothing`"
+
+[EvoLinear.EvoLinearRegressor]
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_pure_julia" = "`true`"
+":package_name" = "EvoLinear"
+":package_license" = "MIT"
+":load_path" = "EvoLinear.EvoLinearRegressor"
+":package_uuid" = "ab853011-1780-437f-b4b5-5de6f4777246"
+":package_url" = "https://github.com/jeremiedb/EvoLinear.jl"
+":is_wrapper" = "`false`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```\nEvoLinearRegressor(; kwargs...)\n```\n\nA model type for constructing an EvoLinearRegressor, based on [EvoLinear.jl](https://github.com/jeremiedb/EvoLinear.jl), and implementing both an internal API and the MLJ model interface.\n\n# Keyword arguments\n\n * `loss=:mse`: loss function to be minimised. Can be one of:\n\n * `:mse`\n * `:logistic`\n * `:poisson`\n * `:gamma`\n * `:tweedie`\n * `nrounds=10`: maximum number of training rounds.\n * `eta=1`: Learning rate. Typically in the range `[1e-2, 1]`.\n * `L1=0`: Regularization penalty applied by shrinking to 0 weight update if update is < L1. No penalty if update > L1. Results in sparse feature selection. Typically in the `[0, 1]` range on normalized features.\n * `L2=0`: Regularization penalty applied to the square of the weight update value. Restricts large parameter values. Typically in the `[0, 1]` range on normalized features.\n * `rng=123`: random seed. Not used at the moment.\n * `updater=:all`: training method. 
Only `:all` is supported at the moment. Gradients for each feature are computed simultaneously, then bias is updated based on all features update.\n * `device=:cpu`: Only `:cpu` is supported at the moment.\n\n# Internal API\n\nDo `config = EvoLinearRegressor()` to construct a hyper-parameter struct with default hyper-parameters. Provide keyword arguments as listed above to override defaults, for example:\n\n```julia\nEvoLinearRegressor(loss=:logistic, L1=1e-3, L2=1e-2, nrounds=100)\n```\n\n## Training model\n\nA model is built using [`fit`](@ref):\n\n```julia\nconfig = EvoLinearRegressor()\nm = fit(config; x, y, w)\n```\n\n## Inference\n\nThe fitted result is an `EvoLinearModel`, which acts as a prediction function when passed a features matrix as argument. \n\n```julia\npreds = m(x)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoLinearRegressor = @load EvoLinearRegressor pkg=EvoLinear\n```\n\nDo `model = EvoLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoLinearRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where: \n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given\n\nfeatures `Xnew` having the same scitype as `X` above. 
Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: the `EvoLinearModel` object returned by the EvoLinear.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:coef`: Vector of coefficients (βs) associated with each of the features.\n * `:bias`: Value of the bias.\n * `:names`: Names of each of the features.\n"
+":name" = "EvoLinearRegressor"
+":human_name" = "evo linear regressor"
+":is_supervised" = "`true`"
+":prediction_type" = ":deterministic"
+":abstract_type" = "`MLJModelInterface.Deterministic`"
+":implemented_methods" = [":fit", ":predict", ":update"]
+":hyperparameters" = "`(:updater, :nrounds, :eta, :L1, :L2, :rng, :device)`"
+":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Any\", \"Any\", \"Any\", \"Any\", \"Symbol\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":iteration_parameter" = ":nrounds"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+":constructor" = "`nothing`"
+
+[MLJText.TfidfTransformer]
+":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`"
+":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`"
+":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`"
+":is_pure_julia" = "`true`"
+":package_name" = "MLJText"
+":package_license" = "MIT"
+":load_path" = "MLJText.TfidfTransformer"
+":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a"
+":package_url" = "https://github.com/JuliaAI/MLJText.jl"
+":is_wrapper" = "`false`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```\nTfidfTransformer\n```\n\nA model type for constructing a TF-IDF transformer, based on [MLJText.jl](https://github.com/JuliaAI/MLJText.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nTfidfTransformer = @load TfidfTransformer pkg=MLJText\n```\n\nDo `model = TfidfTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `TfidfTransformer(max_doc_freq=...)`.\n\nThe transformer converts a collection of documents, tokenized or pre-parsed as bags of words/ngrams, to a matrix of [TF-IDF scores](https://en.wikipedia.org/wiki/Tf–idf#Inverse_document_frequency_2). Here \"TF\" means term-frequency while \"IDF\" means inverse document frequency (defined below). 
The TF-IDF score is the product of the two. This is a common term weighting scheme in information retrieval that has also found good use in document classification. The goal of using TF-IDF instead of the raw frequencies of occurrence of a token in a given document is to scale down the impact of tokens that occur very frequently in a given corpus and that are hence empirically less informative than features that occur in a small fraction of the training corpus.\n\nIn textbooks and implementations there is variation in the definition of IDF. Here two IDF definitions are available. The default, smoothed option provides the IDF for a term `t` as `log((1 + n)/(1 + df(t))) + 1`, where `n` is the total number of documents and `df(t)` the number of documents in which `t` appears. Setting `smooth_idf = false` provides an IDF of `log(n/df(t)) + 1`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any vector whose elements are either tokenized documents or bags of words/ngrams. Specifically, each element is one of the following:\n\n * A vector of abstract strings (tokens), e.g., `[\"I\", \"like\", \"Sam\", \".\", \"Sam\", \"is\", \"nice\", \".\"]` (scitype `AbstractVector{Textual}`)\n * A dictionary of counts, indexed on abstract strings, e.g., `Dict(\"I\"=>1, \"Sam\"=>2, \"Sam is\"=>1)` (scitype `Multiset{Textual}`)\n * A dictionary of counts, indexed on plain ngrams, e.g., `Dict((\"I\",)=>1, (\"Sam\",)=>2, (\"I\", \"Sam\")=>1)` (scitype `Multiset{<:NTuple{N,Textual} where N}`); here a *plain ngram* is a tuple of abstract strings.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `> max_doc_freq` documents will not be considered by the transformer. For example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the documents will be removed.\n * `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `< min_doc_freq` documents will not be considered by the transformer. A value of 0.01 means that only terms that are at least in 1% of the documents will be included.\n * `smooth_idf=true`: Control which definition of IDF to use (see above).\n\n# Operations\n\n * `transform(mach, Xnew)`: Based on the vocabulary and IDF learned in training, return the matrix of TF-IDF scores for `Xnew`, a vector of the same form as `X` above. The matrix has size `(n, p)`, where `n = length(Xnew)` and `p` the size of the vocabulary. Tokens/ngrams not appearing in the learned vocabulary are scored zero.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vocab`: A vector containing the strings used in the transformer's vocabulary.\n * `idf_vector`: The transformer's calculated IDF vector.\n\n# Examples\n\n`TfidfTransformer` accepts a variety of inputs. 
The example below transforms tokenized documents:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\nTfidfTransformer = @load TfidfTransformer pkg=MLJText\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ntfidf_transformer = TfidfTransformer()\n\njulia> tokenized_docs = TextAnalysis.tokenize.(docs)\n2-element Vector{Vector{String}}:\n [\"Hi\", \"my\", \"name\", \"is\", \"Sam\", \".\"]\n [\"How\", \"are\", \"you\", \"today\", \"?\"]\n\nmach = machine(tfidf_transformer, tokenized_docs)\nfit!(mach)\n\nfitted_params(mach)\n\ntfidf_mat = transform(mach, tokenized_docs)\n```\n\nAlternatively, one can provide documents pre-parsed as ngrams counts:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncorpus = TextAnalysis.Corpus(TextAnalysis.NGramDocument.(docs, 1, 2))\nngram_docs = TextAnalysis.ngrams.(corpus)\n\njulia> ngram_docs[1]\nDict{AbstractString, Int64} with 11 entries:\n \"is\" => 1\n \"my\" => 1\n \"name\" => 1\n \".\" => 1\n \"Hi\" => 1\n \"Sam\" => 1\n \"my name\" => 1\n \"Hi my\" => 1\n \"name is\" => 1\n \"Sam .\" => 1\n \"is Sam\" => 1\n\ntfidf_transformer = TfidfTransformer()\nmach = machine(tfidf_transformer, ngram_docs)\nMLJ.fit!(mach)\nfitted_params(mach)\n\ntfidf_mat = transform(mach, ngram_docs)\n```\n\nSee also [`CountTransformer`](@ref), [`BM25Transformer`](@ref)\n"
+":name" = "TfidfTransformer"
+":human_name" = "TF-IDF transformer"
+":is_supervised" = "`false`"
+":prediction_type" = ":unknown"
+":abstract_type" = "`MLJModelInterface.Unsupervised`"
+":implemented_methods" = [":fitted_params"]
+":hyperparameters" = "`(:max_doc_freq, :min_doc_freq, :smooth_idf)`"
+":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Bool\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+":constructor" = "`nothing`"
+
+[MLJText.CountTransformer]
+":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`"
+":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`"
+":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`"
+":is_pure_julia" = "`true`"
+":package_name" = "MLJText"
+":package_license" = "MIT"
+":load_path" = "MLJText.CountTransformer"
+":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a"
+":package_url" = "https://github.com/JuliaAI/MLJText.jl"
+":is_wrapper" = "`false`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online"
= "`false`"
+":docstring" = "```\nCountTransformer\n```\n\nA model type for constructing a count transformer, based on [MLJText.jl](https://github.com/JuliaAI/MLJText.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCountTransformer = @load CountTransformer pkg=MLJText\n```\n\nDo `model = CountTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CountTransformer(max_doc_freq=...)`.\n\nThe transformer converts a collection of documents, tokenized or pre-parsed as bags of words/ngrams, to a matrix of term counts.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any vector whose elements are either tokenized documents or bags of words/ngrams. Specifically, each element is one of the following:\n\n * A vector of abstract strings (tokens), e.g., `[\"I\", \"like\", \"Sam\", \".\", \"Sam\", \"is\", \"nice\", \".\"]` (scitype `AbstractVector{Textual}`)\n * A dictionary of counts, indexed on abstract strings, e.g., `Dict(\"I\"=>1, \"Sam\"=>2, \"Sam is\"=>1)` (scitype `Multiset{Textual}`)\n * A dictionary of counts, indexed on plain ngrams, e.g., `Dict((\"I\",)=>1, (\"Sam\",)=>2, (\"I\", \"Sam\")=>1)` (scitype `Multiset{<:NTuple{N,Textual} where N}`); here a *plain ngram* is a tuple of abstract strings.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `> max_doc_freq` documents will not be considered by the transformer. For example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the documents will be removed.\n * `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `< min_doc_freq` documents will not be considered by the transformer. A value of 0.01 means that only terms that are at least in 1% of the documents will be included.\n\n# Operations\n\n * `transform(mach, Xnew)`: Based on the vocabulary learned in training, return the matrix of counts for `Xnew`, a vector of the same form as `X` above. The matrix has size `(n, p)`, where `n = length(Xnew)` and `p` the size of the vocabulary. Tokens/ngrams not appearing in the learned vocabulary are scored zero.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vocab`: A vector containing the strings used in the transformer's vocabulary.\n\n# Examples\n\n`CountTransformer` accepts a variety of inputs. 
The example below transforms tokenized documents:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\nCountTransformer = @load CountTransformer pkg=MLJText\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncount_transformer = CountTransformer()\n\njulia> tokenized_docs = TextAnalysis.tokenize.(docs)\n2-element Vector{Vector{String}}:\n [\"Hi\", \"my\", \"name\", \"is\", \"Sam\", \".\"]\n [\"How\", \"are\", \"you\", \"today\", \"?\"]\n\nmach = machine(count_transformer, tokenized_docs)\nfit!(mach)\n\nfitted_params(mach)\n\ntfidf_mat = transform(mach, tokenized_docs)\n```\n\nAlternatively, one can provide documents pre-parsed as ngrams counts:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncorpus = TextAnalysis.Corpus(TextAnalysis.NGramDocument.(docs, 1, 2))\nngram_docs = TextAnalysis.ngrams.(corpus)\n\njulia> ngram_docs[1]\nDict{AbstractString, Int64} with 11 entries:\n \"is\" => 1\n \"my\" => 1\n \"name\" => 1\n \".\" => 1\n \"Hi\" => 1\n \"Sam\" => 1\n \"my name\" => 1\n \"Hi my\" => 1\n \"name is\" => 1\n \"Sam .\" => 1\n \"is Sam\" => 1\n\ncount_transformer = CountTransformer()\nmach = machine(count_transformer, ngram_docs)\nMLJ.fit!(mach)\nfitted_params(mach)\n\ntfidf_mat = transform(mach, ngram_docs)\n```\n\nSee also [`TfidfTransformer`](@ref), [`BM25Transformer`](@ref)\n"
+":name" = "CountTransformer"
+":human_name" = "count transformer"
+":is_supervised" = "`false`"
+":prediction_type" = ":unknown"
+":abstract_type" = "`MLJModelInterface.Unsupervised`"
+":implemented_methods" = [":fitted_params"]
+":hyperparameters" = "`(:max_doc_freq, :min_doc_freq)`"
+":hyperparameter_types" = "`(\"Float64\", \"Float64\")`"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+":constructor" = "`nothing`"
+
+[MLJText.BM25Transformer]
+":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`"
+":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`"
+":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`"
+":is_pure_julia" = "`true`"
+":package_name" = "MLJText"
+":package_license" = "MIT"
+":load_path" = "MLJText.BM25Transformer"
+":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a"
+":package_url" = "https://github.com/JuliaAI/MLJText.jl"
+":is_wrapper" = "`false`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = 
"```\nBM25Transformer\n```\n\nA model type for constructing a b m25 transformer, based on [MLJText.jl](https://github.com/JuliaAI/MLJText.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBM25Transformer = @load BM25Transformer pkg=MLJText\n```\n\nDo `model = BM25Transformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BM25Transformer(max_doc_freq=...)`.\n\nThe transformer converts a collection of documents, tokenized or pre-parsed as bags of words/ngrams, to a matrix of [Okapi BM25 document-word statistics](https://en.wikipedia.org/wiki/Okapi_BM25). The BM25 scoring function uses both term frequency (TF) and inverse document frequency (IDF, defined below), as in [`TfidfTransformer`](@ref), but additionally adjusts for the probability that a user will consider a search result relevant, based on the terms in the search query and those in each document.\n\nIn textbooks and implementations there is variation in the definition of IDF. Here two IDF definitions are available. The default, smoothed option provides the IDF for a term `t` as `log((1 + n)/(1 + df(t))) + 1`, where `n` is the total number of documents and `df(t)` the number of documents in which `t` appears. Setting `smooth_idf = false` provides an IDF of `log(n/df(t)) + 1`.\n\nReferences:\n\n * http://ethen8181.github.io/machine-learning/search/bm25_intro.html\n * https://en.wikipedia.org/wiki/Okapi_BM25\n * https://nlp.stanford.edu/IR-book/html/htmledition/okapi-bm25-a-non-binary-model-1.html\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any vector whose elements are either tokenized documents or bags of words/ngrams. Specifically, each element is one of the following:\n\n * A vector of abstract strings (tokens), e.g., `[\"I\", \"like\", \"Sam\", \".\", \"Sam\", \"is\", \"nice\", \".\"]` (scitype `AbstractVector{Textual}`)\n * A dictionary of counts, indexed on abstract strings, e.g., `Dict(\"I\"=>1, \"Sam\"=>2, \"Sam is\"=>1)` (scitype `Multiset{Textual}`)\n * A dictionary of counts, indexed on plain ngrams, e.g., `Dict((\"I\",)=>1, (\"Sam\",)=>2, (\"I\", \"Sam\")=>1)` (scitype `Multiset{<:NTuple{N,Textual} where N}`); here a *plain ngram* is a tuple of abstract strings.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `> max_doc_freq` documents will not be considered by the transformer. For example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the documents will be removed.\n * `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `< min_doc_freq` documents will not be considered by the transformer. A value of 0.01 means that only terms that are at least in 1% of the documents will be included.\n * `κ=2`: The term frequency saturation characteristic. Higher values represent slower saturation. What we mean by saturation is the degree to which a term occurring extra times adds to the overall score.\n * `β=0.75`: Amplifies the particular document length compared to the average length. The bigger β is, the more document length is amplified in terms of the overall score. 
The default value is 0.75, and the bounds are restricted between 0 and 1.\n * `smooth_idf=true`: Control which definition of IDF to use (see above).\n\n# Operations\n\n * `transform(mach, Xnew)`: Based on the vocabulary, IDF, and mean word counts learned in training, return the matrix of BM25 scores for `Xnew`, a vector of the same form as `X` above. The matrix has size `(n, p)`, where `n = length(Xnew)` and `p` the size of the vocabulary. Tokens/ngrams not appearing in the learned vocabulary are scored zero.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vocab`: A vector containing the string used in the transformer's vocabulary.\n * `idf_vector`: The transformer's calculated IDF vector.\n * `mean_words_in_docs`: The mean number of words in each document.\n\n# Examples\n\n`BM25Transformer` accepts a variety of inputs. The example below transforms tokenized documents:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\nBM25Transformer = @load BM25Transformer pkg=MLJText\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\nbm25_transformer = BM25Transformer()\n\njulia> tokenized_docs = TextAnalysis.tokenize.(docs)\n2-element Vector{Vector{String}}:\n [\"Hi\", \"my\", \"name\", \"is\", \"Sam\", \".\"]\n [\"How\", \"are\", \"you\", \"today\", \"?\"]\n\nmach = machine(bm25_transformer, tokenized_docs)\nfit!(mach)\n\nfitted_params(mach)\n\ntfidf_mat = transform(mach, tokenized_docs)\n```\n\nAlternatively, one can provide documents pre-parsed as ngrams counts:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncorpus = TextAnalysis.Corpus(TextAnalysis.NGramDocument.(docs, 1, 2))\nngram_docs = TextAnalysis.ngrams.(corpus)\n\njulia> ngram_docs[1]\nDict{AbstractString, Int64} with 11 entries:\n \"is\" => 1\n \"my\" => 1\n \"name\" => 1\n \".\" => 1\n \"Hi\" => 1\n \"Sam\" => 1\n \"my name\" => 1\n \"Hi my\" => 1\n \"name is\" => 1\n \"Sam .\" => 1\n \"is Sam\" => 1\n\nbm25_transformer = BM25Transformer()\nmach = machine(bm25_transformer, ngram_docs)\nMLJ.fit!(mach)\nfitted_params(mach)\n\ntfidf_mat = transform(mach, ngram_docs)\n```\n\nSee also [`TfidfTransformer`](@ref), [`CountTransformer`](@ref)\n" +":name" = "BM25Transformer" +":human_name" = "b m25 transformer" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fitted_params"] +":hyperparameters" = "`(:max_doc_freq, :min_doc_freq, :κ, :β, :smooth_idf)`" +":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[LightGBM.LGBMClassifier] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" 
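+# Illustrative sketch only, not generated metadata: loading and using `LGBMClassifier`
+# from MLJ, per the entry below. Assumes LightGBM.jl is installed; `X` is a placeholder
+# table of `Continuous` features and `y` a `Finite` target, matching the scitypes above:
+#
+#   using MLJ
+#   LGBMClassifier = @load LGBMClassifier pkg=LightGBM
+#   model = LGBMClassifier(num_iterations=100, learning_rate=0.1)
+#   mach = machine(model, X, y)
+#   fit!(mach)
+#   yhat = predict(mach, X)   # probabilistic predictions
+#   predict_mode(mach, X)     # point predictions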
+":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "LightGBM" +":package_license" = "MIT Expat" +":load_path" = "LightGBM.MLJInterface.LGBMClassifier" +":package_uuid" = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" +":package_url" = "https://github.com/IQVIA-ML/LightGBM.jl" ":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nEvoSplineRegressor(; kwargs...)\n```\n\nA model type for constructing a EvoSplineRegressor, based on [EvoLinear.jl](https://github.com/jeremiedb/EvoLinear.jl), and implementing both an internal API and the MLJ model interface.\n\n# Keyword arguments\n\n * `loss=:mse`: loss function to be minimised. Can be one of:\n\n * `:mse`\n * `:logistic`\n * `:poisson`\n * `:gamma`\n * `:tweedie`\n * `nrounds=10`: maximum number of training rounds.\n * `eta=1`: Learning rate. Typically in the range `[1e-2, 1]`.\n * `L1=0`: Regularization penalty applied by shrinking to 0 weight update if update is < L1. No penalty if update > L1. Results in sparse feature selection. Typically in the `[0, 1]` range on normalized features.\n * `L2=0`: Regularization penalty applied to the squared of the weight update value. Restricts large parameter values. Typically in the `[0, 1]` range on normalized features.\n * `rng=123`: random seed. Not used at the moment.\n * `updater=:all`: training method. Only `:all` is supported at the moment. Gradients for each feature are computed simultaneously, then bias is updated based on all features update.\n * `device=:cpu`: Only `:cpu` is supported at the moment.\n\n# Internal API\n\nDo `config = EvoSplineRegressor()` to construct an hyper-parameter struct with default hyper-parameters. Provide keyword arguments as listed above to override defaults, for example:\n\n```julia\nEvoSplineRegressor(loss=:logistic, L1=1e-3, L2=1e-2, nrounds=100)\n```\n\n## Training model\n\nA model is built using [`fit`](@ref):\n\n```julia\nconfig = EvoSplineRegressor()\nm = fit(config; x, y, w)\n```\n\n## Inference\n\nFitted results is an `EvoLinearModel` which acts as a prediction function when passed a features matrix as argument. \n\n```julia\npreds = m(x)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoSplineRegressor = @load EvoSplineRegressor pkg=EvoLinear\n```\n\nDo `model = EvoLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoSplineRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where: \n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given\n\nfeatures `Xnew` having the same scitype as `X` above. 
Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: the `SplineModel` object returned by EvoSplineRegressor fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:coef`: Vector of coefficients (βs) associated to each of the features.\n * `:bias`: Value of the bias.\n * `:names`: Names of each of the features.\n" -":name" = "EvoSplineRegressor" -":human_name" = "evo spline regressor" +":docstring" = "Microsoft LightGBM FFI wrapper: Classifier" +":name" = "LGBMClassifier" +":human_name" = "lgbm classifier" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":fit", ":predict", ":update"] -":hyperparameters" = "`(:nrounds, :opt, :batchsize, :act, :eta, :L2, :knots, :rng, :device)`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Symbol\", \"Any\", \"Any\", \"Union{Nothing, Dict}\", \"Any\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":nrounds" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [":clean!", ":fit", ":predict", ":update"] +":hyperparameters" = "`(:boosting, :num_iterations, :learning_rate, :num_leaves, :max_depth, :tree_learner, :histogram_pool_size, :min_data_in_leaf, :min_sum_hessian_in_leaf, :max_delta_step, :lambda_l1, :lambda_l2, :min_gain_to_split, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :bagging_fraction, :pos_bagging_fraction, :neg_bagging_fraction, :bagging_freq, :bagging_seed, :early_stopping_round, :extra_trees, :extra_seed, :max_bin, :bin_construct_sample_cnt, :init_score, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :objective, :categorical_feature, :data_random_seed, :is_sparse, :is_unbalance, :boost_from_average, :scale_pos_weight, :use_missing, :linear_tree, :feature_pre_filter, :metric, :metric_freq, :is_training_metric, :ndcg_at, :num_machines, :num_threads, :local_listen_port, :time_out, :machine_list_file, :save_binary, :device_type, :gpu_use_dp, :gpu_platform_id, :gpu_device_id, :num_gpu, :force_col_wise, :force_row_wise, :truncate_booster)`" +":hyperparameter_types" = "`(\"String\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Any\", \"String\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"String\", \"Vector{Int64}\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"Bool\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[EvoLinear.EvoLinearRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +[LightGBM.LGBMRegressor] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`true`" -":package_name" = "EvoLinear" -":package_license" = "MIT" -":load_path" = "EvoLinear.EvoLinearRegressor" -":package_uuid" = "ab853011-1780-437f-b4b5-5de6f4777246" -":package_url" = "https://github.com/jeremiedb/EvoLinear.jl" +":is_pure_julia" = "`false`" +":package_name" = "LightGBM" +":package_license" = "MIT Expat" +":load_path" = "LightGBM.MLJInterface.LGBMRegressor" +":package_uuid" = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" +":package_url" = "https://github.com/IQVIA-ML/LightGBM.jl" ":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nEvoLinearRegressor(; kwargs...)\n```\n\nA model type for constructing a EvoLinearRegressor, based on [EvoLinear.jl](https://github.com/jeremiedb/EvoLinear.jl), and implementing both an internal API and the MLJ model interface.\n\n# Keyword arguments\n\n * `loss=:mse`: loss function to be minimised. Can be one of:\n\n * `:mse`\n * `:logistic`\n * `:poisson`\n * `:gamma`\n * `:tweedie`\n * `nrounds=10`: maximum number of training rounds.\n * `eta=1`: Learning rate. Typically in the range `[1e-2, 1]`.\n * `L1=0`: Regularization penalty applied by shrinking to 0 weight update if update is < L1. No penalty if update > L1. Results in sparse feature selection. 
Typically in the `[0, 1]` range on normalized features.\n * `L2=0`: Regularization penalty applied to the squared of the weight update value. Restricts large parameter values. Typically in the `[0, 1]` range on normalized features.\n * `rng=123`: random seed. Not used at the moment.\n * `updater=:all`: training method. Only `:all` is supported at the moment. Gradients for each feature are computed simultaneously, then bias is updated based on all features update.\n * `device=:cpu`: Only `:cpu` is supported at the moment.\n\n# Internal API\n\nDo `config = EvoLinearRegressor()` to construct an hyper-parameter struct with default hyper-parameters. Provide keyword arguments as listed above to override defaults, for example:\n\n```julia\nEvoLinearRegressor(loss=:logistic, L1=1e-3, L2=1e-2, nrounds=100)\n```\n\n## Training model\n\nA model is built using [`fit`](@ref):\n\n```julia\nconfig = EvoLinearRegressor()\nm = fit(config; x, y, w)\n```\n\n## Inference\n\nFitted results is an `EvoLinearModel` which acts as a prediction function when passed a features matrix as argument. \n\n```julia\npreds = m(x)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoLinearRegressor = @load EvoLinearRegressor pkg=EvoLinear\n```\n\nDo `model = EvoLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoLinearRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where: \n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given\n\nfeatures `Xnew` having the same scitype as `X` above. 
Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: the `EvoLinearModel` object returned by EvoLnear.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:coef`: Vector of coefficients (βs) associated to each of the features.\n * `:bias`: Value of the bias.\n * `:names`: Names of each of the features.\n" -":name" = "EvoLinearRegressor" -":human_name" = "evo linear regressor" +":docstring" = "Microsoft LightGBM FFI wrapper: Regressor" +":name" = "LGBMRegressor" +":human_name" = "lgbm regressor" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":fit", ":predict", ":update"] -":hyperparameters" = "`(:updater, :nrounds, :eta, :L1, :L2, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Any\", \"Any\", \"Any\", \"Any\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":nrounds" +":implemented_methods" = [":clean!", ":fit", ":predict", ":update"] +":hyperparameters" = "`(:boosting, :num_iterations, :learning_rate, :num_leaves, :max_depth, :tree_learner, :histogram_pool_size, :min_data_in_leaf, :min_sum_hessian_in_leaf, :max_delta_step, :lambda_l1, :lambda_l2, :min_gain_to_split, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :bagging_fraction, :bagging_freq, :bagging_seed, :early_stopping_round, :extra_trees, :extra_seed, :max_bin, :bin_construct_sample_cnt, :init_score, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :objective, :categorical_feature, :data_random_seed, :is_sparse, :is_unbalance, :boost_from_average, :use_missing, :linear_tree, :feature_pre_filter, :alpha, :metric, :metric_freq, :is_training_metric, :ndcg_at, :num_machines, :num_threads, :local_listen_port, :time_out, :machine_list_file, :save_binary, :device_type, :gpu_use_dp, :gpu_platform_id, :gpu_device_id, :num_gpu, :force_col_wise, :force_row_wise, :truncate_booster)`" +":hyperparameter_types" = "`(\"String\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Any\", \"String\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"String\", \"Vector{Int64}\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"Bool\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" @@ -6544,76 +6194,6 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[LightGBM.LGBMClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "LightGBM" -":package_license" = "MIT Expat" -":load_path" = "LightGBM.MLJInterface.LGBMClassifier" -":package_uuid" = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" -":package_url" = "https://github.com/IQVIA-ML/LightGBM.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "Microsoft LightGBM FFI wrapper: Classifier" -":name" = "LGBMClassifier" -":human_name" = "lgbm classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":predict", ":update"] -":hyperparameters" = "`(:boosting, :num_iterations, :learning_rate, :num_leaves, :max_depth, :tree_learner, :histogram_pool_size, :min_data_in_leaf, :min_sum_hessian_in_leaf, :max_delta_step, :lambda_l1, :lambda_l2, :min_gain_to_split, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :bagging_fraction, :pos_bagging_fraction, :neg_bagging_fraction, :bagging_freq, :bagging_seed, :early_stopping_round, :extra_trees, :extra_seed, :max_bin, :bin_construct_sample_cnt, :init_score, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :objective, :categorical_feature, :data_random_seed, :is_sparse, :is_unbalance, :boost_from_average, :scale_pos_weight, :use_missing, :linear_tree, :feature_pre_filter, :metric, :metric_freq, :is_training_metric, :ndcg_at, :num_machines, :num_threads, :local_listen_port, :time_out, :machine_list_file, :save_binary, :device_type, :gpu_use_dp, :gpu_platform_id, :gpu_device_id, :num_gpu, :force_col_wise, :force_row_wise, :truncate_booster)`" -":hyperparameter_types" = "`(\"String\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Any\", \"String\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"String\", \"Vector{Int64}\", \"Int64\", \"Bool\", 
\"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"Bool\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[LightGBM.LGBMRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "LightGBM" -":package_license" = "MIT Expat" -":load_path" = "LightGBM.MLJInterface.LGBMRegressor" -":package_uuid" = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" -":package_url" = "https://github.com/IQVIA-ML/LightGBM.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "Microsoft LightGBM FFI wrapper: Regressor" -":name" = "LGBMRegressor" -":human_name" = "lgbm regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":predict", ":update"] -":hyperparameters" = "`(:boosting, :num_iterations, :learning_rate, :num_leaves, :max_depth, :tree_learner, :histogram_pool_size, :min_data_in_leaf, :min_sum_hessian_in_leaf, :max_delta_step, :lambda_l1, :lambda_l2, :min_gain_to_split, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :bagging_fraction, :bagging_freq, :bagging_seed, :early_stopping_round, :extra_trees, :extra_seed, :max_bin, :bin_construct_sample_cnt, :init_score, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :objective, :categorical_feature, :data_random_seed, :is_sparse, :is_unbalance, :boost_from_average, :use_missing, :linear_tree, :feature_pre_filter, :alpha, :metric, :metric_freq, :is_training_metric, :ndcg_at, :num_machines, :num_threads, :local_listen_port, 
:time_out, :machine_list_file, :save_binary, :device_type, :gpu_use_dp, :gpu_platform_id, :gpu_device_id, :num_gpu, :force_col_wise, :force_row_wise, :truncate_booster)`" -":hyperparameter_types" = "`(\"String\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Any\", \"String\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"String\", \"Vector{Int64}\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"Bool\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - [SymbolicRegression.MultitargetSRRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" @@ -6636,117 +6216,12 @@ ":name" = "MultitargetSRRegressor" ":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :dimensional_constraint_penalty, :alpha, :maxsize, :maxdepth, :turbo, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :populations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :population_size, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :print_precision, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_options, :val_recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :enable_autodiff, :nested_constraints, :deterministic, :define_helper_functions, :fast_cycle, :npopulations, :npop, :niterations, 
:parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :runtests, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[SymbolicRegression.SRRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`true`" 
-":package_name" = "SymbolicRegression" -":package_license" = "Apache-2.0" -":load_path" = "SymbolicRegression.MLJInterfaceModule.SRRegressor" -":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" -":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(binary_operators=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`. Units in `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whoose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. 
The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::Node{T}`, `dataset::Dataset{T}`, and `options::Options`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to each variable. 
By default, this is 1.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is \"BFGS\", but \"NelderMead\" is also supported.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `output_file`: What file to store equations to, as a backup.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. 
By default, this is 5.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `enable_autodiff`: Whether to enable automatic differentiation functionality. This is turned off by default. If turned on, this will be turned off if one of the operators does not have well-defined gradients.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. 
By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multithreading`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to selection expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). 
`T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nWith units and variable names:\n\n```julia\nusing MLJ\nusing DynamicQuantities\nSRegressor = @load SRRegressor pkg=SymbolicRegression\n\nX = (; x1=rand(32) .* us\"km/h\", x2=rand(32) .* us\"km\")\ny = @. 
X.x2 / X.x1 + 0.5us\"h\"\nmodel = SRRegressor(binary_operators=[+, -, *, /])\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n" -":name" = "SRRegressor" -":human_name" = "Symbolic Regression via Evolutionary Search" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :dimensional_constraint_penalty, :alpha, :maxsize, :maxdepth, :turbo, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :populations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :population_size, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :print_precision, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_options, :val_recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :enable_autodiff, :nested_constraints, :deterministic, :define_helper_functions, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :runtests, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MLJText.TfidfTransformer] -":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" -":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" -":is_pure_julia" = "`true`" -":package_name" = "MLJText" -":package_license" = "MIT" -":load_path" = "MLJText.TfidfTransformer" -":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" -":package_url" = "https://github.com/JuliaAI/MLJText.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nTfidfTransformer\n```\n\nA model type for constructing a TF-IFD transformer, based on [MLJText.jl](https://github.com/JuliaAI/MLJText.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nTfidfTransformer = @load TfidfTransformer pkg=MLJText\n```\n\nDo `model = TfidfTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `TfidfTransformer(max_doc_freq=...)`.\n\nThe transformer converts a collection of documents, tokenized or pre-parsed as bags of words/ngrams, to a matrix of [TF-IDF scores](https://en.wikipedia.org/wiki/Tf–idf#Inverse_document_frequency_2). Here \"TF\" means term-frequency while \"IDF\" means inverse document frequency (defined below). The TF-IDF score is the product of the two. This is a common term weighting scheme in information retrieval, that has also found good use in document classification. 
The goal of using TF-IDF instead of the raw frequencies of occurrence of a token in a given document is to scale down the impact of tokens that occur very frequently in a given corpus and that are hence empirically less informative than features that occur in a small fraction of the training corpus.\n\nIn textbooks and implementations there is variation in the definition of IDF. Here two IDF definitions are available. The default, smoothed option provides the IDF for a term `t` as `log((1 + n)/(1 + df(t))) + 1`, where `n` is the total number of documents and `df(t)` the number of documents in which `t` appears. Setting `smooth_df = false` provides an IDF of `log(n/df(t)) + 1`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any vector whose elements are either tokenized documents or bags of words/ngrams. Specifically, each element is one of the following:\n\n * A vector of abstract strings (tokens), e.g., `[\"I\", \"like\", \"Sam\", \".\", \"Sam\", \"is\", \"nice\", \".\"]` (scitype `AbstractVector{Textual}`)\n * A dictionary of counts, indexed on abstract strings, e.g., `Dict(\"I\"=>1, \"Sam\"=>2, \"Sam is\"=>1)` (scitype `Multiset{Textual}}`)\n * A dictionary of counts, indexed on plain ngrams, e.g., `Dict((\"I\",)=>1, (\"Sam\",)=>2, (\"I\", \"Sam\")=>1)` (scitype `Multiset{<:NTuple{N,Textual} where N}`); here a *plain ngram* is a tuple of abstract strings.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `> max_doc_freq` documents will not be considered by the transformer. For example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the documents will be removed.\n * `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `< max_doc_freq` documents will not be considered by the transformer. A value of 0.01 means that only terms that are at least in 1% of the documents will be included.\n * `smooth_idf=true`: Control which definition of IDF to use (see above).\n\n# Operations\n\n * `transform(mach, Xnew)`: Based on the vocabulary and IDF learned in training, return the matrix of TF-IDF scores for `Xnew`, a vector of the same form as `X` above. The matrix has size `(n, p)`, where `n = length(Xnew)` and `p` the size of the vocabulary. Tokens/ngrams not appearing in the learned vocabulary are scored zero.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vocab`: A vector containing the strings used in the transformer's vocabulary.\n * `idf_vector`: The transformer's calculated IDF vector.\n\n# Examples\n\n`TfidfTransformer` accepts a variety of inputs. 
The example below transforms tokenized documents:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\nTfidfTransformer = @load TfidfTransformer pkg=MLJText\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ntfidf_transformer = TfidfTransformer()\n\njulia> tokenized_docs = TextAnalysis.tokenize.(docs)\n2-element Vector{Vector{String}}:\n [\"Hi\", \"my\", \"name\", \"is\", \"Sam\", \".\"]\n [\"How\", \"are\", \"you\", \"today\", \"?\"]\n\nmach = machine(tfidf_transformer, tokenized_docs)\nfit!(mach)\n\nfitted_params(mach)\n\ntfidf_mat = transform(mach, tokenized_docs)\n```\n\nAlternatively, one can provide documents pre-parsed as ngrams counts:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncorpus = TextAnalysis.Corpus(TextAnalysis.NGramDocument.(docs, 1, 2))\nngram_docs = TextAnalysis.ngrams.(corpus)\n\njulia> ngram_docs[1]\nDict{AbstractString, Int64} with 11 entries:\n \"is\" => 1\n \"my\" => 1\n \"name\" => 1\n \".\" => 1\n \"Hi\" => 1\n \"Sam\" => 1\n \"my name\" => 1\n \"Hi my\" => 1\n \"name is\" => 1\n \"Sam .\" => 1\n \"is Sam\" => 1\n\ntfidf_transformer = TfidfTransformer()\nmach = machine(tfidf_transformer, ngram_docs)\nMLJ.fit!(mach)\nfitted_params(mach)\n\ntfidf_mat = transform(mach, ngram_docs)\n```\n\nSee also [`CountTransformer`](@ref), [`BM25Transformer`](@ref)\n" -":name" = "TfidfTransformer" -":human_name" = "TF-IFD transformer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fitted_params"] -":hyperparameters" = "`(:max_doc_freq, :min_doc_freq, :smooth_idf)`" -":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MLJText.CountTransformer] -":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" -":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" -":is_pure_julia" = "`true`" -":package_name" = "MLJText" -":package_license" = "MIT" -":load_path" = "MLJText.CountTransformer" -":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" -":package_url" = "https://github.com/JuliaAI/MLJText.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" 
= "`false`" -":docstring" = "```\nCountTransformer\n```\n\nA model type for constructing a count transformer, based on [MLJText.jl](https://github.com/JuliaAI/MLJText.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCountTransformer = @load CountTransformer pkg=MLJText\n```\n\nDo `model = CountTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CountTransformer(max_doc_freq=...)`.\n\nThe transformer converts a collection of documents, tokenized or pre-parsed as bags of words/ngrams, to a matrix of term counts.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any vector whose elements are either tokenized documents or bags of words/ngrams. Specifically, each element is one of the following:\n\n * A vector of abstract strings (tokens), e.g., `[\"I\", \"like\", \"Sam\", \".\", \"Sam\", \"is\", \"nice\", \".\"]` (scitype `AbstractVector{Textual}`)\n * A dictionary of counts, indexed on abstract strings, e.g., `Dict(\"I\"=>1, \"Sam\"=>2, \"Sam is\"=>1)` (scitype `Multiset{Textual}}`)\n * A dictionary of counts, indexed on plain ngrams, e.g., `Dict((\"I\",)=>1, (\"Sam\",)=>2, (\"I\", \"Sam\")=>1)` (scitype `Multiset{<:NTuple{N,Textual} where N}`); here a *plain ngram* is a tuple of abstract strings.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `> max_doc_freq` documents will not be considered by the transformer. For example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the documents will be removed.\n * `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `< max_doc_freq` documents will not be considered by the transformer. A value of 0.01 means that only terms that are at least in 1% of the documents will be included.\n\n# Operations\n\n * `transform(mach, Xnew)`: Based on the vocabulary learned in training, return the matrix of counts for `Xnew`, a vector of the same form as `X` above. The matrix has size `(n, p)`, where `n = length(Xnew)` and `p` the size of the vocabulary. Tokens/ngrams not appearing in the learned vocabulary are scored zero.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vocab`: A vector containing the string used in the transformer's vocabulary.\n\n# Examples\n\n`CountTransformer` accepts a variety of inputs. 
The example below transforms tokenized documents:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\nCountTransformer = @load CountTransformer pkg=MLJText\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncount_transformer = CountTransformer()\n\njulia> tokenized_docs = TextAnalysis.tokenize.(docs)\n2-element Vector{Vector{String}}:\n [\"Hi\", \"my\", \"name\", \"is\", \"Sam\", \".\"]\n [\"How\", \"are\", \"you\", \"today\", \"?\"]\n\nmach = machine(count_transformer, tokenized_docs)\nfit!(mach)\n\nfitted_params(mach)\n\ntfidf_mat = transform(mach, tokenized_docs)\n```\n\nAlternatively, one can provide documents pre-parsed as ngrams counts:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncorpus = TextAnalysis.Corpus(TextAnalysis.NGramDocument.(docs, 1, 2))\nngram_docs = TextAnalysis.ngrams.(corpus)\n\njulia> ngram_docs[1]\nDict{AbstractString, Int64} with 11 entries:\n \"is\" => 1\n \"my\" => 1\n \"name\" => 1\n \".\" => 1\n \"Hi\" => 1\n \"Sam\" => 1\n \"my name\" => 1\n \"Hi my\" => 1\n \"name is\" => 1\n \"Sam .\" => 1\n \"is Sam\" => 1\n\ncount_transformer = CountTransformer()\nmach = machine(count_transformer, ngram_docs)\nMLJ.fit!(mach)\nfitted_params(mach)\n\ntfidf_mat = transform(mach, ngram_docs)\n```\n\nSee also [`TfidfTransformer`](@ref), [`BM25Transformer`](@ref)\n" -":name" = "CountTransformer" -":human_name" = "count transformer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fitted_params"] -":hyperparameters" = "`(:max_doc_freq, :min_doc_freq)`" -":hyperparameter_types" = "`(\"Float64\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [] +":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :dimensional_constraint_penalty, :alpha, :maxsize, :maxdepth, :turbo, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :populations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :population_size, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :print_precision, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_options, :val_recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :enable_autodiff, :nested_constraints, :deterministic, :define_helper_functions, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :runtests, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", 
\"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -6754,34 +6229,34 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[MLJText.BM25Transformer] -":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" -":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +[SymbolicRegression.SRRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" 
+":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_pure_julia" = "`true`" -":package_name" = "MLJText" -":package_license" = "MIT" -":load_path" = "MLJText.BM25Transformer" -":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" -":package_url" = "https://github.com/JuliaAI/MLJText.jl" +":package_name" = "SymbolicRegression" +":package_license" = "Apache-2.0" +":load_path" = "SymbolicRegression.MLJInterfaceModule.SRRegressor" +":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" +":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" ":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nBM25Transformer\n```\n\nA model type for constructing a b m25 transformer, based on [MLJText.jl](https://github.com/JuliaAI/MLJText.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBM25Transformer = @load BM25Transformer pkg=MLJText\n```\n\nDo `model = BM25Transformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BM25Transformer(max_doc_freq=...)`.\n\nThe transformer converts a collection of documents, tokenized or pre-parsed as bags of words/ngrams, to a matrix of [Okapi BM25 document-word statistics](https://en.wikipedia.org/wiki/Okapi_BM25). The BM25 scoring function uses both term frequency (TF) and inverse document frequency (IDF, defined below), as in [`TfidfTransformer`](@ref), but additionally adjusts for the probability that a user will consider a search result relevant based, on the terms in the search query and those in each document.\n\nIn textbooks and implementations there is variation in the definition of IDF. Here two IDF definitions are available. The default, smoothed option provides the IDF for a term `t` as `log((1 + n)/(1 + df(t))) + 1`, where `n` is the total number of documents and `df(t)` the number of documents in which `t` appears. Setting `smooth_df = false` provides an IDF of `log(n/df(t)) + 1`.\n\nReferences:\n\n * http://ethen8181.github.io/machine-learning/search/bm25_intro.html\n * https://en.wikipedia.org/wiki/Okapi_BM25\n * https://nlp.stanford.edu/IR-book/html/htmledition/okapi-bm25-a-non-binary-model-1.html\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any vector whose elements are either tokenized documents or bags of words/ngrams. 
Specifically, each element is one of the following:\n\n * A vector of abstract strings (tokens), e.g., `[\"I\", \"like\", \"Sam\", \".\", \"Sam\", \"is\", \"nice\", \".\"]` (scitype `AbstractVector{Textual}`)\n * A dictionary of counts, indexed on abstract strings, e.g., `Dict(\"I\"=>1, \"Sam\"=>2, \"Sam is\"=>1)` (scitype `Multiset{Textual}}`)\n * A dictionary of counts, indexed on plain ngrams, e.g., `Dict((\"I\",)=>1, (\"Sam\",)=>2, (\"I\", \"Sam\")=>1)` (scitype `Multiset{<:NTuple{N,Textual} where N}`); here a *plain ngram* is a tuple of abstract strings.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `> max_doc_freq` documents will not be considered by the transformer. For example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the documents will be removed.\n * `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `< max_doc_freq` documents will not be considered by the transformer. A value of 0.01 means that only terms that are at least in 1% of the documents will be included.\n * `κ=2`: The term frequency saturation characteristic. Higher values represent slower saturation. What we mean by saturation is the degree to which a term occurring extra times adds to the overall score.\n * `β=0.075`: Amplifies the particular document length compared to the average length. The bigger β is, the more document length is amplified in terms of the overall score. The default value is 0.75, and the bounds are restricted between 0 and 1.\n * `smooth_idf=true`: Control which definition of IDF to use (see above).\n\n# Operations\n\n * `transform(mach, Xnew)`: Based on the vocabulary, IDF, and mean word counts learned in training, return the matrix of BM25 scores for `Xnew`, a vector of the same form as `X` above. The matrix has size `(n, p)`, where `n = length(Xnew)` and `p` the size of the vocabulary. Tokens/ngrams not appearing in the learned vocabulary are scored zero.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vocab`: A vector containing the string used in the transformer's vocabulary.\n * `idf_vector`: The transformer's calculated IDF vector.\n * `mean_words_in_docs`: The mean number of words in each document.\n\n# Examples\n\n`BM25Transformer` accepts a variety of inputs. 
The example below transforms tokenized documents:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\nBM25Transformer = @load BM25Transformer pkg=MLJText\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\nbm25_transformer = BM25Transformer()\n\njulia> tokenized_docs = TextAnalysis.tokenize.(docs)\n2-element Vector{Vector{String}}:\n [\"Hi\", \"my\", \"name\", \"is\", \"Sam\", \".\"]\n [\"How\", \"are\", \"you\", \"today\", \"?\"]\n\nmach = machine(bm25_transformer, tokenized_docs)\nfit!(mach)\n\nfitted_params(mach)\n\ntfidf_mat = transform(mach, tokenized_docs)\n```\n\nAlternatively, one can provide documents pre-parsed as ngrams counts:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncorpus = TextAnalysis.Corpus(TextAnalysis.NGramDocument.(docs, 1, 2))\nngram_docs = TextAnalysis.ngrams.(corpus)\n\njulia> ngram_docs[1]\nDict{AbstractString, Int64} with 11 entries:\n \"is\" => 1\n \"my\" => 1\n \"name\" => 1\n \".\" => 1\n \"Hi\" => 1\n \"Sam\" => 1\n \"my name\" => 1\n \"Hi my\" => 1\n \"name is\" => 1\n \"Sam .\" => 1\n \"is Sam\" => 1\n\nbm25_transformer = BM25Transformer()\nmach = machine(bm25_transformer, ngram_docs)\nMLJ.fit!(mach)\nfitted_params(mach)\n\ntfidf_mat = transform(mach, ngram_docs)\n```\n\nSee also [`TfidfTransformer`](@ref), [`CountTransformer`](@ref)\n" -":name" = "BM25Transformer" -":human_name" = "b m25 transformer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fitted_params"] -":hyperparameters" = "`(:max_doc_freq, :min_doc_freq, :κ, :β, :smooth_idf)`" -":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":docstring" = "```\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(binary_operators=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`. 
Units in `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::Node{T}`, `dataset::Dataset{T}`, and `options::Options`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. 
For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to each variable. By default, this is 1.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. 
Default is \"BFGS\", but \"NelderMead\" is also supported.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `output_file`: What file to store equations to, as a backup.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `enable_autodiff`: Whether to enable automatic differentiation functionality. This is turned off by default. If turned on, this will be turned off if one of the operators does not have well-defined gradients.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. 
This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia is used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select the expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. 
By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nWith units and variable names:\n\n```julia\nusing MLJ\nusing DynamicQuantities\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n\nX = (; x1=rand(32) .* us\"km/h\", x2=rand(32) .* us\"km\")\ny = @. 
X.x2 / X.x1 + 0.5us\"h\"\nmodel = SRRegressor(binary_operators=[+, -, *, /])\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n" +":name" = "SRRegressor" +":human_name" = "Symbolic Regression via Evolutionary Search" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [] +":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :dimensional_constraint_penalty, :alpha, :maxsize, :maxdepth, :turbo, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :populations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :population_size, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :print_precision, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_options, :val_recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :enable_autodiff, :nested_constraints, :deterministic, :define_helper_functions, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :runtests, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -7034,41 +6509,6 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[MLJModels.ThresholdSupervisedDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}`" -":predict_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJModels" -":package_license" = "unknown" -":load_path" = "MLJModels.BinaryThresholdPredictor" -":package_uuid" = "" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nBinaryThresholdPredictor(model; threshold=0.5)\n```\n\nWrap the `Probabilistic` model, `model`, assumed to support binary classification, as a `Deterministic` model, by applying the specified `threshold` to the positive class probability. In addition to conventional supervised classifiers, it can also be applied to outlier detection models that predict normalized scores - in the form of appropriate `UnivariateFinite` distributions - that is, models that subtype `AbstractProbabilisticUnsupervisedDetector` or `AbstractProbabilisticSupervisedDetector`.\n\nBy convention the positive class is the second class returned by `levels(y)`, where `y` is the target.\n\nIf `threshold=0.5` then calling `predict` on the wrapped model is equivalent to calling `predict_mode` on the atomic model.\n\n# Example\n\nBelow is an application to the well-known Pima Indian diabetes dataset, including optimization of the `threshold` parameter, with a high balanced accuracy the objective. 
The target class distribution is 500 positives to 268 negatives.\n\nLoading the data:\n\n```julia\nusing MLJ, Random\nrng = Xoshiro(123)\n\ndiabetes = OpenML.load(43582)\noutcome, X = unpack(diabetes, ==(:Outcome), rng=rng);\ny = coerce(Int.(outcome), OrderedFactor);\n```\n\nChoosing a probabilistic classifier:\n\n```julia\nEvoTreesClassifier = @load EvoTreesClassifier\nprob_predictor = EvoTreesClassifier()\n```\n\nWrapping in `TunedModel` to get a deterministic classifier with `threshold` as a new hyperparameter:\n\n```julia\npoint_predictor = BinaryThresholdPredictor(prob_predictor, threshold=0.6)\nXnew, _ = make_moons(3, rng=rng)\nmach = machine(point_predictor, X, y) |> fit!\npredict(mach, X)[1:3] # [0, 0, 0]\n```\n\nEstimating performance:\n\n```julia\nbalanced = BalancedAccuracy(adjusted=true)\ne = evaluate!(mach, resampling=CV(nfolds=6), measures=[balanced, accuracy])\ne.measurement[1] # 0.405 ± 0.089\n```\n\nWrapping in tuning strategy to learn `threshold` that maximizes balanced accuracy:\n\n```julia\nr = range(point_predictor, :threshold, lower=0.1, upper=0.9)\ntuned_point_predictor = TunedModel(\n point_predictor,\n tuning=RandomSearch(rng=rng),\n resampling=CV(nfolds=6),\n range = r,\n measure=balanced,\n n=30,\n)\nmach2 = machine(tuned_point_predictor, X, y) |> fit!\noptimized_point_predictor = report(mach2).best_model\noptimized_point_predictor.threshold # 0.260\npredict(mach2, X)[1:3] # [1, 1, 0]\n```\n\nEstimating the performance of the auto-thresholding model (nested resampling here):\n\n```julia\ne = evaluate!(mach2, resampling=CV(nfolds=6), measure=[balanced, accuracy])\ne.measurement[1] # 0.477 ± 0.110\n```\n" -":name" = "ThresholdSupervisedDetector" -":human_name" = "threshold supervised detector" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.DeterministicSupervisedDetector`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :threshold)`" -":hyperparameter_types" = "`(\"MLJModelInterface.ProbabilisticSupervisedDetector\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`BinaryThresholdPredictor`" - [MLJModels.DeterministicConstantClassifier] ":input_scitype" = "`ScientificTypesBase.Table`" ":output_scitype" = "`ScientificTypesBase.Unknown`" @@ -7454,41 +6894,6 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[MLJModels.ThresholdUnsupervisedDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, 
AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJModels" -":package_license" = "unknown" -":load_path" = "MLJModels.BinaryThresholdPredictor" -":package_uuid" = "" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nBinaryThresholdPredictor(model; threshold=0.5)\n```\n\nWrap the `Probabilistic` model, `model`, assumed to support binary classification, as a `Deterministic` model, by applying the specified `threshold` to the positive class probability. In addition to conventional supervised classifiers, it can also be applied to outlier detection models that predict normalized scores - in the form of appropriate `UnivariateFinite` distributions - that is, models that subtype `AbstractProbabilisticUnsupervisedDetector` or `AbstractProbabilisticSupervisedDetector`.\n\nBy convention the positive class is the second class returned by `levels(y)`, where `y` is the target.\n\nIf `threshold=0.5` then calling `predict` on the wrapped model is equivalent to calling `predict_mode` on the atomic model.\n\n# Example\n\nBelow is an application to the well-known Pima Indian diabetes dataset, including optimization of the `threshold` parameter, with a high balanced accuracy the objective. The target class distribution is 500 positives to 268 negatives.\n\nLoading the data:\n\n```julia\nusing MLJ, Random\nrng = Xoshiro(123)\n\ndiabetes = OpenML.load(43582)\noutcome, X = unpack(diabetes, ==(:Outcome), rng=rng);\ny = coerce(Int.(outcome), OrderedFactor);\n```\n\nChoosing a probabilistic classifier:\n\n```julia\nEvoTreesClassifier = @load EvoTreesClassifier\nprob_predictor = EvoTreesClassifier()\n```\n\nWrapping in `TunedModel` to get a deterministic classifier with `threshold` as a new hyperparameter:\n\n```julia\npoint_predictor = BinaryThresholdPredictor(prob_predictor, threshold=0.6)\nXnew, _ = make_moons(3, rng=rng)\nmach = machine(point_predictor, X, y) |> fit!\npredict(mach, X)[1:3] # [0, 0, 0]\n```\n\nEstimating performance:\n\n```julia\nbalanced = BalancedAccuracy(adjusted=true)\ne = evaluate!(mach, resampling=CV(nfolds=6), measures=[balanced, accuracy])\ne.measurement[1] # 0.405 ± 0.089\n```\n\nWrapping in tuning strategy to learn `threshold` that maximizes balanced accuracy:\n\n```julia\nr = range(point_predictor, :threshold, lower=0.1, upper=0.9)\ntuned_point_predictor = TunedModel(\n point_predictor,\n tuning=RandomSearch(rng=rng),\n resampling=CV(nfolds=6),\n range = r,\n measure=balanced,\n n=30,\n)\nmach2 = machine(tuned_point_predictor, X, y) |> fit!\noptimized_point_predictor = report(mach2).best_model\noptimized_point_predictor.threshold # 0.260\npredict(mach2, X)[1:3] # [1, 1, 0]\n```\n\nEstimating the performance of the auto-thresholding model (nested resampling here):\n\n```julia\ne = evaluate!(mach2, resampling=CV(nfolds=6), measure=[balanced, accuracy])\ne.measurement[1] # 0.477 ± 0.110\n```\n" -":name" = "ThresholdUnsupervisedDetector" -":human_name" = "threshold unsupervised detector" -":is_supervised" = "`false`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.DeterministicUnsupervisedDetector`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :threshold)`" -":hyperparameter_types" = "`(\"MLJModelInterface.ProbabilisticUnsupervisedDetector\", \"Float64\")`" 
-":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`BinaryThresholdPredictor`" - [MLJModels.DeterministicConstantRegressor] ":input_scitype" = "`ScientificTypesBase.Table`" ":output_scitype" = "`ScientificTypesBase.Unknown`" @@ -7594,6 +6999,41 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" +[OneRule.OneRuleClassifier] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`true`" +":package_name" = "OneRule" +":package_license" = "MIT" +":load_path" = "OneRule.OneRuleClassifier" +":package_uuid" = "90484964-6d6a-4979-af09-8657dbed84ff" +":package_url" = "https://github.com/roland-KA/OneRule.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nOneRuleClassifier\n```\n\nA model type for constructing a one rule classifier, based on [OneRule.jl](https://github.com/roland-KA/OneRule.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneRuleClassifier = @load OneRuleClassifier pkg=OneRule\n```\n\nDo `model = OneRuleClassifier()` to construct an instance with default hyper-parameters. \n\n`OneRuleClassifier` implements the OneRule method for classification by Robert Holte (\"Very simple classification rules perform well on most commonly used datasets\" in: Machine Learning 11.1 (1993), pp. 63-90). \n\n```\nFor more information see:\n\n- Witten, Ian H., Eibe Frank, and Mark A. Hall. \n Data Mining Practical Machine Learning Tools and Techniques Third Edition. \n Morgan Kaufmann, 2017, pp. 93-96.\n- [Machine Learning - (One|Simple) Rule](https://datacadamia.com/data_mining/one_rule)\n- [OneRClassifier - One Rule for Classification](http://rasbt.github.io/mlxtend/user_guide/classifier/OneRClassifier/)\n```\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X, y) where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Multiclass`, `OrderedFactor`, or `<:Finite`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nThis classifier has no hyper-parameters.\n\n# Operations\n\n * `predict(mach, Xnew)`: return (deterministic) predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree (a `OneTree`) returned by the core OneTree.jl algorithm\n * `all_classes`: all classes (i.e. 
levels) of the target (used also internally to transfer `levels`-information to `predict`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `tree`: The `OneTree` created based on the training data\n * `nrules`: The number of rules `tree` contains\n * `error_rate`: fraction of wrongly classified instances\n * `error_count`: number of wrongly classified instances\n * `classes_seen`: list of target classes actually observed in training\n * `features`: the names of the features encountered in training\n\n# Examples\n\n```\nusing MLJ\n\nORClassifier = @load OneRuleClassifier pkg=OneRule\n\norc = ORClassifier()\n\noutlook = [\"sunny\", \"sunny\", \"overcast\", \"rainy\", \"rainy\", \"rainy\", \"overcast\", \"sunny\", \"sunny\", \"rainy\", \"sunny\", \"overcast\", \"overcast\", \"rainy\"]\ntemperature = [\"hot\", \"hot\", \"hot\", \"mild\", \"cool\", \"cool\", \"cool\", \"mild\", \"cool\", \"mild\", \"mild\", \"mild\", \"hot\", \"mild\"]\nhumidity = [\"high\", \"high\", \"high\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"high\"]\nwindy = [\"false\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"true\"]\n\nweather_data = (outlook = outlook, temperature = temperature, humidity = humidity, windy = windy)\nplay_data = [\"no\", \"no\", \"yes\", \"yes\", \"yes\", \"no\", \"yes\", \"no\", \"yes\", \"yes\", \"yes\", \"yes\", \"yes\", \"no\"]\n\nweather = coerce(weather_data, Textual => Multiclass)\nplay = coerce(play_data, Multiclass)\n\nmach = machine(orc, weather, play)\nfit!(mach)\n\nyhat = MLJ.predict(mach, weather) # in a real context 'new' `weather` data would be used\none_tree = fitted_params(mach).tree\nreport(mach).error_rate\n```\n\nSee also [OneRule.jl](https://github.com/roland-KA/OneRule.jl).\n" +":name" = "OneRuleClassifier" +":human_name" = "one rule classifier" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [":fit", ":fitted_params", ":predict"] +":hyperparameters" = "`()`" +":hyperparameter_types" = "`()`" +":hyperparameter_ranges" = "`()`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + [OutlierDetectionPython.MCDDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -8364,41 +7804,6 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[OneRule.OneRuleClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`true`" -":package_name" = "OneRule" -":package_license" = "MIT" -":load_path" = "OneRule.OneRuleClassifier" -":package_uuid" = 
"90484964-6d6a-4979-af09-8657dbed84ff" -":package_url" = "https://github.com/roland-KA/OneRule.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nOneRuleClassifier\n```\n\nA model type for constructing a one rule classifier, based on [OneRule.jl](https://github.com/roland-KA/OneRule.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneRuleClassifier = @load OneRuleClassifier pkg=OneRule\n```\n\nDo `model = OneRuleClassifier()` to construct an instance with default hyper-parameters. \n\n`OneRuleClassifier` implements the OneRule method for classification by Robert Holte (\"Very simple classification rules perform well on most commonly used datasets\" in: Machine Learning 11.1 (1993), pp. 63-90). \n\n```\nFor more information see:\n\n- Witten, Ian H., Eibe Frank, and Mark A. Hall. \n Data Mining Practical Machine Learning Tools and Techniques Third Edition. \n Morgan Kaufmann, 2017, pp. 93-96.\n- [Machine Learning - (One|Simple) Rule](https://datacadamia.com/data_mining/one_rule)\n- [OneRClassifier - One Rule for Classification](http://rasbt.github.io/mlxtend/user_guide/classifier/OneRClassifier/)\n```\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X, y) where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Multiclass`, `OrderedFactor`, or `<:Finite`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nThis classifier has no hyper-parameters.\n\n# Operations\n\n * `predict(mach, Xnew)`: return (deterministic) predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree (a `OneTree`) returned by the core OneTree.jl algorithm\n * `all_classes`: all classes (i.e. 
levels) of the target (used also internally to transfer `levels`-information to `predict`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `tree`: The `OneTree` created based on the training data\n * `nrules`: The number of rules `tree` contains\n * `error_rate`: fraction of wrongly classified instances\n * `error_count`: number of wrongly classified instances\n * `classes_seen`: list of target classes actually observed in training\n * `features`: the names of the features encountered in training\n\n# Examples\n\n```\nusing MLJ\n\nORClassifier = @load OneRuleClassifier pkg=OneRule\n\norc = ORClassifier()\n\noutlook = [\"sunny\", \"sunny\", \"overcast\", \"rainy\", \"rainy\", \"rainy\", \"overcast\", \"sunny\", \"sunny\", \"rainy\", \"sunny\", \"overcast\", \"overcast\", \"rainy\"]\ntemperature = [\"hot\", \"hot\", \"hot\", \"mild\", \"cool\", \"cool\", \"cool\", \"mild\", \"cool\", \"mild\", \"mild\", \"mild\", \"hot\", \"mild\"]\nhumidity = [\"high\", \"high\", \"high\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"high\"]\nwindy = [\"false\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"true\"]\n\nweather_data = (outlook = outlook, temperature = temperature, humidity = humidity, windy = windy)\nplay_data = [\"no\", \"no\", \"yes\", \"yes\", \"yes\", \"no\", \"yes\", \"no\", \"yes\", \"yes\", \"yes\", \"yes\", \"yes\", \"no\"]\n\nweather = coerce(weather_data, Textual => Multiclass)\nplay = coerce(play, Multiclass)\n\nmach = machine(orc, weather, play)\nfit!(mach)\n\nyhat = MLJ.predict(mach, weather) # in a real context 'new' `weather` data would be used\none_tree = fitted_params(mach).tree\nreport(mach).error_rate\n```\n\nSee also [OneRule.jl](https://github.com/roland-KA/OneRule.jl).\n" -":name" = "OneRuleClassifier" -":human_name" = "one rule classifier" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`()`" -":hyperparameter_types" = "`()`" -":hyperparameter_ranges" = "`()`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - [SelfOrganizingMaps.SelfOrganizingMap] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" @@ -8434,32 +7839,32 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[LIBSVM.ProbabilisticNuSVC] +[LIBSVM.SVC] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, 
Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_pure_julia" = "`false`" ":package_name" = "LIBSVM" ":package_license" = "unknown" -":load_path" = "MLJLIBSVMInterface.ProbabilisticNuSVC" +":load_path" = "MLJLIBSVMInterface.SVC" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":package_url" = "https://github.com/mpastell/LIBSVM.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" -":supports_class_weights" = "`false`" +":supports_class_weights" = "`true`" ":supports_online" = "`false`" -":docstring" = "```\nProbabilisticNuSVC\n```\n\nA model type for constructing a probabilistic ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticNuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticNuSVC(kernel=...)`.\n\nThis model is identical to [`NuSVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. 
See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM # model type\nmodel = ProbabilisticNuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00313, versicolor=>0.0247, virginica=>0.972)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000598, versicolor=>0.0155, virginica=>0.984)\n UnivariateFinite{Multiclass{3}}(setosa=>2.27e-6, versicolor=>2.73e-6, virginica=>1.0)\n\njulia> yhat = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticNuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`NuSVC`](@ref), [`SVC`](@ref), [`ProbabilisticSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n" -":name" = "ProbabilisticNuSVC" -":human_name" = "probabilistic ν-support vector classifier" +":docstring" = "```\nSVC\n```\n\nA model type for constructing a C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSVC = @load SVC pkg=LIBSVM\n```\n\nDo `model = SVC()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `SVC(kernel=...)`.\n\nThis model predicts actual class labels. To predict probabilities, use instead [`ProbabilisticSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nSVC = @load SVC pkg=LIBSVM # model type\nmodel = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = SVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the classifiers [`ProbabilisticSVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref). 
And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n" +":name" = "SVC" +":human_name" = "C-support vector classifier" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" @@ -8644,32 +8049,32 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[LIBSVM.SVC] +[LIBSVM.ProbabilisticNuSVC] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_pure_julia" = "`false`" ":package_name" = "LIBSVM" ":package_license" = "unknown" -":load_path" = "MLJLIBSVMInterface.SVC" +":load_path" = "MLJLIBSVMInterface.ProbabilisticNuSVC" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":package_url" = "https://github.com/mpastell/LIBSVM.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" -":supports_class_weights" = "`true`" +":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nSVC\n```\n\nA model type for constructing a C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSVC = @load SVC pkg=LIBSVM\n```\n\nDo `model = SVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SVC(kernel=...)`.\n\nThis model predicts actual class labels. To predict probabilities, use instead [`ProbabilisticSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nSVC = @load SVC pkg=LIBSVM # model type\nmodel = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = SVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## Incorporating 
class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the classifiers [`ProbabilisticSVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n" -":name" = "SVC" -":human_name" = "C-support vector classifier" +":docstring" = "```\nProbabilisticNuSVC\n```\n\nA model type for constructing a probabilistic ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticNuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticNuSVC(kernel=...)`.\n\nThis model is identical to [`NuSVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM # model type\nmodel = ProbabilisticNuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00313, versicolor=>0.0247, virginica=>0.972)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000598, versicolor=>0.0155, virginica=>0.984)\n UnivariateFinite{Multiclass{3}}(setosa=>2.27e-6, versicolor=>2.73e-6, virginica=>1.0)\n\njulia> yhat = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticNuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`NuSVC`](@ref), [`SVC`](@ref), [`ProbabilisticSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. 
[documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n" +":name" = "ProbabilisticNuSVC" +":human_name" = "probabilistic ν-support vector classifier" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" @@ -8994,42 +8399,7 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[MLJEnsembles.DeterministicEnsembleModel] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`false`" -":package_name" = "MLJEnsembles" -":package_license" = "unknown" -":load_path" = "MLJEnsembles.EnsembleModel" -":package_uuid" = "50ed68f4-41fd-4504-931a-ed422449fee0" -":package_url" = "https://github.com/JuliaAI/MLJEnsembles.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nEnsembleModel(model,\n atomic_weights=Float64[],\n bagging_fraction=0.8,\n n=100,\n rng=GLOBAL_RNG,\n acceleration=CPU1(),\n out_of_bag_measure=[])\n```\n\nCreate a model for training an ensemble of `n` clones of `model`, with optional bagging. Ensembling is useful if `fit!(machine(atom, data...))` does not create identical models on repeated calls (ie, is a stochastic model, such as a decision tree with randomized node selection criteria), or if `bagging_fraction` is set to a value less than 1.0, or both.\n\nHere the atomic `model` must support targets with scitype `AbstractVector{<:Finite}` (single-target classifiers) or `AbstractVector{<:Continuous}` (single-target regressors).\n\nIf `rng` is an integer, then `MersenneTwister(rng)` is the random number generator used for bagging. Otherwise some `AbstractRNG` object is expected.\n\nThe atomic predictions are optionally weighted according to the vector `atomic_weights` (to allow for external optimization) except in the case that `model` is a `Deterministic` classifier, in which case `atomic_weights` are ignored.\n\nThe ensemble model is `Deterministic` or `Probabilistic`, according to the corresponding supertype of `atom`. In the case of deterministic classifiers (`target_scitype(atom) <: Abstract{<:Finite}`), the predictions are majority votes, and for regressors (`target_scitype(atom)<: AbstractVector{<:Continuous}`) they are ordinary averages. 
Probabilistic predictions are obtained by averaging the atomic probability distribution/mass functions; in particular, for regressors, the ensemble prediction on each input pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl package, where `D` is the type of predicted distribution for `atom`.\n\nSpecify `acceleration=CPUProcesses()` for distributed computing, or `CPUThreads()` for multithreading.\n\nIf a single measure or non-empty vector of measures is specified by `out_of_bag_measure`, then out-of-bag estimates of performance are written to the training report (call `report` on the trained machine wrapping the ensemble model).\n\n*Important:* If per-observation or class weights `w` (not to be confused with atomic weights) are specified when constructing a machine for the ensemble model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is used by any measures specified in `out_of_bag_measure` that support them.\n" -":name" = "DeterministicEnsembleModel" -":human_name" = "deterministic ensemble model" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :atomic_weights, :bagging_fraction, :rng, :n, :acceleration, :out_of_bag_measure)`" -":hyperparameter_types" = "`(\"MLJModelInterface.Deterministic\", \"Vector{Float64}\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Int64\", \"ComputationalResources.AbstractResource\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`EnsembleModel`" - -[MLJEnsembles.ProbabilisticEnsembleModel] +[MLJEnsembles.EnsembleModel] ":input_scitype" = "`ScientificTypesBase.Unknown`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" @@ -9048,7 +8418,7 @@ ":supports_class_weights" = "`false`" ":supports_online" = "`false`" ":docstring" = "```\nEnsembleModel(model,\n atomic_weights=Float64[],\n bagging_fraction=0.8,\n n=100,\n rng=GLOBAL_RNG,\n acceleration=CPU1(),\n out_of_bag_measure=[])\n```\n\nCreate a model for training an ensemble of `n` clones of `model`, with optional bagging. Ensembling is useful if `fit!(machine(atom, data...))` does not create identical models on repeated calls (ie, is a stochastic model, such as a decision tree with randomized node selection criteria), or if `bagging_fraction` is set to a value less than 1.0, or both.\n\nHere the atomic `model` must support targets with scitype `AbstractVector{<:Finite}` (single-target classifiers) or `AbstractVector{<:Continuous}` (single-target regressors).\n\nIf `rng` is an integer, then `MersenneTwister(rng)` is the random number generator used for bagging. Otherwise some `AbstractRNG` object is expected.\n\nThe atomic predictions are optionally weighted according to the vector `atomic_weights` (to allow for external optimization) except in the case that `model` is a `Deterministic` classifier, in which case `atomic_weights` are ignored.\n\nThe ensemble model is `Deterministic` or `Probabilistic`, according to the corresponding supertype of `atom`. 
In the case of deterministic classifiers (`target_scitype(atom) <: Abstract{<:Finite}`), the predictions are majority votes, and for regressors (`target_scitype(atom)<: AbstractVector{<:Continuous}`) they are ordinary averages. Probabilistic predictions are obtained by averaging the atomic probability distribution/mass functions; in particular, for regressors, the ensemble prediction on each input pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl package, where `D` is the type of predicted distribution for `atom`.\n\nSpecify `acceleration=CPUProcesses()` for distributed computing, or `CPUThreads()` for multithreading.\n\nIf a single measure or non-empty vector of measures is specified by `out_of_bag_measure`, then out-of-bag estimates of performance are written to the training report (call `report` on the trained machine wrapping the ensemble model).\n\n*Important:* If per-observation or class weights `w` (not to be confused with atomic weights) are specified when constructing a machine for the ensemble model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is used by any measures specified in `out_of_bag_measure` that support them.\n" -":name" = "ProbabilisticEnsembleModel" +":name" = "EnsembleModel" ":human_name" = "probabilistic ensemble model" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" diff --git a/src/registry/Models.toml b/src/registry/Models.toml index cabe4514..19b2d2e9 100644 --- a/src/registry/Models.toml +++ b/src/registry/Models.toml @@ -1,34 +1,34 @@ -BetaML = ["RandomForestRegressor", "GaussianMixtureImputer", "RandomForestClassifier", "RandomForestImputer", "PerceptronClassifier", "AutoEncoder", "DecisionTreeRegressor", "PegasosClassifier", "NeuralNetworkRegressor", "KMeansClusterer", "MultitargetGaussianMixtureRegressor", "GaussianMixtureRegressor", "MultitargetNeuralNetworkRegressor", "DecisionTreeClassifier", "GeneralImputer", "NeuralNetworkClassifier", "SimpleImputer", "GaussianMixtureClusterer", "KernelPerceptronClassifier", "KMedoidsClusterer"] +BetaML = ["RandomForestRegressor", "GaussianMixtureImputer", "RandomForestClassifier", "RandomForestImputer", "PerceptronClassifier", "AutoEncoder", "DecisionTreeRegressor", "PegasosClassifier", "KMeansClusterer", "NeuralNetworkRegressor", "MultitargetGaussianMixtureRegressor", "GaussianMixtureRegressor", "MultitargetNeuralNetworkRegressor", "DecisionTreeClassifier", "GeneralImputer", "NeuralNetworkClassifier", "SimpleImputer", "GaussianMixtureClusterer", "KernelPerceptronClassifier", "KMedoidsClusterer"] CatBoost = ["CatBoostRegressor", "CatBoostClassifier"] NearestNeighborModels = ["KNNClassifier", "MultitargetKNNClassifier", "MultitargetKNNRegressor", "KNNRegressor"] -MLJScikitLearnInterface = ["ProbabilisticSGDClassifier", "RidgeCVClassifier", "LogisticClassifier", "RandomForestRegressor", "ElasticNetCVRegressor", "PerceptronClassifier", "MultiTaskLassoRegressor", "LinearRegressor", "HDBSCAN", "DBSCAN", "RidgeRegressor", "LassoLarsICRegressor", "ARDRegressor", "SVMNuRegressor", "RidgeClassifier", "SGDRegressor", "ComplementNBClassifier", "HuberRegressor", "SVMNuClassifier", "GradientBoostingClassifier", "GaussianProcessRegressor", "SVMLinearRegressor", "LarsRegressor", "MeanShift", "HistGradientBoostingClassifier", "AdaBoostRegressor", "AffinityPropagation", "MultiTaskLassoCVRegressor", "OrthogonalMatchingPursuitRegressor", "RidgeCVRegressor", "PassiveAggressiveClassifier", "SVMRegressor", "BernoulliNBClassifier", "GaussianNBClassifier", "ExtraTreesClassifier", "KMeans", 
"MultiTaskElasticNetCVRegressor", "LassoLarsCVRegressor", "OrthogonalMatchingPursuitCVRegressor", "AdaBoostClassifier", "PassiveAggressiveRegressor", "BayesianRidgeRegressor", "RANSACRegressor", "BaggingClassifier", "GaussianProcessClassifier", "OPTICS", "KNeighborsRegressor", "HistGradientBoostingRegressor", "MiniBatchKMeans", "LassoCVRegressor", "DummyRegressor", "BisectingKMeans", "LassoLarsRegressor", "LarsCVRegressor", "KNeighborsClassifier", "SVMLinearClassifier", "FeatureAgglomeration", "DummyClassifier", "BaggingRegressor", "BayesianQDA", "BayesianLDA", "SGDClassifier", "TheilSenRegressor", "SpectralClustering", "Birch", "AgglomerativeClustering", "ElasticNetRegressor", "RandomForestClassifier", "LogisticCVClassifier", "MultiTaskElasticNetRegressor", "ExtraTreesRegressor", "LassoRegressor", "MultinomialNBClassifier", "GradientBoostingRegressor", "SVMClassifier"] +MLJScikitLearnInterface = ["ProbabilisticSGDClassifier", "RidgeCVClassifier", "LogisticClassifier", "RandomForestRegressor", "ElasticNetCVRegressor", "PerceptronClassifier", "MultiTaskLassoRegressor", "LinearRegressor", "HDBSCAN", "DBSCAN", "RidgeRegressor", "LassoLarsICRegressor", "ARDRegressor", "SVMNuRegressor", "RidgeClassifier", "SGDRegressor", "ComplementNBClassifier", "HuberRegressor", "SVMNuClassifier", "GradientBoostingClassifier", "GaussianProcessRegressor", "SVMLinearRegressor", "LarsRegressor", "MeanShift", "HistGradientBoostingClassifier", "AdaBoostRegressor", "AffinityPropagation", "MultiTaskLassoCVRegressor", "OrthogonalMatchingPursuitRegressor", "BernoulliNBClassifier", "PassiveAggressiveClassifier", "RidgeCVRegressor", "SVMRegressor", "GaussianNBClassifier", "ExtraTreesClassifier", "KMeans", "MultiTaskElasticNetCVRegressor", "LassoLarsCVRegressor", "OrthogonalMatchingPursuitCVRegressor", "AdaBoostClassifier", "PassiveAggressiveRegressor", "BayesianRidgeRegressor", "GaussianProcessClassifier", "BaggingClassifier", "OPTICS", "RANSACRegressor", "KNeighborsRegressor", "HistGradientBoostingRegressor", "MiniBatchKMeans", "LassoCVRegressor", "DummyRegressor", "BisectingKMeans", "LassoLarsRegressor", "LarsCVRegressor", "KNeighborsClassifier", "SVMLinearClassifier", "FeatureAgglomeration", "DummyClassifier", "BaggingRegressor", "BayesianQDA", "BayesianLDA", "SGDClassifier", "TheilSenRegressor", "SpectralClustering", "Birch", "AgglomerativeClustering", "ElasticNetRegressor", "RandomForestClassifier", "LogisticCVClassifier", "MultiTaskElasticNetRegressor", "ExtraTreesbRegressor", "LassoRegressor", "MultinomialNBClassifier", "GradientBoostingRegressor", "SVMClassifier"] OutlierDetectionNeighbors = ["ABODDetector", "DNNDetector", "LOFDetector", "KNNDetector", "COFDetector"] -SIRUS = ["StableRulesClassifier", "StableRulesRegressor", "StableForestClassifier", "StableForestRegressor"] -MLJIteration = ["DeterministicIteratedModel", "ProbabilisticIteratedModel"] +SIRUS = ["StableRulesClassifier", "StableForestClassifier", "StableRulesRegressor", "StableForestRegressor"] +MLJIteration = ["IteratedModel"] PartialLeastSquaresRegressor = ["KPLSRegressor", "PLSRegressor"] PartitionedLS = ["PartLS"] MLJLinearModels = ["QuantileRegressor", "LogisticClassifier", "MultinomialClassifier", "LADRegressor", "RidgeRegressor", "RobustRegressor", "ElasticNetRegressor", "LinearRegressor", "LassoRegressor", "HuberRegressor"] ParallelKMeans = ["KMeans"] NaiveBayes = ["GaussianNBClassifier", "MultinomialNBClassifier"] -MLJBase = ["DeterministicStack", "ProbabilisticStack", "Resampler", "TransformedTargetModelInterval", 
"TransformedTargetModelDeterministicUnsupervisedDetector", "TransformedTargetModelProbabilistic", "StaticPipeline", "TransformedTargetModelProbabilisticSupervisedDetector", "ProbabilisticPipeline", "DeterministicPipeline", "TransformedTargetModelDeterministic", "IntervalPipeline", "UnsupervisedPipeline", "TransformedTargetModelProbabilisticUnsupervisedDetector", "TransformedTargetModelDeterministicSupervisedDetector"] +MLJBase = ["Pipeline", "Resampler", "Stack", "TransformedTargetModel"] MultivariateStats = ["LDA", "MultitargetLinearRegressor", "BayesianSubspaceLDA", "FactorAnalysis", "LinearRegressor", "ICA", "PPCA", "RidgeRegressor", "KernelPCA", "MultitargetRidgeRegressor", "SubspaceLDA", "BayesianLDA", "PCA"] DecisionTree = ["AdaBoostStumpClassifier", "DecisionTreeRegressor", "DecisionTreeClassifier", "RandomForestRegressor", "RandomForestClassifier"] -MLJBalancing = ["BalancedModelDeterministic", "BalancedBaggingClassifier", "BalancedModelInterval", "BalancedModelProbabilistic"] -MLJTuning = ["DeterministicTunedModel", "ProbabilisticTunedModel"] +MLJBalancing = ["BalancedBaggingClassifier", "BalancedModel"] Imbalance = ["RandomOversampler", "SMOTENC", "TomekUndersampler", "ClusterUndersampler", "SMOTE", "SMOTEN", "ROSE", "RandomUndersampler", "ENNUndersampler", "BorderlineSMOTE1", "RandomWalkOversampler"] +MLJTuning = ["TunedModel"] Clustering = ["HierarchicalClustering", "DBSCAN", "KMeans", "KMedoids"] EvoLinear = ["EvoSplineRegressor", "EvoLinearRegressor"] -XGBoost = ["XGBoostCount", "XGBoostRegressor", "XGBoostClassifier"] +MLJText = ["TfidfTransformer", "CountTransformer", "BM25Transformer"] LightGBM = ["LGBMClassifier", "LGBMRegressor"] +XGBoost = ["XGBoostCount", "XGBoostRegressor", "XGBoostClassifier"] SymbolicRegression = ["MultitargetSRRegressor", "SRRegressor"] -MLJText = ["TfidfTransformer", "CountTransformer", "BM25Transformer"] EvoTrees = ["EvoTreeClassifier", "EvoTreeGaussian", "EvoTreeMLE", "EvoTreeRegressor", "EvoTreeCount"] -MLJModels = ["ConstantClassifier", "Standardizer", "ThresholdSupervisedDetector", "DeterministicConstantClassifier", "UnivariateTimeTypeToContinuous", "OneHotEncoder", "ContinuousEncoder", "UnivariateBoxCoxTransformer", "InteractionTransformer", "ConstantRegressor", "FeatureSelector", "UnivariateDiscretizer", "BinaryThresholdPredictor", "FillImputer", "ThresholdUnsupervisedDetector", "DeterministicConstantRegressor", "UnivariateStandardizer", "UnivariateFillImputer"] -OutlierDetectionPython = ["MCDDetector", "COPODDetector", "HBOSDetector", "IForestDetector", "SOSDetector", "ABODDetector", "LOFDetector", "PCADetector", "INNEDetector", "OCSVMDetector", "ECODDetector", "SODDetector", "LODADetector", "KDEDetector", "CDDetector", "KNNDetector", "GMMDetector", "COFDetector", "CBLOFDetector", "LOCIDetector", "LMDDDetector", "RODDetector"] +MLJModels = ["ConstantClassifier", "Standardizer", "DeterministicConstantClassifier", "UnivariateTimeTypeToContinuous", "OneHotEncoder", "ContinuousEncoder", "UnivariateBoxCoxTransformer", "InteractionTransformer", "ConstantRegressor", "FeatureSelector", "UnivariateDiscretizer", "BinaryThresholdPredictor", "FillImputer", "DeterministicConstantRegressor", "UnivariateStandardizer", "UnivariateFillImputer"] OneRule = ["OneRuleClassifier"] +OutlierDetectionPython = ["MCDDetector", "COPODDetector", "HBOSDetector", "IForestDetector", "SOSDetector", "ABODDetector", "LOFDetector", "PCADetector", "INNEDetector", "OCSVMDetector", "ECODDetector", "SODDetector", "LODADetector", "KDEDetector", "CDDetector", "KNNDetector", 
"GMMDetector", "COFDetector", "CBLOFDetector", "LOCIDetector", "LMDDDetector", "RODDetector"] SelfOrganizingMaps = ["SelfOrganizingMap"] -LIBSVM = ["ProbabilisticNuSVC", "EpsilonSVR", "LinearSVC", "ProbabilisticSVC", "NuSVR", "NuSVC", "SVC", "OneClassSVM"] +LIBSVM = ["SVC", "EpsilonSVR", "LinearSVC", "ProbabilisticSVC", "NuSVR", "NuSVC", "ProbabilisticNuSVC", "OneClassSVM"] TSVD = ["TSVDTransformer"] GLM = ["LinearBinaryClassifier", "LinearCountRegressor", "LinearRegressor"] MLJFlux = ["MultitargetNeuralNetworkRegressor", "NeuralNetworkClassifier", "ImageClassifier", "NeuralNetworkRegressor"] -MLJEnsembles = ["DeterministicEnsembleModel", "ProbabilisticEnsembleModel"] +MLJEnsembles = ["EnsembleModel"] diff --git a/src/registry/src/Registry.jl b/src/registry/src/Registry.jl index 7f812653..054443fb 100644 --- a/src/registry/src/Registry.jl +++ b/src/registry/src/Registry.jl @@ -1,4 +1,4 @@ -module Registry +module Registry using Pkg import Pkg.TOML @@ -22,6 +22,9 @@ export @update, check_registry, activate_registry_project, info_dict const srcdir = dirname(@__FILE__) # the directory containing this file const environment_path = joinpath(srcdir, "..") +# has tool to generate dictionary of model types keyed on constructor +include("constructors.jl") + # for extracting model traits from a loaded model type include("info_dict.jl") diff --git a/src/registry/src/constructors.jl b/src/registry/src/constructors.jl new file mode 100644 index 00000000..ff75d48e --- /dev/null +++ b/src/registry/src/constructors.jl @@ -0,0 +1,31 @@ +""" + model_type_given_constructor() + +**Private method.** + +Return a dictionary of all subtypes of MLJ.Model, keyed on constructor. Where multiple +types share a single constructor, there can only be one key, and which key appears is +ambiguous. + +Typically a model type and it's constructor have the same name, but for wrappers, such as +`TunedModel`, several types share the same constructor (e.g., `DeterministicTunedModel`, +`ProbabilisticTunedModel`). + +""" +function model_type_given_constructor() + + # Note that wrappers are required to overload `MLJModelInterface.constructor` and the + # fallback is `nothing`. + modeltypes = + MLJModels.Registry.finaltypes(MLJModels.Model) + filter!(modeltypes) do T + !isabstracttype(T) + end + + return Dict( + map(modeltypes) do M + C = MLJModelInterface.constructor(M) + Pair(isnothing(C) ? M : C, M) + end..., + ) +end diff --git a/src/registry/src/update.jl b/src/registry/src/update.jl index 41b8e1fb..7f340b3f 100644 --- a/src/registry/src/update.jl +++ b/src/registry/src/update.jl @@ -112,25 +112,25 @@ function _update(mod, test_env_only) @info "Generating model metadata..." 
-    modeltypes =
-        MLJModels.Registry.finaltypes(MLJModels.Model)
-    filter!(modeltypes) do T
-        !isabstracttype(T)
-    end
+    model_type_given_constructor = MLJModels.Registry.model_type_given_constructor()
+    constructors = keys(model_type_given_constructor) |> collect
+    sort!(constructors, by=string)
     # generate and write to file the model metadata:
     api_packages = string.(MLJModels.Registry.PACKAGES)
     meta_given_package = Dict()
-    for M in modeltypes
+    for C in constructors
+        M = model_type_given_constructor[C]
         _info = MLJModels.info_dict(M)
+        constructor_name = split(string(C), '.') |> last
+        _info[:name] = constructor_name
         pkg = _info[:package_name]
         path = _info[:load_path]
         api_pkg = split(path, '.') |> first
         pkg in ["unknown",] && begin
             global warnings *= "$M `package_name` or `load_path` is \"unknown\")\n"
         end
-        modelname = _info[:name]
         api_pkg in api_packages || begin
             global warnings *= "Bad `load_path` trait for $M: "*
                 "`$api_pkg` not a registered package.\n"
@@ -138,11 +138,11 @@ function _update(mod, test_env_only)
         haskey(meta_given_package, pkg) || (meta_given_package[pkg] = Dict())
-        haskey(meta_given_package, modelname) &&
+        haskey(meta_given_package[pkg], constructor_name) &&
             error("Encountered multiple model names for "*
                 "`package_name=$pkg`")
-        meta_given_package[pkg][modelname] = _info
-        println(M, "\u2714 ")
+        meta_given_package[pkg][constructor_name] = _info
+        println(C, "\u2714 ")
     end
     print("\r")

From d4806ae04c9153e53d28520c501820614f1f2d20 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Mon, 3 Jun 2024 15:22:34 +1200
Subject: [PATCH 11/13] bump 0.17

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index afaba46a..60e00435 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "MLJModels"
 uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
 authors = ["Anthony D. Blaom "]
-version = "0.16.18"
+version = "0.17.0"

 [deps]
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"

From 5cb3d01fd2fe4105de32cec7de4d36e17e4c218b Mon Sep 17 00:00:00 2001
From: "Anthony D. 
Blaom" Date: Wed, 5 Jun 2024 17:42:29 +1200 Subject: [PATCH 12/13] update model registry, incl new FeatureSelection models --- src/registry/Metadata.toml | 105 ++++++++++++++++++++++++------------- src/registry/Models.toml | 5 +- src/registry/Project.toml | 1 + 3 files changed, 74 insertions(+), 37 deletions(-) diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index e6ecbc84..0712bc49 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -5704,6 +5704,76 @@ ":reporting_operations" = "`()`" ":constructor" = "`TunedModel`" +[FeatureSelection.FeatureSelector] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":is_pure_julia" = "`true`" +":package_name" = "FeatureSelection" +":package_license" = "MIT" +":load_path" = "FeatureSelection.FeatureSelector" +":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" +":package_url" = "https://github.com/JuliaAI/FeatureSelection.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nFeatureSelector\n```\n\nA model type for constructing a feature selector, based on [FeatureSelection.jl](https://github.com/JuliaAI/FeatureSelection.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFeatureSelector = @load FeatureSelector pkg=FeatureSelection\n```\n\nDo `model = FeatureSelector()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FeatureSelector(features=...)`.\n\nUse this model to select features (columns) of a table, usually as part of a model `Pipeline`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features, where \"table\" is in the sense of Tables.jl\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated:\n\n * `[]` (empty, the default): filter out all features (columns) which were not encountered in training\n * non-empty vector of feature names (symbols): keep only the specified features (`ignore=false`) or keep only unspecified features (`ignore=true`)\n * function or other callable: keep a feature if the callable returns `true` on its name. 
For example, specifying `FeatureSelector(features = name -> name in [:x1, :x3], ignore = true)` has the same effect as `FeatureSelector(features = [:x1, :x3], ignore = true)`, namely to select all features, with the exception of `:x1` and `:x3`.\n * `ignore`: whether to ignore or keep specified `features`, as explained above\n\n# Operations\n\n * `transform(mach, Xnew)`: select features from the table `Xnew` as specified by the model, taking features seen during training into account, if relevant\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: the features that will be selected\n\n# Example\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([\"x\", \"y\", \"x\"], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\nselector = FeatureSelector(features=[:ordinal3, ], ignore=true);\n\njulia> transform(fit!(machine(selector, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[\"x\", \"y\", \"x\"],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\n```\n" +":name" = "FeatureSelector" +":human_name" = "feature selector" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":hyperparameters" = "`(:features, :ignore)`" +":hyperparameter_types" = "`(\"Union{Function, Vector{Symbol}}\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[FeatureSelection.RecursiveFeatureElimination] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`true`" +":package_name" = "FeatureSelection" +":package_license" = "MIT" +":load_path" = "FeatureSelection.RecursiveFeatureElimination" +":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" +":package_url" = "https://github.com/JuliaAI/FeatureSelection.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nRecursiveFeatureElimination(model, n_features, step)\n```\n\nThis model implements a recursive feature elimination algorithm for feature selection. It recursively removes features, training a base model on the remaining features and evaluating their importance until the desired number of features is selected.\n\nConstruct an instance with default hyper-parameters using the syntax `rfe_model = RecursiveFeatureElimination(model=...)`. 
Provide keyword arguments to override hyper-parameter defaults.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `rfe_model` to data with\n\n```\nmach = machine(rfe_model, X, y)\n```\n\nOR, if the base model supports weights, as\n\n```\nmach = machine(rfe_model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of the same scitype as that required by the base model; check column scitypes with `schema(X)` and the column scitypes required by the base model with `input_scitype(basemodel)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous` or `Finite`, depending on the `target_scitype` required by the base model; check the scitype with `scitype(y)`.\n * `w` is the observation weights, which can be either `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. These are distinct from any `weights` hyper-parameter of the base model.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * model: A base model with a `fit` method that provides information on feature importance (i.e. `reports_feature_importances(model) == true`)\n * n_features::Real = 0: The number of features to select. If `0`, half of the features are selected. If a positive integer, it is the absolute number of features to select. If a real number between 0 and 1, it is the fraction of features to select.\n * step::Real=1: If `step` is at least 1, it specifies the number of features to eliminate in each iteration. If `step` falls strictly between 0.0 and 1.0, it specifies the proportion (rounded down) of features to remove in each iteration.\n\n# Operations\n\n * `transform(mach, X)`: transform the input table `X` into a new table containing only the columns corresponding to features selected by the RFE algorithm.\n * `predict(mach, X)`: transform the input table `X` into a new table, as in `transform(mach, X)` above, and predict using the fitted base model on the transformed table.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_left`: names of features remaining after recursive feature elimination.\n * `model_fitresult`: fitted parameters of the base model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `ranking`: The feature ranking of each feature in the training dataset.\n * `model_report`: report for the fitted base model.\n * `features`: names of features seen during the training process.\n\n# Examples\n\n```\nusing FeatureSelection, MLJ, StableRNGs\n\nRandomForestRegressor = @load RandomForestRegressor pkg=DecisionTree\n\n# Create a dataset where the target only depends on the first 5 columns of the input table.\nrng = StableRNG(10);\nA = rand(rng, 50, 10);\ny = 10 .* sin.(\n pi .* A[:, 1] .* A[:, 2]\n ) + 20 .* (A[:, 3] .- 0.5).^ 2 .+ 10 .* A[:, 4] .+ 5 .* A[:, 5];\nX = MLJ.table(A);\n\n# fit an RFE model\nrf = RandomForestRegressor()\nselector = RecursiveFeatureElimination(model = rf)\nmach = machine(selector, X, y)\nfit!(mach)\n\n# view the feature importances\nfeature_importances(mach)\n\n# predict using the base model\nXnew = MLJ.table(rand(rng, 50, 10));\npredict(mach, Xnew)\n\n```\n" +":name" = "RecursiveFeatureElimination" +":human_name" = "deterministic recursive feature elimination" +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [] +":hyperparameters" =
"`(:model, :n_features, :step)`" +":hyperparameter_types" = "`(\"MLJModelInterface.Supervised\", \"Float64\", \"Float64\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`RecursiveFeatureElimination`" + [Clustering.HierarchicalClustering] ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" @@ -6754,41 +6824,6 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[MLJModels.FeatureSelector] -":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Table`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" -":package_license" = "MIT" -":load_path" = "MLJModels.FeatureSelector" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nFeatureSelector\n```\n\nA model type for constructing a feature selector, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFeatureSelector = @load FeatureSelector pkg=MLJModels\n```\n\nDo `model = FeatureSelector()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FeatureSelector(features=...)`.\n\nUse this model to select features (columns) of a table, usually as part of a model `Pipeline`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features, where \"table\" is in the sense of Tables.jl\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated:\n\n * `[]` (empty, the default): filter out all features (columns) which were not encountered in training\n * non-empty vector of feature names (symbols): keep only the specified features (`ignore=false`) or keep only unspecified features (`ignore=true`)\n * function or other callable: keep a feature if the callable returns `true` on its name. 
For example, specifying `FeatureSelector(features = name -> name in [:x1, :x3], ignore = true)` has the same effect as `FeatureSelector(features = [:x1, :x3], ignore = true)`, namely to select all features, with the exception of `:x1` and `:x3`.\n * `ignore`: whether to ignore or keep specified `features`, as explained above\n\n# Operations\n\n * `transform(mach, Xnew)`: select features from the table `Xnew` as specified by the model, taking features seen during training into account, if relevant\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: the features that will be selected\n\n# Example\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([\"x\", \"y\", \"x\"], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\nselector = FeatureSelector(features=[:ordinal3, ], ignore=true);\n\njulia> transform(fit!(machine(selector, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[\"x\", \"y\", \"x\"],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\n```\n" -":name" = "FeatureSelector" -":human_name" = "feature selector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] -":hyperparameters" = "`(:features, :ignore)`" -":hyperparameter_types" = "`(\"Union{Function, Vector{Symbol}}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - [MLJModels.UnivariateDiscretizer] ":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" diff --git a/src/registry/Models.toml b/src/registry/Models.toml index 19b2d2e9..1c6fd001 100644 --- a/src/registry/Models.toml +++ b/src/registry/Models.toml @@ -1,7 +1,7 @@ BetaML = ["RandomForestRegressor", "GaussianMixtureImputer", "RandomForestClassifier", "RandomForestImputer", "PerceptronClassifier", "AutoEncoder", "DecisionTreeRegressor", "PegasosClassifier", "KMeansClusterer", "NeuralNetworkRegressor", "MultitargetGaussianMixtureRegressor", "GaussianMixtureRegressor", "MultitargetNeuralNetworkRegressor", "DecisionTreeClassifier", "GeneralImputer", "NeuralNetworkClassifier", "SimpleImputer", "GaussianMixtureClusterer", "KernelPerceptronClassifier", "KMedoidsClusterer"] CatBoost = ["CatBoostRegressor", "CatBoostClassifier"] NearestNeighborModels = ["KNNClassifier", "MultitargetKNNClassifier", "MultitargetKNNRegressor", "KNNRegressor"] -MLJScikitLearnInterface = ["ProbabilisticSGDClassifier", "RidgeCVClassifier", "LogisticClassifier", "RandomForestRegressor", "ElasticNetCVRegressor", "PerceptronClassifier", "MultiTaskLassoRegressor", "LinearRegressor", "HDBSCAN", "DBSCAN", "RidgeRegressor", "LassoLarsICRegressor", "ARDRegressor", "SVMNuRegressor", "RidgeClassifier", "SGDRegressor", "ComplementNBClassifier", "HuberRegressor", "SVMNuClassifier", "GradientBoostingClassifier", "GaussianProcessRegressor", "SVMLinearRegressor", "LarsRegressor", "MeanShift", "HistGradientBoostingClassifier", "AdaBoostRegressor", "AffinityPropagation", "MultiTaskLassoCVRegressor", 
"OrthogonalMatchingPursuitRegressor", "BernoulliNBClassifier", "PassiveAggressiveClassifier", "RidgeCVRegressor", "SVMRegressor", "GaussianNBClassifier", "ExtraTreesClassifier", "KMeans", "MultiTaskElasticNetCVRegressor", "LassoLarsCVRegressor", "OrthogonalMatchingPursuitCVRegressor", "AdaBoostClassifier", "PassiveAggressiveRegressor", "BayesianRidgeRegressor", "GaussianProcessClassifier", "BaggingClassifier", "OPTICS", "RANSACRegressor", "KNeighborsRegressor", "HistGradientBoostingRegressor", "MiniBatchKMeans", "LassoCVRegressor", "DummyRegressor", "BisectingKMeans", "LassoLarsRegressor", "LarsCVRegressor", "KNeighborsClassifier", "SVMLinearClassifier", "FeatureAgglomeration", "DummyClassifier", "BaggingRegressor", "BayesianQDA", "BayesianLDA", "SGDClassifier", "TheilSenRegressor", "SpectralClustering", "Birch", "AgglomerativeClustering", "ElasticNetRegressor", "RandomForestClassifier", "LogisticCVClassifier", "MultiTaskElasticNetRegressor", "ExtraTreesbRegressor", "LassoRegressor", "MultinomialNBClassifier", "GradientBoostingRegressor", "SVMClassifier"] +MLJScikitLearnInterface = ["ProbabilisticSGDClassifier", "RidgeCVClassifier", "LogisticClassifier", "RandomForestRegressor", "ElasticNetCVRegressor", "PerceptronClassifier", "MultiTaskLassoRegressor", "LinearRegressor", "HDBSCAN", "DBSCAN", "RidgeRegressor", "LassoLarsICRegressor", "ARDRegressor", "SVMNuRegressor", "RidgeClassifier", "SGDRegressor", "ComplementNBClassifier", "HuberRegressor", "SVMNuClassifier", "GradientBoostingClassifier", "GaussianProcessRegressor", "SVMLinearRegressor", "LarsRegressor", "MeanShift", "HistGradientBoostingClassifier", "AdaBoostRegressor", "AffinityPropagation", "MultiTaskLassoCVRegressor", "OrthogonalMatchingPursuitRegressor", "BernoulliNBClassifier", "PassiveAggressiveClassifier", "RidgeCVRegressor", "SVMRegressor", "GaussianNBClassifier", "ExtraTreesClassifier", "KMeans", "MultiTaskElasticNetCVRegressor", "LassoLarsCVRegressor", "OrthogonalMatchingPursuitCVRegressor", "AdaBoostClassifier", "PassiveAggressiveRegressor", "BayesianRidgeRegressor", "GaussianProcessClassifier", "BaggingClassifier", "OPTICS", "RANSACRegressor", "KNeighborsRegressor", "HistGradientBoostingRegressor", "MiniBatchKMeans", "LassoCVRegressor", "DummyRegressor", "BisectingKMeans", "LassoLarsRegressor", "LarsCVRegressor", "KNeighborsClassifier", "SVMLinearClassifier", "FeatureAgglomeration", "DummyClassifier", "BaggingRegressor", "BayesianQDA", "BayesianLDA", "SGDClassifier", "TheilSenRegressor", "SpectralClustering", "Birch", "AgglomerativeClustering", "ElasticNetRegressor", "RandomForestClassifier", "LogisticCVClassifier", "MultiTaskElasticNetRegressor", "ExtraTreesRegressor", "LassoRegressor", "MultinomialNBClassifier", "GradientBoostingRegressor", "SVMClassifier"] OutlierDetectionNeighbors = ["ABODDetector", "DNNDetector", "LOFDetector", "KNNDetector", "COFDetector"] SIRUS = ["StableRulesClassifier", "StableForestClassifier", "StableRulesRegressor", "StableForestRegressor"] MLJIteration = ["IteratedModel"] @@ -16,6 +16,7 @@ DecisionTree = ["AdaBoostStumpClassifier", "DecisionTreeRegressor", "DecisionTre MLJBalancing = ["BalancedBaggingClassifier", "BalancedModel"] Imbalance = ["RandomOversampler", "SMOTENC", "TomekUndersampler", "ClusterUndersampler", "SMOTE", "SMOTEN", "ROSE", "RandomUndersampler", "ENNUndersampler", "BorderlineSMOTE1", "RandomWalkOversampler"] MLJTuning = ["TunedModel"] +FeatureSelection = ["FeatureSelector", "RecursiveFeatureElimination"] Clustering = ["HierarchicalClustering", "DBSCAN", "KMeans", 
"KMedoids"] EvoLinear = ["EvoSplineRegressor", "EvoLinearRegressor"] MLJText = ["TfidfTransformer", "CountTransformer", "BM25Transformer"] @@ -23,7 +24,7 @@ LightGBM = ["LGBMClassifier", "LGBMRegressor"] XGBoost = ["XGBoostCount", "XGBoostRegressor", "XGBoostClassifier"] SymbolicRegression = ["MultitargetSRRegressor", "SRRegressor"] EvoTrees = ["EvoTreeClassifier", "EvoTreeGaussian", "EvoTreeMLE", "EvoTreeRegressor", "EvoTreeCount"] -MLJModels = ["ConstantClassifier", "Standardizer", "DeterministicConstantClassifier", "UnivariateTimeTypeToContinuous", "OneHotEncoder", "ContinuousEncoder", "UnivariateBoxCoxTransformer", "InteractionTransformer", "ConstantRegressor", "FeatureSelector", "UnivariateDiscretizer", "BinaryThresholdPredictor", "FillImputer", "DeterministicConstantRegressor", "UnivariateStandardizer", "UnivariateFillImputer"] +MLJModels = ["ConstantClassifier", "Standardizer", "DeterministicConstantClassifier", "UnivariateTimeTypeToContinuous", "OneHotEncoder", "ContinuousEncoder", "UnivariateBoxCoxTransformer", "InteractionTransformer", "ConstantRegressor", "UnivariateDiscretizer", "BinaryThresholdPredictor", "FillImputer", "DeterministicConstantRegressor", "UnivariateStandardizer", "UnivariateFillImputer"] OneRule = ["OneRuleClassifier"] OutlierDetectionPython = ["MCDDetector", "COPODDetector", "HBOSDetector", "IForestDetector", "SOSDetector", "ABODDetector", "LOFDetector", "PCADetector", "INNEDetector", "OCSVMDetector", "ECODDetector", "SODDetector", "LODADetector", "KDEDetector", "CDDetector", "KNNDetector", "GMMDetector", "COFDetector", "CBLOFDetector", "LOCIDetector", "LMDDDetector", "RODDetector"] SelfOrganizingMaps = ["SelfOrganizingMap"] diff --git a/src/registry/Project.toml b/src/registry/Project.toml index 0120f262..2e38d679 100644 --- a/src/registry/Project.toml +++ b/src/registry/Project.toml @@ -3,6 +3,7 @@ BetaML = "024491cd-cc6b-443e-8034-08ea7eb7db2b" CatBoost = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" EvoLinear = "ab853011-1780-437f-b4b5-5de6f4777246" EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" +FeatureSelection = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" Imbalance = "c709b415-507b-45b7-9a3d-1767c89fde68" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LightGBM = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" From ae426cb133dba71503375bc777de8d1db9d4e6c1 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 5 Jun 2024 17:58:58 +1200 Subject: [PATCH 13/13] Metadata.toml hack to force is_wrapper(::RFE) = true --- src/registry/Metadata.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index 0712bc49..9c8f1220 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -5753,7 +5753,7 @@ ":load_path" = "FeatureSelection.RecursiveFeatureElimination" ":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" ":package_url" = "https://github.com/JuliaAI/FeatureSelection.jl" -":is_wrapper" = "`false`" +":is_wrapper" = "`true`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`"