diff --git a/.github/workflows/check_registry.yml b/.github/workflows/check_registry.yml index 4f0fa721..ac3f5352 100644 --- a/.github/workflows/check_registry.yml +++ b/.github/workflows/check_registry.yml @@ -17,7 +17,11 @@ jobs: arch: - x64 env: - PYTHON: "" + PYTHON: Conda + # remove next line (and others marked below) when + # JuliaAI/MLJScikitLearnInterface.jl#42 properly resolved + LD_LIBRARY_PATH: /home/runner/.julia/conda/3/lib + steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 @@ -34,9 +38,20 @@ jobs: ${{ runner.os }}-test-${{ env.cache-name }}- ${{ runner.os }}-test- ${{ runner.os }}- + - uses: julia-actions/julia-buildpkg@v1 - run: julia -e 'using Pkg; Pkg.Registry.update()' - run: julia -e 'using Pkg; Pkg.develop(Pkg.PackageSpec(path = pwd()))' - run: julia -e 'using Pkg; Pkg.add("Test")' + + # remove next eight lines (and one other marked above) when + # JuliaAI/MLJScikitLearnInterface.jl#42 gets properly resolved + - name: "Install Conda" + run: julia -e 'using Pkg; Pkg.add("Conda");' + - name: "Install Scikit-learn" + run: | + julia -e 'using Conda; Conda.add("scikit-learn");' && + cd $LD_LIBRARY_PATH #just to check that the path is valid + - run: julia -e 'using Pkg; Pkg.update()' - run: julia -e 'using Pkg; Pkg.precompile()' - run: julia -e 'using Pkg; Pkg.status()' diff --git a/Project.toml b/Project.toml index e3cb611b..90298e5d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJModels" uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7" authors = ["Anthony D. Blaom "] -version = "0.15.9" +version = "0.15.10" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" diff --git a/README.md b/README.md index ab5ec2f3..65d7c164 100644 --- a/README.md +++ b/README.md @@ -70,34 +70,7 @@ MLJModels contains: Generally model registration is performed by administrators. If you have an interface you would like registered, open an issue -[here](https://github.com/alan-turing-institute/MLJ.jl/issues). 
+[here](https://github.com/alan-turing-institute/MLJ.jl/issues). -**Administrator instructions.** To register all the models in -GreatNewPackage with MLJ: - -- In the dev branch of a clone of the dev branch of MLJModels, change - to the `/src/registry/` directory and, in the latest version of julia, activate the - environment specified by the Project.toml there, after checking the - [compat] conditions there are up to date. **Do not use** - `Revise`. - -- Add `GreatNewPackage` to the environment. - -- In some environment to which your MLJModels clone has been added - (using `Pkg.dev`) execute `using MLJModels; MLJModels.@update`. This updates - `src/registry/Metadata.toml` and `src/registry/Models.toml` (the - latter is generated for convenience and not used by MLJ). - -- Quit your REPL session and make a trivial commit to your MLJModels - branch to force pre-compilation in a new julia session when you run - `using MLJModels`. (For technical reasons the registry is not loaded - in `__init__`()`, so without pre-compiliation the new ]registry is not - available.) - -- Test that the interfaces load properly with - `MLJModels.check_registry()`. (CI will fail on dev -> master if - this test fails.) - -- Push your changes to an appropriate branch of MLJModels to make - the updated metadata available to users of the next MLJModels tagged - release. +**Administrator instructions.** These are given in the +`MLJModels.@update` document string. 
diff --git a/src/MLJModels.jl b/src/MLJModels.jl index ea4f2c4b..84602611 100755 --- a/src/MLJModels.jl +++ b/src/MLJModels.jl @@ -74,14 +74,15 @@ const MODEL_TRAITS_IN_REGISTRY = model_traits_in_registry(INFO_GIVEN_HANDLE) # model search and registry code: include("model_search.jl") include("loading.jl") -include("registry/src/info_dict.jl") include("registry/src/Registry.jl") -include("registry/src/check_registry.jl") -import .Registry.@update +using .Registry # load built-in models: include("builtins/Constant.jl") include("builtins/Transformers.jl") include("builtins/ThresholdPredictors.jl") +# finalize: +include("init.jl") + end # module diff --git a/src/builtins/Transformers.jl b/src/builtins/Transformers.jl index 4f4b6a9a..cfeefcd1 100644 --- a/src/builtins/Transformers.jl +++ b/src/builtins/Transformers.jl @@ -860,7 +860,7 @@ function MMI.fit(transformer::OneHotEncoder, verbosity::Int, X) if T <: allowed_scitypes && ftr in specified_features ref_name_pairs_given_feature[ftr] = Pair{<:Unsigned,Symbol}[] shift = transformer.drop_last ? 
1 : 0 - levels = classes(first(col)) + levels = classes(col) fitted_levels_given_feature[ftr] = levels if verbosity > 0 @info "Spawning $(length(levels)-shift) sub-features "* diff --git a/src/init.jl b/src/init.jl new file mode 100644 index 00000000..15b484c6 --- /dev/null +++ b/src/init.jl @@ -0,0 +1,6 @@ +function __init__() + project = open(joinpath(@__DIR__, "registry", "Project.toml")) do io + readlines(io) + end + global REGISTRY_PROJECT = Ref{Vector{String}}(project) +end diff --git a/src/loading.jl b/src/loading.jl index 25189f33..a1239d2a 100644 --- a/src/loading.jl +++ b/src/loading.jl @@ -15,25 +15,9 @@ function _append!(program, ex, doprint::Bool, tick_early::Bool) end function _import(modl, api_pkg, pkg, doprint) - # can be removed once MLJModel #331 is resolved: - if pkg == :NearestNeighbors - doprint && print("import NearestNeighbors") - try - modl.eval(:(import MLJModels)) - catch - try - modl.eval(:(import MLJ.MLJModels)) - catch - error("Problem putting MLJModels into scope. ") - end - end - modl.eval(:(import NearestNeighbors)) - doprint && println(" \u2714") - else - doprint && print("import $api_pkg") - modl.eval(:(import $api_pkg)) - doprint && println(" \u2714") - end + doprint && print("import $api_pkg") + modl.eval(:(import $api_pkg)) + doprint && println(" \u2714") end function _eval(modl, path::Union{Expr,Symbol}) @@ -44,19 +28,26 @@ end ## OVERLOADING load_path """ - load_path(model::String, pkg=nothing) + load_path(model_name::String, pkg=nothing) + +Return the load path for model type with name `model_name`, specifying +the algorithm-providing package name `pkg` to resolve name conflicts, +if necessary. + + load_path(proxy::NamedTuple) -Return the load path for model type with name `model`, specifying the -package name `pkg` to resolve name conflicts if necessary. +Return the load path for the model whose name is `proxy.name` and whose +algorithm-providing package has name `proxy.package_name`. 
For example, +`proxy` could be any element of the vector returned by `models()`. load_path(model) Return the load path of a `model` instance or type. Usually requires -necessary model code to have been separately loaded. Supply a string +necessary model code to have been separately loaded. Supply strings as above if code is not loaded. """ -function MLJModelInterface.load_path(proxy::ModelProxy) +function MLJModelInterface.load_path(proxy::NamedTuple) handle = (name=proxy.name, pkg=proxy.package_name) return INFO_GIVEN_HANDLE[handle][:load_path] end @@ -213,15 +204,30 @@ function _load(modl, name_ex, kw_exs...; interactive=false) end -## NO LONGER SUPPORTED +""" + MLJModels.load(name; pkg=nothing, add=false, verbosity=0, mod=Main) + +Experimental method. +Currently private. + +Loads model code into specified module `mod` at run time, as opposed +to `@load` which loads code into calling module at time of invocation. + +""" +function load( + name::String; + pkg::Union{String,Nothing}=nothing, + add::Bool=false, + verbosity::Integer=0, + mod=Main +) + ex = if isnothing(pkg) + :(@load $name add=$add verbosity=$verbosity) + else + :(@load $name pkg=$pkg add=$add verbosity=$verbosity) + end + mod.eval(ex) +end -_deperror() = error( - "The `load` function is no longer supported. "* - "Use the `@load` macro instead, as in "* - "`@load RandomForestRegressor pkg = DecisionTree`.\n"* - "For explicit importing, you can discover a model's "* - "full load path with the `load_path` function, as in "* - "`load_path(\"RandomForestRegressor\", pkg=\"DecisionTree\")`. )") -load(proxy::ModelProxy; kwargs...) = _deperror() -load(name::String; kwargs...) = _deperror() +#load(proxy, ...) = ... 
diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index 69515080..d3c6d7b8 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -1,4 +1,38 @@ +[BetaML.BetaMLGenericImputer] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}`" +":is_pure_julia" = "`true`" +":package_name" = "BetaML" +":package_license" = "MIT" +":load_path" = "BetaML.Imputation.BetaMLGenericImputer" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":package_url" = "https://github.com/sylvaticus/BetaML.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "Impute missing values using a vector (one per column) of arbitrary learning models (classifiers/regressors) that implement `m = Model([options])`, `train!(m,X,Y)` and `predict(m,X)` (default to Random Forests), from the Beta Machine Learning Toolkit (BetaML). Experimental." 
+":name" = "BetaMLGenericImputer" +":human_name" = "beta ml generic imputer" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":transform"] +":hyperparameters" = "`(:models, :recursivePassages, :verbosity, :rng)`" +":hyperparameter_types" = "`(\"Union{Nothing, Vector}\", \"Int64\", \"BetaML.Api.Verbosity\", \"Random.AbstractRNG\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" + [BetaML.RandomForestRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" @@ -29,7 +63,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [BetaML.RandomForestClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}`" @@ -61,7 +97,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [BetaML.PerceptronClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}`" @@ -93,7 +131,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = 
"`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [BetaML.DecisionTreeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}`" @@ -125,7 +165,43 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" + +[BetaML.BetaMLGMMImputer] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}`" +":is_pure_julia" = "`true`" +":package_name" = "BetaML" +":package_license" = "MIT" +":load_path" = "BetaML.Imputation.BetaMLGMMImputer" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":package_url" = "https://github.com/sylvaticus/BetaML.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "Impute missing values using a probabilistic approach (Gaussian Mixture Models) fitted using the Expectation-Maximisation algorithm, from the Beta Machine Learning Toolkit (BetaML). Experimental." 
+":name" = "BetaMLGMMImputer" +":human_name" = "beta mlgmm imputer" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":transform"] +":hyperparameters" = "`(:nClasses, :probMixtures, :mixtures, :tol, :minVariance, :minCovariance, :initStrategy, :verbosity, :rng)`" +":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Symbol\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"BetaML.Api.Verbosity\", \"Random.AbstractRNG\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" [BetaML.PegasosClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}`" @@ -157,7 +233,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [BetaML.KMedoids] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -189,7 +267,43 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" + +[BetaML.BetaMLGMMRegressor] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`true`" +":package_name" = "BetaML" +":package_license" = "MIT" +":load_path" = "BetaML.GMM.BetaMLGMMRegressor" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":package_url" = "https://github.com/sylvaticus/BetaML.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "A non-linear regressor derived from fitting the data on a probabilistic model (Gaussian Mixture Model). Relatively fast." +":name" = "BetaMLGMMRegressor" +":human_name" = "beta mlgmm regressor" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [":predict", ":fit"] +":hyperparameters" = "`(:nClasses, :probMixtures, :mixtures, :tol, :minVariance, :minCovariance, :initStrategy, :maxIter, :verbosity, :rng)`" +":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Symbol\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"BetaML.Api.Verbosity\", \"Random.AbstractRNG\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" [BetaML.KMeans] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -221,7 +335,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = 
"`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [BetaML.DecisionTreeClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}`" @@ -253,7 +369,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [BetaML.GMMClusterer] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}`" @@ -266,7 +384,7 @@ ":is_pure_julia" = "`true`" ":package_name" = "BetaML" ":package_license" = "MIT" -":load_path" = "BetaML.Clustering.GMMClusterer" +":load_path" = "BetaML.GMM.GMMClusterer" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" ":is_wrapper" = "`false`" @@ -281,11 +399,13 @@ ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":fitted_params", ":predict", ":fit"] ":hyperparameters" = "`(:K, :p₀, :mixtures, :tol, :minVariance, :minCovariance, :initStrategy, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, AbstractVector{Float64}}\", \"Symbol\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Random.AbstractRNG\")`" +":hyperparameter_types" = "`(\"Int64\", \"AbstractVector{Float64}\", \"Symbol\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Random.AbstractRNG\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [BetaML.MissingImputator] ":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}`" @@ -298,26 +418,96 @@ ":is_pure_julia" = "`true`" ":package_name" = "BetaML" ":package_license" = "MIT" -":load_path" = "BetaML.Clustering.MissingImputator" +":load_path" = "BetaML.Imputation.MissingImputator" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" ":is_wrapper" = "`false`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Impute missing values using an Expectation-Maximisation clustering algorithm, from the Beta Machine Learning Toolkit (BetaML)." +":docstring" = "Impute missing values using an Expectation-Maximisation clustering algorithm, from the Beta Machine Learning Toolkit (BetaML). Old API, consider also `BetaMLGMMImputer` (equivalent, experimental)" ":name" = "MissingImputator" ":human_name" = "missing imputator" ":is_supervised" = "`false`" ":prediction_type" = ":unknown" ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":fit", ":transform"] -":hyperparameters" = "`(:K, :p₀, :mixtures, :tol, :minVariance, :minCovariance, :initStrategy, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, AbstractVector{Float64}}\", \"Symbol\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:K, :p₀, :mixtures, :tol, :minVariance, :minCovariance, :initStrategy, :verbosity, :rng)`" +":hyperparameter_types" = "`(\"Int64\", \"AbstractVector{Float64}\", \"Symbol\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"BetaML.Api.Verbosity\", \"Random.AbstractRNG\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" 
+":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" + +[BetaML.BetaMLMeanImputer] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}`" +":is_pure_julia" = "`true`" +":package_name" = "BetaML" +":package_license" = "MIT" +":load_path" = "BetaML.Imputation.BetaMLMeanImputer" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":package_url" = "https://github.com/sylvaticus/BetaML.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "Impute missing values using feature (column) mean, with optional record normalisation (using l-`norm` norms), from the Beta Machine Learning Toolkit (BetaML). Experimental." 
+":name" = "BetaMLMeanImputer" +":human_name" = "beta ml mean imputer" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":transform"] +":hyperparameters" = "`(:norm,)`" +":hyperparameter_types" = "`(\"Int64\",)`" +":hyperparameter_ranges" = "`(nothing,)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" + +[BetaML.BetaMLRFImputer] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}`" +":is_pure_julia" = "`true`" +":package_name" = "BetaML" +":package_license" = "MIT" +":load_path" = "BetaML.Imputation.BetaMLRFImputer" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":package_url" = "https://github.com/sylvaticus/BetaML.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "Impute missing values using Random Forests, from the Beta Machine Learning Toolkit (BetaML). Experimental." 
+":name" = "BetaMLRFImputer" +":human_name" = "beta mlrf imputer" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":transform"] +":hyperparameters" = "`(:nTrees, :maxDepth, :minGain, :minRecords, :maxFeatures, :forcedCategoricalCols, :splittingCriterion, :recursivePassages, :verbosity, :rng)`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Vector{Int64}\", \"Union{Nothing, Function}\", \"Int64\", \"BetaML.Api.Verbosity\", \"Random.AbstractRNG\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" [BetaML.KernelPerceptronClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}`" @@ -349,7 +539,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [NearestNeighborModels.KNNClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -381,7 +573,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [NearestNeighborModels.MultitargetKNNClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -413,7 +607,9 @@ ":hyperparameter_ranges" = "`(nothing, 
nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [NearestNeighborModels.MultitargetKNNRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -445,7 +641,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [NearestNeighborModels.KNNRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -477,7 +675,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionNeighbors.ABODDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -509,7 +709,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionNeighbors.DNNDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -541,7 +743,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" 
+":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionNeighbors.LOFDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -573,7 +777,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionNeighbors.KNNDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -605,7 +811,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionNeighbors.COFDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -637,7 +845,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [PartialLeastSquaresRegressor.KPLSRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -669,7 +879,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" 
+":reporting_operations" = "`()`" [PartialLeastSquaresRegressor.PLSRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -701,7 +913,9 @@ ":hyperparameter_ranges" = "`(nothing,)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJLinearModels.QuantileRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -733,7 +947,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJLinearModels.LogisticClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -765,7 +981,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJLinearModels.MultinomialClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -797,7 +1015,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJLinearModels.LADRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -829,7 +1049,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" 
":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJLinearModels.RidgeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -861,7 +1083,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJLinearModels.RobustRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -893,7 +1117,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJLinearModels.ElasticNetRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -925,7 +1151,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJLinearModels.LinearRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -957,7 +1185,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJLinearModels.LassoRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ 
-989,7 +1219,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJLinearModels.HuberRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1021,7 +1253,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.ProbabilisticSGDClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1053,7 +1287,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.RidgeCVClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1085,7 +1321,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.LogisticClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1117,7 +1355,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.RandomForestRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" @@ -1149,7 +1389,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.ElasticNetCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1181,7 +1423,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.PerceptronClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1213,7 +1457,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.MultiTaskLassoRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1245,7 +1491,9 @@ 
":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.LinearRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1277,7 +1525,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.DBSCAN] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1309,7 +1559,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.RidgeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1341,7 +1593,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.LassoLarsICRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1373,7 +1627,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = 
"`()`" [ScikitLearn.ARDRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1405,7 +1661,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.SVMNuRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1437,7 +1695,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.RidgeClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1469,7 +1729,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.SGDRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1501,7 +1763,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.ComplementNBClassifier] ":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" @@ -1533,7 +1797,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.HuberRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1565,7 +1831,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.SVMNuClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1597,7 +1865,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.GradientBoostingClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1629,7 +1899,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.GaussianProcessRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1661,7 +1933,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, 
nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.SVMLinearRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1693,7 +1967,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.LarsRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1725,7 +2001,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.MeanShift] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1757,7 +2035,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.AdaBoostRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1789,7 +2069,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.AffinityPropagation] 
":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1821,7 +2103,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.MultiTaskLassoCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1853,7 +2137,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.OrthogonalMatchingPursuitRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1885,7 +2171,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.RidgeCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1917,7 +2205,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.PassiveAggressiveClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1949,7 +2239,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.SVMRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -1981,7 +2273,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.BernoulliNBClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" @@ -2013,7 +2307,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.GaussianNBClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2045,7 +2341,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.ExtraTreesClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2077,7 +2375,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" 
= "`()`" +":reporting_operations" = "`()`" [ScikitLearn.KMeans] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2109,7 +2409,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.MultiTaskElasticNetCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2141,7 +2443,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.LassoLarsCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2173,7 +2477,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.OrthogonalMatchingPursuitCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2205,7 +2511,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.AdaBoostClassifier] ":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2237,7 +2545,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.PassiveAggressiveRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2269,7 +2579,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.BayesianRidgeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2301,7 +2613,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.RANSACRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2333,7 +2647,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.BaggingClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2365,7 +2681,9 @@ ":hyperparameter_ranges" 
= "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.GaussianProcessClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2397,7 +2715,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.OPTICS] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2429,7 +2749,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.KNeighborsRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2461,7 +2783,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.MiniBatchKMeans] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2493,7 +2817,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" 
":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.LassoCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2525,7 +2851,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.DummyRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2557,7 +2885,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.LassoLarsRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2589,7 +2919,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.LarsCVRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2621,7 +2953,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.KNeighborsClassifier] 
":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2653,7 +2987,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.SVMLinearClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2685,7 +3021,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.FeatureAgglomeration] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2717,7 +3055,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.DummyClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2749,7 +3089,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.BaggingRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2781,7 +3123,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" 
":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.BayesianQDA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2813,7 +3157,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.BayesianLDA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2845,7 +3191,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.SGDClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2877,7 +3225,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.TheilSenRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2909,7 +3259,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = 
"`()`" [ScikitLearn.SpectralClustering] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2941,7 +3293,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.Birch] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -2973,7 +3327,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.AgglomerativeClustering] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3005,7 +3361,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.ElasticNetRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3037,7 +3395,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.RandomForestClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" @@ -3069,7 +3429,9 
@@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.LogisticCVClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3101,7 +3463,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.MultiTaskElasticNetRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3133,7 +3497,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.ExtraTreesRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3165,7 +3531,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.LassoRegressor] ":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3197,7 +3565,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.MultinomialNBClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" @@ -3229,7 +3599,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.GradientBoostingRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3261,7 +3633,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ScikitLearn.SVMClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3293,7 +3667,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [ParallelKMeans.KMeans] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3325,13 +3701,15 @@ 
":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [NaiveBayes.GaussianNBClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" @@ -3345,9 +3723,9 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nGaussianNBClassifier\n```\n\nA model type for constructing a gaussian nb classifier, based on\n[NaiveBayes.jl](https://github.com/dfdx/NaiveBayes.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n\n```\nGaussianNBClassifier = @load GaussianNBClassifier pkg=NaiveBayes\n```\n\nDo `model = GaussianNBClassifier()` to construct an instance with default hyper-parameters. 
" +":docstring" = "```\nGaussianNBClassifier\n```\n\nA model type for constructing a Gaussian naive Bayes classifier, based on [NaiveBayes.jl](https://github.com/dfdx/NaiveBayes.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nGaussianNBClassifier = @load GaussianNBClassifier pkg=NaiveBayes\n```\n\nDo `model = GaussianNBClassifier()` to construct an instance with default hyper-parameters. \n\nGiven each class taken on by the target variable `y`, it is supposed that the conditional probability distribution for the input variables `X` is a multivariate Gaussian. The mean and covariance of these Gaussian distributions are estimated using maximum likelihood, and a probability distribution for `y` given `X` is deduced by applying Bayes' rule. The required marginal for `y` is estimated using class frequency in the training data.\n\n**Important.** The name \"naive Bayes classifier\" is perhaps misleading. Since we are learning the full multivariate Gaussian distributions for `X` given `y`, we are not applying the usual naive Bayes independence condition, which would amount to forcing the covariance matrix to be diagonal.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. 
Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the mode of above predictions.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `c_counts`: A dictionary containing the observed count of each input class.\n * `c_stats`: A dictionary containing observed statistics on each input class. Each class is represented by a `DataStats` object, with the following fields:\n\n * `n_vars`: The number of variables used to describe the class's behavior.\n * `n_obs`: The number of times the class is observed.\n * `obs_axis`: The axis along which the observations were computed.\n * `gaussians`: A per class dictionary of Gaussians, each representing the distribution of the class. Represented with type `Distributions.MvNormal` from the Distributions.jl package.\n * `n_obs`: The total number of observations in the training data.\n\n# Examples\n\n```\nusing MLJ\nGaussianNB = @load GaussianNBClassifier pkg=NaiveBayes\n\nX, y = @load_iris\nclf = GaussianNB()\nmach = machine(clf, X, y) |> fit!\n\nfitted_params(mach)\n\npreds = predict(mach, X) # probabilistic predictions\npreds[1]\npredict_mode(mach, X) # point predictions\n```\n\nSee also [`MultinomialNBClassifier`](@ref)\n" ":name" = "GaussianNBClassifier" -":human_name" = "gaussian nb classifier" +":human_name" = "Gaussian naive Bayes classifier" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" @@ -3357,13 +3735,15 @@ ":hyperparameter_ranges" = "`()`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [NaiveBayes.MultinomialNBClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractMatrix{<:ScientificTypesBase.Count}}`" 
":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractMatrix{<:ScientificTypesBase.Count}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" @@ -3377,9 +3757,9 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nMultinomialNBClassifier\n```\n\nA model type for constructing a multinomial nb classifier, based on\n[NaiveBayes.jl](https://github.com/dfdx/NaiveBayes.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultinomialNBClassifier = @load MultinomialNBClassifier pkg=NaiveBayes\n```\n\nDo `model = MultinomialNBClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`MultinomialNBClassifier(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1`\n\n" +":docstring" = "```\nMultinomialNBClassifier\n```\n\nA model type for constructing a multinomial naive Bayes classifier, based on [NaiveBayes.jl](https://github.com/dfdx/NaiveBayes.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultinomialNBClassifier = @load MultinomialNBClassifier pkg=NaiveBayes\n```\n\nDo `model = MultinomialNBClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `MultinomialNBClassifier(alpha=...)`.\n\nThe [multinomial naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Multinomial_naive_Bayes) is often applied when input features consist of counts (scitype `Count`) and when observations for a fixed target class are generated from a multinomial distribution with fixed probability vector, but whose sample length varies from observation to observation. For example, features might represent word counts in text documents being classified by sentiment.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Count`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `alpha=1`: Lidstone smoothing in estimation of multinomial probability vectors from training histograms (default corresponds to Laplacian smoothing).\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n * `predict_mode(mach, Xnew)`: Return the mode of above predictions.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `c_counts`: A dictionary containing the observed count of each input class.\n * `x_counts`: A dictionary containing the categorical counts of each input class.\n * `x_totals`: The sum of each count (input feature), ungrouped.\n * `n_obs`: The total number of observations in the training data.\n\n# Examples\n\n```\nusing MLJ\nimport TextAnalysis\n\nCountTransformer = @load CountTransformer pkg=MLJText\nMultinomialNBClassifier = @load MultinomialNBClassifier 
pkg=NaiveBayes\n\ntokenized_docs = TextAnalysis.tokenize.([\n \"I am very mad. You never listen.\",\n \"You seem to be having trouble? Can I help you?\",\n \"Our boss is mad at me. I hope he dies.\",\n \"His boss wants to help me. She is nice.\",\n \"Thank you for your help. It is nice working with you.\",\n \"Never do that again! I am so mad. \",\n])\n\nsentiment = [\n \"negative\",\n \"positive\",\n \"negative\",\n \"positive\",\n \"positive\",\n \"negative\",\n]\n\nmach1 = machine(CountTransformer(), tokenized_docs) |> fit!\n\n# matrix of counts:\nX = transform(mach1, tokenized_docs)\n\n# to ensure scitype(y) <: AbstractVector{<:OrderedFactor}:\ny = coerce(sentiment, OrderedFactor)\n\nclassifier = MultinomialNBClassifier()\nmach2 = machine(classifier, X, y)\nfit!(mach2, rows=1:4)\n\n# probabilistic predictions:\ny_prob = predict(mach2, rows=5:6) # distributions\npdf.(y_prob, \"positive\") # probabilities for \"positive\"\nlog_loss(y_prob, y[5:6])\n\n# point predictions:\nyhat = mode.(y_prob) # or `predict_mode(mach2, rows=5:6)`\n```\n\nSee also [`GaussianNBClassifier`](@ref)\n" ":name" = "MultinomialNBClassifier" -":human_name" = "multinomial nb classifier" +":human_name" = "multinomial naive Bayes classifier" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" @@ -3389,7 +3769,9 @@ ":hyperparameter_ranges" = "`(nothing,)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MultivariateStats.LDA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3409,19 +3791,21 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = " Multiclass linear discriminant analysis. 
The algorithm learns a\nprojection matrix `P` that projects a feature matrix `Xtrain` onto a lower dimensional\nspace of dimension `out_dim` such that the trace of the transformed between-class \nscatter matrix(`Pᵀ*Sb*P`) is maximized relative to the trace of the transformed \nwithin-class scatter matrix (`Pᵀ*Sw*P`).The projection matrix is scaled such that \n`Pᵀ*Sw*P=I` or `Pᵀ*Σw*P=I`(where `Σw` is the within-class covariance matrix) .\nPredicted class posterior probability for feature matrix `Xtest` are derived by \napplying a softmax transformationto a matrix `Pr`, such that rowᵢ of `Pr` contains \ncomputed distances(based on a distance metric) in the transformed space of rowᵢ in \n`Xtest` to the centroid of each class.\n" +":docstring" = "```\nLDA\n```\n\nA model type for constructing a linear discriminant analysis model, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLDA = @load LDA pkg=unknown\n```\n\nDo `model = LDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LDA(method=...)`.\n\n[Multiclass linear discriminant analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) learns a projection in a space of features to a lower dimensional space, in a way that attempts to preserve as much as possible the degree to which the classes of a discrete target variable can be discriminated. This can be used either for dimension reduction of the features (see `transform` below) or for probabilistic classification of the target (see `predict` below).\n\nIn the case of prediction, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. 
Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns\n\nare of scitype `Continuous`; check column scitypes with `schema(X)`.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: The solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e dimension of the transformed space, automatically set if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. 
This can be useful if using the standard covariance estimator.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projected_class_means`: The matrix comprised of class-specific means as columns, of size `(indim, nclasses)`, where `indim` is the number of input features (columns) and `nclasses` the number of target classes.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where\n\n`indim` and `outdim` are the input and output dimensions respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `outdim`: The dimensions the model is projected to.\n * `projected_class_means`: The matrix comprised of class-specific means as columns (see above).\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `class_weights`: The weights of each class.\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool)\n\n# Examples\n\n```\nusing MLJ\n\nLDA = @load LDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = LDA()\nmach = machine(model, X, y) |> 
fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n" ":name" = "LDA" -":human_name" = "lda" +":human_name" = "linear discriminant analysis model" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit", ":transform"] -":hyperparameters" = "`(:method, :cov_w, :cov_b, :out_dim, :regcoef, :dist)`" +":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :dist)`" ":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Distances.SemiMetric\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MultivariateStats.MultitargetLinearRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3441,7 +3825,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Multitarget Linear Regression. Learns linear combinations of given\nvariables to fit the responses by minimizing the squared error between.\n" +":docstring" = "```\nMultitargetLinearRegressor\n```\n\nA model type for constructing a multitarget linear regressor, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetLinearRegressor = @load MultitargetLinearRegressor pkg=unknown\n```\n\nDo `model = MultitargetLinearRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetLinearRegressor(bias=...)`.\n\n`MultitargetLinearRegressor` assumes the target variable is vector-valued with continuous components. It trains a linear prediction function using the least squares algorithm. Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns\n\nare of scitype `Continuous`; check column scitypes with `schema(X)`.\n\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 9; n_targets = 2) # a table and a table (synthetic data)\n\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 9)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n" ":name" = "MultitargetLinearRegressor" ":human_name" = "multitarget linear regressor" ":is_supervised" = "`true`" @@ -3453,7 +3837,9 @@ ":hyperparameter_ranges" = "`(nothing,)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" 
+":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MultivariateStats.BayesianSubspaceLDA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3473,19 +3859,21 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = " Bayesian Multiclass linear discriminant analysis. Suitable for high dimensional data \n(Avoids computing scatter matrices `Sw` ,`Sb`). The algorithm learns a projection \nmatrix `P = W*L` (`Sw`), that projects a feature matrix `Xtrain` onto a lower \ndimensional space of dimension `nc-1` such that the trace of the transformed \nbetween-class scatter matrix(`Pᵀ*Sb*P`) is maximized relative to the trace of the \ntransformed within-class scatter matrix (`Pᵀ*Sw*P`). The projection matrix is scaled \nsuch that `Pᵀ*Sw*P = mult*I` or `Pᵀ*Σw*P=mult/(n-nc)*I` (where `n` is the number of \ntraining samples, `mult` is one of `n` or `1` depending on whether `Sb` is normalized,\n`Σw` is the within-class covariance matrix, and `nc` is the number of unique classes in\n`y`) and also obeys `Wᵀ*Sb*p = λ*Wᵀ*Sw*p`, for every column `p` in `P`.\nPosterior class probability distibution are derived by applying Bayes rule with a\nmultivariate Gaussian class-conditional distribution\n" +":docstring" = "```\nBayesianSubspaceLDA\n```\n\nA model type for constructing a Bayesian subspace LDA model, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=unknown\n```\n\nDo `model = BayesianSubspaceLDA()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `BayesianSubspaceLDA(normalize=...)`.\n\nThe Bayesian multiclass subspace linear discriminant analysis algorithm learns a projection matrix as described in [`SubspaceLDA`](@ref). The posterior class probability distribution is derived as in [`BayesianLDA`](@ref).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n * `outdim`: the output dimension, automatically set if equal to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n * `priors::Union{Nothing, Vector{Float64}}=nothing`: For use in prediction with Bayes' rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. Otherwise it requires a `Vector` containing class probabilities with probabilities specified using the order given by `levels(y)` where `y` is the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. 
Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projected_class_means`: The matrix comprised of class-specific means as columns, of size `(indim, nclasses)`, where `indim` is the number of input features (columns) and `nclasses` the number of target classes.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where\n\n`indim` and `outdim` are the input and output dimensions respectively.\n\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A vector with order consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `explained_variance_ratio`: The ratio of explained variance to total variance. Each dimension corresponds to an eigenvalue.\n * `classes`: The classes seen during model fitting.\n * `projected_class_means`: The matrix comprised of class-specific means as columns (see above).\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `class_weights`: The weights of each class.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool)\n\n# Examples\n\n```\nusing MLJ\n\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianSubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref)\n" ":name" = "BayesianSubspaceLDA" -":human_name" = "bayesian subspace lda" +":human_name" = "Bayesian subspace LDA model" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" 
":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit", ":transform"] -":hyperparameters" = "`(:normalize, :out_dim, :priors)`" +":hyperparameters" = "`(:normalize, :outdim, :priors)`" ":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Union{Nothing, Vector{Float64}}\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MultivariateStats.FactorAnalysis] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3505,9 +3893,9 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Factor Analysis" +":docstring" = "```\nFactorAnalysis\n```\n\nA model type for constructing a factor analysis model, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFactorAnalysis = @load FactorAnalysis pkg=unknown\n```\n\nDo `model = FactorAnalysis()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FactorAnalysis(method=...)`.\n\nFactor analysis is a linear-Gaussian latent variable model that is closely related to probabilistic PCA. 
In contrast to the probabilistic PCA model, the covariance of the conditional distribution of the observed variable given the latent variable is diagonal rather than isotropic.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:cm`: Method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxoutdim=0`: Controls the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `maxiter::Int=1000`: Maximum number of iterations.\n * `tol::Real=1e-6`: Convergence tolerance.\n * `eta::Real=tol`: Variance lower bound.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(mach, Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively. Each column of the projection matrix corresponds to a factor.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data (number of factors).\n * `variance`: The variance of the factors.\n * `covariance_matrix`: The estimated covariance matrix.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `loadings`: The factor loadings.\n\n# Examples\n\n```\nusing MLJ\n\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = FactorAnalysis(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`PPCA`](@ref), [`PCA`](@ref)\n" ":name" = "FactorAnalysis" -":human_name" = "factor analysis" +":human_name" = "factor analysis model" ":is_supervised" = "`false`" ":prediction_type" = ":unknown" ":abstract_type" = "`MLJModelInterface.Unsupervised`" @@ -3517,7 +3905,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MultivariateStats.LinearRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3537,7 +3927,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Linear Regression. 
Learns a linear combination of given\nvariables to fit the response by minimizing the squared error between.\n" +":docstring" = "```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=unknown\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(bias=...)`.\n\n`LinearRegressor` assumes the target is a `Continuous` variable and trains a linear prediction function using the least squares algorithm. Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns\n\nare of scitype `Continuous`; check the column scitypes with `schema(X)`.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 2) # a table and a vector (synthetic data)\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) 
# new predictions\n```\n\nSee also [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n" ":name" = "LinearRegressor" ":human_name" = "linear regressor" ":is_supervised" = "`true`" @@ -3549,7 +3939,9 @@ ":hyperparameter_ranges" = "`(nothing,)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MultivariateStats.ICA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3569,19 +3961,21 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Independent component analysis." +":docstring" = "```\nICA\n```\n\nA model type for constructing an independent component analysis model, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nICA = @load ICA pkg=unknown\n```\n\nDo `model = ICA()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ICA(outdim=...)`.\n\nIndependent component analysis is a computational technique for separating a multivariate signal into additive subcomponents, with the assumption that the subcomponents are non-Gaussian and independent from each other.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `outdim::Int=0`: The number of independent components to recover, set automatically if `0`.\n * `alg::Symbol=:fastica`: The algorithm to use (only `:fastica` is supported at the moment).\n * `fun::Symbol=:tanh`: The approximate neg-entropy function, one of `:tanh`, `:gaus`.\n * `do_whiten::Bool=true`: Whether or not to perform pre-whitening.\n * `maxiter::Int=100`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance for change in the unmixing matrix W.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: mean to use, if nothing (default) centering is computed and applied, if zero, no centering; otherwise a vector of means can be passed.\n * `winit::Union{Nothing,Matrix{<:Real}}=nothing`: Initial guess for the unmixing matrix `W`: either an empty matrix (for random initialization of `W`), a matrix of size `k × k` (if `do_whiten` is true), or a matrix of size `m × k` (otherwise), where `k` is the number of independent components. 
Here `m` is the number of components (columns) of the input.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return the component-separated version of input `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: The estimated component matrix.\n * `mean`: The estimated mean vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n\n# Examples\n\n```\nusing MLJ\n\nICA = @load ICA pkg=MultivariateStats\n\ntimes = range(0, 8, length=2000)\n\nsine_wave = sin.(2*times)\nsquare_wave = sign.(sin.(3*times))\nsawtooth_wave = map(t -> mod(2t, 2) - 1, times)\nsignals = hcat(sine_wave, square_wave, sawtooth_wave)\nnoisy_signals = signals + 0.2*randn(size(signals))\n\nmixing_matrix = [ 1 1 1; 0.5 2 1; 1.5 1 2]\nX = MLJ.table(noisy_signals*mixing_matrix)\n\nmodel = ICA(outdim = 3, tol=0.1)\nmach = machine(model, X) |> fit!\n\nX_unmixed = transform(mach, X)\n\nusing Plots\n\nplot(X.x1)\nplot(X.x2)\nplot(X.x3)\n\nplot(X_unmixed.x1)\nplot(X_unmixed.x2)\nplot(X_unmixed.x3)\n\n```\n\nSee also [`PCA`](@ref), [`KernelPCA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n" ":name" = "ICA" -":human_name" = "ica" +":human_name" = "independent component analysis model" ":is_supervised" = "`false`" ":prediction_type" = ":unknown" ":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fitted_params", ":fit", ":transform"] -":hyperparameters" = "`(:k, :alg, :fun, :do_whiten, :maxiter, :tol, :winit, :mean)`" +":implemented_methods" = [":clean!", ":fitted_params", ":inverse_transform", ":fit", ":transform"] +":hyperparameters" = "`(:outdim, :alg, :fun, :do_whiten, :maxiter, :tol, :winit, :mean)`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", 
\"Symbol\", \"Bool\", \"Int64\", \"Real\", \"Union{Nothing, Matrix{<:Real}}\", \"Union{Nothing, Real, Vector{Float64}}\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MultivariateStats.PPCA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3601,9 +3995,9 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Probabilistic principal component analysis" +":docstring" = "```\nPPCA\n```\n\nA model type for constructing a probabilistic PCA model, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPPCA = @load PPCA pkg=unknown\n```\n\nDo `model = PPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PPCA(maxoutdim=...)`.\n\nProbabilistic principal component analysis is a dimension-reduction algorithm which represents a constrained form of the Gaussian distribution in which the number of free parameters can be restricted while still allowing the model to capture the dominant correlations in a data set. It is expressed as the maximum likelihood solution of a probabilistic latent variable model. For details, see C. M. Bishop (2006): 
Pattern Recognition and Machine Learning.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `method::Symbol=:ml`: The method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxiter::Int=1000`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(mach, Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively. 
Each column of the projection matrix corresponds to a principal component.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `tvat`: The variance of the components.\n * `loadings`: The models loadings, weights for each variable used when calculating principal components.\n\n# Examples\n\n```\nusing MLJ\n\nPPCA = @load PPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PPCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PCA`](@ref)\n" ":name" = "PPCA" -":human_name" = "ppca" +":human_name" = "probabilistic PCA model" ":is_supervised" = "`false`" ":prediction_type" = ":unknown" ":abstract_type" = "`MLJModelInterface.Unsupervised`" @@ -3613,7 +4007,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MultivariateStats.RidgeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3633,7 +4029,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Ridge regressor with regularization parameter lambda. Learns a\nlinear regression with a penalty on the l2 norm of the coefficients.\n" +":docstring" = "```\nRidgeRegressor\n```\n\nA model type for constructing a ridge regressor, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRidgeRegressor = @load RidgeRegressor pkg=unknown\n```\n\nDo `model = RidgeRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `RidgeRegressor(lambda=...)`.\n\n`RidgeRegressor` adds a quadratic penalty term to least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns\n\nare of scitype `Continuous`; check column scitypes with `schema(X)`.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\npipe = Standardizer() |> RidgeRegressor(lambda=10)\n\nX, y = @load_boston\n\nmach = machine(pipe, X, y) |> fit!\nyhat = predict(mach, X)\ntraining_error = l1(yhat, y) |> mean\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n" ":name" = "RidgeRegressor" ":human_name" = "ridge regressor" ":is_supervised" = "`true`" @@ 
-3645,7 +4041,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MultivariateStats.KernelPCA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3665,9 +4063,9 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Kernel principal component analysis." +":docstring" = "```\nKernelPCA\n```\n\nA model type for constructing a kernel principal component analysis model, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKernelPCA = @load KernelPCA pkg=unknown\n```\n\nDo `model = KernelPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KernelPCA(maxoutdim=...)`.\n\nIn kernel PCA the linear operations of ordinary principal component analysis are performed in a [reproducing kernel Hilbert space](https://en.wikipedia.org/wiki/Reproducing_kernel_Hilbert_space).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `kernel::Function=(x,y)->x'y`: The kernel function, takes in 2 vector arguments x and y, returns a scalar value. 
Defaults to the dot product of `x` and `y`.\n * `solver::Symbol=:eig`: Solver to use for the eigenvalues, one of `:eig` (default, uses `LinearAlgebra.eigen`), `:eigs` (uses `Arpack.eigs`).\n * `inverse::Bool=true`: Perform calculations needed for inverse transform.\n * `beta::Real=1.0`: Strength of the ridge regression that learns the inverse transform when `inverse` is true.\n * `tol::Real=0.0`: Convergence tolerance for eigenvalue solver.\n * `maxiter::Int=300`: Maximum number of iterations for eigenvalue solver.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(mach, Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `principalvars`: The variance of the principal components.\n\n# Examples\n\n```\nusing MLJ\nusing LinearAlgebra\n\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nfunction rbf_kernel(length_scale)\n return (x,y) -> exp(-norm(x-y)^2 / (2 * length_scale^2))\nend\n\nmodel = KernelPCA(maxoutdim=2, kernel=rbf_kernel(1))\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`PCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n" ":name" = "KernelPCA" -":human_name" = "kernel pca" +":human_name" = "kernel principal component analysis model" ":is_supervised" = "`false`" ":prediction_type" = ":unknown" ":abstract_type" = "`MLJModelInterface.Unsupervised`" @@ -3677,7 +4075,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MultivariateStats.MultitargetRidgeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3697,7 +4097,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Multitarget Ridge regressor with regularization parameter lambda. 
Learns a\nMultitarget linear regression with a penalty on the l2 norm of the coefficients.\n" +":docstring" = "```\nMultitargetRidgeRegressor\n```\n\nA model type for constructing a multitarget ridge regressor, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetRidgeRegressor = @load MultitargetRidgeRegressor pkg=unknown\n```\n\nDo `model = MultitargetRidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetRidgeRegressor(lambda=...)`.\n\nMulti-target ridge regression adds a quadratic penalty term to multi-target least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. In this case, the output represents a response vector. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns\n\nare of scitype `Continuous`; check column scitypes with `schema(X)`.\n\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. 
If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n\nX, y = make_regression(100, 6; n_targets = 2) # a table and a table (synthetic data)\n\nridge_regressor = RidgeRegressor(lambda=1.5)\nmach = machine(ridge_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 6)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref)\n" ":name" = "MultitargetRidgeRegressor" ":human_name" = "multitarget ridge regressor" ":is_supervised" = "`true`" @@ -3709,7 +4109,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MultivariateStats.SubspaceLDA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3729,19 +4131,21 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Multiclass linear discriminant analysis. Suitable for high\ndimensional data (Avoids computing scatter matrices `Sw` ,`Sb`). 
The algorithm learns a\nprojection matrix `P = W*L` that projects a feature matrix `Xtrain` onto a lower\ndimensional space of dimension `nc - 1` such that the trace of the transformed\nbetween-class scatter matrix(`Pᵀ*Sb*P`) is maximized relative to the trace of the\ntransformed within-class scatter matrix (`Pᵀ*Sw*P`). The projection matrix is scaled \nsuch that `Pᵀ*Sw*P = mult*I` or `Pᵀ*Σw*P=mult/(n-nc)*I` (where `n` is the number of \ntraining samples, mult` is one of `n` or `1` depending on whether `Sb` is normalized, \n`Σw` is the within-class covariance matrix, and `nc` is the number of unique classes \nin `y`) and also obeys `Wᵀ*Sb*p = λ*Wᵀ*Sw*p`, for every column `p` in `P`.\nPredicted class posterior probability for feature matrix `Xtest` are derived by \napplying a softmax transformation to a matrix `Pr`, such that rowᵢ of `Pr` contains \ncomputed distances(based on a distance metric) in the transformed space of rowᵢ in \n`Xtest` to the centroid of each class.\n" +":docstring" = "```\nSubspaceLDA\n```\n\nA model type for constructing a subspace LDA model, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSubspaceLDA = @load SubspaceLDA pkg=unknown\n```\n\nDo `model = SubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SubspaceLDA(normalize=...)`.\n\nMulticlass subspace linear discriminant analysis (LDA) is a variation on ordinary [`LDA`](@ref) suitable for high dimensional data, as it avoids storing scatter matrices. For details, refer to the [MultivariateStats.jl documentation](https://juliastats.org/MultivariateStats.jl/stable/).\n\nIn addition to dimension reduction (using `transform`) probabilistic classification is provided (using `predict`). 
In the case of classification, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation from the centroid of each target class are computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n * `outdim`: the output dimension, automatically set if equal to `0`. 
If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projected_class_means`: The matrix comprised of class-specific means as columns, of size `(indim, nclasses)`, where `indim` is the number of input features (columns) and `nclasses` the number of target classes.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `explained_variance_ratio`: The ratio of explained variance to total variance. 
Each dimension corresponds to an eigenvalue.\n * `classes`: The classes seen during model fitting.\n * `projected_class_means`: The matrix comprised of class-specific means as columns (see above).\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `class_weights`: The weights of each class.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n\n# Examples\n\n```\nusing MLJ\n\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = SubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n" ":name" = "SubspaceLDA" -":human_name" = "subspace lda" +":human_name" = "subspace LDA model" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit", ":transform"] -":hyperparameters" = "`(:normalize, :out_dim, :dist)`" +":hyperparameters" = "`(:normalize, :outdim, :dist)`" ":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Distances.SemiMetric\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MultivariateStats.BayesianLDA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3761,19 +4165,21 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = " Bayesian Multiclass linear discriminant analysis. 
The algorithm\nlearns a projection matrix `P` that projects a feature matrix `Xtrain` onto a lower\ndimensional space of dimension `out_dim` such that the trace of the transformed\nbetween-class scatter matrix(`Pᵀ*Sb*P`) is maximized relative to the trace of the\ntransformed within-class scatter matrix (`Pᵀ*Sw*P`). The projection matrix is scaled \nsuch that `Pᵀ*Sw*P = n` or `Pᵀ*Σw*P=I` (Where `n` is the number of training samples \nand `Σw` is the within-class covariance matrix).\nPredicted class posterior probability distibution are derived by applying Bayes rule \nwith a multivariate Gaussian class-conditional distribution.\n" +":docstring" = "```\nBayesianLDA\n```\n\nA model type for constructing a Bayesian LDA model, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianLDA = @load BayesianLDA pkg=unknown\n```\n\nDo `model = BayesianLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianLDA(method=...)`.\n\nThe Bayesian multiclass LDA algorithm learns a projection matrix as described in ordinary [`LDA`](@ref). Predicted class posterior probability distributions are derived by applying Bayes' rule with a multivariate Gaussian class-conditional distribution. A prior class distribution can be specified by the user or inferred from training data class frequency.\n\nSee also the [package documentation](https://multivariatestatsjl.readthedocs.io/en/latest/lda.html). 
For more information about the algorithm, see [Li, Zhu and Ogihara (2006): Using Discriminant Analysis for Multi-class Classification: An Experimental Investigation](https://doi.org/10.1007/s10115-006-0013-y).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: choice of solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e., dimension of the transformed space, automatically set if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `priors::Union{Nothing, Vector{Float64}}=nothing`: For use in prediction with Bayes' rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. 
Otherwise it requires a `Vector` containing class probabilities with probabilities specified using the order given by `levels(y)`, where `y` is the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projected_class_means`: The matrix comprised of class-specific means as columns, of size `(indim, nclasses)`, where `indim` is the number of input features (columns) and `nclasses` the number of target classes.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where\n\n`indim` and `outdim` are the input and output dimensions respectively.\n\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. 
A vector with order consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `outdim`: The dimensions the model is projected to.\n * `projected_class_means`: The matrix comprised of class-specific means as columns (see above).\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `class_weights`: The weights of each class.\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool)\n\n# Examples\n\n```\nusing MLJ\n\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n" ":name" = "BayesianLDA" -":human_name" = "bayesian lda" +":human_name" = "Bayesian LDA model" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit", ":transform"] -":hyperparameters" = "`(:method, :cov_w, :cov_b, :out_dim, :regcoef, :priors)`" +":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :priors)`" ":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Union{Nothing, Vector{Float64}}\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MultivariateStats.PCA] ":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3793,19 +4199,21 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = " Principal component analysis. Learns a linear transformation to\nproject the data on a lower dimensional space while preserving most of the initial\nvariance.\n" +":docstring" = "```\nPCA\n```\n\nA model type for constructing a pca, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPCA = @load PCA pkg=unknown\n```\n\nDo `model = PCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PCA(maxoutdim=...)`.\n\nPrincipal component analysis learns a linear projection onto a lower dimensional space while preserving most of the initial variance seen in the training data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Together with `variance_ratio`, controls the output dimension `outdim` chosen by the model. Specifically, suppose that `k` is the smallest integer such that retaining the `k` most significant principal components accounts for `variance_ratio` of the total variance in the training data. Then `outdim = min(outdim, maxoutdim)`. If `maxoutdim=0` (default) then the effective `maxoutdim` is `min(n, indim - 1)` where `n` is the number of observations and `indim` the number of features in the training data.\n * `variance_ratio::Float64=0.99`: The ratio of variance preserved after the transformation\n * `method=:auto`: The method to use to solve the problem. 
Choices are\n\n * `:svd`: Support Vector Decomposition of the matrix.\n * `:cov`: Covariance matrix decomposition.\n * `:auto`: Use `:cov` if the matrices first dimension is smaller than its second dimension and otherwise use `:svd`\n * `mean=nothing`: if `nothing`, centering will be computed and applied, if set to `0` no centering (data is assumed pre-centered); if a vector is passed, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim = min(n, indim, maxoutdim)` is the output dimension; here `n` is the number of observations.\n * `tprincipalvar`: Total variance of the principal components.\n * `tresidualvar`: Total residual variance.\n * `tvar`: Total observation variance (principal + residual variance).\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `principalvars`: The variance of the principal components.\n\n# Examples\n\n```\nusing MLJ\n\nPCA = @load PCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a 
vector\n\nmodel = PCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n" ":name" = "PCA" ":human_name" = "pca" ":is_supervised" = "`false`" ":prediction_type" = ":unknown" ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fitted_params", ":inverse_transform", ":fit", ":transform"] -":hyperparameters" = "`(:maxoutdim, :method, :pratio, :mean)`" +":hyperparameters" = "`(:maxoutdim, :method, :variance_ratio, :mean)`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Float64\", \"Union{Nothing, Real, Vector{Float64}}\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [DecisionTree.AdaBoostStumpClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" @@ -3825,19 +4233,21 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nAdaBoostStumpClassifier\n```\n\nA model type for constructing a Ada-boosted stump classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nAdaBoostStumpClassifier = @load AdaBoostStumpClassifier pkg=DecisionTree\n```\n\nDo `model = AdaBoostStumpClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `AdaBoostStumpClassifier(n_iter=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_iter=10`: number of iterations of AdaBoost\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted Parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `stumps`: the `Ensemble` object returned by the core DecisionTree.jl algorithm.\n * `coefficients`: the stump coefficients (one per stump)\n\n```\nusing MLJ\nBooster = @load AdaBoostStumpClassifier pkg=DecisionTree\nbooster = Booster(n_iter=15)\n\nX, y = @load_iris\nmach = machine(booster, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"verginica\" class\n\nfitted_params(mach).stumps # raw `Ensemble` object from DecisionTree.jl\nfitted_params(mach).coefs # coefficient associated with each stump\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type 
[`MLJDecisionTreeInterface.DecisionTree.AdaBoostStumpClassifier`](@ref).\n" +":docstring" = "```\nAdaBoostStumpClassifier\n```\n\nA model type for constructing a Ada-boosted stump classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nAdaBoostStumpClassifier = @load AdaBoostStumpClassifier pkg=DecisionTree\n```\n\nDo `model = AdaBoostStumpClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `AdaBoostStumpClassifier(n_iter=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_iter=10`: number of iterations of AdaBoost\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted Parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `stumps`: the `Ensemble` object returned by the core DecisionTree.jl algorithm.\n * `coefficients`: the stump coefficients (one per stump)\n\n# Report\n\n * `features`: the names of the features encountered in training\n\n```\nusing MLJ\nBooster = @load AdaBoostStumpClassifier pkg=DecisionTree\nbooster = Booster(n_iter=15)\n\nX, y = @load_iris\nmach = machine(booster, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"verginica\" class\n\nfitted_params(mach).stumps # raw `Ensemble` object from DecisionTree.jl\nfitted_params(mach).coefs # coefficient associated with each stump\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.AdaBoostStumpClassifier`](@ref).\n" ":name" = "AdaBoostStumpClassifier" ":human_name" = "Ada-boosted stump classifier" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit"] -":hyperparameters" = "`(:n_iter,)`" -":hyperparameter_types" = "`(\"Int64\",)`" -":hyperparameter_ranges" = "`(nothing,)`" +":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit", ":feature_importances"] +":hyperparameters" = "`(:n_iter, :feature_importance, :rng)`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" 
":supports_training_losses" = "`false`" +":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [DecisionTree.DecisionTreeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" @@ -3857,19 +4267,21 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nDecisionTreeRegressor\n```\n\nA model type for constructing a CART decision tree regressor, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDecisionTreeRegressor = @load DecisionTreeRegressor pkg=DecisionTree\n```\n\nDo `model = DecisionTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DecisionTreeRegressor(max_depth=...)`.\n\n`DecisionTreeRegressor` implements the [CART algorithm](https://en.wikipedia.org/wiki/Decision_tree_learning), originally published in Breiman, Leo; Friedman, J. H.; Olshen, R. A.; Stone, C. J. (1984): \"Classification and regression trees\". 
*Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software.*.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: max number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=0`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `post_prune=false`: set to `true` for post-fit pruning\n * `merge_purity_threshold=1.0`: (post-pruning) merge leaves having combined purity `>= merge_purity_threshold`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree or stump object returned by the core DecisionTree.jl algorithm\n\n# Examples\n\n```\nusing MLJ\nTree = @load DecisionTreeRegressor pkg=DecisionTree\ntree = Tree(max_depth=4, min_samples_split=3)\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(tree, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n\nfitted_params(mach).tree # raw tree or stump object from DecisionTree.jl\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type 
[`MLJDecisionTreeInterface.DecisionTree.DecisionTreeRegressor`](@ref).\n" +":docstring" = "```\nDecisionTreeRegressor\n```\n\nA model type for constructing a CART decision tree regressor, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDecisionTreeRegressor = @load DecisionTreeRegressor pkg=DecisionTree\n```\n\nDo `model = DecisionTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DecisionTreeRegressor(max_depth=...)`.\n\n`DecisionTreeRegressor` implements the [CART algorithm](https://en.wikipedia.org/wiki/Decision_tree_learning), originally published in Breiman, Leo; Friedman, J. H.; Olshen, R. A.; Stone, C. J. (1984): \"Classification and regression trees\". *Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software.*.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: max number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=0`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `post_prune=false`: set to `true` for post-fit pruning\n * `merge_purity_threshold=1.0`: (post-pruning) merge leaves having 
combined purity `>= merge_purity_threshold`\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree or stump object returned by the core DecisionTree.jl algorithm\n\n# Report\n\n * `features`: the names of the features encountered in training\n\n# Examples\n\n```\nusing MLJ\nTree = @load DecisionTreeRegressor pkg=DecisionTree\ntree = Tree(max_depth=4, min_samples_split=3)\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(tree, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n\nfitted_params(mach).tree # raw tree or stump object from DecisionTree.jl\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.DecisionTreeRegressor`](@ref).\n" ":name" = "DecisionTreeRegressor" ":human_name" = "CART decision tree regressor" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit"] -":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :post_prune, :merge_purity_threshold, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit", ":feature_importances"] +":hyperparameters" = "`(:max_depth, :min_samples_leaf, 
:min_samples_split, :min_purity_increase, :n_subfeatures, :post_prune, :merge_purity_threshold, :feature_importance, :rng)`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [DecisionTree.DecisionTreeClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" @@ -3889,19 +4301,21 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nDecisionTreeClassifier\n```\n\nA model type for constructing a CART decision tree classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDecisionTreeClassifier = @load DecisionTreeClassifier pkg=DecisionTree\n```\n\nDo `model = DecisionTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DecisionTreeClassifier(max_depth=...)`.\n\n`DecisionTreeClassifier` implements the [CART algorithm](https://en.wikipedia.org/wiki/Decision_tree_learning), originally published in Breiman, Leo; Friedman, J. H.; Olshen, R. A.; Stone, C. J. (1984): \"Classification and regression trees\". 
*Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software.*.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: max number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=0`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `post_prune=false`: set to `true` for post-fit pruning\n * `merge_purity_threshold=1.0`: (post-pruning) merge leaves having combined purity `>= merge_purity_threshold`\n * `display_depth=5`: max depth to show when displaying the tree\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree or stump object returned by the core DecisionTree.jl algorithm\n * `encoding`: dictionary of target classes keyed on integers used internally by DecisionTree.jl; needed to interpret pretty printing of tree (obtained by calling `fit!(mach, verbosity=2)` or from report - see below)\n * `features`: the names of the features encountered in training, in an order consistent with the output of `print_tree` (see below)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes_seen`: list of target classes actually observed in training\n * `print_tree`: method to print a pretty representation of the fitted tree, with single argument the tree depth; interpretation requires internal integer-class encoding (see \"Fitted parameters\" above).\n * `features`: the names of the features encountered in training, in an order consistent with the output of `print_tree` (see below)\n\n# Examples\n\n```\nusing MLJ\nTree = @load DecisionTreeClassifier pkg=DecisionTree\ntree = Tree(max_depth=4, min_samples_split=3)\n\nX, y = @load_iris\nmach = machine(tree, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"verginica\" class\n\nfitted_params(mach).tree # raw tree or stump object from DecisionTrees.jl\n\njulia> report(mach).print_tree(3)\nFeature 4, Threshold 0.8\nL-> 1 : 50/50\nR-> Feature 4, Threshold 1.75\n L-> Feature 3, Threshold 4.95\n L->\n R->\n R-> Feature 3, Threshold 4.85\n L->\n R-> 3 : 43/43\n```\n\nTo interpret the internal class labelling:\n\n```\njulia> 
fitted_params(mach).encoding\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, UInt32} with 3 entries:\n \"virginica\" => 0x00000003\n \"setosa\" => 0x00000001\n \"versicolor\" => 0x00000002\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.DecisionTreeClassifier`](@ref).\n" +":docstring" = "```\nDecisionTreeClassifier\n```\n\nA model type for constructing a CART decision tree classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDecisionTreeClassifier = @load DecisionTreeClassifier pkg=DecisionTree\n```\n\nDo `model = DecisionTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DecisionTreeClassifier(max_depth=...)`.\n\n`DecisionTreeClassifier` implements the [CART algorithm](https://en.wikipedia.org/wiki/Decision_tree_learning), originally published in Breiman, Leo; Friedman, J. H.; Olshen, R. A.; Stone, C. J. (1984): \"Classification and regression trees\". 
*Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software.*.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: max number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=0`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `post_prune=false`: set to `true` for post-fit pruning\n * `merge_purity_threshold=1.0`: (post-pruning) merge leaves having combined purity `>= merge_purity_threshold`\n * `display_depth=5`: max depth to show when displaying the tree\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree or stump object returned by the core DecisionTree.jl algorithm\n * `encoding`: dictionary of target classes keyed on integers used internally by DecisionTree.jl; needed to interpret pretty printing of tree (obtained by calling `fit!(mach, verbosity=2)` or from report - see below)\n * `features`: the names of the features encountered in training, in an order consistent with the output of `print_tree` (see below)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes_seen`: list of target classes actually observed in training\n * `print_tree`: method to print a pretty representation of the fitted tree, with single argument the tree depth; interpretation requires internal integer-class encoding (see \"Fitted parameters\" above).\n * `features`: the names of the features encountered in training, in an order consistent with the output of `print_tree` (see below)\n\n# Examples\n\n```\nusing MLJ\nTree = @load DecisionTreeClassifier pkg=DecisionTree\ntree = Tree(max_depth=4, min_samples_split=3)\n\nX, y = @load_iris\nmach = machine(tree, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"verginica\" class\n\nfitted_params(mach).tree # raw tree or stump object from DecisionTrees.jl\n\njulia> report(mach).print_tree(3)\nFeature 4, Threshold 0.8\nL-> 1 : 50/50\nR-> Feature 4, Threshold 1.75\n L-> Feature 3, Threshold 4.95\n L->\n R->\n R-> Feature 3, Threshold 4.85\n L->\n R-> 3 : 43/43\n```\n\nTo interpret the internal class labelling:\n\n```\njulia> 
fitted_params(mach).encoding\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, UInt32} with 3 entries:\n \"virginica\" => 0x00000003\n \"setosa\" => 0x00000001\n \"versicolor\" => 0x00000002\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.DecisionTreeClassifier`](@ref).\n" ":name" = "DecisionTreeClassifier" ":human_name" = "CART decision tree classifier" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit"] -":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :post_prune, :merge_purity_threshold, :display_depth, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Float64\", \"Int64\", \"Union{Integer, Random.AbstractRNG}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit", ":feature_importances"] +":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :post_prune, :merge_purity_threshold, :display_depth, :feature_importance, :rng)`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Float64\", \"Int64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [DecisionTree.RandomForestRegressor] ":input_scitype" = 
"`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" @@ -3921,19 +4335,21 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nRandomForestRegressor\n```\n\nA model type for constructing a CART random forest regressor, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomForestRegressor = @load RandomForestRegressor pkg=DecisionTree\n```\n\nDo `model = RandomForestRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomForestRegressor(max_depth=...)`.\n\n`DecisionTreeRegressor` implements the standard [Random Forest algorithm](https://en.wikipedia.org/wiki/Random_forest), originally published in Breiman, L. (2001): \"Random Forests.\", *Machine Learning*, vol. 45, pp. 
5–32\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: min number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=-1`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `n_trees=10`: number of trees to train\n * `sampling_fraction=0.7` fraction of samples to train each tree on\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `forest`: the `Ensemble` object returned by the core DecisionTree.jl algorithm\n\n# Examples\n\n```\nusing MLJ\nForest = @load RandomForestRegressor pkg=DecisionTree\nforest = Forest(max_depth=4, min_samples_split=3)\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(forest, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n\nfitted_params(mach).forest # raw `Ensemble` object from DecisionTree.jl\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.RandomForestRegressor`](@ref).\n" +":docstring" = 
"```\nRandomForestRegressor\n```\n\nA model type for constructing a CART random forest regressor, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomForestRegressor = @load RandomForestRegressor pkg=DecisionTree\n```\n\nDo `model = RandomForestRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomForestRegressor(max_depth=...)`.\n\n`DecisionTreeRegressor` implements the standard [Random Forest algorithm](https://en.wikipedia.org/wiki/Random_forest), originally published in Breiman, L. (2001): \"Random Forests.\", *Machine Learning*, vol. 45, pp. 5–32\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: min number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=-1`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `n_trees=10`: number of trees to train\n * `sampling_fraction=0.7` fraction of samples to train each tree on\n * `feature_importance`: method to use for computing feature importances. 
One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `forest`: the `Ensemble` object returned by the core DecisionTree.jl algorithm\n\n# Report\n\n * `features`: the names of the features encountered in training\n\n# Examples\n\n```\nusing MLJ\nForest = @load RandomForestRegressor pkg=DecisionTree\nforest = Forest(max_depth=4, min_samples_split=3)\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(forest, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n\nfitted_params(mach).forest # raw `Ensemble` object from DecisionTree.jl\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.RandomForestRegressor`](@ref).\n" ":name" = "RandomForestRegressor" ":human_name" = "CART random forest regressor" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit"] -":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :n_trees, :sampling_fraction, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit", ":feature_importances"] +":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :n_trees, :sampling_fraction, :feature_importance, :rng)`" 
+":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [DecisionTree.RandomForestClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" @@ -3953,19 +4369,21 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nRandomForestClassifier\n```\n\nA model type for constructing a CART random forest classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomForestClassifier = @load RandomForestClassifier pkg=DecisionTree\n```\n\nDo `model = RandomForestClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomForestClassifier(max_depth=...)`.\n\n`RandomForestClassifier` implements the standard [Random Forest algorithm](https://en.wikipedia.org/wiki/Random_forest), originally published in Breiman, L. (2001): \"Random Forests.\", *Machine Learning*, vol. 45, pp. 
5–32.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: min number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=-1`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `n_trees=10`: number of trees to train\n * `sampling_fraction=0.7` fraction of samples to train each tree on\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `forest`: the `Ensemble` object returned by the core DecisionTree.jl algorithm\n\n# Examples\n\n```\nusing MLJ\nForest = @load RandomForestClassifier pkg=DecisionTree\nforest = Forest(min_samples_split=6, n_subfeatures=3)\n\nX, y = @load_iris\nmach = machine(forest, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"verginica\" class\n\nfitted_params(mach).forest # raw `Ensemble` object from DecisionTrees.jl\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.RandomForestClassifier`](@ref).\n" +":docstring" = "```\nRandomForestClassifier\n```\n\nA model type for constructing a CART random forest classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomForestClassifier = @load RandomForestClassifier pkg=DecisionTree\n```\n\nDo `model = RandomForestClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomForestClassifier(max_depth=...)`.\n\n`RandomForestClassifier` implements the standard [Random Forest algorithm](https://en.wikipedia.org/wiki/Random_forest), originally published in Breiman, L. (2001): \"Random Forests.\", *Machine Learning*, vol. 45, pp. 
5–32.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: min number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=-1`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `n_trees=10`: number of trees to train\n * `sampling_fraction=0.7` fraction of samples to train each tree on\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `forest`: the `Ensemble` object returned by the core DecisionTree.jl algorithm\n\n# Report\n\n * `features`: the names of the features encountered in training\n\n# Examples\n\n```\nusing MLJ\nForest = @load RandomForestClassifier pkg=DecisionTree\nforest = Forest(min_samples_split=6, n_subfeatures=3)\n\nX, y = @load_iris\nmach = machine(forest, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"verginica\" class\n\nfitted_params(mach).forest # raw `Ensemble` object from DecisionTrees.jl\n\nfeature_importances(mach) # `:impurity` feature importances\nforest.feature_importance = :split\nfeature_importance(mach) # `:split` feature importances\n\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.RandomForestClassifier`](@ref).\n" ":name" = "RandomForestClassifier" ":human_name" = "CART random forest classifier" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit"] -":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :n_trees, :sampling_fraction, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" 
+":implemented_methods" = [":clean!", ":fitted_params", ":predict", ":fit", ":feature_importances"] +":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :n_trees, :sampling_fraction, :feature_importance, :rng)`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`true`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [Clustering.KMeans] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -3985,9 +4403,9 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "K-Means algorithm: find K centroids corresponding to K clusters in the data. \n" +":docstring" = "```\nKMeans\n```\n\nA model type for constructing a K-means clusterer, based on [Clustering.jl](https://github.com/JuliaStats/Clustering.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKMeans = @load KMeans pkg=Clustering\n```\n\nDo `model = KMeans()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KMeans(k=...)`.\n\n[K-means](http://en.wikipedia.org/wiki/K_means) is a classical method for clustering or vector quantization. 
It produces a fixed number of clusters, each associated with a *center* (also known as a *prototype*), and each data point is assigned to a cluster with the nearest center.\n\nFrom a mathematical standpoint, K-means is a coordinate descent algorithm that solves the following optimization problem:\n\n$$\n\\text{minimize} \\ \\sum_{i=1}^n \\| \\mathbf{x}_i - \\boldsymbol{\\mu}_{z_i} \\|^2 \\ \\text{w.r.t.} \\ (\\boldsymbol{\\mu}, z)\n$$\n\nHere, $\\boldsymbol{\\mu}_k$ is the center of the $k$-th cluster, and $z_i$ is an index of the cluster for $i$-th point $\\mathbf{x}_i$.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=3`: The number of centroids to use in clustering.\n * `metric::SemiMetric=Distances.SqEuclidean`: The metric used to calculate the clustering. 
Must have type `PreMetric` from Distances.jl.\n\n# Operations\n\n * `predict(mach, Xnew)`: return cluster label assignments, given new features `Xnew` having the same Scitype as `X` above.\n * `transform(mach, Xnew)`: instead return the mean pairwise distances from new samples to the cluster centers.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `centers`: The coordinates of the cluster centers.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `assignments`: The cluster assignments of each point in the training data.\n * `cluster_labels`: The labels assigned to each cluster.\n\n# Examples\n\n```\nusing MLJ\nKMeans = @load KMeans pkg=Clustering\n\ntable = load_iris()\ny, X = unpack(table, ==(:target), rng=123)\nmodel = KMeans(k=3)\nmach = machine(model, X) |> fit!\n\nyhat = predict(mach, X)\n@assert yhat == report(mach).assignments\n\ncompare = zip(yhat, y) |> collect;\ncompare[1:8] # clusters align with classes\n\ncenter_dists = transform(mach, fitted_params(mach).centers')\n\n@assert center_dists[1][1] == 0.0\n@assert center_dists[2][2] == 0.0\n@assert center_dists[3][3] == 0.0\n```\n\nSee also [`KMedoids`](@ref)\n" ":name" = "KMeans" -":human_name" = "k means" +":human_name" = "K-means clusterer" ":is_supervised" = "`false`" ":prediction_type" = ":unknown" ":abstract_type" = "`MLJModelInterface.Unsupervised`" @@ -3997,7 +4415,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [Clustering.KMedoids] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4017,9 +4437,9 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "K-Medoids algorithm: find K centroids corresponding to K clusters in the data.\nUnlike K-Means, the centroids are 
found among data points themselves.\n" +":docstring" = "```\nKMedoids\n```\n\nA model type for constructing a K-medoids clusterer, based on [Clustering.jl](https://github.com/JuliaStats/Clustering.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKMedoids = @load KMedoids pkg=Clustering\n```\n\nDo `model = KMedoids()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KMedoids(k=...)`.\n\n[K-medoids](http://en.wikipedia.org/wiki/K-medoids) is a clustering algorithm that works by finding $k$ data points (called *medoids*) such that the total distance between each data point and the closest *medoid* is minimal.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=3`: The number of centroids to use in clustering.\n * `metric::SemiMetric=Distances.SqEuclidean`: The metric used to calculate the clustering. 
Must have type `PreMetric` from Distances.jl.\n\n# Operations\n\n * `predict(mach, Xnew)`: return cluster label assignments, given new features `Xnew` having the same Scitype as `X` above.\n * `transform(mach, Xnew)`: instead return the mean pairwise distances from new samples to the cluster centers.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `medoids`: The coordinates of the cluster medoids.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `assignments`: The cluster assignments of each point in the training data.\n * `cluster_labels`: The labels assigned to each cluster.\n\n# Examples\n\n```\nusing MLJ\nKMedoids = @load KMedoids pkg=Clustering\n\ntable = load_iris()\ny, X = unpack(table, ==(:target), rng=123)\nmodel = KMedoids(k=3)\nmach = machine(model, X) |> fit!\n\nyhat = predict(mach, X)\n@assert yhat == report(mach).assignments\n\ncompare = zip(yhat, y) |> collect;\ncompare[1:8] # clusters align with classes\n\ncenter_dists = transform(mach, fitted_params(mach).medoids')\n\n@assert center_dists[1][1] == 0.0\n@assert center_dists[2][2] == 0.0\n@assert center_dists[3][3] == 0.0\n```\n\nSee also [`KMeans`](@ref)\n" ":name" = "KMedoids" -":human_name" = "k medoids" +":human_name" = "K-medoids clusterer" ":is_supervised" = "`false`" ":prediction_type" = ":unknown" ":abstract_type" = "`MLJModelInterface.Unsupervised`" @@ -4029,7 +4449,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJText.TfidfTransformer] ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" @@ -4061,7 +4483,9 @@ ":hyperparameter_ranges" = 
"`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJText.CountTransformer] ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" @@ -4093,7 +4517,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJText.BM25Transformer] ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" @@ -4125,7 +4551,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [XGBoost.XGBoostCount] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4157,7 +4585,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" 
= "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [XGBoost.XGBoostRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4189,7 +4619,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [XGBoost.XGBoostClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4221,7 +4653,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [LightGBM.LGBMClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4253,7 +4687,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [LightGBM.LGBMRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4285,7 +4721,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionNetworks.AEDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -4305,7 +4743,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nAEDetector(encoder= Chain(),\n decoder = Chain(),\n batchsize= 32,\n epochs = 1,\n shuffle = false,\n partial = true,\n opt = ADAM(),\n loss = mse)\n```\n\nCalculate the anomaly score of an instance based on the reconstruction loss of an autoencoder, see [1] for an explanation of auto encoders.\n\n## Parameters\n\n```\nencoder::Chain\n```\n\nTransforms the input data 
into a latent state with a fixed shape.\n\n```\ndecoder::Chain\n```\n\nTransforms the latent state back into the shape of the input data.\n\n```\nbatchsize::Integer\n```\n\nThe number of samples to work through before updating the internal model parameters.\n\n```\nepochs::Integer\n```\n\nThe number of passes of the entire training dataset the machine learning algorithm has completed. \n\n```\nshuffle::Bool\n```\n\nIf `shuffle=true`, shuffles the observations each time iterations are re-started, else no shuffling is performed.\n\n```\npartial::Bool\n```\n\nIf `partial=false`, drops the last mini-batch if it is smaller than the batchsize.\n\n```\nopt::Any\n```\n\nAny Flux-compatibale optimizer, typically a `struct` that holds all the optimiser parameters along with a definition of `apply!` that defines how to apply the update rule associated with the optimizer.\n\n```\nloss::Function\n```\n\nThe loss function used to calculate the reconstruction error, see [https://fluxml.ai/Flux.jl/stable/models/losses/](https://fluxml.ai/Flux.jl/stable/models/losses/) for examples.\n\n## Examples\n\n```julia\nusing OutlierDetection: AEDetector, fit, score\ndetector = AEDetector()\nX = rand(10, 100)\nresult = fit(detector, X)\ntest_scores = transform(detector, result.model, X)\n```\n\n## References\n\n[1] Aggarwal, Charu C. 
(2017): Outlier Analysis.\n" +":docstring" = "```\nAEDetector(encoder= Chain(),\n decoder = Chain(),\n batchsize= 32,\n epochs = 1,\n shuffle = false,\n partial = true,\n opt = Adam(),\n loss = mse)\n```\n\nCalculate the anomaly score of an instance based on the reconstruction loss of an autoencoder, see [1] for an explanation of auto encoders.\n\n## Parameters\n\n```\nencoder::Chain\n```\n\nTransforms the input data into a latent state with a fixed shape.\n\n```\ndecoder::Chain\n```\n\nTransforms the latent state back into the shape of the input data.\n\n```\nbatchsize::Integer\n```\n\nThe number of samples to work through before updating the internal model parameters.\n\n```\nepochs::Integer\n```\n\nThe number of passes of the entire training dataset the machine learning algorithm has completed. \n\n```\nshuffle::Bool\n```\n\nIf `shuffle=true`, shuffles the observations each time iterations are re-started, else no shuffling is performed.\n\n```\npartial::Bool\n```\n\nIf `partial=false`, drops the last mini-batch if it is smaller than the batchsize.\n\n```\nopt::Any\n```\n\nAny Flux-compatibale optimizer, typically a `struct` that holds all the optimiser parameters along with a definition of `apply!` that defines how to apply the update rule associated with the optimizer.\n\n```\nloss::Function\n```\n\nThe loss function used to calculate the reconstruction error, see [https://fluxml.ai/Flux.jl/stable/models/losses/](https://fluxml.ai/Flux.jl/stable/models/losses/) for examples.\n\n## Examples\n\n```julia\nusing OutlierDetection: AEDetector, fit, score\ndetector = AEDetector()\nX = rand(10, 100)\nresult = fit(detector, X)\ntest_scores = transform(detector, result.model, X)\n```\n\n## References\n\n[1] Aggarwal, Charu C. 
(2017): Outlier Analysis.\n" ":name" = "AEDetector" ":human_name" = "ae detector" ":is_supervised" = "`false`" @@ -4317,7 +4755,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionNetworks.DSADDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -4337,7 +4777,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nDSADDetector(encoder = Chain(),\n decoder = Chain(),\n batchsize = 32,\n epochs = 1,\n shuffle = true,\n partial = false,\n opt = ADAM(),\n loss = mse,\n eta = 1,\n eps = 1e-6,\n callback = _ -> () -> ())\n```\n\nDeep Semi-Supervised Anomaly detection technique based on the distance to a hypersphere center as described in [1].\n\n## Parameters\n\n```\nencoder::Chain\n```\n\nTransforms the input data into a latent state with a fixed shape.\n\n```\ndecoder::Chain\n```\n\nTransforms the latent state back into the shape of the input data.\n\n```\nbatchsize::Integer\n```\n\nThe number of samples to work through before updating the internal model parameters.\n\n```\nepochs::Integer\n```\n\nThe number of passes of the entire training dataset the machine learning algorithm has completed. 
\n\n```\nshuffle::Bool\n```\n\nIf `shuffle=true`, shuffles the observations each time iterations are re-started, else no shuffling is performed.\n\n```\npartial::Bool\n```\n\nIf `partial=false`, drops the last mini-batch if it is smaller than the batchsize.\n\n```\nopt::Any\n```\n\nAny Flux-compatibale optimizer, typically a `struct` that holds all the optimiser parameters along with a definition of `apply!` that defines how to apply the update rule associated with the optimizer.\n\n```\nloss::Function\n```\n\nThe loss function used to calculate the reconstruction error, see [https://fluxml.ai/Flux.jl/stable/models/losses/](https://fluxml.ai/Flux.jl/stable/models/losses/) for examples.\n\n```\neta::Real\n```\n\nWeighting parameter for the labeled data; i.e. higher values of eta assign higher weight to labeled data in the svdd loss function. For a sensitivity analysis of this parameter, see [1].\n\n```\neps::Real\n```\n\nBecause the inverse distance used in the svdd loss can lead to division by zero, the parameters `eps` is added for numerical stability.\n\n```\ncallback::Function\n```\n\n*Experimental parameter that might change*. 
A function to be called after the model parameters have been updated that can call Flux's callback helpers, see [https://fluxml.ai/Flux.jl/stable/utilities/#Callback-Helpers-1](https://fluxml.ai/Flux.jl/stable/utilities/#Callback-Helpers-1).\n\n**Notice:** The parameters `batchsize`, `epochs`, `shuffle`, `partial`, `opt` and `callback` can also be tuples of size 2, specifying the corresponding values for (1) pretraining and (2) training; otherwise the same values are used for pretraining and training.\n\n## Examples\n\n```julia\nusing OutlierDetection: DSADDetector, fit, score\ndetector = DSADDetector()\nX = rand(10, 100)\ny = rand([-1,1], 100)\nmodel = fit(detector, X, y)\ntrain_scores, test_scores = score(detector, model, X)\n```\n\n## References\n\n[1] Ruff, Lukas; Vandermeulen, Robert A.; Görnitz, Nico; Binder, Alexander; Müller, Emmanuel; Müller, Klaus-Robert; Kloft, Marius (2019): Deep Semi-Supervised Anomaly Detection.\n" +":docstring" = "```\nDSADDetector(encoder = Chain(),\n decoder = Chain(),\n batchsize = 32,\n epochs = 1,\n shuffle = true,\n partial = false,\n opt = Adam(),\n loss = mse,\n eta = 1,\n eps = 1e-6,\n callback = _ -> () -> ())\n```\n\nDeep Semi-Supervised Anomaly detection technique based on the distance to a hypersphere center as described in [1].\n\n## Parameters\n\n```\nencoder::Chain\n```\n\nTransforms the input data into a latent state with a fixed shape.\n\n```\ndecoder::Chain\n```\n\nTransforms the latent state back into the shape of the input data.\n\n```\nbatchsize::Integer\n```\n\nThe number of samples to work through before updating the internal model parameters.\n\n```\nepochs::Integer\n```\n\nThe number of passes of the entire training dataset the machine learning algorithm has completed. 
\n\n```\nshuffle::Bool\n```\n\nIf `shuffle=true`, shuffles the observations each time iterations are re-started, else no shuffling is performed.\n\n```\npartial::Bool\n```\n\nIf `partial=false`, drops the last mini-batch if it is smaller than the batchsize.\n\n```\nopt::Any\n```\n\nAny Flux-compatibale optimizer, typically a `struct` that holds all the optimiser parameters along with a definition of `apply!` that defines how to apply the update rule associated with the optimizer.\n\n```\nloss::Function\n```\n\nThe loss function used to calculate the reconstruction error, see [https://fluxml.ai/Flux.jl/stable/models/losses/](https://fluxml.ai/Flux.jl/stable/models/losses/) for examples.\n\n```\neta::Real\n```\n\nWeighting parameter for the labeled data; i.e. higher values of eta assign higher weight to labeled data in the svdd loss function. For a sensitivity analysis of this parameter, see [1].\n\n```\neps::Real\n```\n\nBecause the inverse distance used in the svdd loss can lead to division by zero, the parameters `eps` is added for numerical stability.\n\n```\ncallback::Function\n```\n\n*Experimental parameter that might change*. 
A function to be called after the model parameters have been updated that can call Flux's callback helpers, see [https://fluxml.ai/Flux.jl/stable/utilities/#Callback-Helpers-1](https://fluxml.ai/Flux.jl/stable/utilities/#Callback-Helpers-1).\n\n**Notice:** The parameters `batchsize`, `epochs`, `shuffle`, `partial`, `opt` and `callback` can also be tuples of size 2, specifying the corresponding values for (1) pretraining and (2) training; otherwise the same values are used for pretraining and training.\n\n## Examples\n\n```julia\nusing OutlierDetection: DSADDetector, fit, score\ndetector = DSADDetector()\nX = rand(10, 100)\ny = rand([-1,1], 100)\nmodel = fit(detector, X, y)\ntrain_scores, test_scores = score(detector, model, X)\n```\n\n## References\n\n[1] Ruff, Lukas; Vandermeulen, Robert A.; Görnitz, Nico; Binder, Alexander; Müller, Emmanuel; Müller, Klaus-Robert; Kloft, Marius (2019): Deep Semi-Supervised Anomaly Detection.\n" ":name" = "DSADDetector" ":human_name" = "dsad detector" ":is_supervised" = "`true`" @@ -4349,7 +4789,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionNetworks.ESADDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -4369,7 +4811,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nESADDetector(encoder = Chain(),\n decoder = Chain(),\n batchsize = 32,\n epochs = 1,\n shuffle = false,\n partial = true,\n opt = ADAM(),\n λ1 = 1,\n λ2 = 1,\n noise = identity)\n```\n\nEnd-to-End semi-supervised anomaly detection algorithm similar to DeepSAD, but without the pretraining 
phase. The algorithm was published by Huang et al., see [1].\n\n## Parameters\n\n```\nencoder::Chain\n```\n\nTransforms the input data into a latent state with a fixed shape.\n\n```\ndecoder::Chain\n```\n\nTransforms the latent state back into the shape of the input data.\n\n```\nbatchsize::Integer\n```\n\nThe number of samples to work through before updating the internal model parameters.\n\n```\nepochs::Integer\n```\n\nThe number of passes of the entire training dataset the machine learning algorithm has completed. \n\n```\nshuffle::Bool\n```\n\nIf `shuffle=true`, shuffles the observations each time iterations are re-started, else no shuffling is performed.\n\n```\npartial::Bool\n```\n\nIf `partial=false`, drops the last mini-batch if it is smaller than the batchsize.\n\n```\nopt::Any\n```\n\nAny Flux-compatibale optimizer, typically a `struct` that holds all the optimiser parameters along with a definition of `apply!` that defines how to apply the update rule associated with the optimizer.\n\n```\nλ1::Real\n```\n\nWeighting parameter of the norm loss, which minimizes the empirical variance and thus minimizes entropy.\n\n```\nλ2::Real\n```\n\nWeighting parameter of the assistent loss function to define the consistency between the two encoders.\n\n```\nnoise::Function (AbstractArray{T} -> AbstractArray{T})\n```\n\nA function to be applied to a batch of input data to add noise, see [1] for an explanation.\n\n## Examples\n\n```julia\nusing OutlierDetection: ESADDetector, fit, score\ndetector = ESADDetector()\nX = rand(10, 100)\ny = rand([-1,1], 100)\nmodel = fit(detector, X, y)\ntrain_scores, test_scores = score(detector, model, X)\n```\n\n## References\n\n[1] Huang, Chaoqin; Ye, Fei; Zhang, Ya; Wang, Yan-Feng; Tian, Qi (2020): ESAD: End-to-end Deep Semi-supervised Anomaly Detection.\n" +":docstring" = "```\nESADDetector(encoder = Chain(),\n decoder = Chain(),\n batchsize = 32,\n epochs = 1,\n shuffle = false,\n partial = true,\n opt = Adam(),\n λ1 = 1,\n λ2 = 1,\n 
noise = identity)\n```\n\nEnd-to-End semi-supervised anomaly detection algorithm similar to DeepSAD, but without the pretraining phase. The algorithm was published by Huang et al., see [1].\n\n## Parameters\n\n```\nencoder::Chain\n```\n\nTransforms the input data into a latent state with a fixed shape.\n\n```\ndecoder::Chain\n```\n\nTransforms the latent state back into the shape of the input data.\n\n```\nbatchsize::Integer\n```\n\nThe number of samples to work through before updating the internal model parameters.\n\n```\nepochs::Integer\n```\n\nThe number of passes of the entire training dataset the machine learning algorithm has completed. \n\n```\nshuffle::Bool\n```\n\nIf `shuffle=true`, shuffles the observations each time iterations are re-started, else no shuffling is performed.\n\n```\npartial::Bool\n```\n\nIf `partial=false`, drops the last mini-batch if it is smaller than the batchsize.\n\n```\nopt::Any\n```\n\nAny Flux-compatibale optimizer, typically a `struct` that holds all the optimiser parameters along with a definition of `apply!` that defines how to apply the update rule associated with the optimizer.\n\n```\nλ1::Real\n```\n\nWeighting parameter of the norm loss, which minimizes the empirical variance and thus minimizes entropy.\n\n```\nλ2::Real\n```\n\nWeighting parameter of the assistent loss function to define the consistency between the two encoders.\n\n```\nnoise::Function (AbstractArray{T} -> AbstractArray{T})\n```\n\nA function to be applied to a batch of input data to add noise, see [1] for an explanation.\n\n## Examples\n\n```julia\nusing OutlierDetection: ESADDetector, fit, score\ndetector = ESADDetector()\nX = rand(10, 100)\ny = rand([-1,1], 100)\nmodel = fit(detector, X, y)\ntrain_scores, test_scores = score(detector, model, X)\n```\n\n## References\n\n[1] Huang, Chaoqin; Ye, Fei; Zhang, Ya; Wang, Yan-Feng; Tian, Qi (2020): ESAD: End-to-end Deep Semi-supervised Anomaly Detection.\n" ":name" = "ESADDetector" ":human_name" = "esad 
detector" ":is_supervised" = "`true`" @@ -4381,13 +4823,15 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [EvoTrees.EvoTreeClassifier] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" @@ -4408,18 +4852,20 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:loss, :nrounds, :λ, :γ, :η, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :α, :metric, :rng, :device)`" 
+":hyperparameters" = "`(:loss, :nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :metric, :rng, :device)`" ":hyperparameter_types" = "`(\"EvoTrees.ModelType\", \"Int64\", \"AbstractFloat\", \"AbstractFloat\", \"AbstractFloat\", \"Int64\", \"AbstractFloat\", \"AbstractFloat\", \"AbstractFloat\", \"Int64\", \"AbstractFloat\", \"Symbol\", \"Any\", \"Any\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [EvoTrees.EvoTreeGaussian] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Continuous}`" ":transform_scitype" = 
"`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" @@ -4440,18 +4886,20 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:loss, :nrounds, :λ, :γ, :η, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :α, :metric, :rng, :device)`" +":hyperparameters" = "`(:loss, :nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :metric, :rng, :device)`" ":hyperparameter_types" = "`(\"EvoTrees.ModelType\", \"Int64\", \"AbstractFloat\", \"AbstractFloat\", \"AbstractFloat\", \"Int64\", \"AbstractFloat\", \"AbstractFloat\", \"AbstractFloat\", \"Int64\", \"AbstractFloat\", \"Symbol\", \"Any\", \"Any\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [EvoTrees.EvoTreeRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = 
"`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" @@ -4472,18 +4920,20 @@ ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:loss, :nrounds, :λ, :γ, :η, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :α, :metric, :rng, :device)`" +":hyperparameters" = "`(:loss, :nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :metric, :rng, :device)`" ":hyperparameter_types" = "`(\"EvoTrees.ModelType\", \"Int64\", \"AbstractFloat\", \"AbstractFloat\", \"AbstractFloat\", \"Int64\", \"AbstractFloat\", \"AbstractFloat\", \"AbstractFloat\", \"Int64\", \"AbstractFloat\", \"Symbol\", \"Any\", \"Any\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [EvoTrees.EvoTreeCount] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":output_scitype" = 
"`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Count}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Count}}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Count}}`" ":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Count}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" @@ -4504,12 +4954,14 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:loss, :nrounds, :λ, :γ, :η, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :α, :metric, :rng, :device)`" +":hyperparameters" = "`(:loss, :nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :metric, :rng, :device)`" ":hyperparameter_types" = "`(\"EvoTrees.ModelType\", \"Int64\", \"AbstractFloat\", \"AbstractFloat\", \"AbstractFloat\", \"Int64\", \"AbstractFloat\", \"AbstractFloat\", \"AbstractFloat\", \"Int64\", \"AbstractFloat\", \"Symbol\", \"Any\", \"Any\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJModels.ConstantClassifier] ":input_scitype" = "`ScientificTypesBase.Table`" @@ -4541,7 
+4993,9 @@ ":hyperparameter_ranges" = "`()`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJModels.Standardizer] ":input_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -4573,7 +5027,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJModels.DeterministicConstantClassifier] ":input_scitype" = "`ScientificTypesBase.Table`" @@ -4605,7 +5061,9 @@ ":hyperparameter_ranges" = "`()`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJModels.UnivariateTimeTypeToContinuous] ":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" @@ -4637,7 +5095,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJModels.OneHotEncoder] ":input_scitype" = "`ScientificTypesBase.Table`" @@ -4669,7 +5129,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJModels.ContinuousEncoder] ":input_scitype" = "`ScientificTypesBase.Table`" @@ -4701,7 +5163,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" 
+":reporting_operations" = "`()`" [MLJModels.UnivariateBoxCoxTransformer] ":input_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" @@ -4733,7 +5197,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJModels.ConstantRegressor] ":input_scitype" = "`ScientificTypesBase.Table`" @@ -4765,7 +5231,9 @@ ":hyperparameter_ranges" = "`(nothing,)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJModels.FeatureSelector] ":input_scitype" = "`ScientificTypesBase.Table`" @@ -4797,7 +5265,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJModels.UnivariateDiscretizer] ":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -4829,7 +5299,9 @@ ":hyperparameter_ranges" = "`(nothing,)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJModels.FillImputer] ":input_scitype" = "`ScientificTypesBase.Table`" @@ -4861,7 +5333,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJModels.DeterministicConstantRegressor] ":input_scitype" = "`ScientificTypesBase.Table`" @@ -4893,7 +5367,9 @@ ":hyperparameter_ranges" = "`()`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" 
+":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJModels.UnivariateStandardizer] ":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" @@ -4925,7 +5401,9 @@ ":hyperparameter_ranges" = "`()`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJModels.UnivariateFillImputer] ":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" @@ -4957,7 +5435,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.MCDDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -4989,7 +5469,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.COPODDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5021,7 +5503,9 @@ ":hyperparameter_ranges" = "`()`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.HBOSDetector] ":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5053,7 +5537,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.IForestDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5085,7 +5571,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.SOSDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5117,7 +5605,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.ABODDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5149,7 +5639,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.LOFDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5181,7 +5673,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.PCADetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5213,7 +5707,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.OCSVMDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5245,7 +5741,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.SODDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5277,7 +5775,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.LODADetector] ":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5309,7 +5809,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.KNNDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5341,7 +5843,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.COFDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5373,7 +5877,9 @@ ":hyperparameter_ranges" = "`(nothing,)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.CBLOFDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5405,7 +5911,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.LOCIDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5437,7 +5945,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.LMDDDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5469,7 +5979,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OutlierDetectionPython.RODDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" @@ -5501,7 +6013,9 @@ ":hyperparameter_ranges" = "`(nothing,)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [OneRule.OneRuleClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" @@ -5533,7 +6047,9 @@ ":hyperparameter_ranges" = "`()`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [LIBSVM.EpsilonSVR] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5565,7 +6081,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" 
+":reporting_operations" = "`()`" [LIBSVM.LinearSVC] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5597,7 +6115,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [LIBSVM.NuSVR] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5629,7 +6149,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [LIBSVM.NuSVC] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5661,7 +6183,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [LIBSVM.SVC] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5693,7 +6217,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [LIBSVM.OneClassSVM] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5725,7 +6251,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = 
"`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [TSVD.TSVDTransformer] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" @@ -5757,7 +6285,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [GLM.LinearBinaryClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5777,7 +6307,7 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Linear binary classifier with specified link (e.g. logistic)." +":docstring" = "```\nLinearBinaryClassifier\n```\n\nA model type for constructing a linear binary classifier, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\n```\n\nDo `model = LinearBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearBinaryClassifier(fit_intercept=...)`.\n\n`LinearBinaryClassifier` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a binary target variable, with a user-specified link function. 
Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor(2)` or `<:Multiclass(2)`; check the scitype with `scitype(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `link=GLM.LogitLink()`: The function which links the linear prediction function to the probability of a particular outcome or class. This must have type `GLM.Link01`. Options include `GLM.LogitLink()`, `GLM.ProbitLink()`, `GLM.CloglogLink()`, `GLM.CauchitLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. 
Lower bound for the factor used to update the linear fit.\n * `report_keys::Union{Symbol, Nothing}=DEFAULT_KEYS`: keys to be used in the report. Should be one of: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features used during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n\n# Examples\n\n```\nusing MLJ\nimport GLM # namespace must be available\n\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\nclf = LinearBinaryClassifier(fit_intercept=false, link=GLM.ProbitLink())\n\nX, y = @load_crabs\n\nmach = machine(clf, X, y) |> fit!\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\nyhat = predict(mach, Xnew) # probabilistic predictions\npdf(yhat, levels(y)) # probability matrix\np_B = pdf.(yhat, \"B\")\nclass_labels = predict_mode(mach, Xnew)\n\nfitted_params(mach).features\nfitted_params(mach).coef\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also 
[`LinearRegressor`](@ref), [`LinearCountRegressor`](@ref)\n" ":name" = "LinearBinaryClassifier" ":human_name" = "linear binary classifier" ":is_supervised" = "`true`" @@ -5789,7 +6319,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [GLM.LinearCountRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5809,7 +6341,7 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Linear count regressor with specified link and distribution (e.g. log link and poisson)." +":docstring" = "```\nLinearCountRegressor\n```\n\nA model type for constructing a linear count regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearCountRegressor = @load LinearCountRegressor pkg=GLM\n```\n\nDo `model = LinearCountRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearCountRegressor(fit_intercept=...)`.\n\n`LinearCountRegressor` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a `Count` target variable (non-negative, unbounded integer) with user-specified link function. 
Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Count`; check the scitype with `scitype(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `distribution=Distributions.Poisson()`: The distribution which the residuals/errors of the model should fit.\n * `link=GLM.LogLink()`: The function which links the linear prediction function to the probability of a particular outcome or class. This should be one of the following: `GLM.IdentityLink()`, `GLM.InverseLink()`, `GLM.InverseSquareLink()`, `GLM.LogLink()`, `GLM.SqrtLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. 
Lower bound for the factor used to update the linear fit.\n * `report_keys::Union{Symbol, Nothing}=DEFAULT_KEYS`: keys to be used in the report. Should be one of: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same Scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n\n# Examples\n\n```\nusing MLJ\nimport MLJ.Distributions.Poisson\n\n# Generate some data whose target y looks Poisson when conditioned on\n# X:\nN = 10_000\nw = [1.0, -2.0, 3.0]\nmu(x) = exp(w'x) # mean for a log link function\nXmat = rand(N, 3)\nX = MLJ.table(Xmat)\ny = map(1:N) do i\n x = Xmat[i, :]\n rand(Poisson(mu(x)))\nend;\n\nCountRegressor = @load LinearCountRegressor pkg=GLM\nmodel = CountRegressor(fit_intercept=false)\nmach = machine(model, X, y)\nfit!(mach)\n\nXnew = MLJ.table(rand(3, 3))\nyhat = predict(mach, Xnew)\nyhat_point = predict_mean(mach, Xnew)\n\n# get coefficients approximating `w`:\njulia> 
fitted_params(mach).coef\n3-element Vector{Float64}:\n 0.9969008753103842\n -2.0255901752504775\n 3.014407534033522\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n" ":name" = "LinearCountRegressor" ":human_name" = "linear count regressor" ":is_supervised" = "`true`" @@ -5821,7 +6353,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [GLM.LinearRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5841,7 +6375,7 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Linear regressor (OLS) with a Normal model." +":docstring" = "```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=GLM\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(fit_intercept=...)`.\n\n`LinearRegressor` assumes the target is a continuous variable whose conditional distribution is normal with constant variance, and whose expected value is a linear combination of the features (identity link function). 
Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n * `w`: is a vector of `Real` per-observation weights\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `dropcollinear=false`: Whether to drop features in the training data to ensure linear independence. If true , only the first of each set of linearly-dependent features is used. The coefficient for redundant linearly dependent features is `0.0` and all associated statistics are set to `NaN`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `report_keys::Union{Symbol, Nothing}=DEFAULT_KEYS`: keys to be used in the report. Should be one of: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same Scitype as `X` above. 
Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n\n# Examples\n\n```\nusing MLJ\nLinearRegressor = @load LinearRegressor pkg=GLM\nglm = LinearRegressor()\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(glm, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\nyhat_point = predict_mean(mach, Xnew) # new predictions\n\nfitted_params(mach).features\nfitted_params(mach).coef # x1, x2, intercept\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearCountRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n" ":name" = "LinearRegressor" ":human_name" = "linear regressor" ":is_supervised" = "`true`" @@ -5853,7 +6387,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" +":reporting_operations" = "`()`" [MLJFlux.MultitargetNeuralNetworkRegressor] ":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5873,7 +6409,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "A neural network model for making deterministic predictions of a `Continuous` multi-target, presented as a table, given a table of `Continuous` features. " +":docstring" = "```\nMultitargetNeuralNetworkRegressor\n```\n\nA model type for constructing a multitarget neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetNeuralNetworkRegressor = @load MultitargetNeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = MultitargetNeuralNetworkRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetNeuralNetworkRegressor(builder=...)`.\n\n`MultitargetNeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a multi-valued `Continuous` target, represented as a table, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of output targets whose element scitype is `Continuous`; check column scitypes with `schema(y)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `Linear`, `Short`, and `MLP`. 
See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser::Flux.Adam()`: A `Flux.Optimise` optimiser. The optimiser performs the updating of the weights of the network. For further reference, see [the Flux optimiser documentation](https://fluxml.ai/Flux.jl/stable/training/optimisers/). To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of 10 between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between 8 and 512. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. 
If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we apply a multi-target regression model to synthetic data:\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\n```\n\nFirst, we generate some synthetic data (needs MLJBase 0.20.16 or higher):\n\n```julia\nX, y = make_regression(100, 9; n_targets = 2) # both tables\nschema(y)\nschema(X)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. 
In the following `@builder` call, `n_in` is a proxy for the number of input features and `n_out` the number of target variables (both known at `fit!` time), while `rng` is a proxy for an RNG (which will be passed from the `rng` field of `model` defined below).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating the regression model:\n\n```julia\nMultitargetNeuralNetworkRegressor = @load MultitargetNeuralNetworkRegressor\nmodel = MultitargetNeuralNetworkRegressor(builder=builder, rng=123, epochs=20)\n```\n\nWe will arrange for standardization of the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, target=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training. 
We can also see the losses in the output of `report(mach)`\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\nFor experimenting with learning rate, see the [`NeuralNetworkRegressor`](@ref) example.\n\n```\npipe.transformed_target_model_deterministic.model.optimiser.eta = 0.0001\n```\n\nWith the learning rate fixed, we can now compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n# custom MLJ loss:\nmulti_loss(yhat, y) = l2(MLJ.matrix(yhat), MLJ.matrix(y)) |> mean\n\n# CV estimate, based on `(X, y)`:\nevaluate!(mach, resampling=CV(nfolds=5), measure=multi_loss)\n\n# loss for `(Xtest, ytest)`:\nfit!(mach) # trains on all data `(X, y)`\nyhat = predict(mach, Xtest)\nmulti_loss(yhat, ytest)\n```\n\nSee also [`NeuralNetworkRegressor`](@ref)\n" ":name" = "MultitargetNeuralNetworkRegressor" ":human_name" = "multitarget neural network regressor" ":is_supervised" = "`true`" @@ -5885,7 +6421,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":epochs" ":supports_training_losses" = "`true`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`(:optimiser, :builder)`" +":reporting_operations" = "`()`" [MLJFlux.NeuralNetworkClassifier] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5905,7 +6443,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "A neural network model for making probabilistic predictions of a `Multiclass` or `OrderedFactor` target, given a table of `Continuous` features. 
" +":docstring" = "```\nNeuralNetworkClassifier\n```\n\nA model type for constructing a neural network classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkClassifier(builder=...)`.\n\n`NeuralNetworkClassifier` is for training a data-dependent Flux.jl neural network for making probabilistic predictions of a `Multiclass` or `OrderedFactor` target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass` or `OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` below.\n * `optimiser::Flux.Adam()`: A `Flux.Optimise` optimiser. The optimiser performs the updating of the weights of the network. For further reference, see [the Flux optimiser documentation](https://fluxml.ai/Flux.jl/stable/training/optimisers/). 
To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of 10 between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n * `Flux.logitcrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: The batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between 8 and 512. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. 
A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.softmax`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a classification model using the Iris dataset. This is a very basic example, using a default builder and no standardization. 
For a more advanced illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and examples in the MLJFlux.jl documentation.\n\n```julia\nusing MLJ\nusing Flux\nimport RDatasets\n```\n\nFirst, we can load the data:\n\n```julia\niris = RDatasets.dataset(\"datasets\", \"iris\");\ny, X = unpack(iris, ==(:Species), rng=123); # a vector and a table\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\nclf = NeuralNetworkClassifier()\n```\n\nNext, we can train the model:\n\n```julia\nmach = machine(clf, X, y)\nfit!(mach)\n```\n\nWe can train the model in an incremental fashion, altering the learning rate as we go, provided `optimizer_changes_trigger_retraining` is `false` (the default). Here, we also change the number of (total) iterations:\n\n```julia\nclf.optimiser.eta = clf.optimiser.eta * 2\nclf.epochs = clf.epochs + 5\n\nfit!(mach, verbosity=2) # trains 5 more epochs\n```\n\nWe can inspect the mean training loss using the `cross_entropy` function:\n\n```julia\ntraining_loss = cross_entropy(predict(mach, X), y) |> mean\n```\n\nAnd we can access the Flux chain (model) using `fitted_params`:\n\n```julia\nchain = fitted_params(mach).chain\n```\n\nFinally, we can see how the out-of-sample performance changes over time, using MLJ's `learning_curve` function:\n\n```julia\nr = range(clf, :epochs, lower=1, upper=200, scale=:log10)\ncurve = learning_curve(clf, X, y,\n range=r,\n resampling=Holdout(fraction_train=0.7),\n measure=cross_entropy)\nusing Plots\nplot(curve.parameter_values,\n curve.measurements,\n xlab=curve.parameter_name,\n xscale=curve.parameter_scale,\n ylab = \"Cross Entropy\")\n\n```\n\nSee also [`ImageClassifier`](@ref).\n" ":name" = "NeuralNetworkClassifier" ":human_name" = "neural network classifier" ":is_supervised" = "`true`" @@ -5917,7 +6455,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":epochs" 
":supports_training_losses" = "`true`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`(:optimiser, :builder)`" +":reporting_operations" = "`()`" [MLJFlux.ImageClassifier] ":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Image}`" @@ -5937,7 +6477,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "A neural network model for making probabilistic predictions of a `GrayImage` target, given a table of `Continuous` features. " +":docstring" = "```\nImageClassifier\n```\n\nA model type for constructing a image classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nImageClassifier = @load ImageClassifier pkg=MLJFlux\n```\n\nDo `model = ImageClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ImageClassifier(builder=...)`.\n\n`ImageClassifier` classifies images using a neural network adapted to the type of images provided (color or gray scale). Predictions are probabilistic. Users provide a recipe for constructing the network, based on properties of the image encountered, by specifying an appropriate `builder`. 
See MLJFlux documentation for more on builders.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any `AbstractVector` of images with `ColorImage` or `GrayImage` scitype; check the scitype with `scitype(X)` and refer to ScientificTypes.jl documentation on coercing typical image formats into an appropriate type.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder`: An MLJFlux builder that constructs the neural network. The fallback builds a depth-16 VGG architecture adapted to the image size and number of target classes, with no batch normalization; see the Metalhead.jl documentation for details. See the example below for a user-specified builder. A convenience macro `@builder` is also available. See also `finaliser` below.\n * `optimiser::Flux.Adam()`: A `Flux.Optimise` optimiser. The optimiser performs the updating of the weights of the network. For further reference, see [the Flux optimiser documentation](https://fluxml.ai/Flux.jl/stable/training/optimisers/). To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of 10 between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). 
For a classification task, the most natural loss functions are:\n\n * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n * `Flux.logitcrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: The batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between 8 and 512. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. 
For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.softmax`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we use MLJFlux and a custom builder to classify the MNIST image dataset.\n\n```julia\nusing MLJ\nusing Flux\nimport MLJFlux\nimport MLJIteration # for `skip` control\n```\n\nFirst we want to download the MNIST dataset, and unpack into images and labels:\n\n```julia\nimport MLDatasets: MNIST\ndata = MNIST(split=:train)\nimages, labels = data.features, data.targets\n```\n\nIn MLJ, integers cannot be used for encoding categorical data, so we must coerce them into the `Multiclass` scitype:\n\n```julia\nlabels = coerce(labels, Multiclass);\n```\n\nAbove `images` is a single array but MLJFlux requires the images to be a vector of individual image arrays:\n\n```\nimages = coerce(images, GrayImage);\nimages[1]\n```\n\nWe start by defining a suitable `builder` object. This is a recipe for building the neural network. Our builder will work for images of any (constant) size, whether they be color or black and white (ie, single or multi-channel). 
The architecture always consists of six alternating convolution and max-pool layers, and a final dense layer; the filter size and the number of channels after each convolution layer is customizable.\n\n```julia\nimport MLJFlux\n\nstruct MyConvBuilder\n filter_size::Int\n channels1::Int\n channels2::Int\n channels3::Int\nend\n\nmake2d(x::AbstractArray) = reshape(x, :, size(x)[end])\n\nfunction MLJFlux.build(b::MyConvBuilder, rng, n_in, n_out, n_channels)\n k, c1, c2, c3 = b.filter_size, b.channels1, b.channels2, b.channels3\n mod(k, 2) == 1 || error(\"`filter_size` must be odd. \")\n p = div(k - 1, 2) # padding to preserve image size\n init = Flux.glorot_uniform(rng)\n front = Chain(\n Conv((k, k), n_channels => c1, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c1 => c2, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c2 => c3, pad=(p, p), relu, init=init),\n MaxPool((2 ,2)),\n make2d)\n d = Flux.outputsize(front, (n_in..., n_channels, 1)) |> first\n return Chain(front, Dense(d, n_out, init=init))\nend\n```\n\nIt is important to note that in our `build` function, there is no final `softmax`. This is applied by default in all MLJFlux classifiers (override this using the `finaliser` hyperparameter).\n\nNow that our builder is defined, we can instantiate the actual MLJFlux model. If you have a GPU, you can substitute in `acceleration=CUDALibs()` below to speed up training.\n\n```julia\nImageClassifier = @load ImageClassifier pkg=MLJFlux\nclf = ImageClassifier(builder=MyConvBuilder(3, 16, 32, 32),\n batch_size=50,\n epochs=10,\n rng=123)\n```\n\nYou can add Flux options such as `optimiser` and `loss` in the snippet above. 
Currently, `loss` must be a flux-compatible loss, and not an MLJ measure.\n\nNext, we can bind the model with the data in a machine, and train using the first 500 images:\n\n```julia\nmach = machine(clf, images, labels);\nfit!(mach, rows=1:500, verbosity=2);\nreport(mach)\nchain = fitted_params(mach)\nFlux.params(chain)[2]\n```\n\nWe can tack on 20 more epochs by modifying the `epochs` field, and iteratively fit some more:\n\n```julia\nclf.epochs = clf.epochs + 20\nfit!(mach, rows=1:500, verbosity=2);\n```\n\nWe can also make predictions and calculate an out-of-sample loss estimate, using any MLJ measure (loss/score):\n\n```julia\npredicted_labels = predict(mach, rows=501:1000);\ncross_entropy(predicted_labels, labels[501:1000]) |> mean\n```\n\nThe preceding `fit!`/`predict`/evaluate workflow can be alternatively executed as follows:\n\n```julia\nevaluate!(mach,\n resampling=Holdout(fraction_train=0.5),\n measure=cross_entropy,\n rows=1:1000,\n verbosity=0)\n```\n\nSee also [`NeuralNetworkClassifier`](@ref).\n" ":name" = "ImageClassifier" ":human_name" = "image classifier" ":is_supervised" = "`true`" @@ -5949,7 +6489,9 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":epochs" ":supports_training_losses" = "`true`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`(:optimiser, :builder)`" +":reporting_operations" = "`()`" [MLJFlux.NeuralNetworkRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -5969,7 +6511,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "A neural network model for making deterministic predictions of a `Continuous` target, given a table of `Continuous` features. 
" +":docstring" = "```\nNeuralNetworkRegressor\n```\n\nA model type for constructing a neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkRegressor(builder=...)`.\n\n`NeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a `Continuous` target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser::Flux.Adam()`: A `Flux.Optimise` optimiser. The optimiser performs the updating of the weights of the network. For further reference, see [the Flux optimiser documentation](https://fluxml.ai/Flux.jl/stable/training/optimisers/). 
To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of 10 between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: The batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between 8 and 512. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. 
For Training on GPU, use `CUDALibs()`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalized if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a regression model for the Boston house price dataset.\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\n```\n\nFirst, we load in the data: The `:MEDV` column becomes the target vector `y`, and all remaining columns go into a table `X`, with the exception of `:CHAS`:\n\n```julia\ndata = OpenML.load(531); # Loads from https://www.openml.org/d/531\ny, X = unpack(data, ==(:MEDV), !=(:CHAS); rng=123);\n\nscitype(y)\nschema(X)\n```\n\nSince MLJFlux models do not handle ordered factors, we'll treat `:RAD` as `Continuous`:\n\n```julia\nX = coerce(X, :RAD=>Continuous)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. In the following `@builder` call, `n_in` is a proxy for the number input features (which will be known at `fit!` time) and `rng` is a proxy for a RNG (which will be passed from the `rng` field of `model` defined below). We also have the parameter `n_out` which is the number of output features. 
As we are doing single target regression, the value passed will always be `1`, but the builder we define will also work for [`MultitargetNeuralNetworkRegressor`](@ref).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating a model:\n\n```julia\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\nmodel = NeuralNetworkRegressor(\n builder=builder,\n rng=123,\n epochs=20\n)\n```\n\nWe arrange for standardization of the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, target=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training. We can also see the losses in the output of `report(mach)`.\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\n## Experimenting with learning rate\n\nWe can visually compare how the learning rate affects the predictions:\n\n```julia\nusing Plots\n\nrates = [5e-5, 1e-4, 0.005, 0.001, 0.05]\nplt=plot()\n\nforeach(rates) do η\n pipe.transformed_target_model_deterministic.model.optimiser.eta = η\n fit!(mach, force=true, verbosity=0)\n losses =\n report(mach).transformed_target_model_deterministic.model.training_losses[3:end]\n plot!(1:length(losses), losses, label=η)\nend\n\nplt\n\npipe.transformed_target_model_deterministic.model.optimiser.eta = 0.0001\n```\n\nWith the learning rate fixed, we compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n# CV estimate, based on `(X, y)`:\nevaluate!(mach, resampling=CV(nfolds=5), 
measure=l2)\n\n# loss for `(Xtest, test)`:\nfit!(mach) # train on `(X, y)`\nyhat = predict(mach, Xtest)\nl2(yhat, ytest) |> mean\n```\n\nThese losses, for the pipeline model, refer to the target on the original, unstandardized, scale.\n\nFor implementing stopping criterion and other iteration controls, refer to examples linked from the MLJFlux documentation.\n\nSee also [`MultitargetNeuralNetworkRegressor`](@ref)\n" ":name" = "NeuralNetworkRegressor" ":human_name" = "neural network regressor" ":is_supervised" = "`true`" @@ -5981,4 +6523,6 @@ ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":epochs" ":supports_training_losses" = "`true`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`(:optimiser, :builder)`" +":reporting_operations" = "`()`" diff --git a/src/registry/Models.toml b/src/registry/Models.toml index e66aa21f..9af8557b 100644 --- a/src/registry/Models.toml +++ b/src/registry/Models.toml @@ -1,4 +1,4 @@ -BetaML = ["RandomForestRegressor", "RandomForestClassifier", "PerceptronClassifier", "DecisionTreeRegressor", "PegasosClassifier", "KMedoids", "KMeans", "DecisionTreeClassifier", "GMMClusterer", "MissingImputator", "KernelPerceptronClassifier"] +BetaML = ["BetaMLGenericImputer", "RandomForestRegressor", "RandomForestClassifier", "PerceptronClassifier", "DecisionTreeRegressor", "BetaMLGMMImputer", "PegasosClassifier", "KMedoids", "BetaMLGMMRegressor", "KMeans", "DecisionTreeClassifier", "GMMClusterer", "MissingImputator", "BetaMLMeanImputer", "BetaMLRFImputer", "KernelPerceptronClassifier"] NearestNeighborModels = ["KNNClassifier", "MultitargetKNNClassifier", "MultitargetKNNRegressor", "KNNRegressor"] OutlierDetectionNeighbors = ["ABODDetector", "DNNDetector", "LOFDetector", "KNNDetector", "COFDetector"] PartialLeastSquaresRegressor = ["KPLSRegressor", "PLSRegressor"] diff --git a/src/registry/Project.toml b/src/registry/Project.toml index 
94823604..5e3987fe 100644 --- a/src/registry/Project.toml +++ b/src/registry/Project.toml @@ -15,6 +15,7 @@ MLJMultivariateStatsInterface = "1b6a4a23-ba22-4f51-9698-8599985d3728" MLJNaiveBayesInterface = "33e4bacb-b9e2-458e-9a13-5d9a90b235fa" MLJScikitLearnInterface = "5ae90465-5518-4432-b9d2-8a1def2f0cab" MLJTSVDInterface = "7fa162e1-0e29-41ca-a6fa-c000ca4e7e7e" +MLJTestIntegration = "697918b4-fdc1-4f9e-8ff9-929724cee270" MLJText = "5e27fcf9-6bac-46ba-8580-b5712f3d6387" MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91" NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36" diff --git a/src/registry/README.md b/src/registry/README.md index 9e809b4d..594fd934 100644 --- a/src/registry/README.md +++ b/src/registry/README.md @@ -1 +1,12 @@ -[Instructions](https://github.com/alan-turing-institute/MLJ.jl/blob/master/REGISTRY.md) on registering models implementing the MLJ interface. +The *MLJ Model Registry*, also known as the *model metadata database*, +is the file in the current directory called +[Metadata.toml](Metadata.toml). It is generated by loading all +packages in [Project.toml](Project.toml), searching for all types +implementing MLJ's model interface, and extracting [model +trait](https://github.com/JuliaAI/MLJModelInterface.jl/blob/dev/src/MLJModelInterface.jl) +values for all such types. The abbreviated list of models in +[Models.toml](Models.toml) is generated at the same time. + + +[Instructions](https://github.com/JuliaAI/MLJModels.jl#instructions-for-updating-the-mlj-model-registry) on registering models implementing the MLJ interface. 
+ diff --git a/src/registry/src/Registry.jl b/src/registry/src/Registry.jl index 1049e1cd..2901bd14 100644 --- a/src/registry/src/Registry.jl +++ b/src/registry/src/Registry.jl @@ -1,8 +1,15 @@ -module Registry +module Registry -# for this module -import Pkg +using Pkg import Pkg.TOML +using MLJModels +import MLJModelInterface +import MLJModelInterface.Model +for T in MLJModelInterface.MODEL_TRAITS + @eval(import MLJModelInterface.$T) +end + +using OrderedCollections using InteractiveUtils # TODO: is this import really needed?? @@ -10,128 +17,22 @@ using InteractiveUtils import ScientificTypes: Found, Continuous, Finite, Infinite import ScientificTypes: OrderedFactor, Count, Multiclass, Binary +export @update, check_registry, activate_registry_project, info_dict + const srcdir = dirname(@__FILE__) # the directory containing this file const environment_path = joinpath(srcdir, "..") -## METHODS TO GENERATE METADATA AND WRITE TO ARCHIVE - -function finaltypes(T::Type) - s = InteractiveUtils.subtypes(T) - if isempty(s) - return [T, ] - else - return reduce(vcat, [finaltypes(S) for S in s]) - end -end - -const project_toml = joinpath(srcdir, "../Project.toml") -const packages = map(Symbol, - keys(TOML.parsefile(project_toml)["deps"])|>collect) -push!(packages, :MLJModels) -filter!(packages) do pkg - !(pkg in (:InteractiveUtils, :Pkg, :MLJModelInterface)) -end - -const package_import_commands = [:(import $pkg) for pkg in packages] - -macro update() - mod = __module__ - _update(mod, false) -end - -macro update(ex) - mod = __module__ - test_env_only = eval(ex) - test_env_only isa Bool || "b in @update(b) must be Bool. " - _update(mod, test_env_only) -end - -function _update(mod, test_env_only) - - test_env_only && @info "Testing registry environment only. 
" +# for extracting model traits from a loaded model type +include("info_dict.jl") - program1 = quote - @info "Packages to be searched for model implementations:" - for pkg in $packages - println(pkg) - end - using Pkg - Pkg.activate($environment_path) - @info "resolving registry environment..." - Pkg.resolve() - end +# for generating and serializing the complete model metadata database +include("update.jl") - program2 = quote +# for checking `@load` works for all models in the database +include("check_registry.jl") - @info "Instantiating registry environment..." - Pkg.instantiate() +# for activating a clone of the registry environment: +include("activate_registry_project.jl") - @info "Loading registered packages..." - import MLJModels - using Pkg.TOML - - # import the packages - $(Registry.package_import_commands...) - - @info "Generating model metadata..." - - modeltypes = - MLJModels.Registry.finaltypes(MLJModels.Model) - filter!(modeltypes) do T - !isabstracttype(T) && !MLJModels.MLJModelInterface.is_wrapper(T) - end - - # generate and write to file the model metadata: - api_packages = string.(MLJModels.Registry.packages) - meta_given_package = Dict() - - for M in modeltypes - _info = MLJModels.info_dict(M) - pkg = _info[:package_name] - path = _info[:load_path] - api_pkg = split(path, '.') |> first - pkg in ["unknown",] && - @warn "$M `package_name` or `load_path` is \"unknown\")" - modelname = _info[:name] - api_pkg in api_packages || - error("Bad `load_path` trait for $M: "* - "$api_pkg not a registered package. 
") - haskey(meta_given_package, pkg) || - (meta_given_package[pkg] = Dict()) - haskey(meta_given_package, modelname) && - error("Encountered multiple model names for "* - "`package_name=$pkg`") - meta_given_package[pkg][modelname] = _info - println(M, "\u2714 ") - end - print("\r") - - open(joinpath(MLJModels.Registry.srcdir, "../Metadata.toml"), "w") do file - TOML.print(file, MLJModels.encode_dic(meta_given_package)) - end - - # generate and write to file list of models for each package: - models_given_pkg = Dict() - for pkg in keys(meta_given_package) - models_given_pkg[pkg] = collect(keys(meta_given_package[pkg])) - end - open(joinpath(MLJModels.Registry.srcdir, "../Models.toml"), "w") do file - TOML.print(file, models_given_pkg) - end - - :(println("Local Metadata.toml updated.")) - - end - - mod.eval(program1) - test_env_only || mod.eval(program2) - - println("\n You can check the registry by running "* - "`MLJModels.check_registry() but may need to force "* - "recompilation of MLJModels.\n\n"* - "You can safely ignore \"conflicting import\" warnings. ") - - true -end end # module diff --git a/src/registry/src/activate_registry_project.jl b/src/registry/src/activate_registry_project.jl new file mode 100644 index 00000000..6abde5dd --- /dev/null +++ b/src/registry/src/activate_registry_project.jl @@ -0,0 +1,42 @@ +""" + registry_project() + +Experimental, private method. + +Return, as a `Vector{String}`, the lines of the Project.toml used to +generate MLJ Model Registry (aka, model metadata). This Project.toml +file lists as dependencies all packages that provide registered +models. + +""" +registry_project() = MLJModels.REGISTRY_PROJECT[] + +""" + activate_registry_project() + activate_registry_project(path) + +Experimental, private method. + +In the first case, activate a temporary environment using a copy of +the [MLJ Project +Registry](https://github.com/JuliaAI/MLJModels.jl/tree/dev/src/registry) +Project.toml file. 
This environment will include all packages +providing registered models. + +In the second case, create the environment at the specified `path`. + +To instantiate the environment (for which no Manifest.toml will exist) +run `using Pkg; Pkg.instantiate()`. + +""" +function activate_registry_project(projectdir=mktempdir(; cleanup=false)) + filename, stream = mktemp(projectdir) + for line in registry_project() + write(stream, line*"\n") + end + close(stream) + project_filename = joinpath(first(splitdir(filename)), "Project.toml") + mv(filename, project_filename) # move, not copy: leaves no stray temp file beside Project.toml; still errors if Project.toml already exists + Pkg.activate(projectdir) + return nothing +end diff --git a/src/registry/src/check_registry.jl b/src/registry/src/check_registry.jl index c08c6b07..418d03bf 100644 --- a/src/registry/src/check_registry.jl +++ b/src/registry/src/check_registry.jl @@ -1,32 +1,33 @@ -function check_registry() +""" + MLJModels.check_registry(; mod=Main, verbosity=1) + +Check that every model in the [MLJ Model +Registry](https://github.com/JuliaAI/MLJModels.jl/tree/dev/src/registry) +has a working `load_path` trait by using it to import the model +type. Here `mod` should be the module from which the method is called +(`Main` by default, but `mod=@__MODULE__` should work in general). + +Returns a row table detailing the failures, which is empty in the case +of no failures. 
+ +""" +function check_registry(; mod=Main, verbosity=1) basedir = Registry.environment_path mljmodelsdir = joinpath(basedir, "..", "..", ".") Pkg.activate(basedir) Pkg.develop(PackageSpec(path=mljmodelsdir)) Pkg.instantiate() - Pkg.precompile() - # Read Metadata.toml - dict = TOML.parsefile(joinpath(basedir, "Metadata.toml")) + quote # built here, then evaluated in `mod` by the `|> mod.eval` below + using MLJTestIntegration + fails, _ = MLJTestIntegration.test( + MLJModels.models(); + level=1, + mod=$mod, + verbosity=$verbosity + ) + fails # value of the evaluated block, and hence the return value + end |> mod.eval - problems = String[] - for (package, model_dict) in dict - for (model, meta) in model_dict - # check if new entry or changed entry, otherwise don't test - key = "$package.$model" - program = quote - @load $model pkg=$package verbosity=-1 - end - try - eval(program) - # add/refresh entry - print(rpad("Entry for $key was loaded properly ✓", 79)*"\r") - catch ex - push!(problems, string(key)) - @error "⚠ there was an issue trying to load $key" exception=ex - end - end - end - return problems end diff --git a/src/registry/src/update.jl b/src/registry/src/update.jl new file mode 100644 index 00000000..a68dbffa --- /dev/null +++ b/src/registry/src/update.jl @@ -0,0 +1,161 @@ +## METHODS TO GENERATE METADATA AND WRITE TO ARCHIVE + +function finaltypes(T::Type) # types in the subtype tree of `T` that have no subtypes themselves + s = InteractiveUtils.subtypes(T) + if isempty(s) + return [T, ] + else + return reduce(vcat, [finaltypes(S) for S in s]) + end +end + +const project_toml = joinpath(srcdir, "../Project.toml") +const packages = map(Symbol, + keys(TOML.parsefile(project_toml)["deps"])|>collect) +push!(packages, :MLJModels) +filter!(packages) do pkg # these four get no `import` command below + !(pkg in (:InteractiveUtils, :Pkg, :MLJModelInterface, :MLJTestIntegration)) +end + +const package_import_commands = [:(import $pkg) for pkg in packages] # one `import` expression per remaining package + +macro update() + mod = __module__ + _update(mod, false) +end + +""" + MLJModels.@update + +Update the [MLJ Model +Registry](https://github.com/JuliaAI/MLJModels.jl/tree/dev/src/registry) +by loading all packages in the registry Project.toml 
file and +searching for types implementing the MLJ model interface. + +*For MLJ administrators only.* + +To register all the models in GreatNewPackage with MLJ: + +- In a clone of the dev branch of MLJModels, change + to the `/src/registry/` directory and, in the latest version of + julia, activate the environment specified by the Project.toml there, + after checking the [compat] conditions there are up to date. It is + suggested you do not use `Revise`. + +- Add `GreatNewPackage` to the environment. + +- In some environment to which your MLJModels clone has been added + (using `Pkg.dev`) execute `using MLJModels; MLJModels.@update`. This updates + `src/registry/Metadata.toml` and `src/registry/Models.toml` (the + latter is generated for convenience and not used by MLJ). + +- Quit your REPL session and make a trivial commit to your MLJModels + branch to force pre-compilation in a new julia session when you run + `using MLJModels`. (For technical reasons the registry is not loaded + in `__init__()`, so without pre-compilation the new registry is not + available.) + +- Test that the interfaces load properly with + `MLJModels.check_registry()`. (CI will fail on dev -> master if + this test fails.) + +- Push your changes to an appropriate branch of MLJModels to make + the updated metadata available to users of the next MLJModels tagged + release. + +`MLJModels.@update true` only activates and resolves the registry environment. +""" +macro update(ex) + mod = __module__ + test_env_only = eval(ex) # evaluated at macro-expansion time, in this module + test_env_only isa Bool || error("b in @update(b) must be Bool. ") + _update(mod, test_env_only) +end + +function _update(mod, test_env_only) + + test_env_only && @info "Testing registry environment only. " + + program1 = quote + @info "Packages to be searched for model implementations:" + for pkg in $packages + println(pkg) + end + using Pkg + Pkg.activate($environment_path) + @info "resolving registry environment..." + Pkg.resolve() + end + + program2 = quote + + @info "Instantiating registry environment..." 
+ Pkg.instantiate() + + @info "Loading registered packages..." + import MLJModels + using Pkg.TOML + + # import the packages + $(Registry.package_import_commands...) + + @info "Generating model metadata..." + + modeltypes = + MLJModels.Registry.finaltypes(MLJModels.Model) + filter!(modeltypes) do T + !isabstracttype(T) && !MLJModels.MLJModelInterface.is_wrapper(T) + end + + # generate and write to file the model metadata: + api_packages = string.(MLJModels.Registry.packages) + meta_given_package = Dict() + + for M in modeltypes + _info = MLJModels.info_dict(M) + pkg = _info[:package_name] + path = _info[:load_path] + api_pkg = split(path, '.') |> first + pkg in ["unknown",] && + @warn "$M `package_name` or `load_path` is \"unknown\"" + modelname = _info[:name] + api_pkg in api_packages || + error("Bad `load_path` trait for $M: "* + "$api_pkg not a registered package. ") + haskey(meta_given_package, pkg) || + (meta_given_package[pkg] = Dict()) + haskey(meta_given_package[pkg], modelname) && # look in this package's own dict: names need only be unique per package + error("Encountered multiple model names for "* + "`package_name=$pkg`") + meta_given_package[pkg][modelname] = _info + println(M, "\u2714 ") + end + print("\r") + + open(joinpath(MLJModels.Registry.srcdir, "../Metadata.toml"), "w") do file + TOML.print(file, MLJModels.encode_dic(meta_given_package)) + end + + # generate and write to file list of models for each package: + models_given_pkg = Dict() + for pkg in keys(meta_given_package) + models_given_pkg[pkg] = collect(keys(meta_given_package[pkg])) + end + open(joinpath(MLJModels.Registry.srcdir, "../Models.toml"), "w") do file + TOML.print(file, models_given_pkg) + end + + println("Local Metadata.toml updated.") # a plain call: a nested `:(...)` here would only build an Expr and print nothing + + end + + mod.eval(program1) + test_env_only || mod.eval(program2) + + println("\n You can check the registry by running "* + "`MLJModels.check_registry()` but may need to force "* + "recompilation of MLJModels.\n\n"* + "You can safely ignore \"conflicting import\" warnings. 
") + + true +end diff --git a/test/info_dict.jl b/src/registry/test/info_dict.jl similarity index 100% rename from test/info_dict.jl rename to src/registry/test/info_dict.jl diff --git a/src/registry/test/runtests.jl b/src/registry/test/runtests.jl new file mode 100644 index 00000000..d484e258 --- /dev/null +++ b/src/registry/test/runtests.jl @@ -0,0 +1,5 @@ +@testset "info_dict.jl" begin + @test include("info_dict.jl") +end + +true diff --git a/test/builtins/Transformers.jl b/test/builtins/Transformers.jl index ee8e403c..f4d19933 100644 --- a/test/builtins/Transformers.jl +++ b/test/builtins/Transformers.jl @@ -522,6 +522,16 @@ end @test_throws Exception Xt.favourite_number__10 @test_throws Exception Xt.name__Mary @test report.new_features == collect(MLJBase.schema(Xt).names) + + # Test when the first value is missing + X = (name=categorical([missing, "John", "Mary", "John"]),) + t = OneHotEncoder() + f, _, _ = MLJBase.fit(t, 0, X) + Xt = MLJBase.transform(t, f, X) + @test Xt.name__John[1] === Xt.name__Mary[1] === missing + @test Xt.name__John[2:end] == Union{Missing, Float64}[1.0, 0.0, 1.0] + @test Xt.name__Mary[2:end] == Union{Missing, Float64}[0.0, 1.0, 0.0] + end diff --git a/test/runtests.jl b/test/runtests.jl index cf1b5928..3336055a 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,10 +1,10 @@ using Test, MLJModels -@testset "metadata" begin - @testset "info_dict" begin - @test include("info_dict.jl") - end +@testset "registry" begin + @test include(joinpath("..", "src", "registry", "test", "runtests.jl")) +end +@testset "metadata" begin @testset "metadata.jl" begin @test include("metadata.jl") end