From c7ca6233741863ab21bec8243f9f751f919ec1e6 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Sat, 27 Jan 2024 10:16:56 +1300
Subject: [PATCH 1/2] update registry

---
 src/MLJModels.jl           |   2 +-
 src/registry/Metadata.toml | 370 ++++++++++++++++++-------------------
 src/registry/Models.toml   |   2 +-
 3 files changed, 187 insertions(+), 187 deletions(-)

diff --git a/src/MLJModels.jl b/src/MLJModels.jl
index 763a6be2..639df9f3 100755
--- a/src/MLJModels.jl
+++ b/src/MLJModels.jl
@@ -1,4 +1,4 @@
-module MLJModels 
+module MLJModels
 
 import MLJModelInterface
 import MLJModelInterface: Model, metadata_pkg, metadata_model, @mlj_model, info,
diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml
index 8bf297c6..e07a51d7 100644
--- a/src/registry/Metadata.toml
+++ b/src/registry/Metadata.toml
@@ -10,7 +10,7 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.Trees.RandomForestRegressor"
+":load_path" = "BetaML.Bmlj.RandomForestRegressor"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
@@ -44,7 +44,7 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.Imputation.GaussianMixtureImputer"
+":load_path" = "BetaML.Bmlj.GaussianMixtureImputer"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
@@ -78,7 +78,7 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.Trees.RandomForestClassifier"
+":load_path" = "BetaML.Bmlj.RandomForestClassifier"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
@@ -112,7 +112,7 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.Imputation.RandomForestImputer"
+":load_path" = "BetaML.Bmlj.RandomForestImputer"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
@@ -135,6 +135,74 @@
 ":deep_properties" = "`()`"
 ":reporting_operations" = "`()`"
 
+[BetaML.PerceptronClassifier]
+":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}`"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractVector{<:ScientificTypesBase.Finite}}`"
+":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_pure_julia" = "`true`"
+":package_name" = "BetaML"
+":package_license" = "MIT"
+":load_path" = "BetaML.Bmlj.PerceptronClassifier"
+":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
+":package_url" = "https://github.com/sylvaticus/BetaML.jl"
+":is_wrapper" = "`false`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```julia\nmutable struct PerceptronClassifier <: MLJModelInterface.Probabilistic\n```\n\nThe classical perceptron algorithm using one-vs-all for multiclass, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n  * `initial_coefficients::Union{Nothing, Matrix{Float64}}`: N-classes by D-dimensions matrix of initial linear coefficients [def: `nothing`, i.e. zeros]\n  * `initial_constant::Union{Nothing, Vector{Float64}}`: N-classes vector of initial contant terms [def: `nothing`, i.e. zeros]\n  * `epochs::Int64`: Maximum number of epochs, i.e. passages trough the whole training sample [def: `1000`]\n  * `shuffle::Bool`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n  * `force_origin::Bool`: Whether to force the parameter associated with the constant term to remain zero [def: `false`]\n  * `return_mean_hyperplane::Bool`: Whether to return the average hyperplane coefficients instead of the final ones  [def: `false`]\n  * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_iris;\n\njulia> modelType   = @load PerceptronClassifier pkg = \"BetaML\"\n[ Info: For silent loading, specify `verbosity=0`. \nimport BetaML ✔\nBetaML.Perceptron.PerceptronClassifier\n\njulia> model       = modelType()\nPerceptronClassifier(\n  initial_coefficients = nothing, \n  initial_constant = nothing, \n  epochs = 1000, \n  shuffle = true, \n  force_origin = false, \n  return_mean_hyperplane = false, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(PerceptronClassifier(initial_coefficients = nothing, …), …).\n*** Avg. error after epoch 2 : 0.0 (all elements of the set has been correctly classified)\njulia> est_classes = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>2.53e-34, virginica=>0.0)\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>1.27e-18, virginica=>1.86e-310)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>2.77e-57, versicolor=>1.1099999999999999e-82, virginica=>1.0)\n UnivariateFinite{Multiclass{3}}(setosa=>3.09e-22, versicolor=>4.03e-25, virginica=>1.0)\n```\n"
+":name" = "PerceptronClassifier"
+":human_name" = "perceptron classifier"
+":is_supervised" = "`true`"
+":prediction_type" = ":probabilistic"
+":abstract_type" = "`MLJModelInterface.Probabilistic`"
+":implemented_methods" = [":fit", ":predict"]
+":hyperparameters" = "`(:initial_coefficients, :initial_constant, :epochs, :shuffle, :force_origin, :return_mean_hyperplane, :rng)`"
+":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Random.AbstractRNG\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+
+[BetaML.AutoEncoder]
+":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`"
+":output_scitype" = "`AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":transform_scitype" = "`AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`"
+":is_pure_julia" = "`true`"
+":package_name" = "BetaML"
+":package_license" = "MIT"
+":load_path" = "BetaML.Bmlj.AutoEncoder"
+":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
+":package_url" = "https://github.com/sylvaticus/BetaML.jl"
+":is_wrapper" = "`false`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```julia\nmutable struct AutoEncoder <: MLJModelInterface.Unsupervised\n```\n\nA ready-to use AutoEncoder, from the Beta Machine Learning Toolkit (BetaML) for ecoding and decoding of data using neural networks\n\n# Parameters:\n\n  * `e_layers`: The layers (vector of `AbstractLayer`s) responsable of the encoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `innerdims`]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n  * `d_layers`: The layers (vector of `AbstractLayer`s) responsable of the decoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `innerdims`]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n  * `outdims`: The number of neurons (i.e. dimensions) of the encoded data. If the value is a float it is consiered a percentual (to be rounded) of the dimensionality of the data [def: `0.33`]\n  * `innerdims`: Inner layer dimension (i.e. number of neurons). If the value is a float it is considered a percentual (to be rounded) of the dimensionality of the data [def: `nothing` that applies a specific heuristic]. Consider that the underlying neural network is trying to predict multiple values at the same times. Normally this requires many more neurons than a scalar prediction. If `e_layers` or `d_layers` are specified, this parameter is ignored for the respective part.\n  * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as (n x d) matrices.\n\n    !!! warning\n        If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n  * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost` if `loss==squared_cost`, `nothing` otherwise, i.e. use the derivative of the squared cost or autodiff]\n  * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `200`]\n  * `batch_size`: Size of each individual batch [def: `8`]\n  * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`] See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n  * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n  * `tunemethod`: The method - and its parameters - to employ for hyperparameters autotuning. See [`SuccessiveHalvingSearch`](@ref) for the default method. To implement automatic hyperparameter tuning during the (first) `fit!` call simply set `autotune=true` and eventually change the default `tunemethod` options (including the parameter ranges, the resources to employ and the loss function to adopt).\n\n  * `descr`: An optional title and/or description for this model\n  * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n  * data must be numerical\n  * use `transform` to obtain the encoded data, and `inverse_trasnform` to decode to the original data\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_iris;\n\njulia> modelType   = @load AutoEncoder pkg = \"BetaML\" verbosity=0;\n\njulia> model       = modelType(outdims=2,innerdims=10);\n\njulia> mach        = machine(model, X)\nuntrained Machine; caches model-specific representations of data\n  model: AutoEncoder(e_layers = nothing, …)\n  args: \n    1:\tSource @334 ⏎ Table{AbstractVector{Continuous}}\n\njulia> fit!(mach,verbosity=2)\n[ Info: Training machine(AutoEncoder(e_layers = nothing, …), …).\n***\n*** Training  for 200 epochs with algorithm BetaML.Nn.ADAM.\nTraining.. \t avg loss on epoch 1 (1): \t 35.48243542158747\nTraining.. \t avg loss on epoch 20 (20): \t 0.07528042222678126\nTraining.. \t avg loss on epoch 40 (40): \t 0.06293071729378613\nTraining.. \t avg loss on epoch 60 (60): \t 0.057035588828991145\nTraining.. \t avg loss on epoch 80 (80): \t 0.056313167754822875\nTraining.. \t avg loss on epoch 100 (100): \t 0.055521461091809436\nTraining the Neural Network...  52%|██████████████████████████████████████                                   |  ETA: 0:00:01Training.. \t avg loss on epoch 120 (120): \t 0.06015206472927942\nTraining.. \t avg loss on epoch 140 (140): \t 0.05536835903285201\nTraining.. \t avg loss on epoch 160 (160): \t 0.05877560142428245\nTraining.. \t avg loss on epoch 180 (180): \t 0.05476302769966953\nTraining.. \t avg loss on epoch 200 (200): \t 0.049240864053557445\nTraining the Neural Network... 100%|█████████████████████████████████████████████████████████████████████████| Time: 0:00:01\nTraining of 200 epoch completed. Final epoch error: 0.049240864053557445.\ntrained Machine; caches model-specific representations of data\n  model: AutoEncoder(e_layers = nothing, …)\n  args: \n    1:\tSource @334 ⏎ Table{AbstractVector{Continuous}}\n\n\njulia> X_latent    = transform(mach, X)\n150×2 Matrix{Float64}:\n 7.01701   -2.77285\n 6.50615   -2.9279\n 6.5233    -2.60754\n ⋮        \n 6.70196  -10.6059\n 6.46369  -11.1117\n 6.20212  -10.1323\n\njulia> X_recovered = inverse_transform(mach,X_latent)\n150×4 Matrix{Float64}:\n 5.04973  3.55838  1.43251  0.242215\n 4.73689  3.19985  1.44085  0.295257\n 4.65128  3.25308  1.30187  0.244354\n ⋮                          \n 6.50077  2.93602  5.3303   1.87647\n 6.38639  2.83864  5.54395  2.04117\n 6.01595  2.67659  5.03669  1.83234\n\njulia> BetaML.relative_mean_error(MLJ.matrix(X),X_recovered)\n0.03387721261716176\n\n\n```\n"
+":name" = "AutoEncoder"
+":human_name" = "auto encoder"
+":is_supervised" = "`false`"
+":prediction_type" = ":unknown"
+":abstract_type" = "`MLJModelInterface.Unsupervised`"
+":implemented_methods" = [":fit", ":inverse_transform", ":transform"]
+":hyperparameters" = "`(:e_layers, :d_layers, :outdims, :innerdims, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :tunemethod, :descr, :rng)`"
+":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Float64, Int64}\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"BetaML.Api.AutoTuneMethod\", \"String\", \"Random.AbstractRNG\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+
 [BetaML.DecisionTreeRegressor]
 ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}`"
 ":output_scitype" = "`ScientificTypesBase.Unknown`"
@@ -146,7 +214,7 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.Trees.DecisionTreeRegressor"
+":load_path" = "BetaML.Bmlj.DecisionTreeRegressor"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
@@ -169,7 +237,7 @@
 ":deep_properties" = "`()`"
 ":reporting_operations" = "`()`"
 
-[BetaML.LinearPerceptron]
+[BetaML.PegasosClassifier]
 ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}`"
 ":output_scitype" = "`ScientificTypesBase.Unknown`"
 ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`"
@@ -180,64 +248,64 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.Perceptron.LinearPerceptron"
+":load_path" = "BetaML.Bmlj.PegasosClassifier"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
 ":supports_weights" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "```julia\nmutable struct LinearPerceptron <: MLJModelInterface.Probabilistic\n```\n\nThe classical perceptron algorithm using one-vs-all for multiclass, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n  * `initial_coefficients::Union{Nothing, Matrix{Float64}}`: N-classes by D-dimensions matrix of initial linear coefficients [def: `nothing`, i.e. zeros]\n  * `initial_constant::Union{Nothing, Vector{Float64}}`: N-classes vector of initial contant terms [def: `nothing`, i.e. zeros]\n  * `epochs::Int64`: Maximum number of epochs, i.e. passages trough the whole training sample [def: `1000`]\n  * `shuffle::Bool`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n  * `force_origin::Bool`: Whether to force the parameter associated with the constant term to remain zero [def: `false`]\n  * `return_mean_hyperplane::Bool`: Whether to return the average hyperplane coefficients instead of the final ones  [def: `false`]\n  * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_iris;\n\njulia> modelType   = @load LinearPerceptron pkg = \"BetaML\"\n[ Info: For silent loading, specify `verbosity=0`. \nimport BetaML ✔\nBetaML.Perceptron.LinearPerceptron\n\njulia> model       = modelType()\nLinearPerceptron(\n  initial_coefficients = nothing, \n  initial_constant = nothing, \n  epochs = 1000, \n  shuffle = true, \n  force_origin = false, \n  return_mean_hyperplane = false, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(LinearPerceptron(initial_coefficients = nothing, …), …).\n*** Avg. error after epoch 2 : 0.0 (all elements of the set has been correctly classified)\njulia> est_classes = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>2.53e-34, virginica=>0.0)\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>1.27e-18, virginica=>1.86e-310)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>2.77e-57, versicolor=>1.1099999999999999e-82, virginica=>1.0)\n UnivariateFinite{Multiclass{3}}(setosa=>3.09e-22, versicolor=>4.03e-25, virginica=>1.0)\n```\n"
-":name" = "LinearPerceptron"
-":human_name" = "linear perceptron"
+":docstring" = "```julia\nmutable struct PegasosClassifier <: MLJModelInterface.Probabilistic\n```\n\nThe gradient-based linear \"pegasos\" classifier using one-vs-all for multiclass, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n  * `initial_coefficients::Union{Nothing, Matrix{Float64}}`: N-classes by D-dimensions matrix of initial linear coefficients [def: `nothing`, i.e. zeros]\n  * `initial_constant::Union{Nothing, Vector{Float64}}`: N-classes vector of initial contant terms [def: `nothing`, i.e. zeros]\n  * `learning_rate::Function`: Learning rate [def: (epoch -> 1/sqrt(epoch))]\n  * `learning_rate_multiplicative::Float64`: Multiplicative term of the learning rate [def: `0.5`]\n  * `epochs::Int64`: Maximum number of epochs, i.e. passages trough the whole training sample [def: `1000`]\n  * `shuffle::Bool`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n  * `force_origin::Bool`: Whether to force the parameter associated with the constant term to remain zero [def: `false`]\n  * `return_mean_hyperplane::Bool`: Whether to return the average hyperplane coefficients instead of the final ones  [def: `false`]\n  * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_iris;\n\njulia> modelType   = @load PegasosClassifier pkg = \"BetaML\" verbosity=0\nBetaML.Perceptron.PegasosClassifier\n\njulia> model       = modelType()\nPegasosClassifier(\n  initial_coefficients = nothing, \n  initial_constant = nothing, \n  learning_rate = BetaML.Perceptron.var\"#71#73\"(), \n  learning_rate_multiplicative = 0.5, \n  epochs = 1000, \n  shuffle = true, \n  force_origin = false, \n  return_mean_hyperplane = false, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> est_classes = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.817, versicolor=>0.153, virginica=>0.0301)\n UnivariateFinite{Multiclass{3}}(setosa=>0.791, versicolor=>0.177, virginica=>0.0318)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.5, virginica=>0.246)\n UnivariateFinite{Multiclass{3}}(setosa=>0.283, versicolor=>0.51, virginica=>0.207)\n```\n"
+":name" = "PegasosClassifier"
+":human_name" = "pegasos classifier"
 ":is_supervised" = "`true`"
 ":prediction_type" = ":probabilistic"
 ":abstract_type" = "`MLJModelInterface.Probabilistic`"
 ":implemented_methods" = [":fit", ":predict"]
-":hyperparameters" = "`(:initial_coefficients, :initial_constant, :epochs, :shuffle, :force_origin, :return_mean_hyperplane, :rng)`"
-":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Random.AbstractRNG\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":hyperparameters" = "`(:initial_coefficients, :initial_constant, :learning_rate, :learning_rate_multiplicative, :epochs, :shuffle, :force_origin, :return_mean_hyperplane, :rng)`"
+":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Function\", \"Float64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Random.AbstractRNG\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
 ":iteration_parameter" = "`nothing`"
 ":supports_training_losses" = "`false`"
 ":reports_feature_importances" = "`false`"
 ":deep_properties" = "`()`"
 ":reporting_operations" = "`()`"
 
-[BetaML.Pegasos]
-":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}`"
+[BetaML.NeuralNetworkRegressor]
+":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`"
 ":output_scitype" = "`ScientificTypesBase.Unknown`"
-":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`"
-":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractVector{<:ScientificTypesBase.Finite}}`"
-":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`"
+":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`"
+":predict_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`"
 ":transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.Perceptron.Pegasos"
+":load_path" = "BetaML.Bmlj.NeuralNetworkRegressor"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
 ":supports_weights" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "```julia\nmutable struct Pegasos <: MLJModelInterface.Probabilistic\n```\n\nThe gradient-based linear \"pegasos\" classifier using one-vs-all for multiclass, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n  * `initial_coefficients::Union{Nothing, Matrix{Float64}}`: N-classes by D-dimensions matrix of initial linear coefficients [def: `nothing`, i.e. zeros]\n  * `initial_constant::Union{Nothing, Vector{Float64}}`: N-classes vector of initial contant terms [def: `nothing`, i.e. zeros]\n  * `learning_rate::Function`: Learning rate [def: (epoch -> 1/sqrt(epoch))]\n  * `learning_rate_multiplicative::Float64`: Multiplicative term of the learning rate [def: `0.5`]\n  * `epochs::Int64`: Maximum number of epochs, i.e. passages trough the whole training sample [def: `1000`]\n  * `shuffle::Bool`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n  * `force_origin::Bool`: Whether to force the parameter associated with the constant term to remain zero [def: `false`]\n  * `return_mean_hyperplane::Bool`: Whether to return the average hyperplane coefficients instead of the final ones  [def: `false`]\n  * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_iris;\n\njulia> modelType   = @load Pegasos pkg = \"BetaML\" verbosity=0\nBetaML.Perceptron.Pegasos\n\njulia> model       = modelType()\nPegasos(\n  initial_coefficients = nothing, \n  initial_constant = nothing, \n  learning_rate = BetaML.Perceptron.var\"#71#73\"(), \n  learning_rate_multiplicative = 0.5, \n  epochs = 1000, \n  shuffle = true, \n  force_origin = false, \n  return_mean_hyperplane = false, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> est_classes = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.817, versicolor=>0.153, virginica=>0.0301)\n UnivariateFinite{Multiclass{3}}(setosa=>0.791, versicolor=>0.177, virginica=>0.0318)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.5, virginica=>0.246)\n UnivariateFinite{Multiclass{3}}(setosa=>0.283, versicolor=>0.51, virginica=>0.207)\n```\n"
-":name" = "Pegasos"
-":human_name" = "pegasos"
+":docstring" = "```julia\nmutable struct NeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.\n\n# Parameters:\n\n  * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n  * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D\n\n    !!! warning\n        If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n  * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n  * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `200`]\n  * `batch_size`: Size of each individual batch [def: `16`]\n  * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n  * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n  * `descr`: An optional title and/or description for this model\n  * `cb`: A call back function to provide information during training [def: `fitting_info`]\n  * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n  * data must be numerical\n  * the label should be be a *n-records* vector.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_boston;\n\njulia> modelType   = @load NeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkRegressor\n\njulia> layers                      = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)];\n\njulia> model       = modelType(layers=layers,opt_alg=BetaML.ADAM());\nNeuralNetworkRegressor(\n  layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], \n  loss = BetaML.Utils.squared_cost, \n  dloss = BetaML.Utils.dsquared_cost, \n  epochs = 100, \n  batch_size = 32, \n  opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n  shuffle = true, \n  descr = \"\", \n  cb = BetaML.Nn.fitting_info, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> ŷ    = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0  30.7726\n 21.6  28.0811\n 34.7  31.3194\n  ⋮    \n 23.9  30.9032\n 22.0  29.49\n 11.9  27.2438\n```\n"
+":name" = "NeuralNetworkRegressor"
+":human_name" = "neural network regressor"
 ":is_supervised" = "`true`"
-":prediction_type" = ":probabilistic"
-":abstract_type" = "`MLJModelInterface.Probabilistic`"
+":prediction_type" = ":deterministic"
+":abstract_type" = "`MLJModelInterface.Deterministic`"
 ":implemented_methods" = [":fit", ":predict"]
-":hyperparameters" = "`(:initial_coefficients, :initial_constant, :learning_rate, :learning_rate_multiplicative, :epochs, :shuffle, :force_origin, :return_mean_hyperplane, :rng)`"
-":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Function\", \"Float64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Random.AbstractRNG\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`"
+":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
 ":iteration_parameter" = "`nothing`"
 ":supports_training_losses" = "`false`"
 ":reports_feature_importances" = "`false`"
 ":deep_properties" = "`()`"
 ":reporting_operations" = "`()`"
 
-[BetaML.KMedoids]
+[BetaML.KMeansClusterer]
 ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
 ":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
 ":target_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`"
@@ -248,16 +316,16 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.Clustering.KMedoids"
+":load_path" = "BetaML.Bmlj.KMeansClusterer"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
 ":supports_weights" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "```julia\nmutable struct KMedoids <: MLJModelInterface.Unsupervised\n```\n\n# Parameters:\n\n  * `n_classes::Int64`: Number of classes to discriminate the data [def: 3]\n  * `dist::Function`: Function to employ as distance. Default to the Euclidean distance. Can be one of the predefined distances (`l1_distance`, `l2_distance`, `l2squared_distance`),  `cosine_distance`), any user defined function accepting two vectors and returning a scalar or an anonymous function with the same characteristics.\n  * `initialisation_strategy::String`: The computation method of the vector of the initial representatives. One of the following:\n\n      * \"random\": randomly in the X space\n      * \"grid\": using a grid approach\n      * \"shuffle\": selecting randomly within the available points [default]\n      * \"given\": using a provided set of initial representatives provided in the `initial_representatives` parameter\n\n  * `initial_representatives::Union{Nothing, Matrix{Float64}}`: Provided (K x D) matrix of initial representatives (useful only with `initialisation_strategy=\"given\"`) [default: `nothing`]\n  * `rng::Random.AbstractRNG`: Random Number Generator [deafult: `Random.GLOBAL_RNG`]\n\nThe K-medoids clustering algorithm with customisable distance function, from the Beta Machine Learning Toolkit (BetaML).\n\nSimilar to K-Means, but the \"representatives\" (the cetroids) are guaranteed to be one of the training points. The algorithm work with any arbitrary distance measure.\n\n# Notes:\n\n  * data must be numerical\n  * online fitting (re-fitting with new data) is supported\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_iris;\n\njulia> modelType   = @load KMedoids pkg = \"BetaML\" verbosity=0\nBetaML.Clustering.KMedoids\n\njulia> model       = modelType()\nKMedoids(\n  n_classes = 3, \n  dist = BetaML.Clustering.var\"#39#41\"(), \n  initialisation_strategy = \"shuffle\", \n  initial_representatives = nothing, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(KMedoids(n_classes = 3, …), …).\n\njulia> classes_est = predict(mach, X);\n\njulia> hcat(y,classes_est)\n150×2 CategoricalArrays.CategoricalArray{Union{Int64, String},2,UInt32}:\n \"setosa\"     3\n \"setosa\"     3\n \"setosa\"     3\n ⋮            \n \"virginica\"  1\n \"virginica\"  1\n \"virginica\"  2\n```\n"
-":name" = "KMedoids"
-":human_name" = "k medoids"
+":docstring" = "```julia\nmutable struct KMeansClusterer <: MLJModelInterface.Unsupervised\n```\n\nThe classical KMeansClusterer clustering algorithm, from the Beta Machine Learning Toolkit (BetaML).\n\n# Parameters:\n\n  * `n_classes::Int64`: Number of classes to discriminate the data [def: 3]\n  * `dist::Function`: Function to employ as distance. Default to the Euclidean distance. Can be one of the predefined distances (`l1_distance`, `l2_distance`, `l2squared_distance`),  `cosine_distance`), any user defined function accepting two vectors and returning a scalar or an anonymous function with the same characteristics. Attention that, contrary to `KMedoidsClusterer`, the `KMeansClusterer` algorithm is not guaranteed to converge with other distances than the Euclidean one.\n  * `initialisation_strategy::String`: The computation method of the vector of the initial representatives. One of the following:\n\n      * \"random\": randomly in the X space\n      * \"grid\": using a grid approach\n      * \"shuffle\": selecting randomly within the available points [default]\n      * \"given\": using a provided set of initial representatives provided in the `initial_representatives` parameter\n\n  * `initial_representatives::Union{Nothing, Matrix{Float64}}`: Provided (K x D) matrix of initial representatives (useful only with `initialisation_strategy=\"given\"`) [default: `nothing`]\n  * `rng::Random.AbstractRNG`: Random Number Generator [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n  * data must be numerical\n  * online fitting (re-fitting with new data) is supported\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_iris;\n\njulia> modelType   = @load KMeansClusterer pkg = \"BetaML\" verbosity=0\nBetaML.Clustering.KMeansClusterer\n\njulia> model       = modelType()\nKMeansClusterer(\n  n_classes = 3, \n  dist = BetaML.Clustering.var\"#34#36\"(), \n  initialisation_strategy = \"shuffle\", \n  initial_representatives = nothing, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(KMeansClusterer(n_classes = 3, …), …).\n\njulia> classes_est = predict(mach, X);\n\njulia> hcat(y,classes_est)\n150×2 CategoricalArrays.CategoricalArray{Union{Int64, String},2,UInt32}:\n \"setosa\"     2\n \"setosa\"     2\n \"setosa\"     2\n ⋮            \n \"virginica\"  3\n \"virginica\"  3\n \"virginica\"  1\n```\n"
+":name" = "KMeansClusterer"
+":human_name" = "k means clusterer"
 ":is_supervised" = "`false`"
 ":prediction_type" = ":unknown"
 ":abstract_type" = "`MLJModelInterface.Unsupervised`"
@@ -271,40 +339,6 @@
 ":deep_properties" = "`()`"
 ":reporting_operations" = "`()`"
 
-[BetaML.NeuralNetworkRegressor]
-":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`"
-":output_scitype" = "`ScientificTypesBase.Unknown`"
-":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`"
-":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`"
-":predict_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`"
-":transform_scitype" = "`ScientificTypesBase.Unknown`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":is_pure_julia" = "`true`"
-":package_name" = "BetaML"
-":package_license" = "MIT"
-":load_path" = "BetaML.Nn.NeuralNetworkRegressor"
-":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
-":package_url" = "https://github.com/sylvaticus/BetaML.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
-":supports_class_weights" = "`false`"
-":supports_online" = "`false`"
-":docstring" = "```julia\nmutable struct NeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.\n\n# Parameters:\n\n  * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n  * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D\n\n    !!! warning\n        If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n  * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n  * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `200`]\n  * `batch_size`: Size of each individual batch [def: `16`]\n  * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n  * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n  * `descr`: An optional title and/or description for this model\n  * `cb`: A call back function to provide information during training [def: `fitting_info`]\n  * `rng`: Random Number Generator (see [`BetaML.FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n  * data must be numerical\n  * the label should be be a *n-records* vector.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_boston;\n\njulia> modelType   = @load NeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkRegressor\n\njulia> layers                      = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)];\n\njulia> model       = modelType(layers=layers,opt_alg=BetaML.ADAM());\nNeuralNetworkRegressor(\n  layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], \n  loss = BetaML.Utils.squared_cost, \n  dloss = BetaML.Utils.dsquared_cost, \n  epochs = 100, \n  batch_size = 32, \n  opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n  shuffle = true, \n  descr = \"\", \n  cb = BetaML.Nn.fitting_info, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> ŷ    = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0  30.7726\n 21.6  28.0811\n 34.7  31.3194\n  ⋮    \n 23.9  30.9032\n 22.0  29.49\n 11.9  27.2438\n```\n"
-":name" = "NeuralNetworkRegressor"
-":human_name" = "neural network regressor"
-":is_supervised" = "`true`"
-":prediction_type" = ":deterministic"
-":abstract_type" = "`MLJModelInterface.Deterministic`"
-":implemented_methods" = [":fit", ":predict"]
-":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`"
-":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
-":supports_training_losses" = "`false`"
-":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-
 [BetaML.MultitargetGaussianMixtureRegressor]
 ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}`"
 ":output_scitype" = "`ScientificTypesBase.Unknown`"
@@ -316,7 +350,7 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.GMM.MultitargetGaussianMixtureRegressor"
+":load_path" = "BetaML.Bmlj.MultitargetGaussianMixtureRegressor"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
@@ -350,7 +384,7 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.GMM.GaussianMixtureRegressor"
+":load_path" = "BetaML.Bmlj.GaussianMixtureRegressor"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
@@ -384,14 +418,14 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.Nn.MultitargetNeuralNetworkRegressor"
+":load_path" = "BetaML.Bmlj.MultitargetNeuralNetworkRegressor"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
 ":supports_weights" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "```julia\nmutable struct MultitargetNeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of multiple dimensional targets.\n\n# Parameters:\n\n  * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n  * `loss`: Loss (cost) function [def: `BetaML.squared_cost`].  Should always assume y and ŷ as matrices.\n\n    !!! warning\n        If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n  * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n  * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `300`]\n  * `batch_size`: Size of each individual batch [def: `16`]\n  * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n  * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n  * `descr`: An optional title and/or description for this model\n  * `cb`: A call back function to provide information during training [def: `BetaML.fitting_info`]\n  * `rng`: Random Number Generator (see [`BetaML.FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n  * data must be numerical\n  * the label should be a *n-records* by *n-dimensions* matrix\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_boston;\n\njulia> ydouble     = hcat(y, y .*2  .+5);\n\njulia> modelType   = @load MultitargetNeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.MultitargetNeuralNetworkRegressor\n\njulia> layers                      = [BetaML.DenseLayer(12,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,2,f=BetaML.relu)];\n\njulia> model       = modelType(layers=layers,opt_alg=BetaML.ADAM(),epochs=500)\nMultitargetNeuralNetworkRegressor(\n  layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.2591582523441157 -0.027962845131416225 … 0.16044535560124418 -0.12838827994676857; -0.30381834909561184 0.2405495243851402 … -0.2588144861880588 0.09538577909777807; … ; -0.017320292924711156 -0.14042266424603767 … 0.06366999105841187 -0.13419651752478906; 0.07393079961409338 0.24521350531110264 … 0.04256867886217541 -0.0895506802948175], [0.14249427336553644, 0.24719379413682485, -0.25595911822556566, 0.10034088778965933, -0.017086404878505712, 0.21932184025609347, -0.031413516834861266, -0.12569076082247596, -0.18080140982481183, 0.14551901873323253  …  -0.13321995621967364, 0.2436582233332092, 0.0552222336976439, 0.07000814133633904, 0.2280064379660025, -0.28885681475734193, -0.07414214246290696, -0.06783184733650621, -0.055318068046308455, -0.2573488383282579], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.0395424111703751 -0.22531232360829911 … -0.04341228943744482 0.024336206858365517; -0.16481887432946268 0.17798073384748508 … -0.18594039305095766 0.051159225856547474; … ; -0.011639475293705043 -0.02347011206244673 … 0.20508869536159186 -0.1158382446274592; -0.19078069527757857 -0.007487540070740484 … -0.21341165344291158 -0.24158671316310726], [-0.04283623889330032, 0.14924461547060602, -0.17039563392959683, 0.00907774027816255, 0.21738885963113852, -0.06308040225941691, -0.14683286822101105, 0.21726892197970937, 0.19784321784707126, -0.0344988665714947  …  -0.23643089430602846, -0.013560425201427584, 0.05323948910726356, -0.04644175812567475, -0.2350400292671211, 0.09628312383424742, 0.07016420995205697, -0.23266392927140334, -0.18823664451487, 0.2304486691429084], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.11504184627266828 0.08601794194664503 … 0.03843129724045469 -0.18417305624127284; 0.10181551438831654 0.13459759904443674 … 0.11094951365942118 -0.1549466590355218; … ; 0.15279817525427697 0.0846661196058916 … -0.07993619892911122 0.07145402617285884; -0.1614160186346092 -0.13032002335149 … -0.12310552194729624 -0.15915773071049827], [-0.03435885900946367, -0.1198543931290306, 0.008454985905194445, -0.17980887188986966, -0.03557204910359624, 0.19125847393334877, -0.10949700778538696, -0.09343206702591, -0.12229583511781811, -0.09123969069220564  …  0.22119233518322862, 0.2053873143308657, 0.12756489387198222, 0.11567243705173319, -0.20982445664020496, 0.1595157838386987, -0.02087331046544119, -0.20556423263489765, -0.1622837764237961, -0.019220998739847395], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.25796717031347993 0.17579536633402948 … -0.09992960168785256 -0.09426177454620635; -0.026436330246675632 0.18070899284865127 … -0.19310119102392206 -0.06904005900252091], [0.16133004882307822, -0.3061228721091248], BetaML.Utils.relu, BetaML.Utils.drelu)], \n  loss = BetaML.Utils.squared_cost, \n  dloss = BetaML.Utils.dsquared_cost, \n  epochs = 500, \n  batch_size = 32, \n  opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n  shuffle = true, \n  descr = \"\", \n  cb = BetaML.Nn.fitting_info, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X, ydouble);\n\njulia> fit!(mach);\n\njulia> ŷdouble    = predict(mach, X);\n\njulia> hcat(ydouble,ŷdouble)\n506×4 Matrix{Float64}:\n 24.0  53.0  28.4624  62.8607\n 21.6  48.2  22.665   49.7401\n 34.7  74.4  31.5602  67.9433\n 33.4  71.8  33.0869  72.4337\n  ⋮                   \n 23.9  52.8  23.3573  50.654\n 22.0  49.0  22.1141  48.5926\n 11.9  28.8  19.9639  45.5823\n```\n"
+":docstring" = "```julia\nmutable struct MultitargetNeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of multiple dimensional targets.\n\n# Parameters:\n\n  * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n  * `loss`: Loss (cost) function [def: `BetaML.squared_cost`].  Should always assume y and ŷ as matrices.\n\n    !!! warning\n        If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n  * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n  * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `300`]\n  * `batch_size`: Size of each individual batch [def: `16`]\n  * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n  * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n  * `descr`: An optional title and/or description for this model\n  * `cb`: A call back function to provide information during training [def: `BetaML.fitting_info`]\n  * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n  * data must be numerical\n  * the label should be a *n-records* by *n-dimensions* matrix\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_boston;\n\njulia> ydouble     = hcat(y, y .*2  .+5);\n\njulia> modelType   = @load MultitargetNeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.MultitargetNeuralNetworkRegressor\n\njulia> layers                      = [BetaML.DenseLayer(12,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,2,f=BetaML.relu)];\n\njulia> model       = modelType(layers=layers,opt_alg=BetaML.ADAM(),epochs=500)\nMultitargetNeuralNetworkRegressor(\n  layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.2591582523441157 -0.027962845131416225 … 0.16044535560124418 -0.12838827994676857; -0.30381834909561184 0.2405495243851402 … -0.2588144861880588 0.09538577909777807; … ; -0.017320292924711156 -0.14042266424603767 … 0.06366999105841187 -0.13419651752478906; 0.07393079961409338 0.24521350531110264 … 0.04256867886217541 -0.0895506802948175], [0.14249427336553644, 0.24719379413682485, -0.25595911822556566, 0.10034088778965933, -0.017086404878505712, 0.21932184025609347, -0.031413516834861266, -0.12569076082247596, -0.18080140982481183, 0.14551901873323253  …  -0.13321995621967364, 0.2436582233332092, 0.0552222336976439, 0.07000814133633904, 0.2280064379660025, -0.28885681475734193, -0.07414214246290696, -0.06783184733650621, -0.055318068046308455, -0.2573488383282579], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.0395424111703751 -0.22531232360829911 … -0.04341228943744482 0.024336206858365517; -0.16481887432946268 0.17798073384748508 … -0.18594039305095766 0.051159225856547474; … ; -0.011639475293705043 -0.02347011206244673 … 0.20508869536159186 -0.1158382446274592; -0.19078069527757857 -0.007487540070740484 … -0.21341165344291158 -0.24158671316310726], [-0.04283623889330032, 0.14924461547060602, -0.17039563392959683, 0.00907774027816255, 0.21738885963113852, -0.06308040225941691, -0.14683286822101105, 0.21726892197970937, 0.19784321784707126, -0.0344988665714947  …  -0.23643089430602846, -0.013560425201427584, 0.05323948910726356, -0.04644175812567475, -0.2350400292671211, 0.09628312383424742, 0.07016420995205697, -0.23266392927140334, -0.18823664451487, 0.2304486691429084], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.11504184627266828 0.08601794194664503 … 0.03843129724045469 -0.18417305624127284; 0.10181551438831654 0.13459759904443674 … 0.11094951365942118 -0.1549466590355218; … ; 0.15279817525427697 0.0846661196058916 … -0.07993619892911122 0.07145402617285884; -0.1614160186346092 -0.13032002335149 … -0.12310552194729624 -0.15915773071049827], [-0.03435885900946367, -0.1198543931290306, 0.008454985905194445, -0.17980887188986966, -0.03557204910359624, 0.19125847393334877, -0.10949700778538696, -0.09343206702591, -0.12229583511781811, -0.09123969069220564  …  0.22119233518322862, 0.2053873143308657, 0.12756489387198222, 0.11567243705173319, -0.20982445664020496, 0.1595157838386987, -0.02087331046544119, -0.20556423263489765, -0.1622837764237961, -0.019220998739847395], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.25796717031347993 0.17579536633402948 … -0.09992960168785256 -0.09426177454620635; -0.026436330246675632 0.18070899284865127 … -0.19310119102392206 -0.06904005900252091], [0.16133004882307822, -0.3061228721091248], BetaML.Utils.relu, BetaML.Utils.drelu)], \n  loss = BetaML.Utils.squared_cost, \n  dloss = BetaML.Utils.dsquared_cost, \n  epochs = 500, \n  batch_size = 32, \n  opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n  shuffle = true, \n  descr = \"\", \n  cb = BetaML.Nn.fitting_info, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X, ydouble);\n\njulia> fit!(mach);\n\njulia> ŷdouble    = predict(mach, X);\n\njulia> hcat(ydouble,ŷdouble)\n506×4 Matrix{Float64}:\n 24.0  53.0  28.4624  62.8607\n 21.6  48.2  22.665   49.7401\n 34.7  74.4  31.5602  67.9433\n 33.4  71.8  33.0869  72.4337\n  ⋮                   \n 23.9  52.8  23.3573  50.654\n 22.0  49.0  22.1141  48.5926\n 11.9  28.8  19.9639  45.5823\n```\n"
 ":name" = "MultitargetNeuralNetworkRegressor"
 ":human_name" = "multitarget neural network regressor"
 ":is_supervised" = "`true`"
@@ -407,74 +441,6 @@
 ":deep_properties" = "`()`"
 ":reporting_operations" = "`()`"
 
-[BetaML.KernelPerceptron]
-":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}`"
-":output_scitype" = "`ScientificTypesBase.Unknown`"
-":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`"
-":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractVector{<:ScientificTypesBase.Finite}}`"
-":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`"
-":transform_scitype" = "`ScientificTypesBase.Unknown`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":is_pure_julia" = "`true`"
-":package_name" = "BetaML"
-":package_license" = "MIT"
-":load_path" = "BetaML.Perceptron.KernelPerceptron"
-":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
-":package_url" = "https://github.com/sylvaticus/BetaML.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
-":supports_class_weights" = "`false`"
-":supports_online" = "`false`"
-":docstring" = "```julia\nmutable struct KernelPerceptron <: MLJModelInterface.Probabilistic\n```\n\nThe kernel perceptron algorithm using one-vs-one for multiclass, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n  * `kernel::Function`: Kernel function to employ. See `?radial_kernel` or `?polynomial_kernel` (once loaded the BetaML package) for details or check `?BetaML.Utils` to verify if other kernels are defined (you can alsways define your own kernel) [def: [`radial_kernel`](@ref)]\n  * `epochs::Int64`: Maximum number of epochs, i.e. passages trough the whole training sample [def: `100`]\n  * `initial_errors::Union{Nothing, Vector{Vector{Int64}}}`: Initial distribution of the number of errors errors [def: `nothing`, i.e. zeros]. If provided, this should be a nModels-lenght vector of nRecords integer values vectors , where nModels is computed as `(n_classes  * (n_classes - 1)) / 2`\n  * `shuffle::Bool`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n  * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_iris;\n\njulia> modelType   = @load KernelPerceptron pkg = \"BetaML\"\n[ Info: For silent loading, specify `verbosity=0`. \nimport BetaML ✔\nBetaML.Perceptron.KernelPerceptron\n\njulia> model       = modelType()\nKernelPerceptron(\n  kernel = BetaML.Utils.radial_kernel, \n  epochs = 100, \n  initial_errors = nothing, \n  shuffle = true, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> est_classes = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.665, versicolor=>0.245, virginica=>0.09)\n UnivariateFinite{Multiclass{3}}(setosa=>0.665, versicolor=>0.245, virginica=>0.09)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.09, versicolor=>0.245, virginica=>0.665)\n UnivariateFinite{Multiclass{3}}(setosa=>0.09, versicolor=>0.665, virginica=>0.245)\n```\n"
-":name" = "KernelPerceptron"
-":human_name" = "kernel perceptron"
-":is_supervised" = "`true`"
-":prediction_type" = ":probabilistic"
-":abstract_type" = "`MLJModelInterface.Probabilistic`"
-":implemented_methods" = [":fit", ":predict"]
-":hyperparameters" = "`(:kernel, :epochs, :initial_errors, :shuffle, :rng)`"
-":hyperparameter_types" = "`(\"Function\", \"Int64\", \"Union{Nothing, Vector{Vector{Int64}}}\", \"Bool\", \"Random.AbstractRNG\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
-":supports_training_losses" = "`false`"
-":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-
-[BetaML.KMeans]
-":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":target_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`"
-":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":is_pure_julia" = "`true`"
-":package_name" = "BetaML"
-":package_license" = "MIT"
-":load_path" = "BetaML.Clustering.KMeans"
-":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
-":package_url" = "https://github.com/sylvaticus/BetaML.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
-":supports_class_weights" = "`false`"
-":supports_online" = "`false`"
-":docstring" = "```julia\nmutable struct KMeans <: MLJModelInterface.Unsupervised\n```\n\nThe classical KMeans clustering algorithm, from the Beta Machine Learning Toolkit (BetaML).\n\n# Parameters:\n\n  * `n_classes::Int64`: Number of classes to discriminate the data [def: 3]\n  * `dist::Function`: Function to employ as distance. Default to the Euclidean distance. Can be one of the predefined distances (`l1_distance`, `l2_distance`, `l2squared_distance`),  `cosine_distance`), any user defined function accepting two vectors and returning a scalar or an anonymous function with the same characteristics. Attention that, contrary to `KMedoids`, the `KMeansClusterer` algorithm is not guaranteed to converge with other distances than the Euclidean one.\n  * `initialisation_strategy::String`: The computation method of the vector of the initial representatives. One of the following:\n\n      * \"random\": randomly in the X space\n      * \"grid\": using a grid approach\n      * \"shuffle\": selecting randomly within the available points [default]\n      * \"given\": using a provided set of initial representatives provided in the `initial_representatives` parameter\n\n  * `initial_representatives::Union{Nothing, Matrix{Float64}}`: Provided (K x D) matrix of initial representatives (useful only with `initialisation_strategy=\"given\"`) [default: `nothing`]\n  * `rng::Random.AbstractRNG`: Random Number Generator [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n  * data must be numerical\n  * online fitting (re-fitting with new data) is supported\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_iris;\n\njulia> modelType   = @load KMeans pkg = \"BetaML\" verbosity=0\nBetaML.Clustering.KMeans\n\njulia> model       = modelType()\nKMeans(\n  n_classes = 3, \n  dist = BetaML.Clustering.var\"#34#36\"(), \n  initialisation_strategy = \"shuffle\", \n  initial_representatives = nothing, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(KMeans(n_classes = 3, …), …).\n\njulia> classes_est = predict(mach, X);\n\njulia> hcat(y,classes_est)\n150×2 CategoricalArrays.CategoricalArray{Union{Int64, String},2,UInt32}:\n \"setosa\"     2\n \"setosa\"     2\n \"setosa\"     2\n ⋮            \n \"virginica\"  3\n \"virginica\"  3\n \"virginica\"  1\n```\n"
-":name" = "KMeans"
-":human_name" = "k means"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.Unsupervised`"
-":implemented_methods" = [":fit", ":fitted_params", ":predict", ":transform"]
-":hyperparameters" = "`(:n_classes, :dist, :initialisation_strategy, :initial_representatives, :rng)`"
-":hyperparameter_types" = "`(\"Int64\", \"Function\", \"String\", \"Union{Nothing, Matrix{Float64}}\", \"Random.AbstractRNG\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
-":supports_training_losses" = "`false`"
-":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-
 [BetaML.DecisionTreeClassifier]
 ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}`"
 ":output_scitype" = "`ScientificTypesBase.Unknown`"
@@ -486,7 +452,7 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.Trees.DecisionTreeClassifier"
+":load_path" = "BetaML.Bmlj.DecisionTreeClassifier"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
@@ -509,40 +475,6 @@
 ":deep_properties" = "`()`"
 ":reporting_operations" = "`()`"
 
-[BetaML.AutoEncoderMLJ]
-":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`"
-":output_scitype" = "`AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`"
-":target_scitype" = "`ScientificTypesBase.Unknown`"
-":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, ScientificTypesBase.Unknown}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`ScientificTypesBase.Unknown`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":is_pure_julia" = "`true`"
-":package_name" = "BetaML"
-":package_license" = "MIT"
-":load_path" = "BetaML.Utils.AutoEncoderMLJ"
-":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
-":package_url" = "https://github.com/sylvaticus/BetaML.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
-":supports_class_weights" = "`false`"
-":supports_online" = "`false`"
-":docstring" = "```julia\nmutable struct AutoEncoderMLJ <: MLJModelInterface.Deterministic\n```\n\nA ready-to use AutoEncoder, from the Beta Machine Learning Toolkit (BetaML) for ecoding and decoding of data using neural networks\n\n# Parameters:\n\n  * `e_layers`: The layers (vector of `AbstractLayer`s) responsable of the encoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `innerdims`]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n  * `d_layers`: The layers (vector of `AbstractLayer`s) responsable of the decoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `innerdims`]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n  * `outdims`: The number of neurons (i.e. dimensions) of the encoded data. If the value is a float it is consiered a percentual (to be rounded) of the dimensionality of the data [def: `0.33`]\n  * `innerdims`: Inner layer dimension (i.e. number of neurons). If the value is a float it is considered a percentual (to be rounded) of the dimensionality of the data [def: `nothing` that applies a specific heuristic]. Consider that the underlying neural network is trying to predict multiple values at the same times. Normally this requires many more neurons than a scalar prediction. If `e_layers` or `d_layers` are specified, this parameter is ignored for the respective part.\n  * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as (n x d) matrices.\n\n    !!! warning\n        If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n  * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost` if `loss==squared_cost`, `nothing` otherwise, i.e. use the derivative of the squared cost or autodiff]\n  * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `200`]\n  * `batch_size`: Size of each individual batch [def: `8`]\n  * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`] See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n  * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n  * `tunemethod`: The method - and its parameters - to employ for hyperparameters autotuning. See [`SuccessiveHalvingSearch`](@ref) for the default method. To implement automatic hyperparameter tuning during the (first) `fit!` call simply set `autotune=true` and eventually change the default `tunemethod` options (including the parameter ranges, the resources to employ and the loss function to adopt).\n\n  * `descr`: An optional title and/or description for this model\n  * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n  * data must be numerical\n  * use `transform` to obtain the encoded data, and `inverse_trasnform` to decode to the original data\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_iris;\n\njulia> modelType   = @load AutoEncoderMLJ pkg = \"BetaML\" verbosity=0;\n\njulia> model       = modelType(outdims=2,innerdims=10);\n\njulia> mach        = machine(model, X)\nuntrained Machine; caches model-specific representations of data\n  model: AutoEncoderMLJ(e_layers = nothing, …)\n  args: \n    1:\tSource @334 ⏎ Table{AbstractVector{Continuous}}\n\njulia> fit!(mach,verbosity=2)\n[ Info: Training machine(AutoEncoderMLJ(e_layers = nothing, …), …).\n***\n*** Training  for 200 epochs with algorithm BetaML.Nn.ADAM.\nTraining.. \t avg loss on epoch 1 (1): \t 35.48243542158747\nTraining.. \t avg loss on epoch 20 (20): \t 0.07528042222678126\nTraining.. \t avg loss on epoch 40 (40): \t 0.06293071729378613\nTraining.. \t avg loss on epoch 60 (60): \t 0.057035588828991145\nTraining.. \t avg loss on epoch 80 (80): \t 0.056313167754822875\nTraining.. \t avg loss on epoch 100 (100): \t 0.055521461091809436\nTraining the Neural Network...  52%|██████████████████████████████████████                                   |  ETA: 0:00:01Training.. \t avg loss on epoch 120 (120): \t 0.06015206472927942\nTraining.. \t avg loss on epoch 140 (140): \t 0.05536835903285201\nTraining.. \t avg loss on epoch 160 (160): \t 0.05877560142428245\nTraining.. \t avg loss on epoch 180 (180): \t 0.05476302769966953\nTraining.. \t avg loss on epoch 200 (200): \t 0.049240864053557445\nTraining the Neural Network... 100%|█████████████████████████████████████████████████████████████████████████| Time: 0:00:01\nTraining of 200 epoch completed. Final epoch error: 0.049240864053557445.\ntrained Machine; caches model-specific representations of data\n  model: AutoEncoderMLJ(e_layers = nothing, …)\n  args: \n    1:\tSource @334 ⏎ Table{AbstractVector{Continuous}}\n\n\njulia> X_latent    = transform(mach, X)\n150×2 Matrix{Float64}:\n 7.01701   -2.77285\n 6.50615   -2.9279\n 6.5233    -2.60754\n ⋮        \n 6.70196  -10.6059\n 6.46369  -11.1117\n 6.20212  -10.1323\n\njulia> X_recovered = inverse_transform(mach,X_latent)\n150×4 Matrix{Float64}:\n 5.04973  3.55838  1.43251  0.242215\n 4.73689  3.19985  1.44085  0.295257\n 4.65128  3.25308  1.30187  0.244354\n ⋮                          \n 6.50077  2.93602  5.3303   1.87647\n 6.38639  2.83864  5.54395  2.04117\n 6.01595  2.67659  5.03669  1.83234\n\njulia> BetaML.relative_mean_error(MLJ.matrix(X),X_recovered)\n0.03387721261716176\n\n\n```\n"
-":name" = "AutoEncoderMLJ"
-":human_name" = "auto encoder mlj"
-":is_supervised" = "`true`"
-":prediction_type" = ":deterministic"
-":abstract_type" = "`MLJModelInterface.Deterministic`"
-":implemented_methods" = [":fit", ":inverse_transform", ":predict", ":transform"]
-":hyperparameters" = "`(:e_layers, :d_layers, :outdims, :innerdims, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :tunemethod, :descr, :rng)`"
-":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Float64, Int64}\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"BetaML.Api.AutoTuneMethod\", \"String\", \"Random.AbstractRNG\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
-":supports_training_losses" = "`false`"
-":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-
 [BetaML.GeneralImputer]
 ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}`"
 ":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`"
@@ -554,7 +486,7 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.Imputation.GeneralImputer"
+":load_path" = "BetaML.Bmlj.GeneralImputer"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
@@ -588,7 +520,7 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.Nn.NeuralNetworkClassifier"
+":load_path" = "BetaML.Bmlj.NeuralNetworkClassifier"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
@@ -622,7 +554,7 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.Imputation.SimpleImputer"
+":load_path" = "BetaML.Bmlj.SimpleImputer"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
@@ -656,7 +588,7 @@
 ":is_pure_julia" = "`true`"
 ":package_name" = "BetaML"
 ":package_license" = "MIT"
-":load_path" = "BetaML.GMM.GaussianMixtureClusterer"
+":load_path" = "BetaML.Bmlj.GaussianMixtureClusterer"
 ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
 ":package_url" = "https://github.com/sylvaticus/BetaML.jl"
 ":is_wrapper" = "`false`"
@@ -679,6 +611,74 @@
 ":deep_properties" = "`()`"
 ":reporting_operations" = "`()`"
 
+[BetaML.KernelPerceptronClassifier]
+":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}`"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractVector{<:ScientificTypesBase.Finite}}`"
+":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_pure_julia" = "`true`"
+":package_name" = "BetaML"
+":package_license" = "MIT"
+":load_path" = "BetaML.Bmlj.KernelPerceptronClassifier"
+":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
+":package_url" = "https://github.com/sylvaticus/BetaML.jl"
+":is_wrapper" = "`false`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```julia\nmutable struct KernelPerceptronClassifier <: MLJModelInterface.Probabilistic\n```\n\nThe kernel perceptron algorithm using one-vs-one for multiclass, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n  * `kernel::Function`: Kernel function to employ. See `?radial_kernel` or `?polynomial_kernel` (once loaded the BetaML package) for details or check `?BetaML.Utils` to verify if other kernels are defined (you can alsways define your own kernel) [def: [`radial_kernel`](@ref)]\n  * `epochs::Int64`: Maximum number of epochs, i.e. passages trough the whole training sample [def: `100`]\n  * `initial_errors::Union{Nothing, Vector{Vector{Int64}}}`: Initial distribution of the number of errors errors [def: `nothing`, i.e. zeros]. If provided, this should be a nModels-lenght vector of nRecords integer values vectors , where nModels is computed as `(n_classes  * (n_classes - 1)) / 2`\n  * `shuffle::Bool`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n  * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_iris;\n\njulia> modelType   = @load KernelPerceptronClassifier pkg = \"BetaML\"\n[ Info: For silent loading, specify `verbosity=0`. \nimport BetaML ✔\nBetaML.Perceptron.KernelPerceptronClassifier\n\njulia> model       = modelType()\nKernelPerceptronClassifier(\n  kernel = BetaML.Utils.radial_kernel, \n  epochs = 100, \n  initial_errors = nothing, \n  shuffle = true, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> est_classes = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.665, versicolor=>0.245, virginica=>0.09)\n UnivariateFinite{Multiclass{3}}(setosa=>0.665, versicolor=>0.245, virginica=>0.09)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.09, versicolor=>0.245, virginica=>0.665)\n UnivariateFinite{Multiclass{3}}(setosa=>0.09, versicolor=>0.665, virginica=>0.245)\n```\n"
+":name" = "KernelPerceptronClassifier"
+":human_name" = "kernel perceptron classifier"
+":is_supervised" = "`true`"
+":prediction_type" = ":probabilistic"
+":abstract_type" = "`MLJModelInterface.Probabilistic`"
+":implemented_methods" = [":fit", ":predict"]
+":hyperparameters" = "`(:kernel, :epochs, :initial_errors, :shuffle, :rng)`"
+":hyperparameter_types" = "`(\"Function\", \"Int64\", \"Union{Nothing, Vector{Vector{Int64}}}\", \"Bool\", \"Random.AbstractRNG\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+
+[BetaML.KMedoidsClusterer]
+":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":target_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":is_pure_julia" = "`true`"
+":package_name" = "BetaML"
+":package_license" = "MIT"
+":load_path" = "BetaML.Bmlj.KMedoidsClusterer"
+":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
+":package_url" = "https://github.com/sylvaticus/BetaML.jl"
+":is_wrapper" = "`false`"
+":supports_weights" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":docstring" = "```julia\nmutable struct KMedoidsClusterer <: MLJModelInterface.Unsupervised\n```\n\n# Parameters:\n\n  * `n_classes::Int64`: Number of classes to discriminate the data [def: 3]\n  * `dist::Function`: Function to employ as distance. Default to the Euclidean distance. Can be one of the predefined distances (`l1_distance`, `l2_distance`, `l2squared_distance`),  `cosine_distance`), any user defined function accepting two vectors and returning a scalar or an anonymous function with the same characteristics.\n  * `initialisation_strategy::String`: The computation method of the vector of the initial representatives. One of the following:\n\n      * \"random\": randomly in the X space\n      * \"grid\": using a grid approach\n      * \"shuffle\": selecting randomly within the available points [default]\n      * \"given\": using a provided set of initial representatives provided in the `initial_representatives` parameter\n\n  * `initial_representatives::Union{Nothing, Matrix{Float64}}`: Provided (K x D) matrix of initial representatives (useful only with `initialisation_strategy=\"given\"`) [default: `nothing`]\n  * `rng::Random.AbstractRNG`: Random Number Generator [deafult: `Random.GLOBAL_RNG`]\n\nThe K-medoids clustering algorithm with customisable distance function, from the Beta Machine Learning Toolkit (BetaML).\n\nSimilar to K-Means, but the \"representatives\" (the cetroids) are guaranteed to be one of the training points. The algorithm work with any arbitrary distance measure.\n\n# Notes:\n\n  * data must be numerical\n  * online fitting (re-fitting with new data) is supported\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y        = @load_iris;\n\njulia> modelType   = @load KMedoidsClusterer pkg = \"BetaML\" verbosity=0\nBetaML.Clustering.KMedoidsClusterer\n\njulia> model       = modelType()\nKMedoidsClusterer(\n  n_classes = 3, \n  dist = BetaML.Clustering.var\"#39#41\"(), \n  initialisation_strategy = \"shuffle\", \n  initial_representatives = nothing, \n  rng = Random._GLOBAL_RNG())\n\njulia> mach        = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(KMedoidsClusterer(n_classes = 3, …), …).\n\njulia> classes_est = predict(mach, X);\n\njulia> hcat(y,classes_est)\n150×2 CategoricalArrays.CategoricalArray{Union{Int64, String},2,UInt32}:\n \"setosa\"     3\n \"setosa\"     3\n \"setosa\"     3\n ⋮            \n \"virginica\"  1\n \"virginica\"  1\n \"virginica\"  2\n```\n"
+":name" = "KMedoidsClusterer"
+":human_name" = "k medoids clusterer"
+":is_supervised" = "`false`"
+":prediction_type" = ":unknown"
+":abstract_type" = "`MLJModelInterface.Unsupervised`"
+":implemented_methods" = [":fit", ":fitted_params", ":predict", ":transform"]
+":hyperparameters" = "`(:n_classes, :dist, :initialisation_strategy, :initial_representatives, :rng)`"
+":hyperparameter_types" = "`(\"Int64\", \"Function\", \"String\", \"Union{Nothing, Matrix{Float64}}\", \"Random.AbstractRNG\")`"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+
 [CatBoost.CatBoostRegressor]
 ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`"
 ":output_scitype" = "`ScientificTypesBase.Unknown`"
@@ -5797,7 +5797,7 @@
 ":supports_weights" = "`true`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "EvoTreeClassifier(;kwargs...)\n\nA model type for constructing a EvoTreeClassifier, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeClassifier is used to perform multi-class classification, using cross-entropy loss.\n\n# Hyper-parameters\n\n  * `nrounds=10`:           Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n  * `eta=0.1`:              Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n  * `L2::T=0.0`:            L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n  * `lambda::T=0.0`:        L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n  * `gamma::T=0.0`:         Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n  * `max_depth=5`:          Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n  * `min_weight=1.0`:       Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n  * `rowsample=1.0`:        Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `colsample=1.0`:        Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `nbins=32`:             Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n  * `tree_type=\"binary\"`    Tree structure to be used. One of:\n\n      * `binary`:       Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n      * `oblivious`:    A common splitting condition is imposed to all nodes of a given depth.\n  * `rng=123`:              Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeClassifier(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, K]` where `K` is the number of classes:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\n```\n\nDo `model = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeClassifier(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n  * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Multiclas` or `<:OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n  * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n  * `predict_mode(mach, Xnew)`: returns the mode of each of the prediction above.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n  * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(1:3, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\nmodel = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_iris\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mode(mach, X)\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n"
+":docstring" = "EvoTreeClassifier(;kwargs...)\n\nA model type for constructing a EvoTreeClassifier, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeClassifier is used to perform multi-class classification, using cross-entropy loss.\n\n# Hyper-parameters\n\n  * `nrounds=100`:           Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n  * `eta=0.1`:              Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n  * `L2::T=0.0`:            L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n  * `lambda::T=0.0`:        L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n  * `gamma::T=0.0`:         Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n  * `max_depth=6`:          Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n  * `min_weight=1.0`:       Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n  * `rowsample=1.0`:        Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `colsample=1.0`:        Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `nbins=64`:             Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n  * `tree_type=\"binary\"`    Tree structure to be used. One of:\n\n      * `binary`:       Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n      * `oblivious`:    A common splitting condition is imposed to all nodes of a given depth.\n  * `rng=123`:              Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeClassifier(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, K]` where `K` is the number of classes:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\n```\n\nDo `model = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeClassifier(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n  * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Multiclas` or `<:OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n  * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n  * `predict_mode(mach, Xnew)`: returns the mode of each of the prediction above.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n  * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(1:3, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\nmodel = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_iris\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mode(mach, X)\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n"
 ":name" = "EvoTreeClassifier"
 ":human_name" = "evo tree classifier"
 ":is_supervised" = "`true`"
@@ -5831,7 +5831,7 @@
 ":supports_weights" = "`true`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "EvoTreeGaussian(;kwargs...)\n\nA model type for constructing a EvoTreeGaussian, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting μ and σ parameters to maximize likelihood.\n\n# Hyper-parameters\n\n  * `nrounds=10`:           Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n  * `eta=0.1`:              Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n  * `L2::T=0.0`:            L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n  * `lambda::T=0.0`:        L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n  * `gamma::T=0.0`:         Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n  * `max_depth=5`:          Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n  * `min_weight=8.0`:       Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n  * `rowsample=1.0`:        Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `colsample=1.0`:        Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `nbins=32`:             Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n  * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).  !Experimental feature: note that for Gaussian regression, constraints may not be enforce systematically.\n  * `tree_type=\"binary\"`    Tree structure to be used. One of:\n\n      * `binary`:       Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n      * `oblivious`:    A common splitting condition is imposed to all nodes of a given depth.\n  * `rng=123`:              Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeGaussian()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeGaussian(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 2]` where the second dimensions refer to `μ` and `σ` respectively:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\n```\n\nDo `model = EvoTreeGaussian()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeGaussian(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n  * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n  * `predict(mach, Xnew)`: returns a vector of Gaussian distributions given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n  * `predict_mean(mach, Xnew)`\n  * `predict_mode(mach, Xnew)`\n  * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n  * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nparams = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(params; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\nmodel = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n"
+":docstring" = "EvoTreeGaussian(;kwargs...)\n\nA model type for constructing a EvoTreeGaussian, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting μ and σ parameters to maximize likelihood.\n\n# Hyper-parameters\n\n  * `nrounds=100`:           Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n  * `eta=0.1`:              Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n  * `L2::T=0.0`:            L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n  * `lambda::T=0.0`:        L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n  * `gamma::T=0.0`:         Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n  * `max_depth=6`:          Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n  * `min_weight=8.0`:       Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n  * `rowsample=1.0`:        Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `colsample=1.0`:        Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `nbins=64`:             Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n  * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).  !Experimental feature: note that for Gaussian regression, constraints may not be enforce systematically.\n  * `tree_type=\"binary\"`    Tree structure to be used. One of:\n\n      * `binary`:       Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n      * `oblivious`:    A common splitting condition is imposed to all nodes of a given depth.\n  * `rng=123`:              Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeGaussian()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeGaussian(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 2]` where the second dimensions refer to `μ` and `σ` respectively:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\n```\n\nDo `model = EvoTreeGaussian()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeGaussian(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n  * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n  * `predict(mach, Xnew)`: returns a vector of Gaussian distributions given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n  * `predict_mean(mach, Xnew)`\n  * `predict_mode(mach, Xnew)`\n  * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n  * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nparams = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(params; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\nmodel = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n"
 ":name" = "EvoTreeGaussian"
 ":human_name" = "evo tree gaussian"
 ":is_supervised" = "`true`"
@@ -5865,7 +5865,7 @@
 ":supports_weights" = "`true`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "EvoTreeMLE(;kwargs...)\n\nA model type for constructing a EvoTreeMLE, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeMLE performs maximum likelihood estimation. Assumed distribution is specified through `loss` kwargs. Both Gaussian and Logistic distributions are supported.\n\n# Hyper-parameters\n\n`loss=:gaussian`:         Loss to be be minimized during training. One of:\n\n  * `:gaussian` / `:gaussian_mle`\n  * `:logistic` / `:logistic_mle`\n  * `nrounds=10`:           Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n  * `eta=0.1`:              Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0.\n\nA lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.  \n\n  * `L2::T=0.0`:            L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n  * `lambda::T=0.0`:        L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n  * `gamma::T=0.0`:         Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n  * `max_depth=5`:          Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n  * `min_weight=8.0`:       Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n  * `rowsample=1.0`:        Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `colsample=1.0`:        Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `nbins=32`:             Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n  * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).  !Experimental feature: note that for MLE regression, constraints may not be enforced systematically.\n  * `tree_type=\"binary\"`    Tree structure to be used. One of:\n\n      * `binary`:       Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n      * `oblivious`:    A common splitting condition is imposed to all nodes of a given depth.\n  * `rng=123`:              Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeMLE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeMLE(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, nparams]` where the second dimensions refer to `μ` & `σ` for Normal/Gaussian and `μ` & `s` for Logistic.\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\n```\n\nDo `model = EvoTreeMLE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n  * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n  * `predict(mach, Xnew)`: returns a vector of Gaussian or Logistic distributions (according to provided `loss`) given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n  * `predict_mean(mach, Xnew)`\n  * `predict_mode(mach, Xnew)`\n  * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n  * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\nmodel = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n"
+":docstring" = "EvoTreeMLE(;kwargs...)\n\nA model type for constructing a EvoTreeMLE, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeMLE performs maximum likelihood estimation. Assumed distribution is specified through `loss` kwargs. Both Gaussian and Logistic distributions are supported.\n\n# Hyper-parameters\n\n`loss=:gaussian`:         Loss to be be minimized during training. One of:\n\n  * `:gaussian` / `:gaussian_mle`\n  * `:logistic` / `:logistic_mle`\n  * `nrounds=100`:           Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n  * `eta=0.1`:              Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0.\n\nA lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.  \n\n  * `L2::T=0.0`:            L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n  * `lambda::T=0.0`:        L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n  * `gamma::T=0.0`:         Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n  * `max_depth=6`:          Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n  * `min_weight=8.0`:       Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n  * `rowsample=1.0`:        Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `colsample=1.0`:        Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `nbins=64`:             Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n  * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).  !Experimental feature: note that for MLE regression, constraints may not be enforced systematically.\n  * `tree_type=\"binary\"`    Tree structure to be used. One of:\n\n      * `binary`:       Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n      * `oblivious`:    A common splitting condition is imposed to all nodes of a given depth.\n  * `rng=123`:              Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeMLE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeMLE(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, nparams]` where the second dimensions refer to `μ` & `σ` for Normal/Gaussian and `μ` & `s` for Logistic.\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\n```\n\nDo `model = EvoTreeMLE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n  * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n  * `predict(mach, Xnew)`: returns a vector of Gaussian or Logistic distributions (according to provided `loss`) given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n  * `predict_mean(mach, Xnew)`\n  * `predict_mode(mach, Xnew)`\n  * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n  * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\nmodel = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n"
 ":name" = "EvoTreeMLE"
 ":human_name" = "evo tree mle"
 ":is_supervised" = "`true`"
@@ -5899,7 +5899,7 @@
 ":supports_weights" = "`true`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "EvoTreeRegressor(;kwargs...)\n\nA model type for constructing a EvoTreeRegressor, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface.\n\n# Hyper-parameters\n\n  * `loss=:mse`:         Loss to be be minimized during training. One of:\n\n      * `:mse`\n      * `:logloss`\n      * `:gamma`\n      * `:tweedie`\n      * `:quantile`\n      * `:l1`\n  * `nrounds=10`:           Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n  * `eta=0.1`:              Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n  * `L2::T=0.0`:            L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n  * `lambda::T=0.0`:        L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n  * `gamma::T=0.0`:         Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n  * `alpha::T=0.5`:         Loss specific parameter in the [0, 1] range:                           - `:quantile`: target quantile for the regression.                           - `:l1`: weighting parameters to positive vs negative residuals.                                 - Positive residual weights = `alpha`                                 - Negative residual weights = `(1 - alpha)`\n  * `max_depth=5`:          Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n  * `min_weight=1.0`:       Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n  * `rowsample=1.0`:        Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `colsample=1.0`:        Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `nbins=32`:             Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n  * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).  Only `:linear`, `:logistic`, `:gamma` and `tweedie` losses are supported at the moment.\n  * `tree_type=\"binary\"`    Tree structure to be used. One of:\n\n      * `binary`:       Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n      * `oblivious`:    A common splitting condition is imposed to all nodes of a given depth.\n  * `rng=123`:              Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeRegressor(loss=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Vector` of length `nobs`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\n```\n\nDo `model = EvoTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with     `mach = machine(model, X, y)` where\n\n  * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n  * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n  * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\nmodel = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\n```\n"
+":docstring" = "EvoTreeRegressor(;kwargs...)\n\nA model type for constructing a EvoTreeRegressor, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface.\n\n# Hyper-parameters\n\n  * `loss=:mse`:         Loss to be be minimized during training. One of:\n\n      * `:mse`\n      * `:logloss`\n      * `:gamma`\n      * `:tweedie`\n      * `:quantile`\n      * `:l1`\n  * `nrounds=100`:           Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n  * `eta=0.1`:              Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n  * `L2::T=0.0`:            L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n  * `lambda::T=0.0`:        L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n  * `gamma::T=0.0`:         Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n  * `alpha::T=0.5`:         Loss specific parameter in the [0, 1] range:                           - `:quantile`: target quantile for the regression.                           - `:l1`: weighting parameters to positive vs negative residuals.                                 - Positive residual weights = `alpha`                                 - Negative residual weights = `(1 - alpha)`\n  * `max_depth=6`:          Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n  * `min_weight=1.0`:       Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n  * `rowsample=1.0`:        Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `colsample=1.0`:        Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n  * `nbins=64`:             Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n  * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).  Only `:linear`, `:logistic`, `:gamma` and `tweedie` losses are supported at the moment.\n  * `tree_type=\"binary\"`    Tree structure to be used. One of:\n\n      * `binary`:       Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n      * `oblivious`:    A common splitting condition is imposed to all nodes of a given depth.\n  * `rng=123`:              Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeRegressor(loss=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Vector` of length `nobs`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\n```\n\nDo `model = EvoTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with     `mach = machine(model, X, y)` where\n\n  * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n  * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n  * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\nmodel = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\n```\n"
 ":name" = "EvoTreeRegressor"
 ":human_name" = "evo tree regressor"
 ":is_supervised" = "`true`"
@@ -5933,7 +5933,7 @@
 ":supports_weights" = "`true`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "EvoTreeCount(;kwargs...)\n\nA model type for constructing a EvoTreeCount, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeCount is used to perform Poisson probabilistic regression on count target.\n\n# Hyper-parameters\n\n  * `nrounds=10`:           Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n  * `eta=0.1`:              Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n  * `L2::T=0.0`:            L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n  * `lambda::T=0.0`:        L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n  * `gamma::T=0.0`:         Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model.\n  * `max_depth=5`:          Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n  * `min_weight=1.0`:       Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n  * `rowsample=1.0`:        Proportion of rows that are sampled at each iteration to build the tree. Should be `]0, 1]`.\n  * `colsample=1.0`:        Proportion of columns / features that are sampled at each iteration to build the tree. Should be `]0, 1]`.\n  * `nbins=32`:             Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n  * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).\n  * `tree_type=\"binary\"`    Tree structure to be used. One of:\n\n      * `binary`:       Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n      * `oblivious`:    A common splitting condition is imposed to all nodes of a given depth.\n  * `rng=123`:              Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeCount()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeCount(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Vector` of length `nobs`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\n```\n\nDo `model = EvoTreeCount()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeCount(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with     mach = machine(model, X, y) where\n\n  * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Count`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n  * `predict(mach, Xnew)`: returns a vector of Poisson distributions given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n  * `predict_mean(mach, Xnew)`\n  * `predict_mode(mach, Xnew)`\n  * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n  * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(0:2, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\nusing MLJ\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\nmodel = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nX, y = randn(nobs, nfeats), rand(0:2, nobs)\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n"
+":docstring" = "EvoTreeCount(;kwargs...)\n\nA model type for constructing a EvoTreeCount, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeCount is used to perform Poisson probabilistic regression on count target.\n\n# Hyper-parameters\n\n  * `nrounds=100`:           Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n  * `eta=0.1`:              Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n  * `L2::T=0.0`:            L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n  * `lambda::T=0.0`:        L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n  * `gamma::T=0.0`:         Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model.\n  * `max_depth=6`:          Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n  * `min_weight=1.0`:       Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n  * `rowsample=1.0`:        Proportion of rows that are sampled at each iteration to build the tree. Should be `]0, 1]`.\n  * `colsample=1.0`:        Proportion of columns / features that are sampled at each iteration to build the tree. Should be `]0, 1]`.\n  * `nbins=64`:             Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n  * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).\n  * `tree_type=\"binary\"`    Tree structure to be used. One of:\n\n      * `binary`:       Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n      * `oblivious`:    A common splitting condition is imposed to all nodes of a given depth.\n  * `rng=123`:              Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeCount()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeCount(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Vector` of length `nobs`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\n```\n\nDo `model = EvoTreeCount()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeCount(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with     mach = machine(model, X, y) where\n\n  * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Count`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n  * `predict(mach, Xnew)`: returns a vector of Poisson distributions given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n  * `predict_mean(mach, Xnew)`\n  * `predict_mode(mach, Xnew)`\n  * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n  * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(0:2, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\nusing MLJ\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\nmodel = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nX, y = randn(nobs, nfeats), rand(0:2, nobs)\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n"
 ":name" = "EvoTreeCount"
 ":human_name" = "evo tree count"
 ":is_supervised" = "`true`"
@@ -7599,7 +7599,7 @@
 ":supports_weights" = "`true`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "```\nLinearBinaryClassifier\n```\n\nA model type for constructing a linear binary classifier, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\n```\n\nDo `model = LinearBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearBinaryClassifier(fit_intercept=...)`.\n\n`LinearBinaryClassifier` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a binary target variable, with a user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n  * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor(2)` or `<:Multiclass(2)`; check the scitype with `schema(y)`\n  * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n  * `fit_intercept=true`: Whether to calculate the intercept for this model.  If set to false,  no intercept will be calculated (e.g. the data is expected to be centered)\n  * `link=GLM.LogitLink`: The function which links the linear prediction function to the  probability of a particular outcome or class. This must have type `GLM.Link01`. Options  include `GLM.LogitLink()`, `GLM.ProbitLink()`, `CloglogLink(),`CauchitLink()`.\n  * `offsetcol=nothing`: Name of the column to be used as an offset, if any.  An offset is a  variable which is known to have a coefficient of 1.\n  * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n  * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the  relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid  failure when deviance is unchanged except for rounding errors.\n  * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the  relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid  failure when deviance is unchanged except for rounding errors.\n  * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n  * `report_keys::Union{Symbol, Nothing}=DEFAULT_KEYS`: keys to be used in the report. Should be one of: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table`.\n\n# Operations\n\n  * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n  * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned  above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `features`: The names of the features used during model fitting.\n  * `coef`: The linear coefficients determined by the model.\n  * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n  * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n  * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n  * `stderror`: The standard errors of the coefficients.\n  * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n  * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n\n# Examples\n\n```\nusing MLJ\nimport GLM # namespace must be available\n\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\nclf = LinearBinaryClassifier(fit_intercept=false, link=GLM.ProbitLink())\n\nX, y = @load_crabs\n\nmach = machine(clf, X, y) |> fit!\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n        RW = [5.1, 25.7, 6.4],\n        CL = [15.9, 46.7, 14.3],\n        CW = [18.7, 59.7, 12.2],\n        BD = [6.2, 23.6, 8.4],)\n\nyhat = predict(mach, Xnew) # probabilistic predictions\npdf(yhat, levels(y)) # probability matrix\np_B = pdf.(yhat, \"B\")\nclass_labels = predict_mode(mach, Xnew)\n\nfitted_params(mach).features\nfitted_params(mach).coef\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearCountRegressor`](@ref)\n"
+":docstring" = "```\nLinearBinaryClassifier\n```\n\nA model type for constructing a linear binary classifier, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\n```\n\nDo `model = LinearBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearBinaryClassifier(fit_intercept=...)`.\n\n`LinearBinaryClassifier` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a binary target variable, with a user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n  * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor(2)` or `<:Multiclass(2)`; check the scitype with `schema(y)`\n  * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n  * `fit_intercept=true`: Whether to calculate the intercept for this model.  If set to false,  no intercept will be calculated (e.g. the data is expected to be centered)\n  * `link=GLM.LogitLink`: The function which links the linear prediction function to the  probability of a particular outcome or class. This must have type `GLM.Link01`. Options  include `GLM.LogitLink()`, `GLM.ProbitLink()`, `CloglogLink(),`CauchitLink()`.\n  * `offsetcol=nothing`: Name of the column to be used as an offset, if any.  An offset is a  variable which is known to have a coefficient of 1.\n  * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n  * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the  relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid  failure when deviance is unchanged except for rounding errors.\n  * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the  relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid  failure when deviance is unchanged except for rounding errors.\n  * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n  * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n  * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n  * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned  above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `features`: The names of the features used during model fitting.\n  * `coef`: The linear coefficients determined by the model.\n  * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n  * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n  * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n  * `stderror`: The standard errors of the coefficients.\n  * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n  * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n  * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport GLM # namespace must be available\n\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\nclf = LinearBinaryClassifier(fit_intercept=false, link=GLM.ProbitLink())\n\nX, y = @load_crabs\n\nmach = machine(clf, X, y) |> fit!\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n        RW = [5.1, 25.7, 6.4],\n        CL = [15.9, 46.7, 14.3],\n        CW = [18.7, 59.7, 12.2],\n        BD = [6.2, 23.6, 8.4],)\n\nyhat = predict(mach, Xnew) # probabilistic predictions\npdf(yhat, levels(y)) # probability matrix\np_B = pdf.(yhat, \"B\")\nclass_labels = predict_mode(mach, Xnew)\n\nfitted_params(mach).features\nfitted_params(mach).coef\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearCountRegressor`](@ref)\n"
 ":name" = "LinearBinaryClassifier"
 ":human_name" = "linear binary classifier"
 ":is_supervised" = "`true`"
@@ -7633,7 +7633,7 @@
 ":supports_weights" = "`true`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "```\nLinearCountRegressor\n```\n\nA model type for constructing a linear count regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearCountRegressor = @load LinearCountRegressor pkg=GLM\n```\n\nDo `model = LinearCountRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearCountRegressor(fit_intercept=...)`.\n\n`LinearCountRegressor` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a `Count` target variable (non-negative, unbounded integer) with user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n  * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Count`; check the scitype with `schema(y)`\n  * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n  * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false,  no intercept will be calculated (e.g. the data is expected to be centered)\n  * `distribution=Distributions.Poisson()`: The distribution which the residuals/errors of the  model should fit.\n  * `link=GLM.LogLink()`: The function which links the linear prediction function to the  probability of a particular outcome or class. This should be one of the following:  `GLM.IdentityLink()`, `GLM.InverseLink()`, `GLM.InverseSquareLink()`, `GLM.LogLink()`,  `GLM.SqrtLink()`.\n  * `offsetcol=nothing`: Name of the column to be used as an offset, if any.  An offset is a  variable which is known to have a coefficient of 1.\n  * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n  * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the  relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid  failure when deviance is unchanged except for rounding errors.\n  * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the  relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid  failure when deviance is unchanged except for rounding errors.\n  * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n  * `report_keys::Union{Symbol, Nothing}=DEFAULT_KEYS`: keys to be used in the report. Should be one of: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table`.\n\n# Operations\n\n  * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having  the same Scitype as `X` above. Predictions are probabilistic.\n  * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n  * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `features`: The names of the features encountered during model fitting.\n  * `coef`: The linear coefficients determined by the model.\n  * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n  * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n  * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n  * `stderror`: The standard errors of the coefficients.\n  * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n  * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n\n# Examples\n\n```\nusing MLJ\nimport MLJ.Distributions.Poisson\n\n# Generate some data whose target y looks Poisson when conditioned on\n# X:\nN = 10_000\nw = [1.0, -2.0, 3.0]\nmu(x) = exp(w'x) # mean for a log link function\nXmat = rand(N, 3)\nX = MLJ.table(Xmat)\ny = map(1:N) do i\n    x = Xmat[i, :]\n    rand(Poisson(mu(x)))\nend;\n\nCountRegressor = @load LinearCountRegressor pkg=GLM\nmodel = CountRegressor(fit_intercept=false)\nmach = machine(model, X, y)\nfit!(mach)\n\nXnew = MLJ.table(rand(3, 3))\nyhat = predict(mach, Xnew)\nyhat_point = predict_mean(mach, Xnew)\n\n# get coefficients approximating `w`:\njulia> fitted_params(mach).coef\n3-element Vector{Float64}:\n  0.9969008753103842\n -2.0255901752504775\n  3.014407534033522\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n"
+":docstring" = "```\nLinearCountRegressor\n```\n\nA model type for constructing a linear count regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearCountRegressor = @load LinearCountRegressor pkg=GLM\n```\n\nDo `model = LinearCountRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearCountRegressor(fit_intercept=...)`.\n\n`LinearCountRegressor` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a `Count` target variable (non-negative, unbounded integer) with user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n  * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Count`; check the scitype with `schema(y)`\n  * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n  * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false,  no intercept will be calculated (e.g. the data is expected to be centered)\n  * `distribution=Distributions.Poisson()`: The distribution which the residuals/errors of the  model should fit.\n  * `link=GLM.LogLink()`: The function which links the linear prediction function to the  probability of a particular outcome or class. This should be one of the following:  `GLM.IdentityLink()`, `GLM.InverseLink()`, `GLM.InverseSquareLink()`, `GLM.LogLink()`,  `GLM.SqrtLink()`.\n  * `offsetcol=nothing`: Name of the column to be used as an offset, if any.  An offset is a  variable which is known to have a coefficient of 1.\n  * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n  * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the  relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid  failure when deviance is unchanged except for rounding errors.\n  * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the  relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid  failure when deviance is unchanged except for rounding errors.\n  * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n  * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n  * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having  the same Scitype as `X` above. Predictions are probabilistic.\n  * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n  * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `features`: The names of the features encountered during model fitting.\n  * `coef`: The linear coefficients determined by the model.\n  * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n  * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n  * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n  * `stderror`: The standard errors of the coefficients.\n  * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n  * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n  * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport MLJ.Distributions.Poisson\n\n# Generate some data whose target y looks Poisson when conditioned on\n# X:\nN = 10_000\nw = [1.0, -2.0, 3.0]\nmu(x) = exp(w'x) # mean for a log link function\nXmat = rand(N, 3)\nX = MLJ.table(Xmat)\ny = map(1:N) do i\n    x = Xmat[i, :]\n    rand(Poisson(mu(x)))\nend;\n\nCountRegressor = @load LinearCountRegressor pkg=GLM\nmodel = CountRegressor(fit_intercept=false)\nmach = machine(model, X, y)\nfit!(mach)\n\nXnew = MLJ.table(rand(3, 3))\nyhat = predict(mach, Xnew)\nyhat_point = predict_mean(mach, Xnew)\n\n# get coefficients approximating `w`:\njulia> fitted_params(mach).coef\n3-element Vector{Float64}:\n  0.9969008753103842\n -2.0255901752504775\n  3.014407534033522\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n"
 ":name" = "LinearCountRegressor"
 ":human_name" = "linear count regressor"
 ":is_supervised" = "`true`"
@@ -7667,7 +7667,7 @@
 ":supports_weights" = "`true`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=GLM\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(fit_intercept=...)`.\n\n`LinearRegressor` assumes the target is a continuous variable whose conditional distribution is normal with constant variance, and whose expected value is a linear combination of the features (identity link function). Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n  * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n  * `w`: is a vector of `Real` per-observation weights\n\n# Hyper-parameters\n\n  * `fit_intercept=true`: Whether to calculate the intercept for this model.  If set to false, no intercept will be calculated (e.g. the data is expected  to be centered)\n  * `dropcollinear=false`: Whether to drop features in the training data to ensure linear independence.  If true , only the first of each set of linearly-dependent features is used. The coefficient for redundant linearly dependent features is `0.0` and all associated statistics are set to `NaN`.\n  * `offsetcol=nothing`: Name of the column to be used as an offset, if any.  An offset is a variable which is known to have a coefficient of 1.\n  * `report_keys::Union{Symbol, Nothing}=DEFAULT_KEYS`: vector of keys to be used in the report. Should be one of: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n  * `predict(mach, Xnew)`: return predictions of the target given new  features `Xnew` having the same Scitype as `X` above. Predictions are  probabilistic.\n  * `predict_mean(mach, Xnew)`: instead return the mean of  each prediction above\n  * `predict_median(mach, Xnew)`: instead return the median of  each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `features`: The names of the features encountered during model fitting.\n  * `coef`: The linear coefficients determined by the model.\n  * `intercept`: The intercept determined by the model.\n\n# Report\n\nWhen all keys are enabled in `report_keys`, the following fields are available in `report(mach)`:\n\n  * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n  * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n  * `stderror`: The standard errors of the coefficients.\n  * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n  * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n\n# Examples\n\n```\nusing MLJ\nLinearRegressor = @load LinearRegressor pkg=GLM\nglm = LinearRegressor()\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(glm, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\nyhat_point = predict_mean(mach, Xnew) # new predictions\n\nfitted_params(mach).features\nfitted_params(mach).coef # x1, x2, intercept\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearCountRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n"
+":docstring" = "```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=GLM\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(fit_intercept=...)`.\n\n`LinearRegressor` assumes the target is a continuous variable whose conditional distribution is normal with constant variance, and whose expected value is a linear combination of the features (identity link function). Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n  * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n  * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n  * `w`: is a vector of `Real` per-observation weights\n\n# Hyper-parameters\n\n  * `fit_intercept=true`: Whether to calculate the intercept for this model.  If set to false, no intercept will be calculated (e.g. the data is expected  to be centered)\n  * `dropcollinear=false`: Whether to drop features in the training data to ensure linear independence.  If true , only the first of each set of linearly-dependent features is used. The coefficient for redundant linearly dependent features is `0.0` and all associated statistics are set to `NaN`.\n  * `offsetcol=nothing`: Name of the column to be used as an offset, if any.  An offset is a variable which is known to have a coefficient of 1.\n  * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n  * `predict(mach, Xnew)`: return predictions of the target given new  features `Xnew` having the same Scitype as `X` above. Predictions are  probabilistic.\n  * `predict_mean(mach, Xnew)`: instead return the mean of  each prediction above\n  * `predict_median(mach, Xnew)`: instead return the median of  each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n  * `features`: The names of the features encountered during model fitting.\n  * `coef`: The linear coefficients determined by the model.\n  * `intercept`: The intercept determined by the model.\n\n# Report\n\nWhen all keys are enabled in `report_keys`, the following fields are available in `report(mach)`:\n\n  * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n  * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n  * `stderror`: The standard errors of the coefficients.\n  * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n  * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n  * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nLinearRegressor = @load LinearRegressor pkg=GLM\nglm = LinearRegressor()\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(glm, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\nyhat_point = predict_mean(mach, Xnew) # new predictions\n\nfitted_params(mach).features\nfitted_params(mach).coef # x1, x2, intercept\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearCountRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n"
 ":name" = "LinearRegressor"
 ":human_name" = "linear regressor"
 ":is_supervised" = "`true`"
diff --git a/src/registry/Models.toml b/src/registry/Models.toml
index a08805ef..cf4497fe 100644
--- a/src/registry/Models.toml
+++ b/src/registry/Models.toml
@@ -1,4 +1,4 @@
-BetaML = ["RandomForestRegressor", "GaussianMixtureImputer", "RandomForestClassifier", "RandomForestImputer", "DecisionTreeRegressor", "LinearPerceptron", "Pegasos", "KMedoids", "NeuralNetworkRegressor", "MultitargetGaussianMixtureRegressor", "GaussianMixtureRegressor", "MultitargetNeuralNetworkRegressor", "KernelPerceptron", "KMeans", "DecisionTreeClassifier", "AutoEncoderMLJ", "GeneralImputer", "NeuralNetworkClassifier", "SimpleImputer", "GaussianMixtureClusterer"]
+BetaML = ["RandomForestRegressor", "GaussianMixtureImputer", "RandomForestClassifier", "RandomForestImputer", "PerceptronClassifier", "AutoEncoder", "DecisionTreeRegressor", "PegasosClassifier", "NeuralNetworkRegressor", "KMeansClusterer", "MultitargetGaussianMixtureRegressor", "GaussianMixtureRegressor", "MultitargetNeuralNetworkRegressor", "DecisionTreeClassifier", "GeneralImputer", "NeuralNetworkClassifier", "SimpleImputer", "GaussianMixtureClusterer", "KernelPerceptronClassifier", "KMedoidsClusterer"]
 CatBoost = ["CatBoostRegressor", "CatBoostClassifier"]
 NearestNeighborModels = ["KNNClassifier", "MultitargetKNNClassifier", "MultitargetKNNRegressor", "KNNRegressor"]
 MLJScikitLearnInterface = ["ProbabilisticSGDClassifier", "RidgeCVClassifier", "LogisticClassifier", "RandomForestRegressor", "ElasticNetCVRegressor", "PerceptronClassifier", "MultiTaskLassoRegressor", "LinearRegressor", "HDBSCAN", "DBSCAN", "RidgeRegressor", "LassoLarsICRegressor", "ARDRegressor", "SVMNuRegressor", "RidgeClassifier", "SGDRegressor", "ComplementNBClassifier", "HuberRegressor", "SVMNuClassifier", "GradientBoostingClassifier", "GaussianProcessRegressor", "SVMLinearRegressor", "LarsRegressor", "MeanShift", "HistGradientBoostingClassifier", "AdaBoostRegressor", "AffinityPropagation", "MultiTaskLassoCVRegressor", "OrthogonalMatchingPursuitRegressor", "RidgeCVRegressor", "PassiveAggressiveClassifier", "SVMRegressor", "BernoulliNBClassifier", "GaussianNBClassifier", "ExtraTreesClassifier", "KMeans", "MultiTaskElasticNetCVRegressor", "LassoLarsCVRegressor", "OrthogonalMatchingPursuitCVRegressor", "AdaBoostClassifier", "PassiveAggressiveRegressor", "BayesianRidgeRegressor", "RANSACRegressor", "BaggingClassifier", "GaussianProcessClassifier", "OPTICS", "KNeighborsRegressor", "HistGradientBoostingRegressor", "MiniBatchKMeans", "LassoCVRegressor", "DummyRegressor", "BisectingKMeans", "LassoLarsRegressor", "LarsCVRegressor", "KNeighborsClassifier", "SVMLinearClassifier", "FeatureAgglomeration", "DummyClassifier", "BaggingRegressor", "BayesianQDA", "BayesianLDA", "SGDClassifier", "TheilSenRegressor", "SpectralClustering", "Birch", "AgglomerativeClustering", "ElasticNetRegressor", "RandomForestClassifier", "LogisticCVClassifier", "MultiTaskElasticNetRegressor", "ExtraTreesRegressor", "LassoRegressor", "MultinomialNBClassifier", "GradientBoostingRegressor", "SVMClassifier"]

From 384b58c08b10bfae9c7ebab9844210985cf8f494 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Sat, 27 Jan 2024 10:17:15 +1300
Subject: [PATCH 2/2] update registry

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index b79a06f7..e5d1e9a8 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "MLJModels"
 uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
 authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>"]
-version = "0.16.14"
+version = "0.16.15"
 
 [deps]
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"