From 282808293b3c399f443ab1a9d7bbe38fd8ab5210 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 16 Jan 2024 16:50:37 +1100 Subject: [PATCH] update model registry --- src/MLJModels.jl | 2 +- src/registry/Metadata.toml | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/MLJModels.jl b/src/MLJModels.jl index 639df9f3..763a6be2 100755 --- a/src/MLJModels.jl +++ b/src/MLJModels.jl @@ -1,4 +1,4 @@ -module MLJModels +module MLJModels import MLJModelInterface import MLJModelInterface: Model, metadata_pkg, metadata_model, @mlj_model, info, diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index 4e750763..8bf297c6 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -786,7 +786,7 @@ ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" ":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Density{ScientificTypesBase.Finite}}}`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_pure_julia" = "`true`" @@ -4879,7 +4879,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Initiate a random oversampling model with the given 
hyper-parameters.\n\n```\nRandomOversampler\n```\n\nA model type for constructing a random oversampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomOversampler = @load RandomOversampler pkg=Imbalance\n```\n\nDo `model = RandomOversampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomOversampler(ratios=...)`.\n\n`RandomOversampler` implements naive oversampling by repeating existing observations with replacement.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by mach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, model can be constructed by model = RandomOverSampler()\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. 
depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomOversampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n# load RandomOversampler\nRandomOversampler = @load RandomOversampler pkg=Imbalance\n\n# wrap the model in a machine\noversampler = RandomOversampler(ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n" +":docstring" = "Initiate a random oversampling model with the given hyper-parameters.\n\n```\nRandomOversampler\n```\n\nA model type for constructing a random oversampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomOversampler = @load RandomOversampler pkg=Imbalance\n```\n\nDo `model = RandomOversampler()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `RandomOversampler(ratios=...)`.\n\n`RandomOversampler` implements naive oversampling by repeating existing observations with replacement.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by mach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, model can be constructed by model = RandomOverSampler()\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of real numbers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. 
depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomOversampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n# load RandomOversampler\nRandomOversampler = @load RandomOversampler pkg=Imbalance\n\n# wrap the model in a machine\noversampler = RandomOversampler(ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n" ":name" = "RandomOversampler" ":human_name" = "random oversampler" ":is_supervised" = "`false`" @@ -4913,7 +4913,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Initiate a SMOTENC model with the given hyper-parameters.\n\n```\nSMOTENC\n```\n\nA model type for constructing a smotenc, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSMOTENC = @load SMOTENC pkg=Imbalance\n```\n\nDo `model = SMOTENC()` to construct an instance with default 
hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SMOTENC(k=...)`.\n\n`SMOTENC` implements the SMOTENC algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O.Hall, W. P. Kegelmeyer, “SMOTE: synthetic minority over-sampling technique,” Journal of artificial intelligence research, 321-357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = SMOTENC()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTENC algorithm. Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `knn_tree`: Decides the tree used in KNN computations. Either `\"Brute\"` or `\"Ball\"`. BallTree can be much faster but may lead to inaccurate results.\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. 
Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTENC, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 3\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\njulia> ScientificTypes.schema(X).scitypes\n(Continuous, Continuous, Continuous, Continuous, Continuous)\n# coerce nominal columns to a finite scitype (multiclass or ordered factor)\nX = coerce(X, :Column4=>Multiclass, :Column5=>Multiclass)\n\n# load SMOTE-NC\nSMOTENC = @load SMOTENC pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTENC(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: 
▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n" +":docstring" = "Initiate a SMOTENC model with the given hyper-parameters.\n\n```\nSMOTENC\n```\n\nA model type for constructing a smotenc, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSMOTENC = @load SMOTENC pkg=Imbalance\n```\n\nDo `model = SMOTENC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SMOTENC(k=...)`.\n\n`SMOTENC` implements the SMOTENC algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O.Hall, W. P. Kegelmeyer, “SMOTE: synthetic minority over-sampling technique,” Journal of artificial intelligence research, 321-357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = SMOTENC()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTENC algorithm. Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `knn_tree`: Decides the tree used in KNN computations. Either `\"Brute\"` or `\"Ball\"`. 
BallTree can be much faster but may lead to inaccurate results.\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTENC, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 3\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\njulia> ScientificTypes.schema(X).scitypes\n(Continuous, Continuous, Continuous, Continuous, 
Continuous)\n# coerce nominal columns to a finite scitype (multiclass or ordered factor)\nX = coerce(X, :Column4=>Multiclass, :Column5=>Multiclass)\n\n# load SMOTE-NC\nSMOTENC = @load SMOTENC pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTENC(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n" ":name" = "SMOTENC" ":human_name" = "smotenc" ":is_supervised" = "`false`" @@ -5032,13 +5032,13 @@ ":reporting_operations" = "`()`" [Imbalance.SMOTEN] -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{ScientificTypesBase.Finite}}, AbstractVector}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{ScientificTypesBase.Finite}}, AbstractVector}`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":fit_data_scitype" = "`Tuple{}`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{ScientificTypesBase.Finite}}, AbstractVector}`" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, 
AbstractMatrix{ScientificTypesBase.Finite}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" +":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" ":is_pure_julia" = "`true`" ":package_name" = "Imbalance" ":package_license" = "unknown" @@ -5049,7 +5049,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Initiate a SMOTEN model with the given hyper-parameters.\n\n```\nSMOTEN\n```\n\nA model type for constructing a smoten, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSMOTEN = @load SMOTEN pkg=Imbalance\n```\n\nDo `model = SMOTEN()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SMOTEN(k=...)`.\n\n`SMOTEN` implements the SMOTEN algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O.Hall, W. P. Kegelmeyer, “SMOTEN: synthetic minority over-sampling technique,” Journal of artificial intelligence research, 321-357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = SMOTEN()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTEN algorithm. 
Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. 
depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTEN, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 0\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\njulia> ScientificTypes.schema(X).scitypes\n(Count, Count)\n\n# coerce to a finite scitype (multiclass or ordered factor)\nX = coerce(X, autotype(X, :few_to_finite))\n\n# load SMOTEN\nSMOTEN = @load SMOTEN pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTEN(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n" +":docstring" = "Initiate a SMOTEN model with the given hyper-parameters.\n\n```\nSMOTEN\n```\n\nA model type for constructing a smoten, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSMOTEN = @load SMOTEN pkg=Imbalance\n```\n\nDo `model = SMOTEN()` 
to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SMOTEN(k=...)`.\n\n`SMOTEN` implements the SMOTEN algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O.Hall, W. P. Kegelmeyer, “SMOTEN: synthetic minority over-sampling technique,” Journal of artificial intelligence research, 321-357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = SMOTEN()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTEN algorithm. Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of integers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Finite`. 
That is, for table inputs each column should have either `OrderedFactor` or `Multiclass` as the element [scitype](https://juliaai.github.io/ScientificTypes.jl/).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTEN, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 0\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\njulia> ScientificTypes.schema(X).scitypes\n(Count, Count)\n\n# coerce to a finite scitype (multiclass or ordered factor)\nX = coerce(X, autotype(X, :few_to_finite))\n\n# load SMOTEN\nSMOTEN = @load SMOTEN pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTEN(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: 
▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n" ":name" = "SMOTEN" ":human_name" = "smoten" ":is_supervised" = "`false`" @@ -5117,7 +5117,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Initiate a random undersampling model with the given hyper-parameters.\n\n```\nRandomUndersampler\n```\n\nA model type for constructing a random undersampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n```\n\nDo `model = RandomUndersampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomUndersampler(ratios=...)`.\n\n`RandomUndersampler` implements naive undersampling by randomly removing existing observations. \n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by mach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, model can be constructed by model = RandomUndersampler()\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of undersampling to be done for each class\n\n * Can be a float and in this case each class will be undersampled to the size of the minority class times the float. By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. 
Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table that includes the data after undersampling depending on whether the input `X` is a matrix or table respectively\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomUndersampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y; ref=\"minority\")\n 1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n 2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n 0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load RandomUndersampler\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n\n# wrap the model in a machine\nundersampler = RandomUndersampler(ratios=Dict(0=>1.0, 1=> 1.0, 2=>1.0), \n rng=42)\nmach = machine(undersampler)\n\n# provide the data to transform (there is nothing to fit)\nX_under, y_under = transform(mach, X, y)\n \njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n```\n" +":docstring" = "Initiate a random undersampling model with the given hyper-parameters.\n\n```\nRandomUndersampler\n```\n\nA model type for constructing a random undersampler, based on 
[Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n```\n\nDo `model = RandomUndersampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomUndersampler(ratios=...)`.\n\n`RandomUndersampler` implements naive undersampling by randomly removing existing observations. \n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by mach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, model can be constructed by model = RandomUndersampler()\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of undersampling to be done for each class\n\n * Can be a float and in this case each class will be undersampled to the size of the minority class times the float. By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of real numbers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. 
Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table that includes the data after undersampling depending on whether the input `X` is a matrix or table respectively\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomUndersampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y; ref=\"minority\")\n 1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n 2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n 0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load RandomUndersampler\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n\n# wrap the model in a machine\nundersampler = RandomUndersampler(ratios=Dict(0=>1.0, 1=> 1.0, 2=>1.0), \n rng=42)\nmach = machine(undersampler)\n\n# provide the data to transform (there is nothing to fit)\nX_under, y_under = transform(mach, X, y)\n \njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n```\n" ":name" = "RandomUndersampler" ":human_name" = "random 
undersampler" ":is_supervised" = "`false`" @@ -5219,7 +5219,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Initiate a RandomWalkOversampler model with the given hyper-parameters.\n\n```\nRandomWalkOversampler\n```\n\nA model type for constructing a random walk oversampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomWalkOversampler = @load RandomWalkOversampler pkg=Imbalance\n```\n\nDo `model = RandomWalkOversampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomWalkOversampler(ratios=...)`.\n\n`RandomWalkOversampler` implements the random walk oversampling algorithm to correct for class imbalance as in Zhang, H., & Li, M. (2014). RWO-Sampling: A random walk over-sampling approach to imbalanced data classification. Information Fusion, 25, 4-20.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = RandomWalkOversampler()\n```\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. 
Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of floats or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and\n\n```\n elements in continuous columns should subtype `Infinite` (i.e., have \n [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n```\n\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomWalkOversampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 3\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n\njulia> ScientificTypes.schema(X).scitypes\n(Continuous, Continuous, Continuous, Continuous, Continuous)\n# coerce nominal columns to a finite scitype (multiclass or ordered factor)\nX = coerce(X, :Column4=>Multiclass, :Column5=>Multiclass)\n\n# load 
RandomWalkOversampler model type:\nRandomWalkOversampler = @load RandomWalkOversampler pkg=Imbalance\n\n# oversample the minority classes to sizes relative to the majority class:\noversampler = RandomWalkOversampler(ratios = Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng = 42)\nmach = machine(oversampler)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%)\n```\n" +":docstring" = "Initiate a RandomWalkOversampler model with the given hyper-parameters.\n\n```\nRandomWalkOversampler\n```\n\nA model type for constructing a random walk oversampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomWalkOversampler = @load RandomWalkOversampler pkg=Imbalance\n```\n\nDo `model = RandomWalkOversampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomWalkOversampler(ratios=...)`.\n\n`RandomWalkOversampler` implements the random walk oversampling algorithm to correct for class imbalance as in Zhang, H., & Li, M. (2014). RWO-Sampling: A random walk over-sampling approach to imbalanced data classification. 
Information Fusion, 25, 4-20.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = RandomWalkOversampler()\n```\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses `MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling, 
depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomWalkOversampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 3\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n\njulia> ScientificTypes.schema(X).scitypes\n(Continuous, Continuous, Continuous, Continuous, Continuous)\n# coerce nominal columns to a finite scitype (multiclass or ordered factor)\nX = coerce(X, :Column4=>Multiclass, :Column5=>Multiclass)\n\n# load RandomWalkOversampler model type:\nRandomWalkOversampler = @load RandomWalkOversampler pkg=Imbalance\n\n# oversample the minority classes to sizes relative to the majority class:\noversampler = RandomWalkOversampler(ratios = Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng = 42)\nmach = machine(oversampler)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%)\n```\n" ":name" = "RandomWalkOversampler" ":human_name" = "random walk oversampler" ":is_supervised" = "`false`" @@ -7586,7 +7586,7 @@ ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = 
"`AbstractVector{<:ScientificTypesBase.Binary}`" ":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Binary}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Binary}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Binary}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_pure_julia" = "`true`"