Skip to content

Commit

Permalink
Merge pull request #23 from JuliaAI/dev
Browse files Browse the repository at this point in the history
For a 0.2.1 release
  • Loading branch information
ablaom authored Jul 30, 2024
2 parents 8f548ad + c105173 commit 833bbdc
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 30 deletions.
9 changes: 9 additions & 0 deletions .github/codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
coverage:
status:
project:
default:
threshold: 0.5%
removed_code_behavior: fully_covered_patch
patch:
default:
target: 80%
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ jobs:
with:
file: lcov.info
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: false

docs:
name: Documentation
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "FeatureSelection"
uuid = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6"
authors = ["Anthony D. Blaom <[email protected]>", "Samuel Okon <[email protected]>"]
version = "0.2.0"
version = "0.2.1"

[deps]
MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
Expand Down
73 changes: 44 additions & 29 deletions src/models/rfe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,14 @@ eval(:(const RFE{M} =

# Common keyword constructor for both model types
"""
RecursiveFeatureElimination(model, n_features, step)
RecursiveFeatureElimination(model; n_features=0, step=1)
This model implements a recursive feature elimination algorithm for feature selection.
It recursively removes features, training a base model on the remaining features and
evaluating their importance until the desired number of features is selected.
Construct an instance with default hyper-parameters using the syntax
`rfe_model = RecursiveFeatureElimination(model=...)`. Provide keyword arguments to override
hyper-parameter defaults.
# Training data
In MLJ or MLJBase, bind an instance `rfe_model` to data with
mach = machine(rfe_model, X, y)
Expand Down Expand Up @@ -92,53 +89,62 @@ Train the machine using `fit!(mach, rows=...)`.
# Operations
- `transform(mach, X)`: transform the input table `X` into a new table containing only
columns corresponding to features gotten from the RFE algorithm.
columns corresponding to features accepted by the RFE algorithm.
- `predict(mach, X)`: transform the input table `X` into a new table, exactly as in
- `transform(mach, X)` above and predict using the fitted base model on the
transformed table.
`transform(mach, X)` above and predict using the fitted base model on the transformed
table.
# Fitted parameters
The fields of `fitted_params(mach)` are:
- `features_left`: names of features remaining after recursive feature elimination.
- `model_fitresult`: fitted parameters of the base model.
# Report
The fields of `report(mach)` are:
- `scores`: dictionary of scores for each feature in the training dataset.
The model deems highly scored variables more significant.
The model deems highly scored variables more significant.
- `model_report`: report for the fitted base model.
# Examples
The following example assumes you have MLJDecisionTreeInterface in the active package
environment.
```
using FeatureSelection, MLJ, StableRNGs
using MLJ
RandomForestRegressor = @load RandomForestRegressor pkg=DecisionTree
# Creates a dataset where the target only depends on the first 5 columns of the input table.
A = rand(rng, 50, 10);
A = rand(50, 10);
y = 10 .* sin.(
pi .* A[:, 1] .* A[:, 2]
) + 20 .* (A[:, 3] .- 0.5).^ 2 .+ 10 .* A[:, 4] .+ 5 * A[:, 5]);
) + 20 .* (A[:, 3] .- 0.5).^ 2 .+ 10 .* A[:, 4] .+ 5 * A[:, 5];
X = MLJ.table(A);
# fit a rfe model
# fit an RFE model:
rf = RandomForestRegressor()
selector = RecursiveFeatureElimination(model = rf)
selector = RecursiveFeatureElimination(rf, n_features=2)
mach = machine(selector, X, y)
fit!(mach)
# view the feature importances
feature_importances(mach)
# predict using the base model
Xnew = MLJ.table(rand(rng, 50, 10));
# predict using the base model trained on the reduced feature set:
Xnew = MLJ.table(rand(50, 10));
predict(mach, Xnew)
# transform data with all features to the reduced feature set:
transform(mach, Xnew)
```
"""
function RecursiveFeatureElimination(
Expand Down Expand Up @@ -173,7 +179,7 @@ function RecursiveFeatureElimination(
# This branch is hit just in case there are any models that support
# feature importances but aren't `<:Probabilistic` or `<:Deterministic`,
# which is rare.
throw(ERR_MODEL_TYPE)
throw(ERR_MODEL_TYPE)
end
message = MMI.clean!(selector)
isempty(message) || @warn(message)
Expand Down Expand Up @@ -214,22 +220,30 @@ abs_last(x::Pair{<:Any, <:Real}) = abs(last(x))
"""
score_features!(scores_dict, features, importances, n_features_to_score)
Internal method that updates the `scores_dict` by increasing the score for each feature based on their
**Private method.**
Update the `scores_dict` by increasing the score for each feature based on their
importance and store the features in the `features` array.
# Arguments
- `scores_dict::Dict{Symbol, Int}`: A dictionary where the keys are features and
- `scores_dict::Dict{Symbol, Int}`: A dictionary where the keys are features and
the values are their corresponding scores.
- `features::Vector{Symbol}`: An array to store the top features based on importance.
- `importances::Vector{Pair{Symbol, <:Real}}`: An array of pairs where each pair
contains a feature and its importance score.
- `importances::Vector{Pair{Symbol, <:Real}}`: An array of pairs where each pair
contains a feature and its importance score.
- `n_features_to_score::Int`: The number of top features to score and store.
# Notes
Ensure that `n_features_to_score` is less than or equal to the minimum of the
Ensure that `n_features_to_score` is less than or equal to the minimum of the
lengths of `features` and `importances`.
# Example
```julia
scores_dict = Dict(:feature1 => 0, :feature2 => 0, :feature3 => 0)
features = [:x1, :x1, :x1]
Expand All @@ -244,7 +258,7 @@ features == [:feature1, :feature2, :x1]
function score_features!(scores_dict, features, importances, n_features_to_score)
    # Walk the first `n_features_to_score` entries of `importances`. Each entry is
    # indexed by position and its first component is taken as the feature key.
    # The caller must ensure that `n_features_to_score` does not exceed the length of
    # either `features` or `importances`; no bounds adjustment is made here.
    for i in Base.OneTo(n_features_to_score)
        ftr = first(importances[i])
        # Store the feature in slot `i`; entries of `features` beyond
        # `n_features_to_score` are left untouched.
        features[i] = ftr
        # `ftr` must already be a key of `scores_dict`, otherwise `+=` throws a
        # `KeyError` for a `Dict`.
        scores_dict[ftr] += 1
    end
    # Mutating helper: results are communicated through `scores_dict` and `features`.
    return nothing
end
Expand Down Expand Up @@ -273,7 +287,7 @@ function MMI.fit(selector::RFE, verbosity::Int, X, y, args...)
"n_features > number of features in training data, "*
"hence no feature will be eliminated."
)
end
end
end

_step = selector.step
Expand All @@ -296,17 +310,17 @@ function MMI.fit(selector::RFE, verbosity::Int, X, y, args...)
verbosity > 0 && @info("Fitting estimator with $(n_features_to_keep) features.")
data = MMI.reformat(model, MMI.selectcols(X, features_left), args...)
fitresult, _, report = MMI.fit(model, verbosity - 1, data...)
# Note that the MLJ feature importance API does not impose any restrictions on the
# ordering of `feature => score` pairs in the `importances` vector.
# Note that the MLJ feature importance API does not impose any restrictions on the
# ordering of `feature => score` pairs in the `importances` vector.
# Therefore, the order of `feature => score` pairs in the `importances` vector
# might differ from the order of features in the `features` vector, which is
# might differ from the order of features in the `features` vector, which is
# extracted from the feature matrix `X` above. Hence the need for a dictionary
# implementation.
importances = MMI.feature_importances(
selector.model,
fitresult,
report
)
)

# Eliminate the worst features and increase score of remaining features
sort!(importances, by=abs_last, rev = true)
Expand Down Expand Up @@ -396,6 +410,7 @@ end
# Trait declarations applying to every subtype of `RFE`
# (`MMI` is presumably the MLJModelInterface alias — confirm against the file's imports).
# Path string under which the model constructor is reported:
MMI.load_path(::Type{<:RFE}) = "FeatureSelection.RecursiveFeatureElimination"
# The keyword constructor shared by the `RFE` model types:
MMI.constructor(::Type{<:RFE}) = RecursiveFeatureElimination
# Name of the package providing the model:
MMI.package_name(::Type{<:RFE}) = "FeatureSelection"
# Declare `RFE` models to be wrappers:
MMI.is_wrapper(::Type{<:RFE}) = true

for trait in [
:supports_weights,
Expand Down

0 comments on commit 833bbdc

Please sign in to comment.