
added verbosity param (#166)
* added verbosity param
kainkad authored Nov 15, 2024
1 parent e1a1b19 commit 43c765d
Showing 22 changed files with 180 additions and 142 deletions.
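In short, this commit promotes `verbosity` from a per-call keyword (previously appended to the parameter string by `stringifyparams`) to a stored field on every estimator; the `fit!`, `cv`, and `search_cv` keywords now fall back to that field when no value is passed. A minimal usage sketch under that reading of the diff — the data and most parameter values here are hypothetical, only the constructor and `fit!` signatures come from the changed files:

```julia
using LightGBM

# verbosity is now a stored hyperparameter; -1 silences LightGBM's native output.
estimator = LightGBM.LGBMClassification(
    objective = "binary",
    num_class = 1,
    metric = ["auc"],
    verbosity = -1,
)

X = randn(200, 5)             # hypothetical feature matrix
y = Float64.(rand(0:1, 200))  # hypothetical binary labels

# No verbosity keyword: fit! falls back to estimator.verbosity.
LightGBM.fit!(estimator, X, y)

# An explicit keyword still overrides the stored value for this call only.
LightGBM.fit!(estimator, X, y, verbosity = 1)
```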
2 changes: 2 additions & 0 deletions src/MLJInterface.jl
@@ -87,6 +87,7 @@ MLJModelInterface.@mlj_model mutable struct LGBMRegressor <: MLJModelInterface.D
cegb_penalty_feature_coupled::Vector{Float64} = Vector{Float64}()
path_smooth::Float64 = 0.0::(_ >= 0.0)
interaction_constraints::String = ""
+ verbosity::Int = 1

# Dataset parameters
linear_tree::Bool = false
@@ -209,6 +210,7 @@ MLJModelInterface.@mlj_model mutable struct LGBMClassifier <: MLJModelInterface.
cegb_penalty_feature_coupled::Vector{Float64} = Vector{Float64}()
path_smooth::Float64 = 0.0::(_ >= 0.0)
interaction_constraints::String = ""
+ verbosity::Int = 1

# Dataset parameters
linear_tree::Bool = false
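For the MLJ wrapper, the new field is just another model hyperparameter, so it is visible to MLJ's inspection and tuning machinery. A hedged sketch of how it would be set through the standard MLJ workflow — it assumes the model is registered with MLJ under `pkg=LightGBM` and uses a toy dataset:

```julia
using MLJ

# Assumption: the classifier is available through the MLJ model registry.
LGBMClassifier = @load LGBMClassifier pkg=LightGBM verbosity=0

model = LGBMClassifier(objective = "binary", verbosity = -1)

X, y = make_moons(200)       # toy binary classification data shipped with MLJ
mach = machine(model, X, y)
fit!(mach)
```

Note that the `verbosity` keyword of MLJ's own `fit!(mach, verbosity = ...)` controls MLJ-level logging and is separate from the model hyperparameter shown here.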
10 changes: 6 additions & 4 deletions src/cv.jl
@@ -24,22 +24,24 @@ last valid iteration.
"""
function cv(
estimator::LGBMEstimator, X::Matrix{TX}, y::Vector{Ty}, splits;
- verbosity::Integer = 1, truncate_booster::Bool = true,
+ verbosity::Union{Nothing,Integer} = nothing, truncate_booster::Bool = true,
) where {TX<:Real,Ty<:Real}

- ds_parameters = stringifyparams(estimator; verbosity=verbosity)
+ verbosity = isnothing(verbosity) ? estimator.verbosity : verbosity
+ ds_parameters = stringifyparams(estimator)
full_ds = LGBM_DatasetCreateFromMat(X, ds_parameters)
LGBM_DatasetSetField(full_ds, "label", y)

return cv(estimator, full_ds, splits, verbosity = verbosity, truncate_booster = truncate_booster)
end

# Pass a Dataset object directly. This speeds things up when a pre-constructed dataset is available as part of an iterative workflow
- function cv(estimator::LGBMEstimator, dataset::Dataset, splits; verbosity::Integer = 1, truncate_booster::Bool=true)
+ function cv(estimator::LGBMEstimator, dataset::Dataset, splits; verbosity::Union{Nothing,Integer} = nothing, truncate_booster::Bool=true)

start_time = now()
num_data = LGBM_DatasetGetNumData(dataset)
- parameters = stringifyparams(estimator; verbosity=verbosity)
+ verbosity = isnothing(verbosity) ? estimator.verbosity : verbosity
+ parameters = stringifyparams(estimator)

split_scores = Dict{String,Dict{String,Vector{Float64}}}()
for (split_idx, train_inds) in enumerate(splits)
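The same resolve-then-override pattern applies to `cv`: an explicit `verbosity` keyword wins, otherwise `estimator.verbosity` is used. A hedged sketch — judging from the `for (split_idx, train_inds) in enumerate(splits)` loop, `splits` is an iterable of training-row index collections; the data and fold layout below are hypothetical:

```julia
using LightGBM

estimator = LightGBM.LGBMRegression(num_iterations = 50, verbosity = -1)

X = randn(300, 4)   # hypothetical features
y = randn(300)      # hypothetical regression targets

# Two hypothetical folds; each entry lists the rows used for training in that split.
splits = (collect(1:150), collect(151:300))

LightGBM.cv(estimator, X, y, splits)                 # uses estimator.verbosity (-1)
LightGBM.cv(estimator, X, y, splits, verbosity = 1)  # keyword overrides for this call
```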
81 changes: 45 additions & 36 deletions src/estimators.jl
@@ -64,6 +64,7 @@ mutable struct LGBMRegression <: LGBMEstimator
cegb_penalty_feature_coupled::Vector{Float64}
path_smooth::Float64
interaction_constraints::String
+ verbosity::Int

# Dataset parameters
linear_tree::Bool
@@ -176,6 +177,7 @@ end
cegb_penalty_feature_coupled = Float64[],
path_smooth = 0.,
interaction_constraints = "",
+ verbosity = 1,
linear_tree = false,
max_bin = 255,
max_bin_by_feature = Int[],
@@ -276,6 +278,7 @@ function LGBMRegression(;
cegb_penalty_feature_coupled = Float64[],
path_smooth = 0.,
interaction_constraints = "",
+ verbosity = 1,
linear_tree = false,
max_bin = 255,
max_bin_by_feature = Int[],
@@ -318,20 +321,20 @@ function LGBMRegression(;
)

return LGBMRegression(
Booster(), "", objective, boosting, num_iterations, learning_rate, num_leaves,
tree_learner, num_threads, device_type, seed, deterministic, force_col_wise, force_row_wise, histogram_pool_size,
max_depth, min_data_in_leaf, min_sum_hessian_in_leaf,
bagging_fraction, bagging_freq, bagging_seed, feature_fraction, feature_fraction_bynode, feature_fraction_seed, extra_trees,
extra_seed, early_stopping_round, first_metric_only, max_delta_step, lambda_l1, lambda_l2, linear_lambda,
min_gain_to_split, drop_rate, max_drop, skip_drop,
Booster(), "", objective, boosting, num_iterations, learning_rate, num_leaves, tree_learner, num_threads, device_type, seed, deterministic,
force_col_wise, force_row_wise, histogram_pool_size, max_depth, min_data_in_leaf, min_sum_hessian_in_leaf,
bagging_fraction, bagging_freq, bagging_seed, feature_fraction, feature_fraction_bynode, feature_fraction_seed,
extra_trees, extra_seed, early_stopping_round, first_metric_only, max_delta_step,
lambda_l1, lambda_l2, linear_lambda, min_gain_to_split, drop_rate, max_drop, skip_drop,
xgboost_dart_mode, uniform_drop, drop_seed, top_rate, other_rate, min_data_per_group, max_cat_threshold,
cat_l2, cat_smooth, max_cat_to_onehot, top_k, monotone_constraints, monotone_constraints_method, monotone_penalty, feature_contri, refit_decay_rate,
- cegb_tradeoff, cegb_penalty_split, cegb_penalty_feature_lazy, cegb_penalty_feature_coupled, path_smooth,
- interaction_constraints, linear_tree, max_bin, max_bin_by_feature, min_data_in_bin, bin_construct_sample_cnt, data_random_seed,
+ cegb_tradeoff, cegb_penalty_split, cegb_penalty_feature_lazy, cegb_penalty_feature_coupled, path_smooth, interaction_constraints, verbosity,
+ linear_tree, max_bin, max_bin_by_feature, min_data_in_bin, bin_construct_sample_cnt, data_random_seed,
is_enable_sparse, enable_bundle, use_missing, zero_as_missing, feature_pre_filter, pre_partition, categorical_feature,
start_iteration_predict, num_iteration_predict, predict_raw_score, predict_leaf_index, predict_contrib, predict_disable_shape_check,
- 1, is_unbalance, boost_from_average, reg_sqrt, alpha, fair_c, poisson_max_delta_step, tweedie_variance_power, metric, metric_freq, is_provide_training_metric, eval_at, num_machines, local_listen_port, time_out,
- machine_list_filename, machines, gpu_platform_id, gpu_device_id, gpu_use_dp, num_gpu,
+ 1, is_unbalance, boost_from_average, reg_sqrt, alpha, fair_c, poisson_max_delta_step, tweedie_variance_power,
+ metric, metric_freq, is_provide_training_metric, eval_at,
+ num_machines, local_listen_port, time_out, machine_list_filename, machines, gpu_platform_id, gpu_device_id, gpu_use_dp, num_gpu,
)
end

@@ -401,6 +404,7 @@ mutable struct LGBMClassification <: LGBMEstimator
cegb_penalty_feature_coupled::Vector{Float64}
path_smooth::Float64
interaction_constraints::String
+ verbosity::Int

# Dataset parameters
linear_tree::Bool
@@ -517,6 +521,7 @@ end
cegb_penalty_feature_coupled = Float64[],
path_smooth = 0.,
interaction_constraints = "",
+ verbosity = 1,
linear_tree = false,
max_bin = 255,
max_bin_by_feature = Int[],
@@ -622,6 +627,7 @@ function LGBMClassification(;
cegb_penalty_feature_coupled = Float64[],
path_smooth = 0.,
interaction_constraints = "",
+ verbosity = 1,
linear_tree = false,
max_bin = 255,
max_bin_by_feature = Int[],
@@ -667,21 +673,21 @@ function LGBMClassification(;
)

return LGBMClassification(
Booster(), "", objective, boosting, num_iterations, learning_rate,
num_leaves, tree_learner, num_threads, device_type, seed, deterministic, force_col_wise, force_row_wise, histogram_pool_size,
max_depth, min_data_in_leaf, min_sum_hessian_in_leaf,
bagging_fraction, pos_bagging_fraction, neg_bagging_fraction,bagging_freq,
bagging_seed, feature_fraction, feature_fraction_bynode, feature_fraction_seed, extra_trees, extra_seed, early_stopping_round, first_metric_only, max_delta_step, lambda_l1, lambda_l2, linear_lambda,
min_gain_to_split, drop_rate, max_drop, skip_drop, xgboost_dart_mode,
uniform_drop, drop_seed, top_rate, other_rate, min_data_per_group, max_cat_threshold, cat_l2, cat_smooth, max_cat_to_onehot, top_k, monotone_constraints, monotone_constraints_method, monotone_penalty,
feature_contri, refit_decay_rate, cegb_tradeoff, cegb_penalty_split, cegb_penalty_feature_lazy, cegb_penalty_feature_coupled,
path_smooth, interaction_constraints, linear_tree, max_bin, max_bin_by_feature, min_data_in_bin, bin_construct_sample_cnt,
data_random_seed, is_enable_sparse, enable_bundle, use_missing, zero_as_missing, feature_pre_filter, pre_partition, categorical_feature,
start_iteration_predict, num_iteration_predict, predict_raw_score, predict_leaf_index, predict_contrib,
predict_disable_shape_check, pred_early_stop, pred_early_stop_freq, pred_early_stop_margin,
Booster(), "", objective, boosting, num_iterations, learning_rate, num_leaves, tree_learner, num_threads, device_type, seed, deterministic,
force_col_wise, force_row_wise, histogram_pool_size, max_depth, min_data_in_leaf, min_sum_hessian_in_leaf,
bagging_fraction, pos_bagging_fraction, neg_bagging_fraction,bagging_freq, bagging_seed,
feature_fraction, feature_fraction_bynode, feature_fraction_seed, extra_trees, extra_seed, early_stopping_round, first_metric_only, max_delta_step,
lambda_l1, lambda_l2, linear_lambda, min_gain_to_split, drop_rate, max_drop, skip_drop,
xgboost_dart_mode, uniform_drop, drop_seed, top_rate, other_rate, min_data_per_group, max_cat_threshold,
cat_l2, cat_smooth, max_cat_to_onehot, top_k, monotone_constraints, monotone_constraints_method, monotone_penalty,
feature_contri, refit_decay_rate, cegb_tradeoff, cegb_penalty_split, cegb_penalty_feature_lazy, cegb_penalty_feature_coupled, path_smooth, interaction_constraints, verbosity,
linear_tree, max_bin, max_bin_by_feature, min_data_in_bin, bin_construct_sample_cnt, data_random_seed,
is_enable_sparse, enable_bundle, use_missing, zero_as_missing, feature_pre_filter, pre_partition, categorical_feature,
start_iteration_predict, num_iteration_predict, predict_raw_score, predict_leaf_index, predict_contrib, predict_disable_shape_check, pred_early_stop, pred_early_stop_freq, pred_early_stop_margin,
num_class, is_unbalance, scale_pos_weight, sigmoid, boost_from_average,
- metric, metric_freq, is_provide_training_metric, eval_at, multi_error_top_k, auc_mu_weights, num_machines, local_listen_port, time_out,
- machine_list_filename, machines, gpu_platform_id, gpu_device_id, gpu_use_dp, num_gpu,
+ metric, metric_freq, is_provide_training_metric, eval_at, multi_error_top_k, auc_mu_weights,
+ num_machines, local_listen_port, time_out, machine_list_filename,
+ machines, gpu_platform_id, gpu_device_id, gpu_use_dp, num_gpu,
)
end

@@ -750,6 +756,7 @@ mutable struct LGBMRanking <: LGBMEstimator
cegb_penalty_feature_coupled::Vector{Float64}
path_smooth::Float64
interaction_constraints::String
+ verbosity::Int

# Dataset parameters
linear_tree::Bool
@@ -869,6 +876,7 @@ end
cegb_penalty_feature_coupled = Float64[],
path_smooth = 0.,
interaction_constraints = "",
+ verbosity = 1,
linear_tree = false,
max_bin = 255,
max_bin_by_feature = Int[],
@@ -977,6 +985,7 @@ function LGBMRanking(;
cegb_penalty_feature_coupled = Float64[],
path_smooth = 0.,
interaction_constraints = "",
+ verbosity = 1,
linear_tree = false,
max_bin = 255,
max_bin_by_feature = Int[],
@@ -1025,20 +1034,20 @@ function LGBMRanking(;
)

return LGBMRanking(
Booster(), "", objective, boosting, num_iterations, learning_rate,
num_leaves, tree_learner, num_threads, device_type, seed, deterministic, force_col_wise, force_row_wise, histogram_pool_size,
max_depth, min_data_in_leaf, min_sum_hessian_in_leaf,
Booster(), "", objective, boosting, num_iterations, learning_rate, num_leaves, tree_learner, num_threads, device_type, seed, deterministic,
force_col_wise, force_row_wise, histogram_pool_size, max_depth, min_data_in_leaf, min_sum_hessian_in_leaf,
bagging_fraction, pos_bagging_fraction, neg_bagging_fraction, bagging_freq,
bagging_seed, feature_fraction, feature_fraction_bynode, feature_fraction_seed, extra_trees, extra_seed, early_stopping_round, first_metric_only, max_delta_step, lambda_l1, lambda_l2, linear_lambda,
- min_gain_to_split, drop_rate, max_drop, skip_drop, xgboost_dart_mode,
- uniform_drop, drop_seed, top_rate, other_rate, min_data_per_group, max_cat_threshold, cat_l2, cat_smooth, max_cat_to_onehot, top_k, monotone_constraints, monotone_constraints_method, monotone_penalty,
- feature_contri, refit_decay_rate, cegb_tradeoff, cegb_penalty_split, cegb_penalty_feature_lazy, cegb_penalty_feature_coupled, path_smooth,
- interaction_constraints, linear_tree, max_bin, max_bin_by_feature, min_data_in_bin, bin_construct_sample_cnt,
- data_random_seed, is_enable_sparse, enable_bundle, use_missing, zero_as_missing, feature_pre_filter, pre_partition, group_column, categorical_feature,
- start_iteration_predict, num_iteration_predict, predict_raw_score, predict_leaf_index, predict_contrib,
- predict_disable_shape_check, pred_early_stop, pred_early_stop_freq, pred_early_stop_margin,
+ min_gain_to_split, drop_rate, max_drop, skip_drop,
+ xgboost_dart_mode, uniform_drop, drop_seed, top_rate, other_rate, min_data_per_group, max_cat_threshold,
+ cat_l2, cat_smooth, max_cat_to_onehot, top_k, monotone_constraints, monotone_constraints_method, monotone_penalty,
+ feature_contri, refit_decay_rate, cegb_tradeoff, cegb_penalty_split, cegb_penalty_feature_lazy, cegb_penalty_feature_coupled, path_smooth, interaction_constraints, verbosity,
+ linear_tree, max_bin, max_bin_by_feature, min_data_in_bin, bin_construct_sample_cnt, data_random_seed,
+ is_enable_sparse, enable_bundle, use_missing, zero_as_missing, feature_pre_filter, pre_partition, group_column, categorical_feature,
+ start_iteration_predict, num_iteration_predict, predict_raw_score, predict_leaf_index, predict_contrib, predict_disable_shape_check, pred_early_stop, pred_early_stop_freq, pred_early_stop_margin,
objective_seed, num_class, is_unbalance, scale_pos_weight, sigmoid, boost_from_average, lambdarank_truncation_level, lambdarank_norm, label_gain,
- metric, metric_freq, is_provide_training_metric, eval_at, num_machines, local_listen_port, time_out,
- machine_list_filename, machines, gpu_platform_id, gpu_device_id, gpu_use_dp, num_gpu,
+ metric, metric_freq, is_provide_training_metric, eval_at,
+ num_machines, local_listen_port, time_out, machine_list_filename, machines,
+ gpu_platform_id, gpu_device_id, gpu_use_dp, num_gpu,
)
end
19 changes: 9 additions & 10 deletions src/fit.jl
@@ -44,18 +44,17 @@ array that holds the validation metric's value at each iteration.
"""
function fit!(
estimator::LGBMEstimator, X::AbstractMatrix{TX}, y::Vector{Ty}, test::Tuple{AbstractMatrix{TX},Vector{Ty}}...;
- verbosity::Integer = 1,
- is_row_major = false,
+ verbosity::Union{Nothing,Integer} = nothing,
+ is_row_major::Bool = false,
weights::Vector{Tw} = Float32[],
init_score::Vector{Ti} = Float64[],
group::Vector{Int} = Int[],
truncate_booster::Bool=true,
) where {TX<:Real,Ty<:Real,Tw<:Real,Ti<:Real}
-
start_time = now()
-
+ verbosity = isnothing(verbosity) ? estimator.verbosity : verbosity
log_debug(verbosity, "Started creating LGBM training dataset\n")
- ds_parameters = stringifyparams(estimator; verbosity=verbosity)
+ ds_parameters = stringifyparams(estimator)
train_ds = dataset_constructor(X, ds_parameters, is_row_major)
LGBM_DatasetSetField(train_ds, "label", y)
if length(weights) > 0
@@ -85,13 +84,13 @@ function fit!(
estimator::LGBMEstimator,
train_dataset::Dataset,
test_datasets::Dataset...;
- verbosity::Integer = 1,
+ verbosity::Union{Nothing,Integer} = nothing,
truncate_booster::Bool=true,
)
-
+ verbosity = isnothing(verbosity) ? estimator.verbosity : verbosity
start_time = now()
log_debug(verbosity, "Started creating LGBM booster\n")
- bst_parameters = stringifyparams(estimator; verbosity=verbosity)
+ bst_parameters = stringifyparams(estimator)
estimator.booster = LGBM_BoosterCreate(train_dataset, bst_parameters)

n_tests = length(test_datasets)
@@ -305,7 +304,7 @@ function merge_metrics(
end


- function stringifyparams(estimator::LGBMEstimator; verbosity::Int = 1)
+ function stringifyparams(estimator::LGBMEstimator)

paramstring = ""

@@ -336,5 +335,5 @@ function stringifyparams(estimator::LGBMEstimator; verbosity::Int = 1)
end

end
return paramstring[1:end - 1] * " verbosity=$verbosity"
return paramstring[1:end - 1]
end
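With the trailing `verbosity=$verbosity` suffix gone, `stringifyparams` now treats `verbosity` like any other estimator field when it builds the space-separated parameter string handed to the LightGBM C API. A toy illustration of that pattern — not the library's actual implementation, which also handles vector-valued fields and skips empty ones:

```julia
# Toy stand-in for an estimator that stores verbosity as a regular field.
struct ToyEstimator
    num_iterations::Int
    learning_rate::Float64
    verbosity::Int
end

# Render every field as "name=value", joined by spaces -- the shape of string that
# calls like LGBM_BoosterCreate and LGBM_DatasetCreateFromMat expect as parameters.
function toy_stringifyparams(est::ToyEstimator)
    return join(("$(name)=$(getfield(est, name))" for name in fieldnames(ToyEstimator)), " ")
end

toy_stringifyparams(ToyEstimator(100, 0.1, -1))
# "num_iterations=100 learning_rate=0.1 verbosity=-1"
```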
5 changes: 3 additions & 2 deletions src/search_cv.jl
@@ -32,11 +32,12 @@ function search_cv(
y::Vector{Ty},
splits,
params;
- verbosity::Integer = 1,
+ verbosity::Union{Nothing,Integer} = nothing,
truncate_booster::Bool=true
) where {TX<:Real,Ty<:Real}

- ds_parameters = stringifyparams(estimator; verbosity=verbosity)
+ verbosity = isnothing(verbosity) ? estimator.verbosity : verbosity
+ ds_parameters = stringifyparams(estimator)
full_ds = LGBM_DatasetCreateFromMat(X, ds_parameters)
LGBM_DatasetSetField(full_ds, "label", y)

3 changes: 2 additions & 1 deletion test/basic/test_cv.jl
@@ -15,7 +15,8 @@ estimator = LightGBM.LGBMClassification(
objective = "binary",
num_class = 1,
is_provide_training_metric = true,
metric = ["auc"]
metric = ["auc"],
verbosity = -1
)

