Commit
Merge pull request #18 from JuliaAI/bugfixes
fix issue #17
ablaom authored Jun 2, 2022
2 parents 70e29d9 + 2cc89da commit 4f13cc3
Showing 3 changed files with 36 additions and 12 deletions.
1 change: 1 addition & 0 deletions Project.toml
@@ -5,6 +5,7 @@ version = "0.2.0"

[deps]
MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
XGBoost = "009559a3-9522-5dbb-924b-0b6ed2b22bb9"

28 changes: 18 additions & 10 deletions src/MLJXGBoostInterface.jl
@@ -12,6 +12,7 @@ const MMI = MLJModelInterface
import Tables: schema

import XGBoost
+import SparseArrays

# helper for feature importances:
# XGBoost used "f
@@ -223,15 +224,20 @@ function MMI.clean!(model::XGBoostRegressor)
return warning
end

+# For `XGBoost.DMatrix(Xmatrix, y)`, `Xmatrix` must be either a Julia `Array` or
+# a `SparseMatrixCSC`, while `y` must be a `Vector`.
+_to_array(x::Union{Array, SparseArrays.SparseMatrixCSC}) = x
+_to_array(x::AbstractArray) = copyto!(similar(Array{eltype(x)}, axes(x)), x)

function MMI.fit(model::XGBoostRegressor
, verbosity::Int
, X
, y)

silent =
verbosity > 0 ? false : true
-Xmatrix = MMI.matrix(X)
-dm = XGBoost.DMatrix(Xmatrix,label=y)
+Xmatrix = _to_array(MMI.matrix(X))
+dm = XGBoost.DMatrix(Xmatrix, label=_to_array(y))

objective =
model.objective in ["linear", "gamma", "tweedie"] ?
@@ -259,7 +265,7 @@ end
function MMI.predict(model::XGBoostRegressor
, fitresult
, Xnew)
-Xmatrix = MMI.matrix(Xnew)
+Xmatrix = _to_array(MMI.matrix(Xnew))
return XGBoost.predict(fitresult, Xmatrix)
end

@@ -417,8 +423,8 @@ function MMI.fit(model::XGBoostCount

silent = verbosity > 0 ? false : true

-Xmatrix = MMI.matrix(X)
-dm = XGBoost.DMatrix(Xmatrix,label=y)
+Xmatrix = _to_array(MMI.matrix(X))
+dm = XGBoost.DMatrix(Xmatrix, label=_to_array(y))

seed =
model.seed == -1 ? generate_seed() : model.seed
@@ -439,7 +445,7 @@ end
function MMI.predict(model::XGBoostCount
, fitresult
, Xnew)
-Xmatrix = MMI.matrix(Xnew)
+Xmatrix = _to_array(MMI.matrix(Xnew))
return XGBoost.predict(fitresult, Xmatrix)
end

@@ -594,7 +600,7 @@ function MMI.fit(model::XGBoostClassifier
, verbosity::Int #> must be here even if unsupported in pkg
, X
, y)
-Xmatrix = MMI.matrix(X)
+Xmatrix = _to_array(MMI.matrix(X))

a_target_element = y[1] # a CategoricalValue or CategoricalString
num_class = length(MMI.classes(a_target_element))
@@ -608,14 +614,16 @@
eval_metric = "mlogloss"
end

-y_plain = MMI.int(y) .- 1 # integer relabeling should start at 0
+y_plain_ = MMI.int(y) .- 1 # integer relabeling should start at 0

if(num_class==2)
objective="binary:logistic"
-y_plain = convert(Array{Bool}, y_plain)
+y_plain_ = convert(Array{Bool}, y_plain_)
else
objective="multi:softprob"
end

+y_plain = _to_array(y_plain_)

silent =
verbosity > 0 ? false : true
@@ -655,7 +663,7 @@ function MMI.predict(model::XGBoostClassifier
decode = MMI.decoder(a_target_element)
classes = MMI.classes(a_target_element)

-Xmatrix = MMI.matrix(Xnew)
+Xmatrix = _to_array(MMI.matrix(Xnew))
XGBpredictions = XGBoost.predict(result, Xmatrix)

nlevels = length(classes)
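
Why these helpers matter: `XGBoost.DMatrix` accepts only a plain `Array` or a `SparseMatrixCSC`, so the patch routes every feature matrix and label vector through `_to_array`, which materializes any other `AbstractArray` (such as the `SubArray` behind issue #17) into a plain `Array` and passes already-acceptable inputs through untouched. A minimal, self-contained sketch of the dispatch (variable names are illustrative):

import SparseArrays

_to_array(x::Union{Array, SparseArrays.SparseMatrixCSC}) = x
_to_array(x::AbstractArray) = copyto!(similar(Array{eltype(x)}, axes(x)), x)

y = rand(10)
yview = @view y[1:5]           # a SubArray, not an Array
_to_array(y) === y             # true: already a plain Array, no copy made
_to_array(yview) isa Vector    # true: the view is copied into a fresh Vector

Note the `===` in the first check: inputs that `DMatrix` already accepts are returned as-is, so the fast path costs nothing.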
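
The classifier hunk also touches target relabeling: `MMI.int` produces 1-based integer codes for categorical values, while XGBoost expects class labels starting at 0 (and `Bool` labels for the binary `binary:logistic` objective), hence the `.- 1` and the `convert` to `Array{Bool}`. A rough illustration, assuming `MLJBase` is loaded to supply the implementation behind `int`:

using MLJBase, CategoricalArrays

yc = categorical(["a", "b", "a"])
codes = MLJBase.int(yc)   # [1, 2, 1] (1-based integer codes)
codes .- 1                # [0, 1, 0], the 0-based labels XGBoost expects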
19 changes: 17 additions & 2 deletions test/runtests.jl
@@ -63,9 +63,16 @@ Xtable = table(X)
α = 0.1
β = [-0.3, 0.2, -0.1]
λ = exp.(α .+ X * β)
-ycount = [rand(rng, Poisson(λᵢ)) for λᵢ ∈ λ]
+ycount_ = [rand(rng, Poisson(λᵢ)) for λᵢ ∈ λ]
+ycount = @view(ycount_[:]) # intention is to simulate issue #17

fitresultC, cacheC, reportC = MLJBase.fit(count_regressor, 0, Xtable, ycount);
+fitresultC_, cacheC_, reportC_ = MLJBase.fit(count_regressor, 0, Xtable, ycount_);
+# `cacheC` and `reportC` should be the same for both fits, but the
+# `fitresultC`s may differ, as they may hold different pointers to the same
+# information.
+@test cacheC == cacheC_
+@test reportC == reportC_
cpred = predict(count_regressor, fitresultC, Xtable);

importances = reportC.feature_importances
@@ -102,6 +109,15 @@ y = identity.(ycat) # make plain Vector with categ. elements
train, test = partition(eachindex(y), 0.6)
fitresult, cache, report = MLJBase.fit(plain_classifier, 0,
selectrows(X, train), y[train];)
+fitresult_, cache_, report_ = MLJBase.fit(
+    plain_classifier, 0, selectrows(X, train), @view(y[train]);
+) # mimic issue #17
+# `cache` and `report` should be the same for both fits, but the
+# `fitresult`s may differ, as they may hold different pointers to the same
+# information.
+@test cache == cache_
+@test report == report_

yhat = mode.(predict(plain_classifier, fitresult, selectrows(X, test)))
misclassification_rate = sum(yhat .!= y[test])/length(test)
@test misclassification_rate < 0.01
@@ -128,7 +144,6 @@ restored_fitresult = MLJBase.restore(plain_classifier,
@test predict_mode(plain_classifier, restored_fitresult, selectrows(X, test)) ==
yhat


## MACHINE INTEGRATION

# count regressor (`count_regressor`, `Xtable` and `ycount`
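
The pattern in both test blocks is the same: fit once with plain arrays, once with views, and check that `cache` and `report` agree. For reference, the failure mode behind issue #17 can be exercised outside the test suite with a sketch along these lines (the table and target are illustrative; `XGBoostRegressor` is the regressor type this package provides, and before this patch `XGBoost.DMatrix` would reject the `SubArray` produced by `@view`):

using MLJBase, MLJXGBoostInterface

X = (x1 = rand(100), x2 = rand(100))   # any Tables.jl-compatible table
y = rand(100)

model = XGBoostRegressor()
fit_plain = MLJBase.fit(model, 0, X, y)            # always worked
fit_view  = MLJBase.fit(model, 0, X, @view y[:])   # failed before this fix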
