[R] Redesigned xgboost() interface skeleton #10456

Merged: 23 commits, Jul 15, 2024
Changes from all commits
5 changes: 3 additions & 2 deletions R-package/DESCRIPTION
@@ -57,7 +57,8 @@ Suggests:
igraph (>= 1.0.1),
float,
titanic,
RhpcBLASctl
RhpcBLASctl,
survival
Depends:
R (>= 4.3.0)
Imports:
@@ -66,6 +67,6 @@ Imports:
data.table (>= 1.9.6),
jsonlite (>= 1.0)
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Encoding: UTF-8
SystemRequirements: GNU make, C++17
1 change: 1 addition & 0 deletions R-package/NAMESPACE
@@ -13,6 +13,7 @@ S3method(predict,xgb.Booster)
S3method(print,xgb.Booster)
S3method(print,xgb.DMatrix)
S3method(print,xgb.cv.synchronous)
S3method(print,xgboost)
S3method(setinfo,xgb.Booster)
S3method(setinfo,xgb.DMatrix)
S3method(variable.names,xgb.Booster)
34 changes: 34 additions & 0 deletions R-package/R/utils.R
@@ -30,6 +30,40 @@ NVL <- function(x, val) {
return(c('rank:pairwise', 'rank:ndcg', 'rank:map'))
}

.OBJECTIVES_NON_DEFAULT_MODE <- function() {
return(c("reg:logistic", "binary:logitraw", "multi:softmax"))
}

.BINARY_CLASSIF_OBJECTIVES <- function() {
return(c("binary:logistic", "binary:hinge"))
}

.MULTICLASS_CLASSIF_OBJECTIVES <- function() {
return("multi:softprob")
}

.SURVIVAL_RIGHT_CENSORING_OBJECTIVES <- function() { # nolint
return(c("survival:cox", "survival:aft"))
}

.SURVIVAL_ALL_CENSORING_OBJECTIVES <- function() { # nolint
return("survival:aft")
}

.REGRESSION_OBJECTIVES <- function() {
return(c(
"reg:squarederror", "reg:squaredlogerror", "reg:logistic", "reg:pseudohubererror",
"reg:absoluteerror", "reg:quantileerror", "count:poisson", "reg:gamma", "reg:tweedie"
))
}

.MULTI_TARGET_OBJECTIVES <- function() {
return(c(
"reg:squarederror", "reg:squaredlogerror", "reg:logistic", "reg:pseudohubererror",
"reg:quantileerror", "reg:gamma"
))
}


#
# Low-level functions for boosting --------------------------------------------
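Note: the helpers added above only enumerate which objectives belong to each task category. One plausible use, sketched below as an assumption about later steps of this redesign rather than anything present in this diff, is picking a default objective from the type of the response variable; the function name guess.default.objective() is hypothetical.

# Hypothetical sketch (not part of this PR): choose a default objective
# from the response type, reusing the category helpers added above.
guess.default.objective <- function(y) {
  if (is.factor(y)) {
    # Factors map to classification objectives
    if (nlevels(y) == 2L) {
      return(.BINARY_CLASSIF_OBJECTIVES()[1L])        # "binary:logistic"
    }
    return(.MULTICLASS_CLASSIF_OBJECTIVES()[1L])      # "multi:softprob"
  }
  if (inherits(y, "Surv")) {
    # survival::Surv responses map to a survival objective
    return(.SURVIVAL_ALL_CENSORING_OBJECTIVES()[1L])  # "survival:aft"
  }
  if (is.numeric(y)) {
    return(.REGRESSION_OBJECTIVES()[1L])              # "reg:squarederror"
  }
  stop("Unsupported response type: ", paste(class(y), collapse = "/"))
}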
20 changes: 8 additions & 12 deletions R-package/R/xgb.Booster.R
@@ -663,9 +663,8 @@ validate.features <- function(bst, newdata) {
#' data(agaricus.train, package = "xgboost")
#' train <- agaricus.train
#'
#' bst <- xgboost(
#' data = train$data,
#' label = train$label,
#' bst <- xgb.train(
#' data = xgb.DMatrix(train$data, label = train$label),
#' max_depth = 2,
#' eta = 1,
#' nthread = 2,
@@ -767,9 +766,8 @@ xgb.attributes <- function(object) {
#' data.table::setDTthreads(nthread)
#' train <- agaricus.train
#'
#' bst <- xgboost(
#' data = train$data,
#' label = train$label,
#' bst <- xgb.train(
#' data = xgb.DMatrix(train$data, label = train$label),
#' max_depth = 2,
#' eta = 1,
#' nthread = nthread,
@@ -817,9 +815,8 @@ xgb.config <- function(object) {
#' data(agaricus.train, package = "xgboost")
#' train <- agaricus.train
#'
#' bst <- xgboost(
#' data = train$data,
#' label = train$label,
#' bst <- xgb.train(
#' data = xgb.DMatrix(train$data, label = train$label),
#' max_depth = 2,
#' eta = 1,
#' nthread = 2,
@@ -1230,9 +1227,8 @@ xgb.is.same.Booster <- function(obj1, obj2) {
#' data(agaricus.train, package = "xgboost")
#' train <- agaricus.train
#'
#' bst <- xgboost(
#' data = train$data,
#' label = train$label,
#' bst <- xgb.train(
#' data = xgb.DMatrix(train$data, label = train$label),
#' max_depth = 2,
#' eta = 1,
#' nthread = 2,
30 changes: 0 additions & 30 deletions R-package/R/xgb.DMatrix.R
@@ -853,36 +853,6 @@ xgb.DMatrix.hasinfo <- function(object, info) {
}


# get dmatrix from data, label
# internal helper method
xgb.get.DMatrix <- function(data, label, missing, weight, nthread) {
if (inherits(data, "dgCMatrix") || is.matrix(data)) {
if (is.null(label)) {
stop("label must be provided when data is a matrix")
}
dtrain <- xgb.DMatrix(data, label = label, missing = missing, nthread = nthread)
if (!is.null(weight)) {
setinfo(dtrain, "weight", weight)
}
} else {
if (!is.null(label)) {
warning("xgboost: label will be ignored.")
}
if (is.character(data)) {
data <- path.expand(data)
dtrain <- xgb.DMatrix(data[1])
} else if (inherits(data, "xgb.DMatrix")) {
dtrain <- data
} else if (inherits(data, "data.frame")) {
stop("xgboost doesn't support data.frame as input. Convert it to matrix first.")
} else {
stop("xgboost: invalid input data")
}
}
return(dtrain)
}


#' Dimensions of xgb.DMatrix
#'
#' Returns a vector of numbers of rows and of columns in an \code{xgb.DMatrix}.
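With the internal xgb.get.DMatrix() helper removed, callers of the low-level interface build the xgb.DMatrix themselves, as the updated examples throughout this PR do. A minimal sketch of the equivalent explicit construction, assuming the agaricus data and uniform weights purely for illustration:

library(xgboost)
data(agaricus.train, package = "xgboost")

# Build the DMatrix explicitly instead of passing data/label/weight to xgboost()
dtrain <- xgb.DMatrix(
  agaricus.train$data,
  label = agaricus.train$label,
  missing = NA,
  nthread = 2
)
# Optional per-row weights, formerly forwarded by the removed helper
setinfo(dtrain, "weight", rep(1, nrow(agaricus.train$data)))

bst <- xgb.train(
  data = dtrain,
  params = list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2),
  nrounds = 2
)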
4 changes: 2 additions & 2 deletions R-package/R/xgb.dump.R
@@ -29,8 +29,8 @@
#' data(agaricus.test, package='xgboost')
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' # save the model in file 'xgb.model.dump'
#' dump_path = file.path(tempdir(), 'model.dump')
#' xgb.dump(bst, dump_path, with_stats = TRUE)
26 changes: 14 additions & 12 deletions R-package/R/xgb.importance.R
@@ -46,9 +46,8 @@
#' # binomial classification using "gbtree":
#' data(agaricus.train, package = "xgboost")
#'
#' bst <- xgboost(
#' data = agaricus.train$data,
#' label = agaricus.train$label,
#' bst <- xgb.train(
#' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
#' max_depth = 2,
#' eta = 1,
#' nthread = 2,
@@ -59,9 +58,8 @@
#' xgb.importance(model = bst)
#'
#' # binomial classification using "gblinear":
#' bst <- xgboost(
#' data = agaricus.train$data,
#' label = agaricus.train$label,
#' bst <- xgb.train(
#' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
#' booster = "gblinear",
#' eta = 0.3,
#' nthread = 1,
@@ -73,9 +71,11 @@
#' # multiclass classification using "gbtree":
#' nclass <- 3
#' nrounds <- 10
#' mbst <- xgboost(
#' data = as.matrix(iris[, -5]),
#' label = as.numeric(iris$Species) - 1,
#' mbst <- xgb.train(
#' data = xgb.DMatrix(
#' as.matrix(iris[, -5]),
#' label = as.numeric(iris$Species) - 1
#' ),
#' max_depth = 3,
#' eta = 0.2,
#' nthread = 2,
@@ -99,9 +99,11 @@
#' )
#'
#' # multiclass classification using "gblinear":
#' mbst <- xgboost(
#' data = scale(as.matrix(iris[, -5])),
#' label = as.numeric(iris$Species) - 1,
#' mbst <- xgb.train(
#' data = xgb.DMatrix(
#' scale(as.matrix(iris[, -5])),
#' label = as.numeric(iris$Species) - 1
#' ),
#' booster = "gblinear",
#' eta = 0.2,
#' nthread = 1,
5 changes: 2 additions & 3 deletions R-package/R/xgb.model.dt.tree.R
@@ -43,9 +43,8 @@
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' bst <- xgboost(
#' data = agaricus.train$data,
#' label = agaricus.train$label,
#' bst <- xgb.train(
#' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
#' max_depth = 2,
#' eta = 1,
#' nthread = nthread,
5 changes: 2 additions & 3 deletions R-package/R/xgb.plot.deepness.R
@@ -48,9 +48,8 @@
#' data.table::setDTthreads(nthread)
#'
#' ## Change max_depth to a higher number to get a more significant result
#' bst <- xgboost(
#' data = agaricus.train$data,
#' label = agaricus.train$label,
#' bst <- xgb.train(
#' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
#' max_depth = 6,
#' nthread = nthread,
#' nrounds = 50,
5 changes: 2 additions & 3 deletions R-package/R/xgb.plot.importance.R
@@ -51,9 +51,8 @@
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' bst <- xgboost(
#' data = agaricus.train$data,
#' label = agaricus.train$label,
#' bst <- xgb.train(
#' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
#' max_depth = 3,
#' eta = 1,
#' nthread = nthread,
5 changes: 2 additions & 3 deletions R-package/R/xgb.plot.multi.trees.R
@@ -35,9 +35,8 @@
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' bst <- xgboost(
#' data = agaricus.train$data,
#' label = agaricus.train$label,
#' bst <- xgb.train(
#' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
#' max_depth = 15,
#' eta = 1,
#' nthread = nthread,
10 changes: 4 additions & 6 deletions R-package/R/xgb.plot.shap.R
@@ -82,9 +82,8 @@
#' data.table::setDTthreads(nthread)
#' nrounds <- 20
#'
#' bst <- xgboost(
#' agaricus.train$data,
#' agaricus.train$label,
#' bst <- xgb.train(
#' data = xgb.DMatrix(agaricus.train$data, agaricus.train$label),
#' nrounds = nrounds,
#' eta = 0.1,
#' max_depth = 3,
@@ -108,9 +107,8 @@
#' set.seed(123)
#' is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
#'
#' mbst <- xgboost(
#' data = x,
#' label = as.numeric(iris$Species) - 1,
#' mbst <- xgb.train(
#' data = xgb.DMatrix(x, label = as.numeric(iris$Species) - 1),
#' nrounds = nrounds,
#' max_depth = 2,
#' eta = 0.3,
5 changes: 2 additions & 3 deletions R-package/R/xgb.plot.tree.R
@@ -68,9 +68,8 @@
#' @examples
#' data(agaricus.train, package = "xgboost")
#'
#' bst <- xgboost(
#' data = agaricus.train$data,
#' label = agaricus.train$label,
#' bst <- xgb.train(
#' data = xgb.DMatrix(agaricus.train$data, agaricus.train$label),
#' max_depth = 3,
#' eta = 1,
#' nthread = 2,
12 changes: 2 additions & 10 deletions R-package/R/xgb.train.R
@@ -170,12 +170,6 @@
#' as R attributes, and thus do not get saved when using XGBoost's own serializers like
#' \link{xgb.save} (but are kept when using R serializers like \link{saveRDS}).
#' @param ... other parameters to pass to \code{params}.
#' @param label vector of response values. Should not be provided when data is
#' a local data file name or an \code{xgb.DMatrix}.
#' @param missing by default is set to NA, which means that NA values should be considered as 'missing'
#' by the algorithm. Sometimes, 0 or other extreme value might be used to represent missing values.
#' This parameter is only used when input is a dense matrix.
#' @param weight a vector indicating the weight for each row of the input.
#'
#' @return
#' An object of class \code{xgb.Booster}.
@@ -316,12 +310,10 @@
#' early_stopping_rounds = 3)
#'
#' ## An 'xgboost' interface example:
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
#' max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
#' objective = "binary:logistic")
#' bst <- xgboost(x = agaricus.train$data, y = factor(agaricus.train$label),
#' params = list(max_depth = 2, eta = 1), nthread = nthread, nrounds = 2)
#' pred <- predict(bst, agaricus.test$data)
#'
#' @rdname xgb.train
#' @export
xgb.train <- function(params = list(), data, nrounds, evals = list(),
obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,
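For reference, the example change above swaps the old data/label style for the new x/y style. A side-by-side sketch, with the caveat that the redesigned xgboost() is still a skeleton at this point, so its final argument set may differ:

library(xgboost)
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")

# Old interface: raw matrix plus a separate numeric label vector
# bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
#                max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
#                objective = "binary:logistic")

# Redesigned interface as used in the updated example: x/y arguments,
# with a factor response selecting a binary classification objective
bst <- xgboost(
  x = agaricus.train$data,
  y = factor(agaricus.train$label),
  params = list(max_depth = 2, eta = 1),
  nthread = 2,
  nrounds = 2
)
pred <- predict(bst, agaricus.test$data)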