Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[R] Remove reshape argument in predict #10330

Merged
merged 2 commits into from
Aug 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions R-package/R/callbacks.R
Original file line number Diff line number Diff line change
Expand Up @@ -853,8 +853,7 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
pr <- predict(
fd$bst,
fd$evals[[2L]],
outputmargin = env$outputmargin,
reshape = TRUE
outputmargin = env$outputmargin
)
if (is.null(pred)) {
if (NCOL(pr) > 1L) {
Expand Down
5 changes: 2 additions & 3 deletions R-package/R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,7 @@ xgb.iter.update <- function(bst, dtrain, iter, obj) {
bst,
dtrain,
outputmargin = TRUE,
training = TRUE,
reshape = TRUE
training = TRUE
)
gpair <- obj(pred, dtrain)
n_samples <- dim(dtrain)[1]
Expand Down Expand Up @@ -246,7 +245,7 @@ xgb.iter.eval <- function(bst, evals, iter, feval) {
res <- sapply(seq_along(evals), function(j) {
w <- evals[[j]]
## predict using all trees
preds <- predict(bst, w, outputmargin = TRUE, reshape = TRUE, iterationrange = "all")
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
eval_res <- feval(preds, w)
out <- eval_res$value
names(out) <- paste0(evnames[j], "-", eval_res$metric)
Expand Down
181 changes: 76 additions & 105 deletions R-package/R/xgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,6 @@ xgb.get.handle <- function(object) {
#' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
#' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
#' @param predinteraction Whether to return contributions of feature interactions to individual predictions (see Details).
#' @param reshape Whether to reshape the vector of predictions to matrix form when there are several
#' prediction outputs per case. No effect if `predleaf`, `predcontrib`,
#' or `predinteraction` is `TRUE`.
#' @param training Whether the prediction result is used for training. For dart booster,
#' training predicting will perform dropout.
#' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing
Expand All @@ -128,8 +125,24 @@ xgb.get.handle <- function(object) {
#' of the iterations (rounds) otherwise.
#'
#' If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
#' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
#' type and shape of predictions are invariant to the model type.
#' @param strict_shape Whether to always return an array with the same dimensions for the given prediction mode
#' regardless of the model type - meaning that, for example, both a multi-class and a binary classification
#' model would generate output arrays with the same number of dimensions, with the 'class' dimension having
#' size equal to '1' for the binary model.
#'
#' If passing `FALSE` (the default), dimensions will be simplified according to the model type, so that a
#' binary classification model for example would not have a redundant dimension for 'class'.
#'
#' See documentation for the return type for the exact shape of the output arrays for each prediction mode.
#' @param avoid_transpose Whether to output the resulting predictions in the same memory layout in which they
#' are generated by the core XGBoost library, without transposing them to match the expected output shape.
#'
#' Internally, XGBoost uses row-major order for the predictions it generates, while R arrays use column-major
#' order, hence the result needs to be transposed in order to have the expected shape when represented as
#' an R array or matrix, which might be a slow operation.
#'
#' If passing `TRUE`, then the result will have dimensions in reverse order - for example, rows
#' will be the last dimensions instead of the first dimension.
#' @param base_margin Base margin used for boosting from existing model.
#'
#' Note that, if `newdata` is an `xgb.DMatrix` object, this argument will
Expand Down Expand Up @@ -180,28 +193,46 @@ xgb.get.handle <- function(object) {
#' Note that converting a matrix to [xgb.DMatrix()] uses multiple threads too.
#'
#' @return
#' The return type depends on `strict_shape`. If `FALSE` (default):
#' - For regression or binary classification: A vector of length `nrows(newdata)`.
#' - For multiclass classification: A vector of length `num_class * nrows(newdata)` or
#' a `(nrows(newdata), num_class)` matrix, depending on the `reshape` value.
#' - When `predleaf = TRUE`: A matrix with one column per tree.
#' - When `predcontrib = TRUE`: When not multiclass, a matrix with
#' ` num_features + 1` columns. The last "+ 1" column corresponds to the baseline value.
#' In the multiclass case, a list of `num_class` such matrices.
#' The contribution values are on the scale of untransformed margin
#' (e.g., for binary classification, the values are log-odds deviations from the baseline).
#' - When `predinteraction = TRUE`: When not multiclass, the output is a 3d array of
#' dimension `c(nrow, num_features + 1, num_features + 1)`. The off-diagonal (in the last two dimensions)
#' elements represent different feature interaction contributions. The array is symmetric WRT the last
#' two dimensions. The "+ 1" columns corresponds to the baselines. Summing this array along the last dimension should
#' produce practically the same result as `predcontrib = TRUE`.
#' In the multiclass case, a list of `num_class` such arrays.
#'
#' When `strict_shape = TRUE`, the output is always an array:
#' - For normal predictions, the output has dimension `(num_class, nrow(newdata))`.
#' - For `predcontrib = TRUE`, the dimension is `(ncol(newdata) + 1, num_class, nrow(newdata))`.
#' - For `predinteraction = TRUE`, the dimension is `(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))`.
#' - For `predleaf = TRUE`, the dimension is `(n_trees_in_forest, num_class, n_iterations, nrow(newdata))`.
#' A numeric vector or array, with corresponding dimensions depending on the prediction mode and on
#' parameter `strict_shape` as follows:
#'
#' If passing `strict_shape=FALSE`:\itemize{
#' \item For regression or binary classification: a vector of length `nrows`.
#' \item For multi-class and multi-target objectives: a matrix of dimensions `[nrows, ngroups]`.
#'
#' Note that objective variant `multi:softmax` defaults towards predicting most likely class (a vector
#' `nrows`) instead of per-class probabilities.
#' \item For `predleaf`: a matrix with one column per tree.
#'
#' For multi-class / multi-target, they will be arranged so that columns in the output will have
#' the leafs from one group followed by leafs of the other group (e.g. order will be `group1:feat1`,
#' `group1:feat2`, ..., `group2:feat1`, `group2:feat2`, ...).
#' \item For `predcontrib`: when not multi-class / multi-target, a matrix with dimensions
#' `[nrows, nfeats+1]`. The last "+ 1" column corresponds to the baseline value.
#'
#' For multi-class and multi-target objectives, will be an array with dimensions `[nrows, ngroups, nfeats+1]`.
#'
#' The contribution values are on the scale of untransformed margin (e.g., for binary classification,
#' the values are log-odds deviations from the baseline).
#' \item For `predinteraction`: when not multi-class / multi-target, the output is a 3D array of
#' dimensions `[nrows, nfeats+1, nfeats+1]`. The off-diagonal (in the last two dimensions)
#' elements represent different feature interaction contributions. The array is symmetric w.r.t. the last
#' two dimensions. The "+ 1" columns corresponds to the baselines. Summing this array along the last
#' dimension should produce practically the same result as `predcontrib = TRUE`.
#'
#' For multi-class and multi-target, will be a 4D array with dimensions `[nrows, ngroups, nfeats+1, nfeats+1]`
#' }
#'
#' If passing `strict_shape=FALSE`, the result is always an array:\itemize{
#' \item For normal predictions, the dimension is `[nrows, ngroups]`.
#' \item For `predcontrib=TRUE`, the dimension is `[nrows, ngroups, nfeats+1]`.
#' \item For `predinteraction=TRUE`, the dimension is `[nrows, ngroups, nfeats+1, nfeats+1]`.
#' \item For `predleaf=TRUE`, the dimension is `[nrows, niter, ngroups, num_parallel_tree]`.
#' }
#'
#' If passing `avoid_transpose=TRUE`, then the dimensions in all cases will be in reverse order - for
#' example, for `predinteraction`, they will be `[nfeats+1, nfeats+1, ngroups, nrows]`
#' instead of `[nrows, ngroups, nfeats+1, nfeats+1]`.
#' @seealso [xgb.train()]
#' @references
#' 1. Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions",
Expand Down Expand Up @@ -279,8 +310,6 @@ xgb.get.handle <- function(object) {
#' # predict for softmax returns num_class probability numbers per case:
#' pred <- predict(bst, as.matrix(iris[, -5]))
#' str(pred)
#' # reshape it to a num_class-columns matrix
#' pred <- matrix(pred, ncol = num_class, byrow = TRUE)
#' # convert the probabilities to softmax labels
#' pred_labels <- max.col(pred) - 1
#' # the following should result in the same error as seen in the last iteration
Expand Down Expand Up @@ -311,8 +340,11 @@ xgb.get.handle <- function(object) {
#' @export
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE,
training = FALSE, iterationrange = NULL, strict_shape = FALSE, avoid_transpose = FALSE,
validate_features = FALSE, base_margin = NULL, ...) {
if (NROW(list(...))) {
warning("Passed unused prediction arguments: ", paste(names(list(...)), collapse = ", "), ".")
}
if (validate_features) {
newdata <- validate.features(object, newdata)
}
Expand Down Expand Up @@ -415,10 +447,9 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
return(val)
}

## We set strict_shape to TRUE then drop the dimensions conditionally
args <- list(
training = box(training),
strict_shape = box(TRUE),
strict_shape = as.logical(strict_shape),
iteration_begin = box(as.integer(iterationrange[1])),
iteration_end = box(as.integer(iterationrange[2])),
type = box(as.integer(0))
Expand All @@ -445,96 +476,36 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA

json_conf <- jsonlite::toJSON(args, auto_unbox = TRUE)
if (is_dmatrix) {
predts <- .Call(
arr <- .Call(
XGBoosterPredictFromDMatrix_R, xgb.get.handle(object), newdata, json_conf
)
} else if (use_as_dense_matrix) {
predts <- .Call(
arr <- .Call(
XGBoosterPredictFromDense_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin
)
} else if (use_as_csr_matrix) {
predts <- .Call(
arr <- .Call(
XGBoosterPredictFromCSR_R, xgb.get.handle(object), csr_data, missing, json_conf, base_margin
)
} else if (use_as_df) {
predts <- .Call(
arr <- .Call(
XGBoosterPredictFromColumnar_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin
)
}

names(predts) <- c("shape", "results")
shape <- predts$shape
arr <- predts$results

n_ret <- length(arr)
if (n_row != shape[1]) {
stop("Incorrect predict shape.")
}

.Call(XGSetArrayDimInplace_R, arr, rev(shape))

cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "(Intercept)") else NULL
n_groups <- shape[2]

## Needed regardless of whether strict shape is being used.
if (predcontrib) {
.Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, NULL, NULL))
} else if (predinteraction) {
.Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, cnames, NULL, NULL))
}
if (strict_shape) {
return(arr) # strict shape is calculated by libxgboost uniformly.
if ((predcontrib || predinteraction) && !is.null(colnames(newdata))) {
cnames <- c(colnames(newdata), "(Intercept)")
dim_names <- vector(mode = "list", length = length(dim(arr)))
dim_names[[1L]] <- cnames
if (predinteraction) dim_names[[2L]] <- cnames
.Call(XGSetArrayDimNamesInplace_R, arr, dim_names)
}

if (predleaf) {
## Predict leaf
if (n_ret == n_row) {
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
} else {
arr <- matrix(arr, nrow = n_row, byrow = TRUE)
}
} else if (predcontrib) {
## Predict contribution
arr <- aperm(a = arr, perm = c(2, 3, 1)) # [group, row, col]
if (n_ret == n_row) {
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
.Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
} else if (n_groups != 1) {
## turns array into list of matrices
arr <- lapply(seq_len(n_groups), function(g) arr[g, , ])
} else {
## remove the first axis (group)
newdim <- dim(arr)[2:3]
newdn <- dimnames(arr)[2:3]
arr <- arr[1, , ]
.Call(XGSetArrayDimInplace_R, arr, newdim)
.Call(XGSetArrayDimNamesInplace_R, arr, newdn)
}
} else if (predinteraction) {
## Predict interaction
arr <- aperm(a = arr, perm = c(3, 4, 1, 2)) # [group, row, col, col]
if (n_ret == n_row) {
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
.Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
} else if (n_groups != 1) {
## turns array into list of matrices
arr <- lapply(seq_len(n_groups), function(g) arr[g, , , ])
} else {
## remove the first axis (group)
arr <- arr[1, , , , drop = FALSE]
newdim <- dim(arr)[2:4]
newdn <- dimnames(arr)[2:4]
.Call(XGSetArrayDimInplace_R, arr, newdim)
.Call(XGSetArrayDimNamesInplace_R, arr, newdn)
}
} else {
## Normal prediction
if (reshape && n_groups != 1) {
arr <- matrix(arr, ncol = n_groups, byrow = TRUE)
} else {
.Call(XGSetArrayDimInplace_R, arr, NULL)
}
if (!avoid_transpose && is.array(arr)) {
arr <- aperm(arr)
}

return(arr)
}

Expand Down
48 changes: 35 additions & 13 deletions R-package/R/xgb.plot.shap.R
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,10 @@ xgb.shap.data <- function(data, shap_contrib = NULL, features = NULL, top_n = 1,
if (is.null(features) && (is.null(model) || !inherits(model, "xgb.Booster")))
stop("when features are not provided, one must provide an xgb.Booster model to rank the features")

last_dim <- function(v) dim(v)[length(dim(v))]

if (!is.null(shap_contrib) &&
(!is.matrix(shap_contrib) || nrow(shap_contrib) != nrow(data) || ncol(shap_contrib) != ncol(data) + 1))
(!is.array(shap_contrib) || nrow(shap_contrib) != nrow(data) || last_dim(shap_contrib) != ncol(data) + 1))
stop("shap_contrib is not compatible with the provided data")

if (is.character(features) && is.null(colnames(data)))
Expand All @@ -318,19 +320,39 @@ xgb.shap.data <- function(data, shap_contrib = NULL, features = NULL, top_n = 1,
colnames(data) <- paste0("X", seq_len(ncol(data)))
}

if (!is.null(shap_contrib)) {
if (is.list(shap_contrib)) { # multiclass: either choose a class or merge
shap_contrib <- if (!is.null(target_class)) shap_contrib[[target_class + 1]] else Reduce("+", lapply(shap_contrib, abs))
}
shap_contrib <- shap_contrib[idx, ]
if (is.null(colnames(shap_contrib))) {
colnames(shap_contrib) <- paste0("X", seq_len(ncol(data)))
}
} else {
shap_contrib <- predict(model, newdata = data, predcontrib = TRUE, approxcontrib = approxcontrib)
if (is.list(shap_contrib)) { # multiclass: either choose a class or merge
shap_contrib <- if (!is.null(target_class)) shap_contrib[[target_class + 1]] else Reduce("+", lapply(shap_contrib, abs))
reshape_3d_shap_contrib <- function(shap_contrib, target_class) {
# multiclass: either choose a class or merge
if (is.list(shap_contrib)) {
if (!is.null(target_class)) {
shap_contrib <- shap_contrib[[target_class + 1]]
} else {
shap_contrib <- Reduce("+", lapply(shap_contrib, abs))
}
} else if (length(dim(shap_contrib)) > 2) {
if (!is.null(target_class)) {
orig_shape <- dim(shap_contrib)
shap_contrib <- shap_contrib[, target_class + 1, , drop = TRUE]
if (!is.matrix(shap_contrib)) {
shap_contrib <- matrix(shap_contrib, orig_shape[c(1L, 3L)])
}
} else {
shap_contrib <- apply(abs(shap_contrib), c(1L, 3L), sum)
}
}
return(shap_contrib)
}

if (is.null(shap_contrib)) {
shap_contrib <- predict(
model,
newdata = data,
predcontrib = TRUE,
approxcontrib = approxcontrib
)
}
shap_contrib <- reshape_3d_shap_contrib(shap_contrib, target_class)
if (is.null(colnames(shap_contrib))) {
colnames(shap_contrib) <- paste0("X", seq_len(ncol(data)))
}

if (is.null(features)) {
Expand Down
Loading
Loading