dmlc · trivialfis · Aug 18, 2024 · Jun 2, 2024 · Aug 18, 2024
diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R
@@ -853,8 +853,7 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
         pr <- predict(
           fd$bst,
           fd$evals[[2L]],
-          outputmargin = env$outputmargin,
-          reshape = TRUE
+          outputmargin = env$outputmargin
         )
         if (is.null(pred)) {
           if (NCOL(pr) > 1L) {

diff --git a/R-package/R/utils.R b/R-package/R/utils.R
@@ -199,8 +199,7 @@ xgb.iter.update <- function(bst, dtrain, iter, obj) {
       bst,
       dtrain,
       outputmargin = TRUE,
-      training = TRUE,
-      reshape = TRUE
+      training = TRUE
     )
     gpair <- obj(pred, dtrain)
     n_samples <- dim(dtrain)[1]
@@ -246,7 +245,7 @@ xgb.iter.eval <- function(bst, evals, iter, feval) {
     res <- sapply(seq_along(evals), function(j) {
       w <- evals[[j]]
       ## predict using all trees
-      preds <- predict(bst, w, outputmargin = TRUE, reshape = TRUE, iterationrange = "all")
+      preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
       eval_res <- feval(preds, w)
       out <- eval_res$value
       names(out) <- paste0(evnames[j], "-", eval_res$metric)

diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R
@@ -112,9 +112,6 @@ xgb.get.handle <- function(object) {
 #' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
 #' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
 #' @param predinteraction Whether to return contributions of feature interactions to individual predictions (see Details).
-#' @param reshape Whether to reshape the vector of predictions to matrix form when there are several
-#'        prediction outputs per case. No effect if `predleaf`, `predcontrib`,
-#'        or `predinteraction` is `TRUE`.
 #' @param training Whether the prediction result is used for training. For dart booster,
 #'        training predicting will perform dropout.
 #' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing
@@ -128,8 +125,24 @@ xgb.get.handle <- function(object) {
 #'        of the iterations (rounds) otherwise.
 #'
 #'        If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
-#' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
-#'        type and shape of predictions are invariant to the model type.
+#' @param strict_shape Whether to always return an array with the same dimensions for the given prediction mode
+#'        regardless of the model type - meaning that, for example, both a multi-class and a binary classification
+#'        model would generate output arrays with the same number of dimensions, with the 'class' dimension having
+#'        size equal to '1' for the binary model.
+#'
+#'        If passing `FALSE` (the default), dimensions will be simplified according to the model type, so that a
+#'        binary classification model for example would not have a redundant dimension for 'class'.
+#'
+#'        See documentation for the return type for the exact shape of the output arrays for each prediction mode.
+#' @param avoid_transpose Whether to output the resulting predictions in the same memory layout in which they
+#'        are generated by the core XGBoost library, without transposing them to match the expected output shape.
+#'
+#'        Internally, XGBoost uses row-major order for the predictions it generates, while R arrays use column-major
+#'        order, hence the result needs to be transposed in order to have the expected shape when represented as
+#'        an R array or matrix, which might be a slow operation.
+#'
+#'        If passing `TRUE`, then the result will have dimensions in reverse order - for example, rows
+#'        will be the last dimensions instead of the first dimension.
 #' @param base_margin Base margin used for boosting from existing model.
 #'
 #'        Note that, if `newdata` is an `xgb.DMatrix` object, this argument will
@@ -180,28 +193,46 @@ xgb.get.handle <- function(object) {
 #' Note that converting a matrix to [xgb.DMatrix()] uses multiple threads too.
 #'
 #' @return
-#' The return type depends on `strict_shape`. If `FALSE` (default):
-#' - For regression or binary classification: A vector of length `nrows(newdata)`.
-#' - For multiclass classification: A vector of length `num_class * nrows(newdata)` or
-#'   a `(nrows(newdata), num_class)` matrix, depending on the `reshape` value.
-#' - When `predleaf = TRUE`: A matrix with one column per tree.
-#' - When `predcontrib = TRUE`: When not multiclass, a matrix with
-#' ` num_features + 1` columns. The last "+ 1" column corresponds to the baseline value.
-#'   In the multiclass case, a list of `num_class` such matrices.
-#'   The contribution values are on the scale of untransformed margin
-#'   (e.g., for binary classification, the values are log-odds deviations from the baseline).
-#' - When `predinteraction = TRUE`: When not multiclass, the output is a 3d array of
-#'   dimension `c(nrow, num_features + 1, num_features + 1)`. The off-diagonal (in the last two dimensions)
-#'   elements represent different feature interaction contributions. The array is symmetric WRT the last
-#'   two dimensions. The "+ 1" columns corresponds to the baselines. Summing this array along the last dimension should
-#'   produce practically the same result as `predcontrib = TRUE`.
-#'   In the multiclass case, a list of `num_class` such arrays.
-#'
-#' When `strict_shape = TRUE`, the output is always an array:
-#' - For normal predictions, the output has dimension `(num_class, nrow(newdata))`.
-#' - For `predcontrib = TRUE`, the dimension is `(ncol(newdata) + 1, num_class, nrow(newdata))`.
-#' - For `predinteraction = TRUE`, the dimension is `(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))`.
-#' - For `predleaf = TRUE`, the dimension is `(n_trees_in_forest, num_class, n_iterations, nrow(newdata))`.
+#' A numeric vector or array, with corresponding dimensions depending on the prediction mode and on
+#' parameter `strict_shape` as follows:
+#'
+#' If passing `strict_shape=FALSE`:\itemize{
+#' \item For regression or binary classification: a vector of length `nrows`.
+#' \item For multi-class and multi-target objectives: a matrix of dimensions `[nrows, ngroups]`.
+#'
+#' Note that objective variant `multi:softmax` defaults towards predicting most likely class (a vector
+#' `nrows`) instead of per-class probabilities.
+#' \item For `predleaf`: a matrix with one column per tree.
+#'
+#' For multi-class / multi-target, they will be arranged so that columns in the output will have
+#' the leafs from one group followed by leafs of the other group (e.g. order will be `group1:feat1`,
+#' `group1:feat2`, ..., `group2:feat1`, `group2:feat2`, ...).
+#' \item For `predcontrib`: when not multi-class / multi-target, a matrix with dimensions
+#' `[nrows, nfeats+1]`. The last "+ 1" column corresponds to the baseline value.
+#'
+#' For multi-class and multi-target objectives, will be an array with dimensions `[nrows, ngroups, nfeats+1]`.
+#'
+#' The contribution values are on the scale of untransformed margin (e.g., for binary classification,
+#' the values are log-odds deviations from the baseline).
+#' \item For `predinteraction`: when not multi-class / multi-target, the output is a 3D array of
+#' dimensions `[nrows, nfeats+1, nfeats+1]`. The off-diagonal (in the last two dimensions)
+#' elements represent different feature interaction contributions. The array is symmetric w.r.t. the last
+#' two dimensions. The "+ 1" columns corresponds to the baselines. Summing this array along the last
+#' dimension should produce practically the same result as `predcontrib = TRUE`.
+#'
+#' For multi-class and multi-target, will be a 4D array with dimensions `[nrows, ngroups, nfeats+1, nfeats+1]`
+#' }
+#'
+#' If passing `strict_shape=FALSE`, the result is always an array:\itemize{
+#' \item For normal predictions, the dimension is `[nrows, ngroups]`.
+#' \item For `predcontrib=TRUE`, the dimension is `[nrows, ngroups, nfeats+1]`.
+#' \item For `predinteraction=TRUE`, the dimension is `[nrows, ngroups, nfeats+1, nfeats+1]`.
+#' \item For `predleaf=TRUE`, the dimension is `[nrows, niter, ngroups, num_parallel_tree]`.
+#' }
+#'
+#' If passing `avoid_transpose=TRUE`, then the dimensions in all cases will be in reverse order - for
+#' example, for `predinteraction`, they will be `[nfeats+1, nfeats+1, ngroups, nrows]`
+#' instead of `[nrows, ngroups, nfeats+1, nfeats+1]`.
 #' @seealso [xgb.train()]
 #' @references
 #' 1. Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions",
@@ -279,8 +310,6 @@ xgb.get.handle <- function(object) {
 #' # predict for softmax returns num_class probability numbers per case:
 #' pred <- predict(bst, as.matrix(iris[, -5]))
 #' str(pred)
-#' # reshape it to a num_class-columns matrix
-#' pred <- matrix(pred, ncol = num_class, byrow = TRUE)
 #' # convert the probabilities to softmax labels
 #' pred_labels <- max.col(pred) - 1
 #' # the following should result in the same error as seen in the last iteration
@@ -311,8 +340,11 @@ xgb.get.handle <- function(object) {
 #' @export
 predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
                                 predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
-                                reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE,
+                                training = FALSE, iterationrange = NULL, strict_shape = FALSE, avoid_transpose = FALSE,
                                 validate_features = FALSE, base_margin = NULL, ...) {
+  if (NROW(list(...))) {
+    warning("Passed unused prediction arguments: ", paste(names(list(...)), collapse = ", "), ".")
+  }
   if (validate_features) {
     newdata <- validate.features(object, newdata)
   }
@@ -415,10 +447,9 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
     return(val)
   }
 
-  ## We set strict_shape to TRUE then drop the dimensions conditionally
   args <- list(
     training = box(training),
-    strict_shape = box(TRUE),
+    strict_shape = as.logical(strict_shape),
     iteration_begin = box(as.integer(iterationrange[1])),
     iteration_end = box(as.integer(iterationrange[2])),
     type = box(as.integer(0))
@@ -445,96 +476,36 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
 
   json_conf <- jsonlite::toJSON(args, auto_unbox = TRUE)
   if (is_dmatrix) {
-    predts <- .Call(
+    arr <- .Call(
       XGBoosterPredictFromDMatrix_R, xgb.get.handle(object), newdata, json_conf
     )
   } else if (use_as_dense_matrix) {
-    predts <- .Call(
+    arr <- .Call(
       XGBoosterPredictFromDense_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin
     )
   } else if (use_as_csr_matrix) {
-    predts <- .Call(
+    arr <- .Call(
       XGBoosterPredictFromCSR_R, xgb.get.handle(object), csr_data, missing, json_conf, base_margin
     )
   } else if (use_as_df) {
-    predts <- .Call(
+    arr <- .Call(
       XGBoosterPredictFromColumnar_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin
     )
   }
 
-  names(predts) <- c("shape", "results")
-  shape <- predts$shape
-  arr <- predts$results
-
-  n_ret <- length(arr)
-  if (n_row != shape[1]) {
-    stop("Incorrect predict shape.")
-  }
-
-  .Call(XGSetArrayDimInplace_R, arr, rev(shape))
-
-  cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "(Intercept)") else NULL
-  n_groups <- shape[2]
-
   ## Needed regardless of whether strict shape is being used.
-  if (predcontrib) {
-    .Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, NULL, NULL))
-  } else if (predinteraction) {
-    .Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, cnames, NULL, NULL))
-  }
-  if (strict_shape) {
-    return(arr) # strict shape is calculated by libxgboost uniformly.
+  if ((predcontrib || predinteraction) && !is.null(colnames(newdata))) {
+    cnames <- c(colnames(newdata), "(Intercept)")
+    dim_names <- vector(mode = "list", length = length(dim(arr)))
+    dim_names[[1L]] <- cnames
+    if (predinteraction) dim_names[[2L]] <- cnames
+    .Call(XGSetArrayDimNamesInplace_R, arr, dim_names)
   }
 
-  if (predleaf) {
-    ## Predict leaf
-    if (n_ret == n_row) {
-      .Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
-    } else {
-      arr <- matrix(arr, nrow = n_row, byrow = TRUE)
-    }
-  } else if (predcontrib) {
-    ## Predict contribution
-    arr <- aperm(a = arr, perm = c(2, 3, 1)) # [group, row, col]
-    if (n_ret == n_row) {
-      .Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
-      .Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
-    } else if (n_groups != 1) {
-      ## turns array into list of matrices
-      arr <- lapply(seq_len(n_groups), function(g) arr[g, , ])
-    } else {
-      ## remove the first axis (group)
-      newdim <- dim(arr)[2:3]
-      newdn <- dimnames(arr)[2:3]
-      arr <- arr[1, , ]
-      .Call(XGSetArrayDimInplace_R, arr, newdim)
-      .Call(XGSetArrayDimNamesInplace_R, arr, newdn)
-    }
-  } else if (predinteraction) {
-    ## Predict interaction
-    arr <- aperm(a = arr, perm = c(3, 4, 1, 2)) # [group, row, col, col]
-    if (n_ret == n_row) {
-      .Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
-      .Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
-    } else if (n_groups != 1) {
-      ## turns array into list of matrices
-      arr <- lapply(seq_len(n_groups), function(g) arr[g, , , ])
-    } else {
-      ## remove the first axis (group)
-      arr <- arr[1, , , , drop = FALSE]
-      newdim <- dim(arr)[2:4]
-      newdn <- dimnames(arr)[2:4]
-      .Call(XGSetArrayDimInplace_R, arr, newdim)
-      .Call(XGSetArrayDimNamesInplace_R, arr, newdn)
-    }
-  } else {
-    ## Normal prediction
-    if (reshape && n_groups != 1) {
-      arr <- matrix(arr, ncol = n_groups, byrow = TRUE)
-    } else {
-      .Call(XGSetArrayDimInplace_R, arr, NULL)
-    }
+  if (!avoid_transpose && is.array(arr)) {
+    arr <- aperm(arr)
   }
+
   return(arr)
 }
 

diff --git a/R-package/R/xgb.plot.shap.R b/R-package/R/xgb.plot.shap.R
@@ -294,8 +294,10 @@ xgb.shap.data <- function(data, shap_contrib = NULL, features = NULL, top_n = 1,
   if (is.null(features) && (is.null(model) || !inherits(model, "xgb.Booster")))
     stop("when features are not provided, one must provide an xgb.Booster model to rank the features")
 
+  last_dim <- function(v) dim(v)[length(dim(v))]
+
   if (!is.null(shap_contrib) &&
-      (!is.matrix(shap_contrib) || nrow(shap_contrib) != nrow(data) || ncol(shap_contrib) != ncol(data) + 1))
+      (!is.array(shap_contrib) || nrow(shap_contrib) != nrow(data) || last_dim(shap_contrib) != ncol(data) + 1))
     stop("shap_contrib is not compatible with the provided data")
 
   if (is.character(features) && is.null(colnames(data)))
@@ -318,19 +320,39 @@ xgb.shap.data <- function(data, shap_contrib = NULL, features = NULL, top_n = 1,
     colnames(data) <- paste0("X", seq_len(ncol(data)))
   }
 
-  if (!is.null(shap_contrib)) {
-    if (is.list(shap_contrib)) { # multiclass: either choose a class or merge
-      shap_contrib <- if (!is.null(target_class)) shap_contrib[[target_class + 1]] else Reduce("+", lapply(shap_contrib, abs))
-    }
-    shap_contrib <- shap_contrib[idx, ]
-    if (is.null(colnames(shap_contrib))) {
-      colnames(shap_contrib) <- paste0("X", seq_len(ncol(data)))
-    }
-  } else {
-    shap_contrib <- predict(model, newdata = data, predcontrib = TRUE, approxcontrib = approxcontrib)
-    if (is.list(shap_contrib)) { # multiclass: either choose a class or merge
-      shap_contrib <- if (!is.null(target_class)) shap_contrib[[target_class + 1]] else Reduce("+", lapply(shap_contrib, abs))
+  reshape_3d_shap_contrib <- function(shap_contrib, target_class) {
+    # multiclass: either choose a class or merge
+    if (is.list(shap_contrib)) {
+      if (!is.null(target_class)) {
+        shap_contrib <- shap_contrib[[target_class + 1]]
+      } else {
+        shap_contrib <- Reduce("+", lapply(shap_contrib, abs))
+      }
+    } else if (length(dim(shap_contrib)) > 2) {
+      if (!is.null(target_class)) {
+        orig_shape <- dim(shap_contrib)
+        shap_contrib <- shap_contrib[, target_class + 1, , drop = TRUE]
+        if (!is.matrix(shap_contrib)) {
+          shap_contrib <- matrix(shap_contrib, orig_shape[c(1L, 3L)])
+        }
+      } else {
+        shap_contrib <- apply(abs(shap_contrib), c(1L, 3L), sum)
+      }
     }
+    return(shap_contrib)
+  }
+
+  if (is.null(shap_contrib)) {
+    shap_contrib <- predict(
+      model,
+      newdata = data,
+      predcontrib = TRUE,
+      approxcontrib = approxcontrib
+    )
+  }
+  shap_contrib <- reshape_3d_shap_contrib(shap_contrib, target_class)
+  if (is.null(colnames(shap_contrib))) {
+    colnames(shap_contrib) <- paste0("X", seq_len(ncol(data)))
   }
 
   if (is.null(features)) {