From 7b826dbd8d1b3c728a0aabc9906a830b3e671b46 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Fri, 21 May 2021 14:58:20 -0500 Subject: [PATCH 01/24] initial commit of get_indices --- R/get_indices.R | 83 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 R/get_indices.R diff --git a/R/get_indices.R b/R/get_indices.R new file mode 100644 index 00000000..25233a5d --- /dev/null +++ b/R/get_indices.R @@ -0,0 +1,83 @@ +#' Parse the indices from a list of variables (extracted e.g. from a +#' `draws_summary` object or a `draws` object) + +get_indices <- function(variables){ + vars_indices <- strsplit(variables, "(\\[|\\])") + vars <- sapply(vars_indices, `[[`, 1) + var_names <- unique(vars) + + var <- var_names[6] + + indices_list <- lapply(var_names, function (var) { + var_i <- vars == var + # reset class here as otherwise the draws arrays in the output rvars + # have type draws_matrix, which makes inspecting them hard + var_length <- sum(var_i) + + if (var_length == 1) { + # single variable, no indices + out <- 1 + dimnames(out) <- NULL + } else { + # variable with indices => we need to reshape the array + # basically, we're going to do a bunch of work up front to figure out + # a single array slice that does most of the work for us. + + # first, pull out the list of indices into a data frame + # where each column is an index variable + indices <- sapply(vars_indices[var_i], `[[`, 2) + indices <- as.data.frame(do.call(rbind, strsplit(indices, ",")), + stringsAsFactors = FALSE) + + unique_indices <- vector("list", length(indices)) + .dimnames <- vector("list", length(indices)) + names(unique_indices) <- names(indices) + for (i in seq_along(indices)) { + numeric_index <- suppressWarnings(as.numeric(indices[[i]])) + if (!anyNA(numeric_index) && rlang::is_integerish(numeric_index)) { + # for integer indices, we need to convert them to integers + # so that we can sort them in numerical order (not string order) + if (min(numeric_index) >= 1) { + # integer indices >= 1 are forced to lower bound of 1 + no dimnames + indices[[i]] <- as.integer(numeric_index) + unique_indices[[i]] <- seq.int(1, max(numeric_index)) + } else { + # indices with values < 1 are sorted but otherwise left as-is, and will create dimnames + indices[[i]] <- numeric_index + unique_indices[[i]] <- sort(unique(numeric_index)) + .dimnames[[i]] <- unique_indices[[i]] + } + } else { + # we convert non-numeric indices to factors so that we can force them + # to be ordered as they appear in the data (rather than in alphabetical order) + factor_levels <- unique(indices[[i]]) + indices[[i]] <- factor(indices[[i]], levels = factor_levels) + # these aren't sorted so they appear in original order + unique_indices[[i]] <- factor(factor_levels, levels = factor_levels) + .dimnames[[i]] <- unique_indices[[i]] + } + } + + # sort indices and fill in missing indices as NA to ensure + # (1) even if the order of the variables is something weird (like + # x[2,2] comes before x[1,1]) the result + # places those columns in the correct cells of the array + # (2) if some combination of indices is missing (say x[2,1] isn't + # in the input) that cell in the array gets an NA + + # Use expand.grid to get all cells in output array. We reverse indices + # here because it helps us do the sort after the merge, where + # we need to sort in reverse order of the indices (because + # the value of the last index should move slowest) + all_indices <- expand.grid(rev(unique_indices)) + # merge with all.x = TRUE (left join) to fill in missing cells with NA + indices <- merge(all_indices, cbind(indices, index = seq_len(nrow(indices))), + all.x = TRUE, sort = FALSE) + # need to do the sort manually after merge because when sort = TRUE, merge + # sorts factors as if they were strings, and we need factors to be sorted as factors + indices <- indices[do.call(order, as.list(indices[, -ncol(indices), drop = FALSE])),] + } + indices + }) + indices_list +} \ No newline at end of file From 4afd494cfeb7f28319e5f30d452b2e7274413fc2 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Fri, 21 May 2021 15:19:36 -0500 Subject: [PATCH 02/24] initial commit of rollup_summary.R --- R/rollup_summary.R | 106 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 R/rollup_summary.R diff --git a/R/rollup_summary.R b/R/rollup_summary.R new file mode 100644 index 00000000..5ccbdfd6 --- /dev/null +++ b/R/rollup_summary.R @@ -0,0 +1,106 @@ +#' "Roll up" `draws_summary` objects by collapsing over nonscalar parameters. +#' +#' By default, all variables containing `[` in the name are rolled up, but +#' there is an option to pass a list of parameter names, which will roll up +#' any variables that match `^parameter_name\\[.*` +#' +#' @name draws_summary_rollup +#' @param x a `draws_summary` object +#' @param rollup_vars a list of variable names (excluding brackets and indices) to roll up +#' @param min_only a character vector of varable names for which only minimum values are +#' desired in the rollup +#' @param max_only a character vector of varable names for which only maximum values are +#' desired in the rollup +#' +#'#' @return +#' The `rollup_summary()` methods return a list of [tibble][tibble::tibble] data frames. +#' The first element is a standard `draws_summary` for the variables that are not rolled up +#' The second element is a rollup of the variables to be rolled up and contains max and min +#' values of the summary functions attained by any element of the variable +#' +#' #' @details +#' By default, only the maximum value of `rhat` and the mimum values of [ess_bulk()] and +#' [ess_tail()] are returned. `NA`s are ignored unless all elements of the summary are `NA` +#' +#' #' @examples +#' ds <- summarise_draws(example_draws()) +#' ds2 <- summarise_draws(2 * example_draws()) +#' ds2$variable <- c("pi", "upsilon", +#' "omega[1,1]", "omega[2,1]", "omega[3,1]", "omega[4,1]", +#' "omega[1,2]", "omega[2,2]", "omega[3,2]", "omega[4,2]") +#' draws_summary <- rbind(ds, ds2) +#' rollup_summary(draws_summary) +#' rollup_summary(draws_summary, rollup_vars = c("theta", "omega")) +#' rollup_summary(draws_summary, rollup_vars = "theta") +NULL + +#' @export +rollup_summary <- function(x, ...) { + UseMethod("rollup_summary") +} + +# Can consider adding methods that first call `summarise_draws` + +rollup_summary.draws_summary <- function (draws_summary, rollup_vars = NULL, + min_only = c("ess_bulk", "ess_tail"), + max_only = "rhat") { + # get variable names + vars <- draws_summary$variable + # Determine which variable names need to be rolled up + if (is.null(rollup_vars)) { + vars_nonscalar <- grepl("\\[", vars) + } else { + vars_nonscalar <- as.logical(colSums(do.call(rbind, + lapply(paste0("^", rollup_vars, "\\["), + function(x){grepl(x, vars)})))) + } + # Separate out draws_summary into the scalar variables to leave alone and the nonscalar + # variables for rollup + ds_scalar <- draws_summary[!vars_nonscalar, ] + ds_nonscalar <- draws_summary[vars_nonscalar, ] + # Roll up the nonscalar variables + varnames_nonscalar <- gsub("\\[(.*)", "", ds_nonscalar$variable) + summary_names <- names(draws_summary)[-1] + names_minmax <- summary_names[!(summary_names %in% c(min_only, max_only))] + split_nonscalar <- split(ds_nonscalar, varnames_nonscalar)[unique(varnames_nonscalar)] + # [unique(varnames_nonscalar)] preserves the order of the names + min_max <- do.call(rbind, lapply(split_nonscalar, rollup_helper_minmax, + names = names_minmax)) + min_only <- do.call(rbind, lapply(split_nonscalar, rollup_helper_min, names = min_only)) + max_only <- do.call(rbind, lapply(split_nonscalar, rollup_helper_max, names = max_only)) + variable_column <- data.frame("variable" = unique(varnames_nonscalar)) + indices <- get_indices(ds_nonscalar$variable) + get_dims <- function(x){paste0("(", + paste(apply(x[, names(x) != "index", drop = FALSE], 2, function(x){max(as.integer(x))}), collapse = ","), + ")")} + dimensions <- lapply(indices, get_dims) + dimension_column <- data.frame("dimension" = unlist(dimensions)) + nonscalar_out <- tibble::as_tibble(cbind(variable_column, dimension_column, min_max, max_only, min_only)) + + # Add dimensions column to nonscalar_out + + + out <- list(unrolled_vars = ds_scalar, rolled_vars = nonscalar_out) + out +} + +rollup_helper_minmax <- function(x, names){ + x <- x[, names] + mm <- c(apply(x, 2, function(x) {c(min(x), max(x))})) + names(mm) <- paste0(rep(names(x), each = 2), c("_min", "_max")) + mm +} + +rollup_helper_min <- function(x, names){ + x <- x[, names] + min_only <- apply(x, 2, min) + names(min_only) <- paste0(names(x), "_min") + min_only +} + +rollup_helper_max <- function(x, names){ + x <- x[, names] + max_only <- apply(x, 2, max) + names(max_only) <- paste0(names(x), "_max") + max_only +} From fb9f21af5a6fce1a6834f129fb3aac17beeadb32 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Fri, 21 May 2021 15:20:53 -0500 Subject: [PATCH 03/24] run roxygenise --- NAMESPACE | 1 + man/draws_summary_rollup.Rd | 42 +++++++++++++++++++++++++++++++++++++ man/get_indices.Rd | 13 ++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 man/draws_summary_rollup.Rd create mode 100644 man/get_indices.Rd diff --git a/NAMESPACE b/NAMESPACE index 93948f33..c1153a99 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -325,6 +325,7 @@ export(resample_draws) export(rfun) export(rhat) export(rhat_basic) +export(rollup_summary) export(rstar) export(rvar) export(rvar_all) diff --git a/man/draws_summary_rollup.Rd b/man/draws_summary_rollup.Rd new file mode 100644 index 00000000..ff4e6875 --- /dev/null +++ b/man/draws_summary_rollup.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rollup_summary.R +\name{draws_summary_rollup} +\alias{draws_summary_rollup} +\title{"Roll up" \code{draws_summary} objects by collapsing over nonscalar parameters.} +\arguments{ +\item{x}{a \code{draws_summary} object} + +\item{rollup_vars}{a list of variable names (excluding brackets and indices) to roll up} + +\item{min_only}{a character vector of varable names for which only minimum values are +desired in the rollup} + +\item{max_only}{a character vector of varable names for which only maximum values are +desired in the rollup + +#' @return +The \code{rollup_summary()} methods return a list of \link[tibble:tibble]{tibble} data frames. +The first element is a standard \code{draws_summary} for the variables that are not rolled up +The second element is a rollup of the variables to be rolled up and contains max and min +values of the summary functions attained by any element of the variable + +#' @details +By default, only the maximum value of \code{rhat} and the mimum values of \code{\link[=ess_bulk]{ess_bulk()}} and +\code{\link[=ess_tail]{ess_tail()}} are returned. \code{NA}s are ignored unless all elements of the summary are \code{NA} + +#' @examples +ds <- summarise_draws(example_draws()) +ds2 <- summarise_draws(2 * example_draws()) +ds2$variable <- c("pi", "upsilon", +"omega\link{1,1}", "omega\link{2,1}", "omega\link{3,1}", "omega\link{4,1}", +"omega\link{1,2}", "omega\link{2,2}", "omega\link{3,2}", "omega\link{4,2}") +draws_summary <- rbind(ds, ds2) +rollup_summary(draws_summary) +rollup_summary(draws_summary, rollup_vars = c("theta", "omega")) +rollup_summary(draws_summary, rollup_vars = "theta")} +} +\description{ +By default, all variables containing \code{[} in the name are rolled up, but +there is an option to pass a list of parameter names, which will roll up +any variables that match \verb{^parameter_name\\\[.*} +} diff --git a/man/get_indices.Rd b/man/get_indices.Rd new file mode 100644 index 00000000..74dc3158 --- /dev/null +++ b/man/get_indices.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_indices.R +\name{get_indices} +\alias{get_indices} +\title{Parse the indices from a list of variables (extracted e.g. from a +\code{draws_summary} object or a \code{draws} object)} +\usage{ +get_indices(variables) +} +\description{ +Parse the indices from a list of variables (extracted e.g. from a +\code{draws_summary} object or a \code{draws} object) +} From 9ff675446f51d3e6ac9206235b9154417bc174b5 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Fri, 21 May 2021 15:40:14 -0500 Subject: [PATCH 04/24] roxygenise and related changes --- NAMESPACE | 2 ++ R/rollup_summary.R | 25 +++++++++++++++++-------- man/draws_summary_rollup.Rd | 25 ++++++++++++++++++++++--- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c1153a99..94d40e8f 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -206,6 +206,8 @@ S3method(reserved_variables,draws_df) S3method(reserved_variables,draws_list) S3method(reserved_variables,draws_matrix) S3method(reserved_variables,draws_rvars) +S3method(rollup_summary,default) +S3method(rollup_summary,draws_summary) S3method(sd,default) S3method(sd,rvar) S3method(split_chains,draws) diff --git a/R/rollup_summary.R b/R/rollup_summary.R index 5ccbdfd6..7d0a8473 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -5,7 +5,8 @@ #' any variables that match `^parameter_name\\[.*` #' #' @name draws_summary_rollup -#' @param x a `draws_summary` object +#' @param ... Optional arguments to be passed to `summarise_draws` if x is a `draws` object +#' @param x a `draws_summary` object or a `draws` object to be summarised #' @param rollup_vars a list of variable names (excluding brackets and indices) to roll up #' @param min_only a character vector of varable names for which only minimum values are #' desired in the rollup @@ -30,17 +31,29 @@ #' "omega[1,2]", "omega[2,2]", "omega[3,2]", "omega[4,2]") #' draws_summary <- rbind(ds, ds2) #' rollup_summary(draws_summary) -#' rollup_summary(draws_summary, rollup_vars = c("theta", "omega")) #' rollup_summary(draws_summary, rollup_vars = "theta") +#' rollup_summary(example_draws()) NULL +#' @rdname draws_summary_rollup #' @export -rollup_summary <- function(x, ...) { +rollup_summary <- function(x, rollup_vars = NULL, + min_only = c("ess_bulk", "ess_tail"), + max_only = "rhat") { UseMethod("rollup_summary") } -# Can consider adding methods that first call `summarise_draws` +#' @export +rollup_summary.default <- function(x, ..., rollup_vars = NULL, + min_only = c("ess_bulk", "ess_tail"), + max_only = "rhat") { + rollup_summary(summarise_draws(x, ...), rollup_vars = NULL, + min_only = c("ess_bulk", "ess_tail"), + max_only = "rhat") +} +#' @rdname draws_summary_rollup +#' @export rollup_summary.draws_summary <- function (draws_summary, rollup_vars = NULL, min_only = c("ess_bulk", "ess_tail"), max_only = "rhat") { @@ -76,10 +89,6 @@ rollup_summary.draws_summary <- function (draws_summary, rollup_vars = NULL, dimensions <- lapply(indices, get_dims) dimension_column <- data.frame("dimension" = unlist(dimensions)) nonscalar_out <- tibble::as_tibble(cbind(variable_column, dimension_column, min_max, max_only, min_only)) - - # Add dimensions column to nonscalar_out - - out <- list(unrolled_vars = ds_scalar, rolled_vars = nonscalar_out) out } diff --git a/man/draws_summary_rollup.Rd b/man/draws_summary_rollup.Rd index ff4e6875..07078458 100644 --- a/man/draws_summary_rollup.Rd +++ b/man/draws_summary_rollup.Rd @@ -2,9 +2,26 @@ % Please edit documentation in R/rollup_summary.R \name{draws_summary_rollup} \alias{draws_summary_rollup} +\alias{rollup_summary} +\alias{rollup_summary.draws_summary} \title{"Roll up" \code{draws_summary} objects by collapsing over nonscalar parameters.} +\usage{ +rollup_summary( + x, + rollup_vars = NULL, + min_only = c("ess_bulk", "ess_tail"), + max_only = "rhat" +) + +\method{rollup_summary}{draws_summary}( + draws_summary, + rollup_vars = NULL, + min_only = c("ess_bulk", "ess_tail"), + max_only = "rhat" +) +} \arguments{ -\item{x}{a \code{draws_summary} object} +\item{x}{a \code{draws_summary} object or a \code{draws} object to be summarised} \item{rollup_vars}{a list of variable names (excluding brackets and indices) to roll up} @@ -32,8 +49,10 @@ ds2$variable <- c("pi", "upsilon", "omega\link{1,2}", "omega\link{2,2}", "omega\link{3,2}", "omega\link{4,2}") draws_summary <- rbind(ds, ds2) rollup_summary(draws_summary) -rollup_summary(draws_summary, rollup_vars = c("theta", "omega")) -rollup_summary(draws_summary, rollup_vars = "theta")} +rollup_summary(draws_summary, rollup_vars = "theta") +rollup_summary(example_draws())} + +\item{...}{Optional arguments to be passed to \code{summarise_draws} if x is a \code{draws} object} } \description{ By default, all variables containing \code{[} in the name are rolled up, but From 0426a91e2f5a3fb478d6026f08fa91ce1be598f0 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Fri, 21 May 2021 16:18:43 -0500 Subject: [PATCH 05/24] fixing doc --- R/rollup_summary.R | 6 +++--- man/draws_summary_rollup.Rd | 34 +++++++++++++++++----------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/R/rollup_summary.R b/R/rollup_summary.R index 7d0a8473..dde8c047 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -13,17 +13,17 @@ #' @param max_only a character vector of varable names for which only maximum values are #' desired in the rollup #' -#'#' @return +#' @return #' The `rollup_summary()` methods return a list of [tibble][tibble::tibble] data frames. #' The first element is a standard `draws_summary` for the variables that are not rolled up #' The second element is a rollup of the variables to be rolled up and contains max and min #' values of the summary functions attained by any element of the variable #' -#' #' @details +#' @details #' By default, only the maximum value of `rhat` and the mimum values of [ess_bulk()] and #' [ess_tail()] are returned. `NA`s are ignored unless all elements of the summary are `NA` #' -#' #' @examples +#' @examples #' ds <- summarise_draws(example_draws()) #' ds2 <- summarise_draws(2 * example_draws()) #' ds2$variable <- c("pi", "upsilon", diff --git a/man/draws_summary_rollup.Rd b/man/draws_summary_rollup.Rd index 07078458..d91d82c1 100644 --- a/man/draws_summary_rollup.Rd +++ b/man/draws_summary_rollup.Rd @@ -29,33 +29,33 @@ rollup_summary( desired in the rollup} \item{max_only}{a character vector of varable names for which only maximum values are -desired in the rollup +desired in the rollup} -#' @return +\item{...}{Optional arguments to be passed to \code{summarise_draws} if x is a \code{draws} object} +} +\value{ The \code{rollup_summary()} methods return a list of \link[tibble:tibble]{tibble} data frames. The first element is a standard \code{draws_summary} for the variables that are not rolled up The second element is a rollup of the variables to be rolled up and contains max and min values of the summary functions attained by any element of the variable - -#' @details +} +\description{ +By default, all variables containing \code{[} in the name are rolled up, but +there is an option to pass a list of parameter names, which will roll up +any variables that match \verb{^parameter_name\\\[.*} +} +\details{ By default, only the maximum value of \code{rhat} and the mimum values of \code{\link[=ess_bulk]{ess_bulk()}} and \code{\link[=ess_tail]{ess_tail()}} are returned. \code{NA}s are ignored unless all elements of the summary are \code{NA} - -#' @examples +} +\examples{ ds <- summarise_draws(example_draws()) ds2 <- summarise_draws(2 * example_draws()) -ds2$variable <- c("pi", "upsilon", -"omega\link{1,1}", "omega\link{2,1}", "omega\link{3,1}", "omega\link{4,1}", -"omega\link{1,2}", "omega\link{2,2}", "omega\link{3,2}", "omega\link{4,2}") +ds2$variable <- c("pi", "upsilon", + "omega[1,1]", "omega[2,1]", "omega[3,1]", "omega[4,1]", + "omega[1,2]", "omega[2,2]", "omega[3,2]", "omega[4,2]") draws_summary <- rbind(ds, ds2) rollup_summary(draws_summary) rollup_summary(draws_summary, rollup_vars = "theta") -rollup_summary(example_draws())} - -\item{...}{Optional arguments to be passed to \code{summarise_draws} if x is a \code{draws} object} -} -\description{ -By default, all variables containing \code{[} in the name are rolled up, but -there is an option to pass a list of parameter names, which will roll up -any variables that match \verb{^parameter_name\\\[.*} +rollup_summary(example_draws()) } From b7d4ac49c834e8f57f6446856afc7a87bf5ab625 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Fri, 21 May 2021 16:23:53 -0500 Subject: [PATCH 06/24] minor doc cleanup --- R/rollup_summary.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/rollup_summary.R b/R/rollup_summary.R index dde8c047..7a6b9819 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -5,8 +5,8 @@ #' any variables that match `^parameter_name\\[.*` #' #' @name draws_summary_rollup -#' @param ... Optional arguments to be passed to `summarise_draws` if x is a `draws` object #' @param x a `draws_summary` object or a `draws` object to be summarised +#' @param ... Optional arguments to be passed to `summarise_draws` if x is a `draws` object #' @param rollup_vars a list of variable names (excluding brackets and indices) to roll up #' @param min_only a character vector of varable names for which only minimum values are #' desired in the rollup @@ -54,7 +54,7 @@ rollup_summary.default <- function(x, ..., rollup_vars = NULL, #' @rdname draws_summary_rollup #' @export -rollup_summary.draws_summary <- function (draws_summary, rollup_vars = NULL, +rollup_summary.draws_summary <- function (x, rollup_vars = NULL, min_only = c("ess_bulk", "ess_tail"), max_only = "rhat") { # get variable names From 0abbdc550de64928a150625b526fd62a562f4cc8 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Fri, 21 May 2021 16:24:51 -0500 Subject: [PATCH 07/24] rerun roxygenise --- man/draws_summary_rollup.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/draws_summary_rollup.Rd b/man/draws_summary_rollup.Rd index d91d82c1..089ec2e5 100644 --- a/man/draws_summary_rollup.Rd +++ b/man/draws_summary_rollup.Rd @@ -14,7 +14,7 @@ rollup_summary( ) \method{rollup_summary}{draws_summary}( - draws_summary, + x, rollup_vars = NULL, min_only = c("ess_bulk", "ess_tail"), max_only = "rhat" From 0af369977f1bc50c957a5dac825052b9ee9d692e Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Fri, 21 May 2021 16:31:24 -0500 Subject: [PATCH 08/24] clean up function args --- R/rollup_summary.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/rollup_summary.R b/R/rollup_summary.R index 7a6b9819..446dd607 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -37,7 +37,7 @@ NULL #' @rdname draws_summary_rollup #' @export -rollup_summary <- function(x, rollup_vars = NULL, +rollup_summary <- function(x, ..., rollup_vars = NULL, min_only = c("ess_bulk", "ess_tail"), max_only = "rhat") { UseMethod("rollup_summary") From 29180d38cd29437b0e0d546be9d331ac836152ed Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Fri, 21 May 2021 16:32:09 -0500 Subject: [PATCH 09/24] run roxygenise --- man/draws_summary_rollup.Rd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/man/draws_summary_rollup.Rd b/man/draws_summary_rollup.Rd index 089ec2e5..0de5162d 100644 --- a/man/draws_summary_rollup.Rd +++ b/man/draws_summary_rollup.Rd @@ -8,6 +8,7 @@ \usage{ rollup_summary( x, + ..., rollup_vars = NULL, min_only = c("ess_bulk", "ess_tail"), max_only = "rhat" @@ -23,6 +24,8 @@ rollup_summary( \arguments{ \item{x}{a \code{draws_summary} object or a \code{draws} object to be summarised} +\item{...}{Optional arguments to be passed to \code{summarise_draws} if x is a \code{draws} object} + \item{rollup_vars}{a list of variable names (excluding brackets and indices) to roll up} \item{min_only}{a character vector of varable names for which only minimum values are @@ -30,8 +33,6 @@ desired in the rollup} \item{max_only}{a character vector of varable names for which only maximum values are desired in the rollup} - -\item{...}{Optional arguments to be passed to \code{summarise_draws} if x is a \code{draws} object} } \value{ The \code{rollup_summary()} methods return a list of \link[tibble:tibble]{tibble} data frames. From 463678838fc015863e2a2774edc0c42be6a73d6c Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Fri, 21 May 2021 16:40:46 -0500 Subject: [PATCH 10/24] cleaning up doc for get_indices --- R/get_indices.R | 15 +++++++++++++-- man/get_indices.Rd | 13 ------------- man/variable_indices.Rd | 19 +++++++++++++++++++ 3 files changed, 32 insertions(+), 15 deletions(-) delete mode 100644 man/get_indices.Rd create mode 100644 man/variable_indices.Rd diff --git a/R/get_indices.R b/R/get_indices.R index 25233a5d..19262632 100644 --- a/R/get_indices.R +++ b/R/get_indices.R @@ -1,7 +1,18 @@ -#' Parse the indices from a list of variables (extracted e.g. from a +#' Parse the indices from a vector of variables (extracted e.g. from a #' `draws_summary` object or a `draws` object) +#' +#' @name variable_indices +#' @param x a character vector of variables +#' +#' @return +#' A list of indices for each variable +#' +#' @details +#' Assumes that variable indexing is reflected by square brackes in the names +#' +NULL -get_indices <- function(variables){ +get_indices <- function(x){ vars_indices <- strsplit(variables, "(\\[|\\])") vars <- sapply(vars_indices, `[[`, 1) var_names <- unique(vars) diff --git a/man/get_indices.Rd b/man/get_indices.Rd deleted file mode 100644 index 74dc3158..00000000 --- a/man/get_indices.Rd +++ /dev/null @@ -1,13 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_indices.R -\name{get_indices} -\alias{get_indices} -\title{Parse the indices from a list of variables (extracted e.g. from a -\code{draws_summary} object or a \code{draws} object)} -\usage{ -get_indices(variables) -} -\description{ -Parse the indices from a list of variables (extracted e.g. from a -\code{draws_summary} object or a \code{draws} object) -} diff --git a/man/variable_indices.Rd b/man/variable_indices.Rd new file mode 100644 index 00000000..e07672df --- /dev/null +++ b/man/variable_indices.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_indices.R +\name{variable_indices} +\alias{variable_indices} +\title{Parse the indices from a vector of variables (extracted e.g. from a +\code{draws_summary} object or a \code{draws} object)} +\arguments{ +\item{x}{a character vector of variables} +} +\value{ +A list of indices for each variable +} +\description{ +Parse the indices from a vector of variables (extracted e.g. from a +\code{draws_summary} object or a \code{draws} object) +} +\details{ +Assumes that variable indexing is reflected by square brackes in the names +} From 640b2c8933d296571ca582aaece8d4a802582465 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Fri, 21 May 2021 18:09:10 -0500 Subject: [PATCH 11/24] cleaning up args --- R/get_indices.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_indices.R b/R/get_indices.R index 19262632..b6c298e1 100644 --- a/R/get_indices.R +++ b/R/get_indices.R @@ -13,7 +13,7 @@ NULL get_indices <- function(x){ - vars_indices <- strsplit(variables, "(\\[|\\])") + vars_indices <- strsplit(x, "(\\[|\\])") vars <- sapply(vars_indices, `[[`, 1) var_names <- unique(vars) From a12fe52a0deaafc4b1a8cbbd26de1e6c9b352b47 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Fri, 21 May 2021 21:24:30 -0500 Subject: [PATCH 12/24] rearranged args --- R/rollup_summary.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/rollup_summary.R b/R/rollup_summary.R index 446dd607..c3bbc244 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -6,13 +6,13 @@ #' #' @name draws_summary_rollup #' @param x a `draws_summary` object or a `draws` object to be summarised -#' @param ... Optional arguments to be passed to `summarise_draws` if x is a `draws` object #' @param rollup_vars a list of variable names (excluding brackets and indices) to roll up #' @param min_only a character vector of varable names for which only minimum values are #' desired in the rollup #' @param max_only a character vector of varable names for which only maximum values are #' desired in the rollup -#' +#' @param ... Optional arguments to be passed to `summarise_draws` if x is a `draws` object + #' @return #' The `rollup_summary()` methods return a list of [tibble][tibble::tibble] data frames. #' The first element is a standard `draws_summary` for the variables that are not rolled up @@ -37,16 +37,16 @@ NULL #' @rdname draws_summary_rollup #' @export -rollup_summary <- function(x, ..., rollup_vars = NULL, +rollup_summary <- function(x, rollup_vars = NULL, min_only = c("ess_bulk", "ess_tail"), - max_only = "rhat") { + max_only = "rhat", ...) { UseMethod("rollup_summary") } #' @export -rollup_summary.default <- function(x, ..., rollup_vars = NULL, +rollup_summary.default <- function(x, rollup_vars = NULL, min_only = c("ess_bulk", "ess_tail"), - max_only = "rhat") { + max_only = "rhat", ...) { rollup_summary(summarise_draws(x, ...), rollup_vars = NULL, min_only = c("ess_bulk", "ess_tail"), max_only = "rhat") From c60f840d841fb632d154cecf60d8fc928040f1fb Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Fri, 21 May 2021 21:38:27 -0500 Subject: [PATCH 13/24] misc --- R/{get_indices.R => parse_variable_indices.R} | 8 +++---- R/rollup_summary.R | 21 +++++++++---------- man/draws_summary_rollup.Rd | 15 ++++++------- man/variable_indices.Rd | 4 ++-- 4 files changed, 22 insertions(+), 26 deletions(-) rename R/{get_indices.R => parse_variable_indices.R} (96%) diff --git a/R/get_indices.R b/R/parse_variable_indices.R similarity index 96% rename from R/get_indices.R rename to R/parse_variable_indices.R index b6c298e1..4053fa32 100644 --- a/R/get_indices.R +++ b/R/parse_variable_indices.R @@ -8,12 +8,12 @@ #' A list of indices for each variable #' #' @details -#' Assumes that variable indexing is reflected by square brackes in the names +#' Assumes that variable indexing is reflected by square brackets in the names #' NULL -get_indices <- function(x){ - vars_indices <- strsplit(x, "(\\[|\\])") +parse_variable_indices <- function(x){ + vars_indices <- strsplit(x, "(\\[|\\]$)") vars <- sapply(vars_indices, `[[`, 1) var_names <- unique(vars) @@ -91,4 +91,4 @@ get_indices <- function(x){ indices }) indices_list -} \ No newline at end of file +} diff --git a/R/rollup_summary.R b/R/rollup_summary.R index c3bbc244..fa270406 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -1,8 +1,8 @@ #' "Roll up" `draws_summary` objects by collapsing over nonscalar parameters. #' -#' By default, all variables containing `[` in the name are rolled up, but -#' there is an option to pass a list of parameter names, which will roll up -#' any variables that match `^parameter_name\\[.*` +#' By default, all variables with names matched by `\\[.*\\]$` are rolled up, +#' but there is an option to pass a list of parameter names, which will roll up +#' any variables matched by `^parameter_name\\[.*\\]$` #' #' @name draws_summary_rollup #' @param x a `draws_summary` object or a `draws` object to be summarised @@ -11,7 +11,6 @@ #' desired in the rollup #' @param max_only a character vector of varable names for which only maximum values are #' desired in the rollup -#' @param ... Optional arguments to be passed to `summarise_draws` if x is a `draws` object #' @return #' The `rollup_summary()` methods return a list of [tibble][tibble::tibble] data frames. @@ -20,15 +19,15 @@ #' values of the summary functions attained by any element of the variable #' #' @details -#' By default, only the maximum value of `rhat` and the mimum values of [ess_bulk()] and -#' [ess_tail()] are returned. `NA`s are ignored unless all elements of the summary are `NA` +#' By default, only the maximum value of `rhat` and the minimum values of [ess_bulk()] and +#' [ess_tail()] are returned. # INSERT HOW WE HANDLE NA SUMMARIES #' #' @examples #' ds <- summarise_draws(example_draws()) #' ds2 <- summarise_draws(2 * example_draws()) #' ds2$variable <- c("pi", "upsilon", #' "omega[1,1]", "omega[2,1]", "omega[3,1]", "omega[4,1]", -#' "omega[1,2]", "omega[2,2]", "omega[3,2]", "omega[4,2]") +#' "omega[1,2]", "omega[2,2]", "omega[3,2]", "omega[4,2]") #' draws_summary <- rbind(ds, ds2) #' rollup_summary(draws_summary) #' rollup_summary(draws_summary, rollup_vars = "theta") @@ -39,15 +38,15 @@ NULL #' @export rollup_summary <- function(x, rollup_vars = NULL, min_only = c("ess_bulk", "ess_tail"), - max_only = "rhat", ...) { + max_only = "rhat") { UseMethod("rollup_summary") } #' @export rollup_summary.default <- function(x, rollup_vars = NULL, min_only = c("ess_bulk", "ess_tail"), - max_only = "rhat", ...) { - rollup_summary(summarise_draws(x, ...), rollup_vars = NULL, + max_only = "rhat") { + rollup_summary(summarise_draws(x), rollup_vars = NULL, min_only = c("ess_bulk", "ess_tail"), max_only = "rhat") } @@ -82,7 +81,7 @@ rollup_summary.draws_summary <- function (x, rollup_vars = NULL, min_only <- do.call(rbind, lapply(split_nonscalar, rollup_helper_min, names = min_only)) max_only <- do.call(rbind, lapply(split_nonscalar, rollup_helper_max, names = max_only)) variable_column <- data.frame("variable" = unique(varnames_nonscalar)) - indices <- get_indices(ds_nonscalar$variable) + indices <- parse_variable_indices(ds_nonscalar$variable) get_dims <- function(x){paste0("(", paste(apply(x[, names(x) != "index", drop = FALSE], 2, function(x){max(as.integer(x))}), collapse = ","), ")")} diff --git a/man/draws_summary_rollup.Rd b/man/draws_summary_rollup.Rd index 0de5162d..da7a26b1 100644 --- a/man/draws_summary_rollup.Rd +++ b/man/draws_summary_rollup.Rd @@ -8,7 +8,6 @@ \usage{ rollup_summary( x, - ..., rollup_vars = NULL, min_only = c("ess_bulk", "ess_tail"), max_only = "rhat" @@ -24,8 +23,6 @@ rollup_summary( \arguments{ \item{x}{a \code{draws_summary} object or a \code{draws} object to be summarised} -\item{...}{Optional arguments to be passed to \code{summarise_draws} if x is a \code{draws} object} - \item{rollup_vars}{a list of variable names (excluding brackets and indices) to roll up} \item{min_only}{a character vector of varable names for which only minimum values are @@ -41,20 +38,20 @@ The second element is a rollup of the variables to be rolled up and contains max values of the summary functions attained by any element of the variable } \description{ -By default, all variables containing \code{[} in the name are rolled up, but -there is an option to pass a list of parameter names, which will roll up -any variables that match \verb{^parameter_name\\\[.*} +By default, all variables with names matched by \verb{\\\[.*\\\]$} are rolled up, +but there is an option to pass a list of parameter names, which will roll up +any variables matched by \verb{^parameter_name\\\[.*\\\]$} } \details{ -By default, only the maximum value of \code{rhat} and the mimum values of \code{\link[=ess_bulk]{ess_bulk()}} and -\code{\link[=ess_tail]{ess_tail()}} are returned. \code{NA}s are ignored unless all elements of the summary are \code{NA} +By default, only the maximum value of \code{rhat} and the minimum values of \code{\link[=ess_bulk]{ess_bulk()}} and +\code{\link[=ess_tail]{ess_tail()}} are returned. # INSERT HOW WE HANDLE NA SUMMARIES } \examples{ ds <- summarise_draws(example_draws()) ds2 <- summarise_draws(2 * example_draws()) ds2$variable <- c("pi", "upsilon", "omega[1,1]", "omega[2,1]", "omega[3,1]", "omega[4,1]", - "omega[1,2]", "omega[2,2]", "omega[3,2]", "omega[4,2]") + "omega[1,2]", "omega[2,2]", "omega[3,2]", "omega[4,2]") draws_summary <- rbind(ds, ds2) rollup_summary(draws_summary) rollup_summary(draws_summary, rollup_vars = "theta") diff --git a/man/variable_indices.Rd b/man/variable_indices.Rd index e07672df..121f622b 100644 --- a/man/variable_indices.Rd +++ b/man/variable_indices.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_indices.R +% Please edit documentation in R/parse_variable_indices.R \name{variable_indices} \alias{variable_indices} \title{Parse the indices from a vector of variables (extracted e.g. from a @@ -15,5 +15,5 @@ Parse the indices from a vector of variables (extracted e.g. from a \code{draws_summary} object or a \code{draws} object) } \details{ -Assumes that variable indexing is reflected by square brackes in the names +Assumes that variable indexing is reflected by square brackets in the names } From c77881ff06d6e33c7c36d8b4ad929563bfa343ba Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Mon, 24 May 2021 13:38:55 -0500 Subject: [PATCH 14/24] a more fully general parse_variable_indices --- R/parse_variable_indices.R | 106 +++++++++++++++++++++++++++---------- 1 file changed, 79 insertions(+), 27 deletions(-) diff --git a/R/parse_variable_indices.R b/R/parse_variable_indices.R index 4053fa32..21bbf0e1 100644 --- a/R/parse_variable_indices.R +++ b/R/parse_variable_indices.R @@ -5,41 +5,87 @@ #' @param x a character vector of variables #' #' @return -#' A list of indices for each variable +#' A list with index information for each unique variable name V. Top-level list names are the variable names. +#' Each element contains: +#' $dimensionality the number of dimensions of V. Returns 0 for scalars with no brackets +#' but 1 for `x[1]` even if `x` has no other elements. +#' +#' $dimensions a vector of the actual dimensions of V, as determined by the number of unique +#' elements at each index position. Set to `NA` if the dimensionality is zero. +#' +#' $implied_dimensions a vector of the implied dimensions of V, where any position in V that +#' contains exclusively positive integers is filled in to include all integers from one to +#' its maximum. Set to `NA` if the dimensionality is zero. +#' +#' $index_names a list of length corresponding to the dimensionality, where each element is the +#' unique levels of the corresponding index if the index is parsed as factor, and NULL otherwise. +#' Set to `NULL` if the dimensionality is zero. +#' +#' $indices if dimensionality is zero, returns 1. +#' if dimensionality is 1, returns a vector of the #' #' @details -#' Assumes that variable indexing is reflected by square brackets in the names +#' Assumes that variable indexing uses square brackets in the variable names #' NULL parse_variable_indices <- function(x){ - vars_indices <- strsplit(x, "(\\[|\\]$)") + vars_indices <- strsplit(x, "(\\[|\\])") vars <- sapply(vars_indices, `[[`, 1) var_names <- unique(vars) - - var <- var_names[6] - - indices_list <- lapply(var_names, function (var) { - var_i <- vars == var - # reset class here as otherwise the draws arrays in the output rvars - # have type draws_matrix, which makes inspecting them hard + # Check that no variables contain unpaired or non-terminal square brackets + if_indexed <- (sapply(vars_indices, length) > 1) + if_indexed2 <- grepl("\\[.*\\]$", x) + extra_bracket <- grepl("\\].", x) | grepl("\\[.*\\[", x) + if (any(extra_bracket) | (!identical(if_indexed, if_indexed2))) { + stop_no_call(paste("Some variable names contain unpaired square brackets", + "or are multi-indexed.")) + } + missing_index <- grepl("\\[,|,\\]|,,", x) # check for `[,` `,,` or `,]` + if (any(missing_index)) { + stop_no_call(paste("Some variables contain missing indices. Each comma between square", + "brackets must be both preceeded and succeeded by an index.")) + } + + # Get dimensionality. Variables with no brackets are given as dimensionality zero. + # Variables with brackets are given dimensionality 1, even if they contain just one element. + dimensionality_elementwise <- sapply(vars_indices, function(x){ + if (length(x) == 2) { + out <- length(strsplit(x[2], ",")[[1]]) # number of commas plus one + } else { + out <- 0 + } + out + }) + dimensionality <- sapply(var_names, function(x){ + out <- unique(dimensionality_elementwise[vars == x]) + if (length(out) != 1) { + stop_no_call(paste0("Inconsistent indexing found for variable ", x, " .")) + } + out + }) + variable_indices_info <- lapply(var_names, function (x) { + # dimensions counts the number of unique values of each index. + # implied_dimensions gives the maximum for integer indices with no values <= 0 + # indices is a data frame with one line per combination of indices, including implied indices + # position gives the position in the original vector of variables (passed to `parse_variable_indices`) + # where a given combination of indices is found (NA if the comination does not exist) + indices_info <- named_list(c("dimensionality", "dimensions", "implied_dimensions", "index_names", "indices", "position")) + indices_info$dimensionality <- dimensionality[[x]] + var_i <- vars == x var_length <- sum(var_i) - - if (var_length == 1) { + if (dimensionality[x] == 0) { # single variable, no indices - out <- 1 - dimnames(out) <- NULL + indices_info$dimensions <- indices_info$implied_dimensions <- NA + indices_info$indices <- as.integer(1) + dimnames(indices_info$indices) <- NULL + indices_info$implied_dimensions <- NA + indices_info$internal_position <- 1 } else { - # variable with indices => we need to reshape the array - # basically, we're going to do a bunch of work up front to figure out - # a single array slice that does most of the work for us. - - # first, pull out the list of indices into a data frame - # where each column is an index variable indices <- sapply(vars_indices[var_i], `[[`, 2) indices <- as.data.frame(do.call(rbind, strsplit(indices, ",")), stringsAsFactors = FALSE) - + indices_info$dimensions <- unname(apply(indices, 2, function(x){length(unique(x))})) unique_indices <- vector("list", length(indices)) .dimnames <- vector("list", length(indices)) names(unique_indices) <- names(indices) @@ -53,10 +99,9 @@ parse_variable_indices <- function(x){ indices[[i]] <- as.integer(numeric_index) unique_indices[[i]] <- seq.int(1, max(numeric_index)) } else { - # indices with values < 1 are sorted but otherwise left as-is, and will create dimnames + # indices with values < 1 are filled in between the min and max indices[[i]] <- numeric_index - unique_indices[[i]] <- sort(unique(numeric_index)) - .dimnames[[i]] <- unique_indices[[i]] + unique_indices[[i]] <- seq.int(min(numeric_index), max(numeric_index)) } } else { # we convert non-numeric indices to factors so that we can force them @@ -69,6 +114,9 @@ parse_variable_indices <- function(x){ } } + indices_info$index_names <- .dimnames + indices_info$implied_dimensions <- unname(sapply(unique_indices, length)) + # sort indices and fill in missing indices as NA to ensure # (1) even if the order of the variables is something weird (like # x[2,2] comes before x[1,1]) the result @@ -87,8 +135,12 @@ parse_variable_indices <- function(x){ # need to do the sort manually after merge because when sort = TRUE, merge # sorts factors as if they were strings, and we need factors to be sorted as factors indices <- indices[do.call(order, as.list(indices[, -ncol(indices), drop = FALSE])),] + + indices_info$indices <- unname(rev(indices[, names(indices) != "index", drop = FALSE])) + indices_info$position <- which(var_i)[indices$index] } - indices + indices_info }) - indices_list -} + names(variable_indices_info) <- var_names + variable_indices_info +} \ No newline at end of file From 53cc5b38b2e3f5dff6c255c20448d34494d630c7 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Mon, 24 May 2021 13:42:51 -0500 Subject: [PATCH 15/24] cleanup --- R/parse_variable_indices.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/parse_variable_indices.R b/R/parse_variable_indices.R index 21bbf0e1..f33ad99d 100644 --- a/R/parse_variable_indices.R +++ b/R/parse_variable_indices.R @@ -5,24 +5,26 @@ #' @param x a character vector of variables #' #' @return -#' A list with index information for each unique variable name V. Top-level list names are the variable names. -#' Each element contains: +#' A list with index information for each unique variable name V in `x`. Top-level list names are +#' the variable names. Each element contains: #' $dimensionality the number of dimensions of V. Returns 0 for scalars with no brackets -#' but 1 for `x[1]` even if `x` has no other elements. +#' but 1 for `y[1]` even if `y` has no other entries in `x`. #' #' $dimensions a vector of the actual dimensions of V, as determined by the number of unique #' elements at each index position. Set to `NA` if the dimensionality is zero. #' #' $implied_dimensions a vector of the implied dimensions of V, where any position in V that -#' contains exclusively positive integers is filled in to include all integers from one to -#' its maximum. Set to `NA` if the dimensionality is zero. +#' contains exclusively integers is filled in to include all integers from the lesser of one +#' and its minimum up to its maximum. Set to `NA` if the dimensionality is zero. #' #' $index_names a list of length corresponding to the dimensionality, where each element is the #' unique levels of the corresponding index if the index is parsed as factor, and NULL otherwise. #' Set to `NULL` if the dimensionality is zero. #' #' $indices if dimensionality is zero, returns 1. -#' if dimensionality is 1, returns a vector of the +#' if dimensionality is 1 or greater, returns a dataframe of every implied combination of indices +#' +#' $position the position of each combination of indices from $indices in the the argument `x` #' #' @details #' Assumes that variable indexing uses square brackets in the variable names @@ -143,4 +145,4 @@ parse_variable_indices <- function(x){ }) names(variable_indices_info) <- var_names variable_indices_info -} \ No newline at end of file +} From deb9121928dbf2fc27cc6f82dd1bf94366ea58a1 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Mon, 24 May 2021 14:09:28 -0500 Subject: [PATCH 16/24] more cleanup --- R/rollup_summary.R | 15 ++++++--------- man/draws_summary_rollup.Rd | 8 ++++++++ man/variable_indices.Rd | 23 +++++++++++++++++++++-- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/R/rollup_summary.R b/R/rollup_summary.R index fa270406..d7ffa32c 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -42,13 +42,14 @@ rollup_summary <- function(x, rollup_vars = NULL, UseMethod("rollup_summary") } +#' @rdname draws_summary_rollup #' @export rollup_summary.default <- function(x, rollup_vars = NULL, min_only = c("ess_bulk", "ess_tail"), max_only = "rhat") { - rollup_summary(summarise_draws(x), rollup_vars = NULL, - min_only = c("ess_bulk", "ess_tail"), - max_only = "rhat") + rollup_summary(summarise_draws(x), rollup_vars = rollup_vars, + min_only = min_only, + max_only = max_only) } #' @rdname draws_summary_rollup @@ -81,12 +82,8 @@ rollup_summary.draws_summary <- function (x, rollup_vars = NULL, min_only <- do.call(rbind, lapply(split_nonscalar, rollup_helper_min, names = min_only)) max_only <- do.call(rbind, lapply(split_nonscalar, rollup_helper_max, names = max_only)) variable_column <- data.frame("variable" = unique(varnames_nonscalar)) - indices <- parse_variable_indices(ds_nonscalar$variable) - get_dims <- function(x){paste0("(", - paste(apply(x[, names(x) != "index", drop = FALSE], 2, function(x){max(as.integer(x))}), collapse = ","), - ")")} - dimensions <- lapply(indices, get_dims) - dimension_column <- data.frame("dimension" = unlist(dimensions)) + variable_indices <- parse_variable_indices(ds_nonscalar$variable) + dimension_column <- data.frame("dimension" = sapply(variable_indices, function(x){x$dimensions})) nonscalar_out <- tibble::as_tibble(cbind(variable_column, dimension_column, min_max, max_only, min_only)) out <- list(unrolled_vars = ds_scalar, rolled_vars = nonscalar_out) out diff --git a/man/draws_summary_rollup.Rd b/man/draws_summary_rollup.Rd index da7a26b1..f903ed0d 100644 --- a/man/draws_summary_rollup.Rd +++ b/man/draws_summary_rollup.Rd @@ -3,6 +3,7 @@ \name{draws_summary_rollup} \alias{draws_summary_rollup} \alias{rollup_summary} +\alias{rollup_summary.default} \alias{rollup_summary.draws_summary} \title{"Roll up" \code{draws_summary} objects by collapsing over nonscalar parameters.} \usage{ @@ -13,6 +14,13 @@ rollup_summary( max_only = "rhat" ) +\method{rollup_summary}{default}( + x, + rollup_vars = NULL, + min_only = c("ess_bulk", "ess_tail"), + max_only = "rhat" +) + \method{rollup_summary}{draws_summary}( x, rollup_vars = NULL, diff --git a/man/variable_indices.Rd b/man/variable_indices.Rd index 121f622b..c29415cc 100644 --- a/man/variable_indices.Rd +++ b/man/variable_indices.Rd @@ -8,12 +8,31 @@ \item{x}{a character vector of variables} } \value{ -A list of indices for each variable +A list with index information for each unique variable name V in \code{x}. Top-level list names are +the variable names. Each element contains: +$dimensionality the number of dimensions of V. Returns 0 for scalars with no brackets +but 1 for \code{y[1]} even if \code{y} has no other entries in \code{x}. + +$dimensions a vector of the actual dimensions of V, as determined by the number of unique +elements at each index position. Set to \code{NA} if the dimensionality is zero. + +$implied_dimensions a vector of the implied dimensions of V, where any position in V that +contains exclusively integers is filled in to include all integers from the lesser of one +and its minimum up to its maximum. Set to \code{NA} if the dimensionality is zero. + +$index_names a list of length corresponding to the dimensionality, where each element is the +unique levels of the corresponding index if the index is parsed as factor, and NULL otherwise. +Set to \code{NULL} if the dimensionality is zero. + +$indices if dimensionality is zero, returns 1. +if dimensionality is 1 or greater, returns a dataframe of every implied combination of indices + +$position the position of each combination of indices from $indices in the the argument \code{x} } \description{ Parse the indices from a vector of variables (extracted e.g. from a \code{draws_summary} object or a \code{draws} object) } \details{ -Assumes that variable indexing is reflected by square brackets in the names +Assumes that variable indexing uses square brackets in the variable names } From d080f2a273569507837c9e771b31c6c9779ad743 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Mon, 24 May 2021 14:26:41 -0500 Subject: [PATCH 17/24] getting dimension column right after switch to parse_variable_indices --- R/rollup_summary.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/rollup_summary.R b/R/rollup_summary.R index d7ffa32c..baec4b15 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -83,7 +83,9 @@ rollup_summary.draws_summary <- function (x, rollup_vars = NULL, max_only <- do.call(rbind, lapply(split_nonscalar, rollup_helper_max, names = max_only)) variable_column <- data.frame("variable" = unique(varnames_nonscalar)) variable_indices <- parse_variable_indices(ds_nonscalar$variable) - dimension_column <- data.frame("dimension" = sapply(variable_indices, function(x){x$dimensions})) + dimension_column <- data.frame("dimension" = paste0("(", + sapply(variable_indices, function(x){paste(x$dimensions, collapse = ",")}), + ")")) nonscalar_out <- tibble::as_tibble(cbind(variable_column, dimension_column, min_max, max_only, min_only)) out <- list(unrolled_vars = ds_scalar, rolled_vars = nonscalar_out) out From 138ee3f0c0944ba36201eb89d6555cc31924482e Mon Sep 17 00:00:00 2001 From: "Jacob B. Socolar" Date: Tue, 25 May 2021 17:13:05 -0500 Subject: [PATCH 18/24] replace sapply length with lengths Co-authored-by: Matthew Kay --- R/parse_variable_indices.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/parse_variable_indices.R b/R/parse_variable_indices.R index f33ad99d..2637a56e 100644 --- a/R/parse_variable_indices.R +++ b/R/parse_variable_indices.R @@ -36,7 +36,7 @@ parse_variable_indices <- function(x){ vars <- sapply(vars_indices, `[[`, 1) var_names <- unique(vars) # Check that no variables contain unpaired or non-terminal square brackets - if_indexed <- (sapply(vars_indices, length) > 1) + if_indexed <- lengths(vars_indices) > 1 if_indexed2 <- grepl("\\[.*\\]$", x) extra_bracket <- grepl("\\].", x) | grepl("\\[.*\\[", x) if (any(extra_bracket) | (!identical(if_indexed, if_indexed2))) { From 9efc43da0bfdf466ed3280240cba0aa99fd82499 Mon Sep 17 00:00:00 2001 From: "Jacob B. Socolar" Date: Tue, 25 May 2021 17:13:51 -0500 Subject: [PATCH 19/24] use 1L Co-authored-by: Matthew Kay --- R/parse_variable_indices.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/parse_variable_indices.R b/R/parse_variable_indices.R index 2637a56e..72889c98 100644 --- a/R/parse_variable_indices.R +++ b/R/parse_variable_indices.R @@ -79,7 +79,7 @@ parse_variable_indices <- function(x){ if (dimensionality[x] == 0) { # single variable, no indices indices_info$dimensions <- indices_info$implied_dimensions <- NA - indices_info$indices <- as.integer(1) + indices_info$indices <- 1L dimnames(indices_info$indices) <- NULL indices_info$implied_dimensions <- NA indices_info$internal_position <- 1 From 13114156dcc6f98f6cdc25ddcf6032e682c7a7bb Mon Sep 17 00:00:00 2001 From: "Jacob B. Socolar" Date: Tue, 25 May 2021 17:14:12 -0500 Subject: [PATCH 20/24] replace sapply length with lengths again Co-authored-by: Matthew Kay --- R/parse_variable_indices.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/parse_variable_indices.R b/R/parse_variable_indices.R index 72889c98..19c01aaa 100644 --- a/R/parse_variable_indices.R +++ b/R/parse_variable_indices.R @@ -117,7 +117,7 @@ parse_variable_indices <- function(x){ } indices_info$index_names <- .dimnames - indices_info$implied_dimensions <- unname(sapply(unique_indices, length)) + indices_info$implied_dimensions <- unname(lengths(unique_indices)) # sort indices and fill in missing indices as NA to ensure # (1) even if the order of the variables is something weird (like From 97aff00c01c73daa9c58c4e0a02d5dd91312b438 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Tue, 25 May 2021 17:51:35 -0500 Subject: [PATCH 21/24] minor stuff: --- R/parse_variable_indices.R | 39 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/R/parse_variable_indices.R b/R/parse_variable_indices.R index 19c01aaa..408194bb 100644 --- a/R/parse_variable_indices.R +++ b/R/parse_variable_indices.R @@ -7,22 +7,22 @@ #' @return #' A list with index information for each unique variable name V in `x`. Top-level list names are #' the variable names. Each element contains: -#' $dimensionality the number of dimensions of V. Returns 0 for scalars with no brackets +#' $ndim the number of dimensions of V. Returns 0 for scalars with no brackets #' but 1 for `y[1]` even if `y` has no other entries in `x`. #' -#' $dimensions a vector of the actual dimensions of V, as determined by the number of unique -#' elements at each index position. Set to `NA` if the dimensionality is zero. +#' $dim a vector of the actual dimensions of V, as determined by the number of unique +#' elements at each index position. Set to `NA` if the ndim is zero. #' -#' $implied_dimensions a vector of the implied dimensions of V, where any position in V that +#' $implied_dim a vector of the implied dimensions of V, where any position in V that #' contains exclusively integers is filled in to include all integers from the lesser of one -#' and its minimum up to its maximum. Set to `NA` if the dimensionality is zero. +#' and its minimum up to its maximum. Set to `NA` if ndim is zero. #' -#' $index_names a list of length corresponding to the dimensionality, where each element is the +#' $index_names a list of length corresponding to ndim, where each element is the #' unique levels of the corresponding index if the index is parsed as factor, and NULL otherwise. -#' Set to `NULL` if the dimensionality is zero. +#' Set to `NULL` if ndim is zero. #' -#' $indices if dimensionality is zero, returns 1. -#' if dimensionality is 1 or greater, returns a dataframe of every implied combination of indices +#' $indices if ndim is zero, returns 1. +#' if ndim is 1 or greater, returns a dataframe of every implied combination of indices #' #' $position the position of each combination of indices from $indices in the the argument `x` #' @@ -49,9 +49,9 @@ parse_variable_indices <- function(x){ "brackets must be both preceeded and succeeded by an index.")) } - # Get dimensionality. Variables with no brackets are given as dimensionality zero. - # Variables with brackets are given dimensionality 1, even if they contain just one element. - dimensionality_elementwise <- sapply(vars_indices, function(x){ + # Get ndim. Variables with no brackets are given as ndim zero. + # Variables with brackets are given ndim 1, even if they contain just one element. + ndim_elementwise <- sapply(vars_indices, function(x){ if (length(x) == 2) { out <- length(strsplit(x[2], ",")[[1]]) # number of commas plus one } else { @@ -59,24 +59,19 @@ parse_variable_indices <- function(x){ } out }) - dimensionality <- sapply(var_names, function(x){ - out <- unique(dimensionality_elementwise[vars == x]) + ndim <- sapply(var_names, function(x){ + out <- unique(ndim_elementwise[vars == x]) if (length(out) != 1) { stop_no_call(paste0("Inconsistent indexing found for variable ", x, " .")) } out }) variable_indices_info <- lapply(var_names, function (x) { - # dimensions counts the number of unique values of each index. - # implied_dimensions gives the maximum for integer indices with no values <= 0 - # indices is a data frame with one line per combination of indices, including implied indices - # position gives the position in the original vector of variables (passed to `parse_variable_indices`) - # where a given combination of indices is found (NA if the comination does not exist) - indices_info <- named_list(c("dimensionality", "dimensions", "implied_dimensions", "index_names", "indices", "position")) - indices_info$dimensionality <- dimensionality[[x]] + indices_info <- named_list(c("ndim", "dimensions", "implied_dimensions", "index_names", "indices", "position")) + indices_info$ndim <- ndim[[x]] var_i <- vars == x var_length <- sum(var_i) - if (dimensionality[x] == 0) { + if (ndim[x] == 0) { # single variable, no indices indices_info$dimensions <- indices_info$implied_dimensions <- NA indices_info$indices <- 1L From 9af34939fe8b00f1323485e03c6a10c70b456851 Mon Sep 17 00:00:00 2001 From: "Jacob B. Socolar" Date: Tue, 25 May 2021 17:52:19 -0500 Subject: [PATCH 22/24] remove superfluous line Co-authored-by: Matthew Kay --- R/parse_variable_indices.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/parse_variable_indices.R b/R/parse_variable_indices.R index 408194bb..ab314e7e 100644 --- a/R/parse_variable_indices.R +++ b/R/parse_variable_indices.R @@ -75,7 +75,6 @@ parse_variable_indices <- function(x){ # single variable, no indices indices_info$dimensions <- indices_info$implied_dimensions <- NA indices_info$indices <- 1L - dimnames(indices_info$indices) <- NULL indices_info$implied_dimensions <- NA indices_info$internal_position <- 1 } else { From 92c826bc92f5306ffc5bd7731e3c701e90570eee Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Tue, 25 May 2021 17:54:50 -0500 Subject: [PATCH 23/24] factoring out common code from if statement --- R/parse_variable_indices.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/parse_variable_indices.R b/R/parse_variable_indices.R index ab314e7e..8ef0431a 100644 --- a/R/parse_variable_indices.R +++ b/R/parse_variable_indices.R @@ -90,13 +90,12 @@ parse_variable_indices <- function(x){ if (!anyNA(numeric_index) && rlang::is_integerish(numeric_index)) { # for integer indices, we need to convert them to integers # so that we can sort them in numerical order (not string order) + indices[[i]] <- as.integer(numeric_index) if (min(numeric_index) >= 1) { # integer indices >= 1 are forced to lower bound of 1 + no dimnames - indices[[i]] <- as.integer(numeric_index) unique_indices[[i]] <- seq.int(1, max(numeric_index)) } else { # indices with values < 1 are filled in between the min and max - indices[[i]] <- numeric_index unique_indices[[i]] <- seq.int(min(numeric_index), max(numeric_index)) } } else { From e639cd195ceabe191eb9126340d4095131c1b712 Mon Sep 17 00:00:00 2001 From: Jacob Socolar Date: Tue, 25 May 2021 20:45:44 -0500 Subject: [PATCH 24/24] more concise error handling for errant brackets or missing indices --- R/parse_variable_indices.R | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/R/parse_variable_indices.R b/R/parse_variable_indices.R index 8ef0431a..f5d38049 100644 --- a/R/parse_variable_indices.R +++ b/R/parse_variable_indices.R @@ -38,17 +38,11 @@ parse_variable_indices <- function(x){ # Check that no variables contain unpaired or non-terminal square brackets if_indexed <- lengths(vars_indices) > 1 if_indexed2 <- grepl("\\[.*\\]$", x) - extra_bracket <- grepl("\\].", x) | grepl("\\[.*\\[", x) - if (any(extra_bracket) | (!identical(if_indexed, if_indexed2))) { - stop_no_call(paste("Some variable names contain unpaired square brackets", - "or are multi-indexed.")) + bracket_problems <- grepl("\\].|\\[.*\\[|\\[,|,\\]|,,", x) + if (any(bracket_problems) | (!identical(if_indexed, if_indexed2))) { + stop_no_call(paste("Some variable names contain unpaired square brackets,", + "missing indices, or are multi-indexed.")) } - missing_index <- grepl("\\[,|,\\]|,,", x) # check for `[,` `,,` or `,]` - if (any(missing_index)) { - stop_no_call(paste("Some variables contain missing indices. Each comma between square", - "brackets must be both preceeded and succeeded by an index.")) - } - # Get ndim. Variables with no brackets are given as ndim zero. # Variables with brackets are given ndim 1, even if they contain just one element. ndim_elementwise <- sapply(vars_indices, function(x){