Skip to content

Commit

Permalink
tabyl uses labels (#575)
Browse files Browse the repository at this point in the history
* Import `%||%` from rlang.

* Add support for displaying the label attribute for column name.

* Reduce space

* Style code (GHA)

* style

* Handle 2-3 ways tabyl as well.

* Clarify explanation comments

Co-authored-by: Sam Firke <[email protected]>

* Update NEWS.md

* Update NEWS.md

* reword breaking change

---------

Co-authored-by: olivroy <[email protected]>
Co-authored-by: Sam Firke <[email protected]>
  • Loading branch information
3 people committed Jun 27, 2024
1 parent 80cd1eb commit cd47202
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 10 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ importFrom(lubridate,second)
importFrom(lubridate,ymd)
importFrom(lubridate,ymd_hms)
importFrom(magrittr,"%>%")
importFrom(rlang,"%||%")
importFrom(rlang,dots_n)
importFrom(rlang,expr)
importFrom(rlang,syms)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ These are all minor breaking changes resulting from enhancements and are not exp

* When using `row_to_names()`, when all input values in `row_number` for a column are `NA`, `row_to_names()` creates a column name of `"NA"`, a character, rather than `NA`. If code previously used relied on a column name of `NA`, it will now error. To fix this, rely on a column name of `"NA"`.

* When `tabyl()` is called on a data.frame containing labels, it now displays the label attribute as the name of the first column in the the resulting `tabyl` object (@olivroy, #394). This may break subsequent code that refers to the output of such a `tabyl` by column name. To maintain the previous behavior of ignoring variable labels, you can remove the labels with a function like `haven::zap_labels()` or `labelled::remove_labels()` before calling `tabyl()`.


## New features

* A new function `paste_skip_na()` pastes without including NA values (#537).
Expand Down
2 changes: 1 addition & 1 deletion R/get_dupes.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#' mtcars %>% get_dupes(-c(wt, qsec))
#' mtcars %>% get_dupes(starts_with("cy"))
#' @importFrom tidyselect eval_select
#' @importFrom rlang expr dots_n syms
#' @importFrom rlang expr dots_n syms %||%
get_dupes <- function(dat, ...) {
expr <- rlang::expr(c(...))
pos <- tidyselect::eval_select(expr, data = dat)
Expand Down
1 change: 0 additions & 1 deletion R/print_tabyl.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#' @export

print.tabyl <- function(x, ...) {
print.data.frame(x, row.names = FALSE)
}
34 changes: 26 additions & 8 deletions R/tabyl.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...)
var_name <- names(dat)
}


# useful error message if input vector doesn't exist
if (is.null(dat)) {
stop(paste0("object ", var_name, " not found"))
Expand All @@ -76,6 +75,13 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...)
var_name <- paste(var_name, collapse = "")
}

# Try to retrieve label
if (is.data.frame(dat)) {
var_label <- attr(dat[, var_name], "label", exact = TRUE) %||% var_name
} else {
var_label <- attr(dat, "label", exact = TRUE) %||% var_name
}

# if show_na is not length-1 logical, error helpfully (#377)
if (length(show_na) > 1 || !inherits(show_na, "logical")) {
stop("The value supplied to the \"show_na\" argument must be TRUE or FALSE.\n\nDid you try to call tabyl on two vectors, like tabyl(data$var1, data$var2) ? To create a two-way tabyl, the two vectors must be in the same data.frame, and the function should be called like this: \n
Expand Down Expand Up @@ -133,8 +139,8 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...)
dplyr::mutate(percent = n / sum(n, na.rm = TRUE)) # recalculate % without NAs
}

# reassign correct variable name
names(result)[1] <- var_name
# reassign correct variable name (or label if it exists)
names(result)[1] <- var_label

# in case input var name was "n" or "percent", call helper function to set unique names
result <- handle_if_special_names_used(result)
Expand Down Expand Up @@ -238,10 +244,11 @@ tabyl_2way <- function(dat, var1, var2, show_na = TRUE, show_missing_levels = TR
result <- result[c(setdiff(names(result), "NA_"), "NA_")]
}


result %>%
data.frame(., check.names = FALSE) %>%
as_tabyl(axes = 2, row_var_name = names(dat)[1], col_var_name = names(dat)[2])
row_var_name <- names(dat)[1]
col_var_name <- names(dat)[2]
names(result)[1] <- attr(dat[, 1], "label", exact = TRUE) %||% names(result)[1]
data.frame(result, check.names = FALSE) %>%
as_tabyl(axes = 2, row_var_name = row_var_name, col_var_name = col_var_name)
}


Expand All @@ -250,6 +257,10 @@ tabyl_3way <- function(dat, var1, var2, var3, show_na = TRUE, show_missing_level
dat <- dplyr::select(dat, !!var1, !!var2, !!var3)
var3_numeric <- is.numeric(dat[[3]])

# Preserve labels, as attributes are sometimes dropped during transformations.
var1_label <- attr(dat[, 1], "label", exact = TRUE)
var2_label <- attr(dat[, 2], "label", exact = TRUE)

# Keep factor levels for ordering the list at the end
if (is.factor(dat[[3]])) {
third_levels_for_sorting <- levels(dat[[3]])
Expand Down Expand Up @@ -277,7 +288,14 @@ tabyl_3way <- function(dat, var1, var2, var3, show_na = TRUE, show_missing_level
dat[[2]] <- as.factor(dat[[2]])
}

result <- split(dat, dat[[rlang::quo_name(var3)]]) %>%
result <- split(dat, dat[[rlang::quo_name(var3)]])
# split() drops attributes, so we manually add back the label attributes.
result <- lapply(result, function(x) {
attr(x[[1]], "label") <- var1_label
attr(x[[2]], "label") <- var2_label
x
})
result <- result %>%
purrr::map(tabyl_2way, var1, var2, show_na = show_na, show_missing_levels = show_missing_levels) %>%
purrr::map(reset_1st_col_status, col1_class, col1_levels) # reset class of var in 1st col to its input class, #168

Expand Down
11 changes: 11 additions & 0 deletions tests/testthat/test-tabyl.R
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,17 @@ test_that("3-way tabyl with 3rd var factor is listed in right order, #250", {
expect_equal(names(tabyl(z, am, gear, cyl)), c("8", "6", "NA_"))
})

test_that("tabyl works with label attributes (#394)", {
mt_label <- mtcars
attr(mt_label$cyl, "label") <- "Number of cyl"
tab <- tabyl(mt_label, cyl)
expect_named(tab, c("Number of cyl", "n", "percent"))
tab2 <- tabyl(mt_label, cyl, am)
expect_named(tab2, c("Number of cyl", "0", "1"))
tab3 <- tabyl(mt_label, cyl, am, vs)
expect_equal(names(tab3[[1]])[1], "Number of cyl")
})

test_that("tabyl works with ordered 1st variable, #386", {
mt_ordered <- mtcars
mt_ordered$cyl <- ordered(mt_ordered$cyl, levels = c("4", "8", "6"))
Expand Down

0 comments on commit cd47202

Please sign in to comment.