From 49acf01c2f32e27f5f8a5946c15e03283aa63a97 Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 23 May 2024 03:02:59 -0400 Subject: [PATCH 01/10] Import `%||%` from rlang. --- NAMESPACE | 1 + R/get_dupes.R | 2 +- R/print_tabyl.R | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 27073345..1461f757 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -69,6 +69,7 @@ importFrom(lubridate,second) importFrom(lubridate,ymd) importFrom(lubridate,ymd_hms) importFrom(magrittr,"%>%") +importFrom(rlang,"%||%") importFrom(rlang,dots_n) importFrom(rlang,expr) importFrom(rlang,syms) diff --git a/R/get_dupes.R b/R/get_dupes.R index b86e409c..0a37f077 100644 --- a/R/get_dupes.R +++ b/R/get_dupes.R @@ -22,7 +22,7 @@ #' mtcars %>% get_dupes(-c(wt, qsec)) #' mtcars %>% get_dupes(starts_with("cy")) #' @importFrom tidyselect eval_select -#' @importFrom rlang expr dots_n syms +#' @importFrom rlang expr dots_n syms %||% get_dupes <- function(dat, ...) { expr <- rlang::expr(c(...)) pos <- tidyselect::eval_select(expr, data = dat) diff --git a/R/print_tabyl.R b/R/print_tabyl.R index 62832419..b971bc40 100644 --- a/R/print_tabyl.R +++ b/R/print_tabyl.R @@ -1,5 +1,4 @@ #' @export - print.tabyl <- function(x, ...) { print.data.frame(x, row.names = FALSE) } From 6c4a196c2c3403cba78f4be1f424a668befb108e Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 23 May 2024 03:03:38 -0400 Subject: [PATCH 02/10] Add support for displaying the label attribute for column name. --- NEWS.md | 2 ++ R/tabyl.R | 18 ++++++++++++++---- tests/testthat/test-tabyl.R | 15 +++++++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5d715ce2..a6042b5c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,8 @@ These are all minor breaking changes resulting from enhancements and are not exp ## New features +* `tabyl()` now defaults to displaying the label attribute for the column name (@olivroy, #394). + * A new function `paste_skip_na()` pastes without including NA values (#537). * `row_to_names()` now accepts multiple rows as input, and merges them using a new `sep` argument (#536). The default is `sep = "_"`. When handling multiple `NA` values, `row_to_names()` ignores them and only merges non-NA values for column names. When all values are `NA`, `row_to_names()` creates a column name of `"NA"`, a character, rather than `NA`. diff --git a/R/tabyl.R b/R/tabyl.R index 6fddebdd..4cc6214c 100644 --- a/R/tabyl.R +++ b/R/tabyl.R @@ -65,8 +65,7 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...) } else { var_name <- names(dat) } - - + # useful error message if input vector doesn't exist if (is.null(dat)) { stop(paste0("object ", var_name, " not found")) @@ -75,6 +74,13 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...) if (length(var_name) > 1) { var_name <- paste(var_name, collapse = "") } + + # Try to retrieve label + if (is.data.frame(dat)) { + var_label <- attr(dat[, var_name], "label", exact = TRUE) %||% var_name + } else { + var_label <- attr(dat, "label", exact = TRUE) %||% var_name + } # if show_na is not length-1 logical, error helpfully (#377) if (length(show_na) > 1 || !inherits(show_na, "logical")) { @@ -133,8 +139,8 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...) dplyr::mutate(percent = n / sum(n, na.rm = TRUE)) # recalculate % without NAs } - # reassign correct variable name - names(result)[1] <- var_name + # reassign correct variable name (or label if it exists) + names(result)[1] <- var_label # in case input var name was "n" or "percent", call helper function to set unique names result <- handle_if_special_names_used(result) @@ -169,6 +175,10 @@ tabyl.data.frame <- function(dat, var1, var2, var3, show_na = TRUE, show_missing } } +get_label <- function(x) { + attr(x, "label", exact = TRUE) +} + # a one-way frequency table; this was called "tabyl" in janitor <= 0.3.0 tabyl_1way <- function(dat, var1, show_na = TRUE, show_missing_levels = TRUE) { x <- dplyr::select(dat, !!var1) diff --git a/tests/testthat/test-tabyl.R b/tests/testthat/test-tabyl.R index 7667e15f..6cfe9caa 100644 --- a/tests/testthat/test-tabyl.R +++ b/tests/testthat/test-tabyl.R @@ -395,6 +395,21 @@ test_that("3-way tabyl with 3rd var factor is listed in right order, #250", { expect_equal(names(tabyl(z, am, gear, cyl)), c("8", "6", "NA_")) }) +test_that("tabyl works with label attributes (#394)", { + mt_label <- mtcars + attr(mt_label$cyl, "label") <- "Number of cyl" + tab <- tabyl(mt_label, cyl) + expect_named( + tab, + c("Number of cyl", "n", "percent") + ) + tab2 <- tabyl(mt_label, cyl, am) + expect_named( + tab2, + c("Number of cyl", "0", "1") + ) +}) + test_that("tabyl works with ordered 1st variable, #386", { mt_ordered <- mtcars mt_ordered$cyl <- ordered(mt_ordered$cyl, levels = c("4", "8", "6")) From a0f5c5970552fa1a426b2afcbbc7270ec6949f41 Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 23 May 2024 03:06:50 -0400 Subject: [PATCH 03/10] Reduce space --- R/tabyl.R | 4 ---- tests/testthat/test-tabyl.R | 11 +++-------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/R/tabyl.R b/R/tabyl.R index 4cc6214c..599e6f19 100644 --- a/R/tabyl.R +++ b/R/tabyl.R @@ -175,10 +175,6 @@ tabyl.data.frame <- function(dat, var1, var2, var3, show_na = TRUE, show_missing } } -get_label <- function(x) { - attr(x, "label", exact = TRUE) -} - # a one-way frequency table; this was called "tabyl" in janitor <= 0.3.0 tabyl_1way <- function(dat, var1, show_na = TRUE, show_missing_levels = TRUE) { x <- dplyr::select(dat, !!var1) diff --git a/tests/testthat/test-tabyl.R b/tests/testthat/test-tabyl.R index 6cfe9caa..9e71943d 100644 --- a/tests/testthat/test-tabyl.R +++ b/tests/testthat/test-tabyl.R @@ -399,15 +399,10 @@ test_that("tabyl works with label attributes (#394)", { mt_label <- mtcars attr(mt_label$cyl, "label") <- "Number of cyl" tab <- tabyl(mt_label, cyl) - expect_named( - tab, - c("Number of cyl", "n", "percent") - ) + expect_named(tab, c("Number of cyl", "n", "percent")) + tab2 <- tabyl(mt_label, cyl, am) - expect_named( - tab2, - c("Number of cyl", "0", "1") - ) + expect_named(tab2, c("Number of cyl", "0", "1")) }) test_that("tabyl works with ordered 1st variable, #386", { From 8f0e33d49c7d7806ad315a6f220d2356589a0879 Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 23 May 2024 07:10:28 +0000 Subject: [PATCH 04/10] Style code (GHA) --- R/tabyl.R | 6 +++--- tests/testthat/test-tabyl.R | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/tabyl.R b/R/tabyl.R index 599e6f19..5435c3b5 100644 --- a/R/tabyl.R +++ b/R/tabyl.R @@ -65,7 +65,7 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...) } else { var_name <- names(dat) } - + # useful error message if input vector doesn't exist if (is.null(dat)) { stop(paste0("object ", var_name, " not found")) @@ -74,12 +74,12 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...) if (length(var_name) > 1) { var_name <- paste(var_name, collapse = "") } - + # Try to retrieve label if (is.data.frame(dat)) { var_label <- attr(dat[, var_name], "label", exact = TRUE) %||% var_name } else { - var_label <- attr(dat, "label", exact = TRUE) %||% var_name + var_label <- attr(dat, "label", exact = TRUE) %||% var_name } # if show_na is not length-1 logical, error helpfully (#377) diff --git a/tests/testthat/test-tabyl.R b/tests/testthat/test-tabyl.R index 9e71943d..fced8f28 100644 --- a/tests/testthat/test-tabyl.R +++ b/tests/testthat/test-tabyl.R @@ -401,7 +401,7 @@ test_that("tabyl works with label attributes (#394)", { tab <- tabyl(mt_label, cyl) expect_named(tab, c("Number of cyl", "n", "percent")) - tab2 <- tabyl(mt_label, cyl, am) + tab2 <- tabyl(mt_label, cyl, am) expect_named(tab2, c("Number of cyl", "0", "1")) }) From 4ddb384e11e82e514026da9ee247610d39caed44 Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 23 May 2024 03:13:51 -0400 Subject: [PATCH 05/10] style --- tests/testthat/test-tabyl.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-tabyl.R b/tests/testthat/test-tabyl.R index 9e71943d..fced8f28 100644 --- a/tests/testthat/test-tabyl.R +++ b/tests/testthat/test-tabyl.R @@ -401,7 +401,7 @@ test_that("tabyl works with label attributes (#394)", { tab <- tabyl(mt_label, cyl) expect_named(tab, c("Number of cyl", "n", "percent")) - tab2 <- tabyl(mt_label, cyl, am) + tab2 <- tabyl(mt_label, cyl, am) expect_named(tab2, c("Number of cyl", "0", "1")) }) From cc9edbb094897ddf0911e0f9180df2a7a3a26b70 Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 23 May 2024 03:41:51 -0400 Subject: [PATCH 06/10] Handle 2-3 ways tabyl as well. --- R/tabyl.R | 22 +++++++++++++++++----- tests/testthat/test-tabyl.R | 3 ++- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/R/tabyl.R b/R/tabyl.R index 5435c3b5..3b19ba7d 100644 --- a/R/tabyl.R +++ b/R/tabyl.R @@ -244,10 +244,11 @@ tabyl_2way <- function(dat, var1, var2, show_na = TRUE, show_missing_levels = TR result <- result[c(setdiff(names(result), "NA_"), "NA_")] } - - result %>% - data.frame(., check.names = FALSE) %>% - as_tabyl(axes = 2, row_var_name = names(dat)[1], col_var_name = names(dat)[2]) + row_var_name <- names(dat)[1] + col_var_name <- names(dat)[2] + names(result)[1] <- attr(dat[, 1], "label", exact = TRUE) %||% names(result)[1] + data.frame(result, check.names = FALSE) %>% + as_tabyl(axes = 2, row_var_name = row_var_name, col_var_name = col_var_name) } @@ -256,6 +257,10 @@ tabyl_3way <- function(dat, var1, var2, var3, show_na = TRUE, show_missing_level dat <- dplyr::select(dat, !!var1, !!var2, !!var3) var3_numeric <- is.numeric(dat[[3]]) + # Sometimes, attributes can be dropped with transformation. + var1_label <- attr(dat[, 1], "label", exact = TRUE) + var2_label <- attr(dat[, 2], "label", exact = TRUE) + # Keep factor levels for ordering the list at the end if (is.factor(dat[[3]])) { third_levels_for_sorting <- levels(dat[[3]]) @@ -283,7 +288,14 @@ tabyl_3way <- function(dat, var1, var2, var3, show_na = TRUE, show_missing_level dat[[2]] <- as.factor(dat[[2]]) } - result <- split(dat, dat[[rlang::quo_name(var3)]]) %>% + result <- split(dat, dat[[rlang::quo_name(var3)]]) + # split() drops attributes, so we manually add back the 1st variable attribute. + result <- lapply(result, function(x) { + attr(x[[1]], "label") <- var1_label + attr(x[[2]], "label") <- var2_label + x + }) + result <- result %>% purrr::map(tabyl_2way, var1, var2, show_na = show_na, show_missing_levels = show_missing_levels) %>% purrr::map(reset_1st_col_status, col1_class, col1_levels) # reset class of var in 1st col to its input class, #168 diff --git a/tests/testthat/test-tabyl.R b/tests/testthat/test-tabyl.R index fced8f28..4226e987 100644 --- a/tests/testthat/test-tabyl.R +++ b/tests/testthat/test-tabyl.R @@ -400,9 +400,10 @@ test_that("tabyl works with label attributes (#394)", { attr(mt_label$cyl, "label") <- "Number of cyl" tab <- tabyl(mt_label, cyl) expect_named(tab, c("Number of cyl", "n", "percent")) - tab2 <- tabyl(mt_label, cyl, am) expect_named(tab2, c("Number of cyl", "0", "1")) + tab3 <- tabyl(mt_label, cyl, am, vs) + expect_equal(names(tab3[[1]])[1], "Number of cyl") }) test_that("tabyl works with ordered 1st variable, #386", { From c77aa0a61965754a17f2e2cfcfb308b4499ca441 Mon Sep 17 00:00:00 2001 From: olivroy <52606734+olivroy@users.noreply.github.com> Date: Thu, 27 Jun 2024 13:45:29 -0400 Subject: [PATCH 07/10] Clarify explanation comments Co-authored-by: Sam Firke --- R/tabyl.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/tabyl.R b/R/tabyl.R index 3b19ba7d..fca82750 100644 --- a/R/tabyl.R +++ b/R/tabyl.R @@ -257,7 +257,7 @@ tabyl_3way <- function(dat, var1, var2, var3, show_na = TRUE, show_missing_level dat <- dplyr::select(dat, !!var1, !!var2, !!var3) var3_numeric <- is.numeric(dat[[3]]) - # Sometimes, attributes can be dropped with transformation. + # Preserve labels, as attributes are sometimes dropped during transformations. var1_label <- attr(dat[, 1], "label", exact = TRUE) var2_label <- attr(dat[, 2], "label", exact = TRUE) @@ -289,7 +289,7 @@ tabyl_3way <- function(dat, var1, var2, var3, show_na = TRUE, show_missing_level } result <- split(dat, dat[[rlang::quo_name(var3)]]) - # split() drops attributes, so we manually add back the 1st variable attribute. + # split() drops attributes, so we manually add back the label attributes. result <- lapply(result, function(x) { attr(x[[1]], "label") <- var1_label attr(x[[2]], "label") <- var2_label From f240ccf9f65e0268a49f61226fa1778e1d2348a8 Mon Sep 17 00:00:00 2001 From: olivroy <52606734+olivroy@users.noreply.github.com> Date: Thu, 27 Jun 2024 13:47:08 -0400 Subject: [PATCH 08/10] Update NEWS.md --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index a6042b5c..79a1061a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,10 +6,10 @@ These are all minor breaking changes resulting from enhancements and are not exp * When using `row_to_names()`, when all input values in `row_number` for a column are `NA`, `row_to_names()` creates a column name of `"NA"`, a character, rather than `NA`. If code previously used relied on a column name of `NA`, it will now error. To fix this, rely on a column name of `"NA"`. -## New features - * `tabyl()` now defaults to displaying the label attribute for the column name (@olivroy, #394). +## New features + * A new function `paste_skip_na()` pastes without including NA values (#537). * `row_to_names()` now accepts multiple rows as input, and merges them using a new `sep` argument (#536). The default is `sep = "_"`. When handling multiple `NA` values, `row_to_names()` ignores them and only merges non-NA values for column names. When all values are `NA`, `row_to_names()` creates a column name of `"NA"`, a character, rather than `NA`. From f224183b9c1cabec0b0e6065e400e509bb700d2c Mon Sep 17 00:00:00 2001 From: olivroy <52606734+olivroy@users.noreply.github.com> Date: Thu, 27 Jun 2024 13:52:27 -0400 Subject: [PATCH 09/10] Update NEWS.md --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 79a1061a..76e5e019 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,7 +6,8 @@ These are all minor breaking changes resulting from enhancements and are not exp * When using `row_to_names()`, when all input values in `row_number` for a column are `NA`, `row_to_names()` creates a column name of `"NA"`, a character, rather than `NA`. If code previously used relied on a column name of `NA`, it will now error. To fix this, rely on a column name of `"NA"`. -* `tabyl()` now defaults to displaying the label attribute for the column name (@olivroy, #394). +* `tabyl()` now defaults to displaying the label attribute for the column name (@olivroy, #394). To revert to previous behavior (ignore variable labels, you can use `haven::zap_labels()`, `labelled::remove_labels()` or similar, before piping in `tabyl()`. + ## New features From 88957ce61f1f3c944b793a9307312f13fd9bd33d Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Thu, 27 Jun 2024 14:00:33 -0400 Subject: [PATCH 10/10] reword breaking change --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 76e5e019..c4356e67 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,7 +6,7 @@ These are all minor breaking changes resulting from enhancements and are not exp * When using `row_to_names()`, when all input values in `row_number` for a column are `NA`, `row_to_names()` creates a column name of `"NA"`, a character, rather than `NA`. If code previously used relied on a column name of `NA`, it will now error. To fix this, rely on a column name of `"NA"`. -* `tabyl()` now defaults to displaying the label attribute for the column name (@olivroy, #394). To revert to previous behavior (ignore variable labels, you can use `haven::zap_labels()`, `labelled::remove_labels()` or similar, before piping in `tabyl()`. +* When `tabyl()` is called on a data.frame containing labels, it now displays the label attribute as the name of the first column in the the resulting `tabyl` object (@olivroy, #394). This may break subsequent code that refers to the output of such a `tabyl` by column name. To maintain the previous behavior of ignoring variable labels, you can remove the labels with a function like `haven::zap_labels()` or `labelled::remove_labels()` before calling `tabyl()`. ## New features