#' Verify that a vector of values has the expected number of `TRUE` values
#'
#' A quality check for data pipelines: `x` is passed through unchanged when
#' exactly `n` of its values are `TRUE`, and an error naming the offending
#' expression is raised otherwise.
#'
#' @param x A logical vector without `NA` values
#' @param n The expected number of `TRUE` values (a single, non-missing,
#'   non-negative number)
#' @returns `x` if `sum(x) == n` or an informative error message otherwise
#' @examples
#' data.frame(A = 1:5) %>%
#'   dplyr::mutate(
#'     big_values = assert_count_true(A > 2, n = 3)
#'   )
#'
#' my_data <- data.frame(name = c("Bill", "Sam"), birthdate = c("2024-05-22", "2024-05-22"))
#' my_data %>%
#'   dplyr::mutate(
#'     birthdate =
#'       dplyr::case_when(
#'         assert_count_true(name == "Bill" & birthdate == "2024-05-22") ~ "2024-05-23",
#'         TRUE ~ birthdate
#'       )
#'   )
#' @export
assert_count_true <- function(x, n = 1) {
  # deparse() splits long expressions into several strings; collapse them so
  # the error is one coherent message instead of sprintf() repeating it once
  # per fragment.
  x_label <- paste(deparse(substitute(x), width.cutoff = 500L), collapse = " ")
  stopifnot(is.logical(x))
  # Fail early with a clear message rather than a cryptic error from `if` or
  # ngettext() when `n` is not a usable count.
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0)
  if (anyNA(x)) {
    stop(x_label, " has NA values")
  }
  n_true <- sum(x)
  if (n_true != n) {
    stop_message <-
      sprintf(
        "`%s` expected %g `TRUE` %s but %g %s found.",
        x_label,
        n,
        ngettext(n, "value", "values"),
        n_true,
        ngettext(n_true, "was", "were")
      )
    stop(stop_message)
  }
  x
}
# Tests for assert_count_true(): pass-through on a matching count, and
# informative errors for NA values, non-logical input and count mismatches.
# Expected messages are matched literally (fixed = TRUE) so that characters
# such as "." are not interpreted as regular-expression wildcards.

test_that("assert_count_true returns its input when the count matches", {
  expect_equal(assert_count_true(TRUE, 1), TRUE)
  expect_equal(assert_count_true(rep(TRUE, 3), 3), rep(TRUE, 3))
  my_vector <- c(rep(TRUE, 3), FALSE)
  expect_equal(assert_count_true(my_vector, 3), my_vector)
  # Zero expected TRUE values, including empty input
  expect_equal(assert_count_true(FALSE, n = 0), FALSE)
  expect_equal(assert_count_true(logical(0), n = 0), logical(0))
})

test_that("assert_count_true rejects invalid input and names the offender", {
  expect_error(assert_count_true(1))
  expect_error(
    assert_count_true(NA),
    regexp = "NA has NA values",
    fixed = TRUE
  )
  # The error names the variable that was passed in
  my_vector <- c(NA, TRUE)
  expect_error(
    assert_count_true(my_vector),
    regexp = "my_vector has NA values",
    fixed = TRUE
  )
})

test_that("assert_count_true reports count mismatches with correct grammar", {
  # Plural expected count, singular found count
  my_vector <- c(FALSE, TRUE)
  expect_error(
    assert_count_true(my_vector, n = 2),
    regexp = "`my_vector` expected 2 `TRUE` values but 1 was found.",
    fixed = TRUE
  )
  # Singular expected count, plural found count
  my_vector <- c(TRUE, TRUE)
  expect_error(
    assert_count_true(my_vector, n = 1),
    regexp = "`my_vector` expected 1 `TRUE` value but 2 were found.",
    fixed = TRUE
  )
})