From ad52765371f5a15f0d1f918f70fd17c8d32f041e Mon Sep 17 00:00:00 2001 From: olivroy <52606734+olivroy@users.noreply.github.com> Date: Thu, 25 Jan 2024 11:28:50 -0500 Subject: [PATCH] Tweaks to docs + wrap long lines + add function index (#559) * Add link for deprecated functions * WS * cleanup: janitor requires dplyr 1.0.0 (no need for the old test. * Update tabyl's doc * `usethis::use_pipe()` + add example from janitor * Take the actual dplyr 1.0.0 version * Remove space. * Wrap long lines + tweak docs. * Revert unintended change + style * Update excel_time_to_numeric.R * Review links. * Inherit package doc from DESCRIPTION + use `@keywords internal` (as it is the norm in tidyverse packages * fix cran note * Address comments * oops * re-document with roxygen2 7.3.0 * Add pkgdown function index --- DESCRIPTION | 2 +- NEWS.md | 4 +- R/adorn_ns.R | 25 +++++++++---- R/adorn_percentages.R | 20 +++++++--- R/adorn_rounding.R | 31 +++++++++++---- R/adorn_title.R | 51 ++++++++++++++++++------- R/adorn_totals.R | 51 +++++++++++++++++-------- R/as_and_untabyl.R | 14 ++++--- R/compare_df_cols.R | 28 ++++++++------ R/convert_to_date.R | 27 +++++++++----- R/excel_dates.R | 4 +- R/excel_time_to_numeric.R | 4 +- R/janitor.R | 23 ++---------- R/janitor_deprecated.R | 46 ++++++++++++----------- R/make_clean_names.R | 4 +- R/row_to_names.R | 2 +- R/sas_dates.R | 2 +- R/statistical_tests.R | 3 +- R/tabyl.R | 34 +++++++++++------ R/top_levels.R | 15 +++++--- R/utils-pipe.R | 18 +++++++++ R/utils.R | 17 --------- _pkgdown.yml | 60 ++++++++++++++++++++++++++++++ man/add_totals_col.Rd | 2 +- man/add_totals_row.Rd | 2 +- man/adorn_crosstab.Rd | 2 +- man/adorn_ns.Rd | 25 +++++++++---- man/adorn_percentages.Rd | 20 +++++++--- man/adorn_rounding.Rd | 27 ++++++++++---- man/adorn_title.Rd | 31 ++++++++++++--- man/adorn_totals.Rd | 35 ++++++++++++----- man/as_tabyl.Rd | 11 +++--- man/chisq.test.Rd | 3 +- man/compare_df_cols.Rd | 7 ++-- man/compare_df_cols_same.Rd | 11 +++--- 
man/convert_to_NA.Rd | 6 +-- man/convert_to_date.Rd | 22 ++++------- man/crosstab.Rd | 2 +- man/describe_class.Rd | 6 +-- man/excel_numeric_to_date.Rd | 6 +-- man/excel_time_to_numeric.Rd | 6 +-- man/janitor-package.Rd | 24 +++--------- man/janitor_deprecated.Rd | 17 +++++---- man/make_clean_names.Rd | 4 +- man/pipe.Rd | 12 +++++- man/remove_empty_cols.Rd | 6 +-- man/remove_empty_rows.Rd | 2 +- man/row_to_names.Rd | 2 +- man/sas_numeric_to_date.Rd | 4 +- man/tabyl.Rd | 34 +++++++++++------ man/top_levels.Rd | 8 ++-- man/use_first_valid_of.Rd | 14 +++---- tests/testthat/test-adorn-totals.R | 6 +-- tests/testthat/test-tabyl.R | 38 ++++++------------- 54 files changed, 541 insertions(+), 339 deletions(-) create mode 100644 R/utils-pipe.R delete mode 100644 R/utils.R diff --git a/DESCRIPTION b/DESCRIPTION index a5de4660..7607c8ef 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,4 +52,4 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 diff --git a/NEWS.md b/NEWS.md index 3e387eb8..5d715ce2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,11 +22,11 @@ These are all minor breaking changes resulting from enhancements and are not exp * `get_one_to_one()` no longer errors with near-equal values that become identical factor levels (fix #543, thanks to @olivroy for reporting) -# Refactoring +## Refactoring * Remove dplyr verbs superseded in dplyr 1.0.0 (#547, @olivroy) -* Restyle the package and vignettes according to the [tidyverse style guide](style.tidyverse.org) (#548, olivroy) +* Restyle the package and vignettes according to the [tidyverse style guide](https://style.tidyverse.org) (#548, olivroy) # janitor 2.2.0 (2023-02-02) diff --git a/R/adorn_ns.R b/R/adorn_ns.R index 89aba0bf..36ed4285 100644 --- a/R/adorn_ns.R +++ b/R/adorn_ns.R @@ -1,14 +1,25 @@ #' Add underlying Ns to a tabyl displaying percentages. 
#' -#' This function adds back the underlying Ns to a `tabyl` whose percentages were calculated using `adorn_percentages()`, to display the Ns and percentages together. You can also call it on a non-tabyl data.frame to which you wish to append Ns. +#' This function adds back the underlying Ns to a `tabyl` whose percentages were +#' calculated using [adorn_percentages()], to display the Ns and percentages together. +#' You can also call it on a non-tabyl data.frame to which you wish to append Ns. #' -#' @param dat a data.frame of class `tabyl` that has had `adorn_percentages` and/or `adorn_pct_formatting` called on it. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists). -#' @param position should the N go in the front, or in the rear, of the percentage? -#' @param ns the Ns to append. The default is the "core" attribute of the input tabyl `dat`, where the original Ns of a two-way `tabyl` are stored. However, if your Ns are stored somewhere else, or you need to customize them beyond what can be done with `format_func`, you can supply them here. -#' @param format_func a formatting function to run on the Ns. Consider defining with [base::format()]. -#' @param ... columns to adorn. This takes a tidyselect specification. By default, all columns are adorned except for the first column and columns not of class `numeric`, but this allows you to manually specify which columns should be adorned, for use on a data.frame that does not result from a call to `tabyl`. +#' @param dat A data.frame of class `tabyl` that has had `adorn_percentages` and/or +#' `adorn_pct_formatting` called on it. If given a list of data.frames, +#' this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists). +#' @param position Should the N go in the front, or in the rear, of the percentage? +#' @param ns The Ns to append. 
The default is the "core" attribute of the input tabyl +#' `dat`, where the original Ns of a two-way `tabyl` are stored. However, if your Ns +#' are stored somewhere else, or you need to customize them beyond what can be done +#' with `format_func`, you can supply them here. +#' @param format_func A formatting function to run on the Ns. Consider defining +#' with [base::format()]. +#' @param ... Columns to adorn. This takes a tidyselect specification. By default, +#' all columns are adorned except for the first column and columns not of class +#' `numeric`, but this allows you to manually specify which columns should be adorned, +#' for use on a data.frame that does not result from a call to `tabyl`. #' -#' @return a data.frame with Ns appended +#' @return A `data.frame` with Ns appended #' @export #' @examples #' mtcars %>% diff --git a/R/adorn_percentages.R b/R/adorn_percentages.R index df3b19f1..6536325f 100644 --- a/R/adorn_percentages.R +++ b/R/adorn_percentages.R @@ -1,13 +1,21 @@ #' Convert a data.frame of counts to percentages. #' -#' This function defaults to excluding the first column of the input data.frame, assuming that it contains a descriptive variable, but this can be overridden by specifying the columns to adorn in the `...` argument. +#' This function defaults to excluding the first column of the input data.frame, +#' assuming that it contains a descriptive variable, but this can be overridden +#' by specifying the columns to adorn in the `...` argument. #' -#' @param dat a `tabyl` or other data.frame with a tabyl-like layout. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists). -#' @param denominator the direction to use for calculating percentages. One of "row", "col", or "all". -#' @param na.rm should missing values (including NaN) be omitted from the calculations? -#' @param ... columns to adorn. This takes a tidyselect specification. 
By default, all numeric columns (besides the initial column, if numeric) are adorned, but this allows you to manually specify which columns should be adorned, for use on a data.frame that does not result from a call to `tabyl`. +#' @param dat A `tabyl` or other data.frame with a tabyl-like layout. +#' If given a list of data.frames, this function will apply itself to each +#' `data.frame` in the list (designed for 3-way `tabyl` lists). +#' @param denominator The direction to use for calculating percentages. +#' One of "row", "col", or "all". +#' @param na.rm should missing values (including `NaN`) be omitted from the calculations? +#' @param ... columns to adorn. This takes a <[`tidy-select`][dplyr::dplyr_tidy_select]> +#' specification. By default, all numeric columns (besides the initial column, if numeric) +#' are adorned, but this allows you to manually specify which columns should +#' be adorned, for use on a `data.frame` that does not result from a call to [tabyl()]. #' -#' @return Returns a data.frame of percentages, expressed as numeric values between 0 and 1. +#' @return A `data.frame` of percentages, expressed as numeric values between 0 and 1. #' @export #' @examples #' diff --git a/R/adorn_rounding.R b/R/adorn_rounding.R index 3e3909bb..60059ed5 100644 --- a/R/adorn_rounding.R +++ b/R/adorn_rounding.R @@ -1,16 +1,29 @@ #' Round the numeric columns in a data.frame. #' #' @description -#' Can run on any data.frame with at least one numeric column. This function defaults to excluding the first column of the input data.frame, assuming that it contains a descriptive variable, but this can be overridden by specifying the columns to round in the `...` argument. +#' Can run on any `data.frame` with at least one numeric column. +#' This function defaults to excluding the first column of the input data.frame, +#' assuming that it contains a descriptive variable, but this can be overridden by +#' specifying the columns to round in the `...` argument. 
#' -#' If you're formatting percentages, e.g., the result of `adorn_percentages()`, use `adorn_pct_formatting()` instead. This is a more flexible variant for ad-hoc usage. Compared to `adorn_pct_formatting()`, it does not multiply by 100 or pad the numbers with spaces for alignment in the results data.frame. This function retains the class of numeric input columns. +#' If you're formatting percentages, e.g., the result of [adorn_percentages()], +#' use [adorn_pct_formatting()] instead. This is a more flexible variant for ad-hoc usage. +#' Compared to `adorn_pct_formatting()`, it does not multiply by 100 or pad the +#' numbers with spaces for alignment in the results `data.frame`. +#' This function retains the class of numeric input columns. #' -#' @param dat a `tabyl` or other data.frame with similar layout. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists). -#' @param digits how many digits should be displayed after the decimal point? -#' @param rounding method to use for rounding - either "half to even", the base R default method, or "half up", where 14.5 rounds up to 15. -#' @param ... columns to adorn. This takes a tidyselect specification. By default, all numeric columns (besides the initial column, if numeric) are adorned, but this allows you to manually specify which columns should be adorned, for use on a data.frame that does not result from a call to `tabyl`. +#' @param dat A `tabyl` or other `data.frame` with similar layout. +#' If given a list of data.frames, this function will apply itself to each +#' `data.frame` in the list (designed for 3-way `tabyl` lists). +#' @param digits How many digits should be displayed after the decimal point? +#' @param rounding Method to use for rounding - either "half to even" +#' (the base R default method), or "half up", where 14.5 rounds up to 15. +#' @param ... Columns to adorn. This takes a tidyselect specification. 
+#' By default, all numeric columns (besides the initial column, if numeric) +#' are adorned, but this allows you to manually specify which columns should +#' be adorned, for use on a data.frame that does not result from a call to `tabyl`. #' -#' @return Returns the data.frame with rounded numeric columns. +#' @return The `data.frame` with rounded numeric columns. #' @export #' @examples #' @@ -54,7 +67,9 @@ adorn_rounding <- function(dat, digits = 1, rounding = "half to even", ...) { } numeric_cols <- which(vapply(dat, is.numeric, logical(1))) non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols) - numeric_cols <- setdiff(numeric_cols, 1) # assume 1st column should not be included so remove it from numeric_cols. Moved up to this line so that if only 1st col is numeric, the function errors + # assume 1st column should not be included so remove it from numeric_cols. + # Moved up to this line so that if only 1st col is numeric, the function errors + numeric_cols <- setdiff(numeric_cols, 1) if (rlang::dots_n(...) == 0) { cols_to_round <- numeric_cols diff --git a/R/adorn_title.R b/R/adorn_title.R index 33ebd6d2..fa5a5e8a 100644 --- a/R/adorn_title.R +++ b/R/adorn_title.R @@ -1,13 +1,30 @@ -#' @title Add column name to the top of a two-way tabyl. +#' Add column name to the top of a two-way tabyl. #' -#' @description -#' This function adds the column variable name to the top of a `tabyl` for a complete display of information. This makes the tabyl prettier, but renders the data.frame less useful for further manipulation. +#' This function adds the column variable name to the top of a `tabyl` for a +#' complete display of information. This makes the tabyl prettier, but renders +#' the `data.frame` less useful for further manipulation. #' -#' @param dat a data.frame of class `tabyl` or other data.frame with a tabyl-like layout. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists). 
-#' @param placement whether the column name should be added to the top of the tabyl in an otherwise-empty row `"top"` or appended to the already-present row name variable (`"combined"`). The formatting in the `"top"` option has the look of base R's `table()`; it also wipes out the other column names, making it hard to further use the data.frame besides formatting it for reporting. The `"combined"` option is more conservative in this regard. -#' @param row_name (optional) default behavior is to pull the row name from the attributes of the input `tabyl` object. If you wish to override that text, or if your input is not a `tabyl`, supply a string here. -#' @param col_name (optional) default behavior is to pull the column_name from the attributes of the input `tabyl` object. If you wish to override that text, or if your input is not a `tabyl`, supply a string here. -#' @return the input tabyl, augmented with the column title. Non-tabyl inputs that are of class `tbl_df` are downgraded to basic data.frames so that the title row prints correctly. +#' The `placement` argument indicates whether the column name should be added to +#' the `top` of the tabyl in an otherwise-empty row `"top"` or appended to the +#' already-present row name variable (`"combined"`). The formatting in the `"top"` +#' option has the look of base R's `table()`; it also wipes out the other column +#' names, making it hard to further use the `data.frame` besides formatting it for reporting. +#' The `"combined"` option is more conservative in this regard. +#' +#' @param dat A `data.frame` of class `tabyl` or other `data.frame` with a tabyl-like layout. +#' If given a list of data.frames, this function will apply itself to each `data.frame` +#' in the list (designed for 3-way `tabyl` lists). +#' @param placement The title placement, one of `"top"`, or `"combined"`. +#' See **Details** for more information. 
+#' @param row_name (optional) default behavior is to pull the row name from the +#' attributes of the input `tabyl` object. If you wish to override that text, +#' or if your input is not a `tabyl`, supply a string here. +#' @param col_name (optional) default behavior is to pull the column_name from +#' the attributes of the input `tabyl` object. If you wish to override that text, +#' or if your input is not a `tabyl`, supply a string here. +#' @return The input `tabyl`, augmented with the column title. Non-tabyl inputs +#' that are of class `tbl_df` are downgraded to basic data.frames so that the +#' title row prints correctly. #' #' @export #' @examples @@ -38,12 +55,14 @@ adorn_title <- function(dat, placement = "top", row_name, col_name) { if (inherits(dat, "tabyl")) { if (attr(dat, "tabyl_type") == "one_way") { - warning("adorn_title is meant for two-way tabyls, calling it on a one-way tabyl may not yield a meaningful result") + warning( + "adorn_title is meant for two-way tabyls, calling it on a one-way tabyl may not yield a meaningful result" + ) } } if (missing(col_name)) { if (!inherits(dat, "tabyl")) { - stop("When input is not a data.frame of class tabyl, a value must be specified for the col_name argument") + stop("When input is not a data.frame of class tabyl, a value must be specified for the col_name argument.") } col_var <- attr(dat, "var_names")$col } else { @@ -63,13 +82,15 @@ adorn_title <- function(dat, placement = "top", row_name, col_name) { if (inherits(dat, "tabyl")) { row_var <- attr(dat, "var_names")$row } else { - row_var <- names(dat)[1] # for non-tabyl input, if no row_name supplied, use first existing name + # for non-tabyl input, if no row_name supplied, use first existing name + row_var <- names(dat)[1] } } if (placement == "top") { - dat[, ] <- lapply(dat[, ], as.character) # to handle factors, problematic in first column and at bind_rows. + # to handle factors, problematic in first column and at bind_rows. 
+ dat[, ] <- lapply(dat[, ], as.character) # Can't use mutate_all b/c it strips attributes top <- dat[1, ] @@ -82,8 +103,10 @@ adorn_title <- function(dat, placement = "top", row_name, col_name) { out <- dat names(out)[1] <- paste(row_var, col_var, sep = "/") } - if (inherits(out, "tbl_df")) { # "top" text doesn't print if input (and thus the output) is a tibble - out <- as.data.frame(out) # but this prints row numbers, so don't apply to non-tbl_dfs like tabyls + # "top" text doesn't print if input (and thus the output) is a tibble + if (inherits(out, "tbl_df")) { + # but this prints row numbers, so don't apply to non-tbl_dfs like tabyls + out <- as.data.frame(out) } out } diff --git a/R/adorn_totals.R b/R/adorn_totals.R index 8da16c58..b4db52f3 100644 --- a/R/adorn_totals.R +++ b/R/adorn_totals.R @@ -1,15 +1,31 @@ -#' @title Append a totals row and/or column to a data.frame. +#' Append a totals row and/or column to a data.frame #' -#' @description -#' This function defaults to excluding the first column of the input data.frame, assuming that it contains a descriptive variable, but this can be overridden by specifying the columns to be totaled in the `...` argument. Non-numeric columns are converted to character class and have a user-specified fill character inserted in the totals row. +#' This function defaults to excluding the first column of the input data.frame, +#' assuming that it contains a descriptive variable, but this can be overridden +#' by specifying the columns to be totaled in the `...` argument. Non-numeric +#' columns are converted to character class and have a user-specified fill character +#' inserted in the totals row. #' -#' @param dat an input data.frame with at least one numeric column. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists). 
-#' @param where one of "row", "col", or `c("row", "col")` -#' @param fill if there are non-numeric columns, what should fill the bottom row of those columns? If a string, relevant columns will be coerced to character. If `NA` then column types are preserved. -#' @param na.rm should missing values (including NaN) be omitted from the calculations? -#' @param name name of the totals row and/or column. If both are created, and `name` is a single string, that name is applied to both. If both are created and `name` is a vector of length 2, the first element of the vector will be used as the row name (in column 1), and the second element will be used as the totals column name. Defaults to "Total". -#' @param ... columns to total. This takes a tidyselect specification. By default, all numeric columns (besides the initial column, if numeric) are included in the totals, but this allows you to manually specify which columns should be included, for use on a data.frame that does not result from a call to `tabyl`. -#' @return a data.frame augmented with a totals row, column, or both. The data.frame is now also of class `tabyl` and stores information about the attached totals and underlying data in the tabyl attributes. +#' @param dat An input `data.frame` with at least one numeric column. If given a +#' list of data.frames, this function will apply itself to each `data.frame` +#' in the list (designed for 3-way `tabyl` lists). +#' @param where One of "row", "col", or `c("row", "col")` +#' @param fill If there are non-numeric columns, what should fill the bottom row +#' of those columns? If a string, relevant columns will be coerced to character. +#' If `NA` then column types are preserved. +#' @param na.rm Should missing values (including `NaN`) be omitted from the calculations? +#' @param name Name of the totals row and/or column. If both are created, and +#' `name` is a single string, that name is applied to both. 
If both are created +#' and `name` is a vector of length 2, the first element of the vector will be +#' used as the row name (in column 1), and the second element will be used as the +#' totals column name. Defaults to "Total". +#' @param ... Columns to total. This takes a tidyselect specification. By default, +#' all numeric columns (besides the initial column, if numeric) are included in +#' the totals, but this allows you to manually specify which columns should be +#' included, for use on a data.frame that does not result from a call to `tabyl`. +#' @return A `data.frame` augmented with a totals row, column, or both. +#' The `data.frame` is now also of class `tabyl` and stores information about +#' the attached totals and underlying data in the tabyl attributes. #' @export #' @examples #' mtcars %>% @@ -31,7 +47,8 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols) if (rlang::dots_n(...) == 0) { - numeric_cols <- setdiff(numeric_cols, 1) # by default 1st column is not totaled so remove it from numeric_cols and add to non_numeric_cols + # by default 1st column is not totaled so remove it from numeric_cols and add to non_numeric_cols + numeric_cols <- setdiff(numeric_cols, 1) non_numeric_cols <- unique(c(1, non_numeric_cols)) cols_to_total <- numeric_cols } else { @@ -43,7 +60,7 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T } if (length(cols_to_total) == 0) { - stop("at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions.") + stop("at least one targeted column must be of class numeric. Control target variables with the ... argument. 
adorn_totals should be called before other adorn_ functions.") } if (sum(where %in% c("row", "col")) != length(where)) { @@ -63,7 +80,8 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T # set totals attribute if (sum(where %in% attr(dat, "totals")) > 0) { # if either of the values of "where" are already in totals attribute stop("trying to re-add a totals dimension that is already been added") - } else if (length(attr(dat, "totals")) == 1) { # if totals row OR col has already been adorned, append new axis to the current attribute + } else if (length(attr(dat, "totals")) == 1) { + # if totals row OR col has already been adorned, append new axis to the current attribute attr(dat, "totals") <- c(attr(dat, "totals"), where) } else { attr(dat, "totals") <- where @@ -77,7 +95,9 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T } # creates the totals row to be appended col_sum <- function(a_col, na_rm = na.rm) { - if (is.numeric(a_col)) { # can't do this with if_else because it doesn't like the sum() of a character vector, even if that clause is not reached + # can't do this with if_else because it doesn't like the sum() of a character vector, + # even if that clause is not reached + if (is.numeric(a_col)) { sum(a_col, na.rm = na_rm) } else { if (!is.character(fill)) { # if fill isn't a character string, use NA consistent with data types @@ -119,7 +139,8 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T } }) - if (!is.character(dat[[1]]) && !1 %in% cols_to_total) { # convert first col to character so that name can be appended + if (!is.character(dat[[1]]) && !1 %in% cols_to_total) { + # convert first col to character so that name can be appended dat[[1]] <- as.character(dat[[1]]) col_totals[[1]] <- as.character(col_totals[[1]]) } diff --git a/R/as_and_untabyl.R b/R/as_and_untabyl.R index d42f486d..235b3758 100644 --- a/R/as_and_untabyl.R +++ b/R/as_and_untabyl.R @@ -1,7 
+1,7 @@ -#' Add `tabyl` attributes to a data.frame. +#' Add `tabyl` attributes to a data.frame #' #' @description -#' A `tabyl` is a data.frame containing counts of a variable or +#' A `tabyl` is a `data.frame` containing counts of a variable or #' co-occurrences of two variables (a.k.a., a contingency table or crosstab). #' This specialized kind of data.frame has attributes that enable `adorn_` #' functions to be called for precise formatting and presentation of results. @@ -15,12 +15,13 @@ #' variable 1 2) Column names 2:n are the values of variable 2 3) Numeric values #' in columns 2:n are counts of the co-occurrences of the two variables.* #' -#' * = this is the ideal form of a tabyl, but janitor's `adorn_` functions tolerate and ignore non-numeric columns in positions 2:n. +#' * = this is the ideal form of a `tabyl`, but janitor's `adorn_` functions tolerate +#' and ignore non-numeric columns in positions 2:n. #' -#' For instance, the result of [dplyr::count()] followed by [tidyr::spread()] +#' For instance, the result of [dplyr::count()] followed by [tidyr::pivot_wider()] #' can be treated as a `tabyl`. #' -#' The result of calling `tabyl()` on a single variable is a special class of +#' The result of calling [tabyl()] on a single variable is a special class of #' one-way tabyl; this function only pertains to the two-way tabyl. #' #' @param dat a data.frame with variable values in the first column and numeric @@ -53,7 +54,8 @@ as_tabyl <- function(dat, axes = 2, row_var_name = NULL, col_var_name = NULL) { # assign core attribute and classes if (inherits(dat, "tabyl")) { - # if already a tabyl, may have totals row. Safest play is to simply reorder the core rows to match the dat rows + # if already a tabyl, may have totals row. 
+ # Safest play is to simply reorder the core rows to match the dat rows attr(dat, "core") <- attr(dat, "core")[order(match( attr(dat, "core")[, 1], dat[, 1] diff --git a/R/compare_df_cols.R b/R/compare_df_cols.R index 6456a62a..5d7c80fc 100644 --- a/R/compare_df_cols.R +++ b/R/compare_df_cols.R @@ -1,3 +1,5 @@ +#' Compare data frames columns before merging +#' #' Generate a comparison of data.frames (or similar objects) that indicates if #' they will successfully bind together by rows. #' @@ -39,7 +41,7 @@ #' compare_df_cols(dfA = data.frame(A = 1), dfB = data.frame(B = 2)) #' # a combination of list and data.frame input #' compare_df_cols(listA = list(dfA = data.frame(A = 1), dfB = data.frame(B = 2)), data.frame(A = 3)) -#' @family Data frame type comparison +#' @family data frame type comparison #' @export compare_df_cols <- function(..., return = c("all", "match", "mismatch"), bind_method = c("bind_rows", "rbind"), strict_description = FALSE) { # Input checking @@ -172,6 +174,7 @@ compare_df_cols_df_maker <- function(x, class_colname = "class", strict_descript UseMethod("compare_df_cols_df_maker") } +#' @exportS3Method NULL compare_df_cols_df_maker.data.frame <- function(x, class_colname = "class", strict_description) { if (class_colname == "column_name") { stop('`class_colname` cannot be "column_name"') @@ -191,6 +194,7 @@ compare_df_cols_df_maker.data.frame <- function(x, class_colname = "class", stri ret } +#' @exportS3Method NULL compare_df_cols_df_maker.list <- function(x, class_colname = "class", strict_description = strict_description) { if (length(class_colname) != length(x)) { stop("`x` and `class_colname` must be the same length.") @@ -215,13 +219,13 @@ compare_df_cols_df_maker.list <- function(x, class_colname = "class", strict_des #' Do the the data.frames have the same columns & types? #' -#' @description Check whether a set of data.frames are row-bindable. Calls -#' `compare_df_cols()`and returns TRUE if there are no mis-matching rows. 
` +#' Check whether a set of data.frames are row-bindable. Calls `compare_df_cols()` +#' and returns `TRUE` if there are no mis-matching rows. +#' #' @inheritParams compare_df_cols #' @param verbose Print the mismatching columns if binding will fail. -#' @return `TRUE` if row binding will succeed or `FALSE` if it will -#' fail. -#' @family Data frame type comparison +#' @return `TRUE` if row binding will succeed or `FALSE` if it will fail. +#' @family data frame type comparison #' @examples #' compare_df_cols_same(data.frame(A = 1), data.frame(A = 2)) #' compare_df_cols_same(data.frame(A = 1), data.frame(B = 2)) @@ -241,18 +245,18 @@ compare_df_cols_same <- function(..., bind_method = c("bind_rows", "rbind"), ver #' #' @details For package developers, an S3 generic method can be written for #' `describe_class()` for custom classes that may need more definition -#' than the default method. This function is called by `compare_df_cols`. +#' than the default method. This function is called by [compare_df_cols()]. #' #' @param x The object to describe #' @param strict_description Should differing factor levels be treated -#' as differences for the purposes of identifying mismatches? -#' `strict_description = TRUE` is stricter and factors with different -#' levels will be treated as different classes. `FALSE` is more -#' lenient: for class comparison purposes, the variable is just a "factor". +#' as differences for the purposes of identifying mismatches? +#' `strict_description = TRUE` is stricter and factors with different +#' levels will be treated as different classes. `FALSE` is more +#' lenient: for class comparison purposes, the variable is just a "factor". #' @return A character scalar describing the class(es) of an object where if the #' scalar will match, columns in a data.frame (or similar object) should bind #' together without issue. 
-#' @family Data frame type comparison +#' @family data frame type comparison #' @examples #' describe_class(1) #' describe_class(factor("A")) diff --git a/R/convert_to_date.R b/R/convert_to_date.R index e124b446..b239d29e 100644 --- a/R/convert_to_date.R +++ b/R/convert_to_date.R @@ -1,18 +1,20 @@ -#' Convert many date and datetime formats as may be received from Microsoft -#' Excel +#' Parse dates from many formats #' -#' @details Character conversion checks if it matches something that looks like -#' a Microsoft Excel numeric date, converts those to numeric, and then runs -#' convert_to_datetime_helper() on those numbers. Then, character to Date or -#' POSIXct conversion occurs via `character_fun(x, ...)` or -#' `character_fun(x, tz=tz, ...)`, respectively. +#' Convert many date and date-time (POSIXct) formats as may be received +#' from Microsoft Excel. +#' @details +#' Character conversion checks if it matches something that looks like a +#' Microsoft Excel numeric date, converts those to numeric, and then runs +#' convert_to_datetime_helper() on those numbers. Then, character to Date or +#' POSIXct conversion occurs via `character_fun(x, ...)` or +#' `character_fun(x, tz=tz, ...)`, respectively. #' #' @param x The object to convert #' @param tz The timezone for POSIXct output, unless an object is POSIXt #' already. Ignored for Date output. #' @param ... Passed to further methods. Eventually may be passed to #' `excel_numeric_to_date()`, `base::as.POSIXct()`, or `base::as.Date()`. -#' @param character_fun A function to convert non-numeric-looking, non-NA values +#' @param character_fun A function to convert non-numeric-looking, non-`NA` values #' in `x` to POSIXct objects. #' @param string_conversion_failure If a character value fails to parse into the #' desired class and instead returns `NA`, should the function return the @@ -26,7 +28,7 @@ #' # Mixed date source data can be provided. 
#' convert_to_date(c("2020-02-29", "40000.1")) #' @export -#' @family Date-time cleaning +#' @family date-time cleaning #' @importFrom lubridate ymd convert_to_date <- function(x, ..., character_fun = lubridate::ymd, string_conversion_failure = c("error", "warning")) { string_conversion_failure <- match.arg(string_conversion_failure) @@ -38,7 +40,7 @@ convert_to_date <- function(x, ..., character_fun = lubridate::ymd, string_conve ) } -#' @describeIn convert_to_date Convert to a date-time (POSIXct) +#' @name convert_to_date #' @examples #' convert_to_datetime( #' c("2009-07-06", "40000.1", "40000", NA), @@ -66,6 +68,7 @@ convert_to_datetime_helper <- function(x, ..., out_class = c("POSIXct", "Date")) UseMethod("convert_to_datetime_helper") } +#' @exportS3Method NULL convert_to_datetime_helper.numeric <- function(x, ..., date_system = "modern", include_time = NULL, @@ -85,10 +88,12 @@ convert_to_datetime_helper.numeric <- function(x, ..., ) } +#' @exportS3Method NULL convert_to_datetime_helper.factor <- function(x, ..., out_class = c("POSIXct", "Date")) { convert_to_datetime_helper.character(as.character(x), ..., out_class = out_class) } +#' @exportS3Method NULL convert_to_datetime_helper.POSIXt <- function(x, ..., out_class = c("POSIXct", "Date")) { out_class <- match.arg(out_class) if (out_class %in% "POSIXct") { @@ -99,6 +104,7 @@ convert_to_datetime_helper.POSIXt <- function(x, ..., out_class = c("POSIXct", " } } +#' @exportS3Method NULL convert_to_datetime_helper.Date <- function(x, ..., tz = "UTC", out_class = c("POSIXct", "Date")) { out_class <- match.arg(out_class) if (out_class %in% "POSIXct") { @@ -111,6 +117,7 @@ convert_to_datetime_helper.Date <- function(x, ..., tz = "UTC", out_class = c("P ret } +#' @exportS3Method NULL convert_to_datetime_helper.character <- function(x, ..., tz = "UTC", character_fun = lubridate::ymd_hms, string_conversion_failure = c("error", "warning"), out_class = c("POSIXct", "Date")) { string_conversion_failure <- 
match.arg(string_conversion_failure) out_class <- match.arg(out_class) diff --git a/R/excel_dates.R b/R/excel_dates.R index a505bf10..7dd17492 100644 --- a/R/excel_dates.R +++ b/R/excel_dates.R @@ -33,7 +33,7 @@ #' https://support.microsoft.com/en-us/help/2722715/support-for-the-leap-second). #' #' @export -#' @seealso \code{\link{excel_time_to_numeric}} +#' @seealso [excel_time_to_numeric()] #' @examples #' excel_numeric_to_date(40000) #' excel_numeric_to_date(40000.5) # No time is included @@ -43,7 +43,7 @@ #' include_time = TRUE, #' round_seconds = FALSE #' ) # Time with fractional seconds is included -#' @family Date-time cleaning +#' @family date-time cleaning #' @importFrom lubridate as_date as_datetime force_tz hour minute second excel_numeric_to_date <- function(date_num, date_system = "modern", include_time = FALSE, round_seconds = TRUE, tz = Sys.timezone()) { if (all(is.na(date_num))) { diff --git a/R/excel_time_to_numeric.R b/R/excel_time_to_numeric.R index d6366bde..8517a31c 100644 --- a/R/excel_time_to_numeric.R +++ b/R/excel_time_to_numeric.R @@ -19,8 +19,8 @@ #' @param round_seconds Should the output number of seconds be rounded to an #' integer? #' @return A vector of numbers >= 0 and <86400 -#' @family Date-time cleaning -#' @seealso `\link{excel_numeric_to_date}` +#' @family date-time cleaning +#' @seealso [excel_numeric_to_date()] #' @export excel_time_to_numeric <- function(time_value, round_seconds = TRUE) { UseMethod("excel_time_to_numeric") diff --git a/R/janitor.R b/R/janitor.R index eeccc8bc..f8b7e0aa 100644 --- a/R/janitor.R +++ b/R/janitor.R @@ -1,24 +1,7 @@ -#' janitor -#' -#' janitor has simple little tools for examining and cleaning dirty data. -#' -#' @section Main functions: -#' The main janitor functions can: perfectly format data.frame -#' column names; provide quick counts of variable combinations (i.e., -#' frequency tables and crosstabs); and explore duplicate records. 
Other -#' janitor functions nicely format the tabulation results. These -#' tabulate-and-report functions approximate popular features of SPSS and -#' Microsoft Excel. -#' #' @section Package context: -#' This package follows the principles of the "tidyverse" and works -#' well with the pipe function `\%>\%`. -#' -#' janitor was built with beginning-to-intermediate R users in mind -#' and is optimized for user-friendliness. Advanced users can do most -#' things covered here, but they can do it faster with janitor and save -#' their thinking for more fun tasks. -#' +#' Advanced users can do most things covered here, but they can do it +#' faster with janitor and save their thinking for more fun tasks. +#' @keywords internal "_PACKAGE" ## quiets concerns of R CMD check re: the .'s that appear in pipelines ## and the "n" that is produced by dplyr::count() in a pipeline diff --git a/R/janitor_deprecated.R b/R/janitor_deprecated.R index bdd78b6b..946a2924 100644 --- a/R/janitor_deprecated.R +++ b/R/janitor_deprecated.R @@ -2,16 +2,17 @@ #' #' These functions have already become defunct or may be defunct as soon as the next release. #' -#' * [adorn_crosstab()] -#' * [crosstab()] -#' * [use_first_valid_of()] -#' * [convert_to_NA()] -#' * [add_totals_col()] -#' * [add_totals_row()] -#' * [remove_empty_rows()] -#' * [remove_empty_cols()] +#' * [adorn_crosstab()] -> `adorn_` +#' * [crosstab()] -> [tabyl()] +#' * [use_first_valid_of()] -> [dplyr::coalesce()] +#' * [convert_to_NA()] -> [dplyr::na_if()] +#' * [add_totals_col()] -> [`adorn_totals(where = "col")`][adorn_totals()] +#' * [add_totals_row()] -> [adorn_totals()] +#' * [remove_empty_rows()] -> [`remove_empty("rows")`][remove_empty()] +#' * [remove_empty_cols()] -> [`remove_empty("cols")`][remove_empty()] #' #' @name janitor_deprecated +#' @keywords internal # EXCLUDE COVERAGE START NULL @@ -22,7 +23,7 @@ NULL #' @param ... 
arguments #' @keywords internal #' @description -#' This function is deprecated, use `tabyl(dat, var1, var2)` instead. +#' This function is deprecated, use [`tabyl(dat, var1, var2)`][tabyl()] instead. #' @export crosstab <- function(...) { @@ -36,7 +37,7 @@ crosstab <- function(...) { #' @title Add presentation formatting to a crosstabulation table. #' @description -#' This function is deprecated, use the `adorn_` family of functions instead. +#' This function is deprecated, use [tabyl()] with the `adorn_` family of functions instead. #' @param dat a data.frame with row names in the first column and numeric values in all other columns. Usually the piped-in result of a call to `crosstab` that included the argument `percent = "none"`. #' @param denom the denominator to use for calculating percentages. One of "row", "col", or "all". #' @param show_n should counts be displayed alongside the percentages? @@ -59,7 +60,7 @@ adorn_crosstab <- function(dat, denom = "row", show_n = TRUE, digits = 1, show_t #' @title Append a totals row to a data.frame. #' #' @description -#' This function is deprecated, use `adorn_totals` instead. +#' This function is deprecated, use [adorn_totals()] instead. #' #' @param dat an input data.frame with at least one numeric column. #' @param fill if there are more than one non-numeric columns, what string should fill the bottom row of those columns? @@ -79,7 +80,7 @@ add_totals_row <- function(dat, fill = "-", na.rm = TRUE) { #' @title Append a totals column to a data.frame. #' #' @description -#' This function is deprecated, use `adorn_totals` instead. +#' This function is deprecated, use [`adorn_totals(where = "col")`][adorn_totals()] instead. #' #' @param dat an input data.frame with at least one numeric column. #' @param na.rm should missing values (including NaN) be omitted from the calculations? @@ -97,14 +98,17 @@ add_totals_col <- function(dat, na.rm = TRUE) { } -#' @title Returns first non-NA value from a set of vectors. 
+#' @title Returns first non-`NA` value from a set of vectors. #' #' @description -#' At each position of the input vectors, iterates through in order and returns the first non-NA value. This is a robust replacement of the common `ifelse(!is.na(x), x, ifelse(!is.na(y), y, z))`. It's more readable and handles problems like `ifelse`'s inability to work with dates in this way. +#' Warning: Deprecated, do not use in new code. Use [dplyr::coalesce()] instead. +#' +#' At each position of the input vectors, iterates through in order and returns the first non-NA value. +#' This is a robust replacement of the common `ifelse(!is.na(x), x, ifelse(!is.na(y), y, z))`. +#' It's more readable and handles problems like [ifelse()]'s inability to work with dates in this way. #' -##' @section Warning: Deprecated, do not use in new code. Use `dplyr::coalesce()` instead. #' @param ... the input vectors. Order matters: these are searched and prioritized in the order they are supplied. -#' @param if_all_NA what value should be used when all of the vectors return `NA` for a certain index? Default is NA. +#' @param if_all_NA what value should be used when all of the vectors return `NA` for a certain index? Default is `NA`. #' @return Returns a single vector with the selected values. #' @seealso janitor_deprecated #' @export @@ -120,9 +124,10 @@ use_first_valid_of <- function(..., if_all_NA = NA) { #' @title Convert string values to true `NA` values. #' #' @description +#' Warning: Deprecated, do not use in new code. Use [dplyr::na_if()] instead. +#' #' Converts instances of user-specified strings into `NA`. Can operate on either a single vector or an entire data.frame. #' -#' @section Warning: Deprecated, do not use in new code. Use `dplyr::na_if()` instead. #' @param dat vector or data.frame to operate on. #' @param strings character vector of strings to convert. #' @return Returns a cleaned object. Can be a vector, data.frame, or `tibble::tbl_df` depending on the provided input. 
@@ -144,7 +149,7 @@ convert_to_NA <- function(dat, strings) { #' @title Removes empty rows from a data.frame. #' #' @description -#' This function is deprecated, use `remove_empty("rows")` instead. +#' This function is deprecated, use [`remove_empty("rows")`][remove_empty()] instead. #' #' @param dat the input data.frame. #' @return Returns the data.frame with no empty rows. @@ -165,13 +170,10 @@ remove_empty_rows <- function(dat) { #' @title Removes empty columns from a data.frame. #' #' @description -#' This function is deprecated, use `remove_empty("cols")` instead. +#' This function is deprecated, use [`remove_empty("cols")`][remove_empty()] instead. #' #' @param dat the input data.frame. #' @return Returns the data.frame with no empty columns. -#' @examples -#' # not run: -#' # dat %>% remove_empty_cols #' @export #' @keywords internal diff --git a/R/make_clean_names.R b/R/make_clean_names.R index 284f0dcf..472c3b0e 100644 --- a/R/make_clean_names.R +++ b/R/make_clean_names.R @@ -15,10 +15,10 @@ #' #' The order of operations is: make replacements, (optional) ASCII conversion, #' remove initial spaces and punctuation, apply `base::make.names()`, -#' apply `snakecase::to_any_case`, and add numeric suffixes +#' apply `snakecase::to_any_case()`, and add numeric suffixes #' to resolve any duplicated names. #' -#' This function relies on `snakecase::to_any_case` and can take advantage of +#' This function relies on `snakecase::to_any_case()` and can take advantage of #' its versatility. For instance, an abbreviation like "ID" can have its #' capitalization preserved by passing the argument `abbreviations = "ID"`.
#' See the documentation for [snakecase::to_any_case()] diff --git a/R/row_to_names.R b/R/row_to_names.R index f24b180f..0d9c4d0f 100644 --- a/R/row_to_names.R +++ b/R/row_to_names.R @@ -4,7 +4,7 @@ #' @param row_number The row(s) of `dat` containing the variable names or the #' string `"find_header"` to use `find_header(dat=dat, ...)` to find #' the row_number. Allows for multiple rows input as a numeric vector. NA's are -#' ignored, and if a column contains only NA value it will be named `"NA"`. +#' ignored, and if a column contains only `NA` value it will be named `"NA"`. #' @param ... Sent to `find_header()`, if #' `row_number = "find_header"`. Otherwise, ignored. #' @param remove_row Should the row `row_number` be removed from the diff --git a/R/sas_dates.R b/R/sas_dates.R index b0f4e01e..da4c6370 100644 --- a/R/sas_dates.R +++ b/R/sas_dates.R @@ -14,7 +14,7 @@ #' sas_numeric_to_date(datetime_num = 1217083532, tz = "UTC") # 1998-07-26T14:45:32Z #' sas_numeric_to_date(date_num = 15639, time_num = 3600, tz = "UTC") # 2002-10-26T01:00:00Z #' sas_numeric_to_date(time_num = 3600) # 01:00:00 -#' @family Date-time cleaning +#' @family date-time cleaning #' @export sas_numeric_to_date <- function(date_num, datetime_num, time_num, tz = "") { # Confirm that a usable set of input arguments is given diff --git a/R/statistical_tests.R b/R/statistical_tests.R index 239879d9..b0fe5b51 100644 --- a/R/statistical_tests.R +++ b/R/statistical_tests.R @@ -55,7 +55,8 @@ chisq.test.default <- function(x, y = NULL, ...) { #' @rdname chisq.test #' @method chisq.test tabyl -#' @param tabyl_results if TRUE and x is a tabyl object, also return `observed`, `expected`, `residuals` and `stdres` as tabyl +#' @param tabyl_results If `TRUE` and `x` is a tabyl object, +#' also return `observed`, `expected`, `residuals` and `stdres` as tabyl. #' @export chisq.test.tabyl <- function(x, tabyl_results = TRUE, ...) 
{ diff --git a/R/tabyl.R b/R/tabyl.R index 369c49a3..6fddebdd 100644 --- a/R/tabyl.R +++ b/R/tabyl.R @@ -1,20 +1,32 @@ #' Generate a frequency table (1-, 2-, or 3-way). #' #' @description -#' A fully-featured alternative to `table()`. Results are data.frames and can be formatted and enhanced with janitor's family of `adorn_` functions. +#' A fully-featured alternative to `table()`. Results are data.frames and can be +#' formatted and enhanced with janitor's family of `adorn_` functions. #' -#' Specify a data.frame and the one, two, or three unquoted column names you want to tabulate. Three variables generates a list of 2-way tabyls, split by the third variable. +#' Specify a `data.frame` and the one, two, or three unquoted column names you +#' want to tabulate. Three variables generates a list of 2-way tabyls, +#' split by the third variable. #' -#' Alternatively, you can tabulate a single variable that isn't in a data.frame by calling `tabyl` on a vector, e.g., `tabyl(mtcars$gear)`. +#' Alternatively, you can tabulate a single variable that isn't in a `data.frame` +#' by calling `tabyl()` on a vector, e.g., `tabyl(mtcars$gear)`. #' -#' @param dat a `data.frame` containing the variables you wish to count. Or, a vector you want to tabulate. -#' @param var1 the column name of the first variable. -#' @param var2 (optional) the column name of the second variable (the rows in a 2-way tabulation). -#' @param var3 (optional) the column name of the third variable (the list in a 3-way tabulation). -#' @param show_na should counts of `NA` values be displayed? In a one-way tabyl, the presence of `NA` values triggers an additional column showing valid percentages(calculated excluding `NA` values). -#' @param show_missing_levels should counts of missing levels of factors be displayed? These will be rows and/or columns of zeroes. Useful for keeping consistent output dimensions even when certain factor levels may not be present in the data. -#' @param ... 
the arguments to tabyl (here just for the sake of documentation compliance, as all arguments are listed with the vector- and data.frame-specific methods) -#' @return A data.frame with frequencies and percentages of the tabulated variable(s). A 3-way tabulation returns a list of data.frames. +#' @param dat A `data.frame` containing the variables you wish to count. +#' Or, a vector you want to tabulate. +#' @param var1 The column name of the first variable. +#' @param var2 (optional) the column name of the second variable +#' (the rows in a 2-way tabulation). +#' @param var3 (optional) the column name of the third variable +#' (the list in a 3-way tabulation). +#' @param show_na Should counts of `NA` values be displayed? In a one-way tabyl, +#' the presence of `NA` values triggers an additional column showing valid percentages +#' (calculated excluding `NA` values). +#' @param show_missing_levels Should counts of missing levels of factors be displayed? +#' These will be rows and/or columns of zeroes. Useful for keeping consistent +#' output dimensions even when certain factor levels may not be present in the data. +#' @param ... Additional arguments passed to methods. +#' @return A `data.frame` with frequencies and percentages of the tabulated variable(s). +#' A 3-way tabulation returns a list of data frames. #' @export #' @examples #' diff --git a/R/top_levels.R b/R/top_levels.R index 4deae4de..87320c11 100644 --- a/R/top_levels.R +++ b/R/top_levels.R @@ -3,10 +3,10 @@ #' #' Get a frequency table of a factor variable, grouped into categories by level. #' -#' @param input_vec the factor variable to tabulate. -#' @param n number of levels to include in top and bottom groups -#' @param show_na should cases where the variable is NA be shown? -#' @return a data.frame (actually a `tbl_df`) with the frequencies of the +#' @param input_vec The factor variable to tabulate. 
+#' @param n Number of levels to include in top and bottom groups +#' @param show_na Should cases where the variable is `NA` be shown? +#' @return A `data.frame` (actually a `tbl_df`) with the frequencies of the #' grouped, tabulated variable. Includes counts and percentages, and valid #' percentages (calculated omitting `NA` values, if present in the vector and #' `show_na = TRUE`.) @@ -26,7 +26,12 @@ top_levels <- function(input_vec, n = 2, show_na = FALSE) { stop("input factor variable must have at least 3 levels") } if (num_levels_in_var < 2 * n) { - stop(paste0("there are ", num_levels_in_var, " levels in the variable and ", n, " levels in each of the top and bottom groups.\nSince 2 * ", n, " = ", 2 * n, " is greater than ", num_levels_in_var, ", there would be overlap in the top and bottom groups and some records will be double-counted.")) + stop(paste0( + "there are ", num_levels_in_var, " levels in the variable and ", + n, " levels in each of the top and bottom groups.\nSince 2 * ", n, " = ", 2 * n, + " is greater than ", num_levels_in_var, + ", there would be overlap in the top and bottom groups and some records will be double-counted." + )) } if (n < 1 || n %% 1 != 0) { stop("n must be a whole number at least 1") diff --git a/R/utils-pipe.R b/R/utils-pipe.R new file mode 100644 index 00000000..e7fa4dff --- /dev/null +++ b/R/utils-pipe.R @@ -0,0 +1,18 @@ +#' Pipe operator +#' +#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +#' @param lhs A value or the magrittr placeholder. +#' @param rhs A function call using the magrittr semantics. +#' @return The result of calling `rhs(lhs)`.
+#' @examples +#' mtcars %>% +#' tabyl(carb, cyl) %>% +#' adorn_totals() +NULL diff --git a/R/utils.R b/R/utils.R deleted file mode 100644 index ea8af53f..00000000 --- a/R/utils.R +++ /dev/null @@ -1,17 +0,0 @@ -# Copied from tidyr/R/utils.R, to export the magrittr pipe - -#' Pipe operator -#' -#' @description Exported from the magrittr package. To learn more, run `?magrittr::`\%>\%``. -#' -#' @name %>% -#' @rdname pipe -#' @keywords internal -#' @export -#' @importFrom magrittr %>% -#' @usage lhs \%>\% rhs -#' @examples -#' mtcars %>% -#' tabyl(carb, cyl) %>% -#' adorn_totals() -NULL diff --git a/_pkgdown.yml b/_pkgdown.yml index fcbb6757..965f4647 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,3 +1,63 @@ url: https://sfirke.github.io/janitor/ template: bootstrap: 5 + + +reference: +- title: Cleaning data + +- subtitle: Cleaning variable names + contents: + - contains("clean_names") + +- title: Exploring data + desc: > + tabyls are an enhanced version of tables. See `vignette("tabyls")` + for more details. + contents: + - tabyl + - starts_with("adorn") + - contains("tabyl") + - -contains('.test') + +- subtitle: Change order + contents: + - row_to_names + - find_header + +- title: Comparison + desc: > + Compare data frames columns + contents: + - starts_with("compare_df_cols") + +- title: Removing unnecessary columns / rows + contents: + - starts_with("remove_") + - get_dupes + - get_one_to_one + - top_levels + - single_value + +- title: Rounding / dates helpers + desc: > + Help to mimic some behaviour from Excel or SAS. + These should be used on vector. + contents: + - round_half_up + - signif_half_up + - round_to_fraction + - excel_numeric_to_date + - sas_numeric_to_date + - excel_time_to_numeric + - starts_with("convert_to_date") + +- title: Misc / helpers + desc: > + These functions can help perform less frequent operations. 
+ contents: + - describe_class + - paste_skip_na + - chisq.test + - fisher.test + - mu_to_u diff --git a/man/add_totals_col.Rd b/man/add_totals_col.Rd index 69fe98b6..c0c0c38c 100644 --- a/man/add_totals_col.Rd +++ b/man/add_totals_col.Rd @@ -15,6 +15,6 @@ add_totals_col(dat, na.rm = TRUE) Returns a data.frame with a totals column containing row-wise sums. } \description{ -This function is deprecated, use \code{adorn_totals} instead. +This function is deprecated, use \code{\link[=adorn_totals]{adorn_totals(where = "col")}} instead. } \keyword{internal} diff --git a/man/add_totals_row.Rd b/man/add_totals_row.Rd index e33098b0..d6dc8883 100644 --- a/man/add_totals_row.Rd +++ b/man/add_totals_row.Rd @@ -17,6 +17,6 @@ add_totals_row(dat, fill = "-", na.rm = TRUE) Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others. } \description{ -This function is deprecated, use \code{adorn_totals} instead. +This function is deprecated, use \code{\link[=adorn_totals]{adorn_totals()}} instead. } \keyword{internal} diff --git a/man/adorn_crosstab.Rd b/man/adorn_crosstab.Rd index 6e66a698..f3a94e42 100644 --- a/man/adorn_crosstab.Rd +++ b/man/adorn_crosstab.Rd @@ -30,6 +30,6 @@ adorn_crosstab( Returns a data.frame. } \description{ -This function is deprecated, use the \code{adorn_} family of functions instead. +This function is deprecated, use \code{\link[=tabyl]{tabyl()}} with the \code{adorn_} family of functions instead. } \keyword{internal} diff --git a/man/adorn_ns.Rd b/man/adorn_ns.Rd index 4e7ba233..aeaa0b43 100644 --- a/man/adorn_ns.Rd +++ b/man/adorn_ns.Rd @@ -15,21 +15,32 @@ adorn_ns( ) } \arguments{ -\item{dat}{a data.frame of class \code{tabyl} that has had \code{adorn_percentages} and/or \code{adorn_pct_formatting} called on it. 
If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} +\item{dat}{A data.frame of class \code{tabyl} that has had \code{adorn_percentages} and/or +\code{adorn_pct_formatting} called on it. If given a list of data.frames, +this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} -\item{position}{should the N go in the front, or in the rear, of the percentage?} +\item{position}{Should the N go in the front, or in the rear, of the percentage?} -\item{ns}{the Ns to append. The default is the "core" attribute of the input tabyl \code{dat}, where the original Ns of a two-way \code{tabyl} are stored. However, if your Ns are stored somewhere else, or you need to customize them beyond what can be done with \code{format_func}, you can supply them here.} +\item{ns}{The Ns to append. The default is the "core" attribute of the input tabyl +\code{dat}, where the original Ns of a two-way \code{tabyl} are stored. However, if your Ns +are stored somewhere else, or you need to customize them beyond what can be done +with \code{format_func}, you can supply them here.} -\item{format_func}{a formatting function to run on the Ns. Consider defining with \code{\link[base:format]{base::format()}}.} +\item{format_func}{A formatting function to run on the Ns. Consider defining +with \code{\link[base:format]{base::format()}}.} -\item{...}{columns to adorn. This takes a tidyselect specification. By default, all columns are adorned except for the first column and columns not of class \code{numeric}, but this allows you to manually specify which columns should be adorned, for use on a data.frame that does not result from a call to \code{tabyl}.} +\item{...}{Columns to adorn. This takes a tidyselect specification. 
By default, +all columns are adorned except for the first column and columns not of class +\code{numeric}, but this allows you to manually specify which columns should be adorned, +for use on a data.frame that does not result from a call to \code{tabyl}.} } \value{ -a data.frame with Ns appended +A \code{data.frame} with Ns appended } \description{ -This function adds back the underlying Ns to a \code{tabyl} whose percentages were calculated using \code{adorn_percentages()}, to display the Ns and percentages together. You can also call it on a non-tabyl data.frame to which you wish to append Ns. +This function adds back the underlying Ns to a \code{tabyl} whose percentages were +calculated using \code{\link[=adorn_percentages]{adorn_percentages()}}, to display the Ns and percentages together. +You can also call it on a non-tabyl data.frame to which you wish to append Ns. } \examples{ mtcars \%>\% diff --git a/man/adorn_percentages.Rd b/man/adorn_percentages.Rd index 02677b3c..0d6b8714 100644 --- a/man/adorn_percentages.Rd +++ b/man/adorn_percentages.Rd @@ -7,19 +7,27 @@ adorn_percentages(dat, denominator = "row", na.rm = TRUE, ...) } \arguments{ -\item{dat}{a \code{tabyl} or other data.frame with a tabyl-like layout. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} +\item{dat}{A \code{tabyl} or other data.frame with a tabyl-like layout. +If given a list of data.frames, this function will apply itself to each +\code{data.frame} in the list (designed for 3-way \code{tabyl} lists).} -\item{denominator}{the direction to use for calculating percentages. One of "row", "col", or "all".} +\item{denominator}{The direction to use for calculating percentages. 
+One of "row", "col", or "all".} -\item{na.rm}{should missing values (including NaN) be omitted from the calculations?} +\item{na.rm}{should missing values (including \code{NaN}) be omitted from the calculations?} -\item{...}{columns to adorn. This takes a tidyselect specification. By default, all numeric columns (besides the initial column, if numeric) are adorned, but this allows you to manually specify which columns should be adorned, for use on a data.frame that does not result from a call to \code{tabyl}.} +\item{...}{columns to adorn. This takes a <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> +specification. By default, all numeric columns (besides the initial column, if numeric) +are adorned, but this allows you to manually specify which columns should +be adorned, for use on a \code{data.frame} that does not result from a call to \code{\link[=tabyl]{tabyl()}}.} } \value{ -Returns a data.frame of percentages, expressed as numeric values between 0 and 1. +A \code{data.frame} of percentages, expressed as numeric values between 0 and 1. } \description{ -This function defaults to excluding the first column of the input data.frame, assuming that it contains a descriptive variable, but this can be overridden by specifying the columns to adorn in the \code{...} argument. +This function defaults to excluding the first column of the input data.frame, +assuming that it contains a descriptive variable, but this can be overridden +by specifying the columns to adorn in the \code{...} argument. } \examples{ diff --git a/man/adorn_rounding.Rd b/man/adorn_rounding.Rd index 5da5df7b..409ce000 100644 --- a/man/adorn_rounding.Rd +++ b/man/adorn_rounding.Rd @@ -7,21 +7,34 @@ adorn_rounding(dat, digits = 1, rounding = "half to even", ...) } \arguments{ -\item{dat}{a \code{tabyl} or other data.frame with similar layout. 
If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} +\item{dat}{A \code{tabyl} or other \code{data.frame} with similar layout. +If given a list of data.frames, this function will apply itself to each +\code{data.frame} in the list (designed for 3-way \code{tabyl} lists).} -\item{digits}{how many digits should be displayed after the decimal point?} +\item{digits}{How many digits should be displayed after the decimal point?} -\item{rounding}{method to use for rounding - either "half to even", the base R default method, or "half up", where 14.5 rounds up to 15.} +\item{rounding}{Method to use for rounding - either "half to even" +(the base R default method), or "half up", where 14.5 rounds up to 15.} -\item{...}{columns to adorn. This takes a tidyselect specification. By default, all numeric columns (besides the initial column, if numeric) are adorned, but this allows you to manually specify which columns should be adorned, for use on a data.frame that does not result from a call to \code{tabyl}.} +\item{...}{Columns to adorn. This takes a tidyselect specification. +By default, all numeric columns (besides the initial column, if numeric) +are adorned, but this allows you to manually specify which columns should +be adorned, for use on a data.frame that does not result from a call to \code{tabyl}.} } \value{ -Returns the data.frame with rounded numeric columns. +The \code{data.frame} with rounded numeric columns. } \description{ -Can run on any data.frame with at least one numeric column. This function defaults to excluding the first column of the input data.frame, assuming that it contains a descriptive variable, but this can be overridden by specifying the columns to round in the \code{...} argument. +Can run on any \code{data.frame} with at least one numeric column. 
+This function defaults to excluding the first column of the input data.frame, +assuming that it contains a descriptive variable, but this can be overridden by +specifying the columns to round in the \code{...} argument. -If you're formatting percentages, e.g., the result of \code{adorn_percentages()}, use \code{adorn_pct_formatting()} instead. This is a more flexible variant for ad-hoc usage. Compared to \code{adorn_pct_formatting()}, it does not multiply by 100 or pad the numbers with spaces for alignment in the results data.frame. This function retains the class of numeric input columns. +If you're formatting percentages, e.g., the result of \code{\link[=adorn_percentages]{adorn_percentages()}}, +use \code{\link[=adorn_pct_formatting]{adorn_pct_formatting()}} instead. This is a more flexible variant for ad-hoc usage. +Compared to \code{adorn_pct_formatting()}, it does not multiply by 100 or pad the +numbers with spaces for alignment in the results \code{data.frame}. +This function retains the class of numeric input columns. } \examples{ diff --git a/man/adorn_title.Rd b/man/adorn_title.Rd index 81c17b14..0bdd54f3 100644 --- a/man/adorn_title.Rd +++ b/man/adorn_title.Rd @@ -7,19 +7,38 @@ adorn_title(dat, placement = "top", row_name, col_name) } \arguments{ -\item{dat}{a data.frame of class \code{tabyl} or other data.frame with a tabyl-like layout. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} +\item{dat}{A \code{data.frame} of class \code{tabyl} or other \code{data.frame} with a tabyl-like layout. +If given a list of data.frames, this function will apply itself to each \code{data.frame} +in the list (designed for 3-way \code{tabyl} lists).} -\item{placement}{whether the column name should be added to the top of the tabyl in an otherwise-empty row \code{"top"} or appended to the already-present row name variable (\code{"combined"}). 
The formatting in the \code{"top"} option has the look of base R's \code{table()}; it also wipes out the other column names, making it hard to further use the data.frame besides formatting it for reporting. The \code{"combined"} option is more conservative in this regard.} +\item{placement}{The title placement, one of \code{"top"}, or \code{"combined"}. +See \strong{Details} for more information.} -\item{row_name}{(optional) default behavior is to pull the row name from the attributes of the input \code{tabyl} object. If you wish to override that text, or if your input is not a \code{tabyl}, supply a string here.} +\item{row_name}{(optional) default behavior is to pull the row name from the +attributes of the input \code{tabyl} object. If you wish to override that text, +or if your input is not a \code{tabyl}, supply a string here.} -\item{col_name}{(optional) default behavior is to pull the column_name from the attributes of the input \code{tabyl} object. If you wish to override that text, or if your input is not a \code{tabyl}, supply a string here.} +\item{col_name}{(optional) default behavior is to pull the column_name from +the attributes of the input \code{tabyl} object. If you wish to override that text, +or if your input is not a \code{tabyl}, supply a string here.} } \value{ -the input tabyl, augmented with the column title. Non-tabyl inputs that are of class \code{tbl_df} are downgraded to basic data.frames so that the title row prints correctly. +The input \code{tabyl}, augmented with the column title. Non-tabyl inputs +that are of class \code{tbl_df} are downgraded to basic data.frames so that the +title row prints correctly. } \description{ -This function adds the column variable name to the top of a \code{tabyl} for a complete display of information. This makes the tabyl prettier, but renders the data.frame less useful for further manipulation. 
+This function adds the column variable name to the top of a \code{tabyl} for a +complete display of information. This makes the tabyl prettier, but renders +the \code{data.frame} less useful for further manipulation. +} +\details{ +The \code{placement} argument indicates whether the column name should be added to +the top of the tabyl in an otherwise-empty row (\code{"top"}) or appended to the +already-present row name variable (\code{"combined"}). The formatting in the \code{"top"} +option has the look of base R's \code{table()}; it also wipes out the other column +names, making it hard to further use the \code{data.frame} besides formatting it for reporting. +The \code{"combined"} option is more conservative in this regard. } \examples{ diff --git a/man/adorn_totals.Rd b/man/adorn_totals.Rd index ce16ccfd..2f6a55ef 100644 --- a/man/adorn_totals.Rd +++ b/man/adorn_totals.Rd @@ -2,28 +2,45 @@ % Please edit documentation in R/adorn_totals.R \name{adorn_totals} \alias{adorn_totals} -\title{Append a totals row and/or column to a data.frame.} +\title{Append a totals row and/or column to a data.frame} \usage{ adorn_totals(dat, where = "row", fill = "-", na.rm = TRUE, name = "Total", ...) } \arguments{ -\item{dat}{an input data.frame with at least one numeric column. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} +\item{dat}{An input \code{data.frame} with at least one numeric column. If given a +list of data.frames, this function will apply itself to each \code{data.frame} +in the list (designed for 3-way \code{tabyl} lists).} -\item{where}{one of "row", "col", or \code{c("row", "col")}} +\item{where}{One of "row", "col", or \code{c("row", "col")}} -\item{fill}{if there are non-numeric columns, what should fill the bottom row of those columns? If a string, relevant columns will be coerced to character.
If \code{NA} then column types are preserved.} +\item{fill}{If there are non-numeric columns, what should fill the bottom row +of those columns? If a string, relevant columns will be coerced to character. +If \code{NA} then column types are preserved.} -\item{na.rm}{should missing values (including NaN) be omitted from the calculations?} +\item{na.rm}{Should missing values (including \code{NaN}) be omitted from the calculations?} -\item{name}{name of the totals row and/or column. If both are created, and \code{name} is a single string, that name is applied to both. If both are created and \code{name} is a vector of length 2, the first element of the vector will be used as the row name (in column 1), and the second element will be used as the totals column name. Defaults to "Total".} +\item{name}{Name of the totals row and/or column. If both are created, and +\code{name} is a single string, that name is applied to both. If both are created +and \code{name} is a vector of length 2, the first element of the vector will be +used as the row name (in column 1), and the second element will be used as the +totals column name. Defaults to "Total".} -\item{...}{columns to total. This takes a tidyselect specification. By default, all numeric columns (besides the initial column, if numeric) are included in the totals, but this allows you to manually specify which columns should be included, for use on a data.frame that does not result from a call to \code{tabyl}.} +\item{...}{Columns to total. This takes a tidyselect specification. By default, +all numeric columns (besides the initial column, if numeric) are included in +the totals, but this allows you to manually specify which columns should be +included, for use on a data.frame that does not result from a call to \code{tabyl}.} } \value{ -a data.frame augmented with a totals row, column, or both. 
The data.frame is now also of class \code{tabyl} and stores information about the attached totals and underlying data in the tabyl attributes. +A \code{data.frame} augmented with a totals row, column, or both. +The \code{data.frame} is now also of class \code{tabyl} and stores information about +the attached totals and underlying data in the tabyl attributes. } \description{ -This function defaults to excluding the first column of the input data.frame, assuming that it contains a descriptive variable, but this can be overridden by specifying the columns to be totaled in the \code{...} argument. Non-numeric columns are converted to character class and have a user-specified fill character inserted in the totals row. +This function defaults to excluding the first column of the input data.frame, +assuming that it contains a descriptive variable, but this can be overridden +by specifying the columns to be totaled in the \code{...} argument. Non-numeric +columns are converted to character class and have a user-specified fill character +inserted in the totals row. } \examples{ mtcars \%>\% diff --git a/man/as_tabyl.Rd b/man/as_tabyl.Rd index fc5480c0..635898d1 100644 --- a/man/as_tabyl.Rd +++ b/man/as_tabyl.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/as_and_untabyl.R \name{as_tabyl} \alias{as_tabyl} -\title{Add \code{tabyl} attributes to a data.frame.} +\title{Add \code{tabyl} attributes to a data.frame} \usage{ as_tabyl(dat, axes = 2, row_var_name = NULL, col_var_name = NULL) } @@ -25,7 +25,7 @@ Returns the same data.frame, but with the additional class of "tabyl" and the attribute "core". } \description{ -A \code{tabyl} is a data.frame containing counts of a variable or +A \code{tabyl} is a \code{data.frame} containing counts of a variable or co-occurrences of two variables (a.k.a., a contingency table or crosstab). 
This specialized kind of data.frame has attributes that enable \code{adorn_} functions to be called for precise formatting and presentation of results. @@ -39,13 +39,14 @@ meets the requirements of a two-way tabyl: 1) First column contains values of variable 1 2) Column names 2:n are the values of variable 2 3) Numeric values in columns 2:n are counts of the co-occurrences of the two variables.* \itemize{ -\item = this is the ideal form of a tabyl, but janitor's \code{adorn_} functions tolerate and ignore non-numeric columns in positions 2:n. +\item = this is the ideal form of a \code{tabyl}, but janitor's \code{adorn_} functions tolerate +and ignore non-numeric columns in positions 2:n. } -For instance, the result of \code{\link[dplyr:count]{dplyr::count()}} followed by \code{\link[tidyr:spread]{tidyr::spread()}} +For instance, the result of \code{\link[dplyr:count]{dplyr::count()}} followed by \code{\link[tidyr:pivot_wider]{tidyr::pivot_wider()}} can be treated as a \code{tabyl}. -The result of calling \code{tabyl()} on a single variable is a special class of +The result of calling \code{\link[=tabyl]{tabyl()}} on a single variable is a special class of one-way tabyl; this function only pertains to the two-way tabyl. } \examples{ diff --git a/man/chisq.test.Rd b/man/chisq.test.Rd index baac01a5..675d948b 100644 --- a/man/chisq.test.Rd +++ b/man/chisq.test.Rd @@ -19,7 +19,8 @@ chisq.test(x, ...) \item{y}{if x is a vector, must be another vector or factor of the same length} -\item{tabyl_results}{if TRUE and x is a tabyl object, also return \code{observed}, \code{expected}, \code{residuals} and \code{stdres} as tabyl} +\item{tabyl_results}{If \code{TRUE} and \code{x} is a tabyl object, +also return \code{observed}, \code{expected}, \code{residuals} and \code{stdres} as tabyl.} } \value{ The result is the same as the one of \code{stats::chisq.test()}. 
diff --git a/man/compare_df_cols.Rd b/man/compare_df_cols.Rd index 231a4859..05333348 100644 --- a/man/compare_df_cols.Rd +++ b/man/compare_df_cols.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/compare_df_cols.R \name{compare_df_cols} \alias{compare_df_cols} -\title{Generate a comparison of data.frames (or similar objects) that indicates if -they will successfully bind together by rows.} +\title{Compare data frame columns before merging} \usage{ compare_df_cols( ..., @@ -64,8 +63,8 @@ compare_df_cols(dfA = data.frame(A = 1), dfB = data.frame(B = 2)) compare_df_cols(listA = list(dfA = data.frame(A = 1), dfB = data.frame(B = 2)), data.frame(A = 3)) } \seealso{ -Other Data frame type comparison: +Other data frame type comparison: \code{\link{compare_df_cols_same}()}, \code{\link{describe_class}()} } -\concept{Data frame type comparison} +\concept{data frame type comparison} diff --git a/man/compare_df_cols_same.Rd b/man/compare_df_cols_same.Rd index 5bbb9d0b..157bbcb2 100644 --- a/man/compare_df_cols_same.Rd +++ b/man/compare_df_cols_same.Rd @@ -25,12 +25,11 @@ missing from a data.frame would be considered a mismatch (as in \item{verbose}{Print the mismatching columns if binding will fail.} } \value{ -\code{TRUE} if row binding will succeed or \code{FALSE} if it will -fail. +\code{TRUE} if row binding will succeed or \code{FALSE} if it will fail. } \description{ -Check whether a set of data.frames are row-bindable. Calls -\code{compare_df_cols()}and returns TRUE if there are no mis-matching rows. ` +Check whether a set of data.frames are row-bindable. Calls \code{compare_df_cols()} +and returns \code{TRUE} if there are no mis-matching rows.
} \examples{ compare_df_cols_same(data.frame(A = 1), data.frame(A = 2)) @@ -39,8 +38,8 @@ compare_df_cols_same(data.frame(A = 1), data.frame(B = 2), verbose = FALSE) compare_df_cols_same(data.frame(A = 1), data.frame(B = 2), bind_method = "rbind") } \seealso{ -Other Data frame type comparison: +Other data frame type comparison: \code{\link{compare_df_cols}()}, \code{\link{describe_class}()} } -\concept{Data frame type comparison} +\concept{data frame type comparison} diff --git a/man/convert_to_NA.Rd b/man/convert_to_NA.Rd index cc2aa74e..9cf87886 100644 --- a/man/convert_to_NA.Rd +++ b/man/convert_to_NA.Rd @@ -15,12 +15,10 @@ convert_to_NA(dat, strings) Returns a cleaned object. Can be a vector, data.frame, or \code{tibble::tbl_df} depending on the provided input. } \description{ +Warning: Deprecated, do not use in new code. Use \code{\link[dplyr:na_if]{dplyr::na_if()}} instead. + Converts instances of user-specified strings into \code{NA}. Can operate on either a single vector or an entire data.frame. } -\section{Warning}{ - Deprecated, do not use in new code. Use \code{dplyr::na_if()} instead. -} - \seealso{ janitor_deprecated } diff --git a/man/convert_to_date.Rd b/man/convert_to_date.Rd index ac8837d5..441fffd8 100644 --- a/man/convert_to_date.Rd +++ b/man/convert_to_date.Rd @@ -3,8 +3,7 @@ \name{convert_to_date} \alias{convert_to_date} \alias{convert_to_datetime} -\title{Convert many date and datetime formats as may be received from Microsoft -Excel} +\title{Parse dates from many formats} \usage{ convert_to_date( x, @@ -27,7 +26,7 @@ convert_to_datetime( \item{...}{Passed to further methods. 
Eventually may be passed to \code{excel_numeric_to_date()}, \code{base::as.POSIXct()}, or \code{base::as.Date()}.} -\item{character_fun}{A function to convert non-numeric-looking, non-NA values +\item{character_fun}{A function to convert non-numeric-looking, non-\code{NA} values in \code{x} to POSIXct objects.} \item{string_conversion_failure}{If a character value fails to parse into the @@ -42,21 +41,16 @@ POSIXct objects for \code{convert_to_datetime()} or Date objects for \code{convert_to_date()}. } \description{ -Convert many date and datetime formats as may be received from Microsoft -Excel +Convert many date and date-time (POSIXct) formats as may be received +from Microsoft Excel. } \details{ -Character conversion checks if it matches something that looks like -a Microsoft Excel numeric date, converts those to numeric, and then runs +Character conversion checks if it matches something that looks like a +Microsoft Excel numeric date, converts those to numeric, and then runs convert_to_datetime_helper() on those numbers. Then, character to Date or POSIXct conversion occurs via \code{character_fun(x, ...)} or \code{character_fun(x, tz=tz, ...)}, respectively. } -\section{Functions}{ -\itemize{ -\item \code{convert_to_datetime()}: Convert to a date-time (POSIXct) - -}} \examples{ convert_to_date("2009-07-06") convert_to_date(40000) @@ -69,9 +63,9 @@ convert_to_datetime( ) } \seealso{ -Other Date-time cleaning: +Other date-time cleaning: \code{\link{excel_numeric_to_date}()}, \code{\link{excel_time_to_numeric}()}, \code{\link{sas_numeric_to_date}()} } -\concept{Date-time cleaning} +\concept{date-time cleaning} diff --git a/man/crosstab.Rd b/man/crosstab.Rd index f8249006..32f44354 100644 --- a/man/crosstab.Rd +++ b/man/crosstab.Rd @@ -10,6 +10,6 @@ crosstab(...) \item{...}{arguments} } \description{ -This function is deprecated, use \code{tabyl(dat, var1, var2)} instead. +This function is deprecated, use \code{\link[=tabyl]{tabyl(dat, var1, var2)}} instead. 
} \keyword{internal} diff --git a/man/describe_class.Rd b/man/describe_class.Rd index 2717e693..1c895342 100644 --- a/man/describe_class.Rd +++ b/man/describe_class.Rd @@ -32,7 +32,7 @@ Describe the class(es) of an object \details{ For package developers, an S3 generic method can be written for \code{describe_class()} for custom classes that may need more definition -than the default method. This function is called by \code{compare_df_cols}. +than the default method. This function is called by \code{\link[=compare_df_cols]{compare_df_cols()}}. } \section{Methods (by class)}{ \itemize{ @@ -49,8 +49,8 @@ describe_class(ordered(c("A", "B"))) describe_class(ordered(c("A", "B")), strict_description = FALSE) } \seealso{ -Other Data frame type comparison: +Other data frame type comparison: \code{\link{compare_df_cols_same}()}, \code{\link{compare_df_cols}()} } -\concept{Data frame type comparison} +\concept{data frame type comparison} diff --git a/man/excel_numeric_to_date.Rd b/man/excel_numeric_to_date.Rd index eda1d8b1..23fc1ba1 100644 --- a/man/excel_numeric_to_date.Rd +++ b/man/excel_numeric_to_date.Rd @@ -64,11 +64,11 @@ excel_numeric_to_date(40000.521, ) # Time with fractional seconds is included } \seealso{ -\code{\link{excel_time_to_numeric}} +\code{\link[=excel_time_to_numeric]{excel_time_to_numeric()}} -Other Date-time cleaning: +Other date-time cleaning: \code{\link{convert_to_date}()}, \code{\link{excel_time_to_numeric}()}, \code{\link{sas_numeric_to_date}()} } -\concept{Date-time cleaning} +\concept{date-time cleaning} diff --git a/man/excel_time_to_numeric.Rd b/man/excel_time_to_numeric.Rd index 59e9b033..ebba75d4 100644 --- a/man/excel_time_to_numeric.Rd +++ b/man/excel_time_to_numeric.Rd @@ -34,11 +34,11 @@ Microsoft Excel to a numeric number of seconds between 0 and 86400. 
} } \seealso{ -\verb{\link{excel_numeric_to_date}} +\code{\link[=excel_numeric_to_date]{excel_numeric_to_date()}} -Other Date-time cleaning: +Other date-time cleaning: \code{\link{convert_to_date}()}, \code{\link{excel_numeric_to_date}()}, \code{\link{sas_numeric_to_date}()} } -\concept{Date-time cleaning} +\concept{date-time cleaning} diff --git a/man/janitor-package.Rd b/man/janitor-package.Rd index f58f513a..f397d9b1 100644 --- a/man/janitor-package.Rd +++ b/man/janitor-package.Rd @@ -4,29 +4,14 @@ \name{janitor-package} \alias{janitor} \alias{janitor-package} -\title{janitor} +\title{janitor: Simple Tools for Examining and Cleaning Dirty Data} \description{ -janitor has simple little tools for examining and cleaning dirty data. +The main janitor functions can: perfectly format data.frame column names; provide quick counts of variable combinations (i.e., frequency tables and crosstabs); and explore duplicate records. Other janitor functions nicely format the tabulation results. These tabulate-and-report functions approximate popular features of SPSS and Microsoft Excel. This package follows the principles of the "tidyverse" and works well with the pipe function \%>\%. janitor was built with beginning-to-intermediate R users in mind and is optimized for user-friendliness. } -\section{Main functions}{ - -The main janitor functions can: perfectly format data.frame -column names; provide quick counts of variable combinations (i.e., -frequency tables and crosstabs); and explore duplicate records. Other -janitor functions nicely format the tabulation results. These -tabulate-and-report functions approximate popular features of SPSS and -Microsoft Excel. -} - \section{Package context}{ -This package follows the principles of the "tidyverse" and works -well with the pipe function \verb{\\\%>\\\%}. - -janitor was built with beginning-to-intermediate R users in mind -and is optimized for user-friendliness.
Advanced users can do most -things covered here, but they can do it faster with janitor and save -their thinking for more fun tasks. +Advanced users can do most things covered here, but they can do it +faster with janitor and save their thinking for more fun tasks. } \seealso{ @@ -52,3 +37,4 @@ Other contributors: } } +\keyword{internal} diff --git a/man/janitor_deprecated.Rd b/man/janitor_deprecated.Rd index d1591a6d..6b9cb5c2 100644 --- a/man/janitor_deprecated.Rd +++ b/man/janitor_deprecated.Rd @@ -8,13 +8,14 @@ These functions have already become defunct or may be defunct as soon as the nex } \details{ \itemize{ -\item \code{\link[=adorn_crosstab]{adorn_crosstab()}} -\item \code{\link[=crosstab]{crosstab()}} -\item \code{\link[=use_first_valid_of]{use_first_valid_of()}} -\item \code{\link[=convert_to_NA]{convert_to_NA()}} -\item \code{\link[=add_totals_col]{add_totals_col()}} -\item \code{\link[=add_totals_row]{add_totals_row()}} -\item \code{\link[=remove_empty_rows]{remove_empty_rows()}} -\item \code{\link[=remove_empty_cols]{remove_empty_cols()}} +\item \code{\link[=adorn_crosstab]{adorn_crosstab()}} -> \code{adorn_} +\item \code{\link[=crosstab]{crosstab()}} -> \code{\link[=tabyl]{tabyl()}} +\item \code{\link[=use_first_valid_of]{use_first_valid_of()}} -> \code{\link[dplyr:coalesce]{dplyr::coalesce()}} +\item \code{\link[=convert_to_NA]{convert_to_NA()}} -> \code{\link[dplyr:na_if]{dplyr::na_if()}} +\item \code{\link[=add_totals_col]{add_totals_col()}} -> \code{\link[=adorn_totals]{adorn_totals(where = "col")}} +\item \code{\link[=add_totals_row]{add_totals_row()}} -> \code{\link[=adorn_totals]{adorn_totals()}} +\item \code{\link[=remove_empty_rows]{remove_empty_rows()}} -> \code{\link[=remove_empty]{remove_empty("rows")}} +\item \code{\link[=remove_empty_cols]{remove_empty_cols()}} -> \code{\link[=remove_empty]{remove_empty("cols")}} } } +\keyword{internal} diff --git a/man/make_clean_names.Rd b/man/make_clean_names.Rd index 2486da61..d17559fa 100644 --- 
a/man/make_clean_names.Rd +++ b/man/make_clean_names.Rd @@ -107,10 +107,10 @@ the Spanish character "enye" becomes "n". The order of operations is: make replacements, (optional) ASCII conversion, remove initial spaces and punctuation, apply \code{base::make.names()}, -apply \code{snakecase::to_any_case}, and add numeric suffixes +apply \code{snakecase::to_any_case()}, and add numeric suffixes to resolve any duplicated names. -This function relies on \code{snakecase::to_any_case} and can take advantage of +This function relies on \code{snakecase::to_any_case()} and can take advantage of its versatility. For instance, an abbreviation like "ID" can have its capitalization preserved by passing the argument \code{abbreviations = "ID"}. See the documentation for \code{\link[snakecase:to_any_case]{snakecase::to_any_case()}} diff --git a/man/pipe.Rd b/man/pipe.Rd index ee8964de..4f928936 100644 --- a/man/pipe.Rd +++ b/man/pipe.Rd @@ -1,13 +1,21 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/utils-pipe.R \name{\%>\%} \alias{\%>\%} \title{Pipe operator} \usage{ lhs \%>\% rhs } +\arguments{ +\item{lhs}{A value or the magrittr placeholder.} + +\item{rhs}{A function call using the magrittr semantics.} +} +\value{ +The result of calling \code{rhs(lhs)}. +} \description{ -Exported from the magrittr package. To learn more, run \verb{?magrittr::}\\%>\\%``. +See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. } \examples{ mtcars \%>\% diff --git a/man/remove_empty_cols.Rd b/man/remove_empty_cols.Rd index 35778141..5c996472 100644 --- a/man/remove_empty_cols.Rd +++ b/man/remove_empty_cols.Rd @@ -13,10 +13,6 @@ remove_empty_cols(dat) Returns the data.frame with no empty columns. } \description{ -This function is deprecated, use \code{remove_empty("cols")} instead.
-} -\examples{ -# not run: -# dat \%>\% remove_empty_cols +This function is deprecated, use \code{\link[=remove_empty]{remove_empty("cols")}} instead. } \keyword{internal} diff --git a/man/remove_empty_rows.Rd b/man/remove_empty_rows.Rd index ebaf2192..4d987c7d 100644 --- a/man/remove_empty_rows.Rd +++ b/man/remove_empty_rows.Rd @@ -13,7 +13,7 @@ remove_empty_rows(dat) Returns the data.frame with no empty rows. } \description{ -This function is deprecated, use \code{remove_empty("rows")} instead. +This function is deprecated, use \code{\link[=remove_empty]{remove_empty("rows")}} instead. } \examples{ # not run: diff --git a/man/row_to_names.Rd b/man/row_to_names.Rd index 34e4e48a..e1096399 100644 --- a/man/row_to_names.Rd +++ b/man/row_to_names.Rd @@ -19,7 +19,7 @@ row_to_names( \item{row_number}{The row(s) of \code{dat} containing the variable names or the string \code{"find_header"} to use \code{find_header(dat=dat, ...)} to find the row_number. Allows for multiple rows input as a numeric vector. NA's are -ignored, and if a column contains only NA value it will be named \code{"NA"}.} +ignored, and if a column contains only \code{NA} value it will be named \code{"NA"}.} \item{...}{Sent to \code{find_header()}, if \code{row_number = "find_header"}. Otherwise, ignored.} diff --git a/man/sas_numeric_to_date.Rd b/man/sas_numeric_to_date.Rd index 94ff9af9..50769818 100644 --- a/man/sas_numeric_to_date.Rd +++ b/man/sas_numeric_to_date.Rd @@ -36,9 +36,9 @@ SAS Date, Time, and Datetime Values reference (retrieved on 2022-03-08): https://v8doc.sas.com/sashtml/lrcon/zenid-63.htm } \seealso{ -Other Date-time cleaning: +Other date-time cleaning: \code{\link{convert_to_date}()}, \code{\link{excel_numeric_to_date}()}, \code{\link{excel_time_to_numeric}()} } -\concept{Date-time cleaning} +\concept{date-time cleaning} diff --git a/man/tabyl.Rd b/man/tabyl.Rd index 461ab9d5..f883dd98 100644 --- a/man/tabyl.Rd +++ b/man/tabyl.Rd @@ -13,29 +13,41 @@ tabyl(dat, ...) 
\method{tabyl}{data.frame}(dat, var1, var2, var3, show_na = TRUE, show_missing_levels = TRUE, ...) } \arguments{ -\item{dat}{a \code{data.frame} containing the variables you wish to count. Or, a vector you want to tabulate.} +\item{dat}{A \code{data.frame} containing the variables you wish to count. +Or, a vector you want to tabulate.} -\item{...}{the arguments to tabyl (here just for the sake of documentation compliance, as all arguments are listed with the vector- and data.frame-specific methods)} +\item{...}{Additional arguments passed to methods.} -\item{show_na}{should counts of \code{NA} values be displayed? In a one-way tabyl, the presence of \code{NA} values triggers an additional column showing valid percentages(calculated excluding \code{NA} values).} +\item{show_na}{Should counts of \code{NA} values be displayed? In a one-way tabyl, +the presence of \code{NA} values triggers an additional column showing valid percentages +(calculated excluding \code{NA} values).} -\item{show_missing_levels}{should counts of missing levels of factors be displayed? These will be rows and/or columns of zeroes. Useful for keeping consistent output dimensions even when certain factor levels may not be present in the data.} +\item{show_missing_levels}{Should counts of missing levels of factors be displayed? +These will be rows and/or columns of zeroes. 
Useful for keeping consistent +output dimensions even when certain factor levels may not be present in the data.} -\item{var1}{the column name of the first variable.} +\item{var1}{The column name of the first variable.} -\item{var2}{(optional) the column name of the second variable (the rows in a 2-way tabulation).} +\item{var2}{(optional) the column name of the second variable +(the rows in a 2-way tabulation).} -\item{var3}{(optional) the column name of the third variable (the list in a 3-way tabulation).} +\item{var3}{(optional) the column name of the third variable +(the list in a 3-way tabulation).} } \value{ -A data.frame with frequencies and percentages of the tabulated variable(s). A 3-way tabulation returns a list of data.frames. +A \code{data.frame} with frequencies and percentages of the tabulated variable(s). +A 3-way tabulation returns a list of data frames. } \description{ -A fully-featured alternative to \code{table()}. Results are data.frames and can be formatted and enhanced with janitor's family of \code{adorn_} functions. +A fully-featured alternative to \code{table()}. Results are data.frames and can be +formatted and enhanced with janitor's family of \code{adorn_} functions. -Specify a data.frame and the one, two, or three unquoted column names you want to tabulate. Three variables generates a list of 2-way tabyls, split by the third variable. +Specify a \code{data.frame} and the one, two, or three unquoted column names you +want to tabulate. Three variables generates a list of 2-way tabyls, +split by the third variable. -Alternatively, you can tabulate a single variable that isn't in a data.frame by calling \code{tabyl} on a vector, e.g., \code{tabyl(mtcars$gear)}. +Alternatively, you can tabulate a single variable that isn't in a \code{data.frame} +by calling \code{tabyl()} on a vector, e.g., \code{tabyl(mtcars$gear)}. 
} \examples{ diff --git a/man/top_levels.Rd b/man/top_levels.Rd index 0484d7b2..821c02c9 100644 --- a/man/top_levels.Rd +++ b/man/top_levels.Rd @@ -8,14 +8,14 @@ other levels.} top_levels(input_vec, n = 2, show_na = FALSE) } \arguments{ -\item{input_vec}{the factor variable to tabulate.} +\item{input_vec}{The factor variable to tabulate.} -\item{n}{number of levels to include in top and bottom groups} +\item{n}{Number of levels to include in top and bottom groups} -\item{show_na}{should cases where the variable is NA be shown?} +\item{show_na}{Should cases where the variable is \code{NA} be shown?} } \value{ -a data.frame (actually a \code{tbl_df}) with the frequencies of the +A \code{data.frame} (actually a \code{tbl_df}) with the frequencies of the grouped, tabulated variable. Includes counts and percentages, and valid percentages (calculated omitting \code{NA} values, if present in the vector and \code{show_na = TRUE}.) diff --git a/man/use_first_valid_of.Rd b/man/use_first_valid_of.Rd index 2466f43e..e735e1dc 100644 --- a/man/use_first_valid_of.Rd +++ b/man/use_first_valid_of.Rd @@ -2,25 +2,25 @@ % Please edit documentation in R/janitor_deprecated.R \name{use_first_valid_of} \alias{use_first_valid_of} -\title{Returns first non-NA value from a set of vectors.} +\title{Returns first non-\code{NA} value from a set of vectors.} \usage{ use_first_valid_of(..., if_all_NA = NA) } \arguments{ \item{...}{the input vectors. Order matters: these are searched and prioritized in the order they are supplied.} -\item{if_all_NA}{what value should be used when all of the vectors return \code{NA} for a certain index? Default is NA.} +\item{if_all_NA}{what value should be used when all of the vectors return \code{NA} for a certain index? Default is \code{NA}.} } \value{ Returns a single vector with the selected values. } \description{ -At each position of the input vectors, iterates through in order and returns the first non-NA value. 
This is a robust replacement of the common \code{ifelse(!is.na(x), x, ifelse(!is.na(y), y, z))}. It's more readable and handles problems like \code{ifelse}'s inability to work with dates in this way. -} -\section{Warning}{ - Deprecated, do not use in new code. Use \code{dplyr::coalesce()} instead. -} +Warning: Deprecated, do not use in new code. Use \code{\link[dplyr:coalesce]{dplyr::coalesce()}} instead. +At each position of the input vectors, iterates through in order and returns the first non-NA value. +This is a robust replacement of the common \code{ifelse(!is.na(x), x, ifelse(!is.na(y), y, z))}. +It's more readable and handles problems like \code{\link[=ifelse]{ifelse()}}'s inability to work with dates in this way. +} \seealso{ janitor_deprecated } diff --git a/tests/testthat/test-adorn-totals.R b/tests/testthat/test-adorn-totals.R index b7598ebc..045460be 100644 --- a/tests/testthat/test-adorn-totals.R +++ b/tests/testthat/test-adorn-totals.R @@ -168,12 +168,12 @@ test_that("error thrown if no columns past first are numeric", { ) expect_error( adorn_totals(df2, "col"), - "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions." + "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions." ) expect_error( mixed %>% adorn_totals("row", "-", TRUE, "Totals", d), - "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions." + "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions." 
) # Add a test where only the first column is numeric @@ -183,7 +183,7 @@ test_that("error thrown if no columns past first are numeric", { ) expect_error( adorn_totals(df3), - "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions." + "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions." ) }) diff --git a/tests/testthat/test-tabyl.R b/tests/testthat/test-tabyl.R index e60b25f7..7667e15f 100644 --- a/tests/testthat/test-tabyl.R +++ b/tests/testthat/test-tabyl.R @@ -309,38 +309,22 @@ test_that("NA levels get moved to the last column in the data.frame, are suppres y_with_missing[["NA_"]] %>% untabyl(), # column c remains numeric data.frame(c = 10, `1` = 1, `2` = 0, NA_ = 1, check.names = FALSE) ) - # If no NA in 3rd variable, it doesn't appear in split list expect_equal(length(dplyr::starwars %>% dplyr::filter(species == "Human") %>% tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 2) - # The starwars data set changed in dplyr v 1.0.0 so have two blocks of tests: - if (packageVersion("dplyr") > package_version("0.8.5")) { - # If there is NA, it does appear in split list - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 3) - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE)), 3) - - # NA level in the list gets suppressed if show_na = FALSE. Should have one less level if NA is suppressed. 
- expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_na = TRUE)), 3) - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_na = FALSE)), 2) - } else { - # If there is NA, it does appear in split list - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 5) - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE)), 5) - - # NA level in the list gets suppressed if show_na = FALSE. Should have one less level if NA is suppressed. - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_na = TRUE)), 5) - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_na = FALSE)), 4) - } + # If there is NA, it does appear in split list + expect_equal(length(dplyr::starwars %>% + tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 3) + expect_equal(length(dplyr::starwars %>% + tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE)), 3) + + # NA level in the list gets suppressed if show_na = FALSE. Should have one less level if NA is suppressed. + expect_equal(length(dplyr::starwars %>% + tabyl(eye_color, skin_color, gender, show_na = TRUE)), 3) + expect_equal(length(dplyr::starwars %>% + tabyl(eye_color, skin_color, gender, show_na = FALSE)), 2) }) test_that("tabyl fill 0s with show_missing_levels = FALSE", {