Skip to content

Commit

Permalink
Merge pull request #27 from discindo/master
Browse files Browse the repository at this point in the history
Add option to omit colname prefix when only one column is recoded
  • Loading branch information
jacobkap authored Oct 3, 2021
2 parents 2c2f03d + fbe6e6f commit 4c9902b
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 4 deletions.
23 changes: 21 additions & 2 deletions R/dummy_cols.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
#' and dog dummy columns.
#' @param remove_selected_columns
#' If TRUE (not default), removes the columns used to generate the dummy columns.
#' @param omit_colname_prefix
#' If TRUE (not default) and `length(select_columns) == 1`, omit pre-pending the
#' name of `select_columns` to the names of the newly generated dummy columns
#'
#' @return
#' A data.frame (or tibble or data.table, depending on input data type) with
Expand All @@ -54,7 +57,8 @@ dummy_cols <- function(.data,
remove_most_frequent_dummy = FALSE,
ignore_na = FALSE,
split = NULL,
remove_selected_columns = FALSE) {
remove_selected_columns = FALSE,
omit_colname_prefix = FALSE) {

stopifnot(is.null(select_columns) || is.character(select_columns),
select_columns != "",
Expand Down Expand Up @@ -177,6 +181,7 @@ dummy_cols <- function(.data,
}

data.table::alloc.col(.data, ncol(.data) + length(unique_vals))

# data.table::set(.data, j = paste0(col_name, "_", unique_vals), value = 0L)
.data[, paste0(col_name, "_", unique_vals)] <- 0L
for (unique_value in unique_vals) {
Expand Down Expand Up @@ -219,8 +224,22 @@ dummy_cols <- function(.data,
}

.data <- fix_data_type(.data, data_type)
return(.data)
if (omit_colname_prefix) {
if (length(select_columns) == 1) {

new_col_index <-
as.logical(rowSums(sapply(unique_vals, function(x)
grepl(paste0(select_columns, "_", x), names(.data)))))
names(.data)[new_col_index] <-
gsub(paste0(select_columns, "_"), "", names(.data)[new_col_index])

} else {
message("Can't omit the colname prefix when recoding more than one column.")
message("Returning prefixed dummy columns.")
}
}

return(.data)
}


Expand Down
6 changes: 5 additions & 1 deletion man/dummy_cols.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/dummy_columns.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

70 changes: 70 additions & 0 deletions tests/testthat/test-omit-colname-prefix.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
sample_data <-
structure(
list(
colA = c("a", "a", "a", "b", "b", "c", "c", "c",
"c", "c"),
colB = c(1, 1, 1, 2, 2, 3, 3, 3, 3, 3),
colC = c(
"val1",
"val2",
"val3",
"val1",
"val2",
"val7",
"val2",
"val4",
"val6",
"val8"
)
),
row.names = c(NA, -10L),
class = c("tbl_df", "tbl",
"data.frame")
)

test_that("omit_colname_prefix works", {
expect_named(
dummy_cols(
sample_data,
c("colC"),
remove_selected_columns = TRUE,
omit_colname_prefix = TRUE
),
c(
"colA",
"colB",
"val1",
"val2",
"val3",
"val4",
"val6",
"val7",
"val8"
)
)
})

test_that("omit_colname_prefix does not remove prefix when >1 select_columns",
{
expect_named(
dummy_cols(
sample_data,
c("colB", "colC"),
remove_selected_columns = TRUE,
omit_colname_prefix = TRUE
),
c(
"colA",
"colB_1",
"colB_2",
"colB_3",
"colC_val1",
"colC_val2",
"colC_val3",
"colC_val4",
"colC_val6",
"colC_val7",
"colC_val8"
)
)
})

0 comments on commit 4c9902b

Please sign in to comment.