From fbe6e6f31473a5939b491df37f01128f7a9698cb Mon Sep 17 00:00:00 2001
From: teofiln <teofiln@gmail.com>
Date: Wed, 15 Sep 2021 21:30:58 -0600
Subject: [PATCH] add option to omit colname prefix when only one column is
 recoded (includes a couple of tests)

---
 R/dummy_cols.R                            | 23 +++++++-
 man/dummy_cols.Rd                         |  6 +-
 man/dummy_columns.Rd                      |  6 +-
 tests/testthat/test-omit-colname-prefix.R | 70 +++++++++++++++++++++++
 4 files changed, 101 insertions(+), 4 deletions(-)
 create mode 100644 tests/testthat/test-omit-colname-prefix.R

diff --git a/R/dummy_cols.R b/R/dummy_cols.R
index b8d017f..c038e9d 100644
--- a/R/dummy_cols.R
+++ b/R/dummy_cols.R
@@ -32,6 +32,9 @@
 #' and dog dummy columns.
 #' @param remove_selected_columns
 #' If TRUE (not default), removes the columns used to generate the dummy columns.
+#' @param omit_colname_prefix
+#' If TRUE (not default) and \code{length(select_columns) == 1}, omits the
+#' \code{select_columns} name prefix from the newly generated dummy columns.
 #'
 #' @return
 #' A data.frame (or tibble or data.table, depending on input data type) with
@@ -54,7 +57,8 @@ dummy_cols <- function(.data,
                        remove_most_frequent_dummy = FALSE,
                        ignore_na = FALSE,
                        split = NULL,
-                       remove_selected_columns = FALSE) {
+                       remove_selected_columns = FALSE,
+                       omit_colname_prefix = FALSE) {
 
   stopifnot(is.null(select_columns) || is.character(select_columns),
             select_columns != "",
@@ -177,6 +181,7 @@ dummy_cols <- function(.data,
     }
 
     data.table::alloc.col(.data, ncol(.data) + length(unique_vals))
+
     #   data.table::set(.data, j = paste0(col_name, "_", unique_vals), value = 0L)
     .data[, paste0(col_name, "_", unique_vals)] <- 0L
     for (unique_value in unique_vals) {
@@ -219,8 +224,22 @@ dummy_cols <- function(.data,
   }
 
   .data <- fix_data_type(.data, data_type)
-  return(.data)
+  if (omit_colname_prefix) {
+    if (length(select_columns) == 1) {
+      # Find the dummy columns by exact name. Values are literal text, so a
+      # regex search could miss them or match unrelated columns.
+      prefix <- paste0(select_columns, "_")
+      new_col_index <- names(.data) %in% paste0(prefix, unique_vals)
+      # Strip only the leading prefix; a value that repeats it stays intact.
+      names(.data)[new_col_index] <-
+        substring(names(.data)[new_col_index], nchar(prefix) + 1L)
+    } else {
+      message("Can't omit the colname prefix when recoding more than one column.")
+      message("Returning prefixed dummy columns.")
+    }
+  }
 
+  return(.data)
 }
 
 
diff --git a/man/dummy_cols.Rd b/man/dummy_cols.Rd
index ca6f916..337d2cb 100644
--- a/man/dummy_cols.Rd
+++ b/man/dummy_cols.Rd
@@ -11,7 +11,8 @@ dummy_cols(
   remove_most_frequent_dummy = FALSE,
   ignore_na = FALSE,
   split = NULL,
-  remove_selected_columns = FALSE
+  remove_selected_columns = FALSE,
+  omit_colname_prefix = FALSE
 )
 }
 \arguments{
@@ -38,6 +39,9 @@ then a split value of "," this row would have a value of 1 for both the cat
 and dog dummy columns.}
 
 \item{remove_selected_columns}{If TRUE (not default), removes the columns used to generate the dummy columns.}
+
+\item{omit_colname_prefix}{If TRUE (not default) and \code{length(select_columns) == 1}, omits the
+\code{select_columns} name prefix from the newly generated dummy columns.}
 }
 \value{
 A data.frame (or tibble or data.table, depending on input data type) with
diff --git a/man/dummy_columns.Rd b/man/dummy_columns.Rd
index 1236b0a..19946bd 100644
--- a/man/dummy_columns.Rd
+++ b/man/dummy_columns.Rd
@@ -11,7 +11,8 @@ dummy_columns(
   remove_most_frequent_dummy = FALSE,
   ignore_na = FALSE,
   split = NULL,
-  remove_selected_columns = FALSE
+  remove_selected_columns = FALSE,
+  omit_colname_prefix = FALSE
 )
 }
 \arguments{
@@ -38,6 +39,9 @@ then a split value of "," this row would have a value of 1 for both the cat
 and dog dummy columns.}
 
 \item{remove_selected_columns}{If TRUE (not default), removes the columns used to generate the dummy columns.}
+
+\item{omit_colname_prefix}{If TRUE (not default) and \code{length(select_columns) == 1}, omits the
+\code{select_columns} name prefix from the newly generated dummy columns.}
 }
 \description{
 dummy_columns() quickly creates dummy (binary) columns from character and
diff --git a/tests/testthat/test-omit-colname-prefix.R b/tests/testthat/test-omit-colname-prefix.R
new file mode 100644
index 0000000..0825ad9
--- /dev/null
+++ b/tests/testthat/test-omit-colname-prefix.R
@@ -0,0 +1,70 @@
+# Tests for the `omit_colname_prefix` argument of dummy_cols().
+
+# Shared fixture: a ten-row data frame carrying the tibble classes, built
+# directly with structure().
+#   colA - character values "a", "b", "c"
+#   colB - numeric values 1, 2, 3
+#   colC - character values "val1"-"val4" and "val6"-"val8"
+sample_data <- structure(
+  list(
+    colA = rep(c("a", "b", "c"), times = c(3L, 2L, 5L)),
+    colB = rep(c(1, 2, 3), times = c(3L, 2L, 5L)),
+    colC = c(
+      "val1", "val2", "val3", "val1", "val2",
+      "val7", "val2", "val4", "val6", "val8"
+    )
+  ),
+  row.names = c(NA, -10L),
+  class = c("tbl_df", "tbl", "data.frame")
+)
+
+test_that("omit_colname_prefix works", {
+  # A single recoded column: the new dummy columns are named by value only.
+  recoded <- dummy_cols(
+    sample_data,
+    "colC",
+    remove_selected_columns = TRUE,
+    omit_colname_prefix = TRUE
+  )
+
+  expected_names <- c(
+    "colA",
+    "colB",
+    "val1",
+    "val2",
+    "val3",
+    "val4",
+    "val6",
+    "val7",
+    "val8"
+  )
+
+  expect_named(recoded, expected_names)
+})
+
+test_that("omit_colname_prefix does not remove prefix when >1 select_columns", {
+  # Two recoded columns: the option is not applied and every dummy column
+  # keeps its "<column>_" prefix.
+  recoded <- dummy_cols(
+    sample_data,
+    c("colB", "colC"),
+    remove_selected_columns = TRUE,
+    omit_colname_prefix = TRUE
+  )
+
+  expected_names <- c(
+    "colA",
+    "colB_1",
+    "colB_2",
+    "colB_3",
+    "colC_val1",
+    "colC_val2",
+    "colC_val3",
+    "colC_val4",
+    "colC_val6",
+    "colC_val7",
+    "colC_val8"
+  )
+
+  expect_named(recoded, expected_names)
+})