diff --git a/R/center.R b/R/center.R index 7b1e878ea..f507e165a 100644 --- a/R/center.R +++ b/R/center.R @@ -105,7 +105,8 @@ center.numeric <- function(x, attr(centered_x, "center") <- args$center attr(centered_x, "scale") <- args$scale attr(centered_x, "robust") <- robust - centered_x + # labels + .set_back_labels(centered_x, x, include_values = FALSE) } diff --git a/R/convert_data_to_numeric.R b/R/convert_data_to_numeric.R index 204ca4645..bf7fa0571 100644 --- a/R/convert_data_to_numeric.R +++ b/R/convert_data_to_numeric.R @@ -28,13 +28,27 @@ data_to_numeric <- convert_data_to_numeric #' @export convert_data_to_numeric.data.frame <- function(x, dummy_factors = TRUE, ...) { out <- sapply(x, convert_data_to_numeric, dummy_factors = dummy_factors, simplify = FALSE) - as.data.frame(do.call(cbind, out)) + # save variable attributes + attr_vars <- lapply(out, attributes) + # "out" is currently a list, bind columns and to data frame + out <- as.data.frame(do.call(cbind, out)) + # set back attributes + for (i in colnames(out)) { + if (is.list(attr_vars[[i]])) { + if (is.list(attributes(out[[i]]))) { + attributes(out[[i]]) <- utils::modifyList(attr_vars[[i]], attributes(out[[i]])) + } else { + attributes(out[[i]]) <- attr_vars[[i]] + } + } + } + out } #' @export convert_data_to_numeric.numeric <- function(x, ...) { - as.numeric(x) + .set_back_labels(as.numeric(x), x) } #' @export @@ -49,9 +63,37 @@ convert_data_to_numeric.factor <- function(x, dummy_factors = TRUE, ...) { if (dummy_factors) { out <- as.data.frame(stats::model.matrix(~x, contrasts.arg = list(x = "contr.treatment"))) out[1] <- as.numeric(rowSums(out[2:ncol(out)]) == 0) + + # insert back NA rows. if "x" had missing values, model.matrix() creates an + # array with only non-missing values, so some rows are missing. First, we + # need to now which rows are missing (na_values) and the length of the + # original vector (which will be the number of rows in the final data frame) + + na_values <- which(is.na(x)) + rows_x <- length(x) + + if (any(na_values)) { + # iterate all missing values that have + for (i in 1:length(na_values)) { + # if the first observation was missing, add NA row and bind data frame + if (i == 1 && na_values[i] == 1) { + out <- rbind(NA, out) + } else { + # if the last observation was NA, add NA row to data frame + if (na_values[i] == rows_x) { + out <- rbind(out, NA) + } else { + # else, pick rows from beginning to current NA value, add NA, + # and rbind the remaining rows + out <- rbind(out[1:(na_values[i] - 1), ], NA, out[na_values[i]:nrow(out), ]) + } + } + } + rownames(out) <- NULL + } names(out) <- levels(x) } else { - out <- as.numeric(x) + out <- .set_back_labels(as.numeric(x), x) } out } diff --git a/data/efc.RData b/data/efc.RData index d54bd8819..ca7c18c94 100644 Binary files a/data/efc.RData and b/data/efc.RData differ diff --git a/tests/testthat/test-convert_data_to_numeric.R b/tests/testthat/test-convert_data_to_numeric.R index da7fbf384..8ae4da4e3 100644 --- a/tests/testthat/test-convert_data_to_numeric.R +++ b/tests/testthat/test-convert_data_to_numeric.R @@ -33,3 +33,52 @@ test_that("convert factor to numeric, dummy factors", { ignore_attr = TRUE ) }) + +test_that("convert factor to numeric, dummy factors, with NA", { + x1 <- factor(rep(c("a", "b"), 3)) + x2 <- factor(c("a", NA_character_, "a", "b", "a", "b")) + x3 <- factor(c(NA_character_, "b", "a", "b", "a", "b")) + x4 <- factor(c("a", "b", "a", "b", "a", NA_character_)) + x5 <- factor(c(NA_character_, "b", "a", "b", "a", NA_character_)) + x6 <- factor(c(NA_character_, "b", NA_character_, "b", "a", NA_character_)) + x7 <- factor(c(NA_character_, "b", "a", "b", "a", "b", NA_character_, "b", "a", NA_character_, "a", "b", "a", "b", "a", NA_character_)) + + # same observations are missing + expect_equal( + which(!complete.cases(convert_data_to_numeric(x1, dummy_factors = TRUE))), + which(is.na(x1)) + ) + expect_equal( + which(!complete.cases(convert_data_to_numeric(x2, dummy_factors = TRUE))), + which(is.na(x2)) + ) + expect_equal( + which(!complete.cases(convert_data_to_numeric(x3, dummy_factors = TRUE))), + which(is.na(x3)) + ) + expect_equal( + which(!complete.cases(convert_data_to_numeric(x4, dummy_factors = TRUE))), + which(is.na(x4)) + ) + expect_equal( + which(!complete.cases(convert_data_to_numeric(x5, dummy_factors = TRUE))), + which(is.na(x5)) + ) + expect_equal( + which(!complete.cases(convert_data_to_numeric(x6, dummy_factors = TRUE))), + which(is.na(x6)) + ) + expect_equal( + which(!complete.cases(convert_data_to_numeric(x7, dummy_factors = TRUE))), + which(is.na(x7)) + ) + + # output has same number of observation as input + expect_equal(nrow(convert_data_to_numeric(x1, dummy_factors = TRUE)), length(x1)) + expect_equal(nrow(convert_data_to_numeric(x2, dummy_factors = TRUE)), length(x2)) + expect_equal(nrow(convert_data_to_numeric(x3, dummy_factors = TRUE)), length(x3)) + expect_equal(nrow(convert_data_to_numeric(x4, dummy_factors = TRUE)), length(x4)) + expect_equal(nrow(convert_data_to_numeric(x5, dummy_factors = TRUE)), length(x5)) + expect_equal(nrow(convert_data_to_numeric(x6, dummy_factors = TRUE)), length(x6)) + expect_equal(nrow(convert_data_to_numeric(x7, dummy_factors = TRUE)), length(x7)) +}) diff --git a/tests/testthat/test-labelled_data.R b/tests/testthat/test-labelled_data.R index 09a859046..5fe42a66a 100644 --- a/tests/testthat/test-labelled_data.R +++ b/tests/testthat/test-labelled_data.R @@ -176,3 +176,23 @@ test_that("data_addsuffix, labels preserved", { ignore_attr = TRUE ) }) + + + +# data_to_numeric ----------------------------------- + +test_that("data_to_numeric, labels preserved", { + x <- data_to_numeric(efc) |> str() + # factor + expect_equal( + attr(x$e42dep_new, "label", exact = TRUE), + attr(efc$e42dep, "label", exact = TRUE), + ignore_attr = TRUE + ) + # numeric + expect_equal( + attr(x$c12hour_new, "label", exact = TRUE), + attr(efc$c12hour, "label", exact = TRUE), + ignore_attr = TRUE + ) +})