From 9d372e5cb2d0d13726bb6a7853cfc14173007190 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 4 Mar 2022 09:32:27 +0100 Subject: [PATCH] fix data_to_numeric for factors with NA --- R/center.R | 3 +- R/convert_data_to_numeric.R | 48 +++++++++++++++-- data/efc.RData | Bin 940 -> 949 bytes tests/testthat/test-convert_data_to_numeric.R | 49 ++++++++++++++++++ tests/testthat/test-labelled_data.R | 20 +++++++ 5 files changed, 116 insertions(+), 4 deletions(-) diff --git a/R/center.R b/R/center.R index 7b1e878ea..f507e165a 100644 --- a/R/center.R +++ b/R/center.R @@ -105,7 +105,8 @@ center.numeric <- function(x, attr(centered_x, "center") <- args$center attr(centered_x, "scale") <- args$scale attr(centered_x, "robust") <- robust - centered_x + # labels + .set_back_labels(centered_x, x, include_values = FALSE) } diff --git a/R/convert_data_to_numeric.R b/R/convert_data_to_numeric.R index 204ca4645..bf7fa0571 100644 --- a/R/convert_data_to_numeric.R +++ b/R/convert_data_to_numeric.R @@ -28,13 +28,27 @@ data_to_numeric <- convert_data_to_numeric #' @export convert_data_to_numeric.data.frame <- function(x, dummy_factors = TRUE, ...) { out <- sapply(x, convert_data_to_numeric, dummy_factors = dummy_factors, simplify = FALSE) - as.data.frame(do.call(cbind, out)) + # save variable attributes + attr_vars <- lapply(out, attributes) + # "out" is currently a list, bind columns and to data frame + out <- as.data.frame(do.call(cbind, out)) + # set back attributes + for (i in colnames(out)) { + if (is.list(attr_vars[[i]])) { + if (is.list(attributes(out[[i]]))) { + attributes(out[[i]]) <- utils::modifyList(attr_vars[[i]], attributes(out[[i]])) + } else { + attributes(out[[i]]) <- attr_vars[[i]] + } + } + } + out } #' @export convert_data_to_numeric.numeric <- function(x, ...) { - as.numeric(x) + .set_back_labels(as.numeric(x), x) } #' @export @@ -49,9 +63,37 @@ convert_data_to_numeric.factor <- function(x, dummy_factors = TRUE, ...) { if (dummy_factors) { out <- as.data.frame(stats::model.matrix(~x, contrasts.arg = list(x = "contr.treatment"))) out[1] <- as.numeric(rowSums(out[2:ncol(out)]) == 0) + + # insert back NA rows. if "x" had missing values, model.matrix() creates an + # array with only non-missing values, so some rows are missing. First, we + # need to now which rows are missing (na_values) and the length of the + # original vector (which will be the number of rows in the final data frame) + + na_values <- which(is.na(x)) + rows_x <- length(x) + + if (any(na_values)) { + # iterate all missing values that have + for (i in 1:length(na_values)) { + # if the first observation was missing, add NA row and bind data frame + if (i == 1 && na_values[i] == 1) { + out <- rbind(NA, out) + } else { + # if the last observation was NA, add NA row to data frame + if (na_values[i] == rows_x) { + out <- rbind(out, NA) + } else { + # else, pick rows from beginning to current NA value, add NA, + # and rbind the remaining rows + out <- rbind(out[1:(na_values[i] - 1), ], NA, out[na_values[i]:nrow(out), ]) + } + } + } + rownames(out) <- NULL + } names(out) <- levels(x) } else { - out <- as.numeric(x) + out <- .set_back_labels(as.numeric(x), x) } out } diff --git a/data/efc.RData b/data/efc.RData index d54bd881978305ac2074f81f8444b6b24cb281f6..ca7c18c94cb07b3067121e794ebeba3df91d1c43 100644 GIT binary patch delta 934 zcmV;X16lm62ek)~9eAH{>v_%fFPC)}|#IqGrqgCvrF; zwPqz&%Q-Fhl`9t^ehl%?mH$8WuPjK;dQ~&TAEduc{9VHDgnwIrn}i<-iy%KHe;n_K z=*ne?`@}D3QN1kCtAvY$@4zqnBYH5;qbUCdZUenTxTetG2YM6eJwm%2;(@{s+Cl!Y z_!H>n!chr*9*Ap?5-vPDrj5(F&;~- z%E#d6g8bg{7(>WUYG{mlIM#tqD`M&pCrM6m&nP_uVvOU9b@hbyyjBtYq4^UJia?wN zMG(B#G;vdT9u#=6&Vyp;!RS-rq;C{)hH#D;fquFVF&;H#O`iCL+3|=q@3xxthSxeb zQL@nP%6|g3JTXEx@5(7SQAtpMBYR?=dxDkibF6f`Ww|bxP1Y>i>pI#khtjy4*WhbP zrI!J}8HFC`$Lht|1OL8b+3mp2Vt=Ss0Cp6EJU?x9d!xLD(wEJSC!DsZ zHsv;=R(sZvThPed_*|pz=erZ%&G@dcOvv(e!Yt@zL7(qTd}n(NbPo6{fXvSWpTF%* z!ZnbWLL9tZ%#Ufl8!Umm2z;Io&zs|NUOAxG09n2cd9i+WwBA6@@zF2m$M0oJ;h*nN zynmi}*W-A&PWWCpZXWD$UD@90%FJE~Vj_b(oZ zy6mSjVK&<*@<=s%O|M~=%%&&W{+W9Xy*)v8Q+B@K-xK*b7WoZV{2oxnf;`TN%Gmi* z!Is|(Y}yg^WBa%S$NaR@?Pd9+*8f!1FCXMzzu*5GDo$eRGgZs8vNcDVXtWvp2b;y) ImPZl*0L3=yVE_OC delta 925 zcmV;O17iHO2doE>9e)#%($>x@i0eRM!Vva~V~9)B+cvB(l14i}%^$K)`z8LQ{Q|q2 z;YUH+xCv^Fi#5H(x=Ka#@< zsWmIHTFz;~uUxqd@g1SiMv?WZW{6)&Z-@AMgkK1^0k;U>5`Pv!egbiiiLP9QctHFD zpHTY$JIKN{BpK%B1UBY#R{sGWiK<^XU4{y$%h#@S3bPo59gA;;nR;(dBUc`PXAyEV#( z7Mf^U715M~xql(%s&y`0t1is;S*s#kvr{u0oin%V4{Xa7=BZ@%g!nFXuLq4s?vvuH zKX;R>^i_DRNDZXV=5g0NZ|hejkHfI>^u9LQYqO$R74D0! zSr=_NfcBgW+OtbNreiMH%x$YBx)eYsX1ZlH1-q+>fPV_w)_;tL607nthMsiHV+JMxBOjN^-S^@R4kRwMeCw3q*QPz2&UD1u;L)5J~Xc~IcN z1`mo@7Se%hMxlp` z8K)O#5B$4+zE2t@flL z-z$;1@pp~7pYKk5H{-j)G9k-12(zG<1%19V@ttiA=p67@0GXc$KA-JP!gY|BLLBTa z=EpSO4VFM&1U}D)=go0BuN=_pfGppDyjVXQtv8T!eDurt@xE*;{PP`(*Awr091qtC z_kV@s=D{AvY*a{>9kJe z8Nuv1Uc)Szjwf3FA>tZ(dxGqy?0mt$C-V71 str() + # factor + expect_equal( + attr(x$e42dep_new, "label", exact = TRUE), + attr(efc$e42dep, "label", exact = TRUE), + ignore_attr = TRUE + ) + # numeric + expect_equal( + attr(x$c12hour_new, "label", exact = TRUE), + attr(efc$c12hour, "label", exact = TRUE), + ignore_attr = TRUE + ) +})