allow missing values using na.rm argument

Kss2k · Nov 22, 2024 · 92c4d9a · 92c4d9a
1 parent bd14a0f
commit 92c4d9a
Show file tree

Hide file tree

Showing 7 changed files with 69 additions and 27 deletions.
diff --git a/R/modsem.R b/R/modsem.R
@@ -58,7 +58,7 @@
 #' summary(est1_ca)
 #'
 #' # LMS approach
-#' est1_lms <- modsem(m1, oneInt, method = "lms")
+#' est1_lms <- modsem(m1, oneInt, method = "lms", EFIM.S=1000)
 #' summary(est1_lms)
 #'
 #' # QML approach

diff --git a/R/modsem_da.R b/R/modsem_da.R
@@ -154,7 +154,7 @@
 #' "
 #'
 #' # LMS Approach
-#' estTpb <- modsem_da(tpb, data = TPB, method = lms)
+#' estTpb <- modsem_da(tpb, data = TPB, method = lms, EFIM.S = 1000)
 #' summary(estTpb)
 #' }
 modsem_da <- function(model.syntax = NULL,

diff --git a/R/modsem_pi.R b/R/modsem_pi.R
@@ -45,6 +45,10 @@
 #'
 #' @param run should the model be run via \code{lavaan}, if \code{FALSE} only modified syntax and data is returned
 #'
+#' @param na.rm should rows with missing values be removed (case-wise deletion)? The default, \code{NULL}, behaves like
+#' \code{TRUE} but issues a warning when incomplete rows are removed. If \code{TRUE}, incomplete rows are removed silently.
+#' If \code{FALSE}, incomplete rows are kept.
+#'
 #' @param suppress.warnings.lavaan should warnings from \code{lavaan} be suppressed?
 #' @param suppress.warnings.match should warnings from \code{match} be suppressed?
 #'
@@ -136,6 +140,7 @@ modsem_pi <- function(model.syntax = NULL,
                       estimator = "ML",
                       group = NULL,
                       run = TRUE,
+                      na.rm = NULL,
                       suppress.warnings.lavaan = FALSE,
                       suppress.warnings.match = FALSE,
                       ...) {
@@ -164,8 +169,9 @@ modsem_pi <- function(model.syntax = NULL,
   # Data Processing  -----------------------------------------------------------
   data          <- data[c(modelSpec$oVs, group)]
   completeCases <- stats::complete.cases(data)
-  if (any(!completeCases)) {
-    warning2("Removing missing values case-wise.")
+
+  if (any(!completeCases) && (is.null(na.rm) || na.rm)) {
+    warnif(is.null(na.rm), "Removing missing values case-wise.")
     data <- data[completeCases, ]
   }
 
@@ -174,7 +180,7 @@ modsem_pi <- function(model.syntax = NULL,
   }
 
   if (center.data || method %in% auto.center) {
-    data <- lapplyDf(data, FUN = function(x) x - mean(x))
+    data <- lapplyDf(data, FUN = function(x) x - mean(x, na.rm = TRUE))
   }
 
   prodInds <-
@@ -243,7 +249,7 @@ createProdInds <- function(modelSpec,
 
   if (center.after) {
     indProds <- lapply(indProds, FUN = function(df)
-                       lapplyDf(df, FUN = function(x) x - mean(x)))
+                       lapplyDf(df, FUN = function(x) x - mean(x, na.rm = TRUE)))
 
   }
 
@@ -252,22 +258,19 @@ createProdInds <- function(modelSpec,
 
 
 createIndProds <- function(relDf, indNames, data, centered = FALSE) {
-  # Getting the indProd names
   varnames <- unname(colnames(relDf))
-  # Selecting the inds from the dataset
-  inds <- data[indNames]
-  # Check if inds are numeric
+  inds      <- data[indNames]
   isNumeric <- sapply(inds, is.numeric)
 
   stopif(any(!isNumeric), "Expected inds to be numeric when creating prods")
 
-  # Centering them
-  if (centered) inds <- lapplyDf(inds, FUN = function(x) x - mean(x))
+  if (centered) {
+    inds <- lapplyDf(inds, FUN = function(x) x - mean(x, na.rm = TRUE))
+  }
 
-  prods <- lapplyNamed(varnames,
-                FUN = function(varname, data, relDf)
-                  multiplyIndicatorsCpp(data[relDf[[varname]]]),
-                data = inds, relDf = relDf, names = varnames)
+  prods <- lapplyNamed(varnames, FUN = function(varname, data, relDf)
+                       multiplyIndicatorsCpp(data[relDf[[varname]]]),
+                       data = inds, relDf = relDf, names = varnames)
 
   # return as data.frame()
   structure(prods, row.names = seq_len(nrow(data)),
@@ -284,14 +287,14 @@ calculateResidualsDf <- function(dependentDf, independentNames, data) {
   # Getting formula
   formula <- getResidualsFormula(dependentNames, independentNames)
 
-  if (length(dependentNames <= 1)) {
-    res <- as.data.frame(stats::residuals(stats::lm(formula = formula,
-                                                    combinedData)))
-    colnames(res) <- dependentNames
-    return(res)
-  }
+  resNoNA <- as.data.frame(stats::residuals(stats::lm(formula = formula,
+                                                      combinedData)))
+  colnames(resNoNA) <- dependentNames
+
+  resNA <- dependentDf 
+  resNA[stats::complete.cases(data), ] <- resNoNA
 
-  stats::residuals(stats::lm(formula = formula, combinedData))
+  resNA
 }
 
 
@@ -381,14 +384,14 @@ getParTableRestrictedMean <- function(prodName, elementsInProdName,
 }
 
 
-multiplyInds <- function(df) {
+multiplyIndicators <- function(df) {
   if (is.null(df)) return(NULL)
   if (ncol(df) <= 1) return(df[[1]])
 
   y <- cbind.data.frame(df[[1]] * df[[2]],
                         df[,-(1:2),drop = FALSE])
 
-  multiplyInds(y)
+  multiplyIndicators(y)
 }
 
 

diff --git a/man/modsem.Rd b/man/modsem.Rd
diff --git a/man/modsem_da.Rd b/man/modsem_da.Rd
diff --git a/man/modsem_pi.Rd b/man/modsem_pi.Rd
diff --git a/tests/testthat/test_missing.R b/tests/testthat/test_missing.R
@@ -0,0 +1,34 @@
+devtools::load_all()
+m1 <- '
+  # Outer Model
+  X =~ x1 + x2 +x3
+  Y =~ y1 + y2 + y3
+  Z =~ z1 + z2 + z3
+
+  # Inner model
+  Y ~ X + Z + X:Z
+'
+
+# Set four columns to NA in three rows of a copy of oneInt.
+oneInt2 <- oneInt
+oneInt2[c(176, 258, 1900), c(1, 2, 3, 7)] <- NA
+
+# Double centering approach
+# Default na.rm: a warning about case-wise removal is expected.
+testthat::expect_warning(modsem(m1, oneInt2),
+                         regexp = "Removing missing values case-wise")
+# na.rm = TRUE: the returned data is expected to contain no NA.
+est <- modsem(m1, oneInt2, na.rm = TRUE)
+testthat::expect_false(any(is.na(est$data)))
+# na.rm = FALSE: the returned data is expected to still contain NA.
+est <- modsem(m1, oneInt2, na.rm = FALSE)
+testthat::expect_true(any(is.na(est$data)))
+
+# Residual Centering Approach (same three expectations as above)
+testthat::expect_warning(modsem(m1, oneInt2, method = "rca"),
+                         regexp = "Removing missing values case-wise")
+est <- modsem(m1, oneInt2, method = "rca", na.rm = TRUE)
+testthat::expect_false(any(is.na(est$data)))
+
+est <- modsem(m1, oneInt2, method = "rca", na.rm = FALSE)
+testthat::expect_true(any(is.na(est$data)))