diff --git a/NAMESPACE b/NAMESPACE index ba9bc8a9a..a47d7d32d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -73,3 +73,7 @@ export(standardize) export(to_numeric) export(unstandardize) export(winsorize) +importFrom(stats,mad) +importFrom(stats,median) +importFrom(stats,sd) +importFrom(stats,weighted.mean) diff --git a/R/check_if_installed.R b/R/check_if_installed.R new file mode 100644 index 000000000..990cead9b --- /dev/null +++ b/R/check_if_installed.R @@ -0,0 +1,35 @@ +#' Checking if needed package is installed + +#' @param package A string naming the package, whose installation needs to be +#' checked in any of the libraries +#' @param reason A phrase describing why the package is needed. The default is a +#' generic description. +#' @param stop Logical that decides whether the function should stop if the +#' needed package is not installed. +#' @param ... Currently ignored +#' +#' @examples +#' \dontrun{ +#' check_if_installed("inexistent_package") +#' } +#' +#' +#' @export + +check_if_installed <- function(package, + reason = "for this function to work", + stop = TRUE, + ...) { + # does it need to be displayed? + is_installed <- requireNamespace(package, quietly = TRUE) + if (!is_installed) { + # prepare the message + message <- paste0( + "Package '", package, "' is required ", reason, ".\n", + "Please install it by running install.packages('", package, "')." + ) + + if (stop) stop(message, call. = FALSE) else warning(message, call. = FALSE) + } + invisible(is_installed) +} diff --git a/R/utils_standardize.R b/R/utils_standardize.R new file mode 100644 index 000000000..8d57cc63f --- /dev/null +++ b/R/utils_standardize.R @@ -0,0 +1,121 @@ + +# For standardize_parameters ---------------------------------------------- + +#' @keywords internal +.get_object <- function(x, attribute_name = "object_name") { + obj_name <- attr(x, attribute_name, exact = TRUE) + model <- NULL + if (!is.null(obj_name)) { + model <- tryCatch( + { + get(obj_name, envir = parent.frame()) + }, + error = function(e) { + NULL + } + ) + if (is.null(model) || + # prevent self reference + inherits(model, "parameters_model")) { + model <- tryCatch( + { + get(obj_name, envir = globalenv()) + }, + error = function(e) { + NULL + } + ) + } + } + model +} + + + +# For standardize_info ---------------------------------------------------- + +#' @keywords internal +#' @importFrom stats weighted.mean +.mean <- function(x, weights = NULL) { + if (!.are_weights(weights)) { + return(mean(x, na.rm = TRUE)) + } + + stopifnot(all(weights > 0, na.rm = TRUE)) + + stats::weighted.mean(x, weights, na.rm = TRUE) +} + +#' @keywords internal +#' @importFrom stats sd +.sd <- function(x, weights = NULL) { + # from cov.wt + if (!.are_weights(weights)) { + return(stats::sd(x, na.rm = TRUE)) + } + + stopifnot(all(weights > 0, na.rm = TRUE)) + + weights1 <- weights / sum(weights) + center <- sum(weights1 * x) + xc <- sqrt(weights1) * (x - center) + var <- (t(xc) %*% xc) / (1 - sum(weights1^2)) + sqrt(as.vector(var)) +} + + + +#' @keywords internal +#' @importFrom stats mad +.mad <- function(x, weights = NULL, constant = 1.4826) { + # From matrixStats + if (!.are_weights(weights)) { + return(stats::mad(x, na.rm = TRUE)) + } + + stopifnot(all(weights > 0, na.rm = TRUE)) + + center <- .median(x, weights = weights) + x <- abs(x - center) + constant * .median(x, weights = weights) +} + + + +#' @keywords internal +#' @importFrom stats median +.median <- function(x, weights = NULL) { + # From spatstat + wiki + if (!.are_weights(weights)) { + return(stats::median(x, na.rm = TRUE)) + } + + stopifnot(all(weights > 0, na.rm = TRUE)) + + oo <- order(x) + x <- x[oo] + weights <- weights[oo] + Fx <- cumsum(weights) / sum(weights) + + lefties <- which(Fx <= 0.5) + left <- max(lefties) + if (length(lefties) == 0) { + result <- x[1] + } else if (left == length(x)) { + result <- x[length(x)] + } else { + result <- x[left] + + if (!(Fx[left - 1] < 0.5 && 1 - Fx[left] < 0.5)) { + right <- left + 1 + y <- x[left] * Fx[left] + x[right] * Fx[right] + if (is.finite(y)) result <- y + } + } + + return(result) +} + +.are_weights <- function(w) { + !is.null(w) && length(w) && !all(w == 1) && !all(w == w[1]) +} diff --git a/datawizard.Rproj b/datawizard.Rproj index 57bf3f9fb..67565a589 100644 --- a/datawizard.Rproj +++ b/datawizard.Rproj @@ -1,8 +1,8 @@ Version: 1.0 -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: No EnableCodeIndexing: Yes UseSpacesForTab: Yes @@ -19,3 +19,6 @@ BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source PackageRoxygenize: rd,collate,namespace + +QuitChildProcessesOnExit: Yes +DisableExecuteRprofile: Yes diff --git a/tests/testthat/test-adjust.R b/tests/testthat/test-adjust.R new file mode 100644 index 000000000..0dc584db2 --- /dev/null +++ b/tests/testthat/test-adjust.R @@ -0,0 +1,21 @@ +if (require("testthat") && require("effectsize")) { + data(iris) + test_that("adjust multilevel", { + skip_if_not_installed("lme4") + adj <- adjust(iris[c("Sepal.Length", "Species")], multilevel = TRUE, bayesian = FALSE) + expect_equal( + head(adj$Sepal.Length), + c(0.08698, -0.11302, -0.31302, -0.41302, -0.01302, 0.38698), + tolerance = 1e-3 + ) + }) + + test_that("adjust", { + adj <- adjust(iris[c("Sepal.Length", "Species")], multilevel = FALSE, bayesian = FALSE) + expect_equal( + head(adj$Sepal.Length), + c(0.094, -0.106, -0.306, -0.406, -0.006, 0.394), + tolerance = 1e-3 + ) + }) +} diff --git a/tests/testthat/test-ranktransform.R b/tests/testthat/test-ranktransform.R new file mode 100644 index 000000000..faab3a516 --- /dev/null +++ b/tests/testthat/test-ranktransform.R @@ -0,0 +1,16 @@ +if (require("testthat") && require("effectsize")) { + test_that("signed rank", { + x <- c(-1, 2, -3, 4) + + sr <- ranktransform(x, sign = TRUE) + r <- ranktransform(x, sign = FALSE) + + expect_equal(sr, x) # unchanged + expect_equal(r, c(2, 3, 1, 4)) + + + x <- c(1, -2, -2, 4, 0, 3, -14, 0) + expect_warning(ranktransform(x, sign = TRUE)) + expect_true(all(is.na(suppressWarnings(ranktransform(x, sign = TRUE)[c(5, 8)])))) + }) +} diff --git a/tests/testthat/test-standardize-data.R b/tests/testthat/test-standardize-data.R new file mode 100644 index 000000000..5bbd06c4e --- /dev/null +++ b/tests/testthat/test-standardize-data.R @@ -0,0 +1,212 @@ +if (require("testthat") && require("effectsize")) { + + # standardize.numeric ----------------------------------------------------- + test_that("standardize.numeric", { + x <- standardize(seq(0, 1, length.out = 100)) + expect_equal(mean(x), 0, tolerance = 0.01) + + x <- standardize(seq(0, 1, length.out = 100), two_sd = TRUE) + expect_equal(sd(x), 0.5, tolerance = 0.01) + + x <- standardize(seq(0, 1, length.out = 100), robust = TRUE) + expect_equal(median(x), 0, tolerance = 0.01) + + x <- standardize(seq(0, 1, length.out = 100), robust = TRUE, two_sd = TRUE) + expect_equal(mad(x), 0.5, tolerance = 0.01) + + expect_message(standardize(c(0, 0, 0, 1, 1))) + + x <- standardize(c(-1, 0, 1), reference = seq(3, 4, length.out = 100)) + expect_equal(mean(x), -11.943, tolerance = 0.01) + }) + + + # standardize factor / Date ----------------------------------------------- + test_that("standardize.numeric", { + f <- factor(c("c", "a", "b")) + expect_equal(standardize(f), f) + expect_equal(standardize(f, force = TRUE), c(1, -1, 0), ignore_attr = TRUE) + + d <- as.Date(c("1989/08/06", "1989/08/04", "1989/08/05")) + expect_equal(standardize(d), d) + expect_equal(standardize(d, force = TRUE), c(1, -1, 0), ignore_attr = TRUE) + }) + + + # standardize.data.frame -------------------------------------------------- + test_that("standardize.data.frame", { + data(iris) + x <- standardize(iris) + expect_equal(mean(x$Sepal.Length), 0, tolerance = 0.01) + expect_length(levels(x$Species), 3) + expect_equal(mean(subset(x, Species == "virginica")$Sepal.Length), 0.90, tolerance = 0.01) + + x2 <- standardize(x = iris[1, ], reference = iris) + expect_true(all(x2[1, ] == x[1, ])) + + skip_if_not_installed("dplyr") + x <- standardize(dplyr::group_by(iris, Species)) + expect_equal(mean(x$Sepal.Length), 0, tolerance = 0.01) + expect_length(levels(x$Species), 3) + expect_equal(mean(subset(x, Species == "virginica")$Sepal.Length), 0, tolerance = 0.01) + + + + }) + + + test_that("standardize.data.frame, NAs", { + data(iris) + iris$Sepal.Width[c(148, 65, 33, 58, 54, 93, 114, 72, 32, 23)] <- NA + iris$Sepal.Length[c(11, 30, 141, 146, 13, 149, 6, 8, 48, 101)] <- NA + + x <- standardize(iris) + expect_equal(head(x$Sepal.Length), c(-0.9163, -1.1588, -1.4013, -1.5226, -1.0376, NA), tolerance = 0.01) + expect_equal(head(x$Sepal.Width), c(1.0237, -0.151, 0.3189, 0.0839, 1.2586, 1.9635), tolerance = 0.01) + expect_equal(mean(x$Sepal.Length), as.numeric(NA)) + + x <- standardize(iris, two_sd = TRUE) + expect_equal(head(x$Sepal.Length), c(-0.4603, -0.5811, -0.7019, -0.7623, -0.5207, NA), tolerance = 0.01) + expect_equal(head(x$Sepal.Width), c(0.5118, -0.0755, 0.1594, 0.042, 0.6293, 0.9817), tolerance = 0.01) + expect_equal(mean(x$Sepal.Length), as.numeric(NA)) + + skip_if_not_installed("dplyr") + x <- standardize(dplyr::group_by(iris, .data$Species)) + expect_equal(head(x$Sepal.Length), c(0.2547, -0.3057, -0.8661, -1.1463, -0.0255, NA), tolerance = 0.01) + expect_equal(head(x$Sepal.Width), c(0.2369, -1.0887, -0.5584, -0.8235, 0.502, 1.2974), tolerance = 0.01) + expect_equal(mean(x$Sepal.Length), as.numeric(NA)) + }) + + + test_that("standardize.data.frame, apend", { + data(iris) + iris$Sepal.Width[c(26, 43, 56, 11, 66, 132, 23, 133, 131, 28)] <- NA + iris$Sepal.Length[c(32, 12, 109, 92, 119, 49, 83, 113, 64, 30)] <- NA + + x <- standardize(iris, append = TRUE) + expect_equal(colnames(x), c( + "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", + "Species", "Sepal.Length_z", "Sepal.Width_z", "Petal.Length_z", + "Petal.Width_z" + )) + expect_equal(head(x$Sepal.Length_z), c(-0.8953, -1.1385, -1.3816, -1.5032, -1.0169, -0.5306), tolerance = 0.01) + expect_equal(head(x$Sepal.Width_z), c(1.04, -0.1029, 0.3543, 0.1257, 1.2685, 1.9542), tolerance = 0.01) + expect_equal(mean(x$Sepal.Length_z), as.numeric(NA)) + + x <- standardize(iris, two_sd = TRUE, append = TRUE) + expect_equal(head(x$Sepal.Length_z), c(-0.4477, -0.5692, -0.6908, -0.7516, -0.5084, -0.2653), tolerance = 0.01) + expect_equal(head(x$Sepal.Width_z), c(0.52, -0.0514, 0.1771, 0.0629, 0.6343, 0.9771), tolerance = 0.01) + expect_equal(mean(x$Sepal.Length_z), as.numeric(NA)) + + skip_if_not_installed("dplyr") + x <- standardize(dplyr::group_by(iris, .data$Species), append = TRUE) + expect_equal(head(x$Sepal.Length_z), c(0.2746, -0.2868, -0.8483, -1.129, -0.0061, 1.1168), tolerance = 0.01) + expect_equal(head(x$Sepal.Width_z), c(0.1766, -1.1051, -0.5924, -0.8487, 0.4329, 1.2019), tolerance = 0.01) + expect_equal(mean(x$Sepal.Length_z), as.numeric(NA)) + }) + + + + test_that("standardize.data.frame, weights", { + x <- rexp(30) + w <- rpois(30, 20) + 1 + + expect_equal( + sqrt(cov.wt(cbind(x, x), w)$cov[1, 1]), + attr(standardize(x, weights = w), "scale") + ) + expect_equal( + standardize(x, weights = w), + standardize(data.frame(x), weights = w)$x + ) + + # name and vector give same results + expect_equal( + standardize(mtcars, exclude = "cyl", weights = mtcars$cyl), + standardize(mtcars, weights = "cyl") + ) + + skip_if_not_installed("dplyr") + d <- dplyr::group_by(mtcars, am) + expect_warning(standardize(d, weights = d$cyl)) + }) + + + # Unstandardize ----------------------------------------------------------- + test_that("unstandardize, numeric", { + data(iris) + x <- standardize(iris$Petal.Length) + rez <- unstandardize(x) + expect_equal(rez, iris$Petal.Length, tolerance = 1e-3, ignore_attr = TRUE) + + rez <- unstandardize(x, reference = iris$Petal.Length) + expect_equal(rez, iris$Petal.Length, tolerance = 1e-3, ignore_attr = TRUE) + + rez <- unstandardize(x, center = mean(iris$Petal.Length), scale = stats::sd(iris$Petal.Length)) + expect_equal(rez, iris$Petal.Length, tolerance = 1e-3, ignore_attr = TRUE) + + rez <- unstandardize(0, center = mean(iris$Petal.Length), scale = stats::sd(iris$Petal.Length)) + expect_equal(rez, mean(iris$Petal.Length), tolerance = 1e-3) + + x <- standardize(iris$Petal.Length, robust = TRUE, two_sd = TRUE) + rez <- unstandardize(x, robust = TRUE, two_sd = TRUE) + expect_equal(rez, iris$Petal.Length, tolerance = 1e-3, ignore_attr = TRUE) + + x <- scale(iris$Petal.Length) + rez <- unstandardize(x) + expect_equal(rez, iris$Petal.Length, tolerance = 1e-3, ignore_attr = TRUE) + + x <- scale(iris$Petal.Length, center = 3, scale = 2) + rez <- unstandardize(x) + expect_equal(rez, iris$Petal.Length, tolerance = 1e-3, ignore_attr = TRUE) + }) + + test_that("unstandardize, dataframe", { + data(iris) + x <- standardize(iris) + rez <- unstandardize(x) + expect_equal(rez, iris, tolerance = 0.1, ignore_attr = TRUE) + + x <- standardize(iris, select = "Petal.Length") + rez <- unstandardize(x) + expect_equal(rez, iris, tolerance = 0.1, ignore_attr = TRUE) + + x <- standardize(iris, select = "Petal.Length") + rez <- unstandardize(x, + center = c(Petal.Length = mean(iris$Petal.Length)), + scale = c(Petal.Length = stats::sd(iris$Petal.Length)) + ) + expect_equal(rez, iris, tolerance = 0.1, ignore_attr = TRUE) + + expect_error(unstandardize(x, + center = mean(iris$Petal.Length), + scale = stats::sd(iris$Petal.Length) + )) + + x <- standardize(iris) + rez <- unstandardize(x, center = rep(0, 4), scale = rep(1, 4)) + expect_equal(rez, x, tolerance = 0.1, ignore_attr = TRUE) + + data(iris) + x <- standardize(iris, robust = TRUE, two_sd = TRUE) + rez <- unstandardize(x, robust = TRUE, two_sd = TRUE) + expect_equal(rez, iris, tolerance = 0.1, ignore_attr = TRUE) + + skip_if_not_installed("dplyr") + d <- dplyr::group_by(mtcars, am) + x <- standardize(d) + expect_error(unstandardize(x)) + }) + + test_that("unstandardize, matrix", { + data(mtcars) + d <- as.matrix(mtcars) + x <- standardize(d) + rez <- unstandardize(x) + expect_equal(rez, d, tolerance = 1e-3, ignore_attr = TRUE) + + x <- scale(d) + rez <- unstandardize(x) + expect_equal(rez, d, tolerance = 1e-3, ignore_attr = TRUE) + }) +}