Skip to content

Commit

Permalink
fixed bug and added example dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
osorensen committed Apr 3, 2024
1 parent e3e2e80 commit fc5ca84
Show file tree
Hide file tree
Showing 19 changed files with 332 additions and 68 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ LinkingTo:
RcppEigen
LazyData: true
Roxygen: list(markdown = TRUE, roclets = c ("namespace", "rd", "srr::srr_stats_roclet"))
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Suggests:
covr,
gamm4,
Expand Down
27 changes: 27 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -200,3 +200,30 @@
#' @family datasets
#'
"latent_covariates_long"

#' Simulated Dataset with Lifespan Trajectories of Three Cognitive Domains
#'
#' This dataset is simulated based on the data used in Section 4.1 of
#' \insertCite{sorensenLongitudinalModelingAgeDependent2023;textual}{galamm}.
#'
#' @format ## `lifespan` A data frame with 54,457 rows and 7 columns:
#' \describe{
#' \item{id}{Subject ID.}
#' \item{domain}{Cognitive domain being measured. One of \code{"epmem"} for
#' episodic memory, \code{"wmem"} for working memory and \code{"execfun"} for
#' executive function/speed.}
#' \item{age}{Age of participant at the timepoint.}
#' \item{test}{The particular test at this observation.}
#' \item{y}{Response. For \code{"epmem"} and \code{"wmem"} this is the number
#' of successes in 16 trials. For \code{"execfun"} it is the time in seconds
#' to complete the task.}
#' \item{retest}{Integer indicating whether the participant has taken the test
#' at a previous timepoint.}
#' \item{domainepmem}{Dummy variable for \code{domain=="epmem"}.}
#' \item{domainwmem}{Dummy variable for \code{domain=="wmem"}.}
#' \item{domainexecfun}{Dummy variable for \code{domain=="execfun"}.}
#' }
#'
#' @family datasets
#' @references \insertAllCited{}
"lifespan"
1 change: 1 addition & 0 deletions R/galamm.R
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,7 @@ galamm <- function(formula, weights = NULL, data, family = gaussian,
logical(1)
)),
family = family_list,
family_mapping = family_mapping,
factor_interactions = factor_interactions,
fit = fit,
fit_population = fit_population,
Expand Down
19 changes: 9 additions & 10 deletions R/gamm4-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,8 @@ gamm4.wrapup <- function(gobj, ret, final_model) {
model = gobj$mf,
smooth = gobj$G$smooth,
nsdf = gobj$G$nsdf,
family = ret$model$family[[1]],
family = ret$model$family,
family_mapping = ret$model$family_mapping,
df.null = nrow(gobj$G$X),
y = ret$model$response,
terms = gobj$gam.terms,
Expand All @@ -256,7 +257,8 @@ gamm4.wrapup <- function(gobj, ret, final_model) {
if (length(pvars) > 0) stats::reformulate(pvars) else NULL
sn <- names(gobj$G$random)

if (object$family$family == "gaussian" && object$family$link == "identity") {
if (length(object$family) == 1 &&
object$family[[1]]$family == "gaussian" && object$family[[1]]$link == "identity") {
linear <- TRUE
} else {
linear <- FALSE
Expand Down Expand Up @@ -356,7 +358,6 @@ gamm4.wrapup <- function(gobj, ret, final_model) {
V <- Matrix::Diagonal(length(final_model$V), scale / final_model$V)
}


if (nrow(Zt) > 0) V <- V + Matrix::crossprod(root.phi %*% Zt) * scale

R <- Matrix::chol(V, pivot = FALSE)
Expand All @@ -365,7 +366,6 @@ gamm4.wrapup <- function(gobj, ret, final_model) {
gobj$G$Xf <- methods::as(gobj$G$Xf, "dgCMatrix")
Xfp <- methods::as(Xfp, "dgCMatrix")


WX <- methods::as(Matrix::solve(Matrix::t(R), Xfp), "matrix")
XVX <- methods::as(Matrix::solve(Matrix::t(R), gobj$G$Xf), "matrix")

Expand All @@ -374,7 +374,6 @@ gamm4.wrapup <- function(gobj, ret, final_model) {
object$R[, qrz$pivot] <- object$R

XVX <- crossprod(object$R) ## X'V^{-1}X original parameterization

object$sp <- sp

colx <- ncol(gobj$G$Xf)
Expand Down Expand Up @@ -406,7 +405,6 @@ gamm4.wrapup <- function(gobj, ret, final_model) {
Vb <- Vb %*% Matrix::t(Vb)

object$edf <- rowSums(Vb * t(XVX))

object$df.residual <- length(object$y) - sum(object$edf)

object$sig2 <- scale
Expand All @@ -417,9 +415,7 @@ gamm4.wrapup <- function(gobj, ret, final_model) {
}

object$Vp <- methods::as(Vb, "matrix")

object$Ve <- methods::as(Vb %*% XVX %*% Vb, "matrix")

class(object) <- "gam"

if (!is.null(gobj$G$P)) {
Expand All @@ -428,11 +424,12 @@ gamm4.wrapup <- function(gobj, ret, final_model) {
object$Ve <- gobj$G$P %*% object$Ve %*% t(gobj$G$P)
}

object$linear.predictors <- object$family$linkfun(fitted(ret))
object$fitted.values <- fitted(ret)
object$linear.predictors <- vapply(object$family_mapping, function(i) {
object$family[[i]]$linkfun(object$fitted.values[[i]])
}, numeric(1))

object$residuals <- residuals(ret$mer)

term.names <- colnames(gobj$G$X)[seq_len(gobj$G$nsdf)]
n.smooth <- length(gobj$G$smooth)

Expand All @@ -452,6 +449,8 @@ gamm4.wrapup <- function(gobj, ret, final_model) {
names(object$edf) <- term.names
names(object$sp) <- names(gobj$G$sp)
object$gcv.ubre <- deviance(ret$mer)
object$family <- object$family[[1]]
object$family_mapping <- NULL

object
}
127 changes: 127 additions & 0 deletions data-raw/lifespan.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# Simulate lifespan data from Section 4.1 in Sørensen et al. (2023)
library(tidyverse)
library(mvtnorm)
set.seed(3)
trajectories <- readRDS("data-raw/trajectories.rds")
epmem_fun <- approxfun(
trajectories$age,
trajectories$epmem_trajectory)
wmem_fun <- approxfun(
trajectories$age, trajectories$wmem_trajectory)
execfun_fun <- approxfun(
trajectories$age, trajectories$execfun_trajectory)

n_ids <- 1000
Psi3 <- matrix(c(.08, .05, .05,
.05, .12, .06,
.05, .06, .24), ncol = 3)
Psi2 <- diag(c(.06, .8, .18))

domains <- c("epmem", "wmem", "execfun")
epmem_tests <- c("CVLTA1", "CVLTA2", "CVLTA3", "CVLTA4", "CVLTA5",
"CVLT5min", "CVLT30min")
wmem_tests <- c("DspanBwd", "DspanFwd")
execfun_tests <- c("Stroop1", "Stroop2", "Stroop3", "Stroop4")

item_bias <- c(
CVLTA1 = -.3,
CVLTA2 = 0.7,
CVLTA3 = 1.5,
CVLTA4 = 1.8,
CVLTA5 = 2.2,
CVLT5min = 1.6,
CVLT30min = 1.8,
DspanBwd = -0.4,
DspanFwd = 0.3,
Stroop1 = 32,
Stroop2 = 23,
Stroop3 = 60,
Stroop4 = 65
)
loadings <- c(1, 1.8, 2.4, 2.8, 3, 3, 3.2, 1, 1, 7, 4, 20, 20)
names(loadings) <- names(item_bias)

lifespan <- tibble(
id = seq_len(n_ids),
age_at_baseline = runif(n_ids, min = min(trajectories$age),
max = max(trajectories$age) - 10),
timepoints = sample(6, n_ids, replace = TRUE, prob = c(.2, .4, .2, .1, .1, 1))
) %>%
mutate(domain = list(domains)) %>%
unnest(cols = "domain") %>%
mutate(domain = factor(domain, levels = domains)) %>%
nest_by(id, .keep = TRUE) %>%
pmap_dfr(function(id, data) {
data$zeta3 <- as.numeric(rmvnorm(1, sigma = Psi3))
data
}) %>%
mutate(
timepoint = map(timepoints, ~ seq_len(.x))
) %>%
select(-timepoints) %>%
unnest(cols = timepoint) %>%
group_by(id, domain) %>%
mutate(
time_since_baseline = cumsum(c(0, runif(n(), min = .5, max = 2.5)[-1])),
age = age_at_baseline + time_since_baseline
) %>%
ungroup() %>%
filter(between(age, min(trajectories$age), max(trajectories$age))) %>%
select(-age_at_baseline, -time_since_baseline) %>%
nest_by(id, timepoint, .keep = TRUE) %>%
pmap_dfr(function(id, timepoint, data) {
data$zeta2 <- as.numeric(rmvnorm(1, sigma = Psi2))
data
}) %>%
mutate(
eta = case_when(
domain == "epmem" ~ epmem_fun(age),
domain == "wmem" ~ wmem_fun(age),
domain == "execfun" ~ -execfun_fun(age),
TRUE ~ NA_real_
) + zeta2 + zeta3,
test = case_when(
domain == "epmem" ~ list(epmem_tests),
domain == "wmem" ~ list(wmem_tests),
domain == "execfun" ~ list(execfun_tests),
TRUE ~ list(NA)
)
) %>%
unnest(cols = test) %>%
mutate(
retest_effect = case_when(
domain == "epmem" ~ .05 * timepoint > 1,
domain == "wmem" ~ .05 * timepoint > 1,
domain == "execfun" ~ -2
),
item_bias = item_bias[test],
lambda = loadings[test],
nu = retest_effect + item_bias + eta * lambda
) %>%
select(-zeta2, -zeta3, -eta, -retest_effect, -item_bias, -lambda) %>%
rowwise() %>%
mutate(
y = case_when(
domain %in% c("epmem", "wmem") ~ rbinom(1, 16, prob = plogis(nu)),
domain == "execfun" ~ rnorm(1, mean = nu, sd = 8),
TRUE ~ NA_real_
),
test = factor(test, levels = c(epmem_tests, wmem_tests, execfun_tests))
) %>%
group_by(test) %>%
mutate(
retest = as.integer(timepoint > 1),
y = case_when(
domain == "execfun" ~ (y - mean(y)) / sd(y),
TRUE ~ y
)
) %>%
ungroup() %>%
select(-nu, -timepoint) %>%
as.data.frame()

mm <- model.matrix(~ 0 + domain, data = lifespan)
lifespan <- cbind(lifespan, mm)

usethis::use_data(lifespan, overwrite = TRUE)

Binary file added data-raw/trajectories.rds
Binary file not shown.
Binary file added data/lifespan.rda
Binary file not shown.
34 changes: 34 additions & 0 deletions dev-scripts/lifespan_demo.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
library(galamm)
lmat <- matrix(c(
1, rep(NA, 6), rep(0, 6),
rep(0, 7), 1, NA, rep(0, 4),
rep(0, 9), 1, NA, NA, NA), ncol = 3)

mod_init <- galamm(
formula = cbind(y, 16 - y) ~ 0 + retest:domain + test +
sl(age, by = domain) + (0 + domain | id),
data = lifespan,
family = c(binomial, gaussian),
family_mapping = ifelse(lifespan$domain == "execfun", 2, 1),
control = galamm_control(
optim_control = list(REPORT = 2, factr = 1e9, trace = 3, maxit = 2))
)

mod <- galamm(
formula = cbind(y, 16 - y) ~ 0 + retest:domain + test +
sl(age, by = domain,
factor = c("epmem_ability", "wmem_ability", "execfun_ability")) +
(0 + domainepmem:epmem_ability + domainwmem:wmem_ability +
domainexecfun:execfun_ability | id),
data = lifespan,
family = c(binomial, gaussian),
family_mapping = ifelse(lifespan$domain == "execfun", 2, 1),
load.var = "test",
lambda = lmat,
factor = c("epmem_ability", "wmem_ability", "execfun_ability"),
control = galamm_control(
optim_control = list(REPORT = 2, factr = 1e9, trace = 3, maxit = 10))
)
beepr::beep()
summary(mod)
plot_smooth(mod, scale = 0, pages = 1)
Loading

0 comments on commit fc5ca84

Please sign in to comment.