diff --git a/DESCRIPTION b/DESCRIPTION index 7966108..3e378ee 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: INLAvaan Type: Package Title: Bayesian Structural Equation Modelling with INLA -Version: 0.1.0.9011 +Version: 0.1.0.9012 Authors@R: c(person("Haziq", "Jamil", email = "haziq.jamil@gmail.com", diff --git a/R/10-utilities.R b/R/10-utilities.R index c8013dc..8d0d662 100644 --- a/R/10-utilities.R +++ b/R/10-utilities.R @@ -10,15 +10,30 @@ cli_messages <- c( ) theta_to_rho <- function(x) { + pos_only <- FALSE + u <- 1 / (1 + exp(-x)) - rho <- 2 * u - 1 + if (pos_only) { + rho <- u + } else { + rho <- 2 * u - 1 + } + rho } rho_to_theta <- function(x) { + pos_only <- FALSE + x[x > 1] <- 1 - x[x < -1] <- -1 - u <- (x + 1) / 2 + if (pos_only) { + x[x < 0] <- 0 + u <- x + } else { + x[x < -1] <- -1 + u <- (x + 1) / 2 + } + theta <- log(u / (1 - u)) theta } diff --git a/R/20-rgeneric.R b/R/20-rgeneric.R index 2dc665c..a44f4be 100644 --- a/R/20-rgeneric.R +++ b/R/20-rgeneric.R @@ -9,64 +9,14 @@ inla_sem <- function( # - init (initial values) # - partable - # CACHE - envir <- parent.env(environment()) - if (!exists("INLAvaan_SEM_cache", envir = envir)) { - - # FIXME: These are indices for free parameters theta. But sometimes, with - # fixed parameter values we still need the indices... + interpret.theta <- function() { - # Indices idx_lam <- partable$free[partable$mat == "lambda" & partable$free > 0] idx_beta <- partable$free[partable$mat == "beta" & partable$free > 0] idx_theta <- partable$free[partable$mat == "theta" & partable$free > 0] idx_rho <- partable$free[partable$mat == "rho" & partable$free > 0] idx_psi <- partable$free[partable$mat == "psi" & partable$free > 0] idx_lvrho <- partable$free[partable$mat == "lvrho" & partable$free > 0] - assign("idx_lam", idx_lam, envir = envir) - assign("idx_beta", idx_beta, envir = envir) - assign("idx_theta", idx_theta, envir = envir) - assign("idx_rho", idx_rho, envir = envir) - assign("idx_psi", idx_psi, envir = envir) - assign("idx_lvrho", idx_lvrho, envir = envir) - - # Lambda matrix - Lam_df <- partable[partable$mat == "lambda", ] - Lambda <- matrix(0, nrow = max(Lam_df$row), ncol = max(Lam_df$col)) - Lambda[cbind(Lam_df$row, Lam_df$col)] <- Lam_df$est - LAM_IDX <- as.matrix(Lam_df[Lam_df$free > 0, c("row", "col")]) - assign("Lambda", Lambda, envir = envir) - assign("LAM_IDX", LAM_IDX, envir = envir) - - # (I-B) matrix - B_df <- partable[partable$mat == "beta", ] - B_IDX <- as.matrix(B_df[B_df$free > 0, c("row", "col")]) - IminB <- 1 - if (length(idx_beta) > 0) { - IminB <- diag(q) - IminB[B_IDX] <- -B_df$est - } - assign("IminB", IminB, envir = envir) - assign("B_IDX", B_IDX, envir = envir) - - # Rho and Theta matrix - Rho_df <- partable[partable$mat == "rho", ] - Theta <- diag(p) - RHO_IDX <- cbind(Rho_df$row, Rho_df$col) - assign("RHO_IDX", RHO_IDX, envir = envir) - assign("Theta", Theta, envir = envir) - - # LVRho and Psi matrix - LVRho_df <- partable[partable$mat == "lvrho", ] - Psi <- diag(q) - LVRHO_IDX <- cbind(LVRho_df$row, LVRho_df$col) - assign("LVRHO_IDX", LVRHO_IDX, envir = envir) - assign("Psi", Psi, envir = envir) - - assign("INLAvaan_SEM_cache", TRUE, envir = envir) - } - - interpret.theta <- function() { lambda <- theta[idx_lam] beta <- theta[idx_beta] @@ -88,53 +38,58 @@ inla_sem <- function( Q <- function() { params <- interpret.theta() - # The matrices are cached and updated here - Lambda[LAM_IDX] <- params$lambda - if (length(idx_beta) > 0) IminB[B_IDX] <- -params$beta + # Lambda matrix + Lam_df <- partable[partable$mat == "lambda", ] + Lam_df$est[Lam_df$free > 0] <- params$lambda + Lambda <- matrix(0, nrow = max(Lam_df$row), ncol = max(Lam_df$col)) + Lambda[cbind(Lam_df$row, Lam_df$col)] <- Lam_df$est - # Theta matrix - diag(Theta) <- params$sd_e ^ 2 - if (length(idx_rho) > 0) { - for (k in seq_len(nrow(RHO_IDX))) { - i <- RHO_IDX[k, 1] - j <- RHO_IDX[k, 2] - Theta[i, j] <- Theta[j, i] <- - params$rho[k] * params$sd_e[i] * params$sd_e[j] - } + # B matrix + B_df <- partable[partable$mat == "beta", ] + B_df$est[B_df$free > 0] <- params$beta + if (length(params$beta) > 0) { + sizeB <- max(B_df$row, B_df$col) + B <- matrix(0, nrow = sizeB, ncol = sizeB) + B[cbind(B_df$row, B_df$col)] <- B_df$est + } else { + B <- 0 } - # Psi matrix - diag(Psi) <- params$sd_z ^ 2 - if (length(idx_lvrho) > 0) { - for (k in seq_len(nrow(LVRHO_IDX))) { - i <- LVRHO_IDX[k, 1] - j <- LVRHO_IDX[k, 2] - Psi[i, j] <- Psi[j, i] <- - params$lvrho[k] * params$sd_z[i] * params$sd_z[j] - } + # Theta matrix + if (length(params$rho) > 0) { + SD <- diag(x = params$sd_e) + Rho_df <- partable[partable$mat == "rho", ] + Rho_df$est[Rho_df$free > 0] <- params$rho + sizeRho <- length(params$sd_e) + Rho <- matrix(0, nrow = sizeRho, ncol = sizeRho) + Rho[cbind(Rho_df$row, Rho_df$col)] <- Rho_df$est + Rho <- Rho + t(Rho) + diag(Rho) <- 1 + Theta <- SD %*% Rho %*% SD + } else { + Theta <- diag(params$sd_e ^ 2) } - # Using Woodbury Matrix identity - # I_q <- diag(q) - # inv_I_minus_B <- solve(I_q - B) - # U <- Lambda %*% inv_I_minus_B - # C_inv <- diag(x = 1 / params$psi) # solve(Psi) - # A_inv <- diag(x = 1 / params$sd_e) - # int_mat <- C_inv + t(U) %*% A_inv %*% U - # Sigma_inv <- A_inv - A_inv %*% U %*% solve(int_mat) %*% t(U) %*% A_inv - - # Regular way - if (is.vector(IminB)) { - front <- Lambda + + # Psi matrix + if (length(params$lvrho) > 0) { + SD <- diag(x = params$sd_z) + LVRho_df <- partable[partable$mat == "lvrho", ] + LVRho_df$est[LVRho_df$free > 0] <- params$lvrho + sizeLVRho <- length(params$sd_z) + LVRho <- matrix(0, nrow = sizeLVRho, ncol = sizeLVRho) + LVRho[cbind(LVRho_df$row, LVRho_df$col)] <- LVRho_df$est + LVRho <- LVRho + t(LVRho) + diag(LVRho) <- 1 + Psi <- SD %*% LVRho %*% SD } else { - front <- Lambda %*% solve(IminB) + Psi <- diag(params$sd_z ^ 2) } - Sigma <- front %*% tcrossprod(Psi, front) + Theta - # safe_solve(Sigma) - Sigma <- Sigma + diag(1e-7, nrow(Sigma)) # for stability - chol2inv(chol(Sigma)) - # Sigma <- Matrix::forceSymmetric(Matrix::Matrix(Sigma)) - # Matrix::solve(Sigma) + + # Return inverse Sigma + front <- Lambda %*% solve(diag(1, nrow = q) - B) + Sigma <- front %*% Psi %*% t(front) + Theta + solve(Sigma) } mu <- function() { numeric(0) } @@ -153,7 +108,7 @@ inla_sem <- function( log_pdf_rho <- function(x, shape1 = 1, shape2 = 1) { u <- 1 / (1 + exp(-x)) log_beta_density <- dbeta(u, shape1, shape2, log = TRUE) - log_jacobian <- -log(2 * u * (1 - u)) # log(Jacobian) = theta - 2 * log(1 + exp(theta)) + log_jacobian <- -(log(2) + log(u) + log(1-u)) # log(Jacobian) = theta - 2 * log(1 + exp(theta)) log_beta_density + log_jacobian } @@ -178,8 +133,7 @@ inla_sem <- function( } graph <- function() { - Q <- matrix(1, nrow = p, ncol = p) - INLA::inla.as.sparse(Q) + matrix(1, nrow = p, ncol = p) } quit = function() { return(invisible()) } diff --git a/R/21-rgeneric_cached.R b/R/21-rgeneric_cached.R new file mode 100644 index 0000000..03090db --- /dev/null +++ b/R/21-rgeneric_cached.R @@ -0,0 +1,204 @@ +inla_sem_cached <- function( + cmd = c("graph", "Q", "mu", "initial", "log.norm.const", "log.prior", "quit"), + theta = NULL, + .debug = FALSE) { + + # In the environment require + # - n (sample size) + # - p (no of items) + # - q (no of factors) + # - init (initial values) + # - partable + + # CACHE + envir <- parent.env(environment()) + if (!exists("INLAvaan_SEM_cache", envir = envir)) { + + # FIXME: These are indices for free parameters theta. But sometimes, with + # fixed parameter values we still need the indices... + + force_pd <- function(x) { + ed <- eigen(x, symmetric = TRUE) + eval <- ed$values + evec <- ed$vectors + eval[eval < 0] <- .Machine$double.eps + evec %*% diag(eval) %*% t(evec) + } + assign("force_pd", force_pd, envir = envir) + + # Indices + idx_lam <- partable$free[partable$mat == "lambda" & partable$free > 0] + idx_beta <- partable$free[partable$mat == "beta" & partable$free > 0] + idx_theta <- partable$free[partable$mat == "theta" & partable$free > 0] + idx_rho <- partable$free[partable$mat == "rho" & partable$free > 0] + idx_psi <- partable$free[partable$mat == "psi" & partable$free > 0] + idx_lvrho <- partable$free[partable$mat == "lvrho" & partable$free > 0] + assign("idx_lam", idx_lam, envir = envir) + assign("idx_beta", idx_beta, envir = envir) + assign("idx_theta", idx_theta, envir = envir) + assign("idx_rho", idx_rho, envir = envir) + assign("idx_psi", idx_psi, envir = envir) + assign("idx_lvrho", idx_lvrho, envir = envir) + + # Lambda matrix + Lam_df <- partable[partable$mat == "lambda", ] + Lambda <- matrix(0, nrow = max(Lam_df$row), ncol = max(Lam_df$col)) + Lambda[cbind(Lam_df$row, Lam_df$col)] <- Lam_df$est + LAM_IDX <- as.matrix(Lam_df[Lam_df$free > 0, c("row", "col")]) + assign("Lambda", Lambda, envir = envir) + assign("LAM_IDX", LAM_IDX, envir = envir) + + # (I-B) matrix + B_df <- partable[partable$mat == "beta", ] + B_IDX <- as.matrix(B_df[B_df$free > 0, c("row", "col")]) + IminB <- 1 + if (length(idx_beta) > 0) { + IminB <- diag(q) + IminB[B_IDX] <- -B_df$est + } + assign("IminB", IminB, envir = envir) + assign("B_IDX", B_IDX, envir = envir) + + # Rho and Theta matrix + Rho_df <- partable[partable$mat == "rho", ] + Theta <- diag(subset(partable, mat == "theta" & row == col)$start) + RHO_IDX <- cbind(Rho_df$row, Rho_df$col) + assign("RHO_IDX", RHO_IDX, envir = envir) + assign("Theta", Theta, envir = envir) + + # LVRho and Psi matrix + LVRho_df <- partable[partable$mat == "lvrho", ] + Psi <- diag(subset(partable, mat == "psi" & row == col)$start) + LVRHO_IDX <- cbind(LVRho_df$row, LVRho_df$col) + assign("LVRHO_IDX", LVRHO_IDX, envir = envir) + assign("Psi", Psi, envir = envir) + + assign("INLAvaan_SEM_cache", TRUE, envir = envir) + } + + interpret.theta <- function() { + + lambda <- theta[idx_lam] + beta <- theta[idx_beta] + sd_e <- sqrt(exp(theta[idx_theta])) # sd_e = sd_e ^ 2 (item sd) + rho <- theta_to_rho(theta[idx_rho]) + sd_z <- sqrt(exp(theta[idx_psi])) # sd_z = sd_z ^ 2 (latent sd) + lvrho <- theta_to_rho(theta[idx_lvrho]) + + list( + lambda = lambda, + beta = beta, + sd_e = sd_e, + rho = rho, + sd_z = sd_z, + lvrho = lvrho + ) + } + + Q <- function(debug = .debug) { + params <- interpret.theta() + + # The matrices are cached and updated here + Lambda[LAM_IDX] <- params$lambda + if (length(idx_beta) > 0) IminB[B_IDX] <- -params$beta + + # Theta matrix + diag(Theta) <- params$sd_e ^ 2 + if (length(idx_rho) > 0) { + for (k in seq_len(nrow(RHO_IDX))) { + i <- RHO_IDX[k, 1] + j <- RHO_IDX[k, 2] + Theta[i, j] <- Theta[j, i] <- + params$rho[k] * params$sd_e[i] * params$sd_e[j] + } + Theta <- force_pd(Theta) # force pd + } + + # Psi matrix + diag(Psi) <- params$sd_z ^ 2 + if (length(idx_lvrho) > 0) { + for (k in seq_len(nrow(LVRHO_IDX))) { + i <- LVRHO_IDX[k, 1] + j <- LVRHO_IDX[k, 2] + Psi[i, j] <- Psi[j, i] <- + params$lvrho[k] * params$sd_z[i] * params$sd_z[j] + } + Psi <- force_pd(Psi) # force pd + } + + if (isTRUE(debug)) { + return(list( + Lambda = Lambda, + IminB = IminB, + Theta = Theta, + Psi = Psi + )) + } + + # Return inverse Sigma + if (is.matrix(IminB)) { + front <- Lambda %*% solve(IminB) + } else { + front <- Lambda + } + Sigma <- front %*% tcrossprod(Psi, front) + Theta + Sigma <- Sigma + 1e-10 * diag(nrow(Sigma)) + solve(Sigma) + } + + mu <- function() { numeric(0) } + + log.norm.const <- function() { numeric(0) } + + log.prior = function() { + params <- interpret.theta() + + # If sigma ~ gamma(shape,rate) then this is the logpdf transform of theta = log(sigma^2) + log_pdf_scale <- function(x, shape = 1, rate = 0.5) { + dgamma(sqrt(exp(x)), shape, rate, log = TRUE) + x + } + + # If rho ~ beta(a,b) then this is the logpdf of theta = log(rho / (1 - rho)) + log_pdf_rho <- function(x, shape1 = 1, shape2 = 1) { + pos_only <- FALSE + u <- 1 / (1 + exp(-x)) + log_beta_density <- dbeta(u, shape1, shape2, log = TRUE) + if (pos_only) { + log_jacobian <- 0 + } else { + log_jacobian <- -(log(2) + log(u) + log(1-u)) # log(Jacobian) = theta - 2 * log(1 + exp(theta)) + } + + log_beta_density + log_jacobian + } + + # FIXME: Adjust priors in the future + res <- + sum(dnorm(params$lambda, mean = 0, sd = 10, log = TRUE)) + + sum(dnorm(params$beta, mean = 0, sd = 10, log = TRUE)) + + # Variances + # sum(dgamma(params$sd_e, shape = 1, rate = 5e-05, log = TRUE)) + + # sum(dbeta(params$rho, shape1 = 1, shape2 = 1, log = TRUE)) + + sum(log_pdf_scale(params$sd_e, shape = 1, rate = 0.5)) + + sum(log_pdf_rho(params$rho, shape1 = 1, shape2 = 1)) + + # sum(dgamma(params$sd_z, shape = 1, rate = 5e-05, log = TRUE)) + + # sum(dbeta(params$lvrho, shape1 = 1, shape2 = 1, log = TRUE)) + + sum(log_pdf_scale(params$sd_z, shape = 1, rate = 0.5)) + + sum(log_pdf_rho(params$lvrho, shape1 = 1, shape2 = 1)) + res + } + + initial = function() { + init + } + + graph <- function() { + matrix(1, nrow = p, ncol = p) + } + + quit = function() { return(invisible()) } + + if (!length(theta)) theta = initial() + val = do.call(match.arg(cmd), args = list()) + return (val) +} diff --git a/R/30-inlavaan.R b/R/30-inlavaan.R index e3d1a1c..70d9250 100644 --- a/R/30-inlavaan.R +++ b/R/30-inlavaan.R @@ -6,6 +6,7 @@ #' @param save.lvs TBC #' @param bcontrol TBC #' @param npost Number of post-processing samples to do, if any. +#' @param stop_at_jagtrans DEBUG #' #' @return TBC #' @export @@ -17,7 +18,11 @@ inlavaan <- function( dp = NULL, save.lvs = FALSE, npost = 250, - bcontrol = list(num.threads = 6)) { + + # FOR MY DEBUGGING + stop_at_jagtrans = FALSE, + + bcontrol = list()) { # To play nice with blavaan code cp = "srs" @@ -568,6 +573,8 @@ inlavaan <- function( # inits = initsin ) ) + if (stop_at_jagtrans) return(jagtrans) + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> } else if(target == "jags") { jagtrans <- try(lav2mcmc(model = lavpartable, lavdata = lavdata, @@ -1282,7 +1289,12 @@ isem <- function( target = "INLA", dp = NULL, save.lvs = FALSE, - bcontrol = list(verbose = TRUE, num.threads = 6)) { + npost = 250, + + # FOR MY DEBUGGING + stop_at_jagtrans = FALSE, + + bcontrol = list()) { dotdotdot <- list(...) std.lv <- ifelse(any(names(dotdotdot) == "std.lv"), dotdotdot$std.lv, FALSE) diff --git a/R/40-lav_export_INLA.R b/R/40-lav_export_INLA.R index e0d5e9a..218f86b 100644 --- a/R/40-lav_export_INLA.R +++ b/R/40-lav_export_INLA.R @@ -1287,23 +1287,22 @@ lav2inla <- function( log(start), ifelse(mat %in% c("rho", "lvrho"), rho_to_theta(start), + # log(start / (1 - start)), start))) filtered_partable <- subset(partable, free > 0 & mat != "nu") sorted_partable <- filtered_partable[order(filtered_partable$free), ] inlastart <- sorted_partable$inlastart - inlastart[inlastart == Inf] <- rho_to_theta(0.9) # not needed - inlastart[inlastart == -Inf] <- rho_to_theta(-0.9) # INLA formula the_model <- INLA::inla.rgeneric.define( - inla_sem, # see 20-rgeneric.R + inla_sem_cached, n = lavdata@nobs[[1]], # TODO: Multiple groups? p = pta$nvar[[1]], q = pta$nfac[[1]], init = inlastart, partable = partable, - theta_to_rho = theta_to_rho, # utility function, see 10-utilities.R - safe_solve = safe_solve + theta_to_rho = theta_to_rho # utility function, see 10-utilities.R + # safe_solve = safe_solve # optimize = TRUE ) @@ -1311,7 +1310,7 @@ lav2inla <- function( if (isTRUE(meanstructure)) { form <- val ~ -1 + nu + f(ov.idx, model = the_model, replicate = i) } else { - form <- val ~ -1 + offset(nu_off) + f(ov.idx, model = the_model, replicate = i) + form <- val ~ offset(nu_off) + f(ov.idx, model = the_model, replicate = i) control_fixed$prec <- 100 } @@ -1321,6 +1320,7 @@ lav2inla <- function( the_model = the_model, control.family = list(hyper = list(prec = list(initial = 10, fixed = TRUE))), control.fixed = control_fixed, + control.compute = list(smtp = "band"), pxpartable = partable, inlastart = inlastart ) diff --git a/inst/demo.R b/inst/demo.R index 5d03356..f2e2ec6 100644 --- a/inst/demo.R +++ b/inst/demo.R @@ -52,11 +52,32 @@ myModel <- ' fit <- isem( model = myModel, - data = PoliticalDemocracy, - meanstructure = FALSE, + data = scale(PoliticalDemocracy, center = FALSE, scale = FALSE), + meanstructure = TRUE, + # stop_at_jagtrans = TRUE, verbose = TRUE ) +tmp <- fit +tmp$the_model$f$rgeneric$definition("Q") + +with(tmp$the_model$f$rgeneric$definition("Q", .debug = TRUE), { + solve(Lambda %*% IminB %*% Psi %*% t(IminB) %*% t(Lambda) + Theta) +}) + + + + + + + + + + + + + + fit_lav <- sem(myModel, data = PoliticalDemocracy) inla_coef <- coef(fit)