Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions R/simulate_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
#' @param delay_map A data frame that defines the delays between events. It must
#' contain the columns `from` (character), `to` (character), and `group` (list
#' of numeric group IDs).
#' @param delay_params A data frame containing the parameters (`delay_mean`,
#' `delay_cv`) for each delay. Consider combining delay_map and delay_params?
#' @param delay_params A data frame containing the parameters (`mean_delay`,
#' `cv_delay`) for each delay. Consider combining delay_map and delay_params?
#' @param error_params A list containing `prop_missing_data` and `prob_error`.
#' @param range_dates A vector of two integer dates for the simulation range.
#' @param simul_error Boolean. If TRUE, simulates missing and erroneous data.
Expand Down Expand Up @@ -39,8 +39,8 @@
#' "hospitalisation", "onset", "hospitalisation"),
#' to = c("report", "report", "report", "report", "death", "hospitalisation",
#' "discharge", "hospitalisation", "death"),
#' delay_mean = c(10, 10, 10, 10, 15, 7, 20, 7, 12),
#' delay_cv = c(0.3, 0.3, 0.3, 0.3, 0.4, 0.2, 0.5, 0.2, 0.3)
#' mean_delay = c(10, 10, 10, 10, 15, 7, 20, 7, 12),
#' cv_delay = c(0.3, 0.3, 0.3, 0.3, 0.4, 0.2, 0.5, 0.2, 0.3)
#' )
#'
#' # Define other parameters
Expand Down Expand Up @@ -114,8 +114,8 @@ simulate_data <- function(n_per_group,
)

# Sample the delay
shape <- (1 / params$delay_cv)^2
scale <- params$delay_mean / shape
shape <- (1 / params$cv_delay)^2
scale <- params$mean_delay / shape
delay <- pmax(0, round(rgamma(1, shape = shape, scale = scale)))
true_data[i, to_event] <- true_data[i, from_event] + delay
}
Expand Down
8 changes: 4 additions & 4 deletions man/simulate_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

88 changes: 88 additions & 0 deletions tests/testthat/test-simulate-data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
delay_map <- data.frame(
from = c("onset", "onset", "onset", "hospitalisation", "onset",
"hospitalisation"),
to = c("report", "death", "hospitalisation", "discharge", "hospitalisation",
"death"),
group = I(list(1:4, 2, 3, 3, 4, 4))
)

delay_params <- data.frame(
group = c(1:4, 2, 3, 3, 4, 4),
from = c("onset", "onset", "onset", "onset", "onset", "onset",
"hospitalisation", "onset", "hospitalisation"),
to = c("report", "report", "report", "report", "death", "hospitalisation",
"discharge", "hospitalisation", "death"),
mean_delay = c(10, 10, 10, 10, 15, 7, 20, 7, 12),
cv_delay = c(0.3, 0.3, 0.3, 0.3, 0.4, 0.2, 0.5, 0.2, 0.3)
)

test_that("simulate_data returns correct structure and dimensions", {

n_per_group <- rep(10, max(delay_params$group))
total_n <- sum(n_per_group)
error_params <- list(prop_missing_data = 0.2, prob_error = 0.05)
range_dates <- as.integer(as.Date(c("2025-03-01", "2025-09-01")))

set.seed(1)
sim_result <- simulate_data(
n_per_group = n_per_group,
delay_map = delay_map,
delay_params = delay_params,
error_params = error_params,
range_dates = range_dates,
simul_error = TRUE
)

expect_type(sim_result, "list")
expect_named(sim_result, c("true_data", "observed_data", "error_indicators"))

expect_equal(nrow(sim_result$true_data), total_n)
expect_equal(nrow(sim_result$observed_data), total_n)
expect_equal(nrow(sim_result$error_indicators), total_n)

expected_cols <- c("id", "group", "onset", "report", "death",
"hospitalisation", "discharge")
expect_true(all(expected_cols %in% names(sim_result$true_data)))
expect_true(all(expected_cols %in% names(sim_result$observed_data)))
})

test_that("error_params as expected in simulated data", {

n_per_group <- rep(100, max(delay_params$group))
total_n <- sum(n_per_group)
error_params <- list(prop_missing_data = 0.2, prob_error = 0.05)
range_dates <- as.integer(as.Date(c("2025-03-01", "2025-09-01")))

set.seed(1)
sim_result <- simulate_data(
n_per_group = n_per_group,
delay_map = delay_map,
delay_params = delay_params,
error_params = error_params,
range_dates = range_dates,
simul_error = TRUE
)

# just the date columns from the true data and error indicators
date_cols <- setdiff(names(sim_result$true_data), c("id", "group"))
true_dates <- sim_result$true_data[, date_cols]
error_indic <- sim_result$error_indicators[, date_cols]

# identify date entries that should be observed in the groups
eligible_dates <- !is.na(true_dates)
n_eligible_points <- sum(eligible_dates)
eligible_indicators <- error_indic[eligible_dates]

# test the proportion of missing data
observed_prop_missing <- sum(is.na(eligible_indicators)) / n_eligible_points
expect_equal(observed_prop_missing, error_params$prop_missing_data,
tolerance = 0.05)

# test the probability of an error
non_missing_indicators <- na.omit(eligible_indicators)
observed_prob_error <- sum(non_missing_indicators) /
length(non_missing_indicators)
expect_equal(observed_prob_error, error_params$prob_error, tolerance = 0.05)
})