R/simulate_data.R

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -6,8 +6,8 @@
  
    #' @param delay_map A data frame that defines the delays between events. It must

    #'   contain the columns `from` (character), `to` (character), and `group` (list

    #'   of numeric group IDs).

    #' @param delay_params A data frame containing the parameters (`delay_mean`,

    #'  `delay_cv`) for each delay. Consider combining delay_map and delay_params?

    #' @param delay_params A data frame containing the parameters (`mean_delay`,

    #'  `cv_delay`) for each delay. Consider combining delay_map and delay_params?

    #' @param error_params A list containing `prop_missing_data` and `prob_error`.

    #' @param range_dates A vector of two integer dates for the simulation range.

    #' @param simul_error Boolean. If TRUE, simulates missing and erroneous data.

    @@ -39,8 +39,8 @@
  
    #'            "hospitalisation", "onset", "hospitalisation"),

    #'   to = c("report", "report", "report", "report", "death", "hospitalisation",

    #'          "discharge", "hospitalisation", "death"),

    #'   delay_mean = c(10, 10, 10, 10, 15, 7, 20, 7, 12),

    #'   delay_cv = c(0.3, 0.3, 0.3, 0.3, 0.4, 0.2, 0.5, 0.2, 0.3)

    #'   mean_delay = c(10, 10, 10, 10, 15, 7, 20, 7, 12),

    #'   cv_delay = c(0.3, 0.3, 0.3, 0.3, 0.4, 0.2, 0.5, 0.2, 0.3)

    #' )

    #'

    #' # Define other parameters

    @@ -114,8 +114,8 @@ simulate_data <- function(n_per_group,
  
          )

          # Sample the delay

          shape <- (1 / params$delay_cv)^2

          scale <- params$delay_mean / shape

          shape <- (1 / params$cv_delay)^2

          scale <- params$mean_delay / shape

          delay <- pmax(0, round(rgamma(1, shape = shape, scale = scale)))

          true_data[i, to_event] <- true_data[i, from_event] + delay

        }

man/simulate_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

tests/testthat/test-simulate-data.R

-Original file line number
+Diff line change
@@ -0,0 +1,88 @@
+    delay_map <- data.frame(
+      from = c("onset", "onset", "onset", "hospitalisation", "onset",
+               "hospitalisation"),
+      to = c("report", "death", "hospitalisation", "discharge", "hospitalisation",
+             "death"),
+      group = I(list(1:4, 2, 3, 3, 4, 4))
+    )
+    delay_params <- data.frame(
+      group = c(1:4, 2, 3, 3, 4, 4),
+      from = c("onset", "onset", "onset", "onset", "onset", "onset",
+               "hospitalisation", "onset", "hospitalisation"),
+      to = c("report", "report", "report", "report", "death", "hospitalisation",
+             "discharge", "hospitalisation", "death"),
+      mean_delay = c(10, 10, 10, 10, 15, 7, 20, 7, 12),
+      cv_delay = c(0.3, 0.3, 0.3, 0.3, 0.4, 0.2, 0.5, 0.2, 0.3)
+    )
+    test_that("simulate_data returns correct structure and dimensions", {
+      n_per_group <- rep(10, max(delay_params$group))
+      total_n <- sum(n_per_group)
+      error_params <- list(prop_missing_data = 0.2, prob_error = 0.05)
+      range_dates <- as.integer(as.Date(c("2025-03-01", "2025-09-01")))
+      set.seed(1)
+      sim_result <- simulate_data(
+        n_per_group = n_per_group,
+        delay_map = delay_map,
+        delay_params = delay_params,
+        error_params = error_params,
+        range_dates = range_dates,
+        simul_error = TRUE
+      )
+      expect_type(sim_result, "list")
+      expect_named(sim_result, c("true_data", "observed_data", "error_indicators"))
+      expect_equal(nrow(sim_result$true_data), total_n)
+      expect_equal(nrow(sim_result$observed_data), total_n)
+      expect_equal(nrow(sim_result$error_indicators), total_n)
+      expected_cols <- c("id", "group", "onset", "report", "death",
+                         "hospitalisation", "discharge")
+      expect_true(all(expected_cols %in% names(sim_result$true_data)))
+      expect_true(all(expected_cols %in% names(sim_result$observed_data)))
+    })
+    test_that("error_params as expected in simulated data", {
+      n_per_group <- rep(100, max(delay_params$group))
+      total_n <- sum(n_per_group)
+      error_params <- list(prop_missing_data = 0.2, prob_error = 0.05)
+      range_dates <- as.integer(as.Date(c("2025-03-01", "2025-09-01")))
+      set.seed(1)
+      sim_result <- simulate_data(
+        n_per_group = n_per_group,
+        delay_map = delay_map,
+        delay_params = delay_params,
+        error_params = error_params,
+        range_dates = range_dates,
+        simul_error = TRUE
+      )
+      # just the date columns from the true data and error indicators
+      date_cols <- setdiff(names(sim_result$true_data), c("id", "group"))
+      true_dates <- sim_result$true_data[, date_cols]
+      error_indic <- sim_result$error_indicators[, date_cols]
+      # identify date entries that should be observed in the groups
+      eligible_dates <- !is.na(true_dates)
+      n_eligible_points <- sum(eligible_dates)
+      eligible_indicators <- error_indic[eligible_dates]
+      # test the proportion of missing data
+      observed_prop_missing <- sum(is.na(eligible_indicators)) / n_eligible_points
+      expect_equal(observed_prop_missing, error_params$prop_missing_data,
+                   tolerance = 0.05)
+      # test the probability of an error
+      non_missing_indicators <- na.omit(eligible_indicators)
+      observed_prob_error <- sum(non_missing_indicators) /
+        length(non_missing_indicators)
+      expect_equal(observed_prob_error, error_params$prob_error, tolerance = 0.05)
+    })

Test simulate data #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

rebeccanash wants to merge 5 commits into main from test-simulate-data

-Original file line number
+Diff line change
@@ -0,0 +1,88 @@
+    delay_map <- data.frame(
+      from = c("onset", "onset", "onset", "hospitalisation", "onset",
+               "hospitalisation"),
+      to = c("report", "death", "hospitalisation", "discharge", "hospitalisation",
+             "death"),
+      group = I(list(1:4, 2, 3, 3, 4, 4))
+    )
+    delay_params <- data.frame(
+      group = c(1:4, 2, 3, 3, 4, 4),
+      from = c("onset", "onset", "onset", "onset", "onset", "onset",
+               "hospitalisation", "onset", "hospitalisation"),
+      to = c("report", "report", "report", "report", "death", "hospitalisation",
+             "discharge", "hospitalisation", "death"),
+      mean_delay = c(10, 10, 10, 10, 15, 7, 20, 7, 12),
+      cv_delay = c(0.3, 0.3, 0.3, 0.3, 0.4, 0.2, 0.5, 0.2, 0.3)
+    )
+    test_that("simulate_data returns correct structure and dimensions", {
+      n_per_group <- rep(10, max(delay_params$group))
+      total_n <- sum(n_per_group)
+      error_params <- list(prop_missing_data = 0.2, prob_error = 0.05)
+      range_dates <- as.integer(as.Date(c("2025-03-01", "2025-09-01")))
+      set.seed(1)
+      sim_result <- simulate_data(
+        n_per_group = n_per_group,
+        delay_map = delay_map,
+        delay_params = delay_params,
+        error_params = error_params,
+        range_dates = range_dates,
+        simul_error = TRUE
+      )
+      expect_type(sim_result, "list")
+      expect_named(sim_result, c("true_data", "observed_data", "error_indicators"))
+      expect_equal(nrow(sim_result$true_data), total_n)
+      expect_equal(nrow(sim_result$observed_data), total_n)
+      expect_equal(nrow(sim_result$error_indicators), total_n)
+      expected_cols <- c("id", "group", "onset", "report", "death",
+                         "hospitalisation", "discharge")
+      expect_true(all(expected_cols %in% names(sim_result$true_data)))
+      expect_true(all(expected_cols %in% names(sim_result$observed_data)))
+    })
+    test_that("error_params as expected in simulated data", {
+      n_per_group <- rep(100, max(delay_params$group))
+      total_n <- sum(n_per_group)
+      error_params <- list(prop_missing_data = 0.2, prob_error = 0.05)
+      range_dates <- as.integer(as.Date(c("2025-03-01", "2025-09-01")))
+      set.seed(1)
+      sim_result <- simulate_data(
+        n_per_group = n_per_group,
+        delay_map = delay_map,
+        delay_params = delay_params,
+        error_params = error_params,
+        range_dates = range_dates,
+        simul_error = TRUE
+      )
+      # just the date columns from the true data and error indicators
+      date_cols <- setdiff(names(sim_result$true_data), c("id", "group"))
+      true_dates <- sim_result$true_data[, date_cols]
+      error_indic <- sim_result$error_indicators[, date_cols]
+      # identify date entries that should be observed in the groups
+      eligible_dates <- !is.na(true_dates)
+      n_eligible_points <- sum(eligible_dates)
+      eligible_indicators <- error_indic[eligible_dates]
+      # test the proportion of missing data
+      observed_prop_missing <- sum(is.na(eligible_indicators)) / n_eligible_points
+      expect_equal(observed_prop_missing, error_params$prop_missing_data,
+                   tolerance = 0.05)
+      # test the probability of an error
+      non_missing_indicators <- na.omit(eligible_indicators)
+      observed_prob_error <- sum(non_missing_indicators) /
+        length(non_missing_indicators)
+      expect_equal(observed_prob_error, error_params$prob_error, tolerance = 0.05)
+    })

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Test simulate data #5

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

Test simulate data #5

Are you sure you want to change the base?

Uh oh!

Test simulate data #5

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!