Skip to content

Commit

Permalink
Improvements and bug fixes for epiweekly other hubverse tables and pl…
Browse files Browse the repository at this point in the history
…ots (#312)
  • Loading branch information
dylanhmorris authored Jan 28, 2025
1 parent 82cda4b commit df3648d
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 41 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ repos:
#####
# R
- repo: https://github.com/lorenzwalthert/precommit
rev: v0.4.3.9001
rev: v0.4.3
hooks:
- id: style-files
- id: lintr
Expand Down
45 changes: 29 additions & 16 deletions hewr/R/to_epiweekly_quantile_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ to_epiweekly_quantiles <- function(model_run_dir,
value_col = ".value"
) |>
dplyr::mutate(
"location" = !!location
"location" = !!location,
"source_samples" = !!draws_file_name
)
message(glue::glue("Done processing {model_run_dir}"))
return(epiweekly_quantiles)
Expand Down Expand Up @@ -98,7 +99,7 @@ to_epiweekly_quantile_table <- function(model_batch_dir,
epiweekly_other_locations = c()) {
model_runs_path <- fs::path(model_batch_dir, "model_runs")

locations_to_process <- fs::dir_ls(model_runs_path,
model_run_dirs_to_process <- fs::dir_ls(model_runs_path,
type = "directory"
) |>
purrr::discard(~ fs::path_file(.x) %in% exclude)
Expand All @@ -122,45 +123,57 @@ to_epiweekly_quantile_table <- function(model_batch_dir,
day_of_week = 7
)

get_location_table <- \(loc) {
epiweekly_other <- loc %in% epiweekly_other_locations
get_location_table <- \(model_run_dir) {
loc <- fs::path_file(model_run_dir)
use_epiweekly_other <- loc %in% epiweekly_other_locations
which_forecast <- ifelse(use_epiweekly_other,
"explicitly epiweekly",
"aggregated daily"
)
glue::glue(
"Using {which_forecast} non-target ED visit forecast ",
"for location {loc}"
)

draws_file <- ifelse(
epiweekly_other,
use_epiweekly_other,
"epiweekly_with_epiweekly_other_samples",
"epiweekly_samples"
)
return(to_epiweekly_quantiles(
loc,
model_run_dir,
report_date = report_date,
max_lookback_days = 15,
draws_file_name = draws_file,
strict = strict
))
}

hubverse_table <- purrr::map(
locations_to_process,
quant_table <- purrr::map(
model_run_dirs_to_process,
get_location_table
) |>
dplyr::bind_rows() |>
dplyr::bind_rows()

loc_sources <- quant_table |>
dplyr::distinct(.data$location, .data$source_samples)

hubverse_table <- quant_table |>
forecasttools::get_hubverse_table(
report_epiweek_end,
target_name =
glue::glue("wk inc {disease_abbr} prop ed visits")
) |>
dplyr::inner_join(loc_sources,
by = "location"
) |>
dplyr::arrange(
.data$target,
.data$output_type,
.data$location,
.data$reference_date,
.data$horizon,
.data$output_type_id
) |>
dplyr::mutate(other_ed_visit_forecast = ifelse(
.data$location %in% !!epiweekly_other_locations,
"direct_epiweekly_fit",
"aggregated_daily_fit"
))

)
return(hubverse_table)
}
111 changes: 95 additions & 16 deletions hewr/tests/testthat/test_to_epiweekly_quantile_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,20 @@ test_that("to_epiweekly_quantiles works as expected", {
) |> suppressMessages()

expect_s3_class(result, "tbl_df")
expect_setequal(c(
"epiweek", "epiyear", "quantile_value", "quantile_level", "location"
), colnames(result))
checkmate::expect_names(
colnames(result),
identical.to = c(
"epiweek",
"epiyear",
"quantile_value",
"quantile_level",
"location",
"source_samples"
)
)

expect_equal(draws_file_name, unique(result$source_samples))

expect_gt(nrow(result), 0)
}

Expand Down Expand Up @@ -127,7 +138,11 @@ test_that("to_epiweekly_quantiles handles missing forecast files", {


# tests for `to_epiweekly_quantile_table`
test_that("to_epiweekly_quantile_table handles multiple locations", {
test_that(paste0(
"to_epiweekly_quantile_table ",
"handles multiple locations ",
"and multiple source files"
), {
batch_dir_name <- "covid-19_r_2024-12-14_f_2024-12-08_t_2024-12-14"
tempdir <- withr::local_tempdir()

Expand All @@ -142,6 +157,17 @@ test_that("to_epiweekly_quantile_table handles multiple locations", {
if (loc != "loc3") {
disease_cols <- c(disease_cols, "prop_disease_ed_visits")
}
create_tidy_forecast_data(
directory = loc_dir,
filename = "epiweekly_with_epiweekly_other_samples.parquet",
date_cols = seq(
lubridate::ymd("2024-12-08"), lubridate::ymd("2024-12-14"),
by = "week"
),
disease_cols = disease_cols,
n_draw = 25,
with_epiweek = TRUE
)

create_tidy_forecast_data(
directory = loc_dir,
Expand All @@ -157,7 +183,10 @@ test_that("to_epiweekly_quantile_table handles multiple locations", {
})

## should succeed despite loc3 not having valid draws with strict = FALSE
result_w_both_locations <- to_epiweekly_quantile_table(temp_batch_dir) |>
result_w_both_locations <-
to_epiweekly_quantile_table(temp_batch_dir,
epiweekly_other_locations = "loc1"
) |>
suppressMessages()

## should error if strict = TRUE because loc3 does not have
Expand All @@ -168,6 +197,44 @@ test_that("to_epiweekly_quantile_table handles multiple locations", {
"did not find valid draws"
)

## should succeed with strict = TRUE if loc3 is excluded
alt_result_w_both_locations <- (
to_epiweekly_quantile_table(temp_batch_dir,
strict = TRUE,
exclude = "loc3"
)) |>
suppressMessages()

## results should be equivalent for loc2,
## but not for loc1
expect_equal(
result_w_both_locations |>
dplyr::filter(location == "loc2"),
alt_result_w_both_locations |>
dplyr::filter(location == "loc2")
)

## check that one used epiweekly
## other for loc1 while other used
## default, resulting in different values
loc1_a <- result_w_both_locations |>
dplyr::filter(location == "loc1") |>
dplyr::pull(.data$value)
loc1_b <- alt_result_w_both_locations |>
dplyr::filter(location == "loc1") |>
dplyr::pull(.data$value)

## length checks ensure that the
## number of allowed equalities _could_
## be reached if the vectors were mostly
## or entirely identical
expect_gt(length(loc1_a), 10)
expect_gt(length(loc1_b), 10)
expect_lt(
sum(loc1_a == loc1_b),
5
)

expect_s3_class(result_w_both_locations, "tbl_df")
expect_gt(nrow(result_w_both_locations), 0)
checkmate::expect_names(
Expand All @@ -181,20 +248,32 @@ test_that("to_epiweekly_quantile_table handles multiple locations", {
"output_type",
"output_type_id",
"value",
"other_ed_visit_forecast"
"source_samples"
)
)
expect_setequal(
c("loc1", "loc2"),
result_w_both_locations$location
result_w_both_locations$location,
c("loc1", "loc2")
)
expect_setequal(
alt_result_w_both_locations$location,
c("loc1", "loc2")
)
expect_false("loc3" %in% result_w_both_locations$location)

result_w_one_location <- to_epiweekly_quantile_table(
model_batch_dir = temp_batch_dir,
exclude = "loc1"
) |>
suppressMessages()
expect_true("loc2" %in% result_w_one_location$location)
expect_false("loc1" %in% result_w_one_location$location)
expect_setequal(
result_w_both_locations$source_samples,
c(
"epiweekly_samples",
"epiweekly_with_epiweekly_other_samples"
)
)

expect_setequal(
alt_result_w_both_locations$source_samples,
"epiweekly_samples"
)


expect_false("loc3" %in% result_w_both_locations$location)
expect_false("loc3" %in% alt_result_w_both_locations$location)
})
Loading

0 comments on commit df3648d

Please sign in to comment.