Skip to content

Commit

Permalink
Move hubverse table creation main work to hewr
Browse files Browse the repository at this point in the history
  • Loading branch information
dylanhmorris committed Nov 19, 2024
1 parent 83a1d65 commit 667b49b
Show file tree
Hide file tree
Showing 5 changed files with 224 additions and 130 deletions.
2 changes: 2 additions & 0 deletions hewr/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@

export(parse_model_batch_dir_name)
export(parse_model_run_dir_path)
export(to_epiweekly_quantile_table)
export(to_epiweekly_quantiles)
155 changes: 155 additions & 0 deletions hewr/R/to_epiweekly_quantile_table.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#' Read in daily forecast draws from a model run directory
#' and output a set of epiweekly quantiles, as a
#' [`tibbble`][tibble::tibble()].
#'
#' @param model_run_dir Path to a directory containing
#' forecast draws to process, whose basename is the forecasted
#' location.
#' @param report_date Report date for which to generate epiweekly quantiles.
#' @param max_lookback_days How many days before the report date
#' to look back when generating epiweekly quantiles (determines how
#' many negative epiweekly forecast horizons (i.e. nowcast/backcast)
#' quantiles will be generated.
#' @return A [`tibble`][tibble::tibble()] of quantiles.
#' @export
to_epiweekly_quantiles <- function(model_run_dir,
report_date,
max_lookback_days) {
message(glue::glue("Processing {model_run_dir}..."))
draws_path <- fs::path(model_run_dir,
"forecast_samples",
ext = "parquet"
)
location <- fs::path_file(model_run_dir)

draws <- arrow::read_parquet(draws_path) |>
dplyr::filter(.data$date >= lubridate::ymd(!!report_date) -
lubridate::days(!!max_lookback_days))

if (nrow(draws) < 1) {
return(NULL)
}

epiweekly_disease_draws <- draws |>
dplyr::filter(
disease == "Disease"
) |>
forecasttools::daily_to_epiweekly(
date_col = "date",
value_col = ".value",
id_cols = ".draw",
weekly_value_name = "epiweekly_disease",
strict = TRUE
)

epiweekly_total_draws <- draws |>
dplyr::filter(.data$disease == "Other") |>
forecasttools::daily_to_epiweekly(
date_col = "date",
value_col = ".value",
id_cols = ".draw",
weekly_value_name = "epiweekly_total",
strict = TRUE
)

epiweekly_prop_draws <- dplyr::inner_join(
epiweekly_disease_draws,
epiweekly_total_draws,
by = c(
"epiweek",
"epiyear",
".draw"
)
) |>
dplyr::mutate(
"epiweekly_proportion" =
.data$epiweekly_disease / .data$epiweekly_total
)


epiweekly_quantiles <- epiweekly_prop_draws |>
forecasttools::trajectories_to_quantiles(
timepoint_cols = c("epiweek", "epiyear"),
value_col = "epiweekly_proportion"
) |>
dplyr::mutate(
"location" = !!location
)

message(glue::glue("Done processing {model_run_dir}"))
return(epiweekly_quantiles)
}

#' Create an epiweekly hubverse-format forecast quantile table
#' from a model batch directory containing forecasts
#' for multiple locations as daily MCMC draws.
#'
#' @param model_batch_dir Model batch directory containing
#' the individual location forecast directories
#' ("model run directories") to process. Name should be in the format
#' `{disease}_r_{reference_date}_f_{first_data_date}_t_{last_data_date}`.
#' @param exclude Locations to exclude, if any, as a list of strings.
#' Default `NULL` (exclude nothing).
#'
#' @export
to_epiweekly_quantile_table <- function(model_batch_dir,
exclude = NULL) {
locations_to_process <- fs::dir_ls(model_batch_dir,
type = "directory"
)

if (!is.null(exclude)) {
locations_to_process <- locations_to_process[
!(fs::path_file(locations_to_process) %in% exclude)
]
}

batch_params <- hewr::parse_model_batch_dir_name(
fs::path_file(model_batch_dir)
)
report_date <- batch_params$report_date
disease <- batch_params$disease
disease_abbr <- dplyr::case_when(
disease == "Influenza" ~ "flu",
disease == "COVID-19" ~ "covid",
TRUE ~ disease
)

report_epiweek <- lubridate::epiweek(report_date)
report_epiyear <- lubridate::epiyear(report_date)
report_epiweek_end <- forecasttools::epiweek_to_date(
report_epiweek,
report_epiyear,
day_of_week = 7
)

hubverse_table <- purrr::map(
locations_to_process,
\(x) {
to_epiweekly_quantiles(
x,
report_date = report_date,
max_lookback_days = 8
)
}
## ensures we get the full -1 horizon but do not
## waste time quantilizing draws that will not be
## included in the final table.
) |>
dplyr::bind_rows() |>
forecasttools::get_hubverse_table(
report_epiweek_end,
target_name =
glue::glue("wk inc {disease_abbr} prop ed visits")
) |>
dplyr::arrange(
.data$target,
.data$output_type,
.data$location,
.data$reference_date,
.data$horizon,
.data$output_type_id
)

return(hubverse_table)
}
24 changes: 24 additions & 0 deletions hewr/man/to_epiweekly_quantile_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions hewr/man/to_epiweekly_quantiles.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

143 changes: 13 additions & 130 deletions pipelines/create_hubverse_table.R
Original file line number Diff line number Diff line change
@@ -1,138 +1,21 @@
draws_to_quantiles <- function(forecast_dir,
report_date,
max_lookback_days) {
message(glue::glue("Processing {forecast_dir}..."))
draws_path <- fs::path(forecast_dir,
"forecast_samples",
ext = "parquet"
)
location <- fs::path_file(forecast_dir)

draws <- arrow::read_parquet(draws_path) |>
dplyr::filter(date >= lubridate::ymd(report_date) -
lubridate::days(max_lookback_days))

if (nrow(draws) < 1) {
return(NULL)
}

epiweekly_disease_draws <- draws |>
dplyr::filter(
disease == "Disease"
) |>
forecasttools::daily_to_epiweekly(
date_col = "date",
value_col = ".value",
id_cols = ".draw",
weekly_value_name = "epiweekly_disease",
strict = TRUE
)

epiweekly_total_draws <- draws |>
dplyr::filter(disease == "Other") |>
forecasttools::daily_to_epiweekly(
date_col = "date",
value_col = ".value",
id_cols = ".draw",
weekly_value_name = "epiweekly_total",
strict = TRUE
)

epiweekly_prop_draws <- dplyr::inner_join(
epiweekly_disease_draws,
epiweekly_total_draws,
by = c(
"epiweek",
"epiyear",
".draw"
)
) |>
dplyr::mutate(
epiweekly_proportion =
epiweekly_disease / epiweekly_total
)


epiweekly_quantiles <- epiweekly_prop_draws |>
forecasttools::trajectories_to_quantiles(
timepoint_cols = c("epiweek", "epiyear"),
value_col = "epiweekly_proportion"
) |>
dplyr::mutate(
location = !!location
)

message(glue::glue("Done processing {forecast_dir}"))
return(epiweekly_quantiles)
}

create_hubverse_table <- function(model_batch_dir,
exclude = NULL) {
locations_to_process <- fs::dir_ls(model_batch_dir,
type = "directory"
)

if (!is.null(exclude)) {
locations_to_process <- locations_to_process[
!(fs::path_file(locations_to_process) %in% exclude)
]
}

batch_params <- hewr::parse_model_batch_dir_name(
fs::path_file(model_batch_dir)
)
report_date <- batch_params$report_date
disease <- batch_params$disease
disease_abbr <- dplyr::case_when(
disease == "Influenza" ~ "flu",
disease == "COVID-19" ~ "covid",
TRUE ~ disease
)

report_epiweek <- lubridate::epiweek(report_date)
report_epiyear <- lubridate::epiyear(report_date)
report_epiweek_end <- forecasttools::epiweek_to_date(
report_epiweek,
report_epiyear,
day_of_week = 7
)

hubverse_table <- purrr::map(
locations_to_process,
\(x) {
draws_to_quantiles(
x,
report_date = report_date,
max_lookback_days = 8
)
}
## ensures we get the full -1 horizon but do not
## waste time quantilizing draws that will not be
## included in the final table.
) |>
dplyr::bind_rows() |>
forecasttools::get_hubverse_table(
report_epiweek_end,
target_name =
glue::glue("wk inc {disease_abbr} prop ed visits")
) |>
dplyr::arrange(
target,
output_type,
location,
reference_date,
horizon,
output_type_id
)

return(hubverse_table)
}
#!/usr/bin/env Rscript


#' Create a hubverse table from model output, using
#' utilities from `hewr`.
#'
#' @param model_batch_dir Model batch directory from which
#' to create a hubverse table
#' @param output_path path to save the table as a tsv
#' @param exclude Locations to exclude, as a vector of strings.
#' @return Nothing, saving the table as a side effect.
main <- function(model_batch_dir,
output_path,
exclude = NULL) {
create_hubverse_table(model_batch_dir, exclude = exclude) |>
hewr::to_epiweekly_quantile_table(
model_batch_dir,
exclude = exclude
) |>
readr::write_tsv(output_path)
}

Expand Down

0 comments on commit 667b49b

Please sign in to comment.