From 667b49bb2f6a06bf1f5e83b9d7983f58ba1b8a2b Mon Sep 17 00:00:00 2001 From: "Dylan H. Morris" Date: Tue, 19 Nov 2024 12:51:29 -0500 Subject: [PATCH] Move hubverse table creation main work to hewr --- hewr/NAMESPACE | 2 + hewr/R/to_epiweekly_quantile_table.R | 155 ++++++++++++++++++++++++ hewr/man/to_epiweekly_quantile_table.Rd | 24 ++++ hewr/man/to_epiweekly_quantiles.Rd | 30 +++++ pipelines/create_hubverse_table.R | 143 ++-------------------- 5 files changed, 224 insertions(+), 130 deletions(-) create mode 100644 hewr/R/to_epiweekly_quantile_table.R create mode 100644 hewr/man/to_epiweekly_quantile_table.Rd create mode 100644 hewr/man/to_epiweekly_quantiles.Rd diff --git a/hewr/NAMESPACE b/hewr/NAMESPACE index 04f4e2c6..71a47c3d 100644 --- a/hewr/NAMESPACE +++ b/hewr/NAMESPACE @@ -2,3 +2,5 @@ export(parse_model_batch_dir_name) export(parse_model_run_dir_path) +export(to_epiweekly_quantile_table) +export(to_epiweekly_quantiles) diff --git a/hewr/R/to_epiweekly_quantile_table.R b/hewr/R/to_epiweekly_quantile_table.R new file mode 100644 index 00000000..dbd77515 --- /dev/null +++ b/hewr/R/to_epiweekly_quantile_table.R @@ -0,0 +1,155 @@ +#' Read in daily forecast draws from a model run directory +#' and output a set of epiweekly quantiles, as a +#' [`tibbble`][tibble::tibble()]. +#' +#' @param model_run_dir Path to a directory containing +#' forecast draws to process, whose basename is the forecasted +#' location. +#' @param report_date Report date for which to generate epiweekly quantiles. +#' @param max_lookback_days How many days before the report date +#' to look back when generating epiweekly quantiles (determines how +#' many negative epiweekly forecast horizons (i.e. nowcast/backcast) +#' quantiles will be generated. +#' @return A [`tibble`][tibble::tibble()] of quantiles. +#' @export +to_epiweekly_quantiles <- function(model_run_dir, + report_date, + max_lookback_days) { + message(glue::glue("Processing {model_run_dir}...")) + draws_path <- fs::path(model_run_dir, + "forecast_samples", + ext = "parquet" + ) + location <- fs::path_file(model_run_dir) + + draws <- arrow::read_parquet(draws_path) |> + dplyr::filter(.data$date >= lubridate::ymd(!!report_date) - + lubridate::days(!!max_lookback_days)) + + if (nrow(draws) < 1) { + return(NULL) + } + + epiweekly_disease_draws <- draws |> + dplyr::filter( + disease == "Disease" + ) |> + forecasttools::daily_to_epiweekly( + date_col = "date", + value_col = ".value", + id_cols = ".draw", + weekly_value_name = "epiweekly_disease", + strict = TRUE + ) + + epiweekly_total_draws <- draws |> + dplyr::filter(.data$disease == "Other") |> + forecasttools::daily_to_epiweekly( + date_col = "date", + value_col = ".value", + id_cols = ".draw", + weekly_value_name = "epiweekly_total", + strict = TRUE + ) + + epiweekly_prop_draws <- dplyr::inner_join( + epiweekly_disease_draws, + epiweekly_total_draws, + by = c( + "epiweek", + "epiyear", + ".draw" + ) + ) |> + dplyr::mutate( + "epiweekly_proportion" = + .data$epiweekly_disease / .data$epiweekly_total + ) + + + epiweekly_quantiles <- epiweekly_prop_draws |> + forecasttools::trajectories_to_quantiles( + timepoint_cols = c("epiweek", "epiyear"), + value_col = "epiweekly_proportion" + ) |> + dplyr::mutate( + "location" = !!location + ) + + message(glue::glue("Done processing {model_run_dir}")) + return(epiweekly_quantiles) +} + +#' Create an epiweekly hubverse-format forecast quantile table +#' from a model batch directory containing forecasts +#' for multiple locations as daily MCMC draws. +#' +#' @param model_batch_dir Model batch directory containing +#' the individual location forecast directories +#' ("model run directories") to process. Name should be in the format +#' `{disease}_r_{reference_date}_f_{first_data_date}_t_{last_data_date}`. +#' @param exclude Locations to exclude, if any, as a list of strings. +#' Default `NULL` (exclude nothing). +#' +#' @export +to_epiweekly_quantile_table <- function(model_batch_dir, + exclude = NULL) { + locations_to_process <- fs::dir_ls(model_batch_dir, + type = "directory" + ) + + if (!is.null(exclude)) { + locations_to_process <- locations_to_process[ + !(fs::path_file(locations_to_process) %in% exclude) + ] + } + + batch_params <- hewr::parse_model_batch_dir_name( + fs::path_file(model_batch_dir) + ) + report_date <- batch_params$report_date + disease <- batch_params$disease + disease_abbr <- dplyr::case_when( + disease == "Influenza" ~ "flu", + disease == "COVID-19" ~ "covid", + TRUE ~ disease + ) + + report_epiweek <- lubridate::epiweek(report_date) + report_epiyear <- lubridate::epiyear(report_date) + report_epiweek_end <- forecasttools::epiweek_to_date( + report_epiweek, + report_epiyear, + day_of_week = 7 + ) + + hubverse_table <- purrr::map( + locations_to_process, + \(x) { + to_epiweekly_quantiles( + x, + report_date = report_date, + max_lookback_days = 8 + ) + } + ## ensures we get the full -1 horizon but do not + ## waste time quantilizing draws that will not be + ## included in the final table. + ) |> + dplyr::bind_rows() |> + forecasttools::get_hubverse_table( + report_epiweek_end, + target_name = + glue::glue("wk inc {disease_abbr} prop ed visits") + ) |> + dplyr::arrange( + .data$target, + .data$output_type, + .data$location, + .data$reference_date, + .data$horizon, + .data$output_type_id + ) + + return(hubverse_table) +} diff --git a/hewr/man/to_epiweekly_quantile_table.Rd b/hewr/man/to_epiweekly_quantile_table.Rd new file mode 100644 index 00000000..56d017c1 --- /dev/null +++ b/hewr/man/to_epiweekly_quantile_table.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/to_epiweekly_quantile_table.R +\name{to_epiweekly_quantile_table} +\alias{to_epiweekly_quantile_table} +\title{Create an epiweekly hubverse-format forecast quantile table +from a model batch directory containing forecasts +for multiple locations as daily MCMC draws.} +\usage{ +to_epiweekly_quantile_table(model_batch_dir, exclude = NULL) +} +\arguments{ +\item{model_batch_dir}{Model batch directory containing +the individual location forecast directories +("model run directories") to process. Name should be in the format +\verb{\{disease\}_r_\{reference_date\}_f_\{first_data_date\}_t_\{last_data_date\}}.} + +\item{exclude}{Locations to exclude, if any, as a list of strings. +Default \code{NULL} (exclude nothing).} +} +\description{ +Create an epiweekly hubverse-format forecast quantile table +from a model batch directory containing forecasts +for multiple locations as daily MCMC draws. +} diff --git a/hewr/man/to_epiweekly_quantiles.Rd b/hewr/man/to_epiweekly_quantiles.Rd new file mode 100644 index 00000000..47738ad5 --- /dev/null +++ b/hewr/man/to_epiweekly_quantiles.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/to_epiweekly_quantile_table.R +\name{to_epiweekly_quantiles} +\alias{to_epiweekly_quantiles} +\title{Read in daily forecast draws from a model run directory +and output a set of epiweekly quantiles, as a +\code{\link[tibble:tibble]{tibbble}}.} +\usage{ +to_epiweekly_quantiles(model_run_dir, report_date, max_lookback_days) +} +\arguments{ +\item{model_run_dir}{Path to a directory containing +forecast draws to process, whose basename is the forecasted +location.} + +\item{report_date}{Report date for which to generate epiweekly quantiles.} + +\item{max_lookback_days}{How many days before the report date +to look back when generating epiweekly quantiles (determines how +many negative epiweekly forecast horizons (i.e. nowcast/backcast) +quantiles will be generated.} +} +\value{ +A \code{\link[tibble:tibble]{tibble}} of quantiles. +} +\description{ +Read in daily forecast draws from a model run directory +and output a set of epiweekly quantiles, as a +\code{\link[tibble:tibble]{tibbble}}. +} diff --git a/pipelines/create_hubverse_table.R b/pipelines/create_hubverse_table.R index 23a7ecdb..db724c39 100644 --- a/pipelines/create_hubverse_table.R +++ b/pipelines/create_hubverse_table.R @@ -1,138 +1,21 @@ -draws_to_quantiles <- function(forecast_dir, - report_date, - max_lookback_days) { - message(glue::glue("Processing {forecast_dir}...")) - draws_path <- fs::path(forecast_dir, - "forecast_samples", - ext = "parquet" - ) - location <- fs::path_file(forecast_dir) - - draws <- arrow::read_parquet(draws_path) |> - dplyr::filter(date >= lubridate::ymd(report_date) - - lubridate::days(max_lookback_days)) - - if (nrow(draws) < 1) { - return(NULL) - } - - epiweekly_disease_draws <- draws |> - dplyr::filter( - disease == "Disease" - ) |> - forecasttools::daily_to_epiweekly( - date_col = "date", - value_col = ".value", - id_cols = ".draw", - weekly_value_name = "epiweekly_disease", - strict = TRUE - ) - - epiweekly_total_draws <- draws |> - dplyr::filter(disease == "Other") |> - forecasttools::daily_to_epiweekly( - date_col = "date", - value_col = ".value", - id_cols = ".draw", - weekly_value_name = "epiweekly_total", - strict = TRUE - ) - - epiweekly_prop_draws <- dplyr::inner_join( - epiweekly_disease_draws, - epiweekly_total_draws, - by = c( - "epiweek", - "epiyear", - ".draw" - ) - ) |> - dplyr::mutate( - epiweekly_proportion = - epiweekly_disease / epiweekly_total - ) - - - epiweekly_quantiles <- epiweekly_prop_draws |> - forecasttools::trajectories_to_quantiles( - timepoint_cols = c("epiweek", "epiyear"), - value_col = "epiweekly_proportion" - ) |> - dplyr::mutate( - location = !!location - ) - - message(glue::glue("Done processing {forecast_dir}")) - return(epiweekly_quantiles) -} - -create_hubverse_table <- function(model_batch_dir, - exclude = NULL) { - locations_to_process <- fs::dir_ls(model_batch_dir, - type = "directory" - ) - - if (!is.null(exclude)) { - locations_to_process <- locations_to_process[ - !(fs::path_file(locations_to_process) %in% exclude) - ] - } - - batch_params <- hewr::parse_model_batch_dir_name( - fs::path_file(model_batch_dir) - ) - report_date <- batch_params$report_date - disease <- batch_params$disease - disease_abbr <- dplyr::case_when( - disease == "Influenza" ~ "flu", - disease == "COVID-19" ~ "covid", - TRUE ~ disease - ) - - report_epiweek <- lubridate::epiweek(report_date) - report_epiyear <- lubridate::epiyear(report_date) - report_epiweek_end <- forecasttools::epiweek_to_date( - report_epiweek, - report_epiyear, - day_of_week = 7 - ) - - hubverse_table <- purrr::map( - locations_to_process, - \(x) { - draws_to_quantiles( - x, - report_date = report_date, - max_lookback_days = 8 - ) - } - ## ensures we get the full -1 horizon but do not - ## waste time quantilizing draws that will not be - ## included in the final table. - ) |> - dplyr::bind_rows() |> - forecasttools::get_hubverse_table( - report_epiweek_end, - target_name = - glue::glue("wk inc {disease_abbr} prop ed visits") - ) |> - dplyr::arrange( - target, - output_type, - location, - reference_date, - horizon, - output_type_id - ) - - return(hubverse_table) -} +#!/usr/bin/env Rscript +#' Create a hubverse table from model output, using +#' utilities from `hewr`. +#' +#' @param model_batch_dir Model batch directory from which +#' to create a hubverse table +#' @param output_path path to save the table as a tsv +#' @param exclude Locations to exclude, as a vector of strings. +#' @return Nothing, saving the table as a side effect. main <- function(model_batch_dir, output_path, exclude = NULL) { - create_hubverse_table(model_batch_dir, exclude = exclude) |> + hewr::to_epiweekly_quantile_table( + model_batch_dir, + exclude = exclude + ) |> readr::write_tsv(output_path) }