diff --git a/hewr/R/parse_dir_name.R b/hewr/R/directory_utils.R
similarity index 60%
rename from hewr/R/parse_dir_name.R
rename to hewr/R/directory_utils.R
index f8a4e15c..f4b519bb 100644
--- a/hewr/R/parse_dir_name.R
+++ b/hewr/R/directory_utils.R
@@ -1,8 +1,13 @@
+#' Utilities for handling and parsing directory names
+#' based on pyrenew-hew pipeline conventions.
+
 disease_map_lower <- list(
   "covid-19" = "COVID-19",
   "influenza" = "Influenza"
 )
 
+#' Parse model batch directory name.
+#'
 #' Parse the name of a model batch directory
 #' (i.e. a directory representing a single
 #' report date and disease pair, but potentially
@@ -40,6 +45,8 @@ parse_model_batch_dir_name <- function(model_batch_dir_name) {
   ))
 }
 
+#' Parse model run directory path.
+#'
 #' Parse path to a model run directory
 #' (i.e. a directory representing a run for a
 #' particular location, disease, and reference
@@ -61,3 +68,39 @@ parse_model_run_dir_path <- function(model_run_dir_path) {
     parse_model_batch_dir(batch_dir)
   ))
 }
+
+
+#' Get forecast directories.
+#'
+#' Get all the subdirectories within a parent directory
+#' that match the pattern for a forecast run for
+#' one or more given diseases.
+#'
+#' @param dir_of_forecast_dirs Directory in which to look for
+#' subdirectories representing individual forecast date / pathogen /
+#' dataset combinations.
+#' @param diseases Names of the diseases to match, as a vector of strings,
+#' or a single disease as a string.
+#' @return A vector of paths to the forecast subdirectories.
+get_all_forecast_dirs <- function(dir_of_forecast_dirs,
+                                  diseases) {
+  # disease names are lowercase by convention
+  match_patterns <- stringr::str_c(tolower(diseases),
+    "_r",
+    collapse = "|"
+  )
+
+  dirs <- tibble::tibble(
+    dir_path = fs::dir_ls(
+      dir_of_forecast_dirs,
+      type = "directory"
+    )
+  ) |>
+    dplyr::filter(stringr::str_starts(
+      fs::path_file(dir_path),
+      match_patterns
+    )) |>
+    dplyr::pull(dir_path)
+
+  return(dirs)
+}
diff --git a/hewr/man/disease_map_lower.Rd b/hewr/man/disease_map_lower.Rd
new file mode 100644
index 00000000..dbf5de27
--- /dev/null
+++ b/hewr/man/disease_map_lower.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/directory_utils.R
+\docType{data}
+\name{disease_map_lower}
+\alias{disease_map_lower}
+\title{Utilities for handling and parsing directory names
+based on pyrenew-hew pipeline conventions.}
+\format{
+An object of class \code{list} of length 2.
+}
+\usage{
+disease_map_lower
+}
+\description{
+Utilities for handling and parsing directory names
+based on pyrenew-hew pipeline conventions.
+}
+\keyword{datasets}
diff --git a/hewr/man/get_all_forecast_dirs.Rd b/hewr/man/get_all_forecast_dirs.Rd
new file mode 100644
index 00000000..71255dfb
--- /dev/null
+++ b/hewr/man/get_all_forecast_dirs.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/directory_utils.R
+\name{get_all_forecast_dirs}
+\alias{get_all_forecast_dirs}
+\title{Get forecast directories.}
+\usage{
+get_all_forecast_dirs(dir_of_forecast_dirs, diseases)
+}
+\arguments{
+\item{dir_of_forecast_dirs}{Directory in which to look for
+subdirectories representing individual forecast date / pathogen /
+dataset combinations.}
+
+\item{diseases}{Names of the diseases to match, as a vector of strings,
+or a single disease as a string.}
+}
+\value{
+A vector of paths to the forecast subdirectories.
+}
+\description{
+Get all the subdirectories within a parent directory
+that match the pattern for a forecast run for
+one or more given diseases.
+}
diff --git a/hewr/man/parse_model_batch_dir_name.Rd b/hewr/man/parse_model_batch_dir_name.Rd
index 2bfbce25..adb01fb9 100644
--- a/hewr/man/parse_model_batch_dir_name.Rd
+++ b/hewr/man/parse_model_batch_dir_name.Rd
@@ -1,12 +1,8 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/parse_dir_name.R
+% Please edit documentation in R/directory_utils.R
 \name{parse_model_batch_dir_name}
 \alias{parse_model_batch_dir_name}
-\title{Parse the name of a model batch directory
-(i.e. a directory representing a single
-report date and disease pair, but potentially
-with fits for multiple locations), returning
-a named list of quantities of interest.}
+\title{Parse model batch directory name.}
 \usage{
 parse_model_batch_dir_name(model_batch_dir_name)
 }
diff --git a/hewr/man/parse_model_run_dir_path.Rd b/hewr/man/parse_model_run_dir_path.Rd
index 85a74dfd..4795c6a3 100644
--- a/hewr/man/parse_model_run_dir_path.Rd
+++ b/hewr/man/parse_model_run_dir_path.Rd
@@ -1,11 +1,8 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/parse_dir_name.R
+% Please edit documentation in R/directory_utils.R
 \name{parse_model_run_dir_path}
 \alias{parse_model_run_dir_path}
-\title{Parse path to a model run directory
-(i.e. a directory representing a run for a
-particular location, disease, and reference
-date, and extract key quantities of interest.}
+\title{Parse model run directory path.}
 \usage{
 parse_model_run_dir_path(model_run_dir_path)
 }
diff --git a/pipelines/collate_score_tables.R b/pipelines/collate_score_tables.R
index 70e0f9da..e5176921 100644
--- a/pipelines/collate_score_tables.R
+++ b/pipelines/collate_score_tables.R
@@ -12,35 +12,6 @@ purrr::walk(script_packages, \(pkg) {
 })
 
 
-#' Get all the subdirectories within a parent directory
-#' that match the pattern for a forecast run for a
-#' given disease and optionally a given report date.
-#'
-#' @param parent_dir Directory in which to look for forecast subdirectories.
-#' @param diseases Names of the diseases to match, as a vector of strings,
-#' or a single disease as a string.
-#' @return A vector of paths to the forecast subdirectories.
-get_all_forecast_dirs <- function(dir_of_forecast_date_dirs,
-                                  diseases) {
-  # disease names are lowercase by convention
-  match_patterns <- str_c(tolower(diseases), "_r", collapse = "|")
-
-  dirs <- tibble::tibble(
-    dir_path = fs::dir_ls(
-      dir_of_forecast_date_dirs,
-      type = "directory"
-    )
-  ) |>
-    dplyr::filter(str_starts(
-      fs::path_file(dir_path),
-      match_patterns
-    )) |>
-    dplyr::pull(dir_path)
-
-  return(dirs)
-}
-
-
 process_loc_date_score_table <- function(model_run_dir) {
   table_path <- fs::path(model_run_dir,
     "score_table",
@@ -137,7 +108,7 @@ collate_scores_for_date <- function(model_run_dir,
 collate_all_score_tables <- function(model_base_dir,
                                      disease,
                                      score_file_save_path = NULL) {
-  date_dirs_to_process <- get_all_forecast_dirs(
+  date_dirs_to_process <- hewr::get_all_forecast_dirs(
     model_base_dir,
     diseases = disease
   )