From 32df62e34a2f8fb16b2825614b1b485e2875e2dc Mon Sep 17 00:00:00 2001 From: "Dylan H. Morris" Date: Mon, 16 Dec 2024 21:42:53 +0000 Subject: [PATCH] Add basic batch postprocessing script --- pipelines/batch/setup_prod_job.py | 6 ++-- pipelines/postprocess_all_locations.py | 42 +++++++++++++++++++------- pipelines/utils.py | 6 ++-- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/pipelines/batch/setup_prod_job.py b/pipelines/batch/setup_prod_job.py index 73c6c4ef..0913e7a2 100644 --- a/pipelines/batch/setup_prod_job.py +++ b/pipelines/batch/setup_prod_job.py @@ -192,12 +192,14 @@ def main( help=("Name of the Azure batch pool on which to run the job"), ) parser.add_argument( - "diseases", + "--diseases", type=str, + default="COVID-19 Influenza", help=( "Name(s) of disease(s) to run as part of the job, " "as a whitespace-separated string. Supported " - "values are 'COVID-19' and 'Influenza'." + "values are 'COVID-19' and 'Influenza'. " + "Default 'COVID-19 Influenza' (i.e. run for both)." ), ) diff --git a/pipelines/postprocess_all_locations.py b/pipelines/postprocess_all_locations.py index fbf922a2..dc656d87 100644 --- a/pipelines/postprocess_all_locations.py +++ b/pipelines/postprocess_all_locations.py @@ -1,5 +1,4 @@ import argparse -import logging import subprocess from pathlib import Path @@ -7,7 +6,7 @@ from utils import get_all_forecast_dirs, parse_model_batch_dir_name -def create_hubverse_table(model_batch_dir: Path) -> None: +def create_hubverse_table(base_path: Path, model_batch_dir: Path) -> None: batch_info = parse_model_batch_dir_name(model_batch_dir) output_file_name = ( @@ -16,13 +15,14 @@ def create_hubverse_table(model_batch_dir: Path) -> None: "hubverse-table.tsv" ) - output_path = Path(model_batch_dir, output_file_name) + model_batch_path = Path(base_path, model_batch_dir) + output_path = Path(model_batch_path, output_file_name) result = subprocess.run( [ "Rscript", "pipelines/create_hubverse_table.R", - f"{model_batch_dir}", + f"{model_batch_path}", f"{output_path}", ], capture_output=True, @@ -32,15 +32,23 @@ def create_hubverse_table(model_batch_dir: Path) -> None: return None -def process_model_batch_dir(model_batch_dir: Path) -> None: - cp.process_dir(model_batch_dir) - create_hubverse_table(model_batch_dir) +def process_model_batch_dir(base_dir: Path, model_batch_dir: Path) -> None: + plot_types = ["Disease", "Other", "prop_disease_ed_visits"] + plots_to_collate = [f"{x}_forecast_plot.pdf" for x in plot_types] + [ + f"{x}_forecast_plot_log.pdf" for x in plot_types + ] + cp.process_dir( + Path(base_dir, model_batch_dir), target_filenames=plots_to_collate + ) + create_hubverse_table(base_dir, model_batch_dir) -def main(base_forecast_dir: Path): - to_process = get_all_forecast_dirs(base_forecast_dir) +def main( + base_forecast_dir: Path, diseases: list[str] = ["COVID-19", "Influenza"] +): + to_process = get_all_forecast_dirs(base_forecast_dir, diseases) for batch_dir in to_process: - process_model_batch_dir(batch_dir) + process_model_batch_dir(base_forecast_dir, batch_dir) if __name__ == "__main__": @@ -50,9 +58,21 @@ def main(base_forecast_dir: Path): parser.add_argument( "base_forecast_dir", type=Path, - required=True, help="Directory containing forecast subdirectories.", ) + parser.add_argument( + "--diseases", + type=str, + default="COVID-19 Influenza", + help=( + "Name(s) of disease(s) to postprocess, " + "as a whitespace-separated string. Supported " + "values are 'COVID-19' and 'Influenza'. " + "Default 'COVID-19 Influenza' (i.e. postprocess both)." + ), + ) + args = parser.parse_args() + args.diseases = args.diseases.split() main(**vars(args)) diff --git a/pipelines/utils.py b/pipelines/utils.py index c18215cc..a8fe3629 100644 --- a/pipelines/utils.py +++ b/pipelines/utils.py @@ -67,11 +67,11 @@ def parse_model_batch_dir_name(model_batch_dir_name): ) return dict( disease=disease_map_lower_[disease], - report_date=datetime.strptime(report_date, "%Y-%m-%d").date(), - first_training_date=datetime.strptime( + report_date=datetime.datetime.strptime(report_date, "%Y-%m-%d").date(), + first_training_date=datetime.datetime.strptime( first_training_date, "%Y-%m-%d" ).date(), - last_training_date=datetime.strptime( + last_training_date=datetime.datetime.strptime( last_training_date, "%Y-%m-%d" ).date(), )