CDCgov
diff --git a/‎.github/workflows/pipeline-run-check.yaml
Lines changed: 110 additions & 13 deletions b/‎.github/workflows/pipeline-run-check.yaml
Lines changed: 110 additions & 13 deletions
diff --git a/‎.gitignore
Lines changed: 2 additions & 6 deletions b/‎.gitignore
Lines changed: 2 additions & 6 deletions
diff --git a/‎pipelines/forecast_state.py
Lines changed: 34 additions & 47 deletions b/‎pipelines/forecast_state.py
Lines changed: 34 additions & 47 deletions
@@ -9,39 +9,136 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
   cancel-in-progress: true
 
+env:
+  BASE_DIR: "pipelines/end_to_end_test_output"
+  DATA_DIR: "pipelines/end_to_end_test_output/private_data"
+
 jobs:
-  run-pipeline:
-    strategy:
-      matrix:
-        os: [ubuntu-22.04, macos-latest]
-    runs-on:  ${{matrix.os}}
+  generate-data:  # builds the test dataset once and publishes it as an artifact
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up R
+        uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: "release"
+          use-public-rspm: true
+      - name: Set up dependencies for hewr
+        uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          working-directory: hewr
+      - name: Install hewr
+        run: pak::local_install("hewr", ask = FALSE)
+        shell: Rscript {0}
+      - name: Generate test data  # written under DATA_DIR
+        run: Rscript pipelines/generate_test_data.R ${{ env.DATA_DIR }}
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: test-data  # downloaded by the fit-models job
+          path: ${{ env.DATA_DIR }}
+          retention-days: 1
 
+  fit-models:
+    needs: generate-data
+    runs-on: ubuntu-latest
     env:
       NHSN_API_KEY_ID: ${{ secrets.NHSN_API_KEY_ID }}
       NHSN_API_KEY_SECRET: ${{ secrets.NHSN_API_KEY_SECRET }}
+    strategy:
+      matrix:
+        model: [h, e, he, hw, ew, hew]
+        disease: [COVID-19, Influenza]
+        location: [US, CA, MT]
+        exclude:
+          - model: hw
+            disease: Influenza
+          - model: ew
+            disease: Influenza
+          - model: hew
+            disease: Influenza
+          - model: hw
+            location: US
+          - model: ew
+            location: US
+          - model: hew
+            location: US
     steps:
       - uses: actions/checkout@v4
       - name: Set up python
         uses: actions/setup-python@v5
         with:
           python-version: "3.12"
-      - name: "Set up R"
+      - name: Set up R
         uses: r-lib/actions/setup-r@v2
         with:
           r-version: "release"
           use-public-rspm: true
-      - name: "Set up Quarto"
+      - name: Set up Quarto
         uses: quarto-dev/quarto-actions/setup@v2
-      - name: "Install poetry"
+      - name: Install poetry
         run: pip install poetry
-      - name: "Install pyrenew-hew"
+      - name: Install pyrenew-hew
         run: poetry install
-      - name: "Set up dependencies for hewr"
+      - name: Set up dependencies for hewr
+        uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          working-directory: hewr
+      - name: Install hewr
+        run: pak::local_install("hewr", ask = FALSE)
+        shell: Rscript {0}
+      - name: Download test data
+        uses: actions/download-artifact@v4
+        with:
+          name: test-data
+          path: ${{ env.DATA_DIR }}
+      - name: Fit model
+        run: |
+          poetry run bash pipelines/tests/test_fit.sh ${{ env.BASE_DIR }} \
+          ${{ matrix.disease }} ${{ matrix.location }} ${{ matrix.model }}
+      - name: Upload artifact  # one artifact per disease/location/model combination
+        uses: actions/upload-artifact@v4
+        with:
+          name: >-
+            test-fit-${{ matrix.disease }}-${{ matrix.location }}-${{ matrix.model }}
+          path: ${{ env.BASE_DIR }}
+
+  postprocess-models:
+    needs: fit-models
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Set up R
+        uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: "release"
+          use-public-rspm: true
+      - name: Set up Quarto
+        uses: quarto-dev/quarto-actions/setup@v2
+      - name: Install poetry
+        run: pip install poetry
+      - name: Install pyrenew-hew
+        run: poetry install
+      - name: Set up dependencies for hewr
         uses: r-lib/actions/setup-r-dependencies@v2
         with:
           working-directory: hewr
-      - name: "Install hewr"
+      - name: Install hewr
         run: pak::local_install("hewr", ask = FALSE)
         shell: Rscript {0}
-      - name: "Run pipeline"
-        run: poetry run bash pipelines/tests/test_end_to_end.sh pipelines/tests
+      - name: Download fitting output
+        uses: actions/download-artifact@v4
+        with:
+          pattern: test-fit-*
+          path: ${{ env.BASE_DIR }}
+          merge-multiple: true
+      - name: Run postprocessing
+        run: |
+          poetry run python pipelines/postprocess_forecast_batches.py \
+          ${{ env.DATA_DIR }} \
+          ${{ env.DATA_DIR }}/nssp-etl/latest_comprehensive.parquet
@@ -400,9 +400,5 @@ private_data/*
 *_files/
 .vscode/settings.json
 
-# Test data exceptions to the general data exclusion
-!pipelines/tests/covid-19_r_2024-01-29_f_2023-11-01_t_2024-01-29/model_runs/TD/data/data.tsv
-!pipelines/tests/covid-19_r_2024-01-29_f_2023-11-01_t_2024-01-29/model_runs/TD/data/eval_data.tsv
-
-# Ignore test pipe output
-pipelines/tests/private_data/*
+# Ignore end-to-end test output (under pipelines/ or pipelines/tests/)
+pipelines/**/end_to_end_test_output/*
@@ -14,7 +14,10 @@
 from prep_eval_data import save_eval_data
 from pygit2 import Repository
 
-from pyrenew_hew.util import pyrenew_model_name_from_flags
+from pyrenew_hew.util import (
+    flags_from_hew_letters,
+    pyrenew_model_name_from_flags,
+)
 
 numpyro.set_host_device_count(4)
 
@@ -317,22 +320,20 @@ def main(
     facility_level_nssp_data, state_level_nssp_data = None, None
 
     if report_date in available_facility_level_reports:
-        logger.info(
-            "Facility level data available for " "the given report date"
-        )
+        logger.info("Facility level data available for the given report date")
         facility_datafile = f"{report_date}.parquet"
         facility_level_nssp_data = pl.scan_parquet(
             Path(facility_level_nssp_data_dir, facility_datafile)
         )
     if state_report_date in available_state_level_reports:
-        logger.info("State-level data available for the given report " "date.")
+        logger.info("State-level data available for the given report date.")
         state_datafile = f"{state_report_date}.parquet"
         state_level_nssp_data = pl.scan_parquet(
             Path(state_level_nssp_data_dir, state_datafile)
         )
     if facility_level_nssp_data is None and state_level_nssp_data is None:
         raise ValueError(
-            "No data available for the requested report date " f"{report_date}"
+            f"No data available for the requested report date {report_date}"
         )
 
     nwss_data_disease_map = {
@@ -516,6 +517,15 @@ def get_available_nwss_reports(
         ),
     )
 
+    parser.add_argument(
+        "--model-letters",
+        type=str,
+        help=(
+            "Fit the model corresponding to the provided model letters (e.g. 'he', 'e', 'hew')."
+        ),
+        required=True,
+    )
+
     parser.add_argument(
         "--report-date",
         type=str,
@@ -528,8 +538,7 @@ def get_available_nwss_reports(
         type=Path,
         default=Path("private_data", "nssp_etl_gold"),
         help=(
-            "Directory in which to look for facility-level NSSP "
-            "ED visit data"
+            "Directory in which to look for facility-level NSSP ED visit data"
         ),
     )
 
@@ -538,7 +547,7 @@ def get_available_nwss_reports(
         type=Path,
         default=Path("private_data", "nssp_state_level_gold"),
         help=(
-            "Directory in which to look for state-level NSSP " "ED visit data."
+            "Directory in which to look for state-level NSSP ED visit data."
         ),
     )
 
@@ -612,7 +621,7 @@ def get_available_nwss_reports(
         type=int,
         default=1000,
         help=(
-            "Number of warmup iterations per chain for NUTS" "(default: 1000)."
+            "Number of warmup iterations per chain for NUTS (default: 1000)."
         ),
     )
 
@@ -648,45 +657,23 @@ def get_available_nwss_reports(
         type=Path,
         help=("Path to a parquet file containing comprehensive truth data."),
     )
-
-    parser.add_argument(
-        "--fit-ed-visits",
-        type=bool,
-        action=argparse.BooleanOptionalAction,
-        help="If provided, fit to ED visit data.",
-    )
-    parser.add_argument(
-        "--fit-hospital-admissions",
-        type=bool,
-        action=argparse.BooleanOptionalAction,
-        help=("If provided, fit to hospital admissions data."),
-    )
     parser.add_argument(
-        "--fit-wastewater",
-        type=bool,
-        action=argparse.BooleanOptionalAction,
-        help="If provided, fit to wastewater data.",
-    )
-
-    parser.add_argument(
-        "--forecast-ed-visits",
-        type=bool,
-        action=argparse.BooleanOptionalAction,
-        help="If provided, forecast ED visits.",
-    )
-    parser.add_argument(
-        "--forecast-hospital-admissions",
-        type=bool,
-        action=argparse.BooleanOptionalAction,
-        help=("If provided, forecast hospital admissions."),
-    )
-    parser.add_argument(
-        "--forecast-wastewater",
-        type=bool,
-        action=argparse.BooleanOptionalAction,
-        help="If provided, forecast wastewater concentrations.",
+        "--additional-forecast-letters",
+        type=str,
+        help=(
+            "Forecast the following signals even if they were not fit. "
+            "Fit signals are always forecast."
+        ),
+        default="he",
     )
 
     args = parser.parse_args()
     numpyro.set_host_device_count(args.n_chains)
-    main(**vars(args))
+    fit_flags = flags_from_hew_letters(args.model_letters)
+    forecast_flags = flags_from_hew_letters(
+        args.model_letters + args.additional_forecast_letters,
+        flag_prefix="forecast",
+    )
+    delattr(args, "model_letters")
+    delattr(args, "additional_forecast_letters")
+    main(**vars(args), **fit_flags, **forecast_flags)