Issue 532: Change deps and remove Dagger for pipeline (#534)

* move functions that use RCall into an ext * rm Dagger * add Distributed as a dep * Recreate scoring method for scoringutils. * create test env for pipeline For testing using RCall * fix unit tests * Update RCallExt.jl
CDCgov · Dec 5, 2024 · 5274760 · 5274760
1 parent ae73647
commit 5274760
Show file tree

Hide file tree

Showing 15 changed files with 47 additions and 38 deletions.
diff --git a/pipeline/Project.toml b/pipeline/Project.toml
@@ -8,9 +8,9 @@ AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
 AlgebraOfGraphics = "cbdf2221-f076-402e-a563-3d30da359d67"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
-Dagger = "d58978e5-989f-55fb-8d15-ea34adc7bf54"
 DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
@@ -20,12 +20,18 @@ JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
 MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
-RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
 
+[weakdeps]
+RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
+
+[extensions]
+RCallExt = "RCall"
+
 [compat]
+EpiAware = "0.1.0"
 julia = ">= 1.10"
diff --git a/pipeline/README.md b/pipeline/README.md
@@ -77,7 +77,7 @@ We score each scenario model in two ways:
 
 ## Running the pipeline
 
-The pipeline structure is built using [DrWatson.jl](https://github.com/JuliaDynamics/DrWatson.jl) for project management of simulation parameters/settings, saved results, and figures. Compute is done with [Dagger.jl](https://github.com/JuliaParallel/Dagger.jl) for organizing parallel computation and checkpointing of results.
+The pipeline structure is built using [DrWatson.jl](https://github.com/JuliaDynamics/DrWatson.jl) for project management of simulation parameters/settings, saved results, and figures.
 
 ## Running pipeline tests
 

diff --git a/pipeline/ext/RCallExt.jl b/pipeline/ext/RCallExt.jl
@@ -0,0 +1,6 @@
+module RCallExt
+
+using EpiAwarePipeline, RCall, DataFramesMeta
+
+include("score_parameters.jl")
+end
diff --git a/pipeline/src/scoring/score_parameters.jl → pipeline/ext/score_parameters.jl b/pipeline/src/scoring/score_parameters.jl → pipeline/ext/score_parameters.jl
@@ -23,7 +23,7 @@ function _score(df)
 end
 
 """
-This function calculates standard scores provided by [`scoringutils`](https://epiforecasts.io/scoringutils/dev/)
+This method for `score_parameters` calculates standard scores provided by [`scoringutils`](https://epiforecasts.io/scoringutils/dev/)
 for a set of parameters using the provided MCMC samples and the truth value.
 The function returns a DataFrame containing a summary of the scores.
 
@@ -37,7 +37,7 @@ The function returns a DataFrame containing a summary of the scores.
 - `result`: A DataFrame containing the summarized scores for the parameter.
 
 """
-function score_parameters(param_names, samples, truths; model = "EpiAware")
+function EpiAwarePipeline.score_parameters(param_names, samples, truths; model = "EpiAware")
     df = mapreduce(vcat, param_names, truths) do param_name, truth
         _make_prediction_dataframe(param_name, samples, truth; model = model)
     end

diff --git a/pipeline/scripts/run_analysis_pipeline.jl b/pipeline/scripts/run_analysis_pipeline.jl
@@ -1,7 +1,6 @@
 # Local environment script to run the analysis pipeline
 using Pkg
 Pkg.activate(joinpath(@__DIR__(), ".."))
-using Dagger
 
 @assert !isempty(ARGS) "Test mode script requires the number of draws as an argument."
 ndraws = parse(Int64, ARGS[1])

diff --git a/pipeline/scripts/run_priorpred_pipeline.jl b/pipeline/scripts/run_priorpred_pipeline.jl
@@ -1,7 +1,6 @@
 # Local environment script to run the analysis pipeline
 using Pkg
 Pkg.activate(joinpath(@__DIR__(), ".."))
-using Dagger
 
 @assert !isempty(ARGS) "Test mode script requires the number of draws as an argument."
 ndraws = parse(Int64, ARGS[1])

diff --git a/pipeline/src/EpiAwarePipeline.jl b/pipeline/src/EpiAwarePipeline.jl
@@ -1,19 +1,13 @@
 """
 This module contains the analysis pipeline for the `Rt-without-renewal` project.
 
-# Pipeline Components
-
-In this module the meaning of a _pipeline component_ is a directed-acylic-graph
-(DAG) of tasks defined using `Dagger.jl` via dispatch on an `AbstractEpiAwarePipeline`
-sub-type from a function with prefix `do_`. A full pipeline is a sequence of DAGs,
-with execution determined by available computational resources.
 """
 module EpiAwarePipeline
 
-using CSV, Dagger, DataFramesMeta, Dates, Distributions, DocStringExtensions, DrWatson,
+using CSV, DataFramesMeta, Dates, Distributions, DocStringExtensions, DrWatson,
       EpiAware, Statistics, ADTypes, AbstractMCMC, JLD2, MCMCChains, Turing, DynamicPPL,
-      LogExpFunctions, RCall, LinearAlgebra, Random, AlgebraOfGraphics, CairoMakie,
-      ReverseDiff
+      LogExpFunctions, LinearAlgebra, Random, AlgebraOfGraphics, CairoMakie,
+      ReverseDiff, Distributed
 
 using EpiAware.EpiInfModels: oneexpy
 

diff --git a/pipeline/src/infer/map_inference_results.jl b/pipeline/src/infer/map_inference_results.jl
@@ -1,7 +1,6 @@
 """
 Map the inference results for each inference configuration. This is the default
-method for mapping inference results; based on inference results as spawned
-tasks from `Dagger.@spawn`.
+method for mapping inference results.
 
 # Arguments
 - `truthdata`: The truth data used for generating inference results.
@@ -16,8 +15,7 @@ tasks from `Dagger.@spawn`.
 """
 function map_inference_results(
         truthdata, inference_configs, pipeline::AbstractEpiAwarePipeline)
-    map(inference_configs) do inference_config
-        Dagger.@spawn generate_inference_results(
-            truthdata, inference_config, pipeline)
+    pmap(inference_configs) do inference_config
+        generate_inference_results(truthdata, inference_config, pipeline)
     end
 end
diff --git a/pipeline/src/pipeline/do_truthdata.jl b/pipeline/src/pipeline/do_truthdata.jl
@@ -11,8 +11,7 @@ An array of truth data generated from the given pipeline.
 function do_truthdata(pipeline::AbstractEpiAwarePipeline)
     truth_data_configs = make_truth_data_configs(pipeline)
     truthdata_from_configs = map(truth_data_configs) do truth_data_config
-        return Dagger.@spawn cache=true generate_truthdata(
-            truth_data_config, pipeline; plot = false)
+        return generate_truthdata(truth_data_config, pipeline; plot = false)
     end
     return truthdata_from_configs
 end
@@ -29,7 +28,7 @@ Generate truth data for the given pipeline.
 
 # Details
 - When `pipeline.priorpredictive` is `true`, the function returns a dictionary with keys `"y_t"`, `"I_t"`, `"truth_I0"`, and `"truth_gi_mean"`, where `"y_t"` is set to `missing`, `"I_t"` is a vector of 100 elements all set to `1.0`, and both `"truth_I0"` and `"truth_gi_mean"` are set to `1.0`.
-- When `pipeline.priorpredictive` is `false`, the function generates truth data configurations using `make_truth_data_configs(pipeline)` and spawns tasks to generate truth data for each configuration using `Dagger.@spawn`.
+- When `pipeline.priorpredictive` is `false`, the function generates truth data configurations using `make_truth_data_configs(pipeline)` and spawns tasks to generate truth data for each configuration.
 """
 function do_truthdata(pipeline::AbstractRtwithoutRenewalPipeline)
     if pipeline.priorpredictive
@@ -39,8 +38,7 @@ function do_truthdata(pipeline::AbstractRtwithoutRenewalPipeline)
     else
         truth_data_configs = make_truth_data_configs(pipeline)
         truthdata_from_configs = map(truth_data_configs) do truth_data_config
-            return Dagger.@spawn cache=true generate_truthdata(
-                truth_data_config, pipeline; plot = false)
+            return generate_truthdata(truth_data_config, pipeline; plot = false)
         end
         return truthdata_from_configs
     end

diff --git a/pipeline/src/scoring/scoring.jl b/pipeline/src/scoring/scoring.jl
@@ -1,3 +1,8 @@
-include("score_parameters.jl")
 include("simple_crps.jl")
 include("summarise_crps.jl")
+
+"""
+Base function for scoring parameters intended to be extended conditional on other
+    dependency packages, such as the R package `scoringutils` via `RCall`.
+"""
+function score_parameters end
diff --git a/pipeline/test/Project.toml b/pipeline/test/Project.toml
@@ -0,0 +1,12 @@
+[deps]
+ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
+AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
+CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
+EpiAware = "b2eeebe4-5992-4301-9193-7ebc9f62c855"
+LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
+MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
+RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/pipeline/test/pipeline/test_pipelinefunctions.jl b/pipeline/test/pipeline/test_pipelinefunctions.jl
@@ -1,5 +1,4 @@
 @testset "do_truthdata tests" begin
-    using Dagger
     for pipetype in [SmoothOutbreakPipeline, MeasuresOutbreakPipeline,
         SmoothEndemicPipeline, RoughEndemicPipeline]
         pipeline = pipetype(; testmode = true)
@@ -13,8 +12,6 @@
 end
 
 @testset "do_inference tests" begin
-    using Dagger
-
     function make_inference(pipeline)
         truthdata_dg_task = do_truthdata(pipeline)
         truthdata = fetch.(truthdata_dg_task)
@@ -33,7 +30,6 @@ end
 end
 
 @testset "do_pipeline test: just run all pipeline objects" begin
-    using Dagger
     pipelines = map([SmoothOutbreakPipeline, MeasuresOutbreakPipeline,
         SmoothEndemicPipeline, RoughEndemicPipeline]) do pipetype
         pipetype(; ndraws = 10, nchains = 1, testmode = true)
@@ -45,7 +41,6 @@ end
 end
 
 @testset "do_pipeline test: prior predictive" begin
-    using Dagger
     pipelines = map([SmoothOutbreakPipeline, MeasuresOutbreakPipeline,
         SmoothEndemicPipeline, RoughEndemicPipeline]) do pipetype
         pipetype(; ndraws = 10, nchains = 1, testmode = true, priorpredictive = true)

diff --git a/pipeline/test/runtests.jl b/pipeline/test/runtests.jl
@@ -1,7 +1,5 @@
-using DrWatson, Test
-quickactivate(@__DIR__(), "EpiAwarePipeline")
-using EpiAwarePipeline, EpiAware
-import Random
+using EpiAwarePipeline, EpiAware, Test
+using Random
 Random.seed!(123)
 # Run tests
 include("utils/test_utils.jl");

diff --git a/pipeline/test/scoring/test_score_parameters.jl b/pipeline/test/scoring/test_score_parameters.jl
@@ -1,5 +1,5 @@
 @testset "score_parameter tests" begin
-    using MCMCChains
+    using MCMCChains, RCall
 
     samples = MCMCChains.Chains(0.5 .+ randn(1000, 2, 1), [:a, :b])
     truths = fill(0.5, 2)

diff --git a/pipeline/test/utils/test_calculate_processes.jl b/pipeline/test/utils/test_calculate_processes.jl
@@ -1,5 +1,4 @@
 @testset "calculate_processes" begin
-    using Random
     rng = MersenneTwister(1234)
     I0 = 10.0
     rt = randn(rng, 20)