Skip to content

Commit

Permalink
Issue 532: Change deps and remove Dagger for pipeline (#534)
Browse files Browse the repository at this point in the history
* move functions that use RCall into an ext

* rm Dagger

* add Distributed as a dep

* Recreate scoring method for scoringutils.

* create test env for pipeline

For testing using RCall

* fix unit tests

* Update RCallExt.jl
  • Loading branch information
SamuelBrand1 authored Dec 5, 2024
1 parent ae73647 commit 5274760
Show file tree
Hide file tree
Showing 15 changed files with 47 additions and 38 deletions.
10 changes: 8 additions & 2 deletions pipeline/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
AlgebraOfGraphics = "cbdf2221-f076-402e-a563-3d30da359d67"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
Dagger = "d58978e5-989f-55fb-8d15-ea34adc7bf54"
DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
Expand All @@ -20,12 +20,18 @@ JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"

[weakdeps]
RCall = "6f49c342-dc21-5d91-9882-a32aef131414"

[extensions]
RCallExt = "RCall"

[compat]
EpiAware = "0.1.0"
julia = ">= 1.10"
2 changes: 1 addition & 1 deletion pipeline/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ We score each scenario model in two ways:

## Running the pipeline

The pipeline structure is built using [DrWatson.jl](https://github.com/JuliaDynamics/DrWatson.jl) for project management of simulation parameters/settings, saved results, and figures. Compute is done with [Dagger.jl](https://github.com/JuliaParallel/Dagger.jl) for organizing parallel computation and checkpointing of results.
The pipeline structure is built using [DrWatson.jl](https://github.com/JuliaDynamics/DrWatson.jl) for project management of simulation parameters/settings, saved results, and figures.

## Running pipeline tests

Expand Down
6 changes: 6 additions & 0 deletions pipeline/ext/RCallExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
module RCallExt

using EpiAwarePipeline, RCall, DataFramesMeta

include("score_parameters.jl")
end
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ function _score(df)
end

"""
This function calculates standard scores provided by [`scoringutils`](https://epiforecasts.io/scoringutils/dev/)
This method for `score_parameters` calculates standard scores provided by [`scoringutils`](https://epiforecasts.io/scoringutils/dev/)
for a set of parameters using the provided MCMC samples and the truth value.
The function returns a DataFrame containing a summary of the scores.
Expand All @@ -37,7 +37,7 @@ The function returns a DataFrame containing a summary of the scores.
- `result`: A DataFrame containing the summarized scores for the parameter.
"""
function score_parameters(param_names, samples, truths; model = "EpiAware")
function EpiAwarePipeline.score_parameters(param_names, samples, truths; model = "EpiAware")
df = mapreduce(vcat, param_names, truths) do param_name, truth
_make_prediction_dataframe(param_name, samples, truth; model = model)
end
Expand Down
1 change: 0 additions & 1 deletion pipeline/scripts/run_analysis_pipeline.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Local environment script to run the analysis pipeline
using Pkg
Pkg.activate(joinpath(@__DIR__(), ".."))
using Dagger

@assert !isempty(ARGS) "Test mode script requires the number of draws as an argument."
ndraws = parse(Int64, ARGS[1])
Expand Down
1 change: 0 additions & 1 deletion pipeline/scripts/run_priorpred_pipeline.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Local environment script to run the analysis pipeline
using Pkg
Pkg.activate(joinpath(@__DIR__(), ".."))
using Dagger

@assert !isempty(ARGS) "Test mode script requires the number of draws as an argument."
ndraws = parse(Int64, ARGS[1])
Expand Down
12 changes: 3 additions & 9 deletions pipeline/src/EpiAwarePipeline.jl
Original file line number Diff line number Diff line change
@@ -1,19 +1,13 @@
"""
This module contains the analysis pipeline for the `Rt-without-renewal` project.
# Pipeline Components
In this module the meaning of a _pipeline component_ is a directed-acylic-graph
(DAG) of tasks defined using `Dagger.jl` via dispatch on an `AbstractEpiAwarePipeline`
sub-type from a function with prefix `do_`. A full pipeline is a sequence of DAGs,
with execution determined by available computational resources.
"""
module EpiAwarePipeline

using CSV, Dagger, DataFramesMeta, Dates, Distributions, DocStringExtensions, DrWatson,
using CSV, DataFramesMeta, Dates, Distributions, DocStringExtensions, DrWatson,
EpiAware, Statistics, ADTypes, AbstractMCMC, JLD2, MCMCChains, Turing, DynamicPPL,
LogExpFunctions, RCall, LinearAlgebra, Random, AlgebraOfGraphics, CairoMakie,
ReverseDiff
LogExpFunctions, LinearAlgebra, Random, AlgebraOfGraphics, CairoMakie,
ReverseDiff, Distributed

using EpiAware.EpiInfModels: oneexpy

Expand Down
8 changes: 3 additions & 5 deletions pipeline/src/infer/map_inference_results.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""
Map the inference results for each inference configuration. This is the default
method for mapping inference results; based on inference results as spawned
tasks from `Dagger.@spawn`.
method for mapping inference results.
# Arguments
- `truthdata`: The truth data used for generating inference results.
Expand All @@ -16,8 +15,7 @@ tasks from `Dagger.@spawn`.
"""
function map_inference_results(
truthdata, inference_configs, pipeline::AbstractEpiAwarePipeline)
map(inference_configs) do inference_config
Dagger.@spawn generate_inference_results(
truthdata, inference_config, pipeline)
pmap(inference_configs) do inference_config
generate_inference_results(truthdata, inference_config, pipeline)
end
end
8 changes: 3 additions & 5 deletions pipeline/src/pipeline/do_truthdata.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ An array of truth data generated from the given pipeline.
function do_truthdata(pipeline::AbstractEpiAwarePipeline)
truth_data_configs = make_truth_data_configs(pipeline)
truthdata_from_configs = map(truth_data_configs) do truth_data_config
return Dagger.@spawn cache=true generate_truthdata(
truth_data_config, pipeline; plot = false)
return generate_truthdata(truth_data_config, pipeline; plot = false)
end
return truthdata_from_configs
end
Expand All @@ -29,7 +28,7 @@ Generate truth data for the given pipeline.
# Details
- When `pipeline.priorpredictive` is `true`, the function returns a dictionary with keys `"y_t"`, `"I_t"`, `"truth_I0"`, and `"truth_gi_mean"`, where `"y_t"` is set to `missing`, `"I_t"` is a vector of 100 elements all set to `1.0`, and both `"truth_I0"` and `"truth_gi_mean"` are set to `1.0`.
- When `pipeline.priorpredictive` is `false`, the function generates truth data configurations using `make_truth_data_configs(pipeline)` and spawns tasks to generate truth data for each configuration using `Dagger.@spawn`.
- When `pipeline.priorpredictive` is `false`, the function generates truth data configurations using `make_truth_data_configs(pipeline)` and spawns tasks to generate truth data for each configuration.
"""
function do_truthdata(pipeline::AbstractRtwithoutRenewalPipeline)
if pipeline.priorpredictive
Expand All @@ -39,8 +38,7 @@ function do_truthdata(pipeline::AbstractRtwithoutRenewalPipeline)
else
truth_data_configs = make_truth_data_configs(pipeline)
truthdata_from_configs = map(truth_data_configs) do truth_data_config
return Dagger.@spawn cache=true generate_truthdata(
truth_data_config, pipeline; plot = false)
return generate_truthdata(truth_data_config, pipeline; plot = false)
end
return truthdata_from_configs
end
Expand Down
7 changes: 6 additions & 1 deletion pipeline/src/scoring/scoring.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
include("score_parameters.jl")
include("simple_crps.jl")
include("summarise_crps.jl")

"""
Base function for scoring parameters intended to be extended conditional on other
dependency packages, such as the R package `scoringutils` via `RCall`.
"""
function score_parameters end
12 changes: 12 additions & 0 deletions pipeline/test/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
EpiAware = "b2eeebe4-5992-4301-9193-7ebc9f62c855"
LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
5 changes: 0 additions & 5 deletions pipeline/test/pipeline/test_pipelinefunctions.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
@testset "do_truthdata tests" begin
using Dagger
for pipetype in [SmoothOutbreakPipeline, MeasuresOutbreakPipeline,
SmoothEndemicPipeline, RoughEndemicPipeline]
pipeline = pipetype(; testmode = true)
Expand All @@ -13,8 +12,6 @@
end

@testset "do_inference tests" begin
using Dagger

function make_inference(pipeline)
truthdata_dg_task = do_truthdata(pipeline)
truthdata = fetch.(truthdata_dg_task)
Expand All @@ -33,7 +30,6 @@ end
end

@testset "do_pipeline test: just run all pipeline objects" begin
using Dagger
pipelines = map([SmoothOutbreakPipeline, MeasuresOutbreakPipeline,
SmoothEndemicPipeline, RoughEndemicPipeline]) do pipetype
pipetype(; ndraws = 10, nchains = 1, testmode = true)
Expand All @@ -45,7 +41,6 @@ end
end

@testset "do_pipeline test: prior predictive" begin
using Dagger
pipelines = map([SmoothOutbreakPipeline, MeasuresOutbreakPipeline,
SmoothEndemicPipeline, RoughEndemicPipeline]) do pipetype
pipetype(; ndraws = 10, nchains = 1, testmode = true, priorpredictive = true)
Expand Down
6 changes: 2 additions & 4 deletions pipeline/test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
using DrWatson, Test
quickactivate(@__DIR__(), "EpiAwarePipeline")
using EpiAwarePipeline, EpiAware
import Random
using EpiAwarePipeline, EpiAware, Test
using Random
Random.seed!(123)
# Run tests
include("utils/test_utils.jl");
Expand Down
2 changes: 1 addition & 1 deletion pipeline/test/scoring/test_score_parameters.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
@testset "score_parameter tests" begin
using MCMCChains
using MCMCChains, RCall

samples = MCMCChains.Chains(0.5 .+ randn(1000, 2, 1), [:a, :b])
truths = fill(0.5, 2)
Expand Down
1 change: 0 additions & 1 deletion pipeline/test/utils/test_calculate_processes.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
@testset "calculate_processes" begin
using Random
rng = MersenneTwister(1234)
I0 = 10.0
rt = randn(rng, 20)
Expand Down

0 comments on commit 5274760

Please sign in to comment.