From 09df65a6e6cc94e39e42f60f7e54bbbadc618d02 Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Thu, 21 Nov 2024 20:30:30 -0500 Subject: [PATCH] Reapply reproducibility improvements + unit tests debug reproducibility_tests/print_new_mse.jl -> mse_summary --- .buildkite/pipeline.yml | 8 +- examples/hybrid/driver.jl | 19 +- reproducibility_tests/compute_mse.jl | 417 +++--- reproducibility_tests/move_output.jl | 70 +- .../{print_new_mse.jl => mse_summary.jl} | 39 +- reproducibility_tests/mse_tables.jl | 68 +- .../reproducibility_tests.jl | 49 - .../reproducibility_utils.jl | 380 ++++-- reproducibility_tests/test_reset.jl | 14 - test/Project.toml | 1 + test/unit_reproducibility_infra.jl | 1185 +++++++++++++---- 11 files changed, 1467 insertions(+), 783 deletions(-) rename reproducibility_tests/{print_new_mse.jl => mse_summary.jl} (60%) delete mode 100644 reproducibility_tests/reproducibility_tests.jl delete mode 100644 reproducibility_tests/test_reset.jl diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index f399ec8e78e..381173fe673 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -56,11 +56,11 @@ steps: - wait - - group: "Reproducibility tests" + - group: "Reproducibility infrastructure" steps: - - label: ":computer: Ensure mse tables are reset when necessary" - command: "julia --color=yes --project=examples reproducibility_tests/test_reset.jl" + - label: ":computer: Test reproducibility infrastructure" + command: "julia --color=yes --project=examples test/unit_reproducibility_infra.jl" - group: "Radiation" steps: @@ -1186,7 +1186,7 @@ steps: continue_on_failure: true - label: ":robot_face: Print new mse tables" - command: "julia --color=yes --project=examples reproducibility_tests/print_new_mse.jl" + command: "julia --color=yes --project=examples reproducibility_tests/mse_summary.jl" - label: ":robot_face: Print new reference counter" command: "julia --color=yes --project=examples 
reproducibility_tests/print_new_ref_counter.jl" diff --git a/examples/hybrid/driver.jl b/examples/hybrid/driver.jl index b11af3bf27b..a06b64383da 100644 --- a/examples/hybrid/driver.jl +++ b/examples/hybrid/driver.jl @@ -95,21 +95,14 @@ if config.parsed_args["reproducibility_test"] "..", "..", "reproducibility_tests", - "reproducibility_tests.jl", + "compute_mse.jl", # TODO: improve filename ), ) - @testset "Test reproducibility table entries" begin - mse_keys = sort(collect(keys(all_best_mse[simulation.job_id]))) - pcs = collect(Fields.property_chains(sol.u[end])) - for prop_chain in mse_keys - @test prop_chain in pcs - end - end - perform_reproducibility_tests( - simulation.job_id, + export_reproducibility_results( sol.u[end], - all_best_mse, - simulation.output_dir, + config.comms_ctx; + job_id = simulation.job_id, + computed_dir = simulation.output_dir, ) end @@ -152,7 +145,7 @@ if ClimaComms.iamroot(config.comms_ctx) ), ) @info "Plotting" - paths = latest_comparable_paths() # __build__ path (not job path) + paths = latest_comparable_dirs() # __build__ path (not job path) if isempty(paths) make_plots(Val(Symbol(reference_job_id)), simulation.output_dir) else diff --git a/reproducibility_tests/compute_mse.jl b/reproducibility_tests/compute_mse.jl index 09d3dd99ae6..d8e86920050 100644 --- a/reproducibility_tests/compute_mse.jl +++ b/reproducibility_tests/compute_mse.jl @@ -1,214 +1,279 @@ import ClimaReproducibilityTests as CRT -import NCDatasets -import Tar -import ClimaCoreTempestRemap as CCTR +import ClimaCore: InputOutput, Fields +import ClimaComms +import JSON include("reproducibility_utils.jl") -function get_nc_data(ds, var::String) - if haskey(ds, var) - return ds[var] - else - for key in keys(ds.group) - if haskey(ds.group[key], var) - return ds.group[key][var] +function error_if_dissimilar_dicts(dicts, dict) + if !similar_dicts(dicts, dict) + println("Dictionaries assembled in the reproducibility tests are ") + println("not similar, and cannot be 
compared.") + foreach(dicts) do d + if !similar_dicts(d, dict) + @show keys(dict) + @show keys(d) + @show typeof(collect(values(dict))) + @show typeof(collect(values(d))) + @show size(collect(values(dict))) + @show size(collect(values(d))) end end + msg = "\nPlease find\n" + msg *= "`reproducibility_tests/README.md` and read the section\n\n" + msg *= " `How to merge pull requests (PR) that get approved but *break* reproducibility tests`\n\n" + msg *= "for how to merge this PR." + error(msg) end - error("No key $var for mse computation.") - return nothing +end + +function no_comparison_error(non_existent_files) + msg = "\n\n" + msg *= "Pull request author:\n" + msg *= " It seems that a new dataset,\n" + msg *= "\n" + msg *= "dataset file(s):`$(non_existent_files)`," + msg *= "\n" + msg *= " was created, or the name of the dataset\n" + msg *= " has changed. Please increment the reference\n" + msg *= " counter in `reproducibility_tests/ref_counter.jl`.\n" + msg *= "\n" + msg *= " If this is not the case, then please\n" + msg *= " open an issue with a link pointing to this\n" + msg *= " PR and build.\n" + msg *= "\n" + msg *= "For more information, please find\n" + msg *= "`reproducibility_tests/README.md` and read the section\n\n" + msg *= " `How to merge pull requests (PR) that get approved\n" + msg *= " but *break* reproducibility tests`\n\n" + msg *= "for how to merge this PR." + error(msg) end """ - to_dict(nc_filename::String, reference_keys::Vector{String}) + to_dict(file::String, name, comms_ctx) -Convert an NCDatasets file to a `Dict`. +Convert the HDF5 file containing the prognostic field with name `name` into a +`Dict` using ClimaCore's `property_chains` and `single_field` functions. 
""" -function to_dict(nc_filename::String, reference_keys::Vector{String}) +function to_dict(file::String, name, comms_ctx) dict = Dict{String, AbstractArray}() - NCDatasets.Dataset(nc_filename, "r") do ds - for key in reference_keys - dict[key] = vec(Array(get_nc_data(ds, key))) - end + reader = InputOutput.HDF5Reader(file, comms_ctx) + Y = InputOutput.read_field(reader, name) + Base.close(reader) + for prop_chain in Fields.property_chains(Y) + dict[string(prop_chain)] = + Array(vec(parent(Fields.single_field(Y, prop_chain)))) + end + return dict +end + +""" + zero_dict(file::String, name, comms_ctx) + +Return a dict of zeros for all `ClimaCore.Fields.property_chains` in the +fieldvector contained in the HDF5 file `file`. +""" +function zero_dict(file::String, name, comms_ctx) + dict = Dict{String, AbstractArray}() + reader = InputOutput.HDF5Reader(file, comms_ctx) + Y = InputOutput.read_field(reader, name) + Base.close(reader) + for prop_chain in Fields.property_chains(Y) + arr = vec(Array(parent(Fields.single_field(Y, prop_chain)))) + fill!(arr, 0) + dict[string(prop_chain)] = arr end return dict end +function maybe_extract(file) + job_dir = dirname(file) + nc_tar = joinpath(job_dir, "nc_files.tar") + # We may have converted to tarball, try to + # extract nc files from tarball first: + isfile(file) && return nothing + if !isfile(nc_tar) + @warn "There is no reference dataset, and no NC tar file." + return nothing + end + mktempdir(joinpath(job_dir, tempdir())) do tdir + # We must extract to an empty folder, let's + # move it back to job_dir after. + Tar.extract(nc_tar, tdir) do hdr + basename(hdr.path) == basename(file) + end + mv( + joinpath(tdir, basename(file)), + joinpath(job_dir, basename(file)); + force = true, + ) + end +end + +""" + similar_dicts(dict::Dict, dict::Dict) + similar_dicts(vec_of_dicts::Vector{<:Dict}, dict::Dict) + +Returns `true` if dicts have the same keys and same value _types_ as `dict`, and +false otherwise. 
""" - reproducibility_test(; +similar_dicts(v::Vector{<:Dict}, dict::Dict) = + all(d -> similar_dicts(d, dict), v) +function similar_dicts(a::Dict, b::Dict) + keys(a) == keys(b) || return false + typeof(values(a)) == typeof(values(b)) || return false + return true +end + +""" + reproducibility_results( + comms_ctx; job_id, - reference_mse, - ds_filename_computed, - ds_filename_reference = nothing, - varname, + data_file_computed, + skip = !haskey(ENV, "BUILDKITE_COMMIT") ) -Returns a `Dict` of mean-squared errors between -`NCDataset`s `ds_filename_computed` and -`ds_filename_reference` for all keys in `reference_mse`. -Keys in `reference_mse` may directly map to keys in -the `NCDataset`s, or they may be mapped to the keys -via `varname`. +Returns a tuple containing: + - the paths used for the comparison + - a vector of `Dict`s of mean-squared errors between datasets + `data_file_computed` and `data_file_reference` for all variables. + - a symbol indicating how results were returned (for unit testing) -If running on buildkite, we get `ds_filename_reference` -from the latest merged dataset on Caltech central. +If running on buildkite, we get `data_file_reference` from the latest merged +dataset on Caltech central. 
""" -function reproducibility_test(; - job_id, - reference_mse, - ds_filename_computed, - varname, +function reproducibility_results( + comms_ctx; + job_id::String, + data_file_computed::String, + n::Int = 10, + name::String, + save_dir::String = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + ref_counter_PR::Int = read_ref_counter( + joinpath(@__DIR__, "ref_counter.jl"), + ), + reference_filename::String = "prog_state.hdf5", + skip::Bool = !haskey(ENV, "BUILDKITE_COMMIT"), ) - local ds_filename_reference - reference_keys = map(k -> varname(k), collect(keys(reference_mse))) - paths = String[] # initialize for later handling - - if haskey(ENV, "BUILDKITE_COMMIT") - paths = latest_comparable_paths(; n = 10) - isempty(paths) && return (reference_mse, paths) - @info "`ds_filename_computed`: `$ds_filename_computed`" - ds_filename_references = - map(p -> joinpath(p, ds_filename_computed), paths) - for ds_filename_reference in ds_filename_references - @info "`ds_filename_reference`: `$ds_filename_reference`" - job_dir = dirname(ds_filename_reference) - nc_tar = joinpath(job_dir, "nc_files.tar") - # We may have converted to tarball, try to - # extract nc files from tarball first: - if !isfile(ds_filename_reference) - if isfile(nc_tar) - mktempdir(joinpath(job_dir, tempdir())) do tdir - # We must extract to an empty folder, let's - # move it back to job_dir after. - Tar.extract(nc_tar, tdir) do hdr - basename(hdr.path) == - basename(ds_filename_reference) - end - mv( - joinpath(tdir, basename(ds_filename_reference)), - joinpath(job_dir, basename(ds_filename_reference)); - force = true, - ) - end - else - @warn "There is no reference dataset, and no NC tar file." - end - end - if !isfile(ds_filename_reference) - msg = "\n\n" - msg *= "Pull request author:\n" - msg *= " It seems that a new dataset,\n" - msg *= "\n" - msg *= "dataset file:`$(ds_filename_computed)`," - msg *= "\n" - msg *= " was created, or the name of the dataset\n" - msg *= " has changed. 
Please increment the reference\n" - msg *= " counter in `reproducibility_tests/ref_counter.jl`.\n" - msg *= "\n" - msg *= " If this is not the case, then please\n" - msg *= " open an issue with a link pointing to this\n" - msg *= " PR and build.\n" - msg *= "\n" - msg *= "For more information, please find\n" - msg *= "`reproducibility_tests/README.md` and read the section\n\n" - msg *= " `How to merge pull requests (PR) that get approved\n" - msg *= " but *break* reproducibility tests`\n\n" - msg *= "for how to merge this PR." - error(msg) - end - end - else - @warn "Buildkite not detected. Skipping reproducibility tests." - @info "Please review output results before merging." - return (reference_mse, paths) - end + dirs = String[] # initialize for later handling - local computed_mse - @info "Prescribed reference keys $reference_keys" - dict_computed = to_dict(ds_filename_computed, reference_keys) - dict_references = - map(ds -> to_dict(ds, reference_keys), ds_filename_references) - @info "Computed keys $(collect(keys(dict_computed)))" - @info "Reference keys $(collect(keys(first(dict_references))))" - if all(dr -> keys(dict_computed) == keys(dr), dict_references) && all( - dr -> typeof(values(dict_computed)) == typeof(values(dr)), - dict_references, + # Skip if we're not on buildkite: + skip && return ( + dirs, + [zero_dict(data_file_computed, name, comms_ctx)], + :skipped, ) - computed_mses = map(dict_references) do dict_reference - CRT.compute_mse(; - job_name = string(job_id), - reference_keys = reference_keys, - dict_computed, - dict_reference, - ) - end - else - msg = "" - msg *= "The reproducibility test broke. Please find\n" - msg *= "`reproducibility_tests/README.md` and read the section\n\n" - msg *= " `How to merge pull requests (PR) that get approved but *break* reproducibility tests`\n\n" - msg *= "for how to merge this PR." 
- error(msg) - end - return (computed_mses, paths) -end + dirs = + latest_comparable_dirs(; n, root_dir = save_dir, ref_counter_PR, skip) # should we pass / expose more kwargs? + isempty(dirs) && return ( + dirs, + [zero_dict(data_file_computed, name, comms_ctx)], + :no_comparable_dirs, + ) + data_file_references = map(p -> joinpath(p, reference_filename), dirs) -##### TODO: move below functions to ClimaCore + # foreach(x->maybe_extract(x), data_file_references) -function first_center_space(fv::Fields.FieldVector) - for prop_chain in Fields.property_chains(fv) - f = Fields.single_field(fv, prop_chain) - space = axes(f) - if space isa Spaces.CenterExtrudedFiniteDifferenceSpace - return space - end - end - error("Unfound space") -end + non_existent_files = filter(x -> !isfile(x), data_file_references) + isempty(non_existent_files) || no_comparison_error(non_existent_files) -function first_face_space(fv::Fields.FieldVector) - for prop_chain in Fields.property_chains(fv) - f = Fields.single_field(fv, prop_chain) - space = axes(f) - if space isa Spaces.FaceExtrudedFiniteDifferenceSpace - return space + dict_computed_solution = to_dict(data_file_computed, name, comms_ctx) + dict_reference_solutions = + map(ds -> to_dict(ds, name, comms_ctx), data_file_references) + reference_keys = keys(first(dict_reference_solutions)) + + error_if_dissimilar_dicts(dict_reference_solutions, dict_computed_solution) + + computed_mses = + map(zip(dirs, dict_reference_solutions)) do (p, dict_reference_solution) + CRT.compute_mse(; + job_name = string(job_id, "_", basename(p)), + reference_keys = collect(string.(reference_keys)), + dict_computed = dict_computed_solution, + dict_reference = dict_reference_solution, + ) end - end - error("Unfound space") + return (dirs, computed_mses, :successful_comparison) end -function export_nc( - Y::Fields.FieldVector; - nc_filename, - t_now = 0.0, - center_space = first_center_space, - face_space = first_face_space, - filter_prop_chain = pn -> true, # use 
all fields - varname::Function, + +""" + export_reproducibility_results( + field_vec::Fields.FieldVector, + comms_ctx::ClimaComms.AbstractCommsContext; + job_id::String, + computed_dir::String, + save_dir::String = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + name::String = "Y", + reference_filename = "prog_state.hdf5", + computed_filename = reference_filename, + n::Int = 10, + ref_counter_PR::Int = read_ref_counter( + joinpath(@__DIR__, "ref_counter.jl"), + ), + skip::Bool = !haskey(ENV, "BUILDKITE_COMMIT"), + ) + +This function returns: + `(data_file_computed, computed_mses, dirs, how)` + where + - `data_file_computed` is the computed solution, i.e., the field vector + `field_vec` + - `computed_mses` is a vector of dictionaries containing mean-squared errors + against the reference files, found via `latest_comparable_dirs`. + + - exports the results from field-vector `field_vec`, and saves it into the + reproducibility folder + - Compares the computed results against comparable references + - Writes the dictionary of comparisons to json files in the reproducibility + folder +""" +function export_reproducibility_results( + field_vec::Fields.FieldVector, + comms_ctx::ClimaComms.AbstractCommsContext; + job_id::String, + computed_dir::String, + save_dir::String = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + name::String = "Y", + reference_filename = "prog_state.hdf5", + computed_filename = reference_filename, + n::Int = 10, + ref_counter_PR::Int = read_ref_counter( + joinpath(@__DIR__, "ref_counter.jl"), + ), + skip::Bool = !haskey(ENV, "BUILDKITE_COMMIT"), ) - prop_chains = Fields.property_chains(Y) - filter!(filter_prop_chain, prop_chains) - cspace = center_space(Y) - fspace = face_space(Y) - # create a temporary dir for intermediate data - FT = eltype(Y) - NCDatasets.NCDataset(nc_filename, "c") do nc - # defines the appropriate dimensions and variables for a space coordinate - # defines the appropriate dimensions and variables for a time 
coordinate (by default, unlimited size) - nc_time = CCTR.def_time_coord(nc) - CCTR.def_space_coord(nc, cspace, type = "cgll") - CCTR.def_space_coord(nc, fspace, type = "cgll") - # define variables for the prognostic states - for prop_chain in Fields.property_chains(Y) - f = Fields.single_field(Y, prop_chain) - space = axes(f) - nc_var = CCTR.defVar(nc, varname(prop_chain), FT, space, ("time",)) - nc_var[:, 1] = f + repro_folder = joinpath(computed_dir, "reproducibility_bundle") + data_file_computed = joinpath(repro_folder, reference_filename) + + mkpath(dirname(data_file_computed)) + hdfwriter = InputOutput.HDF5Writer(data_file_computed, comms_ctx) + InputOutput.write!(hdfwriter, field_vec, name) + Base.close(hdfwriter) + + (dirs, computed_mses, how) = reproducibility_results( + comms_ctx; + job_id, + data_file_computed, + n, + name, + reference_filename, + save_dir = save_dir, + ref_counter_PR, + skip, + ) + + for (computed_mse, dir) in zip(computed_mses, dirs) + commit_hash = basename(dir) + computed_mse_file = + joinpath(repro_folder, "computed_mse_$commit_hash.json") + + open(computed_mse_file, "w") do io + JSON.print(io, computed_mse) end - # TODO: interpolate w onto center space and save it the same way as the other vars - nc_time[1] = t_now end - return nothing + return (data_file_computed, computed_mses, dirs, how) end diff --git a/reproducibility_tests/move_output.jl b/reproducibility_tests/move_output.jl index 73256739953..e23224f846f 100644 --- a/reproducibility_tests/move_output.jl +++ b/reproducibility_tests/move_output.jl @@ -1,73 +1,15 @@ - include(joinpath(@__DIR__, "reproducibility_utils.jl")) -paths = latest_comparable_paths() all_lines = readlines(joinpath(@__DIR__, "mse_tables.jl")) -lines = deepcopy(all_lines) -filter!(x -> occursin("] = OrderedCollections", x), lines) -job_ids = getindex.(split.(lines, "\""), 2) -@assert count(x -> occursin("OrderedDict", x), all_lines) == length(job_ids) + 1 -@assert length(job_ids) ≠ 0 # safety net - -# 
Note: cluster_data_prefix is also defined in compute_mse.jl -cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main" -buildkite_ci = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci" -commit = get(ENV, "BUILDKITE_COMMIT", nothing) -branch = get(ENV, "BUILDKITE_BRANCH", nothing) -in_merge_queue = startswith(branch, "gh-readonly-queue/main/") -if buildkite_ci - @info "pwd() = $(pwd())" - @info "branch = $(branch)" - @info "commit = $(commit)" +job_ids = reproducibility_test_job_ids - using Glob - @show readdir(joinpath(@__DIR__, "..")) - # if a contributor manually merged, we still want to move data - # from scratch to `cluster_data_prefix`. So, let's also try moving - # data if this is running on the main branch. - if in_merge_queue || branch == "main" - commit_sha = commit[1:7] - mkpath(cluster_data_prefix) - path = joinpath(cluster_data_prefix, commit_sha) - mkpath(path) - # Always move reproducibility data, so that we - # can compare against multiple references - for folder_name in job_ids - src = folder_name - dst = joinpath(path, folder_name) - @info "Moving $src to $dst" - if !isfile(dst) - mv(src, dst; force = true) - end - end - ref_counter_file_PR = joinpath(@__DIR__, "ref_counter.jl") - ref_counter_file_main = joinpath(path, "ref_counter.jl") - if !isfile(ref_counter_file_main) - mv(ref_counter_file_PR, ref_counter_file_main; force = true) - end - perf_benchmarks_PR = joinpath(dirname(@__DIR__), "perf_benchmarks.json") - perf_benchmarks_main = joinpath(path, "perf_benchmarks.json") - if !isfile(perf_benchmarks_main) - mv(perf_benchmarks_PR, perf_benchmarks_main; force = true) - end - println("New reference folder: $path") - for (root, dirs, files) in walkdir(path) - println("--Directories in $root") - for dir in dirs - println(" ", joinpath(root, dir)) # path to directories - end - println("--Files in $root") - for file in files - println(" ", joinpath(root, file)) # path to files - end - end - end -else - @info "ENV 
keys: $(keys(ENV))" -end +move_data_to_save_dir(; + dirs_src = job_ids, + ref_counter_file_PR = joinpath(@__DIR__, "ref_counter.jl"), +) if buildkite_ci && in_merge_queue - folders = get_reference_paths_to_delete(; root_path = cluster_data_prefix) + folders = get_reference_dirs_to_delete(; root_dir = cluster_data_prefix) for f in folders rm(f; recursive = true, force = true) end diff --git a/reproducibility_tests/print_new_mse.jl b/reproducibility_tests/mse_summary.jl similarity index 60% rename from reproducibility_tests/print_new_mse.jl rename to reproducibility_tests/mse_summary.jl index 72bff33ae80..e95a5620851 100644 --- a/reproducibility_tests/print_new_mse.jl +++ b/reproducibility_tests/mse_summary.jl @@ -3,16 +3,10 @@ import JSON # Get cases from JobIDs in mse_tables file: include(joinpath(@__DIR__, "reproducibility_utils.jl")) -paths = latest_comparable_paths() - -all_lines = readlines(joinpath(@__DIR__, "mse_tables.jl")) -lines = deepcopy(all_lines) -filter!(x -> occursin("] = OrderedCollections", x), lines) -job_ids = getindex.(split.(lines, "\""), 2) -@assert count(x -> occursin("OrderedDict", x), all_lines) == length(job_ids) + 1 -@assert length(job_ids) ≠ 0 # safety net +paths = latest_comparable_dirs() include(joinpath(@__DIR__, "mse_tables.jl")) +job_ids = reproducibility_test_job_ids computed_mse = OrderedCollections.OrderedDict() files_skipped = OrderedCollections.OrderedDict() @@ -24,7 +18,7 @@ end for job_id in job_ids all_filenames = readdir(joinpath(job_id, "output_active"); join = true) mse_filenames = filter(is_mse_file, all_filenames) - @info "mse_filenames: $mse_filenames" + isempty(mse_filenames) || @info "mse_filenames: $mse_filenames" for filename in mse_filenames if !isfile(filename) @warn "File $filename skipped" @@ -41,26 +35,22 @@ for job_id in job_ids end end -println("#################################") -println("################################# MSE tables") -println("#################################") 
+println("################################# Computed MSEs") println("#! format: off") println("#") -println("all_best_mse = OrderedCollections.OrderedDict()\n#") for job_id in keys(computed_mse) - println("all_best_mse[\"$job_id\"] = OrderedCollections.OrderedDict()") for var in keys(computed_mse[job_id]) if computed_mse[job_id][var] == "NA" println( - "all_best_mse[\"$job_id\"][$(var)] = \"$(computed_mse[job_id][var])\"", + "mse_dict[\"$job_id\"][$(var)] = \"$(computed_mse[job_id][var])\"", ) else # It's easier to update the reference counter, rather than updating # the mse tables, so let's always print zeros: computed_mse[job_id][var] = 0 println( - "all_best_mse[\"$job_id\"][$(var)] = $(computed_mse[job_id][var])", + "mse_dict[\"$job_id\"][$(var)] = $(computed_mse[job_id][var])", ) end end @@ -72,13 +62,7 @@ println("#################################") println("#################################") println("#################################") -if isempty(paths) - @warn string( - "The printed `all_best_mse` values have", - "been set to zero, due to no comparable references,", - "for copy-paste convenience.", - ) -end +isempty(paths) && @warn string("No comparable references.") # Cleanup for job_id in job_ids @@ -91,15 +75,6 @@ end println("-- DO NOT COPY --") -for job_id in keys(computed_mse) - for var in keys(computed_mse[job_id]) - if haskey(all_best_mse[job_id], var) - all_best_mse[job_id][var] isa Real || continue # skip if "NA" - computed_mse[job_id][var] isa Real || continue # skip if "NA" - end - end -end - if any(values(files_skipped)) @info "Skipped files:" for key in keys(files_skipped) diff --git a/reproducibility_tests/mse_tables.jl b/reproducibility_tests/mse_tables.jl index 8a9d98ae974..1efd0dcd758 100644 --- a/reproducibility_tests/mse_tables.jl +++ b/reproducibility_tests/mse_tables.jl @@ -1,60 +1,8 @@ -################################# -################################# MSE tables -################################# -#! 
format: off -# -all_best_mse = OrderedCollections.OrderedDict() -# -all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"] = OrderedCollections.OrderedDict() -all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :ρ)] = 0 -all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :ρe_tot)] = 0 -all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :uₕ, :components, :data, 1)] = 0 -all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :uₕ, :components, :data, 2)] = 0 -all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :ρq_tot)] = 0 -all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:f, :u₃, :components, :data, 1)] = 0 -# -all_best_mse["sphere_held_suarez_rhoe_equilmoist_hightop_sponge"] = OrderedCollections.OrderedDict() -all_best_mse["sphere_held_suarez_rhoe_equilmoist_hightop_sponge"][(:c, :ρ)] = 0 -all_best_mse["sphere_held_suarez_rhoe_equilmoist_hightop_sponge"][(:c, :ρe_tot)] = 0 -all_best_mse["sphere_held_suarez_rhoe_equilmoist_hightop_sponge"][(:c, :uₕ, :components, :data, 1)] = 0 -all_best_mse["sphere_held_suarez_rhoe_equilmoist_hightop_sponge"][(:c, :uₕ, :components, :data, 2)] = 0 -all_best_mse["sphere_held_suarez_rhoe_equilmoist_hightop_sponge"][(:c, :ρq_tot)] = 0 -all_best_mse["sphere_held_suarez_rhoe_equilmoist_hightop_sponge"][(:f, :u₃, :components, :data, 1)] = 0 -# -all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_res"] = OrderedCollections.OrderedDict() -all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_res"][(:c, :ρ)] = 0 -all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_res"][(:c, :ρe_tot)] = 0 -all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_res"][(:c, :uₕ, :components, :data, 1)] = 0 -all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_res"][(:c, :uₕ, :components, :data, 2)] = 0 -all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_res"][(:c, :ρq_tot)] = 0 -all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_res"][(:f, :u₃, :components, :data, 1)] = 0 -# 
-all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_raw_zonallyasymmetric"] = OrderedCollections.OrderedDict() -all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_raw_zonallyasymmetric"][(:c, :ρ)] = 0 -all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_raw_zonallyasymmetric"][(:c, :ρe_tot)] = 0 -all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_raw_zonallyasymmetric"][(:c, :uₕ, :components, :data, 1)] = 0 -all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_raw_zonallyasymmetric"][(:c, :uₕ, :components, :data, 2)] = 0 -all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_raw_zonallyasymmetric"][(:c, :ρq_tot)] = 0 -all_best_mse["sphere_aquaplanet_rhoe_equilmoist_allsky_gw_raw_zonallyasymmetric"][(:f, :u₃, :components, :data, 1)] = 0 -# -all_best_mse["deep_sphere_baroclinic_wave_rhoe_equilmoist"] = OrderedCollections.OrderedDict() -all_best_mse["deep_sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :ρ)] = 0 -all_best_mse["deep_sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :ρe_tot)] = 0 -all_best_mse["deep_sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :uₕ, :components, :data, 1)] = 0 -all_best_mse["deep_sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :uₕ, :components, :data, 2)] = 0 -all_best_mse["deep_sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :ρq_tot)] = 0 -all_best_mse["deep_sphere_baroclinic_wave_rhoe_equilmoist"][(:f, :u₃, :components, :data, 1)] = 0 -# -all_best_mse["diagnostic_edmfx_aquaplanet"] = OrderedCollections.OrderedDict() -all_best_mse["diagnostic_edmfx_aquaplanet"][(:c, :ρ)] = 0 -all_best_mse["diagnostic_edmfx_aquaplanet"][(:c, :uₕ, :components, :data, 1)] = 0 -all_best_mse["diagnostic_edmfx_aquaplanet"][(:c, :uₕ, :components, :data, 2)] = 0 -all_best_mse["diagnostic_edmfx_aquaplanet"][(:c, :ρe_tot)] = 0 -all_best_mse["diagnostic_edmfx_aquaplanet"][(:c, :ρq_tot)] = 0 -all_best_mse["diagnostic_edmfx_aquaplanet"][(:c, :sgs⁰, :ρatke)] = 0 -all_best_mse["diagnostic_edmfx_aquaplanet"][(:f, :u₃, :components, :data, 1)] = 
0 -# -#! format: on -################################# -################################# -################################# +reproducibility_test_job_ids = [ + "sphere_baroclinic_wave_rhoe_equilmoist", + "sphere_held_suarez_rhoe_equilmoist_hightop_sponge", + "sphere_aquaplanet_rhoe_equilmoist_allsky_gw_res", + "sphere_aquaplanet_rhoe_equilmoist_allsky_gw_raw_zonallyasymmetric", + "deep_sphere_baroclinic_wave_rhoe_equilmoist", + "diagnostic_edmfx_aquaplanet", +] diff --git a/reproducibility_tests/reproducibility_tests.jl b/reproducibility_tests/reproducibility_tests.jl deleted file mode 100644 index 5ab9493c332..00000000000 --- a/reproducibility_tests/reproducibility_tests.jl +++ /dev/null @@ -1,49 +0,0 @@ -import JSON -import ClimaCore.Fields as Fields -include(joinpath(@__DIR__, "compute_mse.jl")) - -function perform_reproducibility_tests( - job_id::String, - Y_last::Fields.FieldVector, - all_best_mse::AbstractDict, - output_dir::String, -) - # This is helpful for starting up new tables - @info "Job-specific MSE table format:" - println("all_best_mse[\"$job_id\"] = OrderedCollections.OrderedDict()") - for prop_chain in Fields.property_chains(Y_last) - println("all_best_mse[\"$job_id\"][$prop_chain] = 0.0") - end - # Extract best mse for this job: - best_mse = all_best_mse[job_id] - - ds_filename_computed = joinpath(output_dir, "prog_state.nc") - - function process_name(s::AbstractString) - # "c_ρ", "c_ρe", "c_uₕ_1", "c_uₕ_2", "f_w_1", "c_sgs⁰_ρatke" - s = replace(s, "components_data_" => "") - s = replace(s, "ₕ" => "_h") - s = replace(s, "ρ" => "rho") - s = replace(s, "⁰" => "_0") - return s - end - varname(pc::Tuple) = process_name(join(pc, "_")) - - export_nc(Y_last; nc_filename = ds_filename_computed, varname) - (computed_mses, paths) = reproducibility_test(; - job_id, - reference_mse = best_mse, - ds_filename_computed, - varname, - ) - - for (computed_mse, path) in zip(computed_mses, paths) - commit_hash = basename(path) - computed_mse_filename = - 
joinpath(output_dir, "computed_mse_$commit_hash.json") - - open(computed_mse_filename, "w") do io - JSON.print(io, computed_mse) - end - end -end diff --git a/reproducibility_tests/reproducibility_utils.jl b/reproducibility_tests/reproducibility_utils.jl index a0abf0bd107..23eb3ab001d 100644 --- a/reproducibility_tests/reproducibility_utils.jl +++ b/reproducibility_tests/reproducibility_utils.jl @@ -2,30 +2,32 @@ ################################################################################ Reproducibility Terminology. -Consider the following set of reproducibility -folders, prefixed by "reference counters", which -allow users to compare against other reproducible -states in that column. - -Note that reference counter changes can "rewind" -(which may happen in the case of reverted commits). -In such cases, we do consider the rewound state as -an entirely new state, in order to fully preserve -the history (to some depth). - -An important consequence of this requires precise -terminology to avoid ambiguous descriptions. - -For example, "comparable references per reference counter" -is not well defined, because the reference counter can -be reverted. So, let's introduce the concept of a "bin", -which can be defined as a collection of folders -created in a period with the same reference counter. -Folders created before and after that bin have a different -reference counter. Also, `n_bins == n_reference_changes + 1` -(modulo the edge case of when there are no bins) -because, if the reference counter doesn't change, new results -are put into the same bin. 
+First, we try to be consistent in distinguishing:
+
+ - `dir` to denote a directory (and not a file)
+ - `path` to denote files or directories
+ - `folder_name` to denote a folder name, for example, "`f`" in `a/f/c`
+ - `file` to denote a file (and not a directory)
+
+Consider the following set of reproducibility directories, prefixed
+by "reference counters", which allow users to compare against other
+reproducible states in that column.
+
+Note that reference counter changes can "rewind" (which may happen in the case
+of reverted commits). In such cases, we do consider the rewound state as an
+entirely new state, in order to fully preserve the history (to some depth).
+
+An important consequence of this requires precise terminology to avoid ambiguous
+descriptions.
+
+For example, "comparable references per reference counter" is not well defined,
+because the reference counter can be reverted. So, let's introduce the concept
+of a "bin", which can be defined as a collection of directories created in a
+period with the same reference counter. Folders created before and after that
+bin have a different reference counter. Also, `n_bins == n_reference_changes +
+1` (modulo the edge case of when there are no bins) because, if the reference
+counter doesn't change, new results are put into the same bin.
+
 ```
 comparable states | ref counter changes ----> | oldest
@@ -41,85 +43,111 @@ comparable states
 
 import Dates
 
-read_ref_counter(filename) = parse(Int, first(readlines(filename)))
+read_ref_counter(file) = parse(Int, first(readlines(file)))
+
+"""
+    sorted_dirs_with_matched_files(; dir = pwd(), filename)
+
+Return an array of subdirectories of `dir` (defaults to the current working
+directory) sorted by modification time (oldest to newest). Return an empty
+vector if no matching directories are found.
+
+This function recurses through `dir`, and finds all directories that have the
+file `filename`.
+""" +function sorted_dirs_with_matched_files(; + dir = pwd(), + filename = "ref_counter.jl", +) + matched_dirs = String[] + for (root, dirs, files) in walkdir(dir) + for dir in dirs + push!(matched_dirs, joinpath(root, dir)) + end + end + isempty(matched_dirs) && return String[] + filter!(x -> isfile(joinpath(x, filename)), matched_dirs) + isempty(matched_dirs) && return String[] + # sort by timestamp + sorted_dirs = + sort(matched_dirs; by = f -> Dates.unix2datetime(stat(f).mtime)) + return sorted_dirs +end """ sorted_dataset_folder(; dir=pwd()) -Return a the subdirectory paths within the given `dir` (defaults -to the current working directory) sorted by modification time -(oldest to newest). Return an empty vector if no subdirectories -are found. +Return an array of subdirectories of `dir` (defaults to the current working +directory) sorted by modification time (oldest to newest). Return an empty +vector if no subdirectories are found. """ function sorted_dataset_folder(; dir = pwd()) - matching_paths = filter(ispath, readdir(dir; join = true)) - isempty(matching_paths) && return String[] + matching_dirs = filter(isdir, readdir(dir; join = true)) + isempty(matching_dirs) && return String[] # sort by timestamp - sorted_paths = - sort(matching_paths; by = f -> Dates.unix2datetime(stat(f).mtime)) - return sorted_paths + sorted_dirs = + sort(matching_dirs; by = f -> Dates.unix2datetime(stat(f).mtime)) + return sorted_dirs end """ - ref_counters_per_path(paths) + ref_counters_per_dir(dirs) -Read the `ref_counter.jl` file in each given path and parses the integer it contains. -Return a vector of integers, where each element corresponds to a path. -If a path does not contain a `ref_counter.jl` file, the corresponding element is -1. -It assumes that `ref_counter.jl` contains the value as the first line of the file. +Read the `ref_counter.jl` file in each given directory and parses the integer it +contains. 
Return a vector of integers, where each element corresponds to a
+directory. If a directory does not contain a `ref_counter.jl` file, the
+corresponding element is -1. It assumes that `ref_counter.jl` contains the
+value as the first line of the file.
 """
-function ref_counters_per_path(paths)
-    ref_counters_in_path = Vector{Int}(undef, length(paths))
-    ref_counters_in_path .= -1
-    for (i, path) in enumerate(paths)
-        ref_counter_file = joinpath(path, "ref_counter.jl")
+function ref_counters_per_dir(dirs)
+    ref_counters_in_dir = Vector{Int}(undef, length(dirs))
+    ref_counters_in_dir .= -1
+    for (i, dir) in enumerate(dirs)
+        ref_counter_file = joinpath(dir, "ref_counter.jl")
         !isfile(ref_counter_file) && continue
-        ref_counters_in_path[i] = read_ref_counter(ref_counter_file)
+        ref_counters_in_dir[i] = read_ref_counter(ref_counter_file)
     end
-    return ref_counters_in_path
+    return ref_counters_in_dir
 end
 
 """
-    paths = latest_comparable_paths(;
+    dirs = latest_comparable_dirs(;
         n = 5,
-        root_path = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
+        root_dir = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
         ref_counter_PR = read_ref_counter(joinpath(@__DIR__, "ref_counter.jl"))
         skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci"
     )
 
-Returns a vector of strings, containing the `n`
-latest comparable paths in the `root_path` directory.
-Only paths that match the `ref_counter_PR` are
-returned, and an empty vector is retuned if
-`skip = true`. By default, `skip` is set to
-`get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci"`.
+Return a vector of strings, containing the `n` latest comparable directories in
+the `root_dir` directory. Only directories that match the `ref_counter_PR` are
+returned, and an empty vector is returned if `skip = true`. By default, `skip`
+is set to `get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci"`.
The assumed folder structure is: ``` -root_path/some_folder_1/ref_counter.jl -root_path/some_folder_2/ref_counter.jl -root_path/some_folder_3/ref_counter.jl +root_dir/some_folder_1/ref_counter.jl +root_dir/some_folder_2/ref_counter.jl +root_dir/some_folder_3/ref_counter.jl ``` -If a subfolder does not contain a `ref_counter.jl` file -then it is filtered out as not-comparable. The `ref_counter.jl` -files are assumed to start with a single integer, -which is read. If that integer matches `ref_counter_PR`, -then that path is considered comparable. +If a subfolder does not contain a `ref_counter.jl` file then it is filtered out +as not-comparable. The `ref_counter.jl` files are assumed to start with a +single integer, which is read. If that integer matches `ref_counter_PR`, then +that directory is considered comparable. -`paths[1]` is the most recent comparable path, and -`paths[end]` is the oldest comparable path. +`dirs[1]` is the most recent comparable directory, and `dirs[end]` is the oldest +comparable directory. 
""" -function latest_comparable_paths(; +function latest_comparable_dirs(; n = 5, - root_path = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + root_dir = "/central/scratch/esm/slurm-buildkite/climaatmos-main", ref_counter_PR = read_ref_counter(joinpath(@__DIR__, "ref_counter.jl")), skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci", ) skip && return String[] - bins = compute_bins(root_path) + bins = compute_bins(root_dir) isempty(bins) && return String[] ref_counter_bins = filter(bins) do bin f = joinpath(first(bin), "ref_counter.jl") @@ -127,29 +155,28 @@ function latest_comparable_paths(; end isnothing(ref_counter_bins) && return String[] isempty(ref_counter_bins) && return String[] - comparable_paths = ref_counter_bins[1] - return comparable_paths[1:min(n, length(comparable_paths))] + comparable_dirs = ref_counter_bins[1] + return comparable_dirs[1:min(n, length(comparable_dirs))] end """ - invalid_reference_folders(; root_path) + invalid_reference_folders(dirs) -Return all subfolders in `root_path` -that meet the following criteria: +Return all subfolders in vectory of directory, `dirs`, that meet the following +criteria: - A `ref_counter.jl` file is missing """ -function invalid_reference_folders(; root_path) - paths = sorted_dataset_folder(; dir = root_path) - invalid_folders = filter(paths) do p +function invalid_reference_folders(dirs) + invalid_folders = filter(dirs) do p !isfile(joinpath(p, "ref_counter.jl")) end return invalid_folders end """ - compute_bins(root_path::String) - compute_bins(sorted_paths::Vector{String}) + compute_bins(root_dir::String) + compute_bins(sorted_dirs::Vector{String}) Return a vector of reproducibility bins. 
@@ -168,52 +195,57 @@ comparable states
 v 04_4c042        v newest
 ```
 """
-compute_bins(root_path::String) =
-    compute_bins(reverse(sorted_dataset_folder(; dir = root_path)))
-function compute_bins(sorted_paths::Vector{String})
+compute_bins(root_dir::String; filename = "ref_counter.jl") = compute_bins(
+    reverse(
+        sorted_dirs_with_matched_files(;
+            dir = root_dir,
+            filename,
+        ),
+    ),
+)
+function compute_bins(sorted_dirs::Vector{String})
+    @assert isempty(invalid_reference_folders(sorted_dirs))
     bins = Vector{String}[]
-    path_index = 1
-    while path_index ≤ length(sorted_paths)
-        paths_per_bin = String[]
-        while path_index ≤ length(sorted_paths)
-            path = sorted_paths[path_index]
-            if isempty(paths_per_bin)
-                push!(paths_per_bin, path)
-                path_index += 1
+    dir_index = 1
+    while dir_index ≤ length(sorted_dirs)
+        dirs_per_bin = String[]
+        while dir_index ≤ length(sorted_dirs)
+            dir = sorted_dirs[dir_index]
+            if isempty(dirs_per_bin)
+                push!(dirs_per_bin, dir)
+                dir_index += 1
             else
                 ref_counter_bin = read_ref_counter(
-                    joinpath(first(paths_per_bin), "ref_counter.jl"),
+                    joinpath(first(dirs_per_bin), "ref_counter.jl"),
                 )
-                ref_counter_path =
-                    read_ref_counter(joinpath(path, "ref_counter.jl"))
-                if ref_counter_bin == ref_counter_path
-                    push!(paths_per_bin, path)
-                    path_index += 1
+                ref_counter_dir =
+                    read_ref_counter(joinpath(dir, "ref_counter.jl"))
+                if ref_counter_bin == ref_counter_dir
+                    push!(dirs_per_bin, dir)
+                    dir_index += 1
                 else
                     break
                 end
             end
         end
-        push!(bins, paths_per_bin)
+        push!(bins, dirs_per_bin)
     end
    return bins
 end
 
 """
-    get_reference_paths_to_delete(;
-        root_path,
+    get_reference_dirs_to_delete(;
+        root_dir,
         keep_n_comparable_states = 5,
         keep_n_bins_back = 7,
     )
 
 Return a list of folders to delete.
 
-Our reference folders are saved, and can
-therefore build up significantly and take
-a lot of storage.
+Our reference folders are saved, and can therefore build up significantly and
+take a lot of storage.
-Consider a collection of folders whose -names are prepended by the reference +Consider a collection of folders whose names are prepended by the reference counter: ``` @@ -226,26 +258,27 @@ keep_n_comparable_states | 04_d6e48 06_d6d73 08_1cc58 | v 04_4c042 v newest ``` -With these folders, and given a reference -counter of 10, we'll see the following + +With these folders, and given a reference counter of 10, we'll see the following behavior: + ``` - get_reference_paths_to_delete(; + get_reference_dirs_to_delete(; keep_n_comparable_states = 4, keep_n_bins_back = 3 ) -> [02_49f92, 04_36ebe, 04_d6e48, 04_4c042] - get_reference_paths_to_delete(; + get_reference_dirs_to_delete(; keep_n_comparable_states = 1, keep_n_bins_back = 5 ) -> [02_49f92, 04_d6e48, 04_4c042, 06_d6d73, 08_1cc58] ``` Note: - `keep_n_references_back` is sorted _chronologically_, - in order to correctly operate in the case of - reverted pull requests. In other words, the above + `keep_n_references_back` is sorted _chronologically_, in order to correctly + operate in the case of reverted pull requests. 
In other words, the above references may look like this: + ``` keep_n_comparable_states | <---- keep_n_bins_back | oldest @@ -258,26 +291,133 @@ keep_n_comparable_states ``` """ -function get_reference_paths_to_delete(; - root_path, +function get_reference_dirs_to_delete(; + root_dir, keep_n_comparable_states = 5, keep_n_bins_back = 7, + filename = "ref_counter.jl", ) - @assert isempty(invalid_reference_folders(; root_path)) - paths_to_delete = String[] - sorted_paths = reverse(sorted_dataset_folder(; dir = root_path)) - if !isempty(sorted_paths) - # Now, sorted_paths[1] is newest, sorted_paths[end] is oldest - bins = compute_bins(sorted_paths) + dirs = sorted_dirs_with_matched_files(; dir = root_dir, filename) + @assert isempty(invalid_reference_folders(dirs)) + dir_to_delete = String[] + sorted_dirs = reverse(dirs) + if !isempty(sorted_dirs) + # Now, sorted_dirs[1] is newest, sorted_dirs[end] is oldest + bins = compute_bins(sorted_dirs) for i in 1:length(bins), j in 1:length(bins[i]) if i ≤ keep_n_bins_back if !(j ≤ keep_n_comparable_states) - push!(paths_to_delete, bins[i][j]) + push!(dir_to_delete, bins[i][j]) end else - push!(paths_to_delete, bins[i][j]) + push!(dir_to_delete, bins[i][j]) end end end - return paths_to_delete + return dir_to_delete +end + +""" + source_checksum(dir = pwd()) + +Return a hash from the contents of all Julia files found recursively in `dir` +(defaults to `pwd`). 
+""" +function source_checksum(dir = pwd()) + jl_files = String[] + for (root, dirs, files) in walkdir(dir) + for file in files + endswith(file, ".jl") && push!(jl_files, joinpath(root, file)) + end + end + all_contents = map(jl_files) do jl_file + readlines(jl_file) + end + joined_contents = join(all_contents, "\n") + return hash(joined_contents) +end + +function source_has_changed(; + n = 5, + root_dir = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + ref_counter_PR = read_ref_counter(joinpath(@__DIR__, "ref_counter.jl")), + skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci", + src_dir = dirname(@__DIR__), +) + dirs = latest_comparable_dirs(; n, root_dir, ref_counter_PR, skip) + isempty(dirs) && return true + latest_reference_checksum = joinpath(dirs[1], "source_checksum.dat") + if isfile(latest_reference_checksum) + src_checksum = + parse(UInt64, first(readlines(latest_reference_checksum))) + if source_checksum(src_dir) == src_checksum + return false # all julia files are the same + else + return true + end + else + return true + end +end + +""" + move_data_to_save_dir(; + dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + buildkite_ci = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci", + commit = get(ENV, "BUILDKITE_COMMIT", nothing), + branch = get(ENV, "BUILDKITE_BRANCH", nothing), + in_merge_queue = startswith(branch, "gh-readonly-queue/main/"), + dirs_src, + ref_counter_file_PR = joinpath(@__DIR__, "ref_counter.jl"), + ) + +Moves data from directories `dirs_src[i]` to `dest_root/commit_sha/basename +(dirs_src[i])`, given some conditions are met. 
In particular, data movement +will occur when this function is called: + + - on a job run in buildkite + - when in the merge queue + - when on the main branch if the `source_checksum` is different from the source + code in the latest comparable reference + +""" +function move_data_to_save_dir(; + dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + buildkite_ci = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == + "climaatmos-ci", + commit = get(ENV, "BUILDKITE_COMMIT", nothing), + branch = get(ENV, "BUILDKITE_BRANCH", nothing), + in_merge_queue = startswith(branch, "gh-readonly-queue/main/"), + dirs_src, + ref_counter_file_PR = joinpath(@__DIR__, "ref_counter.jl"), + ref_counter_PR = read_ref_counter(ref_counter_file_PR), + skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci", + n_hash_characters = 7, +) + buildkite_ci || return nothing + + # if a contributor manually merged, we still want to move data from scratch + # to `dest_root`. But if moving data on both conditions means that data + # will be moved twice if it's merged via the merge queue (and if it is) run + # again on the main branch. 
One thing we can do to prevent the redundant + # movement is to check if the source code has changed: + + @assert isfile(ref_counter_file_PR) + if in_merge_queue || ( + branch == "main" && + source_has_changed(; n = 1, root_dir = dest_root, ref_counter_PR, skip) + ) + commit_sha = commit[1:min(n_hash_characters, length(commit))] + mkpath(dest_root) + dest_dir = joinpath(dest_root, commit_sha) + mkpath(dest_dir) + # Always move reproducibility data, so that we + # can compare against multiple references + for src in dirs_src + dst = joinpath(dest_dir, basename(src)) + mv(src, dst; force = true) + end + ref_counter_file_main = joinpath(dest_dir, "ref_counter.jl") + mv(ref_counter_file_PR, ref_counter_file_main; force = true) + end end diff --git a/reproducibility_tests/test_reset.jl b/reproducibility_tests/test_reset.jl deleted file mode 100644 index 14f8a1c8770..00000000000 --- a/reproducibility_tests/test_reset.jl +++ /dev/null @@ -1,14 +0,0 @@ -import OrderedCollections - -# Get cases from JobIDs in mse_tables file: -include(joinpath(@__DIR__, "reproducibility_utils.jl")) -paths = latest_comparable_paths() -include(joinpath(@__DIR__, "mse_tables.jl")) - -#### Test that mse values are all zero if ref counter is incremented -mse_vals = collect(Iterators.flatten(map(x -> values(x), values(all_best_mse)))) -if isempty(paths) && !all(mse_vals .== 0) - error( - "All mse values in `reproducibility_tests/mse_tables.jl` must be set to zero when the reference counter is incremented", - ) -end diff --git a/test/Project.toml b/test/Project.toml index fab088ffa7a..3420581d2f5 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -7,6 +7,7 @@ ClimaAtmos = "b2c96348-7fb7-4fe0-8da9-78d88439e717" ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884" ClimaParams = "5c42b081-d73a-476f-9059-fd94b934656c" +ClimaReproducibilityTests = "e0c89595-00ba-42a9-9f9b-061ef3dc23a1" ClimaTimeSteppers = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" 
ClimaUtilities = "b3f4f4ca-9299-4f7f-bd9b-81e1242a7513" CloudMicrophysics = "6a9e3e04-43cd-43ba-94b9-e8782df3c71b" diff --git a/test/unit_reproducibility_infra.jl b/test/unit_reproducibility_infra.jl index 5194608c93a..d69e3d2b7f9 100644 --- a/test/unit_reproducibility_infra.jl +++ b/test/unit_reproducibility_infra.jl @@ -1,270 +1,265 @@ #= +julia --project=examples using Revise; include("test/unit_reproducibility_infra.jl") =# using Test import Dates import Logging -include(joinpath("..", "reproducibility_tests/reproducibility_utils.jl")) +# this also includes reproducibility_utils.jl +include(joinpath("..", "reproducibility_tests/compute_mse.jl")) -quiet_latest_comparable_paths(args...; kwargs...) = +quiet_latest_comparable_dirs(args...; kwargs...) = Logging.with_logger(Logging.NullLogger()) do - latest_comparable_paths(args...; kwargs...) + latest_comparable_dirs(args...; kwargs...) end basenames(x) = map(basename, x) # for debugging -function make_path(dir, pathname) - d = mkdir(pathname) +function make_dir(dir, dirname) + d = mkpath(dirname) return joinpath(dir, d) end -function make_ref_file_counter(dir, pathname, i) - d = mkdir(pathname) - open(io -> println(io, i), joinpath(d, "ref_counter.jl"), "w") - return joinpath(dir, d) +function make_and_cd(f) + mktempdir() do dir + cd(dir) do + f(dir) + end + end end -@testset "Reproducibility infrastructure: latest_comparable_paths" begin - # No paths at all - mktempdir() do path - cd(path) do - paths = quiet_latest_comparable_paths(; - root_path = path, - ref_counter_PR = 2, - skip = false, - ) - @test paths == [] +function make_ref_file_counter(i, dir...) 
+ d = mkpath(joinpath(dir...)) + open(io -> println(io, i), joinpath(d, "ref_counter.jl"), "w") + return d +end +rbundle(p) = joinpath(p, "reproducibility_bundle") +function mktempdir2_cd_computed(f) # make two temporary dirs + mktempdir() do save_dir + mktempdir() do computed_dir + cd(computed_dir) do + f((save_dir, computed_dir)) + end end end +end - # No paths with ref counters - mktempdir() do path - cd(path) do - p1 = mkdir("d1") - paths = quiet_latest_comparable_paths(; - root_path = path, - ref_counter_PR = 2, - skip = false, - ) - @test paths == [] - end +function put_data_file( + dir, + fv, + comms_ctx, + name = "Y", + filename = "my_prog_state.hdf5", +) + mkpath(dir) + file = joinpath(dir, filename) + hdfwriter = InputOutput.HDF5Writer(file, comms_ctx) + InputOutput.write!(hdfwriter, fv, name) + Base.close(hdfwriter) +end + +@testset "Reproducibility infrastructure: latest_comparable_dirs" begin + # No dirs at all + make_and_cd() do dir + dirs = quiet_latest_comparable_dirs(; + root_dir = dir, + ref_counter_PR = 2, + skip = false, + ) + @test dirs == [] end - # No paths with matching ref counters - mktempdir() do path - cd(path) do - p1 = make_ref_file_counter(path, "d1", 1) - paths = quiet_latest_comparable_paths(; - root_path = path, - ref_counter_PR = 2, - skip = false, - ) - @test paths == [] - end + # No dirs with ref counters + make_and_cd() do dir + p1 = mkdir("d1") + dirs = quiet_latest_comparable_dirs(; + root_dir = dir, + ref_counter_PR = 2, + skip = false, + ) + @test dirs == [] + end + + # No dirs with matching ref counters + make_and_cd() do dir + d1 = make_ref_file_counter(1, dir, "d1") + dirs = quiet_latest_comparable_dirs(; + root_dir = dir, + ref_counter_PR = 2, + skip = false, + ) + @test dirs == [] end # 1 matching ref counter - mktempdir() do path - cd(path) do - p1 = make_ref_file_counter(path, "d1", 1) - p2 = make_ref_file_counter(path, "d2", 2) - p3 = make_ref_file_counter(path, "d3", 3) - paths = quiet_latest_comparable_paths(; - 
root_path = path, - ref_counter_PR = 2, - skip = false, - ) - @test paths == [p2] - end + make_and_cd() do dir + d1 = make_ref_file_counter(1, dir, "d1") + d2 = make_ref_file_counter(2, dir, "d2") + d3 = make_ref_file_counter(3, dir, "d3") + dirs = quiet_latest_comparable_dirs(; + root_dir = dir, + ref_counter_PR = 2, + skip = false, + ) + @test dirs == [d2] end # multiple matching ref counters - mktempdir() do path - cd(path) do - p1 = make_ref_file_counter(path, "d1", 1) - p2 = make_ref_file_counter(path, "d2", 2) - p3 = make_ref_file_counter(path, "d3", 3) - p4 = make_ref_file_counter(path, "d4", 3) - p5 = make_ref_file_counter(path, "d5", 3) - p6 = make_ref_file_counter(path, "d6", 3) - paths = quiet_latest_comparable_paths(; - root_path = path, - ref_counter_PR = 3, - skip = false, - ) - @test paths == [p6, p5, p4, p3] # p6 is most recent - end + make_and_cd() do dir + d1 = make_ref_file_counter(1, dir, "d1") + d2 = make_ref_file_counter(2, dir, "d2") + d3 = make_ref_file_counter(3, dir, "d3") + d4 = make_ref_file_counter(3, dir, "d4") + d5 = make_ref_file_counter(3, dir, "d5") + d6 = make_ref_file_counter(3, dir, "d6") + dirs = quiet_latest_comparable_dirs(; + root_dir = dir, + ref_counter_PR = 3, + skip = false, + ) + @test dirs == [d6, d5, d4, d3] # d6 is most recent end # matching ref counters that exceed n - mktempdir() do path - cd(path) do - p1 = make_ref_file_counter(path, "d1", 1) - p2 = make_ref_file_counter(path, "d2", 2) - p3 = make_ref_file_counter(path, "d3", 3) - p4 = make_ref_file_counter(path, "d4", 3) - p5 = make_ref_file_counter(path, "d5", 3) - p6 = make_ref_file_counter(path, "d6", 3) - paths = quiet_latest_comparable_paths(; - n = 2, - root_path = path, - ref_counter_PR = 3, - skip = false, - ) - @test paths == [p6, p5] # p6 is most recent - end + make_and_cd() do dir + d1 = make_ref_file_counter(1, dir, "d1") + d2 = make_ref_file_counter(2, dir, "d2") + d3 = make_ref_file_counter(3, dir, "d3") + d4 = make_ref_file_counter(3, dir, "d4") + 
d5 = make_ref_file_counter(3, dir, "d5") + d6 = make_ref_file_counter(3, dir, "d6") + dirs = quiet_latest_comparable_dirs(; + n = 2, + root_dir = dir, + ref_counter_PR = 3, + skip = false, + ) + @test dirs == [d6, d5] # d6 is most recent end # reverted commits examples - mktempdir() do path - cd(path) do - p1 = make_ref_file_counter(path, "d1", 1) - p2 = make_ref_file_counter(path, "d2", 2) - p3 = make_ref_file_counter(path, "d3", 3) - p4 = make_ref_file_counter(path, "d4", 4) - p5 = make_ref_file_counter(path, "d5", 5) - p6 = make_ref_file_counter(path, "d6", 3) - paths = quiet_latest_comparable_paths(; - n = 2, - root_path = path, - ref_counter_PR = 3, - skip = false, - ) - @test paths == [p6] - end + make_and_cd() do dir + d1 = make_ref_file_counter(1, dir, "d1") + d2 = make_ref_file_counter(2, dir, "d2") + d3 = make_ref_file_counter(3, dir, "d3") + d4 = make_ref_file_counter(4, dir, "d4") + d5 = make_ref_file_counter(5, dir, "d5") + d6 = make_ref_file_counter(3, dir, "d6") + dirs = quiet_latest_comparable_dirs(; + n = 2, + root_dir = dir, + ref_counter_PR = 3, + skip = false, + ) + @test dirs == [d6] end # appending to p7 now, confusingly, removes p3: - mktempdir() do path - cd(path) do - p1 = make_ref_file_counter(path, "d1", 1) - p2 = make_ref_file_counter(path, "d2", 2) - p3 = make_ref_file_counter(path, "d3", 3) - p4 = make_ref_file_counter(path, "d4", 4) - p5 = make_ref_file_counter(path, "d5", 5) - p6 = make_ref_file_counter(path, "d6", 3) - p7 = make_ref_file_counter(path, "d7", 3) - paths = quiet_latest_comparable_paths(; - n = 2, - root_path = path, - ref_counter_PR = 3, - skip = false, - ) - @test paths == [p7, p6] - end + make_and_cd() do dir + d1 = make_ref_file_counter(1, dir, "d1") + d2 = make_ref_file_counter(2, dir, "d2") + d3 = make_ref_file_counter(3, dir, "d3") + d4 = make_ref_file_counter(4, dir, "d4") + d5 = make_ref_file_counter(5, dir, "d5") + d6 = make_ref_file_counter(3, dir, "d6") + d7 = make_ref_file_counter(3, dir, "d7") + dirs = 
quiet_latest_comparable_dirs(; + n = 2, + root_dir = dir, + ref_counter_PR = 3, + skip = false, + ) + @test dirs == [d7, d6] end end @testset "Reproducibility infrastructure: validate_reference_folders" begin - # No paths at all - mktempdir() do path - cd(path) do - @test invalid_reference_folders(; root_path = path) == [] - end + # No dirs at all + make_and_cd() do dir + @test invalid_reference_folders(sorted_dataset_folder(; dir)) == [] end - # 1 path without ref counter - mktempdir() do path - cd(path) do - p1 = make_path(path, "d1") - @test invalid_reference_folders(; root_path = path) == [p1] - end + # 1 dir without ref counter + make_and_cd() do dir + p1 = make_dir(dir, "d1") + @test invalid_reference_folders(sorted_dataset_folder(; dir)) == [p1] end # mix - mktempdir() do path - cd(path) do - p1 = make_ref_file_counter(path, "d1", 1) - r1 = make_path(path, "r1") - p2 = make_ref_file_counter(path, "d2", 2) - r2 = make_path(path, "r2") - p3 = make_ref_file_counter(path, "d3", 3) - r3 = make_path(path, "r3") - @test invalid_reference_folders(; root_path = path) == [r1, r2, r3] - end + make_and_cd() do dir + d1 = make_ref_file_counter(1, dir, "d1") + r1 = make_dir(dir, "r1") + d2 = make_ref_file_counter(2, dir, "d2") + r2 = make_dir(dir, "r2") + d3 = make_ref_file_counter(3, dir, "d3") + r3 = make_dir(dir, "r3") + @test invalid_reference_folders(sorted_dataset_folder(; dir)) == + [r1, r2, r3] end end @testset "Reproducibility infrastructure: compute_bins" begin - # No paths at all - mktempdir() do path - cd(path) do - @test compute_bins(path) == [] - end + # No dirs at all + make_and_cd() do dir + @test compute_bins(dir) == [] end # 1 ref counter - mktempdir() do path - cd(path) do - p1 = make_ref_file_counter(path, "d1", 1) - @test compute_bins(path) == [[p1]] - end + make_and_cd() do dir + d1 = make_ref_file_counter(1, dir, "d1") + @test compute_bins(dir) == [[d1]] end # 2 ref counter - mktempdir() do path - cd(path) do - p1 = make_ref_file_counter(path, "d1", 
1) - p2 = make_ref_file_counter(path, "d2", 2) - @test compute_bins(path) == [[p2], [p1]] - end + make_and_cd() do dir + d1 = make_ref_file_counter(1, dir, "d1") + d2 = make_ref_file_counter(2, dir, "d2") + @test compute_bins(dir) == [[d2], [d1]] end # 4 ref counter - mktempdir() do path - cd(path) do - p1 = make_ref_file_counter(path, "d1", 1) - p2 = make_ref_file_counter(path, "d2", 2) - p3 = make_ref_file_counter(path, "d3", 3) - p4 = make_ref_file_counter(path, "d4", 3) - p5 = make_ref_file_counter(path, "d5", 5) - p6 = make_ref_file_counter(path, "d6", 5) - p7 = make_ref_file_counter(path, "d7", 6) - @test compute_bins(path) == [[p7], [p6, p5], [p4, p3], [p2], [p1]] - end + make_and_cd() do dir + d1 = make_ref_file_counter(1, dir, "d1") + d2 = make_ref_file_counter(2, dir, "d2") + d3 = make_ref_file_counter(3, dir, "d3") + d4 = make_ref_file_counter(3, dir, "d4") + d5 = make_ref_file_counter(5, dir, "d5") + d6 = make_ref_file_counter(5, dir, "d6") + d7 = make_ref_file_counter(6, dir, "d7") + @test compute_bins(dir) == [[d7], [d6, d5], [d4, d3], [d2], [d1]] end # simulating reverted PR - mktempdir() do path - cd(path) do - p1 = make_ref_file_counter(path, "d1", 1) - p2 = make_ref_file_counter(path, "d2", 2) - p3 = make_ref_file_counter(path, "d3", 3) - p4 = make_ref_file_counter(path, "d4", 4) - p5 = make_ref_file_counter(path, "d5", 3) - p6 = make_ref_file_counter(path, "d6", 4) - p7 = make_ref_file_counter(path, "d7", 5) - @test compute_bins(path) == - [[p7], [p6], [p5], [p4], [p3], [p2], [p1]] - end + make_and_cd() do dir + d1 = make_ref_file_counter(1, dir, "d1") + d2 = make_ref_file_counter(2, dir, "d2") + d3 = make_ref_file_counter(3, dir, "d3") + d4 = make_ref_file_counter(4, dir, "d4") + d5 = make_ref_file_counter(3, dir, "d5") + d6 = make_ref_file_counter(4, dir, "d6") + d7 = make_ref_file_counter(5, dir, "d7") + @test compute_bins(dir) == [[d7], [d6], [d5], [d4], [d3], [d2], [d1]] end end -@testset "Reproducibility infrastructure: 
get_reference_paths_to_delete" begin - # No paths at all - mktempdir() do path - cd(path) do - paths = get_reference_paths_to_delete(; root_path = path) - @test paths == [] - end +@testset "Reproducibility infrastructure: get_reference_dirs_to_delete" begin + # No dirs at all + make_and_cd() do dir + dirs = get_reference_dirs_to_delete(; root_dir = dir) + @test dirs == [] end - # Paths without ref counters (error) - mktempdir() do path - cd(path) do - p1 = mkdir("d1") - @test_throws AssertionError get_reference_paths_to_delete(; - root_path = path, - ) - end + # dirs without ref counters (assume this isn't reproducibility data) + make_and_cd() do dir + d1 = mkdir("d1") + dirs = get_reference_dirs_to_delete(; root_dir = dir) + @test dirs == [] end # keep everything case - mktempdir() do path - cd(path) do - p1 = make_ref_file_counter(path, "d1", 1) - paths = get_reference_paths_to_delete(; root_path = path) - @test paths == [] - end + make_and_cd() do dir + d1 = make_ref_file_counter(1, dir, "d1") + dirs = get_reference_dirs_to_delete(; root_dir = dir) + @test dirs == [] end #= @@ -279,32 +274,30 @@ end | p03 p07 p10 | v p04 v newest =# - mktempdir() do path - cd(path) do - p01 = make_ref_file_counter(path, "01", 1) - p02 = make_ref_file_counter(path, "02", 2) - p03 = make_ref_file_counter(path, "03", 2) - p04 = make_ref_file_counter(path, "04", 2) - p05 = make_ref_file_counter(path, "05", 3) - p06 = make_ref_file_counter(path, "06", 4) - p07 = make_ref_file_counter(path, "07", 4) - p08 = make_ref_file_counter(path, "08", 5) - p09 = make_ref_file_counter(path, "09", 6) - p10 = make_ref_file_counter(path, "10", 6) - p11 = make_ref_file_counter(path, "11", 7) - paths = get_reference_paths_to_delete(; - root_path = path, - keep_n_comparable_states = 1, - keep_n_bins_back = 5, - ) - @test paths == reverse([p01, p02, p03, p04, p06, p09]) - paths = get_reference_paths_to_delete(; - root_path = path, - keep_n_comparable_states = 4, - keep_n_bins_back = 3, - ) - @test paths 
== reverse([p01, p02, p03, p04, p05, p06, p07]) - end + make_and_cd() do dir + d01 = make_ref_file_counter(1, dir, "01") + d02 = make_ref_file_counter(2, dir, "02") + d03 = make_ref_file_counter(2, dir, "03") + d04 = make_ref_file_counter(2, dir, "04") + d05 = make_ref_file_counter(3, dir, "05") + d06 = make_ref_file_counter(4, dir, "06") + d07 = make_ref_file_counter(4, dir, "07") + d08 = make_ref_file_counter(5, dir, "08") + d09 = make_ref_file_counter(6, dir, "09") + d10 = make_ref_file_counter(6, dir, "10") + d11 = make_ref_file_counter(7, dir, "11") + dirs = get_reference_dirs_to_delete(; + root_dir = dir, + keep_n_comparable_states = 1, + keep_n_bins_back = 5, + ) + @test dirs == reverse([d01, d02, d03, d04, d06, d09]) + dirs = get_reference_dirs_to_delete(; + root_dir = dir, + keep_n_comparable_states = 4, + keep_n_bins_back = 3, + ) + @test dirs == reverse([d01, d02, d03, d04, d05, d06, d07]) end #= @@ -315,36 +308,726 @@ end | | | B01 B02 B03 B01 B02 B03 B04 | | | - | p01 p02 p05 p06 p08 p09 p11 | - | p03 p07 p10 | - v p04 v newest + | d01 d02 d05 d06 d08 d09 d11 | + | d03 d07 d10 | + v d04 v newest =# - mktempdir() do path - cd(path) do - p01 = make_ref_file_counter(path, "01", 1) - p02 = make_ref_file_counter(path, "02", 2) - p03 = make_ref_file_counter(path, "03", 2) - p04 = make_ref_file_counter(path, "04", 2) - p05 = make_ref_file_counter(path, "05", 3) - p06 = make_ref_file_counter(path, "06", 1) - p07 = make_ref_file_counter(path, "07", 1) - p08 = make_ref_file_counter(path, "08", 2) - p09 = make_ref_file_counter(path, "09", 3) - p10 = make_ref_file_counter(path, "10", 3) - p11 = make_ref_file_counter(path, "11", 4) - paths = get_reference_paths_to_delete(; - root_path = path, - keep_n_comparable_states = 1, - keep_n_bins_back = 5, + make_and_cd() do dir + d01 = make_ref_file_counter(1, dir, "01") + d02 = make_ref_file_counter(2, dir, "02") + d03 = make_ref_file_counter(2, dir, "03") + d04 = make_ref_file_counter(2, dir, "04") + d05 = 
make_ref_file_counter(3, dir, "05") + d06 = make_ref_file_counter(1, dir, "06") + d07 = make_ref_file_counter(1, dir, "07") + d08 = make_ref_file_counter(2, dir, "08") + d09 = make_ref_file_counter(3, dir, "09") + d10 = make_ref_file_counter(3, dir, "10") + d11 = make_ref_file_counter(4, dir, "11") + dirs = get_reference_dirs_to_delete(; + root_dir = dir, + keep_n_comparable_states = 1, + keep_n_bins_back = 5, + ) + @test dirs == reverse([d01, d02, d03, d04, d06, d09]) + dirs = get_reference_dirs_to_delete(; + root_dir = dir, + keep_n_comparable_states = 4, + keep_n_bins_back = 3, + ) + @test dirs == reverse([d01, d02, d03, d04, d05, d06, d07]) + end +end + +using ClimaComms +using ClimaCore.CommonGrids +using ClimaCore: Spaces, Fields, Grids, InputOutput +@testset "Reproducibility infrastructure: to_dict" begin + make_and_cd() do dir + grid = ExtrudedCubedSphereGrid(; + z_elem = 10, + z_min = 0, + z_max = 1, + radius = 10, + h_elem = 10, + n_quad_points = 4, + ) + space = Spaces.ExtrudedFiniteDifferenceSpace(grid, Grids.CellCenter()) + comms_ctx = ClimaComms.context(space) + + fv = Fields.FieldVector(; x = ones(space), y = ones(space)) + file = joinpath(dir, "fv.hdf5") + hdfwriter = InputOutput.HDF5Writer(file, comms_ctx) + InputOutput.write!(hdfwriter, fv, "fv") + Base.close(hdfwriter) + dict = to_dict(file, "fv", comms_ctx) + @test dict["(:x,)"] isa Vector{Float64} + @test dict["(:y,)"] isa Vector{Float64} + zdict = zero_dict(file, "fv", comms_ctx) + @test zdict["(:x,)"] isa Vector{Float64} + @test zdict["(:y,)"] isa Vector{Float64} + @test all(x -> iszero(x), zdict["(:x,)"]) + @test all(x -> iszero(x), zdict["(:y,)"]) + end +end + +@testset "Reproducibility infrastructure: reproducibility_results - legacy folder structure" begin + make_and_cd() do dir + grid = ExtrudedCubedSphereGrid(; + z_elem = 5, + z_min = 0, + z_max = 1, + radius = 10, + h_elem = 5, + n_quad_points = 2, + ) + space = Spaces.ExtrudedFiniteDifferenceSpace(grid, Grids.CellCenter()) + comms_ctx 
= ClimaComms.context(space) + + fv = Fields.FieldVector(; x = ones(space), y = ones(space)) + file = joinpath(dir, "my_prog_state.hdf5") + hdfwriter = InputOutput.HDF5Writer(file, comms_ctx) + InputOutput.write!(hdfwriter, fv, "Y") + Base.close(hdfwriter) + + # Not on buildkite + job_id = "unit_test" + (d, v, how) = reproducibility_results( + comms_ctx; + job_id, + name = "Y", + save_dir = dir, + ref_counter_PR = 1, + reference_filename = "my_prog_state.hdf5", + data_file_computed = file, + skip = true, + ) + @test length(v) == 1 + @test v[1]["(:x,)"] isa Vector{Float64} + @test v[1]["(:y,)"] isa Vector{Float64} + @test all(x -> iszero(x), v[1]["(:x,)"]) + @test all(x -> iszero(x), v[1]["(:y,)"]) + + @test isempty(d) + @test how == :skipped + + # Empty comparable dirs + job_id = "unit_test" + (d, v, how) = reproducibility_results( + comms_ctx; + job_id, + name = "Y", + save_dir = dir, + reference_filename = "my_prog_state.hdf5", + ref_counter_PR = 1, + data_file_computed = file, + skip = false, + ) + @test length(v) == 1 + @test v[1]["(:x,)"] isa Vector{Float64} + @test v[1]["(:y,)"] isa Vector{Float64} + @test all(x -> iszero(x), v[1]["(:x,)"]) + @test all(x -> iszero(x), v[1]["(:y,)"]) + + @test isempty(d) + @test how == :no_comparable_dirs + + # Successful comparison + + d01 = make_ref_file_counter(1, dir, "01") + d02 = make_ref_file_counter(2, dir, "02") + d03 = make_ref_file_counter(3, dir, "03") + d04 = make_ref_file_counter(3, dir, "04") + d05 = make_ref_file_counter(3, dir, "05") + + put_data_file(d01, fv, comms_ctx) + put_data_file(d02, fv, comms_ctx) + put_data_file(d03, fv, comms_ctx) + put_data_file(d04, fv, comms_ctx) + fv.x .= 200 + fv.y .= 300 + put_data_file(d05, fv, comms_ctx) + + job_id = "unit_test" + (d, v, how) = reproducibility_results( + comms_ctx; + job_id, + name = "Y", + save_dir = dir, + ref_counter_PR = 3, + reference_filename = "my_prog_state.hdf5", + data_file_computed = file, + skip = false, + ) + # The first we compare against is 
most recent, + # And we set `fv.x .= 200` and `fv.y .= 300` for + # that dataset. + @test v[1]["(:x,)"] == 2970.075 + @test v[1]["(:y,)"] == 2980.0333333333333 + @test v[2]["(:x,)"] == 0.0 + @test v[2]["(:y,)"] == 0.0 + @test v[3]["(:x,)"] == 0.0 + @test v[3]["(:y,)"] == 0.0 + + @test d == [d05, d04, d03] + @test how == :successful_comparison + end +end + +@testset "Reproducibility infrastructure: reproducibility_results" begin + mktempdir2_cd_computed() do (save_dir, computed_dir) + grid = ExtrudedCubedSphereGrid(; + z_elem = 5, + z_min = 0, + z_max = 1, + radius = 10, + h_elem = 5, + n_quad_points = 2, + ) + space = Spaces.ExtrudedFiniteDifferenceSpace(grid, Grids.CellCenter()) + comms_ctx = ClimaComms.context(space) + + fv = Fields.FieldVector(; x = ones(space), y = ones(space)) + file = joinpath( + computed_dir, + "reproducibility_bundle", + "my_prog_state.hdf5", + ) + mkpath(dirname(file)) + hdfwriter = InputOutput.HDF5Writer(file, comms_ctx) + InputOutput.write!(hdfwriter, fv, "Y") + Base.close(hdfwriter) + + # Not on buildkite + job_id = "unit_test" + (d, v, how) = reproducibility_results( + comms_ctx; + job_id, + name = "Y", + save_dir = save_dir, + ref_counter_PR = 1, + reference_filename = "my_prog_state.hdf5", + data_file_computed = file, + skip = true, + ) + @test length(v) == 1 + @test v[1]["(:x,)"] isa Vector{Float64} + @test v[1]["(:y,)"] isa Vector{Float64} + @test all(x -> iszero(x), v[1]["(:x,)"]) + @test all(x -> iszero(x), v[1]["(:y,)"]) + + @test isempty(d) + @test how == :skipped + + # Empty comparable dirs + job_id = "unit_test" + (d, v, how) = reproducibility_results( + comms_ctx; + job_id, + name = "Y", + save_dir = save_dir, + reference_filename = "my_prog_state.hdf5", + ref_counter_PR = 1, + data_file_computed = file, + skip = false, + ) + @test length(v) == 1 + @test v[1]["(:x,)"] isa Vector{Float64} + @test v[1]["(:y,)"] isa Vector{Float64} + @test all(x -> iszero(x), v[1]["(:x,)"]) + @test all(x -> iszero(x), v[1]["(:y,)"]) + + @test 
isempty(d) + @test how == :no_comparable_dirs + + # Successful comparison + + d01 = make_ref_file_counter(1, save_dir, rbundle("01")) + d02 = make_ref_file_counter(2, save_dir, rbundle("02")) + d03 = make_ref_file_counter(3, save_dir, rbundle("03")) + d04 = make_ref_file_counter(3, save_dir, rbundle("04")) + d05 = make_ref_file_counter(3, save_dir, rbundle("05")) + + put_data_file(d01, fv, comms_ctx) + put_data_file(d02, fv, comms_ctx) + put_data_file(d03, fv, comms_ctx) + put_data_file(d04, fv, comms_ctx) + fv.x .= 200 + fv.y .= 300 + put_data_file(d05, fv, comms_ctx) + + job_id = "unit_test" + (d, v, how) = reproducibility_results( + comms_ctx; + job_id, + name = "Y", + save_dir = save_dir, + ref_counter_PR = 3, + reference_filename = "my_prog_state.hdf5", + data_file_computed = file, + skip = false, + ) + # The first we compare against is most recent, + # And we set `fv.x .= 200` and `fv.y .= 300` for + # that dataset. + @test v[1]["(:x,)"] == 2970.075 + @test v[1]["(:y,)"] == 2980.0333333333333 + @test v[2]["(:x,)"] == 0.0 + @test v[2]["(:y,)"] == 0.0 + @test v[3]["(:x,)"] == 0.0 + @test v[3]["(:y,)"] == 0.0 + + @test d == [d05, d04, d03] + @test how == :successful_comparison + end +end + +@testset "Reproducibility infrastructure: export_reproducibility_results, legacy folder structure" begin + mktempdir2_cd_computed() do (save_dir, computed_dir) + grid = ExtrudedCubedSphereGrid(; + z_elem = 5, + z_min = 0, + z_max = 1, + radius = 10, + h_elem = 5, + n_quad_points = 2, + ) + space = Spaces.ExtrudedFiniteDifferenceSpace(grid, Grids.CellCenter()) + comms_ctx = ClimaComms.context(space) + + job_id = "unit_test_export_reproducibility_results" + + fv = Fields.FieldVector(; x = ones(space), y = ones(space)) + + # Test skipped case + (data_file_computed, computed_mses, dirs, how) = + export_reproducibility_results( + fv, + comms_ctx; + job_id, + save_dir = save_dir, + computed_dir = computed_dir, + name = "Y", + n = 10, + ref_counter_PR = 1, + skip = true, ) - @test 
paths == reverse([p01, p02, p03, p04, p06, p09]) - paths = get_reference_paths_to_delete(; - root_path = path, - keep_n_comparable_states = 4, - keep_n_bins_back = 3, + @test how == :skipped + @test isempty(dirs) + + # Test no comparable dirs + (data_file_computed, computed_mses, dirs, how) = + export_reproducibility_results( + fv, + comms_ctx; + job_id, + save_dir = save_dir, + computed_dir = computed_dir, + name = "Y", + n = 10, + ref_counter_PR = 1, + skip = false, ) - @test paths == reverse([p01, p02, p03, p04, p05, p06, p07]) - end + @test how == :no_comparable_dirs + @test isempty(dirs) + + # Successful comparisons, legacy path configuration + d01 = make_ref_file_counter(1, save_dir, "01") + d02 = make_ref_file_counter(2, save_dir, "02") + d03 = make_ref_file_counter(3, save_dir, "03") + d04 = make_ref_file_counter(3, save_dir, "04") + d05 = make_ref_file_counter(3, save_dir, "05") + + put_data_file(d01, fv, comms_ctx) + put_data_file(d02, fv, comms_ctx) + put_data_file(d03, fv, comms_ctx) + put_data_file(d04, fv, comms_ctx) + fv.x .= 200 + fv.y .= 300 + put_data_file(d05, fv, comms_ctx) + + @test isfile(joinpath(d01, "my_prog_state.hdf5")) + @test isfile(joinpath(d02, "my_prog_state.hdf5")) + @test isfile(joinpath(d03, "my_prog_state.hdf5")) + @test isfile(joinpath(d04, "my_prog_state.hdf5")) + @test isfile(joinpath(d05, "my_prog_state.hdf5")) + @test isfile(joinpath(d01, "ref_counter.jl")) + @test isfile(joinpath(d02, "ref_counter.jl")) + @test isfile(joinpath(d03, "ref_counter.jl")) + @test isfile(joinpath(d04, "ref_counter.jl")) + @test isfile(joinpath(d05, "ref_counter.jl")) + + (data_file_computed, computed_mses, dirs, how) = + export_reproducibility_results( + fv, + comms_ctx; + job_id, + save_dir = save_dir, + computed_dir = computed_dir, + name = "Y", + reference_filename = "my_prog_state.hdf5", + n = 10, + ref_counter_PR = 3, + skip = false, + ) + @test how == :successful_comparison + @test dirs == [d05, d04, d03] + end +end + +@testset 
"Reproducibility infrastructure: export_reproducibility_results" begin + mktempdir2_cd_computed() do (save_dir, computed_dir) + grid = ExtrudedCubedSphereGrid(; + z_elem = 5, + z_min = 0, + z_max = 1, + radius = 10, + h_elem = 5, + n_quad_points = 2, + ) + space = Spaces.ExtrudedFiniteDifferenceSpace(grid, Grids.CellCenter()) + comms_ctx = ClimaComms.context(space) + + job_id = "unit_test_export_reproducibility_results" + + fv = Fields.FieldVector(; x = ones(space), y = ones(space)) + + # Test skipped case + (data_file_computed, computed_mses, dirs, how) = + export_reproducibility_results( + fv, + comms_ctx; + job_id, + save_dir = save_dir, + computed_dir = computed_dir, + name = "Y", + n = 10, + ref_counter_PR = 1, + skip = true, + ) + @test how == :skipped + @test isempty(dirs) + + # Test no comparable dirs + (data_file_computed, computed_mses, dirs, how) = + export_reproducibility_results( + fv, + comms_ctx; + job_id, + save_dir = save_dir, + computed_dir = computed_dir, + name = "Y", + n = 10, + ref_counter_PR = 1, + skip = false, + ) + @test how == :no_comparable_dirs + @test isempty(dirs) + + # Successful comparisons, legacy path configuration + d01 = make_ref_file_counter(1, save_dir, rbundle("01")) + d02 = make_ref_file_counter(2, save_dir, rbundle("02")) + d03 = make_ref_file_counter(3, save_dir, rbundle("03")) + d04 = make_ref_file_counter(3, save_dir, rbundle("04")) + d05 = make_ref_file_counter(3, save_dir, rbundle("05")) + + put_data_file(d01, fv, comms_ctx) + put_data_file(d02, fv, comms_ctx) + put_data_file(d03, fv, comms_ctx) + put_data_file(d04, fv, comms_ctx) + fv.x .= 200 + fv.y .= 300 + put_data_file(d05, fv, comms_ctx) + + @test isfile(joinpath(d01, "my_prog_state.hdf5")) + @test isfile(joinpath(d02, "my_prog_state.hdf5")) + @test isfile(joinpath(d03, "my_prog_state.hdf5")) + @test isfile(joinpath(d04, "my_prog_state.hdf5")) + @test isfile(joinpath(d05, "my_prog_state.hdf5")) + @test isfile(joinpath(d01, "ref_counter.jl")) + @test 
isfile(joinpath(d02, "ref_counter.jl")) + @test isfile(joinpath(d03, "ref_counter.jl")) + @test isfile(joinpath(d04, "ref_counter.jl")) + @test isfile(joinpath(d05, "ref_counter.jl")) + + (data_file_computed, computed_mses, dirs, how) = + export_reproducibility_results( + fv, + comms_ctx; + job_id, + save_dir = save_dir, + computed_dir = computed_dir, + name = "Y", + reference_filename = "my_prog_state.hdf5", + n = 10, + ref_counter_PR = 3, + skip = false, + ) + @test how == :successful_comparison + @test dirs == [d05, d04, d03] + end +end + +function make_file_with_contents(dir, filename, contents) + mkpath(dir) + f = joinpath(dir, filename) + open(io -> println(io, contents), f, "w") +end +@testset "Reproducibility infrastructure: source_checksum" begin + mktempdir2_cd_computed() do (dir_A, dir_B) + make_file_with_contents(dir_A, "file_x.jl", "abc") + make_file_with_contents(dir_A, "file_y.jl", "abc") + make_file_with_contents(dir_A, "file_z.jl", "abc") + + make_file_with_contents(dir_B, "file_x.jl", "abc") + make_file_with_contents(dir_B, "file_y.jl", "abc") + make_file_with_contents(dir_B, "file_z.jl", "abc") + @test source_checksum(dir_A) == source_checksum(dir_B) + end + + mktempdir2_cd_computed() do (dir_A, dir_B) + make_file_with_contents(dir_A, "file_x.jl", "abc") + make_file_with_contents(dir_A, "file_y.jl", "abc") + make_file_with_contents(dir_A, "file_z.jl", "abc") + + make_file_with_contents(dir_B, "file_x.jl", "xyz") + make_file_with_contents(dir_B, "file_y.jl", "abc") + make_file_with_contents(dir_B, "file_z.jl", "abc") + @test source_checksum(dir_A) ≠ source_checksum(dir_B) + end +end + +@testset "Reproducibility infrastructure: source_has_changed" begin + mktempdir2_cd_computed() do (dir_A, dir_B) + make_file_with_contents(dir_A, "file_x.jl", "abc") + make_file_with_contents(dir_A, "file_y.jl", "abc") + make_file_with_contents(dir_A, "file_z.jl", "abc") + d_A = make_ref_file_counter(3, dir_A, "d_A") + make_file_with_contents( + d_A, + 
"source_checksum.dat", + source_checksum(dir_A), + ) + + make_file_with_contents(dir_B, "file_x.jl", "abc") + make_file_with_contents(dir_B, "file_y.jl", "abc") + make_file_with_contents(dir_B, "file_z.jl", "abc") + d_B = make_ref_file_counter(3, dir_B, "d_B") + make_file_with_contents( + d_B, + "source_checksum.dat", + source_checksum(dir_B), + ) + + @test source_has_changed(; + n = 0, # force no comparable reference, source code + root_dir = dir_A, + ref_counter_PR = 3, + skip = false, + src_dir = dir_B, + ) + end + + mktempdir2_cd_computed() do (dir_A, dir_B) + make_file_with_contents(dir_A, "file_x.jl", "abc") + make_file_with_contents(dir_A, "file_y.jl", "abc") + make_file_with_contents(dir_A, "file_z.jl", "abc") + d_A = make_ref_file_counter(3, dir_A, "d_A") + make_file_with_contents( + d_A, + "source_checksum.dat", + source_checksum(dir_A), + ) + + make_file_with_contents(dir_B, "file_x.jl", "abc") + make_file_with_contents(dir_B, "file_y.jl", "abc") + make_file_with_contents(dir_B, "file_z.jl", "abc") + d_B = make_ref_file_counter(3, dir_B, "d_B") + make_file_with_contents( + d_B, + "source_checksum.dat", + source_checksum(dir_B), + ) + + @test !source_has_changed(; + n = 5, + root_dir = dir_A, + ref_counter_PR = 3, + skip = false, + src_dir = dir_B, + ) + end + + mktempdir2_cd_computed() do (dir_A, dir_B) + make_file_with_contents(dir_A, "file_x.jl", "abc") + make_file_with_contents(dir_A, "file_y.jl", "abc") + make_file_with_contents(dir_A, "file_z.jl", "abc") + d_A = make_ref_file_counter(3, dir_A, "d_A") + make_file_with_contents( + d_A, + "source_checksum.dat", + source_checksum(dir_A), + ) + + make_file_with_contents(dir_B, "file_x.jl", "abc") + make_file_with_contents(dir_B, "file_y.jl", "abc") + make_file_with_contents(dir_B, "file_z.jl", "xyz") + d_B = make_ref_file_counter(3, dir_B, "d_B") + make_file_with_contents( + d_B, + "source_checksum.dat", + source_checksum(dir_B), + ) + + @test source_has_changed(; + n = 5, + root_dir = dir_A, + 
ref_counter_PR = 3, + skip = false, + src_dir = dir_B, + ) + end +end + +@testset "Reproducibility infrastructure: move_data_to_save_dir legacy folder structure" begin + mktempdir2_cd_computed() do (save_dir, computed_dir) + grid = ExtrudedCubedSphereGrid(; + z_elem = 5, + z_min = 0, + z_max = 1, + radius = 10, + h_elem = 5, + n_quad_points = 2, + ) + space = Spaces.ExtrudedFiniteDifferenceSpace(grid, Grids.CellCenter()) + comms_ctx = ClimaComms.context(space) + fv = Fields.FieldVector(; x = ones(space), y = ones(space)) + + hash1 = joinpath(save_dir, "hash1") + hash2 = joinpath(save_dir, "hash2") + make_file_with_contents(hash1, "file_x.jl", "abc") + make_file_with_contents(hash1, "file_y.jl", "abc") + make_file_with_contents(hash1, "file_z.jl", "abc") + make_ref_file_counter(3, hash1) + + make_file_with_contents(hash2, "file_x.jl", "abc") + make_file_with_contents(hash2, "file_y.jl", "abc") + make_file_with_contents(hash2, "file_z.jl", "abc") + make_ref_file_counter(3, hash2) + + make_file_with_contents(computed_dir, "file_x.jl", "abc") + make_file_with_contents(computed_dir, "file_y.jl", "abc") + make_file_with_contents(computed_dir, "file_z.jl", "abc") + ref_counter_file_dir = make_ref_file_counter(3, computed_dir) + job_id_1 = joinpath(computed_dir, "job_id_1") + job_id_2 = joinpath(computed_dir, "job_id_2") + put_data_file(job_id_1, fv, comms_ctx) + put_data_file(job_id_2, fv, comms_ctx) + @test source_checksum(hash1) == source_checksum(computed_dir) + @test source_checksum(hash2) == source_checksum(computed_dir) + + move_data_to_save_dir(; + dest_root = save_dir, + buildkite_ci = true, + commit = "hash_new", + n_hash_characters = length("hash_new"), + branch = "unit_test_move_data_to_save_dir", + in_merge_queue = true, + dirs_src = [job_id_1, job_id_2], + ref_counter_file_PR = joinpath( + ref_counter_file_dir, + "ref_counter.jl", + ), + ref_counter_PR = 3, + skip = false, + ) + @test isfile( + joinpath(save_dir, "hash_new", "job_id_1", "my_prog_state.hdf5"), 
+ ) + @test isfile( + joinpath(save_dir, "hash_new", "job_id_2", "my_prog_state.hdf5"), + ) + @test isfile(joinpath(save_dir, "hash_new", "ref_counter.jl")) + end +end + +@testset "Reproducibility infrastructure: move_data_to_save_dir" begin + mktempdir2_cd_computed() do (save_dir, computed_dir) + grid = ExtrudedCubedSphereGrid(; + z_elem = 5, + z_min = 0, + z_max = 1, + radius = 10, + h_elem = 5, + n_quad_points = 2, + ) + space = Spaces.ExtrudedFiniteDifferenceSpace(grid, Grids.CellCenter()) + comms_ctx = ClimaComms.context(space) + fv = Fields.FieldVector(; x = ones(space), y = ones(space)) + + hash1 = joinpath(save_dir, "hash1") + hash2 = joinpath(save_dir, "hash2") + make_file_with_contents(hash1, "file_x.jl", "abc") + make_file_with_contents(hash1, "file_y.jl", "abc") + make_file_with_contents(hash1, "file_z.jl", "abc") + make_ref_file_counter(3, hash1, "repro_bundle") + + make_file_with_contents(hash2, "file_x.jl", "abc") + make_file_with_contents(hash2, "file_y.jl", "abc") + make_file_with_contents(hash2, "file_z.jl", "abc") + make_ref_file_counter(3, hash2, "repro_bundle") + + make_file_with_contents(computed_dir, "file_x.jl", "abc") + make_file_with_contents(computed_dir, "file_y.jl", "abc") + make_file_with_contents(computed_dir, "file_z.jl", "abc") + ref_counter_file_dir = + make_ref_file_counter(3, computed_dir, "repro_bundle") + job_id_1 = joinpath(computed_dir, "job_id_1") + job_id_2 = joinpath(computed_dir, "job_id_2") + put_data_file(job_id_1, fv, comms_ctx) + put_data_file(job_id_2, fv, comms_ctx) + @test source_checksum(hash1) == source_checksum(computed_dir) + @test source_checksum(hash2) == source_checksum(computed_dir) + + move_data_to_save_dir(; + dest_root = save_dir, + buildkite_ci = true, + commit = "hash_new", + n_hash_characters = length("hash_new"), + branch = "unit_test_move_data_to_save_dir", + in_merge_queue = true, + dirs_src = [job_id_1, job_id_2], + ref_counter_file_PR = joinpath( + ref_counter_file_dir, + "ref_counter.jl", + ), + 
ref_counter_PR = 3, + skip = false, + ) + @test isfile( + joinpath(save_dir, "hash_new", "job_id_1", "my_prog_state.hdf5"), + ) + @test isfile( + joinpath(save_dir, "hash_new", "job_id_2", "my_prog_state.hdf5"), + ) + @test isfile(joinpath(save_dir, "hash_new", "ref_counter.jl")) + end +end + +@testset "Reproducibility infrastructure: integration test" begin + mktempdir2_cd_computed() do (save_dir, computed_dir) + grid = ExtrudedCubedSphereGrid(; + z_elem = 5, + z_min = 0, + z_max = 1, + radius = 10, + h_elem = 5, + n_quad_points = 2, + ) + space = Spaces.ExtrudedFiniteDifferenceSpace(grid, Grids.CellCenter()) + comms_ctx = ClimaComms.context(space) + + fv = Fields.FieldVector(; x = ones(space), y = ones(space)) end end