artifact stuff

grahamstark · Nov 27, 2024 · d242ad2 · d242ad2
1 parent d432421
commit d242ad2
Show file tree

Hide file tree

Showing 12 changed files with 91 additions and 150 deletions.
diff --git a/Artifacts.toml b/Artifacts.toml
@@ -12,7 +12,7 @@ lazy = true
 
     [[disability.download]]
     sha256 = "7ed1e796fc2e09a4c48f772ea3cd06b7c6aab721c183f6905b2fa83890d0184b"
-    url = "https://virtual-worlds.biz/artifacts//disability.tar.gz"
+    url = "http://stb-artifacts/disability.tar.gz"
 
 [exampledata]
 git-tree-sha1 = "7853136d3530ea531e4f45a29f8fcef81d08fc1c"
@@ -28,23 +28,23 @@ lazy = true
 
     [[legalaid.download]]
     sha256 = "fb9a643138fea7669fceda20ef9b4f65b2646d2e8c27d4a0ecbb7306908cb3a9"
-    url = "https://virtual-worlds.biz/artifacts//legalaid.tar.gz"
+    url = "http://stb-artifacts/legalaid.tar.gz"
 
 [scottish-frs-data]
 git-tree-sha1 = "9c5c1e8a29e8f82d861100579cee3e3e49cae464"
 lazy = true
 
     [[scottish-frs-data.download]]
     sha256 = "5bc1607882203127bd0c13ddeeb22deb0f51156cd65aa9f4c14af3c1df175ef2"
-    url = "https://virtual-worlds.biz/artifacts//model-data-scotland-2015-2021-v2.tar.gz"
+    url = "http://stb-artifacts/model-data-scotland-2015-2021-v2.tar.gz"
 
 [scottish-lcf-expenditure]
 git-tree-sha1 = "bf18d73b3f48bea065528bf63e43e522f85f5e4b"
 lazy = true
 
     [[scottish-lcf-expenditure.download]]
     sha256 = "66231a2f9d4cf75d373812e6969e40ffe1dd386dc0b8c577aa3ed67f75e4cc1c"
-    url = "https://virtual-worlds.biz/artifacts//scottish-lcf-expenditure.tar.gz"
+    url = "http://stb-artifacts/scottish-lcf-expenditure.tar.gz"
 
 [scottish-synthetic-data]
 git-tree-sha1 = "2bcdf3e7a62bb4bcd282f05008adbe6b63228ab9"
@@ -76,23 +76,23 @@ lazy = true
 
     [[scottish-was-wealth.download]]
     sha256 = "6a6f5a25f22a102bb0b3e770a874b54d19407a1bafbdd6bd7956477c1204fb46"
-    url = "https://virtual-worlds.biz/artifacts//scottish-was-wealth.tar.gz"
+    url = "http://stb-artifacts/scottish-was-wealth.tar.gz"
 
 [uk-frs-data]
 git-tree-sha1 = "8891ef02ea5dada4ec097ce0a183502cfafb995f"
 lazy = true
 
     [[uk-frs-data.download]]
     sha256 = "8e3efc98a687441085be9ef5e53b663c2bd9abf6e300efa2dd6edb29993d8777"
-    url = "https://virtual-worlds.biz/artifacts//model-data-2021-2021-v2.tar.gz"
+    url = "http://stb-artifacts/model-data-2021-2021-v2.tar.gz"
 
 [uk-lcf-expenditure]
 git-tree-sha1 = "aac0e469d51d53ae54dcc2aa54af2d4f2d72a03a"
 lazy = true
 
     [[uk-lcf-expenditure.download]]
     sha256 = "a2888101e82837d3cf9852fc35f66a36372d579b5660bf5cbf7a70cf11777187"
-    url = "https://virtual-worlds.biz/artifacts//uk-lcf-expenditure.tar.gz"
+    url = "http://stb-artifacts/uk-lcf-expenditure.tar.gz"
 
 [uk-synthetic-data]
 git-tree-sha1 = "2bcdf3e7a62bb4bcd282f05008adbe6b63228ab9"
@@ -124,4 +124,4 @@ lazy = true
 
     [[uk-was-wealth.download]]
     sha256 = "d33b1c51eadb5f4bf8278f716a60b0a37ea7af66b8fe56dca045d50f7674d94f"
-    url = "https://virtual-worlds.biz/artifacts//uk-was-wealth.tar.gz"
+    url = "http://stb-artifacts/uk-was-wealth.tar.gz"
diff --git a/LocalPreferences.toml b/LocalPreferences.toml
@@ -1,6 +1,10 @@
 [preferences.ScottishTaxBenefitModel]
-artifact_server_upload = "[email protected]:public_html/artifacts/"
-artifact_server_url = "https://virtual-worlds.biz/artifacts/"
+
+public-artifact_server_upload = "[email protected]:public_html/artifacts/"
+public-artifact_server_url = "https://virtual-worlds.biz/artifacts/"
+local-artifact_server_upload = "stb-artifacts:"
+local-artifact_server_url = "http://stb-artifacts/"
+
 auto_weight = true
 default_run_name = "default_run"
 use_weighting = true

diff --git a/Project.toml b/Project.toml
@@ -113,8 +113,11 @@ julia = "1"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [preferences.ScottishTaxBenefitModel]
-artifact_server_upload = "[email protected]:public_html/artifacts/"
-artifact_server_url = "https://virtual-worlds.biz/artifacts/"
+public-artifact_server_upload = "[email protected]:public_html/artifacts/"
+public-artifact_server_url = "https://virtual-worlds.biz/artifacts/"
+local-artifact_server_upload = "stb-artifacts:"
+local-artifact_server_url = "http://stb-artifacts/"
+
 auto_weight = true
 default_run_name = "default_run"
 do_marginal_rates = false

diff --git a/docs/modelled-taxes-and-benfits.md b/docs/modelled-taxes-and-benfits.md
diff --git a/src/ConsumptionData.jl b/src/ConsumptionData.jl
@@ -350,9 +350,14 @@ function init( settings :: Settings; reset = false )
             global IND_MATCHING
             global EXPENDITURE_DATASET
             global FACTOR_COST_DATASET
-            IND_MATCHING = CSV.File( joinpath( artifact"uk-lcf-expenditure", "matches.tab" )) |> DataFrame
-            EXPENDITURE_DATASET = CSV.File( joinpath( artifact"uk-lcf-expenditure", "dataset.tab")) |> DataFrame
-            FACTOR_COST_DATASET = CSV.File( joinpath( artifact"uk-lcf-expenditure", "dataset.tab" )) |> DataFrame
+            c_artifact = RunSettings.get_artifact(; 
+                name="expenditure", 
+                source=settings.data_source == SyntheticSource ? "synthetic" : "lcf", 
+                scottish=settings.target_nation == N_Scotland )
+
+            IND_MATCHING = CSV.File( joinpath( c_artifact, "matches.tab" )) |> DataFrame
+            EXPENDITURE_DATASET = CSV.File( joinpath( c_artifact, "dataset.tab")) |> DataFrame
+            FACTOR_COST_DATASET = CSV.File( joinpath( c_artifact, "dataset.tab" )) |> DataFrame
             println( EXPENDITURE_DATASET[1:2,:])
             uprate_expenditure( settings )
         end

diff --git a/src/LegalAidData.jl b/src/LegalAidData.jl
@@ -280,58 +280,49 @@ CIVIL_COSTS_GRP4 = DataFrame()
 CIVIL_SUBJECTS = DataFrame()
 
 
-function init()
+"""
+Load the legal aid cost and award tables from the `legalaid` artifact into the
+module-level globals and build the grouped views derived from them.
+
+Loading only happens while at least one of `CIVIL_COSTS`, `AA_COSTS` or
+`CIVIL_AWARDS` still has size `(0,0)`, so repeated calls do not re-read the files.
+"""
+function initialise()
 
-    CIVIL_COSTS = DataFrame()
-    AA_COSTS = DataFrame()
-    CIVIL_AWARDS = DataFrame()
+    global CIVIL_COSTS
+    global AA_COSTS
+    global CIVIL_AWARDS
 
-    CIVIL_AWARDS_GRP_NS = DataFrame()
-    CIVIL_AWARDS_GRP1 = DataFrame()
-    CIVIL_AWARDS_GRP2 = DataFrame()
-    CIVIL_AWARDS_GRP3 = DataFrame()
-    CIVIL_AWARDS_GRP4 = DataFrame()
-    CIVIL_COSTS_GRP_NS = DataFrame()
-
-    CIVIL_COSTS_GRP1 = DataFrame()
-    AA_COSTS_GRP1 = DataFrame()
-
-    CIVIL_COSTS_GRP2 = DataFrame()
-    CIVIL_COSTS_GRP3 = DataFrame()
-    CIVIL_COSTS_GRP4 = DataFrame()
-    CIVIL_SUBJECTS = DataFrame()
-
-
-   CIVIL_COSTS = load_costs( joinpath(artifact"legalaid", "civil-legal-aid-case-costs.tab" ))
-   AA_COSTS = load_aa_costs( joinpath( artifact"legalaid", "aa-case-costs.tab" ))
-   CIVIL_AWARDS = load_awards( joinpath( artifact"legalaid", "civil-applications.tab" ))
-
-   CIVIL_AWARDS_GRP_NS = groupby(CIVIL_AWARDS, [:hsm, :age2, :sex])
-   CIVIL_AWARDS_GRP1 = groupby(CIVIL_AWARDS, [:hsm])
-   CIVIL_AWARDS_GRP2 = groupby(CIVIL_AWARDS, [:hsm, :la_status])
-   CIVIL_AWARDS_GRP3 = groupby(CIVIL_AWARDS, [:hsm, :la_status, :sex])
-   CIVIL_AWARDS_GRP4 = groupby(CIVIL_AWARDS, [:hsm, :la_status,:age2, :sex])
-   CIVIL_COSTS_GRP_NS = groupby(CIVIL_COSTS, [:hsm, :age2, :sex])
-
-   CIVIL_COSTS_GRP1 = groupby(CIVIL_COSTS, [:hsm_censored])
-   AA_COSTS_GRP1 = groupby(AA_COSTS, [:hsm_censored])
-
-   CIVIL_COSTS_GRP2 = groupby(CIVIL_COSTS, [:hsm, :la_status])
-   CIVIL_COSTS_GRP3 = groupby(CIVIL_COSTS, [:hsm, :la_status, :sex])
-   CIVIL_COSTS_GRP4 = groupby(CIVIL_COSTS, [:hsm, :la_status, :age2, :sex])
-   CIVIL_SUBJECTS = sort(levels( CIVIL_AWARDS.hsm ))
+    global CIVIL_AWARDS_GRP_NS 
+    global CIVIL_AWARDS_GRP1 
+    global CIVIL_AWARDS_GRP2 
+    global CIVIL_AWARDS_GRP3 
+    global CIVIL_AWARDS_GRP4 
+    global CIVIL_COSTS_GRP_NS 
+
+    global CIVIL_COSTS_GRP1 
+    global AA_COSTS_GRP1 
+
+    global CIVIL_COSTS_GRP2 
+    global CIVIL_COSTS_GRP3 
+    global CIVIL_COSTS_GRP4 
+    global CIVIL_SUBJECTS 
+
+    # `size` of a DataFrame is a 2-tuple, so every clause must compare against `(0,0)`;
+    # comparing against the float `(0.0)` is always false.
+    if size( CIVIL_COSTS ) == (0,0) || size(AA_COSTS) == (0,0) || size(CIVIL_AWARDS) == (0,0)
+        CIVIL_COSTS = load_costs( joinpath(artifact"legalaid", "civil-legal-aid-case-costs.tab" ))
+        AA_COSTS = load_aa_costs( joinpath( artifact"legalaid", "aa-case-costs.tab" ))
+        CIVIL_AWARDS = load_awards( joinpath( artifact"legalaid", "civil-applications.tab" ))
+
+        CIVIL_AWARDS_GRP_NS = groupby(CIVIL_AWARDS, [:hsm, :age2, :sex])
+        CIVIL_AWARDS_GRP1 = groupby(CIVIL_AWARDS, [:hsm])
+        CIVIL_AWARDS_GRP2 = groupby(CIVIL_AWARDS, [:hsm, :la_status])
+        CIVIL_AWARDS_GRP3 = groupby(CIVIL_AWARDS, [:hsm, :la_status, :sex])
+        CIVIL_AWARDS_GRP4 = groupby(CIVIL_AWARDS, [:hsm, :la_status,:age2, :sex])
+        CIVIL_COSTS_GRP_NS = groupby(CIVIL_COSTS, [:hsm, :age2, :sex])
+
+        CIVIL_COSTS_GRP1 = groupby(CIVIL_COSTS, [:hsm_censored])
+        AA_COSTS_GRP1 = groupby(AA_COSTS, [:hsm_censored])
+
+        CIVIL_COSTS_GRP2 = groupby(CIVIL_COSTS, [:hsm, :la_status])
+        CIVIL_COSTS_GRP3 = groupby(CIVIL_COSTS, [:hsm, :la_status, :sex])
+        CIVIL_COSTS_GRP4 = groupby(CIVIL_COSTS, [:hsm, :la_status, :age2, :sex])
+        CIVIL_SUBJECTS = sort(levels( CIVIL_AWARDS.hsm ))
+    end
 end
 
-#= 
-  psa = groupby(awards, [:hsm,:age_banded,:consolidatedsex])
-  k=(hsm = "Discrimination", age_banded = "5 - 9", consolidatedsex = "Male")
-  psa[k]
-  haskey(psa,k)
-  for( k, v ) in pairs( psa )
-   println( "k=$k ")
-  end
-=#
-
 function gcounts( gdf :: GroupedDataFrame )
     kk = sort(keys(gdf))
     for k in kk

diff --git a/src/RunSettings.jl b/src/RunSettings.jl
@@ -83,6 +83,7 @@ module RunSettings
 
     # settings loaded automatically from the Project.toml section 'preferences.ScottishTaxBenefitModel' 
     # and maybe overwritten in LocalPreferences.toml
+    # FIXME clear out all the duplications of Scotland in this
     @with_kw mutable struct Settings
         uuid :: UUID = UUID("c2ae9c83-d24a-431c-b04f-74662d2ba07e")
         uid :: Int = 1 # placeholder for maybe a user somewhere

diff --git a/src/Runner.jl b/src/Runner.jl
@@ -80,6 +80,9 @@ module Runner
                 BenefitGenerosity.initialise( artifact"disability" )  
             end     
         end
+        if settings.do_legal_aid
+            LegalAidData.initialise()
+        end
         full_results = Array{HouseholdResult}(undef,0,0)
         # fixme if we have one are threads OK? I think yes
         if settings.export_full_results

diff --git a/src/Uprating.jl b/src/Uprating.jl
@@ -7,6 +7,8 @@ module Uprating
 #
 using DataFrames
 using CSV
+using Pkg,Pkg.Artifacts
+using LazyArtifacts
 
 using ScottishTaxBenefitModel
 using .RunSettings
@@ -122,7 +124,8 @@ function load_prices( settings :: Settings, reload :: Bool = false )
         return
     end
 
-    upr = CSV.File("$(PRICES_DIR)/$(settings.prices_file)"; delim = '\t', comment = "#") |> DataFrame
+    upr = CSV.File(joinpath(artifact"augdata","indexes.tab"); delim = '\t', comment = "#") |> DataFrame
+
     nrows = size(upr)[1]
     ncols = size(upr)[2]
     println( "read $nrows rows and $ncols cols ")

diff --git a/src/Utils.jl b/src/Utils.jl
@@ -85,20 +85,27 @@ function make_household_sample(
 end
 
 """
-Given a gzipped tar file in `tmp/` with some data, upload this to a server 
+Given a directory in `tmp/` with some data, make a gzipped tar file, upload this to a server 
 defined in Project.toml and add an entry to `Artifacts.toml`. Artifact
 is set to lazy load. Uses `ArtifactUtils`.
 
-file should contain: `people.tab` `households.tab` `README.md`, all top-level
+main data files should contain: `people.tab` `households.tab` `README.md`, all top-level
+other files can contain anything.
 
 """
 function make_artifact(;
    artifact_name :: AbstractString,
+   is_local :: Bool,
    toml_file = "Artifacts.toml" )::Int 
    gzip_file_name = "$(artifact_name).tar.gz"
    dir = "/mnt/data/ScotBen/artifacts/"
-   artifact_server_upload = @load_preference( "artifact_server_upload" )
-   artifact_server_url = @load_preference( "artifact_server_url" )
+   # `is_local` selects the `local-…` server preferences; otherwise the `public-…` ones.
+   if is_local 
+      artifact_server_upload = @load_preference( "local-artifact_server_upload" )
+      artifact_server_url = @load_preference( "local-artifact_server_url" )
+   else
+      artifact_server_upload = @load_preference( "public-artifact_server_upload" )
+      artifact_server_url = @load_preference( "public-artifact_server_url" )
+   end
    tarcmd = `tar zcvf $(dir)/tmp/$(gzip_file_name) -C $(dir)/$(artifact_name)/ .`
    run( tarcmd )
    dest = "$(artifact_server_upload)/$(gzip_file_name)"

diff --git a/src/WealthData.jl b/src/WealthData.jl
@@ -75,8 +75,12 @@ function init( settings :: Settings; reset = false )
     if(settings.wealth_method == matching) && (reset || (size(WEALTH_DATASET)[1] == 0 )) # needed but uninitialised
         global IND_MATCHING
         global WEALTH_DATASET
-        #IND_MATCHING = CSV.File( joinpath( artifact"uk-was-wealth", "matches.tab" )) |> DataFrame
-        #WEALTH_DATASET = CSV.File( joinpath( artifact"uk-was-wealth", "data.tab"); types=jam_on_float ) |> DataFrame
+        w_artifact = RunSettings.get_artifact(; 
+            name="wealth", 
+            source=settings.data_source == SyntheticSource ? "synthetic" : "was", 
+            scottish=settings.target_nation == N_Scotland )
+        IND_MATCHING = CSV.File( joinpath( w_artifact, "matches.tab" )) |> DataFrame
+        WEALTH_DATASET = CSV.File( joinpath( w_artifact, "data.tab"); types=jam_on_float ) |> DataFrame
         uprate_raw_wealth()
         println( WEALTH_DATASET[1:2,:])
     end

diff --git a/test/wealth_tests.jl b/test/wealth_tests.jl
@@ -108,11 +108,3 @@ end
         println( "hres.bhc_net_income=$(hres.bhc_net_income)" )
     end
 end
-
-@testset "Wealth Imputation Direct" begin
-    settings = Settings() # scotland
-    mps = CSV.File( joinpath( data_dir( settings ), settings.people_name*".tab")) |>DataFrame
-
-
-
-end