From d242ad2578faa9a5a9a7d78c8106f3010c37bd08 Mon Sep 17 00:00:00 2001
From: Graham Stark <graham.stark@virtual-worlds.biz>
Date: Wed, 27 Nov 2024 17:23:38 +0000
Subject: [PATCH] artifact stuff: local artifact server, artifact-based data loading

---
 Artifacts.toml                     | 16 +++---
 LocalPreferences.toml              |  8 ++-
 Project.toml                       |  7 ++-
 docs/modelled-taxes-and-benfits.md | 72 -------------------------
 src/ConsumptionData.jl             | 11 ++--
 src/LegalAidData.jl                | 87 ++++++++++++++----------------
 src/RunSettings.jl                 |  1 +
 src/Runner.jl                      |  3 ++
 src/Uprating.jl                    |  5 +-
 src/Utils.jl                       | 15 ++++--
 src/WealthData.jl                  |  8 ++-
 test/wealth_tests.jl               |  8 ---
 12 files changed, 91 insertions(+), 150 deletions(-)
 delete mode 100644 docs/modelled-taxes-and-benfits.md

diff --git a/Artifacts.toml b/Artifacts.toml
index 8ec2953d..60fb44a6 100644
--- a/Artifacts.toml
+++ b/Artifacts.toml
@@ -12,7 +12,7 @@ lazy = true
 
     [[disability.download]]
     sha256 = "7ed1e796fc2e09a4c48f772ea3cd06b7c6aab721c183f6905b2fa83890d0184b"
-    url = "https://virtual-worlds.biz/artifacts//disability.tar.gz"
+    url = "http://stb-artifacts/disability.tar.gz"
 
 [exampledata]
 git-tree-sha1 = "7853136d3530ea531e4f45a29f8fcef81d08fc1c"
@@ -28,7 +28,7 @@ lazy = true
 
     [[legalaid.download]]
     sha256 = "fb9a643138fea7669fceda20ef9b4f65b2646d2e8c27d4a0ecbb7306908cb3a9"
-    url = "https://virtual-worlds.biz/artifacts//legalaid.tar.gz"
+    url = "http://stb-artifacts/legalaid.tar.gz"
 
 [scottish-frs-data]
 git-tree-sha1 = "9c5c1e8a29e8f82d861100579cee3e3e49cae464"
@@ -36,7 +36,7 @@ lazy = true
 
     [[scottish-frs-data.download]]
     sha256 = "5bc1607882203127bd0c13ddeeb22deb0f51156cd65aa9f4c14af3c1df175ef2"
-    url = "https://virtual-worlds.biz/artifacts//model-data-scotland-2015-2021-v2.tar.gz"
+    url = "http://stb-artifacts/model-data-scotland-2015-2021-v2.tar.gz"
 
 [scottish-lcf-expenditure]
 git-tree-sha1 = "bf18d73b3f48bea065528bf63e43e522f85f5e4b"
@@ -44,7 +44,7 @@ lazy = true
 
     [[scottish-lcf-expenditure.download]]
     sha256 = "66231a2f9d4cf75d373812e6969e40ffe1dd386dc0b8c577aa3ed67f75e4cc1c"
-    url = "https://virtual-worlds.biz/artifacts//scottish-lcf-expenditure.tar.gz"
+    url = "http://stb-artifacts/scottish-lcf-expenditure.tar.gz"
 
 [scottish-synthetic-data]
 git-tree-sha1 = "2bcdf3e7a62bb4bcd282f05008adbe6b63228ab9"
@@ -76,7 +76,7 @@ lazy = true
 
     [[scottish-was-wealth.download]]
     sha256 = "6a6f5a25f22a102bb0b3e770a874b54d19407a1bafbdd6bd7956477c1204fb46"
-    url = "https://virtual-worlds.biz/artifacts//scottish-was-wealth.tar.gz"
+    url = "http://stb-artifacts/scottish-was-wealth.tar.gz"
 
 [uk-frs-data]
 git-tree-sha1 = "8891ef02ea5dada4ec097ce0a183502cfafb995f"
@@ -84,7 +84,7 @@ lazy = true
 
     [[uk-frs-data.download]]
     sha256 = "8e3efc98a687441085be9ef5e53b663c2bd9abf6e300efa2dd6edb29993d8777"
-    url = "https://virtual-worlds.biz/artifacts//model-data-2021-2021-v2.tar.gz"
+    url = "http://stb-artifacts/model-data-2021-2021-v2.tar.gz"
 
 [uk-lcf-expenditure]
 git-tree-sha1 = "aac0e469d51d53ae54dcc2aa54af2d4f2d72a03a"
@@ -92,7 +92,7 @@ lazy = true
 
     [[uk-lcf-expenditure.download]]
     sha256 = "a2888101e82837d3cf9852fc35f66a36372d579b5660bf5cbf7a70cf11777187"
-    url = "https://virtual-worlds.biz/artifacts//uk-lcf-expenditure.tar.gz"
+    url = "http://stb-artifacts/uk-lcf-expenditure.tar.gz"
 
 [uk-synthetic-data]
 git-tree-sha1 = "2bcdf3e7a62bb4bcd282f05008adbe6b63228ab9"
@@ -124,4 +124,4 @@ lazy = true
 
     [[uk-was-wealth.download]]
     sha256 = "d33b1c51eadb5f4bf8278f716a60b0a37ea7af66b8fe56dca045d50f7674d94f"
-    url = "https://virtual-worlds.biz/artifacts//uk-was-wealth.tar.gz"
+    url = "http://stb-artifacts/uk-was-wealth.tar.gz"
diff --git a/LocalPreferences.toml b/LocalPreferences.toml
index 697bffbe..a2ad7e6c 100644
--- a/LocalPreferences.toml
+++ b/LocalPreferences.toml
@@ -1,6 +1,10 @@
 [preferences.ScottishTaxBenefitModel]
-artifact_server_upload = "vw@virtual-worlds.biz:public_html/artifacts/"
-artifact_server_url = "https://virtual-worlds.biz/artifacts/"
+
+public-artifact_server_upload = "vw@virtual-worlds.biz:public_html/artifacts/"
+public-artifact_server_url = "https://virtual-worlds.biz/artifacts/"
+local-artifact_server_upload = "stb-artifacts:"
+local-artifact_server_url = "http://stb-artifacts/"
+
 auto_weight = true
 default_run_name = "default_run"
 use_weighting = true
diff --git a/Project.toml b/Project.toml
index bb398672..eb56b556 100644
--- a/Project.toml
+++ b/Project.toml
@@ -113,8 +113,11 @@ julia = "1"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [preferences.ScottishTaxBenefitModel]
-artifact_server_upload = "vw@virtual-worlds.biz:public_html/artifacts/"
-artifact_server_url = "https://virtual-worlds.biz/artifacts/"
+public-artifact_server_upload = "vw@virtual-worlds.biz:public_html/artifacts/"
+public-artifact_server_url = "https://virtual-worlds.biz/artifacts/"
+local-artifact_server_upload = "stb-artifacts:"
+local-artifact_server_url = "http://stb-artifacts/"
+
 auto_weight = true
 default_run_name = "default_run"
 do_marginal_rates = false
diff --git a/docs/modelled-taxes-and-benfits.md b/docs/modelled-taxes-and-benfits.md
deleted file mode 100644
index 8becbc01..00000000
--- a/docs/modelled-taxes-and-benfits.md
+++ /dev/null
@@ -1,72 +0,0 @@
-# Scotben Modelled Taxes
-
-as of 13/11/2024
-
-* Income Tax (Scottish and reserved UK);
-* National Insurance - Employees, Self Employed and Employers (though this needs more thought on incidence);
-* Council Tax, plus some simple modelling of local income taxes and domestic rates from the Plaid study
-* Wealth Taxes, using WAS data;
-* VAT and excise duties using LCF data;
-* incidence of essentially any tax incident on wages.
-
-## Modelled benefits
-
-### Non Means-Tested
-
-* attendances allowance
-* child benefit
-* dla
-* carers benefits
-* pip
-* esa
-* jsa
-* pensions
-* bereavement
-* widows pension
-* maternity
-* smp 
-
-Note: the Scottish disability benefits:
-
-* Carer’s Allowance Supplement 
-* Carer Support Payment 
-* Adult Disability Payment 
-* Child Disability Payment 
-* Pension Age Disability Payment
-
- are modelled as being equivalent to the rUK benefits, though a mechanism exists to make the disability tests more or less generous.
-
-### Means-Tested
-
-* Universal Credit
-
-#### Legacy Benefits
-    
-* savings credit/pension credit
-* working tax credit
-* child tax credit
-* Housing Benefit
-* council tax reductions (modeled quite crudely)
-
-## Others
-
-* Minimum Wages
-* Scottish Civil Legal Aid
-
-## Hypothetical Benefits
-
-* Basic Incomes
-
-## Not currently modelled
-
-* Any form of Student Support;
-* Student loans and repayments (working on repayments ATM)
-* Food banks or similar;
-* Foster Care payments
-* Scottish Best Start payments
-* Child Winter Heating Payment
-* Winter Heating Payment
-* Funeral Support Payment
-* Job Start Payment
-* any ocal authority-specific payments
-* Young Carer Grant
\ No newline at end of file
diff --git a/src/ConsumptionData.jl b/src/ConsumptionData.jl
index b0177994..d760633c 100644
--- a/src/ConsumptionData.jl
+++ b/src/ConsumptionData.jl
@@ -350,9 +350,14 @@ function init( settings :: Settings; reset = false )
             global IND_MATCHING
             global EXPENDITURE_DATASET
             global FACTOR_COST_DATASET
-            IND_MATCHING = CSV.File( joinpath( artifact"uk-lcf-expenditure", "matches.tab" )) |> DataFrame
-            EXPENDITURE_DATASET = CSV.File( joinpath( artifact"uk-lcf-expenditure", "dataset.tab")) |> DataFrame
-            FACTOR_COST_DATASET = CSV.File( joinpath( artifact"uk-lcf-expenditure", "dataset.tab" )) |> DataFrame
+            c_artifact = RunSettings.get_artifact(; 
+                name="expenditure", 
+                source=settings.data_source == SyntheticSource ? "synthetic" : "lcf", 
+                scottish=settings.target_nation == N_Scotland )
+
+            IND_MATCHING = CSV.File( joinpath( c_artifact, "matches.tab" )) |> DataFrame
+            EXPENDITURE_DATASET = CSV.File( joinpath( c_artifact, "dataset.tab")) |> DataFrame
+            FACTOR_COST_DATASET = CSV.File( joinpath( c_artifact, "dataset.tab" )) |> DataFrame
             println( EXPENDITURE_DATASET[1:2,:])
             uprate_expenditure( settings )
         end
diff --git a/src/LegalAidData.jl b/src/LegalAidData.jl
index f06e4e41..f1fa930b 100644
--- a/src/LegalAidData.jl
+++ b/src/LegalAidData.jl
@@ -280,58 +280,49 @@ CIVIL_COSTS_GRP4 = DataFrame()
 CIVIL_SUBJECTS = DataFrame()
 
 
-function init()
+function initialise()
 
-    CIVIL_COSTS = DataFrame()
-    AA_COSTS = DataFrame()
-    CIVIL_AWARDS = DataFrame()
+    global CIVIL_COSTS
+    global AA_COSTS
+    global CIVIL_AWARDS
     
-    CIVIL_AWARDS_GRP_NS = DataFrame()
-    CIVIL_AWARDS_GRP1 = DataFrame()
-    CIVIL_AWARDS_GRP2 = DataFrame()
-    CIVIL_AWARDS_GRP3 = DataFrame()
-    CIVIL_AWARDS_GRP4 = DataFrame()
-    CIVIL_COSTS_GRP_NS = DataFrame()
-    
-    CIVIL_COSTS_GRP1 = DataFrame()
-    AA_COSTS_GRP1 = DataFrame()
-    
-    CIVIL_COSTS_GRP2 = DataFrame()
-    CIVIL_COSTS_GRP3 = DataFrame()
-    CIVIL_COSTS_GRP4 = DataFrame()
-    CIVIL_SUBJECTS = DataFrame()
-    
-
-   CIVIL_COSTS = load_costs( joinpath(artifact"legalaid", "civil-legal-aid-case-costs.tab" ))
-   AA_COSTS = load_aa_costs( joinpath( artifact"legalaid", "aa-case-costs.tab" ))
-   CIVIL_AWARDS = load_awards( joinpath( artifact"legalaid", "civil-applications.tab" ))
-
-   CIVIL_AWARDS_GRP_NS = groupby(CIVIL_AWARDS, [:hsm, :age2, :sex])
-   CIVIL_AWARDS_GRP1 = groupby(CIVIL_AWARDS, [:hsm])
-   CIVIL_AWARDS_GRP2 = groupby(CIVIL_AWARDS, [:hsm, :la_status])
-   CIVIL_AWARDS_GRP3 = groupby(CIVIL_AWARDS, [:hsm, :la_status, :sex])
-   CIVIL_AWARDS_GRP4 = groupby(CIVIL_AWARDS, [:hsm, :la_status,:age2, :sex])
-   CIVIL_COSTS_GRP_NS = groupby(CIVIL_COSTS, [:hsm, :age2, :sex])
-
-   CIVIL_COSTS_GRP1 = groupby(CIVIL_COSTS, [:hsm_censored])
-   AA_COSTS_GRP1 = groupby(AA_COSTS, [:hsm_censored])
-
-   CIVIL_COSTS_GRP2 = groupby(CIVIL_COSTS, [:hsm, :la_status])
-   CIVIL_COSTS_GRP3 = groupby(CIVIL_COSTS, [:hsm, :la_status, :sex])
-   CIVIL_COSTS_GRP4 = groupby(CIVIL_COSTS, [:hsm, :la_status, :age2, :sex])
-   CIVIL_SUBJECTS = sort(levels( CIVIL_AWARDS.hsm ))
+    global CIVIL_AWARDS_GRP_NS 
+    global CIVIL_AWARDS_GRP1 
+    global CIVIL_AWARDS_GRP2 
+    global CIVIL_AWARDS_GRP3 
+    global CIVIL_AWARDS_GRP4 
+    global CIVIL_COSTS_GRP_NS 
+        
+    global CIVIL_COSTS_GRP1 
+    global AA_COSTS_GRP1 
+        
+    global CIVIL_COSTS_GRP2 
+    global CIVIL_COSTS_GRP3 
+    global CIVIL_COSTS_GRP4 
+    global CIVIL_SUBJECTS 
+
+    if size( CIVIL_COSTS ) == (0,0) || size(AA_COSTS) == (0,0) || size(CIVIL_AWARDS) == (0,0) # load once: only (re)read while any base table is still the empty placeholder DataFrame
+        CIVIL_COSTS = load_costs( joinpath(artifact"legalaid", "civil-legal-aid-case-costs.tab" )) # base tables come from the `legalaid` artifact
+        AA_COSTS = load_aa_costs( joinpath( artifact"legalaid", "aa-case-costs.tab" ))
+        CIVIL_AWARDS = load_awards( joinpath( artifact"legalaid", "civil-applications.tab" ))
+
+        CIVIL_AWARDS_GRP_NS = groupby(CIVIL_AWARDS, [:hsm, :age2, :sex]) # groupings of the awards table by the listed columns
+        CIVIL_AWARDS_GRP1 = groupby(CIVIL_AWARDS, [:hsm])
+        CIVIL_AWARDS_GRP2 = groupby(CIVIL_AWARDS, [:hsm, :la_status])
+        CIVIL_AWARDS_GRP3 = groupby(CIVIL_AWARDS, [:hsm, :la_status, :sex])
+        CIVIL_AWARDS_GRP4 = groupby(CIVIL_AWARDS, [:hsm, :la_status,:age2, :sex])
+        CIVIL_COSTS_GRP_NS = groupby(CIVIL_COSTS, [:hsm, :age2, :sex]) # groupings of the costs tables by the listed columns
+
+        CIVIL_COSTS_GRP1 = groupby(CIVIL_COSTS, [:hsm_censored])
+        AA_COSTS_GRP1 = groupby(AA_COSTS, [:hsm_censored])
+
+        CIVIL_COSTS_GRP2 = groupby(CIVIL_COSTS, [:hsm, :la_status])
+        CIVIL_COSTS_GRP3 = groupby(CIVIL_COSTS, [:hsm, :la_status, :sex])
+        CIVIL_COSTS_GRP4 = groupby(CIVIL_COSTS, [:hsm, :la_status, :age2, :sex])
+        CIVIL_SUBJECTS = sort(levels( CIVIL_AWARDS.hsm )) # sorted distinct values of the awards `hsm` column
+    end
 end
 
-#= 
-  psa = groupby(awards, [:hsm,:age_banded,:consolidatedsex])
-  k=(hsm = "Discrimination", age_banded = "5 - 9", consolidatedsex = "Male")
-  psa[k]
-  haskey(psa,k)
-  for( k, v ) in pairs( psa )
-   println( "k=$k ")
-  end
-=#
-
 function gcounts( gdf :: GroupedDataFrame )
     kk = sort(keys(gdf))
     for k in kk
diff --git a/src/RunSettings.jl b/src/RunSettings.jl
index ff4b732d..e2c36006 100644
--- a/src/RunSettings.jl
+++ b/src/RunSettings.jl
@@ -83,6 +83,7 @@ module RunSettings
 
     # settings loaded automatically from the Project.toml section 'preferences.ScottishTaxBenefitModel' 
     # and maybe overwritten in LocalPreferences.toml
+    # FIXME clear out all the duplications of Scotland in this
     @with_kw mutable struct Settings
         uuid :: UUID = UUID("c2ae9c83-d24a-431c-b04f-74662d2ba07e")
         uid :: Int = 1 # placeholder for maybe a user somewhere
diff --git a/src/Runner.jl b/src/Runner.jl
index 5494ab87..05563681 100644
--- a/src/Runner.jl
+++ b/src/Runner.jl
@@ -80,6 +80,9 @@ module Runner
                 BenefitGenerosity.initialise( artifact"disability" )  
             end     
         end
+        if settings.do_legal_aid
+            LegalAidData.initialise()
+        end
         full_results = Array{HouseholdResult}(undef,0,0)
         # fixme if we have one are threads OK? I think yes
         if settings.export_full_results
diff --git a/src/Uprating.jl b/src/Uprating.jl
index aafaf9eb..51b90d3b 100644
--- a/src/Uprating.jl
+++ b/src/Uprating.jl
@@ -7,6 +7,8 @@ module Uprating
 #
 using DataFrames
 using CSV
+using Pkg,Pkg.Artifacts
+using LazyArtifacts
 
 using ScottishTaxBenefitModel
 using .RunSettings
@@ -122,7 +124,8 @@ function load_prices( settings :: Settings, reload :: Bool = false )
         return
     end
 
-    upr = CSV.File("$(PRICES_DIR)/$(settings.prices_file)"; delim = '\t', comment = "#") |> DataFrame
+    upr = CSV.File(joinpath(artifact"augdata","indexes.tab"); delim = '\t', comment = "#") |> DataFrame
+
     nrows = size(upr)[1]
     ncols = size(upr)[2]
     println( "read $nrows rows and $ncols cols ")
diff --git a/src/Utils.jl b/src/Utils.jl
index d6efdeb1..82f141ad 100644
--- a/src/Utils.jl
+++ b/src/Utils.jl
@@ -85,20 +85,27 @@ function make_household_sample(
 end
 
 """
-Given a gzipped tar file in `tmp/` with some data, upload this to a server 
+Given a directory of data named after the artifact, make a gzipped tar file in `tmp/`, upload this to a server 
 defined in Project.toml and add an entry to `Artifacts.toml`. Artifact
 is set to lazy load. Uses `ArtifactUtils`.
 
-file should contain: `people.tab` `households.tab` `README.md`, all top-level
+Main data archives should contain `people.tab`, `households.tab` and `README.md`, all at the top level;
+other archives can contain anything.
 
 """
 function make_artifact(;
    artifact_name :: AbstractString,
+   is_local :: Bool,
    toml_file = "Artifacts.toml" )::Int 
    gzip_file_name = "$(artifact_name).tar.gz"
    dir = "/mnt/data/ScotBen/artifacts/"
-   artifact_server_upload = @load_preference( "artifact_server_upload" )
-   artifact_server_url = @load_preference( "artifact_server_url" )
+   if is_local # local artifact server: use the `local-` preference keys
+      artifact_server_upload = @load_preference( "local-artifact_server_upload" )
+      artifact_server_url = @load_preference( "local-artifact_server_url" )
+   else # otherwise upload to, and record the URL of, the public server
+      artifact_server_upload = @load_preference( "public-artifact_server_upload" )
+      artifact_server_url = @load_preference( "public-artifact_server_url" )
+   end
    tarcmd = `tar zcvf $(dir)/tmp/$(gzip_file_name) -C $(dir)/$(artifact_name)/ .`
    run( tarcmd )
    dest = "$(artifact_server_upload)/$(gzip_file_name)"
diff --git a/src/WealthData.jl b/src/WealthData.jl
index 93f92efc..6d222342 100644
--- a/src/WealthData.jl
+++ b/src/WealthData.jl
@@ -75,8 +75,12 @@ function init( settings :: Settings; reset = false )
     if(settings.wealth_method == matching) && (reset || (size(WEALTH_DATASET)[1] == 0 )) # needed but uninitialised
         global IND_MATCHING
         global WEALTH_DATASET
-        #IND_MATCHING = CSV.File( joinpath( artifact"uk-was-wealth", "matches.tab" )) |> DataFrame
-        #WEALTH_DATASET = CSV.File( joinpath( artifact"uk-was-wealth", "data.tab"); types=jam_on_float ) |> DataFrame
+        w_artifact = RunSettings.get_artifact(; 
+            name="wealth", 
+            source=settings.data_source == SyntheticSource ? "synthetic" : "was", 
+            scottish=settings.target_nation == N_Scotland )
+        IND_MATCHING = CSV.File( joinpath( w_artifact, "matches.tab" )) |> DataFrame
+        WEALTH_DATASET = CSV.File( joinpath( w_artifact, "data.tab"); types=jam_on_float ) |> DataFrame
         uprate_raw_wealth()
         println( WEALTH_DATASET[1:2,:])
     end
diff --git a/test/wealth_tests.jl b/test/wealth_tests.jl
index 1354ef41..2b861231 100644
--- a/test/wealth_tests.jl
+++ b/test/wealth_tests.jl
@@ -108,11 +108,3 @@ end
         println( "hres.bhc_net_income=$(hres.bhc_net_income)" )
     end
 end
-
-@testset "Wealth Imputation Direct" begin
-    settings = Settings() # scotland
-    mps = CSV.File( joinpath( data_dir( settings ), settings.people_name*".tab")) |>DataFrame
-    
-
-
-end
\ No newline at end of file