From d242ad2578faa9a5a9a7d78c8106f3010c37bd08 Mon Sep 17 00:00:00 2001 From: Graham Stark Date: Wed, 27 Nov 2024 17:23:38 +0000 Subject: [PATCH] artifact stuff --- Artifacts.toml | 16 +++--- LocalPreferences.toml | 8 ++- Project.toml | 7 ++- docs/modelled-taxes-and-benfits.md | 72 ------------------------- src/ConsumptionData.jl | 11 ++-- src/LegalAidData.jl | 87 ++++++++++++++---------------- src/RunSettings.jl | 1 + src/Runner.jl | 3 ++ src/Uprating.jl | 5 +- src/Utils.jl | 15 ++++-- src/WealthData.jl | 8 ++- test/wealth_tests.jl | 8 --- 12 files changed, 91 insertions(+), 150 deletions(-) delete mode 100644 docs/modelled-taxes-and-benfits.md diff --git a/Artifacts.toml b/Artifacts.toml index 8ec2953d..60fb44a6 100644 --- a/Artifacts.toml +++ b/Artifacts.toml @@ -12,7 +12,7 @@ lazy = true [[disability.download]] sha256 = "7ed1e796fc2e09a4c48f772ea3cd06b7c6aab721c183f6905b2fa83890d0184b" - url = "https://virtual-worlds.biz/artifacts//disability.tar.gz" + url = "http://stb-artifacts/disability.tar.gz" [exampledata] git-tree-sha1 = "7853136d3530ea531e4f45a29f8fcef81d08fc1c" @@ -28,7 +28,7 @@ lazy = true [[legalaid.download]] sha256 = "fb9a643138fea7669fceda20ef9b4f65b2646d2e8c27d4a0ecbb7306908cb3a9" - url = "https://virtual-worlds.biz/artifacts//legalaid.tar.gz" + url = "http://stb-artifacts/legalaid.tar.gz" [scottish-frs-data] git-tree-sha1 = "9c5c1e8a29e8f82d861100579cee3e3e49cae464" @@ -36,7 +36,7 @@ lazy = true [[scottish-frs-data.download]] sha256 = "5bc1607882203127bd0c13ddeeb22deb0f51156cd65aa9f4c14af3c1df175ef2" - url = "https://virtual-worlds.biz/artifacts//model-data-scotland-2015-2021-v2.tar.gz" + url = "http://stb-artifacts/model-data-scotland-2015-2021-v2.tar.gz" [scottish-lcf-expenditure] git-tree-sha1 = "bf18d73b3f48bea065528bf63e43e522f85f5e4b" @@ -44,7 +44,7 @@ lazy = true [[scottish-lcf-expenditure.download]] sha256 = "66231a2f9d4cf75d373812e6969e40ffe1dd386dc0b8c577aa3ed67f75e4cc1c" - url = 
"https://virtual-worlds.biz/artifacts//scottish-lcf-expenditure.tar.gz" + url = "http://stb-artifacts/scottish-lcf-expenditure.tar.gz" [scottish-synthetic-data] git-tree-sha1 = "2bcdf3e7a62bb4bcd282f05008adbe6b63228ab9" @@ -76,7 +76,7 @@ lazy = true [[scottish-was-wealth.download]] sha256 = "6a6f5a25f22a102bb0b3e770a874b54d19407a1bafbdd6bd7956477c1204fb46" - url = "https://virtual-worlds.biz/artifacts//scottish-was-wealth.tar.gz" + url = "http://stb-artifacts/scottish-was-wealth.tar.gz" [uk-frs-data] git-tree-sha1 = "8891ef02ea5dada4ec097ce0a183502cfafb995f" @@ -84,7 +84,7 @@ lazy = true [[uk-frs-data.download]] sha256 = "8e3efc98a687441085be9ef5e53b663c2bd9abf6e300efa2dd6edb29993d8777" - url = "https://virtual-worlds.biz/artifacts//model-data-2021-2021-v2.tar.gz" + url = "http://stb-artifacts/model-data-2021-2021-v2.tar.gz" [uk-lcf-expenditure] git-tree-sha1 = "aac0e469d51d53ae54dcc2aa54af2d4f2d72a03a" @@ -92,7 +92,7 @@ lazy = true [[uk-lcf-expenditure.download]] sha256 = "a2888101e82837d3cf9852fc35f66a36372d579b5660bf5cbf7a70cf11777187" - url = "https://virtual-worlds.biz/artifacts//uk-lcf-expenditure.tar.gz" + url = "http://stb-artifacts/uk-lcf-expenditure.tar.gz" [uk-synthetic-data] git-tree-sha1 = "2bcdf3e7a62bb4bcd282f05008adbe6b63228ab9" @@ -124,4 +124,4 @@ lazy = true [[uk-was-wealth.download]] sha256 = "d33b1c51eadb5f4bf8278f716a60b0a37ea7af66b8fe56dca045d50f7674d94f" - url = "https://virtual-worlds.biz/artifacts//uk-was-wealth.tar.gz" + url = "http://stb-artifacts/uk-was-wealth.tar.gz" diff --git a/LocalPreferences.toml b/LocalPreferences.toml index 697bffbe..a2ad7e6c 100644 --- a/LocalPreferences.toml +++ b/LocalPreferences.toml @@ -1,6 +1,10 @@ [preferences.ScottishTaxBenefitModel] -artifact_server_upload = "vw@virtual-worlds.biz:public_html/artifacts/" -artifact_server_url = "https://virtual-worlds.biz/artifacts/" + +public-artifact_server_upload = "vw@virtual-worlds.biz:public_html/artifacts/" +public-artifact_server_url = 
"https://virtual-worlds.biz/artifacts/" +local-artifact_server_upload = "stb-artifacts:" +local-artifact_server_url = "http://stb-artifacts/" + auto_weight = true default_run_name = "default_run" use_weighting = true diff --git a/Project.toml b/Project.toml index bb398672..eb56b556 100644 --- a/Project.toml +++ b/Project.toml @@ -113,8 +113,11 @@ julia = "1" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [preferences.ScottishTaxBenefitModel] -artifact_server_upload = "vw@virtual-worlds.biz:public_html/artifacts/" -artifact_server_url = "https://virtual-worlds.biz/artifacts/" +public-artifact_server_upload = "vw@virtual-worlds.biz:public_html/artifacts/" +public-artifact_server_url = "https://virtual-worlds.biz/artifacts/" +local-artifact_server_upload = "stb-artifacts:" +local-artifact_server_url = "http://stb-artifacts/" + auto_weight = true default_run_name = "default_run" do_marginal_rates = false diff --git a/docs/modelled-taxes-and-benfits.md b/docs/modelled-taxes-and-benfits.md deleted file mode 100644 index 8becbc01..00000000 --- a/docs/modelled-taxes-and-benfits.md +++ /dev/null @@ -1,72 +0,0 @@ -# Scotben Modelled Taxes - -as of 13/11/2024 - -* Income Tax (Scottish and reserved UK); -* National Insurance - Employees, Self Employed and Employers (though this needs more thought on incidence); -* Council Tax, plus some simple modelling of local income taxes and domestic rates from the Plaid study -* Wealth Taxes, using WAS data; -* VAT and excise duties using LCF data; -* incidence of essentially any tax incident on wages. 
- -## Modelled benefits - -### Non Means-Tested - -* attendances allowance -* child benefit -* dla -* carers benefits -* pip -* esa -* jsa -* pensions -* bereavement -* widows pension -* maternity -* smp - -Note: the Scottish disability benefits: - -* Carer’s Allowance Supplement -* Carer Support Payment -* Adult Disability Payment -* Child Disability Payment -* Pension Age Disability Payment - - are modelled as being equivalent to the rUK benefits, though a mechanism exists to make the disability tests more or less generous. - -### Means-Tested - -* Universal Credit - -#### Legacy Benefits - -* savings credit/pension credit -* working tax credit -* child tax credit -* Housing Benefit -* council tax reductions (modeled quite crudely) - -## Others - -* Minimum Wages -* Scottish Civil Legal Aid - -## Hypothetical Benefits - -* Basic Incomes - -## Not currently modelled - -* Any form of Student Support; -* Student loans and repayments (working on repayments ATM) -* Food banks or similar; -* Foster Care payments -* Scottish Best Start payments -* Child Winter Heating Payment -* Winter Heating Payment -* Funeral Support Payment -* Job Start Payment -* any ocal authority-specific payments -* Young Carer Grant \ No newline at end of file diff --git a/src/ConsumptionData.jl b/src/ConsumptionData.jl index b0177994..d760633c 100644 --- a/src/ConsumptionData.jl +++ b/src/ConsumptionData.jl @@ -350,9 +350,14 @@ function init( settings :: Settings; reset = false ) global IND_MATCHING global EXPENDITURE_DATASET global FACTOR_COST_DATASET - IND_MATCHING = CSV.File( joinpath( artifact"uk-lcf-expenditure", "matches.tab" )) |> DataFrame - EXPENDITURE_DATASET = CSV.File( joinpath( artifact"uk-lcf-expenditure", "dataset.tab")) |> DataFrame - FACTOR_COST_DATASET = CSV.File( joinpath( artifact"uk-lcf-expenditure", "dataset.tab" )) |> DataFrame + c_artifact = RunSettings.get_artifact(; + name="expenditure", + source=settings.data_source == SyntheticSource ? 
"synthetic" : "lcf", + scottish=settings.target_nation == N_Scotland ) + + IND_MATCHING = CSV.File( joinpath( c_artifact, "matches.tab" )) |> DataFrame + EXPENDITURE_DATASET = CSV.File( joinpath( c_artifact, "dataset.tab")) |> DataFrame + FACTOR_COST_DATASET = CSV.File( joinpath( c_artifact, "dataset.tab" )) |> DataFrame println( EXPENDITURE_DATASET[1:2,:]) uprate_expenditure( settings ) end diff --git a/src/LegalAidData.jl b/src/LegalAidData.jl index f06e4e41..f1fa930b 100644 --- a/src/LegalAidData.jl +++ b/src/LegalAidData.jl @@ -280,58 +280,49 @@ CIVIL_COSTS_GRP4 = DataFrame() CIVIL_SUBJECTS = DataFrame() -function init() +function initialise() - CIVIL_COSTS = DataFrame() - AA_COSTS = DataFrame() - CIVIL_AWARDS = DataFrame() + global CIVIL_COSTS + global AA_COSTS + global CIVIL_AWARDS - CIVIL_AWARDS_GRP_NS = DataFrame() - CIVIL_AWARDS_GRP1 = DataFrame() - CIVIL_AWARDS_GRP2 = DataFrame() - CIVIL_AWARDS_GRP3 = DataFrame() - CIVIL_AWARDS_GRP4 = DataFrame() - CIVIL_COSTS_GRP_NS = DataFrame() - - CIVIL_COSTS_GRP1 = DataFrame() - AA_COSTS_GRP1 = DataFrame() - - CIVIL_COSTS_GRP2 = DataFrame() - CIVIL_COSTS_GRP3 = DataFrame() - CIVIL_COSTS_GRP4 = DataFrame() - CIVIL_SUBJECTS = DataFrame() - - - CIVIL_COSTS = load_costs( joinpath(artifact"legalaid", "civil-legal-aid-case-costs.tab" )) - AA_COSTS = load_aa_costs( joinpath( artifact"legalaid", "aa-case-costs.tab" )) - CIVIL_AWARDS = load_awards( joinpath( artifact"legalaid", "civil-applications.tab" )) - - CIVIL_AWARDS_GRP_NS = groupby(CIVIL_AWARDS, [:hsm, :age2, :sex]) - CIVIL_AWARDS_GRP1 = groupby(CIVIL_AWARDS, [:hsm]) - CIVIL_AWARDS_GRP2 = groupby(CIVIL_AWARDS, [:hsm, :la_status]) - CIVIL_AWARDS_GRP3 = groupby(CIVIL_AWARDS, [:hsm, :la_status, :sex]) - CIVIL_AWARDS_GRP4 = groupby(CIVIL_AWARDS, [:hsm, :la_status,:age2, :sex]) - CIVIL_COSTS_GRP_NS = groupby(CIVIL_COSTS, [:hsm, :age2, :sex]) - - CIVIL_COSTS_GRP1 = groupby(CIVIL_COSTS, [:hsm_censored]) - AA_COSTS_GRP1 = groupby(AA_COSTS, [:hsm_censored]) - - CIVIL_COSTS_GRP2 
= groupby(CIVIL_COSTS, [:hsm, :la_status]) - CIVIL_COSTS_GRP3 = groupby(CIVIL_COSTS, [:hsm, :la_status, :sex]) - CIVIL_COSTS_GRP4 = groupby(CIVIL_COSTS, [:hsm, :la_status, :age2, :sex]) - CIVIL_SUBJECTS = sort(levels( CIVIL_AWARDS.hsm )) + global CIVIL_AWARDS_GRP_NS + global CIVIL_AWARDS_GRP1 + global CIVIL_AWARDS_GRP2 + global CIVIL_AWARDS_GRP3 + global CIVIL_AWARDS_GRP4 + global CIVIL_COSTS_GRP_NS + + global CIVIL_COSTS_GRP1 + global AA_COSTS_GRP1 + + global CIVIL_COSTS_GRP2 + global CIVIL_COSTS_GRP3 + global CIVIL_COSTS_GRP4 + global CIVIL_SUBJECTS + + if size( CIVIL_COSTS ) == (0,0) || size(AA_COSTS) == (0,0) || size(CIVIL_AWARDS) == (0,0) # load once: only when any of the three base frames is still empty + CIVIL_COSTS = load_costs( joinpath(artifact"legalaid", "civil-legal-aid-case-costs.tab" )) + AA_COSTS = load_aa_costs( joinpath( artifact"legalaid", "aa-case-costs.tab" )) + CIVIL_AWARDS = load_awards( joinpath( artifact"legalaid", "civil-applications.tab" )) + + CIVIL_AWARDS_GRP_NS = groupby(CIVIL_AWARDS, [:hsm, :age2, :sex]) + CIVIL_AWARDS_GRP1 = groupby(CIVIL_AWARDS, [:hsm]) + CIVIL_AWARDS_GRP2 = groupby(CIVIL_AWARDS, [:hsm, :la_status]) + CIVIL_AWARDS_GRP3 = groupby(CIVIL_AWARDS, [:hsm, :la_status, :sex]) + CIVIL_AWARDS_GRP4 = groupby(CIVIL_AWARDS, [:hsm, :la_status,:age2, :sex]) + CIVIL_COSTS_GRP_NS = groupby(CIVIL_COSTS, [:hsm, :age2, :sex]) + + CIVIL_COSTS_GRP1 = groupby(CIVIL_COSTS, [:hsm_censored]) + AA_COSTS_GRP1 = groupby(AA_COSTS, [:hsm_censored]) + + CIVIL_COSTS_GRP2 = groupby(CIVIL_COSTS, [:hsm, :la_status]) + CIVIL_COSTS_GRP3 = groupby(CIVIL_COSTS, [:hsm, :la_status, :sex]) + CIVIL_COSTS_GRP4 = groupby(CIVIL_COSTS, [:hsm, :la_status, :age2, :sex]) + CIVIL_SUBJECTS = sort(levels( CIVIL_AWARDS.hsm )) + end end -#= - psa = groupby(awards, [:hsm,:age_banded,:consolidatedsex]) - k=(hsm = "Discrimination", age_banded = "5 - 9", consolidatedsex = "Male") - psa[k] - haskey(psa,k) - for( k, v ) in pairs( psa ) - println( "k=$k ") - end -=# - function gcounts( gdf :: GroupedDataFrame ) kk = sort(keys(gdf)) for k in kk 
diff --git a/src/RunSettings.jl b/src/RunSettings.jl index ff4b732d..e2c36006 100644 --- a/src/RunSettings.jl +++ b/src/RunSettings.jl @@ -83,6 +83,7 @@ module RunSettings # settings loaded automatically from the Project.toml section 'preferences.ScottishTaxBenefitModel' # and maybe overwritten in LocalPreferences.toml + # FIXME clear out all the duplications of Scotland in this @with_kw mutable struct Settings uuid :: UUID = UUID("c2ae9c83-d24a-431c-b04f-74662d2ba07e") uid :: Int = 1 # placeholder for maybe a user somewhere diff --git a/src/Runner.jl b/src/Runner.jl index 5494ab87..05563681 100644 --- a/src/Runner.jl +++ b/src/Runner.jl @@ -80,6 +80,9 @@ module Runner BenefitGenerosity.initialise( artifact"disability" ) end end + if settings.do_legal_aid + LegalAidData.initialise() + end full_results = Array{HouseholdResult}(undef,0,0) # fixme if we have one are threads OK? I think yes if settings.export_full_results diff --git a/src/Uprating.jl b/src/Uprating.jl index aafaf9eb..51b90d3b 100644 --- a/src/Uprating.jl +++ b/src/Uprating.jl @@ -7,6 +7,8 @@ module Uprating # using DataFrames using CSV +using Pkg,Pkg.Artifacts +using LazyArtifacts using ScottishTaxBenefitModel using .RunSettings @@ -122,7 +124,8 @@ function load_prices( settings :: Settings, reload :: Bool = false ) return end - upr = CSV.File("$(PRICES_DIR)/$(settings.prices_file)"; delim = '\t', comment = "#") |> DataFrame + upr = CSV.File(joinpath(artifact"augdata","indexes.tab"); delim = '\t', comment = "#") |> DataFrame + nrows = size(upr)[1] ncols = size(upr)[2] println( "read $nrows rows and $ncols cols ") diff --git a/src/Utils.jl b/src/Utils.jl index d6efdeb1..82f141ad 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -85,20 +85,27 @@ function make_household_sample( end """ -Given a gzipped tar file in `tmp/` with some data, upload this to a server +Given a directory in `tmp/` with some data, make a gzipped tar file, upload this to a server defined in Project.toml and add an entry to 
`Artifacts.toml`. Artifact is set to lazy load. Uses `ArtifactUtils`. -file should contain: `people.tab` `households.tab` `README.md`, all top-level +main data files should contain: `people.tab` `households.tab` `README.md`, all top-level +other files can contain anything. `is_local` selects the `local-*` artifact server preferences instead of the `public-*` ones. """ function make_artifact(; artifact_name :: AbstractString, + is_local :: Bool, toml_file = "Artifacts.toml" )::Int gzip_file_name = "$(artifact_name).tar.gz" dir = "/mnt/data/ScotBen/artifacts/" - artifact_server_upload = @load_preference( "artifact_server_upload" ) - artifact_server_url = @load_preference( "artifact_server_url" ) + if is_local + artifact_server_upload = @load_preference( "local-artifact_server_upload" ) + artifact_server_url = @load_preference( "local-artifact_server_url" ) + else + artifact_server_upload = @load_preference( "public-artifact_server_upload" ) + artifact_server_url = @load_preference( "public-artifact_server_url" ) + end tarcmd = `tar zcvf $(dir)/tmp/$(gzip_file_name) -C $(dir)/$(artifact_name)/ .` run( tarcmd ) dest = "$(artifact_server_upload)/$(gzip_file_name)" diff --git a/src/WealthData.jl b/src/WealthData.jl index 93f92efc..6d222342 100644 --- a/src/WealthData.jl +++ b/src/WealthData.jl @@ -75,8 +75,12 @@ function init( settings :: Settings; reset = false ) if(settings.wealth_method == matching) && (reset || (size(WEALTH_DATASET)[1] == 0 )) # needed but uninitialised global IND_MATCHING global WEALTH_DATASET - #IND_MATCHING = CSV.File( joinpath( artifact"uk-was-wealth", "matches.tab" )) |> DataFrame - #WEALTH_DATASET = CSV.File( joinpath( artifact"uk-was-wealth", "data.tab"); types=jam_on_float ) |> DataFrame + w_artifact = RunSettings.get_artifact(; + name="wealth", + source=settings.data_source == SyntheticSource ? 
"synthetic" : "was", + scottish=settings.target_nation == N_Scotland ) + IND_MATCHING = CSV.File( joinpath( w_artifact, "matches.tab" )) |> DataFrame + WEALTH_DATASET = CSV.File( joinpath( w_artifact, "data.tab"); types=jam_on_float ) |> DataFrame uprate_raw_wealth() println( WEALTH_DATASET[1:2,:]) end diff --git a/test/wealth_tests.jl b/test/wealth_tests.jl index 1354ef41..2b861231 100644 --- a/test/wealth_tests.jl +++ b/test/wealth_tests.jl @@ -108,11 +108,3 @@ end println( "hres.bhc_net_income=$(hres.bhc_net_income)" ) end end - -@testset "Wealth Imputation Direct" begin - settings = Settings() # scotland - mps = CSV.File( joinpath( data_dir( settings ), settings.people_name*".tab")) |>DataFrame - - - -end \ No newline at end of file