Skip to content

Commit

Permalink
artifact stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
grahamstark committed Nov 27, 2024
1 parent d432421 commit d242ad2
Show file tree
Hide file tree
Showing 12 changed files with 91 additions and 150 deletions.
16 changes: 8 additions & 8 deletions Artifacts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ lazy = true

[[disability.download]]
sha256 = "7ed1e796fc2e09a4c48f772ea3cd06b7c6aab721c183f6905b2fa83890d0184b"
url = "https://virtual-worlds.biz/artifacts//disability.tar.gz"
url = "http://stb-artifacts/disability.tar.gz"

[exampledata]
git-tree-sha1 = "7853136d3530ea531e4f45a29f8fcef81d08fc1c"
Expand All @@ -28,23 +28,23 @@ lazy = true

[[legalaid.download]]
sha256 = "fb9a643138fea7669fceda20ef9b4f65b2646d2e8c27d4a0ecbb7306908cb3a9"
url = "https://virtual-worlds.biz/artifacts//legalaid.tar.gz"
url = "http://stb-artifacts/legalaid.tar.gz"

[scottish-frs-data]
git-tree-sha1 = "9c5c1e8a29e8f82d861100579cee3e3e49cae464"
lazy = true

[[scottish-frs-data.download]]
sha256 = "5bc1607882203127bd0c13ddeeb22deb0f51156cd65aa9f4c14af3c1df175ef2"
url = "https://virtual-worlds.biz/artifacts//model-data-scotland-2015-2021-v2.tar.gz"
url = "http://stb-artifacts/model-data-scotland-2015-2021-v2.tar.gz"

[scottish-lcf-expenditure]
git-tree-sha1 = "bf18d73b3f48bea065528bf63e43e522f85f5e4b"
lazy = true

[[scottish-lcf-expenditure.download]]
sha256 = "66231a2f9d4cf75d373812e6969e40ffe1dd386dc0b8c577aa3ed67f75e4cc1c"
url = "https://virtual-worlds.biz/artifacts//scottish-lcf-expenditure.tar.gz"
url = "http://stb-artifacts/scottish-lcf-expenditure.tar.gz"

[scottish-synthetic-data]
git-tree-sha1 = "2bcdf3e7a62bb4bcd282f05008adbe6b63228ab9"
Expand Down Expand Up @@ -76,23 +76,23 @@ lazy = true

[[scottish-was-wealth.download]]
sha256 = "6a6f5a25f22a102bb0b3e770a874b54d19407a1bafbdd6bd7956477c1204fb46"
url = "https://virtual-worlds.biz/artifacts//scottish-was-wealth.tar.gz"
url = "http://stb-artifacts/scottish-was-wealth.tar.gz"

[uk-frs-data]
git-tree-sha1 = "8891ef02ea5dada4ec097ce0a183502cfafb995f"
lazy = true

[[uk-frs-data.download]]
sha256 = "8e3efc98a687441085be9ef5e53b663c2bd9abf6e300efa2dd6edb29993d8777"
url = "https://virtual-worlds.biz/artifacts//model-data-2021-2021-v2.tar.gz"
url = "http://stb-artifacts/model-data-2021-2021-v2.tar.gz"

[uk-lcf-expenditure]
git-tree-sha1 = "aac0e469d51d53ae54dcc2aa54af2d4f2d72a03a"
lazy = true

[[uk-lcf-expenditure.download]]
sha256 = "a2888101e82837d3cf9852fc35f66a36372d579b5660bf5cbf7a70cf11777187"
url = "https://virtual-worlds.biz/artifacts//uk-lcf-expenditure.tar.gz"
url = "http://stb-artifacts/uk-lcf-expenditure.tar.gz"

[uk-synthetic-data]
git-tree-sha1 = "2bcdf3e7a62bb4bcd282f05008adbe6b63228ab9"
Expand Down Expand Up @@ -124,4 +124,4 @@ lazy = true

[[uk-was-wealth.download]]
sha256 = "d33b1c51eadb5f4bf8278f716a60b0a37ea7af66b8fe56dca045d50f7674d94f"
url = "https://virtual-worlds.biz/artifacts//uk-was-wealth.tar.gz"
url = "http://stb-artifacts/uk-was-wealth.tar.gz"
8 changes: 6 additions & 2 deletions LocalPreferences.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
[preferences.ScottishTaxBenefitModel]
artifact_server_upload = "[email protected]:public_html/artifacts/"
artifact_server_url = "https://virtual-worlds.biz/artifacts/"

public-artifact_server_upload = "[email protected]:public_html/artifacts/"
public-artifact_server_url = "https://virtual-worlds.biz/artifacts/"
local-artifact_server_upload = "stb-artifacts:"
local-artifact_server_url = "http://stb-artifacts/"

auto_weight = true
default_run_name = "default_run"
use_weighting = true
Expand Down
7 changes: 5 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,11 @@ julia = "1"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[preferences.ScottishTaxBenefitModel]
artifact_server_upload = "[email protected]:public_html/artifacts/"
artifact_server_url = "https://virtual-worlds.biz/artifacts/"
public-artifact_server_upload = "[email protected]:public_html/artifacts/"
public-artifact_server_url = "https://virtual-worlds.biz/artifacts/"
local-artifact_server_upload = "stb-artifacts:"
local-artifact_server_url = "http://stb-artifacts/"

auto_weight = true
default_run_name = "default_run"
do_marginal_rates = false
Expand Down
72 changes: 0 additions & 72 deletions docs/modelled-taxes-and-benfits.md

This file was deleted.

11 changes: 8 additions & 3 deletions src/ConsumptionData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -350,9 +350,14 @@ function init( settings :: Settings; reset = false )
global IND_MATCHING
global EXPENDITURE_DATASET
global FACTOR_COST_DATASET
IND_MATCHING = CSV.File( joinpath( artifact"uk-lcf-expenditure", "matches.tab" )) |> DataFrame
EXPENDITURE_DATASET = CSV.File( joinpath( artifact"uk-lcf-expenditure", "dataset.tab")) |> DataFrame
FACTOR_COST_DATASET = CSV.File( joinpath( artifact"uk-lcf-expenditure", "dataset.tab" )) |> DataFrame
c_artifact = RunSettings.get_artifact(;
name="expenditure",
source=settings.data_source == SyntheticSource ? "synthetic" : "lcf",
scottish=settings.target_nation == N_Scotland )

IND_MATCHING = CSV.File( joinpath( c_artifact, "matches.tab" )) |> DataFrame
EXPENDITURE_DATASET = CSV.File( joinpath( c_artifact, "dataset.tab")) |> DataFrame
FACTOR_COST_DATASET = CSV.File( joinpath( c_artifact, "dataset.tab" )) |> DataFrame
println( EXPENDITURE_DATASET[1:2,:])
uprate_expenditure( settings )
end
Expand Down
87 changes: 39 additions & 48 deletions src/LegalAidData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -280,58 +280,49 @@ CIVIL_COSTS_GRP4 = DataFrame()
CIVIL_SUBJECTS = DataFrame()


function init()
function initialise()

CIVIL_COSTS = DataFrame()
AA_COSTS = DataFrame()
CIVIL_AWARDS = DataFrame()
global CIVIL_COSTS
global AA_COSTS
global CIVIL_AWARDS

CIVIL_AWARDS_GRP_NS = DataFrame()
CIVIL_AWARDS_GRP1 = DataFrame()
CIVIL_AWARDS_GRP2 = DataFrame()
CIVIL_AWARDS_GRP3 = DataFrame()
CIVIL_AWARDS_GRP4 = DataFrame()
CIVIL_COSTS_GRP_NS = DataFrame()

CIVIL_COSTS_GRP1 = DataFrame()
AA_COSTS_GRP1 = DataFrame()

CIVIL_COSTS_GRP2 = DataFrame()
CIVIL_COSTS_GRP3 = DataFrame()
CIVIL_COSTS_GRP4 = DataFrame()
CIVIL_SUBJECTS = DataFrame()


CIVIL_COSTS = load_costs( joinpath(artifact"legalaid", "civil-legal-aid-case-costs.tab" ))
AA_COSTS = load_aa_costs( joinpath( artifact"legalaid", "aa-case-costs.tab" ))
CIVIL_AWARDS = load_awards( joinpath( artifact"legalaid", "civil-applications.tab" ))

CIVIL_AWARDS_GRP_NS = groupby(CIVIL_AWARDS, [:hsm, :age2, :sex])
CIVIL_AWARDS_GRP1 = groupby(CIVIL_AWARDS, [:hsm])
CIVIL_AWARDS_GRP2 = groupby(CIVIL_AWARDS, [:hsm, :la_status])
CIVIL_AWARDS_GRP3 = groupby(CIVIL_AWARDS, [:hsm, :la_status, :sex])
CIVIL_AWARDS_GRP4 = groupby(CIVIL_AWARDS, [:hsm, :la_status,:age2, :sex])
CIVIL_COSTS_GRP_NS = groupby(CIVIL_COSTS, [:hsm, :age2, :sex])

CIVIL_COSTS_GRP1 = groupby(CIVIL_COSTS, [:hsm_censored])
AA_COSTS_GRP1 = groupby(AA_COSTS, [:hsm_censored])

CIVIL_COSTS_GRP2 = groupby(CIVIL_COSTS, [:hsm, :la_status])
CIVIL_COSTS_GRP3 = groupby(CIVIL_COSTS, [:hsm, :la_status, :sex])
CIVIL_COSTS_GRP4 = groupby(CIVIL_COSTS, [:hsm, :la_status, :age2, :sex])
CIVIL_SUBJECTS = sort(levels( CIVIL_AWARDS.hsm ))
global CIVIL_AWARDS_GRP_NS
global CIVIL_AWARDS_GRP1
global CIVIL_AWARDS_GRP2
global CIVIL_AWARDS_GRP3
global CIVIL_AWARDS_GRP4
global CIVIL_COSTS_GRP_NS

global CIVIL_COSTS_GRP1
global AA_COSTS_GRP1

global CIVIL_COSTS_GRP2
global CIVIL_COSTS_GRP3
global CIVIL_COSTS_GRP4
global CIVIL_SUBJECTS

# Load the legal aid tables and rebuild the grouped views only if at least one
# of the three base tables still has size (0,0).
# `size` returns a tuple, so each test must compare against the tuple `(0,0)`;
# comparing against a scalar such as `0.0` is always false.
if size( CIVIL_COSTS ) == (0,0) || size( AA_COSTS ) == (0,0) || size( CIVIL_AWARDS ) == (0,0)
    # base tables, read from the lazily-downloaded `legalaid` artifact
    CIVIL_COSTS = load_costs( joinpath(artifact"legalaid", "civil-legal-aid-case-costs.tab" ))
    AA_COSTS = load_aa_costs( joinpath( artifact"legalaid", "aa-case-costs.tab" ))
    CIVIL_AWARDS = load_awards( joinpath( artifact"legalaid", "civil-applications.tab" ))

    # awards grouped at increasing levels of detail
    CIVIL_AWARDS_GRP_NS = groupby(CIVIL_AWARDS, [:hsm, :age2, :sex])
    CIVIL_AWARDS_GRP1 = groupby(CIVIL_AWARDS, [:hsm])
    CIVIL_AWARDS_GRP2 = groupby(CIVIL_AWARDS, [:hsm, :la_status])
    CIVIL_AWARDS_GRP3 = groupby(CIVIL_AWARDS, [:hsm, :la_status, :sex])
    CIVIL_AWARDS_GRP4 = groupby(CIVIL_AWARDS, [:hsm, :la_status,:age2, :sex])
    CIVIL_COSTS_GRP_NS = groupby(CIVIL_COSTS, [:hsm, :age2, :sex])

    # the single-key cost groupings use the `hsm_censored` column, not `hsm`
    CIVIL_COSTS_GRP1 = groupby(CIVIL_COSTS, [:hsm_censored])
    AA_COSTS_GRP1 = groupby(AA_COSTS, [:hsm_censored])

    CIVIL_COSTS_GRP2 = groupby(CIVIL_COSTS, [:hsm, :la_status])
    CIVIL_COSTS_GRP3 = groupby(CIVIL_COSTS, [:hsm, :la_status, :sex])
    CIVIL_COSTS_GRP4 = groupby(CIVIL_COSTS, [:hsm, :la_status, :age2, :sex])
    # sorted list of the distinct `hsm` levels found in the awards table
    CIVIL_SUBJECTS = sort(levels( CIVIL_AWARDS.hsm ))
end
end

#=
psa = groupby(awards, [:hsm,:age_banded,:consolidatedsex])
k=(hsm = "Discrimination", age_banded = "5 - 9", consolidatedsex = "Male")
psa[k]
haskey(psa,k)
for( k, v ) in pairs( psa )
println( "k=$k ")
end
=#

function gcounts( gdf :: GroupedDataFrame )
kk = sort(keys(gdf))
for k in kk
Expand Down
1 change: 1 addition & 0 deletions src/RunSettings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ module RunSettings

# settings loaded automatically from the Project.toml section 'preferences.ScottishTaxBenefitModel'
# and maybe overwritten in LocalPreferences.toml
# FIXME clear out all the duplications of Scotland in this
@with_kw mutable struct Settings
uuid :: UUID = UUID("c2ae9c83-d24a-431c-b04f-74662d2ba07e")
uid :: Int = 1 # placeholder for maybe a user somewhere
Expand Down
3 changes: 3 additions & 0 deletions src/Runner.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ module Runner
BenefitGenerosity.initialise( artifact"disability" )
end
end
if settings.do_legal_aid
LegalAidData.initialise()
end
full_results = Array{HouseholdResult}(undef,0,0)
# fixme if we have one are threads OK? I think yes
if settings.export_full_results
Expand Down
5 changes: 4 additions & 1 deletion src/Uprating.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ module Uprating
#
using DataFrames
using CSV
using Pkg,Pkg.Artifacts
using LazyArtifacts

using ScottishTaxBenefitModel
using .RunSettings
Expand Down Expand Up @@ -122,7 +124,8 @@ function load_prices( settings :: Settings, reload :: Bool = false )
return
end

upr = CSV.File("$(PRICES_DIR)/$(settings.prices_file)"; delim = '\t', comment = "#") |> DataFrame
upr = CSV.File(joinpath(artifact"augdata","indexes.tab"); delim = '\t', comment = "#") |> DataFrame

nrows = size(upr)[1]
ncols = size(upr)[2]
println( "read $nrows rows and $ncols cols ")
Expand Down
15 changes: 11 additions & 4 deletions src/Utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -85,20 +85,27 @@ function make_household_sample(
end

"""
Given a gzipped tar file in `tmp/` with some data, upload this to a server
Given a directory of data named after the artifact, make a gzipped tar file of it in `tmp/`, upload this to a server
defined in Project.toml and add an entry to `Artifacts.toml`. Artifact
is set to lazy load. Uses `ArtifactUtils`.
file should contain: `people.tab` `households.tab` `README.md`, all top-level
Main data archives should contain `people.tab`, `households.tab` and `README.md`, all at the top level;
other archives can contain anything.
"""
function make_artifact(;
artifact_name :: AbstractString,
is_local :: Bool,
toml_file = "Artifacts.toml" )::Int
gzip_file_name = "$(artifact_name).tar.gz"
dir = "/mnt/data/ScotBen/artifacts/"
artifact_server_upload = @load_preference( "artifact_server_upload" )
artifact_server_url = @load_preference( "artifact_server_url" )
# Pick the upload target / download URL pair that matches the requested
# destination: the `local-*` preferences when `is_local` is true, the
# `public-*` ones otherwise. (The branches were previously swapped, so
# `is_local = true` selected the public server.)
if is_local
    artifact_server_upload = @load_preference( "local-artifact_server_upload" )
    artifact_server_url = @load_preference( "local-artifact_server_url" )
else
    artifact_server_upload = @load_preference( "public-artifact_server_upload" )
    artifact_server_url = @load_preference( "public-artifact_server_url" )
end
tarcmd = `tar zcvf $(dir)/tmp/$(gzip_file_name) -C $(dir)/$(artifact_name)/ .`
run( tarcmd )
dest = "$(artifact_server_upload)/$(gzip_file_name)"
Expand Down
8 changes: 6 additions & 2 deletions src/WealthData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,12 @@ function init( settings :: Settings; reset = false )
if(settings.wealth_method == matching) && (reset || (size(WEALTH_DATASET)[1] == 0 )) # needed but uninitialised
global IND_MATCHING
global WEALTH_DATASET
#IND_MATCHING = CSV.File( joinpath( artifact"uk-was-wealth", "matches.tab" )) |> DataFrame
#WEALTH_DATASET = CSV.File( joinpath( artifact"uk-was-wealth", "data.tab"); types=jam_on_float ) |> DataFrame
w_artifact = RunSettings.get_artifact(;
name="wealth",
source=settings.data_source == SyntheticSource ? "synthetic" : "was",
scottish=settings.target_nation == N_Scotland )
IND_MATCHING = CSV.File( joinpath( w_artifact, "matches.tab" )) |> DataFrame
WEALTH_DATASET = CSV.File( joinpath( w_artifact, "data.tab"); types=jam_on_float ) |> DataFrame
uprate_raw_wealth()
println( WEALTH_DATASET[1:2,:])
end
Expand Down
8 changes: 0 additions & 8 deletions test/wealth_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,3 @@ end
println( "hres.bhc_net_income=$(hres.bhc_net_income)" )
end
end

@testset "Wealth Imputation Direct" begin
settings = Settings() # scotland
mps = CSV.File( joinpath( data_dir( settings ), settings.people_name*".tab")) |>DataFrame



end

0 comments on commit d242ad2

Please sign in to comment.