Skip to content

Commit

Permalink
ladata
Browse files Browse the repository at this point in the history
  • Loading branch information
grahamstark committed Dec 9, 2024
1 parent 934738e commit 7ec23cb
Show file tree
Hide file tree
Showing 4 changed files with 238 additions and 155 deletions.
210 changes: 138 additions & 72 deletions src/FRSHouseholdGetter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ module FRSHouseholdGetter
pseqs :: Vector{Int}
end

struct HHWrapper
mutable struct HHWrapper
hhlds :: Vector{Household{Float64}}
# weight :: Vector{Float64}
dimensions :: Vector{Int}
Expand Down Expand Up @@ -96,7 +96,18 @@ module FRSHouseholdGetter
zeros(0))

function backup()
BACKUP_HOUSEHOLDS.hhlds = deepcopy( MODEL_HOUSEHOLDS.hhlds )
#= BACKUP_HOUSEHOLDS.hhlds = similar( MODEL_HOUSEHOLDS.hhlds )
if length( BACKUP_HOUSEHOLDS.hhlds ) == 0
for i in eachindex( MODEL_HOUSEHOLDS.hhlds )
push!(BACKUP_HOUSEHOLDS.hhlds, deepcopy( MODEL_HOUSEHOLDS.hhlds[i]))
end
else
for i in eachindex( MODEL_HOUSEHOLDS.hhlds )
BACKUP_HOUSEHOLDS.hhlds[i] = deepcopy( MODEL_HOUSEHOLDS.hhlds[i])
end
end
=#
BACKUP_HOUSEHOLDS.hhlds = deepcopy(MODEL_HOUSEHOLDS.hhlds)
BACKUP_HOUSEHOLDS.dimensions = deepcopy( MODEL_HOUSEHOLDS.dimensions )
BACKUP_HOUSEHOLDS.hh_map = deepcopy( MODEL_HOUSEHOLDS.hh_map )
BACKUP_HOUSEHOLDS.pers_map = deepcopy( MODEL_HOUSEHOLDS.pers_map )
Expand All @@ -105,10 +116,15 @@ module FRSHouseholdGetter
end

function restore()
MODEL_HOUSEHOLDS.hhlds = deepcopy( BACKUP_HOUSEHOLDS.hhlds )
#=
for i in eachindex( BACKUP_HOUSEHOLDS.hhlds )
MODEL_HOUSEHOLDS.hhlds[i] = deepcopy( BACKUP_HOUSEHOLDS.hhlds[i])
end
=#
MODEL_HOUSEHOLDS.hhlds = deepcopy( BACKUP_HOUSEHOLDS.hhlds )
MODEL_HOUSEHOLDS.dimensions = deepcopy( BACKUP_HOUSEHOLDS.dimensions )
MODEL_HOUSEHOLDS.hh_map = deepcopy( BACKUP_HOUSEHOLDS.hh_map )
MODEL_HOUSEHOLDS.pers_map = deepcopy( BACKUP_HOUSEHOLDS.pers_map )
MODEL_HOUSEHOLDS.hh_map = deepcopy( BACKUP_HOUSEHOLDS.hh_map )
MODEL_HOUSEHOLDS.pers_map = deepcopy( BACKUP_HOUSEHOLDS.pers_map )
MODEL_HOUSEHOLDS.data_years = deepcopy( BACKUP_HOUSEHOLDS.data_years )
MODEL_HOUSEHOLDS.interview_years = deepcopy( BACKUP_HOUSEHOLDS.interview_years )
end
Expand Down Expand Up @@ -193,19 +209,126 @@ module FRSHouseholdGetter
return size(sk)[1] == 0 # not in skiplist
end

function set_local_weights_and_incomes( settings::Settings, reset::Bool)

@with_kw mutable struct PopAndWage
popn=zeros(100_000)
wage=zeros(100_000)
mean = 0.0
median = 0.0
ratio = 1.0
sdev = 0.0
n=0
end

function addone!( pw :: PopAndWage, wage::Real, weight::Real )
pw.n += 1
pw.popn[pw.n] = weight
pw.wage[pw.n] = wage
end

function summarise!( pw :: PopAndWage;
sex :: Union{Sex,Nothing},
is_ft :: Union{Bool,Nothing},
ccode :: Symbol )
if pw.n > 0
pw.wage = pw.wage[1:pw.n]
pw.popn = pw.popn[1:pw.n]
pw.mean = mean( pw.wage, Weights(pw.popn))
pw.median = median( pw.wage, Weights(pw.popn))
pw.sdev = std( pw.wage, Weights(pw.popn))
pw.wage = zeros(0)
pw.popn = zeros(0)
wd = WeightingData.get_earnings_data(; sex = sex, is_ft = is_ft, council=ccode )
@show pw.mean
@show wd.Mean
if ! ismissing( wd.Mean )
pw.ratio = wd.Mean/pw.mean
end
@show pw.ratio
end
end

"""
Make dictionary of wages from the model to match to the LA NOMIS data from WeightingData.jl+
and use this to adjust average wages to match the ratio between the two.
"""
function fixup_earnings_data!(
settings :: Settings )::Dict
popns = Dict{String,PopAndWage}()
for sex in [Male,Female,nothing]
for is_ft in [true,false,nothing]
key = WeightingData.make_wage_key(; sex=sex, is_ft=is_ft)
popns[key] = PopAndWage()
end
end
# @show settings.num_households
for hno in 1:settings.num_households
hh = get_household(hno)
for (pid,ad) in hh.people
# @show ad
if ad.employment_status in [Full_time_Employee,
Part_time_Employee]
key = WeightingData.make_wage_key(; sex=ad.sex, is_ft=ad.employment_status == Full_time_Employee )
addone!( popns[key], ad.income[wages], hh.weight )
allsexk = WeightingData.make_wage_key(; is_ft=ad.employment_status == Full_time_Employee )
addone!( popns[allsexk], ad.income[wages], hh.weight )
allk = WeightingData.make_wage_key(; )
addone!( popns[allk], ad.income[wages], hh.weight )
# full time workers both sexes
allftk = WeightingData.make_wage_key(; sex = ad.sex )
addone!( popns[allftk], ad.income[wages], hh.weight )
end
end
end
for sex in [Male,Female]
for is_ft in [true,false]
key = WeightingData.make_wage_key(; sex=sex, is_ft=is_ft)
summarise!(popns[key]; sex = sex, is_ft = is_ft, ccode=settings.ccode )
println( "summarising $key ")
end
end
for hno in 1:settings.num_households
hh = get_household(hno)
for (pid,ad) in hh.people
ratio = 1.0
if ad.employment_status in [Full_time_Employee,
Full_time_Self_Employed]
key = WeightingData.make_wage_key(; sex=ad.sex, is_ft=true )
ratio = popns[key].ratio
elseif ad.employment_status in [Part_time_Employee,
Part_time_Self_Employed]
key = WeightingData.make_wage_key(; sex=ad.sex, is_ft=false )
ratio = popns[key].ratio
end
if haskey( ad.income, wages )
# @show ad.income[wages]
# @show ratio
ad.income[ wages ] *= ratio
end
if haskey( ad.income, self_employment_income )
ad.income[ self_employment_income ] *= ratio
end
end
end
return popns
end

function set_local_weights_and_incomes!( settings::Settings; reset::Bool)
# wsizes = WeightingData.init_local_weights( settings; reset=reset )
for hno in eachindex(MODEL_HOUSEHOLDS.hhlds)
MODEL_HOUSEHOLDS.hhlds[hno].council = settings.ccode
# ?? FIXME fixup nhs area??
WeightingData.set_weight!( MODEL_HOUSEHOLDS.hhlds[hno], settings )
end
ratios = fixup_earnings_data!( settings )
# .. something to summarise the ratios if very large
end

"""
Initialise the dataset. If this has already been done, do nothing unless
`reset` is true.
return (number of households available, num people loaded inc. kids, num hhls in dataset (should always = item[1]))
return (number of households available, num people loaded inc. kids, num hhls in dataset (should always = item[1]), and a valuw
that's nothing if not a local run, else the funny ratios dict)
"""
function initialise(
settings :: Settings;
Expand Down Expand Up @@ -286,17 +409,17 @@ module FRSHouseholdGetter
REG_DATA = create_regression_dataframe( hh_dataset, people_dataset )
# Save a copy of the dataset before we maybe mess with council weights
# and incomes.
backup()
# weights: national or local
if settings.do_local_run && (settings.ccode != :"")
set_local_weights_and_incomes( settings, reset )
else
WeightingData.init_national_weights( settings, reset=reset )
for hno in eachindex(MODEL_HOUSEHOLDS.hhlds)
WeightingData.set_weight!( MODEL_HOUSEHOLDS.hhlds[hno], settings )
end
lsummary = nothing
# if settings.do_local_run && (settings.ccode != :"")
# lsummary = set_local_weights_and_incomes!( settings, reset )
# else
WeightingData.init_national_weights( settings, reset=reset )
for hno in eachindex(MODEL_HOUSEHOLDS.hhlds)
WeightingData.set_weight!( MODEL_HOUSEHOLDS.hhlds[hno], settings )
end
fill_in_deciles!(settings)
backup()
return (MODEL_HOUSEHOLDS.dimensions...,)
end

Expand All @@ -320,63 +443,6 @@ module FRSHouseholdGetter
function get_regression_dataset()::DataFrame
return REG_DATA
end

@with_kw mutable struct PopAndWage
popn=zeros(100_000)
wage=zeros(100_000)
n=0
end

function addone!( pw :: PopAndWage, wage::Real, weight::Real )
pw.n += 1
pw.popn[pw.n] = weight
pw.wage[pw.n] = wage
end

function truncate!( pw :: PopAndWage )
if pw.n > 0
pw.wage = pw.wage[1:pw.n]
pw.popn = pw.popn[1:pw.n]
end
end

"""
A dictionary of wages from the model to match to the LA NOMIS data from WeightingData.jl+
"""
function get_mean_earnings(
settings :: Settings )::Dict
popns = Dict{String,PopAndWage}()
for sex in [Male,Female,nothing]
for is_ft in [true,false,nothing]
key = WeightingData.make_wage_key(; sex=sex, is_ft=is_ft)
popns[key] = PopAndWage()
end
end
for hno in 1:settings.num_households
hh = get_household(hno)
for (pid,ad) in hh.people
if ad.employment_status in [Full_time_Employee,
Part_time_Employee]
key = WeightingData.make_wage_key(; sex=ad.sex, is_ft=ad.employment_status == Full_time_Employee )
addone!( popns[key], ad.income[wages], hh.weight )
allsexk = WeightingData.make_wage_key(; is_ft=ad.employment_status == Full_time_Employee )
addone!( popns[allsexk], ad.income[wages], hh.weight )
allk = WeightingData.make_wage_key(; )
addone!( popns[allk], ad.income[wages], hh.weight )
# full time workers both sexes
allftk = WeightingData.make_wage_key(; sex = ad.sex )
addone!( popns[allftk], ad.income[wages], hh.weight )
end
end
end
for sex in [Male,Female,nothing]
for is_ft in [true,false,nothing]
key = WeightingData.make_wage_key(; sex=sex, is_ft=is_ft)
truncate!(popns[key])
end
end
return popns
end

"""
A vector of the data years in the actual data e.g.2014,2020 ..
Expand Down
78 changes: 1 addition & 77 deletions src/LocalLevelCalculations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ using LazyArtifacts

using ScottishTaxBenefitModel
using .Definitions
using .WeightingData

using .ModelHousehold:
BenefitUnit,
Expand All @@ -38,90 +39,13 @@ using .Results:

export
LA_BRMA_MAP,
LA_CODES,
LA_NAMES,
LA_NAMES_TO_CCODES,
apply_rent_restrictions,
apply_size_criteria,
calc_council_tax,
calc_proportional_property_tax,
lookup,
make_la_to_brma_map

# la codes in order
const LA_CODES = [
:S12000033,
:S12000034,
:S12000041,
:S12000035,
:S12000036,
:S12000005,
:S12000006,
:S12000042,
:S12000008,
:S12000045,
:S12000010,
:S12000011,
:S12000014,
:S12000047,
:S12000049,
:S12000017,
:S12000018,
:S12000019,
:S12000020,
:S12000013,
:S12000021,
:S12000050,
:S12000023,
:S12000048,
:S12000038,
:S12000026,
:S12000027,
:S12000028,
:S12000029,
:S12000030,
:S12000039,
:S12000040
]

const LA_NAMES = Dict(
:S12000033 => "Aberdeen City",
:S12000034 => "Aberdeenshire",
:S12000041 => "Angus",
:S12000035 => "Argyll and Bute",
:S12000036 => "City of Edinburgh",
:S12000005 => "Clackmannanshire",
:S12000006 => "Dumfries and Galloway",
:S12000042 => "Dundee City",
:S12000008 => "East Ayrshire",
:S12000045 => "East Dunbartonshire",
:S12000010 => "East Lothian",
:S12000011 => "East Renfrewshire",
:S12000014 => "Falkirk",
:S12000047 => "Fife",
:S12000049 => "Glasgow City",
:S12000017 => "Highland",
:S12000018 => "Inverclyde",
:S12000019 => "Midlothian",
:S12000020 => "Moray",
:S12000013 => "Na h-Eileanan Siar",
:S12000021 => "North Ayrshire",
:S12000050 => "North Lanarkshire",
:S12000023 => "Orkney Islands",
:S12000048 => "Perth and Kinross",
:S12000038 => "Renfrewshire",
:S12000026 => "Scottish Borders",
:S12000027 => "Shetland Islands",
:S12000028 => "South Ayrshire",
:S12000029 => "South Lanarkshire",
:S12000030 => "Stirling",
:S12000039 => "West Dunbartonshire",
:S12000040 => "West Lothian"
)
# reverse lookup
const LA_NAMES_TO_CCODES = Dict( values(LA_NAMES) .=> keys(LA_NAMES))


# FIXME hard code this in
function make_la_to_brma_map()
lacsv = CSV.File( joinpath( artifact"augdata", "la_to_brma_approx_mappings.csv" )) |> DataFrame
Expand Down
Loading

0 comments on commit 7ec23cb

Please sign in to comment.