Skip to content

Commit

Permalink
Initial version of LA weights as module
Browse files — browse the repository at this point in the history
  • Loading branch information
grahamstark committed Dec 16, 2024
1 parent 8557c8b commit 367636d
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 27 deletions.
44 changes: 40 additions & 4 deletions scripts/create-scottish-la-weights.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ using .FRSHouseholdGetter
using .RunSettings
using .Weighting
using SurveyDataWeighting
using CSV
using StatsBase

const DDIR = joinpath("/","mnt","data","ScotBen","data", "local", "local_targets_2024" )

Expand Down Expand Up @@ -95,6 +97,37 @@ const authority_codes = [
:S92000003] # scotland


"""
    summarise_dfs( data, targets, household_total )

Build an 11-row summary `DataFrame` with one column per target variable.

Rows hold, in order: max, (spare/unassigned), mean, median, nmiss, min,
nobs, q25, q75, sd, and finally the ratio of the target value to the
weighted sum of the corresponding data column. `household_total` is
spread evenly over the rows of `data` to form the initial weights.

Fixes over the draft: `rows` was undefined (now `nrows`); the unused
`scale = nrows / popn` line referenced an undefined `popn` and has been
removed; `d[n] = ...` column assignment is invalid in current
DataFrames and is now `d[!, n] = ...`.
"""
function summarise_dfs( data :: DataFrame, targets::DataFrameRow, household_total :: Number )::DataFrame
    nms = Symbol.(names(targets))
    nrows = size( data, 1 )
    d = DataFrame()
    # Equal initial weight per household, summing to household_total.
    initial_weights = Weights( fill( household_total/nrows, nrows ))
    for n in nms
        d[!,n] = zeros(11)
        # NOTE(review): check that the installed StatsBase.summarystats
        # accepts a weights argument, and that the stat field names below
        # (q25/q75 vs p25/p75) match its SummaryStats struct — confirm.
        v = summarystats(data[!,n], initial_weights)
        d[1,n] = v.max
        # row 2 is deliberately left at zero, as in the original layout
        d[3,n] = v.mean
        d[4,n] = v.median
        d[5,n] = v.nmiss
        d[6,n] = v.min
        d[7,n] = v.nobs
        d[8,n] = v.q25
        d[9,n] = v.q75
        d[10,n] = v.sd
        # ratio of the census target to the weighted sum of the data column
        d[11,n] = targets[n] / sum(data[!,n], initial_weights)
    end
    return d
end



DROPS = [
"Authority_1",
Expand Down Expand Up @@ -216,7 +249,6 @@ allfs.authority_code = authority_codes
CSV.write( joinpath(DDIR,"labels.tab"), labels; delim='\t')
CSV.write( joinpath(DDIR,"allfs.tab"), allfs; delim='\t' )


const INCLUDE_OCCUP = true
const INCLUDE_HOUSING = true
const INCLUDE_BEDROOMS = true
Expand Down Expand Up @@ -629,6 +661,7 @@ function weight_to_la(
code :: Symbol,
num_households :: Int )
targets = make_target_list( alldata, code )

hhtotal = alldata[alldata.authority_code .== code,:total_hhlds][1]
println( "calculating for $code; hh total $hhtotal")
weights = generate_weights(
Expand All @@ -640,13 +673,15 @@ function weight_to_la(
targets = targets,
initialise_target_dataframe = initialise_target_dataframe_scotland_la,
make_target_row! = make_target_row_scotland_la! )
initial_weights( )

return weights
end

function t_make_target_dataset(
nhhlds :: Integer,
initialise_target_dataframe :: Function,
make_target_row! :: Function ) :: Matrix
make_target_row! :: Function ) :: Tuple
df :: DataFrame = initialise_target_dataframe( nhhlds )
for hno in 1:nhhlds
hh = FRSHouseholdGetter.get_household( hno )
Expand All @@ -665,13 +700,14 @@ function t_make_target_dataset(
for r in 1:nr
@assert sum(m[r,:] ) != 0 "all zero row $r"
end
return m
return m,df
end

settings = Settings()
@time settings.num_households, settings.num_people, nhh2 =
initialise( settings; reset=false )
dataset = t_make_target_dataset(
# initial version for checking
m, tdf = t_make_target_dataset(
settings.num_households,
initialise_target_dataframe_scotland_la,
make_target_row_scotland_la! )
Expand Down
1 change: 1 addition & 0 deletions src/Results.jl
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ module Results
work_allowance :: RT = zero(RT)
earnings_before_allowances :: RT = zero(RT)
earned_income :: RT = zero(RT)
untapered_earnings :: RT = zero(RT)
other_income :: RT = zero(RT)
tariff_income :: RT = zero(RT)
standard_allowance :: RT = zero(RT)
Expand Down
5 changes: 3 additions & 2 deletions src/UniversalCredit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ function calc_uc_income(
earn = max( 0.0, earn-bur.uc.work_allowance)
end
earned_income = earn*uc.taper
return (; other_income=inc, earned_income=earned_income )
return (; other_income=inc, earned_income=earned_income, untapered_earnings = earn )
end


Expand All @@ -349,7 +349,8 @@ function calc_uc_income!(
uc :: UniversalCreditSys,
minwage :: MinimumWage )
benefit_unit_result.uc.other_income,
benefit_unit_result.uc.earned_income = calc_uc_income(
benefit_unit_result.uc.earned_income,
benefit_unit_result.uc.untapered_earnings = calc_uc_income(
benefit_unit_result,
benefit_unit,
intermed,
Expand Down
2 changes: 1 addition & 1 deletion src/Weighting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ function generate_weights(
hh = FRSHouseholdGetter.get_household( hno )
hh.weight = weights[hno]
end
return weights
return weights, data
end

end # package
4 changes: 3 additions & 1 deletion src/legal_aid_parameters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ export Net_Or_Gross, net, gross
@enum Assessment_Period weekly monthly annualHistoric annualForward
export ContributionType, cont_proportion, cont_fixed
@enum ContributionType cont_proportion cont_fixed
export UCEarningsType, assessed_net_income, tapered_uc_earnings, full_uc_earnings
@enum UCEarningsType assessed_net_income tapered_uc_earnings full_uc_earnings

"""
needed because json (inf) isn't supported and typemax(somefloattype) == Inf
Expand Down Expand Up @@ -214,7 +216,7 @@ end
premia = zero_premia(RT)
uc_limit = zero(RT)
uc_limit_type :: UCLimitType = uc_no_limit
uc_use_earnings = false
uc_use_earnings :: UCEarningsType = assessed_net_income
include_mortgage_repayments = true
end

Expand Down
67 changes: 48 additions & 19 deletions src/targets/scotland-localities-2024.jl
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@

const DDIR = joinpath("/","mnt","data","ScotBen","data", "local", "local_targets_2024" )

"""
    read_census_file( filename )

Read one Census Scotland CSV extract into a `DataFrame`.

The raw files carry preamble lines, so parsing starts with `header=10`
and `skipto=12`. Returns a tuple `(actuald, label, nms)` where:

- `actuald` : rows 1:33 of the parsed frame, minus its first column,
  with the new first column renamed to `"Authority"`
  (assumes rows 1:33 hold the authority-level data — TODO confirm
  against the raw files);
- `label`   : the first parsed column name, used as the table's label;
- `nms`     : the column names of `actuald` before the rename.
"""
function read_census_file(filename::String)::Tuple
    d = CSV.File( filename; normalizenames=true, header=10, skipto=12 ) |> DataFrame
    label = names(d)[1]
    # NOTE(review): an earlier `ismissing(d[1,2])` guard calling an empty
    # (invalid) `delete!( )` was dead code and has been removed.
    actuald = d[1:33, 2:end]
    nms = names(actuald)
    rename!( actuald, 1 => "Authority" )
    return actuald, label, nms
end

function read_all_scot_2024()
fs = sort(readdir( DDIR, join=true ))
"""
Very, very ad-hoc code to munge together a bunch of Census Scotland datafiles into a
single dataframe.
"""
function read_all_scot_2024( file_dir :: AbstractString )::Tuple
fs = sort(reafile_dir( file_dir, join=true ))
n = 0
allfs = nothing
merged_census_files = nothing
rows = 0
cols = 0
nfs = length(fs)
dfs = []
individual_datasets = []
labels = DataFrame( filename=fill("",nfs), label=fill("",nfs), start=zeros(Int,nfs) )
for f in fs
if ! isnothing(match(r".*table.*.csv$",f))
Expand All @@ -34,24 +35,28 @@ function read_all_scot_2024()
labels.label[n]=label
labels.start[n]=cols+2
if n == 1
allfs = deepcopy( data )
merged_census_files = deepcopy( data )
else
n1 = String.(data[:,1])[1:8] # skip "Na hEileanan Siar", since it's sometimes edited
n2 = String.(allfs[:,1])[1:8]
n2 = String.(merged_census_files[:,1])[1:8]
@assert n1 == n2 "$(n1) !== $(n2)" # check in sync
allfs = hcat( allfs, data; makeunique=true )
rows,cols = size(allfs)
merged_census_files = hcat( merged_census_files, data; makeunique=true )
rows,cols = size(merged_census_files)
end
push!(dfs,data)
push!(individual_datasets,data)
# println( "label=$label")
end
end
allfs,labels[1:n,:],dfs
merged_census_files,labels[1:n,:],individual_datasets
end

"""
More ad-hoc code code to load Census Scotland files, clean them up and
add some constructed fields.
"""
function load_census_2024()

allfs,labels,dfs = read_all_scot_2024()
file_dir = joinpath("/","mnt","data","ScotBen","data", "local", "local_targets_2024" )
merged_census_files,labels,individual_datasets = read_all_scot_2024( file_dir )
# FIXME dup
authority_codes = [
:S12000033,
Expand Down Expand Up @@ -184,7 +189,31 @@ function load_census_2024()
"Band_F" => "F",
"Band_G" => "G",
"Band_H" => "H"])


merged_census_files,labels,individual_datasets = read_all_scot_2024()

ctbase=CSV.File(joinpath( file_dir, "CTAXBASE+2024+-+Tables+-+Chargeable+Dwellings.csv"),normalizenames=true)|>DataFrame
merged_census_files = hcat( merged_census_files, ctbase; makeunique=true )

rename!( merged_census_files, RENAMES )
select!( merged_census_files, Not(DROPS))
merged_census_files.total_cts = sum.(eachrow(merged_census_files[:,[:A,:B,:C,:D,:E,:F,:G,:H]]))

# merged columns
merged_census_files.private_rented_rent_free = merged_census_files.private_rented + merged_census_files.rent_free
merged_census_files.converted_flat = merged_census_files.converted_flat_1 + merged_census_files.converted_flat_2
merged_census_files.all_mortgaged = merged_census_files.mortgaged + merged_census_files.shared_ownership + merged_census_files.shared_equity
merged_census_files.bedrooms_4_plus = merged_census_files.bedrooms_4 + merged_census_files.bedrooms_5_plus
merged_census_files.Five_plus_people = merged_census_files.Five_people +
merged_census_files.Six_people +
merged_census_files.Seven_people +
merged_census_files.Eight_or_more_people
merged_census_files.working = merged_census_files.economically_active_employee + merged_census_files.economically_active_self_employed
merged_census_files.authority_code = authority_codes

CSV.write( joinpath(file_dir,"merged_census_labels_2024.tab"), labels; delim='\t')
CSV.write( joinpath(file_dir,"merged_census_files_2024.tab"), merged_census_files; delim='\t' )
return merged_census_files
end

function initialise_target_dataframe_scotland_la( n :: Integer ) :: DataFrame
Expand Down

0 comments on commit 367636d

Please sign in to comment.