Skip to content

Commit

Permalink
ladata
Browse files Browse the repository at this point in the history
  • Loading branch information
grahamstark committed Nov 29, 2024
1 parent db3f5e7 commit 7cd198d
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 22 deletions.
123 changes: 123 additions & 0 deletions scripts/ct-calculations-draft.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
using ScottishTaxBenefitModel
using .RunSettings
using .ModelHousehold
using .STBParameters
using .Definitions

using CSV
using DataFrame

const DDIR = joinpath("/","mnt","data","ScotBen","data", "local", "local_targets_2024" )


"""
Very simple implementation of the CT scheme
note this doesn't include rebates apart from single
person rebate
"""
function l_calc_council_tax(
hh :: Household{RT},
intermed :: MTIntermediate,
ctsys :: CouncilTax{RT} ) :: RT where RT
ctres = zero(RT)
if hh.region != Wales
@assert hh.ct_band != Band_I # We're not Welsh
end
ctres = ctsys.band_d[hh.council]* ctsys.relativities[hh.ct_band]
if intermed.num_adults == 1
ctres *= (1-ctsys.single_person_discount)
end
## TODO disabled discounts. See CT note.
return ctres
end

function calculate_ct()
ctf = joinpath( DDIR, "council-tax-levels-scotland-24-25-edited.tab")
wf = joinpath( DDIR, "la-frs-weights-scotland-2024.tab")
settings = Settings()
ctrates = CSV.File( ctf ) |> DataFrame
ctrates.authority_code = Symbol.(ctrates.authority_code)
weights = CSV.File( wf ) |> DataFrame
band_ds = Dict{Symbol,Float64}()
p = 0
for r in eachrow(ctrates)
p += 1
if p > 1 # skip 1
band_ds[Symbol(r.authority_code)] = r.D
end
end
sys = get_default_system_for_fin_year(2024; scotland=true)
sys.loctax.ct.band_d = band_ds

time settings.num_households, settings.num_people, nhh2 = initialise( settings; reset=false )
# @time nhh, num_people, nhh2 = initialise( settings; reset=false )
num_las = size( ctrates )[1]
revs = DataFrame(
code=fill("", num_las),
ctrev = zeros(num_las),
average_wage=zeros(num_las),
average_se=zeros(num_las),
ft_jobs=zeros(num_las),
semp=zeros(num_las) )
p = 0
for code in ctrates.authority_code
localincometax = deepcopy( sys.it )
localincometax.non_savings_rates .+= 0.01
w = weights[!,code]
p += 1
band_d = ctrates[(ctrates.code .== code),:D][1]
ctrev = 0.0
average_wage = 0.0
average_se = 0.0
nearers = 0.0
nses = 0.0
for i in 1:nhh
hh = get_household(i)
hh.council = scode
hh.weight = w[i]
intermed = make_intermediate(
hh, sys.lmt.hours_limits,
sys.age_limits,
sys.child_limits )
ct1 = l_calc_council_tax( hh, intermed.hhint, band_d, sys.loctax.ct )
ct2 = l_calc_council_tax(
hh, intermed.hhint, sys.loctax.ct )
@assert ct1 ct2



for (pid,pers) in hh.people
if pers.employment_status in [
Full_time_Employee ]
# Part_time_Employee ]
nearers += w[i]
average_wage += (w[i]*pers.income[wages])
elseif pers.employment_status in [
Full_time_Self_Employed,
Part_time_Self_Employed]
average_se += pers.income[self_employment_income]*w[i]
nses += w[i]
end
end

ctrev += w[i]*ct2
end
average_se /= nses
average_wage /= nearers
revs.code[p] = code
revs.ctrev[p] = ctrev
revs.average_wage[p] = average_wage
revs.average_se[p] = average_se
revs.ft_jobs[p] = nearers
revs.semp[p] = nses
end
#=
for code in ctrates.code[2:end]
f = Formatting.format(revs[code],precision=0, commas=true)
println( "$code = $(f)")
end
=#

revs
end

61 changes: 39 additions & 22 deletions scripts/parse-scottish-census.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ using .RunSettings
using .Weighting
using SurveyDataWeighting

const DDIR = joinpath("/","mnt","data","ScotBen","data", "local", "local_targets_2024" )

function readc(filename::String)::Tuple
d = (CSV.File( filename; normalizenames=true, header=10, skipto=12)|>DataFrame)
if ismissing(d[1,2])
Expand All @@ -20,7 +22,7 @@ function readc(filename::String)::Tuple
end

function read_all()
fs = sort(readdir("."))
fs = sort(readdir( DDIR, join=true ))
n = 0
allfs = nothing
rows = 0
Expand All @@ -29,7 +31,7 @@ function read_all()
dfs = []
labels = DataFrame( filename=fill("",nfs), label=fill("",nfs), start=zeros(Int,nfs) )
for f in fs
if ! isnothing(match(r"^table.*.csv$",f))
if ! isnothing(match(r".*table.*.csv$",f))
n += 1
println( "on $f")
data, label, nms = readc(f)
Expand Down Expand Up @@ -57,7 +59,7 @@ end

allfs,labels,dfs = read_all()

const Authority_Codes = [
const authority_codes = [
:S12000033,
:S12000034,
:S12000041,
Expand Down Expand Up @@ -174,12 +176,6 @@ RENAMES = Dict(
"One_family_household_Couple_family" => "single_family",
"One_family_household_Lone_parent" => "single_parent",
"Other_household_types" => "multi_family",
#=
"economically_active_employee" => "employee"
"economically_active_self_employed" => "selfemp"
"economically_active_unemployed" => "unemployed"
"economically_inactive" => "inactive"
=#
"Managers_Directors_and_Senior_Officials" => "Soc_Managers_Directors_and_Senior_Officials",
"Professional_Occupations" => "Soc_Professional_Occupations",
"Associate_Professional_and_Technical_Occupations" => "Soc_Associate_Prof_and_Technical_Occupations",
Expand All @@ -198,7 +194,7 @@ RENAMES = Dict(
"Band_G" => "G",
"Band_H" => "H"])

ctbase=CSV.File("CTAXBASE+2024+-+Tables+-+Chargeable+Dwellings.csv",normalizenames=true)|>DataFrame
ctbase=CSV.File(joinpath( DDIR, "CTAXBASE+2024+-+Tables+-+Chargeable+Dwellings.csv"),normalizenames=true)|>DataFrame
allfs = hcat( allfs, ctbase; makeunique=true )

rename!( allfs, RENAMES )
Expand All @@ -215,10 +211,10 @@ allfs.Five_plus_people = allfs.Five_people +
allfs.Seven_people +
allfs.Eight_or_more_people
allfs.working = allfs.economically_active_employee + allfs.economically_active_self_employed
allfs.authortity_code = Authority_Codes
allfs.authority_code = authority_codes

CSV.write( "labels.tab", labels; delim='\t')
CSV.write( "allfs.tab", allfs; delim='\t' )
CSV.write( joinpath(DDIR,"labels.tab"), labels; delim='\t')
CSV.write( joinpath(DDIR,"allfs.tab"), allfs; delim='\t' )


const INCLUDE_OCCUP = true
Expand Down Expand Up @@ -536,8 +532,8 @@ function make_target_row_scotland_la!(
end
end

function make_target_list( alldata::DataFrame, council::AbstractString )::Vector
data = alldata[alldata.Authority .== council,:][1,:]
function make_target_list( alldata::DataFrame, council::Symbol )::Vector
data = alldata[alldata.authority_code .== council,:][1,:]
v = initialise_target_dataframe_scotland_la(1)[1,:] # a single row
if INCLUDE_HCOMP
# v.single_person = data.single_person
Expand Down Expand Up @@ -630,10 +626,10 @@ end
function weight_to_la(
settings :: Settings,
alldata :: DataFrame,
code :: AbstractString,
code :: Symbol,
num_households :: Int )
targets = make_target_list( alldata, code )
hhtotal = alldata[alldata.Authority .== code,:total_hhlds][1]
hhtotal = alldata[alldata.authority_code .== code,:total_hhlds][1]
println( "calculating for $code; hh total $hhtotal")
weights = generate_weights(
num_households;
Expand Down Expand Up @@ -680,12 +676,26 @@ dataset = t_make_target_dataset(
initialise_target_dataframe_scotland_la,
make_target_row_scotland_la! )
errors = []
const wides = Set(["Na h-Eileanan Siar"] ) #"Angus", "East Lothian", "East Renfrewshire", "Renfrewshire", "East Dunbartonshire", "North Ayrshire", "West Dunbartonshire", "Shetland Islands", "Orkney Islands", "Inverclyde", "Midlothian", "Argyll and Bute", "East Ayrshire", "Dundee City", "Na h-Eileanan Siar", "South Lanarkshire", "Clackmannanshire", "West Lothian", "Falkirk", "Moray", "South Ayrshire", "City of Edinburgh", "Aberdeenshire", "North Lanarkshire"])
const verywides = Set(["East Lothian", "Midlothian", "East Renfrewshire", "Argyll and Bute", "East Dunbartonshire"])
# s = Set()
const wides = Set([:S12000013] ) # h-Eileanan Siar""Angus", "East Lothian", "East Renfrewshire", "Renfrewshire", "East Dunbartonshire", "North Ayrshire", "West Dunbartonshire", "Shetland Islands", "Orkney Islands", "Inverclyde", "Midlothian", "Argyll and Bute", "East Ayrshire", "Dundee City", "Na h-Eileanan Siar", "South Lanarkshire", "Clackmannanshire", "West Lothian", "Falkirk", "Moray", "South Ayrshire", "City of Edinburgh", "Aberdeenshire", "North Lanarkshire"])
const verywides = Set([:S12000010, :S12000019, :S12000011, :S12000035, :S12000045] )
#"East Lothian", "Midlothian", "East Renfrewshire", "Argyll and Bute", "East Dunbartonshire"])
s = Set()
settings.lower_multiple = 0.01
settings.upper_multiple = 50.0
for code in allfs.Authority
settings.upper_multiple = 50.0

outweights = DataFrame()

outweights.data_year = zeros(Int,settings.num_households)
outweights.hid = zeros(BigInt,settings.num_households)
outweights.uhid = zeros(BigInt,settings.num_households)
for href in 1:settings.num_households
mhh = get_household( href )
outweights.uhid[href] = mhh.uhid
outweights.hid[href] = mhh.hid
outweights.data_year[href] = mhh.data_year
end

for code in allfs.authority_code
global errors, s, INCLUDE_EMPLOYMENT, INCLUDE_HH_SIZE
println( "on $code")
try
Expand All @@ -704,14 +714,21 @@ for code in allfs.Authority
end
w = weight_to_la( settings, allfs, code, settings.num_households )
println("OK")
outweights[!,code] = w
catch e
println( "error $e")
push!( errors, (; e, code ))
push!(s, code )
end

end

println( errors )
println(s)

CSV.write( joinpath( DDIR, "la-frs-weights-scotland-2024.tab"), outweights; delim='\t')

weights = CSV.File( joinpath( DDIR, "la-frs-weights-scotland-2024.tab") ) |> DataFrame



0 comments on commit 7cd198d

Please sign in to comment.