ladata

grahamstark · Nov 29, 2024 · 7cd198d · 7cd198d
1 parent db3f5e7
commit 7cd198d
Show file tree

Hide file tree

Showing 2 changed files with 162 additions and 22 deletions.
diff --git a/scripts/ct-calculations-draft.jl b/scripts/ct-calculations-draft.jl
@@ -0,0 +1,123 @@
+using ScottishTaxBenefitModel
+using .RunSettings
+using .ModelHousehold
+using .STBParameters
+using .Definitions
+# NOTE(review): MTIntermediate, make_intermediate, initialise and get_household
+# are used below but not obviously brought into scope by these imports -
+# confirm which ScottishTaxBenefitModel submodules export them.
+
+using CSV
+using DataFrames
+
+# Directory containing the input files read by `calculate_ct`
+# (the council tax levels table and the per-council household weights).
+const DDIR = joinpath("/","mnt","data","ScotBen","data", "local", "local_targets_2024" )
+
+
+"""
+Very simple implementation of the council tax (CT) scheme.
+Note: the only rebate/discount modelled is the single-person
+discount; no other rebates are included.
+"""
+function l_calc_council_tax( 
+    hh :: Household{RT}, 
+    intermed :: MTIntermediate,
+    ctsys :: CouncilTax{RT} ) :: RT where RT 
+    # Band I is only accepted for households whose region is Wales.
+    if hh.region != Wales
+        @assert hh.ct_band != Band_I # We're not Welsh
+    end
+    # Gross bill: the council's band D charge scaled by the relativity
+    # for this household's band.
+    gross = ctsys.band_d[hh.council]* ctsys.relativities[hh.ct_band]
+    ## TODO disabled discounts. See CT note.
+    # A single-adult household has the single person discount applied.
+    return intermed.num_adults == 1 ? gross * (1-ctsys.single_person_discount) : gross
+end
+
+"""
+    calculate_ct()
+
+Draft calculation of weighted council tax revenue and some earnings
+aggregates for each council in the council tax levels file.
+
+Reads the council tax levels table and the per-council household weights
+from `DDIR`, installs the band D charges in the default 2024 Scottish
+system, then, for every council, re-weights each household with that
+council's weights and accumulates:
+
+- `ctrev`: weighted sum of `l_calc_council_tax` over all households;
+- `average_wage` / `ft_jobs`: weighted mean wage and weighted count of
+  full-time employees (part-time employees are not counted);
+- `average_se` / `semp`: weighted mean self-employment income and weighted
+  count of full- and part-time self-employed people.
+
+Returns a `DataFrame` with one row per council processed and columns
+`code`, `ctrev`, `average_wage`, `average_se`, `ft_jobs`, `semp`.
+An average is reported as `0.0` when its weighted head-count is zero.
+
+Note that this mutates `council` and `weight` on the households returned
+by `get_household`.
+"""
+function calculate_ct()
+    ctf = joinpath( DDIR, "council-tax-levels-scotland-24-25-edited.tab")
+    wf = joinpath( DDIR,  "la-frs-weights-scotland-2024.tab") 
+    settings = Settings()
+    ctrates = CSV.File( ctf ) |> DataFrame
+    ctrates.authority_code = Symbol.(ctrates.authority_code)
+    weights = CSV.File( wf ) |> DataFrame
+
+    # The first row of the levels table is skipped everywhere, so that the
+    # band D lookup and the main loop cover exactly the same councils.
+    # NOTE(review): presumably row 1 is a national total rather than a
+    # council - confirm against the data file.
+    councils = ctrates[2:end, :]
+    band_ds = Dict{Symbol,Float64}( councils.authority_code .=> councils.D )
+
+    sys = get_default_system_for_fin_year(2024; scotland=true)
+    sys.loctax.ct.band_d = band_ds
+
+    @time settings.num_households, settings.num_people, nhh2 = initialise( settings; reset=false )
+
+    num_las = size( councils, 1 )
+    revs = DataFrame( 
+        code=fill("", num_las), 
+        ctrev = zeros(num_las), 
+        average_wage=zeros(num_las), 
+        average_se=zeros(num_las), 
+        ft_jobs=zeros(num_las), 
+        semp=zeros(num_las) )
+
+    # TODO local income tax: an earlier draft built a copy of `sys.it` with
+    # 0.01 added to `non_savings_rates` for each council but never used it.
+
+    for (p, r) in enumerate( eachrow( councils ))
+        code = r.authority_code
+        # Household weights for this council, indexed by household number.
+        w = weights[!,code]
+        ctrev = 0.0
+        wage_total = 0.0
+        se_total = 0.0
+        nearers = 0.0
+        nses = 0.0
+        for i in 1:settings.num_households
+            hh = get_household(i)
+            # Treat the household as if it lived in this council.
+            hh.council = code
+            hh.weight = w[i]
+            intermed = make_intermediate( 
+                hh, sys.lmt.hours_limits,
+                sys.age_limits,
+                sys.child_limits )
+            ct = l_calc_council_tax( hh, intermed.hhint, sys.loctax.ct )
+
+            for (_, pers) in hh.people
+                if pers.employment_status == Full_time_Employee
+                    nearers += w[i]
+                    wage_total += w[i]*pers.income[wages]
+                elseif pers.employment_status in (
+                    Full_time_Self_Employed,
+                    Part_time_Self_Employed )
+                    se_total += pers.income[self_employment_income]*w[i]
+                    nses += w[i]
+                end
+            end
+
+            ctrev += w[i]*ct
+        end 
+        # `revs.code` holds strings, so the Symbol code must be converted.
+        revs.code[p] = String(code)
+        revs.ctrev[p] = ctrev
+        # Guard the means against an empty (zero-weight) group.
+        revs.average_wage[p] = nearers > 0 ? wage_total/nearers : 0.0
+        revs.average_se[p] = nses > 0 ? se_total/nses : 0.0
+        revs.ft_jobs[p] = nearers
+        revs.semp[p] = nses
+    end
+
+    return revs
+end
+
diff --git a/scripts/parse-scottish-census.jl b/scripts/parse-scottish-census.jl
@@ -7,6 +7,8 @@ using .RunSettings
 using .Weighting
 using SurveyDataWeighting
 
+const DDIR = joinpath("/","mnt","data","ScotBen","data", "local", "local_targets_2024" )
+
 function readc(filename::String)::Tuple
     d = (CSV.File( filename; normalizenames=true, header=10, skipto=12)|>DataFrame)
     if ismissing(d[1,2])
@@ -20,7 +22,7 @@ function readc(filename::String)::Tuple
 end
 
 function read_all()
-    fs = sort(readdir("."))
+    fs = sort(readdir( DDIR, join=true ))
     n = 0
     allfs = nothing
     rows = 0
@@ -29,7 +31,7 @@ function read_all()
     dfs = []
     labels = DataFrame( filename=fill("",nfs), label=fill("",nfs), start=zeros(Int,nfs) )
     for f in fs
-        if ! isnothing(match(r"^table.*.csv$",f))
+        if ! isnothing(match(r".*table.*.csv$",f))
             n += 1
             println( "on $f")
             data, label, nms = readc(f)
@@ -57,7 +59,7 @@ end
 
 allfs,labels,dfs = read_all()
 
-const Authority_Codes = [
+const authority_codes = [
     :S12000033,
     :S12000034,
     :S12000041,
@@ -174,12 +176,6 @@ RENAMES = Dict(
         "One_family_household_Couple_family" => "single_family",
         "One_family_household_Lone_parent" => "single_parent",
         "Other_household_types" => "multi_family",
-        #=
-        "economically_active_employee" => "employee"
-        "economically_active_self_employed" => "selfemp"
-        "economically_active_unemployed" => "unemployed"
-        "economically_inactive" => "inactive"
-        =#
         "Managers_Directors_and_Senior_Officials" => "Soc_Managers_Directors_and_Senior_Officials",
         "Professional_Occupations" => "Soc_Professional_Occupations",
         "Associate_Professional_and_Technical_Occupations" => "Soc_Associate_Prof_and_Technical_Occupations",
@@ -198,7 +194,7 @@ RENAMES = Dict(
         "Band_G" => "G",
         "Band_H" => "H"])
 
-ctbase=CSV.File("CTAXBASE+2024+-+Tables+-+Chargeable+Dwellings.csv",normalizenames=true)|>DataFrame
+ctbase=CSV.File(joinpath( DDIR, "CTAXBASE+2024+-+Tables+-+Chargeable+Dwellings.csv"),normalizenames=true)|>DataFrame
 allfs = hcat( allfs, ctbase; makeunique=true )
 
 rename!( allfs, RENAMES )
@@ -215,10 +211,10 @@ allfs.Five_plus_people = allfs.Five_people +
         allfs.Seven_people +
         allfs.Eight_or_more_people 
 allfs.working = allfs.economically_active_employee + allfs.economically_active_self_employed 
-allfs.authortity_code = Authority_Codes
+allfs.authority_code = authority_codes
 
-CSV.write( "labels.tab", labels; delim='\t')
-CSV.write( "allfs.tab", allfs; delim='\t' )
+CSV.write( joinpath(DDIR,"labels.tab"), labels; delim='\t')
+CSV.write( joinpath(DDIR,"allfs.tab"), allfs; delim='\t' )
 
 
 const INCLUDE_OCCUP = true
@@ -536,8 +532,8 @@ function make_target_row_scotland_la!(
     end
 end
 
-function make_target_list( alldata::DataFrame, council::AbstractString )::Vector
-    data = alldata[alldata.Authority .== council,:][1,:]
+function make_target_list( alldata::DataFrame, council::Symbol )::Vector
+    data = alldata[alldata.authority_code .== council,:][1,:]
     v = initialise_target_dataframe_scotland_la(1)[1,:] # a single row
     if INCLUDE_HCOMP
         # v.single_person = data.single_person
@@ -630,10 +626,10 @@ end
 function weight_to_la( 
     settings :: Settings,
     alldata :: DataFrame, 
-    code :: AbstractString,
+    code :: Symbol,
     num_households :: Int )
     targets = make_target_list( alldata, code ) 
-    hhtotal = alldata[alldata.Authority .== code,:total_hhlds][1]
+    hhtotal = alldata[alldata.authority_code .== code,:total_hhlds][1]
     println( "calculating for $code; hh total $hhtotal")
     weights = generate_weights(
         num_households;
@@ -680,12 +676,26 @@ dataset = t_make_target_dataset(
     initialise_target_dataframe_scotland_la,
     make_target_row_scotland_la! )
 errors = []
-const wides = Set(["Na h-Eileanan Siar"] ) #"Angus", "East Lothian", "East Renfrewshire", "Renfrewshire", "East Dunbartonshire", "North Ayrshire", "West Dunbartonshire", "Shetland Islands", "Orkney Islands", "Inverclyde", "Midlothian", "Argyll and Bute", "East Ayrshire", "Dundee City", "Na h-Eileanan Siar", "South Lanarkshire", "Clackmannanshire", "West Lothian", "Falkirk", "Moray", "South Ayrshire", "City of Edinburgh", "Aberdeenshire", "North Lanarkshire"])
-const verywides = Set(["East Lothian", "Midlothian", "East Renfrewshire", "Argyll and Bute", "East Dunbartonshire"])
-# s = Set()
+const wides = Set([:S12000013] ) # i.e. "Na h-Eileanan Siar". Commented-out remainder of the old name list: "Angus", "East Lothian", "East Renfrewshire", "Renfrewshire", "East Dunbartonshire", "North Ayrshire", "West Dunbartonshire", "Shetland Islands", "Orkney Islands", "Inverclyde", "Midlothian", "Argyll and Bute", "East Ayrshire", "Dundee City", "Na h-Eileanan Siar", "South Lanarkshire", "Clackmannanshire", "West Lothian", "Falkirk", "Moray", "South Ayrshire", "City of Edinburgh", "Aberdeenshire", "North Lanarkshire"
+const verywides = Set([:S12000010, :S12000019, :S12000011, :S12000035, :S12000045] ) 
+#"East Lothian", "Midlothian", "East Renfrewshire", "Argyll and Bute", "East Dunbartonshire"])
+s = Set()
 settings.lower_multiple = 0.01
-settings.upper_multiple = 50.0     
-for code in allfs.Authority
+settings.upper_multiple = 50.0  
+
+# Output frame for the weights: three identifier columns per household,
+# filled from the households in index order.
+outweights = DataFrame(
+    data_year = zeros(Int,settings.num_households),
+    hid = zeros(BigInt,settings.num_households),
+    uhid = zeros(BigInt,settings.num_households))
+for href in 1:settings.num_households
+    mhh = get_household( href )
+    outweights.data_year[href] = mhh.data_year
+    outweights.hid[href] = mhh.hid
+    outweights.uhid[href] = mhh.uhid
+end
+
+for code in allfs.authority_code
     global errors, s, INCLUDE_EMPLOYMENT, INCLUDE_HH_SIZE 
     println( "on $code")
     try
@@ -704,14 +714,21 @@ for code in allfs.Authority
         end
         w = weight_to_la( settings, allfs, code, settings.num_households )
         println("OK")
+        outweights[!,code] = w
     catch e
         println( "error $e")
         push!( errors, (; e, code ))
         push!(s, code )
     end
+
 end
 
 println( errors )
 println(s)
 
+# Save the household identifiers plus one weights column for each council
+# that was weighted without error, as a tab-delimited file in DDIR.
+CSV.write( joinpath( DDIR, "la-frs-weights-scotland-2024.tab"), outweights; delim='\t')
+
+# Read the file just written back in as a DataFrame.
+weights = CSV.File( joinpath( DDIR, "la-frs-weights-scotland-2024.tab") ) |> DataFrame 
+
+