Skip to content

Commit

Permalink
Initial version of LA weights as module
Browse files — browse the repository at this point in the history
  • Loading branch information
grahamstark committed Dec 16, 2024
1 parent 8557c8b commit 367636d
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 27 deletions.
44 changes: 40 additions & 4 deletions scripts/create-scottish-la-weights.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ using .FRSHouseholdGetter
using .RunSettings
using .Weighting
using SurveyDataWeighting
using CSV
using StatsBase

const DDIR = joinpath("/","mnt","data","ScotBen","data", "local", "local_targets_2024" )

Expand Down Expand Up @@ -95,6 +97,37 @@ const authority_codes = [
:S92000003] # scotland


"""
    summarise_dfs( data, targets, household_total )

Build an 11-row summary `DataFrame` with one column per target variable.

Rows hold, in order: max, (spare/unassigned), mean, median, nmiss, min,
nobs, q25, q75, sd, and finally the ratio of the target value to the
weighted sum of the corresponding data column. `household_total` is
spread evenly over the rows of `data` to form the initial weights.

Fixes over the draft: `rows` was undefined (now `nrows`); the unused
`scale = nrows / popn` line referenced an undefined `popn` and has been
removed; `d[n] = ...` column assignment is invalid in current
DataFrames and is now `d[!, n] = ...`.
"""
function summarise_dfs( data :: DataFrame, targets::DataFrameRow, household_total :: Number )::DataFrame
    nms = Symbol.(names(targets))
    nrows = size( data, 1 )
    d = DataFrame()
    # Equal initial weight per household, summing to household_total.
    initial_weights = Weights( fill( household_total/nrows, nrows ))
    for n in nms
        d[!,n] = zeros(11)
        # NOTE(review): check that the installed StatsBase.summarystats
        # accepts a weights argument, and that the stat field names below
        # (q25/q75 vs p25/p75) match its SummaryStats struct — confirm.
        v = summarystats(data[!,n], initial_weights)
        d[1,n] = v.max
        # row 2 is deliberately left at zero, as in the original layout
        d[3,n] = v.mean
        d[4,n] = v.median
        d[5,n] = v.nmiss
        d[6,n] = v.min
        d[7,n] = v.nobs
        d[8,n] = v.q25
        d[9,n] = v.q75
        d[10,n] = v.sd
        # ratio of the census target to the weighted sum of the data column
        d[11,n] = targets[n] / sum(data[!,n], initial_weights)
    end
    return d
end



DROPS = [
"Authority_1",
Expand Down Expand Up @@ -216,7 +249,6 @@ allfs.authority_code = authority_codes
CSV.write( joinpath(DDIR,"labels.tab"), labels; delim='\t')
CSV.write( joinpath(DDIR,"allfs.tab"), allfs; delim='\t' )


const INCLUDE_OCCUP = true
const INCLUDE_HOUSING = true
const INCLUDE_BEDROOMS = true
Expand Down Expand Up @@ -629,6 +661,7 @@ function weight_to_la(
code :: Symbol,
num_households :: Int )
targets = make_target_list( alldata, code )

hhtotal = alldata[alldata.authority_code .== code,:total_hhlds][1]
println( "calculating for $code; hh total $hhtotal")
weights = generate_weights(
Expand All @@ -640,13 +673,15 @@ function weight_to_la(
targets = targets,
initialise_target_dataframe = initialise_target_dataframe_scotland_la,
make_target_row! = make_target_row_scotland_la! )
initial_weights( )

return weights
end

function t_make_target_dataset(
nhhlds :: Integer,
initialise_target_dataframe :: Function,
make_target_row! :: Function ) :: Matrix
make_target_row! :: Function ) :: Tuple
df :: DataFrame = initialise_target_dataframe( nhhlds )
for hno in 1:nhhlds
hh = FRSHouseholdGetter.get_household( hno )
Expand All @@ -665,13 +700,14 @@ function t_make_target_dataset(
for r in 1:nr
@assert sum(m[r,:] ) != 0 "all zero row $r"
end
return m
return m,df
end

settings = Settings()
@time settings.num_households, settings.num_people, nhh2 =
initialise( settings; reset=false )
dataset = t_make_target_dataset(
# initial version for checking
m, tdf = t_make_target_dataset(
settings.num_households,
initialise_target_dataframe_scotland_la,
make_target_row_scotland_la! )
Expand Down
1 change: 1 addition & 0 deletions src/Results.jl
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ module Results
work_allowance :: RT = zero(RT)
earnings_before_allowances :: RT = zero(RT)
earned_income :: RT = zero(RT)
untapered_earnings :: RT = zero(RT)
other_income :: RT = zero(RT)
tariff_income :: RT = zero(RT)
standard_allowance :: RT = zero(RT)
Expand Down
5 changes: 3 additions & 2 deletions src/UniversalCredit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ function calc_uc_income(
earn = max( 0.0, earn-bur.uc.work_allowance)
end
earned_income = earn*uc.taper
return (; other_income=inc, earned_income=earned_income )
return (; other_income=inc, earned_income=earned_income, untapered_earnings = earn )
end


Expand All @@ -349,7 +349,8 @@ function calc_uc_income!(
uc :: UniversalCreditSys,
minwage :: MinimumWage )
benefit_unit_result.uc.other_income,
benefit_unit_result.uc.earned_income = calc_uc_income(
benefit_unit_result.uc.earned_income,
benefit_unit_result.uc.untapered_earnings = calc_uc_income(
benefit_unit_result,
benefit_unit,
intermed,
Expand Down
2 changes: 1 addition & 1 deletion src/Weighting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ function generate_weights(
hh = FRSHouseholdGetter.get_household( hno )
hh.weight = weights[hno]
end
return weights
return weights, data
end

end # package
4 changes: 3 additions & 1 deletion src/legal_aid_parameters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ export Net_Or_Gross, net, gross
@enum Assessment_Period weekly monthly annualHistoric annualForward
export ContributionType, cont_proportion, cont_fixed
@enum ContributionType cont_proportion cont_fixed
export UCEarningsType, assessed_net_income, tapered_uc_earnings, full_uc_earnings
@enum UCEarningsType assessed_net_income tapered_uc_earnings full_uc_earnings

"""
needed because json (inf) isn't supported and typemax(somefloattype) == Inf
Expand Down Expand Up @@ -214,7 +216,7 @@ end
premia = zero_premia(RT)
uc_limit = zero(RT)
uc_limit_type :: UCLimitType = uc_no_limit
uc_use_earnings = false
uc_use_earnings :: UCEarningsType = assessed_net_income
include_mortgage_repayments = true
end

Expand Down
67 changes: 48 additions & 19 deletions src/targets/scotland-localities-2024.jl
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@

const DDIR = joinpath("/","mnt","data","ScotBen","data", "local", "local_targets_2024" )

"""
    read_census_file( filename )

Read one Census Scotland CSV extract into a `DataFrame`.

The raw files carry preamble lines, so parsing starts with `header=10`
and `skipto=12`. Returns a tuple `(actuald, label, nms)` where:

- `actuald` : rows 1:33 of the parsed frame, minus its first column,
  with the new first column renamed to `"Authority"`
  (assumes rows 1:33 hold the authority-level data — TODO confirm
  against the raw files);
- `label`   : the first parsed column name, used as the table's label;
- `nms`     : the column names of `actuald` before the rename.
"""
function read_census_file(filename::String)::Tuple
    d = CSV.File( filename; normalizenames=true, header=10, skipto=12 ) |> DataFrame
    label = names(d)[1]
    # NOTE(review): an earlier `ismissing(d[1,2])` guard calling an empty
    # (invalid) `delete!( )` was dead code and has been removed.
    actuald = d[1:33, 2:end]
    nms = names(actuald)
    rename!( actuald, 1 => "Authority" )
    return actuald, label, nms
end

function read_all_scot_2024()
fs = sort(readdir( DDIR, join=true ))
"""
Very, very ad-hoc code to munge together a bunch of Census Scotland datafiles into a
single dataframe.
"""
function read_all_scot_2024( file_dir :: AbstractString )::Tuple
fs = sort(reafile_dir( file_dir, join=true ))
n = 0
allfs = nothing
merged_census_files = nothing
rows = 0
cols = 0
nfs = length(fs)
dfs = []
individual_datasets = []
labels = DataFrame( filename=fill("",nfs), label=fill("",nfs), start=zeros(Int,nfs) )
for f in fs
if ! isnothing(match(r".*table.*.csv$",f))
Expand All @@ -34,24 +35,28 @@ function read_all_scot_2024()
labels.label[n]=label
labels.start[n]=cols+2
if n == 1
allfs = deepcopy( data )
merged_census_files = deepcopy( data )
else
n1 = String.(data[:,1])[1:8] # skip "Na hEileanan Siar", since it's sometimes edited
n2 = String.(allfs[:,1])[1:8]
n2 = String.(merged_census_files[:,1])[1:8]
@assert n1 == n2 "$(n1) !== $(n2)" # check in sync
allfs = hcat( allfs, data; makeunique=true )
rows,cols = size(allfs)
merged_census_files = hcat( merged_census_files, data; makeunique=true )
rows,cols = size(merged_census_files)
end
push!(dfs,data)
push!(individual_datasets,data)
# println( "label=$label")
end
end
allfs,labels[1:n,:],dfs
merged_census_files,labels[1:n,:],individual_datasets
end

"""
More ad-hoc code code to load Census Scotland files, clean them up and
add some constructed fields.
"""
function load_census_2024()

allfs,labels,dfs = read_all_scot_2024()
file_dir = joinpath("/","mnt","data","ScotBen","data", "local", "local_targets_2024" )
merged_census_files,labels,individual_datasets = read_all_scot_2024( file_dir )
# FIXME dup
authority_codes = [
:S12000033,
Expand Down Expand Up @@ -184,7 +189,31 @@ function load_census_2024()
"Band_F" => "F",
"Band_G" => "G",
"Band_H" => "H"])


merged_census_files,labels,individual_datasets = read_all_scot_2024()

ctbase=CSV.File(joinpath( file_dir, "CTAXBASE+2024+-+Tables+-+Chargeable+Dwellings.csv"),normalizenames=true)|>DataFrame
merged_census_files = hcat( merged_census_files, ctbase; makeunique=true )

rename!( merged_census_files, RENAMES )
select!( merged_census_files, Not(DROPS))
merged_census_files.total_cts = sum.(eachrow(merged_census_files[:,[:A,:B,:C,:D,:E,:F,:G,:H]]))

# merged columns
merged_census_files.private_rented_rent_free = merged_census_files.private_rented + merged_census_files.rent_free
merged_census_files.converted_flat = merged_census_files.converted_flat_1 + merged_census_files.converted_flat_2
merged_census_files.all_mortgaged = merged_census_files.mortgaged + merged_census_files.shared_ownership + merged_census_files.shared_equity
merged_census_files.bedrooms_4_plus = merged_census_files.bedrooms_4 + merged_census_files.bedrooms_5_plus
merged_census_files.Five_plus_people = merged_census_files.Five_people +
merged_census_files.Six_people +
merged_census_files.Seven_people +
merged_census_files.Eight_or_more_people
merged_census_files.working = merged_census_files.economically_active_employee + merged_census_files.economically_active_self_employed
merged_census_files.authority_code = authority_codes

CSV.write( joinpath(file_dir,"merged_census_labels_2024.tab"), labels; delim='\t')
CSV.write( joinpath(file_dir,"merged_census_files_2024.tab"), merged_census_files; delim='\t' )
return merged_census_files
end

function initialise_target_dataframe_scotland_la( n :: Integer ) :: DataFrame
Expand Down

0 comments on commit 367636d

Please sign in to comment.