Skip to content

Commit

Permalink
Start on updating Example code for VAT and Wealth
Browse files Browse the repository at this point in the history
  • Loading branch information
grahamstark committed Sep 7, 2023
1 parent e0337ac commit 35dd3e3
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 7 deletions.
92 changes: 91 additions & 1 deletion matching/lcf_frs_matching.jl
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ function make_lfs_subset( lfs :: DataFrame ) :: DataFrame
datayear = lcf.datayear,
month = lcf.a055,
year= lcf.year,
a121 = lcf.a121,
gorx = lcf.gorx,
a065p = lcf.a065p,
a062 = lcf.a062,

any_wages = lcf.any_wages,
any_pension_income = lcf.any_pension_income,
any_selfemp = lcf.any_selfemp,
Expand Down Expand Up @@ -1487,6 +1492,36 @@ function frs_age_hrp( hhagegr4 :: Int ) :: Vector{Int}
out
end

"""
    model_age_hrp( age :: Int )

Convert an age in years into a five-year age-band code.

Ages below 20 map to 1; each subsequent five-year band adds one
(20-24 => 2, 25-29 => 3, ... 70-74 => 12); 75 and over map to 13.
"""
function model_age_hrp( age :: Int )
    # One band per five years, shifted so that 20-24 lands on 2.
    band = div( age, 5 ) - 2
    # Both ends are open: everything under 20 is band 1, everything from 75 up is band 13.
    return clamp( band, 1, 13 )
end

#=
Value = 3.0 Label = 15 but under 20 yrs
Value = 4.0 Label = 20 but under 25 yrs
Expand Down Expand Up @@ -1597,14 +1632,62 @@ function frs_lcf_match_row( frs :: DataFrameRow, lcf :: DataFrameRow ) :: Tuple
return t,incdiff
end

"""
    example_lcf_match( hh :: Household, lcf :: DataFrameRow ) :: Tuple

Score how well the LCF record `lcf` matches the model household `hh`.

The score `t` is built up from:

- `score(...)` comparisons of tenure, region, age of the household head and
  household composition;
- 1 point for each indicator on which the two agree: any wages, any pension income,
  any self-employment income, head unemployed, head non-white, has a female adult;
- `score(...)` comparisons of the number of children and the number of people;
- `10.0 * incdiff`, where `incdiff = compare_income( lcf.income, income )` and
  `income` is accumulated over every person in `hh`.

Returns the tuple `(t, incdiff)`.
"""
function example_lcf_match( hh :: Household, lcf :: DataFrameRow ) :: Tuple
    t = 0.0
    t += score( lcf_tenuremap( lcf.a121 ), model_tenuremap( hh.tenure ))
    # NOTE(review): `model_region` is not defined in this function. Unless a global of
    # that name exists this line throws UndefVarError - presumably a field of `hh`
    # was intended; confirm and replace.
    t += score( lcf_regionmap( lcf.gorx ), model_regionmap( model_region ))
    # !!! both next missing in 2020 LCF
    # t += score( lcf_accmap( lcf.a116 ), frs_accmap( frs.typeacc ))
    # t += score( rooms( lcf.a111p, 998 ), rooms( frs.bedroom6, 999 ))
    # NOTE(review): `frs` is not an argument of this function, so the next two lines
    # look like leftovers from `frs_lcf_match_row` and will fail unless `frs` is a
    # global. They need household-based equivalents (e.g. something built on
    # `model_age_hrp`) - TODO confirm the intended mapping.
    t += score( lcf_age_hrp( lcf.a065p ), frs_age_hrp( frs.hhagegr4 ))
    t += score( lcf_composition_map( lcf.a062 ), frs_composition_map( frs.hhcomps ))
    any_wages = false
    any_selfemp = false
    any_pension_income = false
    has_female_adult = false
    hrp = get_head( hh )
    income = 0.0
    for (pid,pers) in hh.people
        if get(pers.income,wages,0) > 0
            any_wages = true
        end
        if get(pers.income,self_employment_income,0) > 0
            any_selfemp = true
        end
        # anyone aged 66 or over is counted as having pension income
        if (get(pers.income,private_pensions,0) > 0) || pers.age >= 66
            any_pension_income = true
        end
        if (! pers.is_standard_child) && (pers.sex == Female )
            has_female_adult = true
        end
        income += sum( pers.income, start=wages, stop=alimony_and_child_support_received ) # FIXME
    end
    t += lcf.any_wages == any_wages ? 1 : 0
    t += lcf.any_pension_income == any_pension_income ? 1 : 0
    t += lcf.any_selfemp == any_selfemp ? 1 : 0
    # The right-hand sides are parenthesised on purpose: `a == b == c` is a *chained*
    # comparison in Julia, i.e. `(a == b) && (b == c)`, which would compare the LCF
    # flag with the enum value itself instead of with the derived boolean.
    t += lcf.hrp_unemployed == (hrp.employment_status == Unemployed) ? 1 : 0
    t += lcf.hrp_non_white == (hrp.ethnic_group != White) ? 1 : 0
    # t += lcf.datayear == frs.datayear ? 0.5 : 0 # - a little on same year FIXME use date range
    # t += lcf.any_disabled == frs.any_disabled ? 1 : 0 -- not possible in LCF??
    t += Int(lcf.has_female_adult) == Int(has_female_adult) ? 1 : 0
    t += score( lcf.num_children, num_children(hh) )
    t += score( lcf.num_people, num_people(hh) )
    # fixme should we include this at all?
    incdiff = compare_income( lcf.income, income )
    t += 10.0*incdiff
    return t,incdiff
end

"""
Ordering predicate on matching score: true when `l1.score` is strictly
less than `l2.score`.
"""
function islessscore( l1::LCFLocation, l2::LCFLocation )
    return l1.score < l2.score
end

"""
Ordering predicate on income difference: true when `l1.incdiff` is strictly
less than `l2.incdiff`.
"""
function islessincdiff( l1::LCFLocation, l2::LCFLocation )
    return l1.incdiff < l2.incdiff
end

"""
Match one row in the FRS (recip) with all possible lcf matches (donor). Intended to be general
but isn't really any more. FIXME: pass in a saving function so we're not tied to case/datayear.
"""
function match_recip_row( recip :: DataFrameRow, donor :: DataFrame, matcher :: Function ) :: Vector{LCFLocation}
function match_recip_row( recip, donor :: DataFrame, matcher :: Function ) :: Vector{LCFLocation}
drows, dcols = size(donor)
i = 0
similar = Vector{LCFLocation}( undef, drows )
Expand All @@ -1620,6 +1703,8 @@ function match_recip_row( recip :: DataFrameRow, donor :: DataFrame, matcher ::
return similar
end



"""
Create a dataframe for storing all the matches.
This has the FRS record and then 20 lcf records, with case,year,income and matching score for each.
Expand Down Expand Up @@ -1673,6 +1758,11 @@ function map_all( recip :: DataFrame, donor :: DataFrame, matcher :: Function ):
return df
end

"""
    map_example( example :: Household, donor :: DataFrame, matcher :: Function ) :: LCFLocation

Match a single example household against the rows of `donor` using `matcher`
and return the first entry of the resulting match list.
"""
function map_example( example :: Household, donor :: DataFrame, matcher::Function )::LCFLocation
    # This previously called `map_recip_row`, a name that does not appear elsewhere in
    # this file; `match_recip_row` is the function taking (recip, donor, matcher)
    # and returning a Vector{LCFLocation}.
    matches = match_recip_row( example, donor, matcher )
    # NOTE(review): presumably `match_recip_row` returns its matches best-first, so
    # that element 1 is the best match - confirm against its implementation.
    return matches[1]
end

"""
print out our lcf and frs records
"""
Expand Down
18 changes: 13 additions & 5 deletions src/ConsumptionData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,18 @@ end

const DEFAULT_STANDARD_RATE = default_standard_rate()

"""
    find_consumption_for_hh!( hh :: Household, case :: Int, datayear :: Int )

Attach to `hh` the expenditure and factor-cost records of the LCF household
identified by `case` and `datayear`.

`hh.expenditure` is set to the first row of `EXPENDITURE_DATASET`, and
`hh.factor_costs` to the first row of `FACTOR_COST_DATASET`, whose `case` and
`datayear` columns both equal the arguments.

NOTE(review): if no row matches, the `[1,:]` lookup presumably fails before the
assertions below are reached - confirm whether a clearer error is wanted.
"""
function find_consumption_for_hh!( hh :: Household, case :: Int, datayear :: Int )
    # The third argument was previously named `dataset` while the body filtered on
    # `datayear`, so the lookup referred to an undefined variable.
    # println( "find_consumption_for_hh! matching to case $case datayear $datayear")
    hh.expenditure = EXPENDITURE_DATASET[(EXPENDITURE_DATASET.case .== case).&(EXPENDITURE_DATASET.datayear.==datayear),:][1,:]
    hh.factor_costs = FACTOR_COST_DATASET[(FACTOR_COST_DATASET.case .== case).&(FACTOR_COST_DATASET.datayear.==datayear),:][1,:]
    @assert ! isnothing( hh.expenditure )
    @assert ! isnothing( hh.factor_costs )
end

"""
Match in the lcf data using the lookup table constructed in 'matching/lcf_frs_matching.jl'
'which' best, 2nd best etc match (<=20)
Expand All @@ -177,11 +189,7 @@ function find_consumption_for_hh!( hh :: Household, settings :: Settings, which
lcf_datayear_sym = Symbol( "lcf_datayear_$(which)")
case = match[lcf_case_sym]
datayear = match[lcf_datayear_sym]
# println( "find_consumption_for_hh! matching to case $case datayear $datayear")
hh.expenditure = EXPENDITURE_DATASET[(EXPENDITURE_DATASET.case .== case).&(EXPENDITURE_DATASET.datayear.==datayear),:][1,:]
hh.factor_costs = FACTOR_COST_DATASET[(FACTOR_COST_DATASET.case .== case).&(FACTOR_COST_DATASET.datayear.==datayear),:][1,:]
@assert ! isnothing( hh.expenditure )
@assert ! isnothing( hh.factor_costs )
find_consumption_for_hh!( hh, case, datayear )
end

# FIXME FIXME CHAOTIC EVIL this is the diff between actual 157bn and crude modelled VAT receipts of 102mb. 2022
Expand Down
33 changes: 33 additions & 0 deletions src/Definitions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ using ScottishTaxBenefitModel
using ScottishTaxBenefitModel.Utils
using Parameters
using JSON3
import Base.sum

export
Employment_Status, # mapped from empstat
Expand Down Expand Up @@ -1228,6 +1229,38 @@ end
# Dictionary keyed by `Incomes_Type`; the value type `T` may be any subtype of `Real`.
Incomes_Dict = Dict{Incomes_Type,T} where T<:Real
# Set of `Incomes_Type` keys.
Incomes_Set = Set{Incomes_Type}


export sum

"""
    sum( i :: Incomes_Dict{T}, which :: Incomes_Set ) :: T

Add up the values in `i` whose keys are members of `which`.
Keys in `which` that are absent from `i` contribute nothing, so the
result is `zero(T)` when there is no overlap.
"""
function Base.sum( i :: Incomes_Dict{T}, which :: Incomes_Set ) :: T where T <: Number
    # The accumulator was previously initialised as `z` but updated and returned as
    # the undefined `v`; one name is now used throughout.
    total = zero(T)
    for k in which
        total += get( i, k, zero(T))
    end
    return total
end

"""
    ran( start :: Incomes_Type, stop :: Incomes_Type ) :: Incomes_Set

Build the set of every `Incomes_Type` instance `k` with `start <= k <= stop`.
The set is empty when `stop` sorts before `start`.
"""
function ran( start :: Incomes_Type, stop :: Incomes_Type ) :: Incomes_Set
    s = Incomes_Set()
    for k in instances(Incomes_Type)
        # The bounds were previously written as the undefined names `from` / `to`.
        if start <= k <= stop
            push!(s,k)
        end
    end
    return s
end

"""
    sum( i :: Incomes_Dict{T}; start :: Incomes_Type, stop :: Incomes_Type ) :: T

Add up the values in `i` for all keys in the inclusive range `start`..`stop`
of `Incomes_Type`.
"""
function Base.sum( i :: Incomes_Dict{T}; start :: Incomes_Type, stop :: Incomes_Type ) :: T where T <: Number
    # Previously called `rand( start, stop )`, a typo for the `ran` helper above.
    return sum( i, ran( start, stop ))
end


const Expenses = Incomes_Set([
permanent_health_insurance,
health_insurance,
Expand Down
2 changes: 1 addition & 1 deletion src/Utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ start_col and end_col and other fields just copied
diff is difference df2 - df1, Frames should have identical other cols.
"""
function df_diff( df1, df2 :: DataFrame, start_col::Int, end_col :: Int ) :: DataFrame
@argcheck size( df1 ) == size( df2 )
argch@eck size( df1 ) == size( df2 )
## maybe check that the non diffed fields are all the same too..
d = copy(df1)
d[:,start_col:end_col] = df2[:,start_col:end_col] .- df1[:,start_col:end_col]
Expand Down

0 comments on commit 35dd3e3

Please sign in to comment.