diff --git a/Artifacts.toml b/Artifacts.toml index 7c05905b..441f9c3f 100644 --- a/Artifacts.toml +++ b/Artifacts.toml @@ -1,9 +1,18 @@ [augdata] -git-tree-sha1 = "288471b266e4ec933d1c6190c8dd7fc3e82d3f4c" +git-tree-sha1 = "b1ad31b8fcd1165a57e02d7de274b7140254b480" +lazy = true [[augdata.download]] - sha256 = "774759a0726fe9d904016569c643650ad1cb0bcf650784c52f60bdcb35a0f2db" - url = "https://virtual-worlds.biz/artifacts//aux-data.tar.gz" + sha256 = "ff09ec25734af6aecd831a4fb89ba3c402945812f5b34a29986aed8252dadf02" + url = "https://virtual-worlds.biz/artifacts//augdata.tar.gz" + +[disability] +git-tree-sha1 = "288ef84eac1aad34af423852e64001e75be3b822" +lazy = true + + [[disability.download]] + sha256 = "7ed1e796fc2e09a4c48f772ea3cd06b7c6aab721c183f6905b2fa83890d0184b" + url = "https://virtual-worlds.biz/artifacts//disability.tar.gz" [exampledata] git-tree-sha1 = "7853136d3530ea531e4f45a29f8fcef81d08fc1c" diff --git a/Project.toml b/Project.toml index 6a591a32..bab3cd67 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.1.2" [deps] ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197" ArtifactUtils = "8b73e784-e7d8-4ea5-973d-377fed4e3bce" +Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" BudgetConstraints = "51aacf4b-43e8-45f9-b960-ce5126c2a956" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" @@ -63,6 +64,7 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [compat] ArgCheck = "2.3.0, 4" ArtifactUtils = "0.2.4" +Artifacts = "1.11.0" BenchmarkTools = "1.3.2, 4" BudgetConstraints = "1.0.1, 3" CSV = "0.10.10, 1" @@ -115,22 +117,19 @@ artifact_server_upload = "vw@virtual-worlds.biz:public_html/artifacts/" artifact_server_url = "https://virtual-worlds.biz/artifacts/" auto_weight = true default_run_name = "default_run" -use_weighting = true -target_nation = "N_Scotland" +do_marginal_rates = false +do_replacement_rates = false dump_frames = false -to_y = 2024 -to_q = 3 +dump_frames2 = true 
+ineq_income_measure = "eq_bhc_net_income" means_tested_routing = "modelled_phase_in" poverty_line_source = "pl_first_sys" -ineq_income_measure = "eq_bhc_net_income" -do_marginal_rates = false -do_replacement_rates = false +prem2 = "enhanced_disability_child" replacement_rate_hours = 30 +target_nation = "N_Scotland" +to_q = 3 +to_y = 2024 +use_weighting = true -prem2 = "enhanced_disability_child" -dump_frames2 = true - [targets] test = ["Test"] - - diff --git a/docs/ima-pluto.jl b/docs/ima-pluto.jl index 96152efb..de6e0b09 100644 --- a/docs/ima-pluto.jl +++ b/docs/ima-pluto.jl @@ -57,7 +57,6 @@ begin const DEFAULT_NUM_TYPE = Float64 settings = RunSettings.Settings() settings.requested_threads = 4 - data_dir( settings ) = "/home/graham_s/julia/vw/ScottishTaxBenefitModel/data" function init_data(; reset :: Bool = false ) nhh = FRSHouseholdGetter.get_num_households() @@ -71,14 +70,6 @@ begin function load_system()::TaxBenefitSystem sys = get_default_system_for_cal_year( 2021 ) - # load_file( joinpath( Definitions.MODEL_PARAMS_DIR, "sys_2021_22.jl" )) - # - # Note that as of Budget21 removing these doesn't actually happen till May 2022. 
- # - # load_file!( sys, joinpath( Definitions.MODEL_PARAMS_DIR, "sys_2021-uplift-removed.jl")) - # uc taper to 55 - # load_file!( sys, joinpath( Definitions.MODEL_PARAMS_DIR, "budget_2021_uc_changes.jl")) - # weeklyise!( sys ) return sys end diff --git a/params/sys_2023_24_ruk.jl b/params/sys_2023_24_ruk.jl index 9682a17a..85ec09bb 100644 --- a/params/sys_2023_24_ruk.jl +++ b/params/sys_2023_24_ruk.jl @@ -359,7 +359,7 @@ function load_sys_2023_24_ruk!( sys :: TaxBenefitSystem{T} ) where T :NIRELAND => -99999.99 ] ) - brmapath = joinpath(MODEL_DATA_DIR, "local", "brma-2023-2024-country-averages.csv") + brmapath = joinpath( artifact"augdata", "brma-2023-2024-country-averages.csv" ) sys.hr.brmas = loadBRMAs( 4, Float64, brmapath ) diff --git a/params/sys_2023_24_scotland.jl b/params/sys_2023_24_scotland.jl index ab56ec7f..f5856e73 100644 --- a/params/sys_2023_24_scotland.jl +++ b/params/sys_2023_24_scotland.jl @@ -10,7 +10,7 @@ function load_sys_2023_24_scotland!( sys :: TaxBenefitSystem ) sys.it.non_savings_thresholds = [2_162, 13_118, 31_092, 125_120.0] sys.it.non_savings_basic_rate = 2 # above this counts as higher rate rate FIXME 3??? sys.nmt_bens.carers.scottish_supplement = 0.0 # FROM APRIL 2021 - brmapath = joinpath(MODEL_DATA_DIR, "local", "lha_rates_scotland_2023_24.csv") + brmapath = joinpath( artifact"augdata", "lha_rates_scotland_2023_24.csv") sys.hr.brmas = loadBRMAs( 4, Float64, brmapath ) sys.loctax.ct.band_d = Dict( [ diff --git a/regressions/disability_regressions.jl b/regressions/disability_regressions.jl index 5e4ee6b4..c2f22a76 100644 --- a/regressions/disability_regressions.jl +++ b/regressions/disability_regressions.jl @@ -10,22 +10,11 @@ using CSV,DataFrames,GLM,RegressionTables const settings = Settings() +# settings.data_source = SyntheticSource # !!! changeme!!! +daf = get_data_artifact( settings ) -const LOCAL_DATA_DIR = data_dir( settings ) -# -# !!! 
For Actual Dataset, UK wide with Scottish dummy -# For synth data, Scotland only for now!!! -# -settings.data_source = SyntheticSource # !!! changeme!!! -if settings.data_source == FRS - settings.household_name = "model_households-2015-2021.tab" - settings.people_name = "model_people-2015-2021.tab" -end - -const DATASETS = main_datasets( settings ) - -frshh = CSV.File( DATASETS.hhlds ) |> DataFrame -frspeople = CSV.File( DATASETS.people ) |> DataFrame +frshh = CSV.File( joinpath( daf, "households.tab" )) |> DataFrame +frspeople = CSV.File( joinpath( daf, "people.tab" )) |> DataFrame fm = innerjoin( frshh, frspeople, on=[:data_year, :hid ], makeunique=true ) diff --git a/src/ExampleHouseholdGetter.jl b/src/ExampleHouseholdGetter.jl index 4b340182..227b9811 100644 --- a/src/ExampleHouseholdGetter.jl +++ b/src/ExampleHouseholdGetter.jl @@ -8,6 +8,7 @@ using DataFrames using CSV using ArgCheck using Pkg.Artifacts +using LazyArtifacts using ScottishTaxBenefitModel using .Definitions @@ -56,12 +57,10 @@ function initialise( global KEYMAP global EXAMPLE_HOUSEHOLDS - # tmpsource = settings.data_source # hack to work round datasource being wired in to settings - # settings.data_source = ExampleSource - # println( "DEF_MODEL_DATA_DIR=|$(Definitions.DEF_MODEL_DATA_DIR)| MODEL_DATA_DIR=|$MODEL_DATA_DIR|") # lazy load cons data if needs be tmp_data_source = settings.data_source settings.data_source = ExampleSource + # the data_source swap above is a hack to work round datasource being wired in to settings if settings.indirect_method == matching ConsumptionData.init( settings ) end diff --git a/src/HistoricBenefits.jl b/src/HistoricBenefits.jl index 8be06ffc..569fb8a3 100644 --- a/src/HistoricBenefits.jl +++ b/src/HistoricBenefits.jl @@ -9,7 +9,7 @@ module HistoricBenefits # with a series of complete parameter files, once we have # everything defined fully. 
# -using CSV, DataFrames, Dates +using CSV, DataFrames, Dates, Artifacts, LazyArtifacts using ScottishTaxBenefitModel using .Definitions using .ModelHousehold: Person @@ -41,7 +41,7 @@ function load_historic( file ) :: Dict end function load_pip() - pip=CSV.File( "$(MODEL_DATA_DIR)/receipts/pip_2002-2023_from_stat_explore.csv", + pip=CSV.File( joinpath(artifact"augdata", "pip_2002-2023_from_stat_explore.csv"), missingstring="..", types=Dict([:Date=>String]))|>DataFrame pip.Date = Date.( pip.Date, dateformat"yyyymm" ) @@ -49,12 +49,15 @@ function load_pip() end function load_dla() - dla=CSV.File( "$(MODEL_DATA_DIR)/receipts/dla_2002-2023_from_stat_explore.csv" )|> DataFrame + dla=CSV.File( joinpath(artifact"augdata","dla_2002-2023_from_stat_explore.csv" ))|> DataFrame dla.Date = Date.( dla.Date, dateformat"u-yy" ) .+Year(2000) return dla end -const HISTORIC_BENEFITS = load_historic( "$(MODEL_PARAMS_DIR)/historic_benefits.csv" ) +const HISTORIC_BENEFITS = load_historic( + joinpath( + artifact"augdata", "historic_benefits.csv" )) + const DLA_RECEIPTS = load_dla() const PIP_RECEIPTS = load_pip() diff --git a/src/LocalLevelCalculations.jl b/src/LocalLevelCalculations.jl index 26332fe3..2fff9649 100644 --- a/src/LocalLevelCalculations.jl +++ b/src/LocalLevelCalculations.jl @@ -9,6 +9,8 @@ module LocalLevelCalculations using StaticArrays using CSV,DataFrames +using Pkg, Pkg.Artifacts +using LazyArtifacts using ScottishTaxBenefitModel using .Definitions @@ -118,7 +120,7 @@ export # FIXME hard code this in function make_la_to_brma_map() - lacsv = CSV.File( "$(MODEL_DATA_DIR)/local/la_to_brma_approx_mappings.csv" ) |> DataFrame + lacsv = CSV.File( joinpath( artifact"augdata", "la_to_brma_approx_mappings.csv" )) |> DataFrame out = Dict{Symbol,Symbol}() for r in eachrow( lacsv ) out[Symbol(r.ccode)] = Symbol(r.bcode) diff --git a/src/RunSettings.jl b/src/RunSettings.jl index 7ed3fa4f..8d080868 100644 --- a/src/RunSettings.jl +++ b/src/RunSettings.jl @@ -4,6 +4,7 @@ module 
RunSettings # using Pkg using Pkg.Artifacts + using LazyArtifacts using Parameters using Preferences using UUIDs @@ -45,7 +46,6 @@ module RunSettings # DatasetType, # actual_data, # synthetic_data, - data_dir, get_skiplist, get_all_uk_settings_2023, @@ -90,7 +90,6 @@ module RunSettings scotland_full :: Bool = true weighted = @load_preference( "use_weighting", true ) auto_weight = @load_preference( "auto_weight", true ) - data_dir :: String = MODEL_DATA_DIR # DELETE household_name = "model_households_scotland-2015-2021-w-enums-2" people_name = "model_people_scotland-2015-2021-w-enums-2" target_nation :: Nation = eval(Symbol(@load_preference("target_nation", "N_Scotland"))) # N_Scotland @@ -178,6 +177,7 @@ module RunSettings settings.replacement_rate_hours = @load_preference( "replacement_rate_hours" ) end =# + function get_data_artifact( settings::Settings )::AbstractString return if settings.data_source == FRSSource @@ -197,6 +197,16 @@ module RunSettings end end + function main_datasets( settings :: Settings ) :: NamedTuple + artd = get_data_artifact( settings ) + return ( + hhlds = joinpath( artd, "households.tab" ), + people = joinpath( artd, "people.tab" ) + ) + end + + #= + function data_dir( settings :: Settings ) :: String ds = if settings.data_source == FRSSource "actual_data" @@ -243,6 +253,8 @@ module RunSettings ) end + =# + """ Hacky prebuilt settings for the Northumbria model. """ diff --git a/src/Runner.jl b/src/Runner.jl index 761fc9e1..5494ab87 100644 --- a/src/Runner.jl +++ b/src/Runner.jl @@ -6,6 +6,8 @@ module Runner # using Base.Threads + using Pkg, Pkg.Artifacts + using LazyArtifacts using Parameters: @with_kw using DataFrames: DataFrame, DataFrameRow, Not, select! 
using CSV @@ -75,7 +77,7 @@ module Runner @time settings.num_households, settings.num_people, nhh2 = FRSHouseholdGetter.initialise( settings ) if settings.benefit_generosity_estimates_available - BenefitGenerosity.initialise( MODEL_DATA_DIR*"/disability/" ) + BenefitGenerosity.initialise( artifact"disability" ) end end full_results = Array{HouseholdResult}(undef,0,0) diff --git a/src/STBParameters.jl b/src/STBParameters.jl index 95f2a1af..84e5977d 100644 --- a/src/STBParameters.jl +++ b/src/STBParameters.jl @@ -13,6 +13,8 @@ using Dates: Date, now, TimeType, Year using TimeSeries using StaticArrays using Parameters +using Pkg, Pkg.Artifacts +using LazyArtifacts using DataFrames,CSV using ScottishTaxBenefitModel @@ -822,7 +824,7 @@ end function weeklyise!( hb :: HousingBenefits; wpm=WEEKS_PER_MONTH, wpy=WEEKS_PER_YEARs ) hb.taper /= 100.0 - end +end @with_kw mutable struct LegacyMeansTestedBenefitSystem{RT<:Real} # CPAG 2019/bur.pers[pid].20 p335 @@ -863,7 +865,7 @@ function loadBRMAs( N :: Int, T :: Type, file :: String ) :: Dict{Symbol,BRMA{N dict end -const DEFAULT_BRMA_2021 = joinpath(MODEL_DATA_DIR,"local", "lha_rates_scotland_2020_21.csv") +const DEFAULT_BRMA_2021 = joinpath( artifact"augdata", "lha_rates_scotland_2020_21.csv") @with_kw mutable struct HousingRestrictions{RT<:Real} abolished :: Bool = false diff --git a/src/Utils.jl b/src/Utils.jl index ab10d72f..d6efdeb1 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -94,15 +94,18 @@ file should contain: `people.tab` `households.tab` `README.md`, all top-level """ function make_artifact(; artifact_name :: AbstractString, - gzip_file_name:: AbstractString, toml_file = "Artifacts.toml" )::Int - art_server_upload = @load_preference( "artifact_server_upload" ) - art_server_url = @load_preference( "artifact_server_url" ) - dest = "$(art_server_upload)/$(gzip_file_name)" - println( "copying |tmp/$gzip_file_name| to |$dest| ") - upload = `scp tmp/$(gzip_file_name) $(dest)` + gzip_file_name = 
"$(artifact_name).tar.gz" + dir = "/mnt/data/ScotBen/artifacts/" + artifact_server_upload = @load_preference( "artifact_server_upload" ) + artifact_server_url = @load_preference( "artifact_server_url" ) + tarcmd = `tar zcvf $(dir)/tmp/$(gzip_file_name) -C $(dir)/$(artifact_name)/ .` + run( tarcmd ) + dest = "$(artifact_server_upload)/$(gzip_file_name)" + println( "copying |$(dir)/tmp/$gzip_file_name| to |$dest| ") + upload = `scp $(dir)/tmp/$(gzip_file_name) $(dest)` println( "upload cmd |$upload|") - url = "$(art_server_url)/$gzip_file_name" + url = "$(artifact_server_url)/$gzip_file_name" try run( upload ) add_artifact!( toml_file, artifact_name, url; force=true, lazy=true ) diff --git a/src/notused/ExampleHouseholdGetter.jl b/src/notused/ExampleHouseholdGetter.jl deleted file mode 100644 index edd731d7..00000000 --- a/src/notused/ExampleHouseholdGetter.jl +++ /dev/null @@ -1,113 +0,0 @@ -module ExampleHouseholdGetter -# -# This module contains code to fetch some test households from CSV files. -# It differs from the main FRSHouseholdGetter in that it doesn't bother with weighting, and households -# to be accessed by names rather than indexes. -# -using DataFrames -using CSV -using ArgCheck - -using ScottishTaxBenefitModel -using .Definitions -using .ModelHousehold: Household -using .ConsumptionData: find_consumption_for_hh! -using .WealthData: find_wealth_for_hh! 
-using .HouseholdFromFrame: load_hhld_from_frame -using .MatchingLibs -using .RunSettings - -export initialise, get_household - -EXAMPLE_HOUSEHOLDS = Dict{String,Household}() - -KEYMAP = Vector{AbstractString}() - -function find_consumption_for_example!( hh, settings ) - @argcheck settings.indirect_method == matching - c = MatchingLibs.match_recip_row( - hh, - ConsumptionData.EXPENDITURE_DATASET, - MatchingLibs.example_lcf_match )[1] - find_consumption_for_hh!( hh, c.case, c.datayear ) -end - -function find_wealth_for_example!( hh, settings ) - @argcheck settings.wealth_method == matching - c = MatchingLibs.match_recip_row( - hh, - WealthData.WEALTH_DATASET, - MatchingLibs.model_was_match, - :weekly_gross_income )[1] - find_wealth_for_hh!( hh, c.case ) -end - - -""" -return number of households available -""" -function initialise( - settings :: Settings - ; - # fixme move these to settings - household_name :: AbstractString = "example_households", - people_name :: AbstractString = "example_people" ) :: Vector{AbstractString} - - global KEYMAP - global EXAMPLE_HOUSEHOLDS - tmpsource = settings.data_source # hack to work round datasource being wired in to settings - settings.data_source = ExampleSource - println( "DEF_MODEL_DATA_DIR=|$(Definitions.DEF_MODEL_DATA_DIR)| MODEL_DATA_DIR=|$MODEL_DATA_DIR|") - # lazy load cons data if needs be - if settings.indirect_method == matching - ConsumptionData.init( settings ) - end - if settings.wealth_method == matching - WealthData.init( settings ) - end - KEYMAP = Vector{AbstractString}() - ds = example_datasets( settings ) - - # hh_dataset = CSV.File( ds.hhlds, delim='\t' ) |> DataFrame - # people_dataset = CSV.File(ds.people, delim='\t' ) |> DataFrame - @show ds - hh_dataset = HouseholdFromFrame.read_hh( ds.hhlds ) # CSV.File( ds.hhlds ) |> DataFrame - people_dataset = HouseholdFromFrame.read_pers( ds.people ) # CSV.File( ds.people ) |> DataFrame - - npeople = size( people_dataset)[1] - nhhlds = size( hh_dataset )[1] - for 
hseq in 1:nhhlds - hhf = hh_dataset[hseq,:] - push!( KEYMAP, hhf.name ) - println( "loading $(hhf.name) $(hhf.council)") - hh = load_hhld_from_frame( - hseq, hhf, people_dataset, settings ) - if( settings.indirect_method == matching ) && (settings.do_indirect_tax_calculations) - find_consumption_for_example!( hh, settings ) - end - if settings.wealth_method == matching - find_wealth_for_example!( hh, settings ) - end - EXAMPLE_HOUSEHOLDS[hhf.name] = hh - println( EXAMPLE_HOUSEHOLDS[hhf.name].council ) - end - settings.data_source = tmpsource - return KEYMAP -end - -function example_names() - return KEYMAP -end - -function get_household( pos :: Integer ) :: Household - key = KEYMAP[pos] - return EXAMPLE_HOUSEHOLDS[key] -end - -function get_household( name :: AbstractString ) :: Household - # global EXAMPLE_HOUSEHOLDS - return EXAMPLE_HOUSEHOLDS[name] -end - - -end diff --git a/src/notused/FRSHouseholdGetter.jl b/src/notused/FRSHouseholdGetter.jl deleted file mode 100644 index 0be82dee..00000000 --- a/src/notused/FRSHouseholdGetter.jl +++ /dev/null @@ -1,351 +0,0 @@ -module FRSHouseholdGetter - - # - # This module retrieves the main dataset. The data is retrieved from CSV files and assembled once, including generating sample weights, - # but it can then be accessed multiple times in a session. Retrieval is currently by index (1,2,3...) only but retrieval by sernum/datayear, - # or by some sort of query interface might be added later. - # - - using CSV - using DataFrames: DataFrame, DataFrameRow, AbstractDataFrame - using StatsBase - - using ScottishTaxBenefitModel - - using .Definitions - - using .ModelHousehold: - Household, - num_people, - uprate! 
- - using .HouseholdFromFrame: - create_regression_dataframe, - load_hhld_from_frame - - using .RunSettings - - using .Weighting: - generate_weights - - using .Uprating: load_prices - - using .Utils:get_quantiles - - using .LegalAidData - using .ConsumptionData - using .WealthData - - export - initialise, - get_data_years, - get_household, - num_households, - not_in_skiplist, - get_household_of_person, - get_interview_years, - get_regression_dataset, - get_people_slots_for_household, - get_slot_for_person, - get_slot_for_household - - ## See scripts/performance/hhld_example.jl for the rationalle behind this wrapper - # - # If you're using a getter to get hhlds, wrap the array of hhlds in a struct, - # so you can give the - # array a type and declare a constant. This aviods type instability which can murder - # performance of the getter. - # - - struct OnePos - hseq :: Int - pseq :: Int - end - - struct HHPeople - hseq :: Int - pseqs :: Vector{Int} - end - - struct HHWrapper - hhlds :: Vector{Household{Float64}} - weight :: Vector{Float64} - dimensions :: Vector{Int} - hh_map :: Dict{OneIndex,HHPeople} - pers_map :: Dict{OneIndex,OnePos} - data_years :: Vector{Int} - interview_years :: Vector{Int} - end - - const MODEL_HOUSEHOLDS = - HHWrapper( - Vector{Household{Float64}}(undef, 0 ), - zeros(Float64,0), - zeros(Int,3), - Dict{OneIndex,HHPeople}(), - Dict{OneIndex,Int}(), - zeros(0), - zeros(0)) - - mutable struct RegWrapper # I don't understand why I need 'mutable' here, but.. 
- data :: DataFrame - end - - function get_skiplist( settings :: Settings )::DataFrame - df = DataFrame( hid=zeros(BigInt,0), data_year=zeros(Int,0), reason=fill("",0)) - if settings.skiplist != "" - fname = main_datasets( settings ).skiplist - df = CSV.File( fname )|>DataFrame - end - return df - end - - """ - Insert into data a pair of basic deciles in the hh data based on actual pre-model income and eq scale - """ - function fill_in_deciles!() - nhhs = MODEL_HOUSEHOLDS.dimensions[1] - inc = zeros(nhhs) - eqinc = zeros(nhhs) - w = zeros(nhhs) - for hno in 1:nhhs - hh = get_household(hno) - inc[hno] = hh.original_gross_income - eqinc[hno] = hh.original_gross_income / hh.equivalence_scales.oecd_bhc - w[hno] = hh.weight*num_people(hh) # person level deciles - end - # HACK HACK HACK - need to add gross inc to Scottish subset and uprate it - if sum( inc ) ≈ 0 - return - end - wt = Weights(w) - # FIXME duplication here - incbreaks = quantile(inc,wt,[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]) - incdecs = get_quantiles( inc, incbreaks ) - eqbreaks = quantile(eqinc,wt,[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]) - eqdecs = get_quantiles( eqinc, eqbreaks ) - for hno in 1:nhhs - hh = get_household(hno) - hh.original_income_decile = incdecs[hno] - hh.equiv_original_income_decile = eqdecs[hno] - end - # idiot check - 10 deciles each roughtly the same - deccheck = zeros(10) - for hno in 1:nhhs - hh = get_household(hno) - dec = hh.original_income_decile - @assert dec in 1:10 - deccheck[dec] += hh.weight*num_people(hh) - end - for i in 1:10 - println( "$i : $(deccheck[i])" ) - end - for dc in deccheck - prop = dc/deccheck[1] - @assert isapprox( prop, 1, rtol=0.03 ) "Counts in Deciles seem very uneven: prop vs [1]=$(prop) abs diff $(dc - deccheck[1])." 
- end - end - - # fixme I don't see how to make this a constant - REG_DATA :: DataFrame = DataFrame() - - """ - This hh not in skiplist - """ - function not_in_skiplist( hr :: DataFrameRow, skiplist :: DataFrame )::Bool - if size(skiplist)[1] == 0 - return true - end - sk = skiplist[ (skiplist.data_year .== hr.data_year ) .& - (skiplist.hid .== hr.hid ),:] - return size(sk)[1] == 0 # not in skiplist - end - - """ - Initialise the dataset. If this has already been done, do nothing unless - `reset` is true. - return (number of households available, num people loaded inc. kids, num hhls in dataset (should always = item[1])) - """ - function initialise( - settings :: Settings; - reset :: Bool = false ) :: Tuple - - global MODEL_HOUSEHOLDS - global REG_DATA - nhh = size( MODEL_HOUSEHOLDS.hhlds )[1] - if( nhh > 0 ) && ( ! reset ) - # weird syntax to make tuple from array; - # see e.g. https://discourse.julialang.org/t/array-to-tuple/9024 - return (MODEL_HOUSEHOLDS.dimensions...,) - end - load_prices( settings ) - if settings.indirect_method == matching - ConsumptionData.init( settings; reset = reset ) - end - if settings.wealth_method == matching - WealthData.init( settings; reset = reset ) - end - - if settings.do_legal_aid - LegalAidData.init( settings; reset = reset ) - end - skiplist = get_skiplist( settings ) - ds = main_datasets( settings ) - - hh_dataset = HouseholdFromFrame.read_hh( ds.hhlds ) # CSV.File( ds.hhlds ) |> DataFrame - people_dataset = HouseholdFromFrame.read_pers( ds.people ) # CSV.File( ds.people ) |> DataFrame - - npeople = 0; # size( people_dataset)[1] - nhhlds = size( hh_dataset )[1] - resize!( MODEL_HOUSEHOLDS.hhlds, nhhlds ) - resize!( MODEL_HOUSEHOLDS.weight, nhhlds ) - MODEL_HOUSEHOLDS.weight .= 0 - - pseq = 0 - hseq = 0 - dseq = 0 - for hdata in eachrow( hh_dataset ) - if not_in_skiplist( hdata, skiplist ) - hseq += 1 - hh = load_hhld_from_frame( dseq, hdata, people_dataset, settings ) - npeople += num_people(hh) - 
MODEL_HOUSEHOLDS.hhlds[hseq] = hh - if settings.wealth_method == matching - WealthData.find_wealth_for_hh!( hh, settings, 1 ) # fixme allow 1 to vary somehow Lee Chung.. - end - uprate!( hh, settings ) - if( settings.indirect_method == matching ) && (settings.do_indirect_tax_calculations) - ConsumptionData.find_consumption_for_hh!( hh, settings, 1 ) # fixme allow 1 to vary somehow Lee Chung.. - if settings.impute_fields_from_consumption - ConsumptionData.impute_stuff_from_consumption!(hh,settings) - end - end - - pseqs = [] - for pid in keys(hh.people) - pseq += 1 - push!( pseqs, pseq ) - MODEL_HOUSEHOLDS.pers_map[OneIndex( pid, hh.data_year )] = OnePos(hseq,pseq) - end - MODEL_HOUSEHOLDS.hh_map[OneIndex( hh.hid, hh.data_year )] = HHPeople( hseq, pseqs) - if ! (hh.data_year in MODEL_HOUSEHOLDS.data_years ) - push!( MODEL_HOUSEHOLDS.data_years, hh.data_year ) - end - if settings.do_legal_aid - LegalAidData.add_la_probs!( hh ) - end - if ! (hh.interview_year in MODEL_HOUSEHOLDS.interview_years ) - push!( MODEL_HOUSEHOLDS.interview_years, hh.interview_year ) - end - end # don't skip - end - resize!( MODEL_HOUSEHOLDS.hhlds, hseq ) - resize!( MODEL_HOUSEHOLDS.weight, hseq ) - nhhlds = size( MODEL_HOUSEHOLDS.hhlds )[1] - # default weighting using current Scotland settings; otherwise do manually - if settings.auto_weight && settings.target_nation == N_Scotland - @time weight = generate_weights( - nhhlds; - weight_type = settings.weight_type, - lower_multiple = settings.lower_multiple, - upper_multiple = settings.upper_multiple ) - for i in eachindex( weight ) # just assign weight = weight? - MODEL_HOUSEHOLDS.weight[i] = weight[i] - end - else - for hseq in 1:nhhlds # just assign weight = weight? 
- MODEL_HOUSEHOLDS.weight[hseq] = MODEL_HOUSEHOLDS.hhlds[hseq].weight - end - end - # in case we have skipped some - - MODEL_HOUSEHOLDS.dimensions.= - size(MODEL_HOUSEHOLDS.hhlds)[1], - npeople, - nhhlds - - REG_DATA = create_regression_dataframe( hh_dataset, people_dataset ) - fill_in_deciles!() - return (MODEL_HOUSEHOLDS.dimensions...,) - end - - """ - Save some of the bits that are generated internally. - FIXME: add an extract function - """ - function extract_weights_and_deciles( - settings :: Settings, - filename :: String ) - fname = joinpath(settings.output_dir, "$(filename).tab" ) - f = open( fname, "w") - println( f, "hid\tdata_year\tweight\tdecile") - for hno in 1:settings.num_households - hh = get_household(hno) - println(f, hh.hid, '\t', hh.data_year, '\t', hh.weight, '\t', hh.equiv_original_income_decile) - end - close(f) - end - - function get_regression_dataset()::DataFrame - return REG_DATA - end - - """ - A vector of the data years in the actual data e.g.2014,2020 .. - """ - function get_data_years()::Vector{Integer} - return MODEL_HOUSEHOLDS.data_years - end - - """ - A vector of the interview years in the actual data e.g.2014,2020 .. 
- """ - function get_interview_years()::Vector{Integer} - return MODEL_HOUSEHOLDS.interview_years - end - - function get_household( pos :: Integer ) :: Household - hh = MODEL_HOUSEHOLDS.hhlds[pos] - hh.weight = MODEL_HOUSEHOLDS.weight[pos] - return hh - end - - function get_household( hid :: BigInt, datayear :: Int ) :: Household - pos :: Int = MODEL_HOUSEHOLDS.hh_map[ OneIndex( hid, datayear) ].hseq - return get_household( pos ) - end - - function get_household( oi :: OneIndex ) :: Household - pos :: Int = MODEL_HOUSEHOLDS.hh_map[ oi ].hseq - return get_household( pos ) - end - - function get_household_of_person( pid :: BigInt, datayear :: Int ) :: Union{Nothing,Household} - pos = get( MODEL_HOUSEHOLDS.pers_map, OneIndex( pid, datayear), nothing ) - if pos === nothing - return nothing - end - return get_household( pos.hseq ) - end - - function get_slot_for_household( hid :: BigInt, datayear :: Int ) :: Int - return MODEL_HOUSEHOLDS.hh_map[ OneIndex( hid, datayear) ].hseq - end - - function get_people_slots_for_household( hid :: BigInt, datayear :: Int ) :: Vector{Int} - return MODEL_HOUSEHOLDS.hh_map[ OneIndex( hid, datayear) ].pseqs - end - - function get_slot_for_person( pid :: BigInt, datayear :: Int ) :: Int - return MODEL_HOUSEHOLDS.pers_map[ OneIndex( pid, datayear) ].pseq - end - - function get_num_households()::Integer - return size( MODEL_HOUSEHOLDS.hhlds )[1] - end - - function get_num_people()::Integer - return MODEL_HOUSEHOLDS.dimensions[2] - end - -end # module diff --git a/src/notused/HealthRegressions.jl b/src/notused/HealthRegressions.jl deleted file mode 100644 index 1fa09473..00000000 --- a/src/notused/HealthRegressions.jl +++ /dev/null @@ -1,252 +0,0 @@ -module HealthRegressions - -using Base.Threads - -using ArgCheck -using DataFrames -using StatsBase -using Observables -using ScottishTaxBenefitModel -using .Definitions -using .FRSHouseholdGetter: get_regression_dataset, get_slot_for_person -using .GeneralTaxComponents:WEEKS_PER_MONTH -using 
.ModelHousehold -using .Monitor: Progress -using .Results -using .RunSettings -using .Utils: make_start_stops - -export get_death_prob, - get_sf6d, - summarise_sf12, - # create_health_indicator, - do_health_regressions!, - rm2 # fixme move this - -const SFD12_REGRESSION = DataFrame([ - "q1mlog" -.0669224 .0129316 -5.18 0.000 -.0922679 -.0415769; - "q2mlog" -.065569 .0104701 -6.26 0.000 -.0860902 -.0450479; - "q3mlog" -.0412175 .0083636 -4.93 0.000 -.0576098 -.0248251; - "q4mlog" -.020758 .0068287 -3.04 0.002 -.034142 -.0073741; - "mlogbhc" .168725 .0648695 2.60 0.009 .0415827 .2958673; -# "q1dlog" -.0625272 .1071465 -0.58 0.560 -.2725313 .1474769; -# "q2dlog" -.5528586 .1677468 -3.30 0.001 -.8816375 -.2240796; -# "q3dlog" -.8932082 .1628441 -5.49 0.000 -1.212378 -.5740385; -# "q4dlog" -.6595537 .1581312 -4.17 0.000 -.9694864 -.349621; -# "dlogbhc" .0676555 .0926011 0.73 0.465 -.11384 .2491511; -# | -# sf12mcs_dv | -# "L1." .5262758 .0015297 344.04 0.000 .5232776 .5292739; -# | - "female" -.799361 .0301345 -26.53 0.000 -.8584237 -.7402983; - "race_ms" .8272873 .2600764 3.18 0.001 .317545 1.33703; - "race_mx" -.4235778 .117867 -3.59 0.000 -.6545937 -.1925619; - "race_as" -.2153134 .0672696 -3.20 0.001 -.3471599 -.0834669; - "race_bl" .8555785 .0941759 9.08 0.000 .6709964 1.040161; - "race_ot" -.0818178 .1989814 -0.41 0.681 -.4718156 .30818; - "born_m" -.4412494 .1317057 -3.35 0.001 -.6993888 -.1831099; - "born_uk" -.2516825 .0552214 -4.56 0.000 -.3599147 -.1434502; - "llsid" -1.948192 .0330161 -59.01 0.000 -2.012903 -1.883482; - "marciv" .4932766 .04302 11.47 0.000 .4089587 .5775946; - "divsep" -.0224856 .0580789 -0.39 0.699 -.1363186 .0913475; - "widow" .4939777 .0767766 6.43 0.000 .3434978 .6444576; - "age2534" -.9669299 .0686889 -14.08 0.000 -1.101558 -.8323016; - "age3544" -.662258 .0701197 -9.44 0.000 -.7996906 -.5248254; - "age4554" -.2041259 .0709515 -2.88 0.004 -.3431888 -.0650631; - "age5565" .616918 .0749932 8.23 0.000 .4699335 .7639024; - "age6574" 
1.180853 .0934391 12.64 0.000 .9977149 1.363991; - "age75" 1.327786 .1054333 12.59 0.000 1.12114 1.534432; - "hq_deg" -.1245185 .0595372 -2.09 0.036 -.2412097 -.0078273; - "hq_ohe" .0742798 .0635622 1.17 0.243 -.0503004 .19886; - "hq_al" .1178647 .0589437 2.00 0.046 .0023367 .2333927; - "hq_gcse" .2075504 .0579199 3.58 0.000 .094029 .3210718; - "hq_oth" .1935175 .0661833 2.92 0.003 .0638001 .3232349; - "ec_emp" 2.582286 .0618956 41.72 0.000 2.460973 2.7036; - "ec_se" 2.979511 .0795412 37.46 0.000 2.823612 3.13541; - "ec_fam" 2.179118 .0870071 25.05 0.000 2.008587 2.34965; - "ec_un" .5503545 .0904333 6.09 0.000 .3731078 .7276012; - "ec_ret" 3.087541 .0829093 37.24 0.000 2.925041 3.250041; - "rural" .2987942 .0350712 8.52 0.000 .2300557 .3675328; - "gor_nw" .1452431 .0864176 1.68 0.093 -.024133 .3146192; - "gor_yh" .1790899 .0895601 2.00 0.046 .0035547 .3546252; - "gor_em" .2360591 .090613 2.61 0.009 .0584603 .4136579; - "gor_wm" -.0172429 .0898078 -0.19 0.848 -.1932635 .1587778; - "gor_ee" .2282277 .0885268 2.58 0.010 .0547177 .4017376; - "gor_lo" .0892817 .0908445 0.98 0.326 -.088771 .2673344; - "gor_se" .1642839 .0845966 1.94 0.052 -.0015231 .330091; - "gor_sw" .1898351 .0885738 2.14 0.032 .0162329 .3634373; - "gor_wa" -.071062 .0921799 -0.77 0.441 -.251732 .1096079; - "gor_sc" .2934197 .0880426 3.33 0.001 .1208587 .4659807; - "gor_ni" .5129039 .0951431 5.39 0.000 .3264262 .6993816; - "ten_own" .3748749 .0493456 7.60 0.000 .278159 .4715909; - "ten_sr" -.2750053 .0597592 -4.60 0.000 -.3921317 -.1578789; - "cons" 20.01 .5385379 37.16 0.000 18.95448 21.06552 ], - ["var", "coef", "stderr", "t", "p", "conflow", "confhigh"] ) - -# just the sf12 coefficients, as a df row -const SFD12_REGRESSION_TR = unstack(SFD12_REGRESSION[!,[:var,:coef]],:var,:coef)[1,:] - -const SFD6_REGRESSION = DataFrame([ - "q1mlog" -.0004121 .0001645 -2.51 0.012 -.0007345 -.0000898; - "q2mlog" -.0007313 .0001332 -5.49 0.000 -.0009923 -.0004702; - "q3mlog" -.000679 .0001064 -6.38 0.000 -.0008875 
-.0004705; - "q4mlog" -.0003551 .0000869 -4.09 0.000 -.0005254 -.0001849; - "mlogbhc" .0051953 .0008252 6.30 0.000 .003578 .0068126; -# "q1dlog" .0005124 .0013629 0.38 0.707 -.0021588 .0031836; -# "q2dlog" -.0034277 .0021336 -1.61 0.108 -.0076094 .000754; -# "q3dlog" -.0081074 .0020713 -3.91 0.000 -.0121671 -.0040477; -# "q4dlog" -.0096112 .0020113 -4.78 0.000 -.0135533 -.0056691; -# "dlogbhc" .0001985 .0011779 0.17 0.866 -.00211 .0025071; - "female" -.010241 .0003834 -26.71 0.000 -.0109924 -.0094895; - "race_ms" .0073831 .003308 2.23 0.026 .0008996 .0138665; - "race_mx" -.0051334 .0014992 -3.42 0.001 -.0080719 -.002195; - "race_as" -.0113549 .0008564 -13.26 0.000 -.0130334 -.0096764; - "race_bl" .0072463 .0011977 6.05 0.000 .0048989 .0095936; - "race_ot" -.009669 .002531 -3.82 0.000 -.0146297 -.0047082; - "born_m" .0009416 .0016751 0.56 0.574 -.0023415 .0042247; - "born_uk" .0018567 .0007023 2.64 0.008 .0004802 .0032331; - "llsid" -.0506025 .0004371 -115.78 0.000 -.0514592 -.0497459; - "marciv" .0044845 .000547 8.20 0.000 .0034124 .0055567; - "divsep" -.0023306 .0007388 -3.15 0.002 -.0037786 -.0008826; - "widow" -.0005617 .0009765 -0.58 0.565 -.0024756 .0013523; - "age2534" -.0152145 .0008739 -17.41 0.000 -.0169274 -.0135017; - "age3544" -.0145724 .0008926 -16.33 0.000 -.0163218 -.012823; - "age4554" -.0137875 .0009037 -15.26 0.000 -.0155588 -.0120163; - "age5565" -.0091562 .0009548 -9.59 0.000 -.0110275 -.0072849; - "age6574" -.0056498 .001188 -4.76 0.000 -.0079782 -.0033215; - "age75" -.0170103 .0013407 -12.69 0.000 -.0196381 -.0143826; - "hq_deg" .0071459 .0007576 9.43 0.000 .005661 .0086308; - "hq_ohe" .0065171 .0008087 8.06 0.000 .0049321 .0081022; - "hq_al" .0070895 .00075 9.45 0.000 .0056195 .0085596; - "hq_gcse" .0071972 .0007371 9.76 0.000 .0057526 .0086418; - "hq_oth" .0045121 .0008419 5.36 0.000 .002862 .0061622; - "ec_emp" .0398993 .0007906 50.47 0.000 .0383498 .0414489; - "ec_se" .0436185 .001014 43.02 0.000 .0416312 .0456059; - "ec_fam" .0326606 
.0011079 29.48 0.000 .0304891 .034832; - "ec_un" .0161176 .0011512 14.00 0.000 .0138613 .0183739; - "ec_ret" .0375567 .0010544 35.62 0.000 .0354901 .0396233; - "rural" .003955 .0004461 8.87 0.000 .0030807 .0048293; - "gor_nw" -.0001711 .0010992 -0.16 0.876 -.0023255 .0019833; - "gor_yh" .0007933 .0011391 0.70 0.486 -.0014394 .003026; - "gor_em" .0017145 .0011525 1.49 0.137 -.0005444 .0039734; - "gor_wm" -.0016317 .0011423 -1.43 0.153 -.0038706 .0006072; - "gor_ee" .00248 .001126 2.20 0.028 .0002731 .0046869; - "gor_lo" .0011987 .0011555 1.04 0.300 -.001066 .0034633; - "gor_se" .002256 .001076 2.10 0.036 .000147 .0043649; - "gor_sw" .0024576 .0011266 2.18 0.029 .0002495 .0046657; - "gor_wa" -.0030937 .0011725 -2.64 0.008 -.0053917 -.0007956; - "gor_sc" .003073 .0011198 2.74 0.006 .0008782 .0052679; - "gor_ni" .0001698 .0012101 0.14 0.888 -.002202 .0025415; - "ten_own" .0072883 .0006278 11.61 0.000 .0060578 .0085189; - "ten_sr" -.00562 .0007603 -7.39 0.000 -.0071102 -.0041298; - "cons" .3024495 .0068728 44.01 0.000 .2889791 .31592 -], ["var", "coef", "stderr", "t", "p", "conflow", "confhigh"] ) - -# just the sf6 coefficients, as a df row -const SFD6_REGRESSION_TR = unstack(SFD6_REGRESSION[!,[:var,:coef]],:var,:coef)[1,:] - -""" -cross-mult a row in a dataframe using just the col names in common -""" -function rmul( d1 :: DataFrameRow, d2::DataFrameRow)::Number - nc = Symbol.(intersect( names(d1), names(d2))) - v1 = Vector(d1[nc]) - v2 = Vector(d2[nc]) - return v1'*v2 -end - -""" -As above but with a pre-computed set of names in common and a pre-computed coefficient vector -""" -function rm2( - names :: Vector{Symbol}, - d1 :: DataFrameRow, - v2 ::Vector{Float64}; lagvalue=0.0 )::Float64 - v1 = Vector(d1[names]) - # println( [names v1]) - v1'*v2/(1-lagvalue) -end - - -""" -h - the dataframe made by create_health_indicator -return histogram, count of below settings.sf12_depression_limit, thresholds -for 0.025% increments -""" -function summarise_sf12( h :: DataFrame, 
settings :: Settings ) :: NamedTuple - w = weights(h[!,:weight]) - sf = h[!,:sf12] - range = 0.025:0.025:1 - average,sdev = StatsBase.mean_and_std( sf, w ) - med = StatsBase.median( sf, w ) - thresholds = quantile( sf , w, range ) - hist = fit(Histogram, sf, w, 0:2:100 ) - popn = sum( h[ !, :weight ]) - depressed = sum( h[h.sf12 .<= settings.sf12_depression_limit, :weight ]) - depressed_pct = 100*depressed/popn - (; depressed, depressed_pct, hist, thresholds, range, average, med, sdev, popn ) -end - -""" -Calculate sf6 & sf12 health measures, return a summary of sf12, and insert health stuff into indiv records. -Results need to have been passed through summarise_frames. -""" -function do_health_regressions!( results :: NamedTuple, settings :: Settings ) :: Array{NamedTuple} - # @assert something results - - uk_data = get_regression_dataset() # alias - uk_data_ads = copy(uk_data[(uk_data.from_child_record .== 0).&(uk_data.gor_ni.==0),:]) - summaries = [] - # - # extract variable names and regression coefficients as vectors - - # this speeds things up. - # - nc12 = Symbol.(intersect( names(uk_data), names(SFD12_REGRESSION_TR))) - coefs12 = Vector{Float64}( SFD12_REGRESSION_TR[nc12] ) - nc6 = Symbol.(intersect( names(uk_data), names(SFD6_REGRESSION_TR))) - coefs6 = Vector{Float64}( SFD6_REGRESSION_TR[nc6] ) - nsys = size( results.indiv )[1] - for sysno in 1:nsys - # - # create a dataset from the main dataset but with the results for 1 run appended. - # This merges in averaged household income, deciles, poverty line, etc., - # from one system run. 
- # - data_ads = innerjoin( - uk_data_ads, - results.indiv[sysno], on=[:data_year, :hid ], makeunique=true ) - data_ads.mlogbhc = log.(max.(1,WEEKS_PER_MONTH.*data_ads.eq_bhc_net_income )) - data_ads.quintile = ((data_ads.decile .+1) .÷ 2) - data_ads.q1mlog = (data_ads.quintile .== 1) .* data_ads.mlogbhc - data_ads.q2mlog = (data_ads.quintile .== 2) .* data_ads.mlogbhc - data_ads.q3mlog = (data_ads.quintile .== 3) .* data_ads.mlogbhc - data_ads.q4mlog = (data_ads.quintile .== 4) .* data_ads.mlogbhc - data_ads.q5mlog = (data_ads.quintile .== 5) .* data_ads.mlogbhc - k = 0 - for h in eachrow(data_ads) - k += 1 - pslot = get_slot_for_person( BigInt(h.pid), h.data_year ) - sf12 = rm2( nc12, h, coefs12; lagvalue = 0.526275 ) - results.indiv[sysno][pslot,:sf12] = sf12 - sf6 = rm2( nc6, h, coefs6; lagvalue = 0.5337817 ) - results.indiv[sysno][pslot,:sf6] = sf6 - results.indiv[sysno][pslot,:has_mental_health_problem] = - sf12 <= settings.sf12_depression_limit - results.indiv[sysno][pslot,:qualys] = -1 - results.indiv[sysno][pslot,:life_expectancy] = -1 - end - summary = summarise_sf12( results.indiv[sysno][results.indiv[sysno].sf12 .> 0,:], settings ) - - push!( summaries, summary ) - end - return summaries -end - -function get_death_prob( - ; - hh :: Household, - hres :: HouseholdResult ) :: Dict{BigInt,Number} - -end - -end # module \ No newline at end of file diff --git a/src/notused/HouseholdFromFrame.jl b/src/notused/HouseholdFromFrame.jl deleted file mode 100644 index e3b1b525..00000000 --- a/src/notused/HouseholdFromFrame.jl +++ /dev/null @@ -1,730 +0,0 @@ -module HouseholdFromFrame - -# -# This module maps from flat-ish DataFrames containing FRS/SHS/Example data to -# our Household/Person structures from ModelHouseholds.jl. It also does some incidental calculations - -# equivalence scales and ratios of recorded benefits to standard entitlements. 
-# - -using DataFrames -using StatsBase -using CSV -# using CSVFiles - -using ScottishTaxBenefitModel -using .Definitions -using .ModelHousehold -using .TimeSeriesUtils -using .HistoricBenefits: - make_benefit_ratios!, - switch_dla_to_pip! -using .EquivalenceScales: EQScales -using .Utils: not_zero_or_missing -using .Randoms: strtobi -using .RunSettings -using .Pensions: impute_employer_pension! - -export - create_regression_dataframe, - load_hhld_from_frame, - map_hhld, - read_hh, - read_pers - -const ZERO_EQ_SCALE = EQScales(0.0,0.0,0.0,0.0,0.0,0.0,0.0) - - -function read_hh( filename :: String ) :: DataFrame - println( "read_hh; opening $filename") - hh = CSV.File( filename; delim='\t') |> DataFrame - hh.hid = BigInt.(hh.hid) - hh.uhid = BigInt.(hh.uhid) - hh.tenure = eval.( Symbol.( hh.tenure )) - hh.region = eval.(Symbol.( hh.region)) - hh.ct_band = eval.(Symbol.( hh.ct_band)) - hh.dwelling = eval.(Symbol.( hh.dwelling )) - hh.council = Symbol.(hh.council ) - hh.rent_includes_water_and_sewerage = Bool.(hh.rent_includes_water_and_sewerage) - hh.nhs_board = Symbol.(hh.nhs_board ) - hh.onerand = strtobi.(hh.onerand) - return hh -end - -function read_pers( filename :: String ) :: DataFrame - println( "read_pers; opening $filename") - pers = CSV.File( filename; delim='\t' ) |> DataFrame - pers.pid = BigInt.(pers.pid) - # pno -# pers.is_hrp -# pers.is_bu_head - # pers.from_child_record = Bool.( pers.from_child_record ) - # default_benefit_unit - # age - pers.sex = eval.( Symbol.( pers.sex )) - pers.ethnic_group = eval.( Symbol.( pers.ethnic_group )) - pers.marital_status = eval.( Symbol.( pers.marital_status )) - pers.highest_qualification = eval.( Symbol.( pers.highest_qualification )) - pers.sic = eval.( Symbol.( pers.sic )) - pers.occupational_classification = eval.( Symbol.( pers.occupational_classification )) - pers.public_or_private = eval.( Symbol.( pers.public_or_private )) - pers.principal_employment_type = eval.( Symbol.( pers.principal_employment_type )) 
- pers.socio_economic_grouping = eval.( Symbol.( pers.socio_economic_grouping )) -# pers.age_completed_full_time_education -# pers.years_in_full_time_work - pers.employment_status = eval.( Symbol.( pers.employment_status )) -# pers.usual_hours_worked -# pers.actual_hours_worked -# pers.age_started_first_job - pers.type_of_bereavement_allowance = eval.( Symbol.( pers.type_of_bereavement_allowance )) - pers.had_children_when_bereaved = safe_to_bool.( pers.had_children_when_bereaved ) -# pers.pay_includes_ssp -# pers.pay_includes_smp -# pers.pay_includes_spp -# pers.pay_includes_sap -# pers.pay_includes_mileage -# pers.pay_includes_motoring_expenses -# pers.income_wages -# pers.income_self_employment_income -# pers.income_self_employment_expenses -# pers.income_self_employment_losses -# pers.income_odd_jobs -# pers.income_private_pensions -# pers.income_national_savings -# pers.income_bank_interest -# pers.income_stocks_shares -# pers.income_individual_savings_account -# pers.income_property -# pers.income_royalties -# pers.income_bonds_and_gilts -# pers.income_other_investment_income -# pers.income_other_income -# pers.income_alimony_and_child_support_received -# pers.income_health_insurance -# pers.income_alimony_and_child_support_paid -# pers.income_care_insurance -# pers.income_trade_unions_etc -# pers.income_friendly_societies -# pers.income_work_expenses -# pers.income_avcs -# pers.income_other_deductions -# pers.income_loan_repayments -# pers.income_student_loan_repayments -# pers.income_pension_contributions_employer -# pers.income_pension_contributions_employee -# pers.income_education_allowances -# pers.income_foster_care_payments -# pers.income_student_grants -# pers.income_student_loans -# pers.income_income_tax -# pers.income_national_insurance -# pers.income_local_taxes -# pers.income_free_school_meals -# pers.income_dlaself_care -# pers.income_dlamobility -# pers.income_child_benefit -# pers.income_pension_credit -# pers.income_state_pension -# 
pers.income_bereavement_allowance_or_widowed_parents_allowance_or_bereavement -# pers.income_armed_forces_compensation_scheme -# pers.income_war_widows_or_widowers_pension -# pers.income_severe_disability_allowance -# pers.income_attendance_allowance -# pers.income_carers_allowance -# pers.income_jobseekers_allowance -# pers.income_industrial_injury_disablement_benefit -# pers.income_employment_and_support_allowance -# pers.income_incapacity_benefit -# pers.income_income_support -# pers.income_maternity_allowance -# pers.income_maternity_grant_from_social_fund -# pers.income_funeral_grant_from_social_fund -# pers.income_any_other_ni_or_state_benefit -# pers.income_trade_union_sick_or_strike_pay -# pers.income_friendly_society_benefits -# pers.income_private_sickness_scheme_benefits -# pers.income_accident_insurance_scheme_benefits -# pers.income_hospital_savings_scheme_benefits -# pers.income_government_training_allowances -# pers.income_guardians_allowance -# pers.income_widows_payment -# pers.income_unemployment_or_redundancy_insurance -# pers.income_winter_fuel_payments -# pers.income_child_winter_heating_assistance_payment -# pers.income_dwp_third_party_payments_is_or_pc -# pers.income_dwp_third_party_payments_jsa_or_esa -# pers.income_social_fund_loan_repayment_from_is_or_pc -# pers.income_social_fund_loan_repayment_from_jsa_or_esa -# pers.income_extended_hb -# pers.income_permanent_health_insurance -# pers.income_any_other_sickness_insurance -# pers.income_critical_illness_cover -# pers.income_working_tax_credit -# pers.income_child_tax_credit -# pers.income_working_tax_credit_lump_sum -# pers.income_child_tax_credit_lump_sum -# pers.income_housing_benefit -# pers.income_universal_credit -# pers.income_personal_independence_payment_daily_living -# pers.income_personal_independence_payment_mobility -# pers.income_a_loan_from_the_dwp_and_dfc -# pers.income_a_loan_or_grant_from_local_authority -# pers.income_social_fund_loan_uc -# pers.income_other_benefits -# 
pers.income_scottish_child_payment -# pers.income_job_start_payment -# pers.income_troubles_permanent_disablement -# pers.income_child_disability_payment_care -# pers.income_child_disability_payment_mobility -# pers.income_pupil_development_grant -# pers.wages_frs -# pers.self_emp_frs -# pers.wages_hbai -# pers.self_emp_hbai - pers.jsa_type = eval.( Symbol.( pers.jsa_type )) - pers.esa_type = eval.( Symbol.( pers.esa_type )) - # @show pers.dlaself_care_type - pers.dlaself_care_type = eval.( Symbol.( pers.dlaself_care_type )) - # @show pers.dlaself_care_type - pers.dlamobility_type = eval.( Symbol.( pers.dlamobility_type )) - pers.attendance_allowance_type = eval.( Symbol.( pers.attendance_allowance_type )) - pers.personal_independence_payment_daily_living_type = eval.( Symbol.( pers.personal_independence_payment_daily_living_type )) - pers.personal_independence_payment_mobility_type = eval.( Symbol.( pers.personal_independence_payment_mobility_type )) -# pers.over_20_k_saving -# println("#1") -# pers.asset_current_account -# pers.asset_nsb_ordinary_account -# pers.asset_nsb_investment_account -# pers.asset_not_used -# pers.asset_savings_investments_etc -# pers.asset_government_gilt_edged_stock -# pers.asset_unit_or_investment_trusts -# pers.asset_stocks_shares_bonds_etc -# pers.asset_pep -# pers.asset_national_savings_capital_bonds -# pers.asset_index_linked_national_savings_certificates -# pers.asset_fixed_interest_national_savings_certificates -# pers.asset_pensioners_guaranteed_bonds -# pers.asset_saye -# pers.asset_premium_bonds -# pers.asset_national_savings_income_bonds -# pers.asset_national_savings_deposit_bonds -# pers.asset_first_option_bonds -# pers.asset_yearly_plan -# pers.asset_isa -# pers.asset_fixd_rate_svngs_bonds_or_grntd_incm_bonds_or_grntd_growth_bonds -# pers.asset_geb -# pers.asset_basic_account -# pers.asset_credit_unions -# pers.asset_endowment_policy_not_linked -# pers.asset_informal_assets -# pers.asset_post_office_card_account -# 
pers.asset_friendly_society_investment -# println("#2") - # contracted_out_of_serps -# pers.registered_blind -# pers.registered_partially_sighted -# pers.registered_deaf -# pers.disability_vision -# pers.disability_hearing -# pers.disability_mobility -# pers.disability_dexterity -# pers.disability_learning -# pers.disability_memory -# pers.disability_mental_health -# pers.disability_stamina -# pers.disability_socially -# pers.disability_other_difficulty - pers.health_status = eval.( Symbol.( pers.health_status )) -# pers.has_long_standing_illness - pers.adls_are_reduced = eval.( Symbol.( pers.adls_are_reduced )) - pers.how_long_adls_reduced = eval.( Symbol.( pers.how_long_adls_reduced )) -# pers.is_informal_carer -# pers.receives_informal_care_from_non_householder -# pers.hours_of_care_received -# pers.hours_of_care_given -# pers.hours_of_childcare -# pers.cost_of_childcare - pers.childcare_type = eval.( Symbol.( pers.childcare_type )) -# pers.employer_provides_child_care -# pers.work_expenses -# pers.travel_to_work -# pers.debt_repayments -# pers.wealth_and_assets -# pers.totsav - pers.company_car_fuel_type = eval.( Symbol.( pers.company_car_fuel_type )) -# pers.company_car_value -# pers.company_car_contribution -# pers.fuel_supplied - pers.relationship_to_hoh = eval.( Symbol.( pers.relationship_to_hoh )) - pers.relationship_1 = eval.( Symbol.( pers.relationship_1 )) - pers.relationship_2 = eval.( Symbol.( pers.relationship_2 )) - pers.relationship_3 = eval.( Symbol.( pers.relationship_3 )) - pers.relationship_4 = eval.( Symbol.( pers.relationship_4 )) - pers.relationship_5 = eval.( Symbol.( pers.relationship_5 )) - pers.relationship_6 = eval.( Symbol.( pers.relationship_6 )) - pers.relationship_7 = eval.( Symbol.( pers.relationship_7 )) - pers.relationship_8 = eval.( Symbol.( pers.relationship_8 )) - pers.relationship_9 = eval.( Symbol.( pers.relationship_9 )) - pers.relationship_10 = eval.( Symbol.( pers.relationship_10 )) - pers.relationship_11 = eval.( 
Symbol.( pers.relationship_11 )) - pers.relationship_12 = eval.( Symbol.( pers.relationship_12 )) - pers.relationship_13 = eval.( Symbol.( pers.relationship_13 )) - pers.relationship_14 = eval.( Symbol.( pers.relationship_14 )) - pers.relationship_15 = eval.( Symbol.( pers.relationship_15 )) - # println("#3") - pers.onerand = strtobi.(pers.onerand) - pers.uhid = BigInt.(pers.uhid) - # CSV.write( "data/actual_data/model_people_scotland-2015-2021-w-enums.tab", pers ) - return pers -end - -""" - Create the dataframe used in the regressions for (e.g) disability - by joining the household and person frames, and adding - some disability fields -""" -function create_regression_dataframe( - model_households :: DataFrame, - model_people :: DataFrame ) :: DataFrame - - fm = innerjoin( model_households, model_people, on=[:data_year, :hid ],makeunique=true ) - nrows,ncols = size( fm ) - fm.age_sq = fm.age.^2 - fm.cons = ones( nrows ) - fm.deaf_blind=fm.registered_blind .| fm.registered_deaf .| fm.registered_partially_sighted - fm.yr = fm.data_year .- 2014 - fm.any_dis = ( - fm.disability_vision .| - fm.disability_hearing .| - fm.disability_mobility .| - fm.disability_dexterity .| - fm.disability_learning .| - fm.disability_memory .| - fm.disability_other_difficulty .| - fm.disability_mental_health .| - fm.disability_stamina .| - fm.disability_socially ) - fm.adls_bad=fm.adls_are_reduced.==1 - fm.adls_mid=fm.adls_are_reduced.==2 - fm.rec_dla = ( fm.income_dlamobility.>0.0) .| ( fm.income_dlaself_care .>0.0 ) - fm.rec_dla_care = ( fm.income_dlaself_care .>0.0 ) - fm.rec_dla_mob = ( fm.income_dlamobility.>0.0 ) - fm.rec_pip = ( fm.income_personal_independence_payment_mobility.>0.0) .| ( fm.income_personal_independence_payment_daily_living .>0.0 ) - fm.rec_pip_care = ( fm.income_personal_independence_payment_daily_living .>0.0 ) - fm.rec_pip_mob = ( fm.income_personal_independence_payment_mobility.>0.0) - fm.rec_esa = ( fm.income_employment_and_support_allowance.>0.0) - fm.rec_aa 
= ( fm.income_attendance_allowance.>0.0) - fm.rec_carers = ( fm.income_carers_allowance.>0.0) - fm.rec_aa = ( fm.income_attendance_allowance.>0.0) - - fm.scotland = fm.region .== 299999999 - - ## these rather cryptic names below are to match Howard' Stata regressions. - ## FIXME make them all consistent - fm.mlogbhc = zeros(nrows) - fm.gor_nw = fm.region .== North_West - fm.gor_yh = fm.region .== Yorks_and_the_Humber - fm.gor_em = fm.region .== East_Midlands - fm.gor_wm = fm.region .== West_Midlands - fm.gor_ee = fm.region .== East_of_England - fm.gor_lo = fm.region .== London - fm.gor_se = fm.region .== South_East - fm.gor_sw = fm.region .== South_West - fm.gor_wa = fm.region .== Wales - fm.gor_sc = fm.region .== Scotland - fm.gor_ni = fm.region .== Northern_Ireland - fm.ten_own = in.( fm.tenure,( [Owned_outright, Mortgaged_Or_Shared], )) - fm.ten_sr = in.(fm.tenure, ( [Council_Rented, Housing_Association], ) ) - - fm.male = fm.sex .== 1 - fm.female = fm.sex .== 2 - - # eg = safe_assign.(fm.ethnic_group) - - fm.race_ms = fm.ethnic_group .== Missing_Ethnic_Group - fm.race_mx = fm.ethnic_group .== Mixed_or_Multiple_ethnic_groups - fm.race_as = fm.ethnic_group .== Asian_or_Asian_British - fm.race_bl = fm.ethnic_group .== Black_or_African_or_Caribbean_or_Black_British - fm.race_ot = fm.ethnic_group .== Other_ethnic_group - fm.born_m = zeros(nrows) - fm.born_uk = zeros(nrows) - fm.llsid = fm.has_long_standing_illness .| fm.adls_bad - # ms = safe_assign.(fm.marital_status) - fm.marciv = fm.marital_status .== Int(Married_or_Civil_Partnership) - fm.divsep = in.(fm.marital_status , ([Separated,Divorced_or_Civil_Partnership_dissolved],) ) - fm.widow = in.(fm.marital_status, ([Widowed],) ) - - fm.age2534 = in.(fm.age, [25:34] ) - fm.age3544 = in.(fm.age, [35:44] ) - fm.age4554 = in.(fm.age, [45:54] ) - - # FIXME check HR 5564 - fm.age5565 = in.(fm.age, [55:64] ) - fm.age6574 = in.(fm.age, [65:74] ) - fm.age75 = in.(fm.age,[75:200]) - hq = fm.highest_qualification - fm.hq_deg 
= highqual_degree_equiv.( hq ) - fm.hq_ohe = highqual_other_he.( hq ) - fm.hq_al = highqual_alevel_equiv.( hq ) - fm.hq_gcse = highqual_gcse_equiv.( hq ) - fm.hq_oth = highqual_other.( hq) - # es = safe_assign.( fm.employment_status ) - fm.ec_emp = in.(fm.employment_status, ([Full_time_Employee, Part_time_Employee], )) - fm.ec_se = in.(fm.employment_status, ([Full_time_Self_Employed,Part_time_Self_Employed],) ) - fm.ec_fam = in.(fm.employment_status, ([Looking_after_family_or_home],) ) - fm.ec_un = in.(fm.employment_status, ([Unemployed],) ) - fm.ec_ret = in.(fm.employment_status, ([Retired],)) - - fm.q1mlog = zeros(nrows) - fm.q2mlog = zeros(nrows) - fm.q3mlog = zeros(nrows) - fm.q4mlog = zeros(nrows) - fm.q5mlog = zeros(nrows) - fm.mlogbhc = zeros(nrows) - - fm.rural = zeros(nrows) # missing from frs public version - - ## region renames for my wealth and housing regressions - - ## this eats memory, obs, but still... - - fm.wales = fm.gor_wa - fm.london = fm.gor_lo - fm.north_west = fm.gor_nw - fm.yorkshire = fm.gor_yh - fm.east_midlands = fm.gor_em - fm.west_midlands = fm.gor_wm - fm.east_of_england = fm.gor_ee - fm.south_east = fm.gor_se - fm.south_west = fm.gor_sw - - fm.age_u_25 = in.(fm.age, [0:24] ) - fm.age_25_34 = fm.age2534 - fm.age_35_44 = fm.age3544 - fm.age_45_54 = fm.age4554 - fm.age_55_64 = fm.age5565 # check - fm.age_65_74 = fm.age6574 - fm.age_75_plus = fm.age75 - - fm.employee = in.(fm.employment_status, ([Full_time_Employee, Part_time_Employee],) ) - fm.selfemp = in.(fm.employment_status, ([Full_time_Self_Employed,Part_time_Self_Employed],) ) - fm.inactive = in.(fm.employment_status, ([Looking_after_family_or_home, Other_Inactive],) ) - fm.unemployed = in.(fm.employment_status, ([Unemployed],) ) - fm.student = in.(fm.employment_status, ([Student],) ) - fm.sick = in.(fm.employment_status, ([Permanently_sick_or_disabled, Temporarily_sick_or_injured],) ) - fm.retired = in.(fm.employment_status, ([Retired],)) - - fm.log_weekly_gross_income = log.( 
max.(0.0001, fm.original_gross_income)) - fm.weekly_gross_income = fm.original_gross_income - fm.detatched = in.( fm.dwelling, ([detatched],) ) - fm.semi = in.( fm.dwelling, ([semi_detached],) ) - fm.terraced = in.( fm.dwelling, ([terraced],)) - fm.purpose_build_flat = in.(fm.dwelling, ([flat_or_maisonette],)) - fm.converted_flat = in.(fm.dwelling, ([converted_flat],)) - - fm.managerial = in.(fm.socio_economic_grouping, - ( [Employers_in_large_organisations, - Higher_managerial_occupations, - Lower_managerial_occupations, - Higher_supervisory_occupations, - Higher_professional_occupations_New_self_employed, - Lower_supervisory_occupations],) ) - - fm.intermediate = in.(fm.socio_economic_grouping, - ([Lower_prof_and_higher_technical_Traditional_employee, - Lower_technical_craft, - Own_account_workers_non_professional],) ) - - fm.routine = in.(fm.socio_economic_grouping, - ([Lower_technical_craft, - Semi_routine_sales, - Routine_sales_and_service],) ) - - fm.num_people = zeros(Int,nrows) - fm.num_adults = zeros(Int,nrows) - fm.num_children = zeros(Int,nrows) - - hhlds = groupby( fm, [:hid,:data_year]) - for hhld in hhlds - hhld.num_children .= sum( hhld.from_child_record ) - hhld.num_people .= size( hhld )[1] - hhld.num_adults .= hhld.num_people - hhld.num_children - end - - fm.owner = in.( fm.tenure, ([Owned_outright],) ) - fm.mortgaged = in.( fm.tenure, ([Mortgaged_Or_Shared],) ) - fm.renter = in.( fm.tenure, ([Council_Rented, - Housing_Association, - Private_Rented_Unfurnished, - Private_Rented_Furnished], )) - - # fm.is_hrp = coalesce.(fm.is_hrp,0) - - ## wealth for head only - fm[ .!fm.is_hrp,[:net_housing_wealth,:net_pension_wealth,:net_financial_wealth,:net_physical_wealth]] .= 0.0 - - # - # added for legal aid, matching scjs - see scjs_mappings.jl, civil_problems-scjs.jl in regressions/ - # - fm.lives_in_flat = fm.purpose_build_flat .| fm.converted_flat - fm.non_white = fm.race_mx .| fm.race_as .| fm.race_bl .| fm.race_ot - fm.is_carer = fm.rec_carers .| 
fm.is_informal_carer - fm.single_parent = (fm.num_children .> 0) .& (fm.num_adults .== 1) # FIXME this is hhld level - fm.divorced_or_separated = in.( fm.marital_status, ([Separated, Divorced_or_Civil_Partnership_dissolved],) ) - fm.out_of_labour_market = fm.inactive .| fm.unemployed .| fm.student .| fm.retired - fm.is_limited = in.(fm.adls_are_reduced, ([reduced_a_lot, reduced_a_little],) ) .| (fm.has_long_standing_illness ) - fm.health_good_or_better = in.( fm.health_status, ([Very_Good, Good],) ) - fm.has_condition = coalesce.( fm.any_dis .| fm.adls_bad .| fm.adls_mid .| fm.is_limited, 0 ) - fm.agesq = fm.age .^2 - # - # 2nd - # - return fm -end - -function map_person( - hh :: Household, - model_person :: DataFrameRow, - settings :: Settings ) - income = Dict{Incomes_Type,Float64}() - - for i in instances(Incomes_Type) - ikey = make_sym_for_frame("income", i) - if model_person[ikey] != 0.0 - income[i] = model_person[ikey] - end - end - # - # override wages and se - # wage needs to be set - if settings.income_data_source == ds_frs - income[wages] = model_person.wages_frs - income[self_employment_income] = model_person.self_emp_frs - else # not really needed since hbai is the default - income[wages] = model_person.wages_hbai - income[self_employment_income] = model_person.self_emp_hbai - end - - # FIXME should be set - pay_includes = Included_In_Pay_Dict{Bool}() - for i in instances(Included_In_Pay_Type) - s = String(Symbol(i)) - ikey = Symbol(lowercase("pay_includes_" * s)) - if model_person[ikey] - pay_includes[i] = true # model_person[ikey] - end - end - - assets = Dict{Asset_Type,Float64}() # fixme asset_type_dict - for i in instances(Asset_Type) - if i != Missing_Asset_Type - ikey = make_sym_for_asset( i ) - # println(ikey) - if model_person[ikey] != 0 - assets[i] = model_person[ikey] - end - end - end - - # FIXME disabilties should be a set, not a map - disabilities = Dict{Disability_Type,Bool}() - for i in instances(Disability_Type) - ikey = 
make_sym_for_frame("disability", i) - if model_person[ikey] - disabilities[i] = true # model_person[ikey - end - end - - #= ??? not needed ??? - bereavement_type = missing - if not_zero_or_missing( model_person.income_bereavement_allowance_or_widowed_parents_allowance_or_bereavement ) || - not_zero_or_missing( model_person.income_widows_payment ) - if interview_date( hh ) < FY_2017 - bereavement_type = 2 # widowed parents allow - else - @assert ! ismissing( model_person.type_of_bereavement_allowance ) - bereavement_type = model_person.type_of_bereavement_allowance - # hack for 1 household: pid 120210849301 - if bereavement_type == -1 - bereavement_type = 1 - end - end - end - =# - - relationships = Relationship_Dict() - for i in 1:15 - relmod = Symbol( "relationship_$(i)") # :relationship_10 or :relationship_2 - irel = model_person[relmod] - if irel != Missing_Relationship - pid = get_pid( - settings.data_source, - model_person.data_year, - model_person.hid, - i ) - relationships[pid] = irel - end - end - - benefit_ratios = Incomes_Dict{Float64}() - - pers = Person{Float64}( - model_person.hid, - model_person.pid, - model_person.uhid, - model_person.pno, # Integer# person number in household - model_person.is_hrp, - model_person.default_benefit_unit, # Integer - model_person.is_bu_head, - model_person.from_child_record, - model_person.age, # Integer - model_person.sex, - model_person.ethnic_group, - model_person.marital_status, - model_person.highest_qualification, - model_person.sic, - model_person.occupational_classification, - model_person.public_or_private, - model_person.principal_employment_type, - model_person.socio_economic_grouping, - model_person.age_completed_full_time_education, - model_person.years_in_full_time_work, - model_person.employment_status, - model_person.actual_hours_worked, - model_person.usual_hours_worked, - model_person.age_started_first_job, - income, - benefit_ratios, - model_person.jsa_type, - model_person.esa_type, - 
model_person.dlaself_care_type, - model_person.dlamobility_type, - model_person.attendance_allowance_type, - model_person.personal_independence_payment_daily_living_type, - model_person.personal_independence_payment_mobility_type, - model_person.type_of_bereavement_allowance, - model_person.had_children_when_bereaved, - assets, - model_person.over_20_k_saving, - pay_includes, - model_person.registered_blind, - model_person.registered_partially_sighted, - model_person.registered_deaf, - disabilities, - model_person.health_status, - model_person.has_long_standing_illness, - model_person.adls_are_reduced, - model_person.how_long_adls_reduced, - relationships, - model_person.relationship_to_hoh, - model_person.is_informal_carer, - model_person.receives_informal_care_from_non_householder, - model_person.hours_of_care_received, - model_person.hours_of_care_given, - model_person.hours_of_childcare, - model_person.cost_of_childcare, - model_person.childcare_type, - model_person.employer_provides_child_care , - model_person.company_car_fuel_type, - model_person.company_car_value, - model_person.company_car_contribution, - model_person.fuel_supplied, - model_person.work_expenses , - model_person.travel_to_work , - model_person.debt_repayments , - model_person.wealth_and_assets , - model_person.totsav, # FIXME unedited FRS totsav field needs enum ??? - strtobi(model_person.onerand), - nothing # legal aid added as needed FIXME? maybe make this 'other data'?? 
- ) - # println( "model_person.pid=$(model_person.pid) model_person.dlaself_care_type $(model_person.dlaself_care_type) pers.dla_self_care_type $(pers.dla_self_care_type) ") - # FIXME we need a separate switch for make benefit ratios - if settings.benefit_generosity_estimates_available - make_benefit_ratios!( - pers, hh.interview_year, hh.interview_month ) - switch_dla_to_pip!( pers, hh.interview_year, hh.interview_month ) - end - if settings.impute_employer_pension - impute_employer_pension!( pers ) - end - return pers; -end - -function map_hhld( hno::Integer, frs_hh :: DataFrameRow, settings :: Settings ) - people = People_Dict{Float64}() - head_of_household = BigInt(-1) # this is set when we scan the actual people below - hh = Household{Float64}( - hno, - frs_hh.hid, - frs_hh.uhid, - frs_hh.data_year, - frs_hh.interview_year, - frs_hh.interview_month, - frs_hh.quarter, - frs_hh.tenure, - frs_hh.region, - frs_hh.ct_band, - frs_hh.dwelling , - frs_hh.council_tax, - frs_hh.water_and_sewerage , - frs_hh.mortgage_payment, - frs_hh.mortgage_interest, - frs_hh.years_outstanding_on_mortgage, - frs_hh.mortgage_outstanding, - frs_hh.year_house_bought, - frs_hh.gross_rent, - frs_hh.rent_includes_water_and_sewerage, - frs_hh.other_housing_charges, - frs_hh.gross_housing_costs, - # frs_hh.total_income, - frs_hh.total_wealth, - frs_hh.house_value, - frs_hh.weight, - frs_hh.council , - frs_hh.nhs_board , - frs_hh.bedrooms, - head_of_household, - frs_hh.net_physical_wealth, - frs_hh.net_financial_wealth, - frs_hh.net_housing_wealth, - frs_hh.net_pension_wealth, - frs_hh.original_gross_income, - # frs_hh.lcf_default_matched_case, - # frs_hh.lcf_default_data_year, - -1, # original_income_decile - -1, # equiv_original_income_decile - nothing, # Recorded expenditure; loaded afterwards as needed. - nothing, # Expenditure factor costs i.e. minus taxes. 
- nothing, # raw_wealth - people, - strtobi(frs_hh.onerand), - ZERO_EQ_SCALE ) - return hh -end - -function load_hhld_from_frame( - hseq :: Integer, - hhld_fr :: DataFrameRow, - pers_fr :: DataFrame, - settings :: Settings ) :: Household - hh = map_hhld( hseq, hhld_fr, settings ) - pers_fr_in_this_hh = pers_fr[((pers_fr.data_year .== hhld_fr.data_year).&(pers_fr.hid .== hh.hid)),:] - npers = size( pers_fr_in_this_hh )[1] - @assert npers in 1:19 - head_of_household = -1 - for p in 1:npers - pers = map_person( hh, pers_fr_in_this_hh[p,:], settings ) - hh.people[pers.pid] = pers - # println( "pers.pid=$(pers.pid) pers.relationship_to_hoh=$(pers.relationship_to_hoh)") - if pers.relationship_to_hoh == This_Person - hh.head_of_household = pers.pid - end - end - @assert hh.head_of_household > 0 "head for hid $(hh.hid) = $(hh.head_of_household); should be +ive" - # rewrite the eq scale once we know everything - make_eq_scales!( hh ) - # infer_wealth!( hh ) - @assert hh.head_of_household !== -1 - return hh -end - -end # module diff --git a/src/notused/HouseholdMappingFRS_Only.jl b/src/notused/HouseholdMappingFRS_Only.jl deleted file mode 100644 index 69142727..00000000 --- a/src/notused/HouseholdMappingFRS_Only.jl +++ /dev/null @@ -1,558 +0,0 @@ -# -# This contains most of the functions used to create our model dataset from raw FRS/SHS/HBAI data. 
-# This has all the HBAI references removed -# -# -using DataFrames -using CSV -using ArgCheck -using StatsBase -using ScottishTaxBenefitModel -using .Utils -using .Definitions -using .RunSettings -using .Randoms: mybigrandstr -using .GeneralTaxComponents: RateBands, WEEKS_PER_YEAR - -export CreateData - -include( "frs_hbai_creation_libs.jl") - -# -# BU head is hrp or 1st person interviewed in subsequent BUs -# see: -function is_bu_head( - frs_bu :: DataFrameRow, - bu_people :: DataFrame, - frs_person :: DataFrameRow ) :: Bool - if frs_person.benunit == 1 # head of hh is head of bu - return frs_person.hrpid == 1 - end - @assert frs_person.hrpid != 1 # 2nd&subsquent bu can't be hrp - sort!( bu_people, [:person]) # FIXME shouldn't be needed - return frs_person.person == bu_people[1,:person] # is this the 1st person in subsequent bus -end - - -function create_adults( - year::Integer, - frs_adults::DataFrame, - accounts::DataFrame, - benunit::DataFrame, - extchild::DataFrame, - maint::DataFrame, - penprov::DataFrame, - # admin::DataFrame, - care::DataFrame, - mortcont::DataFrame, - pension::DataFrame, - govpay::DataFrame, - mortgage::DataFrame, - assets::DataFrame, - chldcare::DataFrame, - househol::DataFrame, - oddjob::DataFrame, - benefits::DataFrame, - endowmnt::DataFrame, - job::DataFrame, - frsx :: DataFrame )::DataFrame - - num_adults = size(frs_adults)[1] - adult_model = initialise_person(num_adults) - adno = 0 - - for pn in 1:num_adults - if pn % 1000 == 0 - println("adults: on year $year, pno $pn") - end - - frs_person = frs_adults[pn, :] - frs_bu = benunit[ (frs_person.sernum .== benunit.sernum).&(frs_person.benunit .== benunit.benunit), : ][1,:] - # everyone in hh in same benefit unit as frs_person - bu_people = frs_adults[ (frs_person.sernum .== frs_adults.sernum).&(frs_person.benunit .== frs_adults.benunit), : ] # FIXME do this the other way around - sernum = frs_person.sernum - adno += 1 - model_adult = adult_model[adno, :] - model_adult.onerand = 
mybigrandstr() - ## also for children - model_adult = adult_model[adno, :] - model_adult.pno = frs_person.person - model_adult.hid = frs_person.sernum - model_adult.is_hrp = (frs_person.hrpid == 1) - model_adult.uhid = get_pid( FRSSource, year, frs_person.sernum, 0 ) # unique hhid needed for mostly.ai generator - model_adult.pid = get_pid( FRSSource, year, frs_person.sernum, frs_person.person ) - model_adult.from_child_record = false - model_adult.data_year = year - model_adult.default_benefit_unit = frs_person.benunit - model_adult.age = frs_person.age80 - model_adult.sex = Sex(safe_assign(frs_person.sex)) - model_adult.ethnic_group = Ethnic_Group(safe_assign(frs_person.ethgr3)) - - hdsp = is_bu_head( - frs_bu, - bu_people, - frs_person ) - model_adult.is_bu_head = hdsp # == true) - model_adult.jsa_type, model_adult.esa_type = make_jsa_type( - frsx, - frs_person.sernum, - frs_person.benunit, - hdsp ) - if model_adult.is_bu_head - # see the note on capital in `docs/legalaid` - and - # assign BU total to head of bu - # totsav3 us is the only measure in all of 2015-2021 FRSs - model_adult.wealth_and_assets = frs_bu.totcapb3 - # we'll also store the band - model_adult.totsav= frs_bu.totsav - end - a_job = job[((job.sernum.==frs_person.sernum).&(job.benunit.==frs_person.benunit).&(job.person.==frs_person.person)), :] - a_benunit = benunit[((frs_person.benunit .== benunit.benunit).&(frs_person.sernum.==benunit.sernum)),:] - a_benunit = a_benunit[1,:] - model_adult.over_20_k_saving = 0 - if hdsp - ts = safe_assign(a_benunit.totsav) - if ts >= 5 - model_adult.over_20_k_saving = 1 - end - end - # println( "model_adult.over_20_k_saving=$(model_adult.over_20_k_saving)") - - a_pension = pension[((pension.sernum.==frs_person.sernum).&(pension.benunit.==frs_person.benunit).&(pension.person.==frs_person.person)), :] - a_penprov = penprov[((penprov.sernum.==frs_person.sernum).&(penprov.benunit.==frs_person.benunit).&(penprov.person.==frs_person.person)), :] - an_asset = 
assets[((assets.sernum.==frs_person.sernum).&(assets.benunit.==frs_person.benunit).&(assets.person.==frs_person.person)), :] - an_account = accounts[((accounts.sernum.==frs_person.sernum).&(accounts.benunit.==frs_person.benunit).&(accounts.person.==frs_person.person)), :] - a_maint = maint[((maint.sernum.==frs_person.sernum).&(maint.benunit.==frs_person.benunit).&(maint.person.==frs_person.person)), :] - a_oddjob = oddjob[((oddjob.sernum.==frs_person.sernum).&(oddjob.benunit.==frs_person.benunit).&(oddjob.person.==frs_person.person)), :] - a_benefits = benefits[((benefits.sernum.==frs_person.sernum).&(benefits.benunit.==frs_person.benunit).&(benefits.person.==frs_person.person)), :] - npens = size(a_pension)[1] - nassets = size(an_asset)[1] - naaccounts = size(an_account)[1] - nojs = size(a_oddjob)[1] - - model_adult.marital_status = Marital_Status(safe_assign(frs_person.marital)) - model_adult.highest_qualification = Qualification_Type(safe_assign(frs_person.dvhiqual)) - model_adult.sic = SIC_2007(safe_assign(frs_person.sic)) - - model_adult.socio_economic_grouping = Socio_Economic_Group(safe_assign(Integer(trunc(frs_person.nssec)))) - model_adult.age_completed_full_time_education = safe_assign(frs_person.tea) - model_adult.years_in_full_time_work = safe_inc(0, frs_person.ftwk) - model_adult.employment_status = ILO_Employment(safe_assign(frs_person.empstati)) - model_adult.occupational_classification = Standard_Occupational_Classification(safe_assign(frs_person.soc2010)) - - process_job_rec!(model_adult, a_job) - # FIXME some duplication here - # - # new - assign a total earnings/se figure from both hbai and frs - # so we can compare the two. 
This is in reaction to the - # oddly low Gini/Palma when using HBAI/SPI'd earnings - # DELETED in this Non-HBAI version - model_adult.wages_hbai = -1 #missing #hbaidata.wages - model_adult.self_emp_hbai = -1# missing # hbaidata.selfemp - model_adult.wages_frs = safe_inc( 0.0, frs_person.inearns ) - model_adult.self_emp_frs = safe_inc( 0.0, frs_person.incseo2 ) - - penstuff = process_pensions(a_pension) - model_adult.income_private_pensions = penstuff.pension - model_adult.income_income_tax += penstuff.tax - - # FIXME CHECK THIS - adding PENCONT and also from work pension contributions - double counting? - (employee,employer) = process_penprovs(a_penprov) - - model_adult.income_pension_contributions_employee = safe_inc( employee, model_adult.income_pension_contributions_employee ) - model_adult.income_pension_contributions_employer = safe_inc( employer, model_adult.income_pension_contributions_employer ) - - map_investment_income!(model_adult, an_account) - model_adult.income_property = safe_inc(0.0, frs_person.royyr1) - if frs_person.rentprof == 2 # it's a loss - model_adult.income_property *= -1 # a loss - end - model_adult.income_royalties = safe_inc(0.0, frs_person.royyr2) - model_adult.income_other_income = safe_inc(0.0, frs_person.royyr3) # sleeping partners - model_adult.income_other_income = safe_inc( - model_adult.income_other_income, - frs_person.royyr4 - ) # overseas pensions - # payments from charities, bbysitting .. - # model_adult.income_other_income = safe_inc( model_adult.income_other_income, frs_person.[x] - model_adult.income_alimony_and_child_support_received, - model_adult.income_alimony_and_child_support_paid = map_alimony( - frs_person, - a_maint ) - - model_adult.income_odd_jobs = 0.0 - for o in 1:nojs - model_adult.income_odd_jobs = safe_inc( - model_adult.income_odd_jobs, - a_oddjob[o, :ojamt] - ) - end - model_adult.income_odd_jobs /= 4.0 # since it's monthly - - ## TODO babysitting,chartities (secure version only??) 
- ## TODO alimony and childcare PAID ?? // 2015/6 only - ## TODO allowances from absent spouses apamt apdamt - - ## TODO income_education_allowances - - model_adult.income_foster_care_payments = max(0.0,coalesce(frs_person.allpd3,0.0)) - - - ## TODO income_student_grants - ## TODO income_student_loans - ## TODO income_income_tax - ## TODO income_national_insurance - ## TODO income_local_taxes - - process_benefits!(model_adult, a_benefits) - process_assets!(model_adult, an_asset) - - ## also for child - model_adult.registered_blind = (frs_person.spcreg1 == 1) - model_adult.registered_partially_sighted = (frs_person.spcreg2 == 1) - model_adult.registered_deaf = (frs_person.spcreg3 == 1) - - model_adult.disability_vision = (frs_person.disd01 == 1) # cdisd kids .. - model_adult.disability_hearing = (frs_person.disd02 == 1) - model_adult.disability_mobility = (frs_person.disd03 == 1) - model_adult.disability_dexterity = (frs_person.disd04 == 1) - model_adult.disability_learning = (frs_person.disd05 == 1) - model_adult.disability_memory = (frs_person.disd06 == 1) - model_adult.disability_mental_health = (frs_person.disd07 == 1) - model_adult.disability_stamina = (frs_person.disd08 == 1) - model_adult.disability_socially = (frs_person.disd09 == 1) - model_adult.disability_other_difficulty = (frs_person.disd10 == 1) - - model_adult.has_long_standing_illness = (frs_person.health1 == 1) - model_adult.how_long_adls_reduced = Illness_Length(max(-1,frs_person.limitl)) # < 0 ? 
-1 : frs_person.limitl) - adlr = max(-1,frs_person.condit) - model_adult.adls_are_reduced = ADLS_Inhibited(adlr) # missings to 'not at all' - - model_adult.age_started_first_job = safe_assign( frs_person.jobbyr ) - # This IGNORES the WID field and should use financial year as changeover - # FIXME check this - if(model_adult.income_bereavement_allowance_or_widowed_parents_allowance_or_bereavement > 0)|| - (model_adult.income_widows_payment > 0) - if( year >= 2017 ) # || (year == 2017 && month > 3) - model_adult.type_of_bereavement_allowance = widowed_parents - else - model_adult.type_of_bereavement_allowance = bereavement_allowance - end - end - #= - BereavementType(safe_assign( frs_person.wid )) - end - =# - - model_adult.had_children_when_bereaved = safe_assign( frs_person.w2 ) == 1 - - # dindividual_savings_accountbility_other_difficulty = Vector{Union{Real,Missing}}(missing, n), - model_adult.health_status = Health_Status(safe_assign(frs_person.heathad)) - model_adult.hours_of_care_received = safe_inc(0.0, frs_person.hourcare) - model_adult.hours_of_care_given = infer_hours_of_care(frs_person.hourtot) # also kid - - model_adult.is_informal_carer = (frs_person.carefl == 1) # also kid - process_relationships!( model_adult, frs_person ) - # - # illness benefit levels - # See the note on this in docs/ - model_adult.dlaself_care_type = LowMiddleHigh(map123( model_adult.income_dlaself_care, [30, 60 ] )) - model_adult.dlamobility_type = LowMiddleHigh(map123(model_adult.income_dlamobility, [30] )) - model_adult.attendance_allowance_type = LowMiddleHigh(map123( model_adult.income_attendance_allowance, [65] )) - model_adult.personal_independence_payment_daily_living_type = PIPType(map12( model_adult.income_personal_independence_payment_daily_living, 65 )) - model_adult.personal_independence_payment_mobility_type = PIPType(map12( model_adult.income_personal_independence_payment_mobility, 30 )) - end # adult loop - println("final adno $adno") - return adult_model[1:adno, :] 
-end # proc create_adult - -# -# -function create_children( - year::Integer, - frs_children::DataFrame, - childcare::DataFrame, - benefits:: DataFrame )::DataFrame - num_children = size(frs_children)[1] - child_model = initialise_person(num_children) - ccount = 0 - for chno in 1:num_children - if chno % 1000 == 0 - println("on year $year, chno $chno") - end - frs_person = frs_children[chno, :] - a_childcare = childcare[((childcare.sernum.==frs_person.sernum).&(childcare.benunit.==frs_person.benunit).&(childcare.person.==frs_person.person)), :] - nchildcares = size(a_childcare)[1] - - sernum = frs_person.sernum - ccount += 1 - ## also for children - model_child = child_model[ccount, :] - - model_child.pno = frs_person.person - model_child.hid = frs_person.sernum - model_child.uhid = get_pid( FRSSource, year, frs_person.sernum, 0 ) # unique hhid needed for mostly.ai generator - - model_child.pid = get_pid(FRSSource, year, frs_person.sernum, frs_person.person) - model_child.from_child_record = true - - model_child.data_year = year - model_child.default_benefit_unit = frs_person.benunit - model_child.age = frs_person.age - model_child.sex = Sex(safe_assign(frs_person.sex)) - # model_child.ethnic_group = safe_assign(frs_person.ethgr3) - ## also for child - # println( "frs_person.chlimitl='$(frs_person.chlimitl)'") - model_child.has_long_standing_illness = (frs_person.chealth1 == 1) - model_child.how_long_adls_reduced = Illness_Length(frs_person.chlimitl < 0 ? -1 : frs_person.chlimitl) - model_child.adls_are_reduced = ADLS_Inhibited(frs_person.chcond < 0 ? -1 : frs_person.chcond) # missings to 'not at all' - model_child.over_20_k_saving = 0 - - model_child.registered_blind = (frs_person.spcreg1 == 1) - model_child.registered_partially_sighted = (frs_person.spcreg2 == 1) - model_child.registered_deaf = (frs_person.spcreg3 == 1 ) - - model_child.disability_vision = ( frs_person.cdisd01 == 1 ) # cdisd kids .. 
- model_child.disability_hearing = ( frs_person.cdisd02 == 1 ) - model_child.disability_mobility = ( frs_person.cdisd03 == 1 ) - model_child.disability_dexterity = ( frs_person.cdisd04 == 1 ) - model_child.disability_learning = ( frs_person.cdisd05 == 1 ) - model_child.disability_memory = ( frs_person.cdisd06 == 1 ) - model_child.disability_mental_health = ( frs_person.cdisd07 == 1 ) - model_child.disability_stamina = ( frs_person.cdisd08 == 1 ) - model_child.disability_socially = ( frs_person.cdisd09 == 1 ) - # dindividual_savings_accountbility_other_difficulty = Vector{Union{Real,Missing}}(missing, n), - model_child.health_status = Health_Status(safe_assign(frs_person.heathch)) - model_child.income_wages = safe_inc( 0.0, frs_person.chearns ) - model_child.income_other_investment_income = safe_inc( 0.0, frs_person.chsave ) - model_child.income_other_income = safe_inc( 0.0, frs_person.chrinc ) - model_child.income_free_school_meals = 0.0 - for t in [:fsbval,:fsfvval,:fsmlkval,:fsmval] - model_child.income_free_school_meals = safe_inc( model_child.income_free_school_meals, frs_person[t] ) - end - model_child.is_informal_carer = (frs_person.carefl == 1 ) # also kid - process_relationships!( model_child, frs_person ) - # TODO education grants, all the other good child stuff EMA - - model_child.cost_of_childcare = 0.0 - model_child.hours_of_childcare = 0.0 - for c in 1:nchildcares - if c == 1 # type of care from 1st instance - model_child.childcare_type = - Child_Care_Type(map_child_care( year, a_childcare[c, :chlook] )) - model_child.employer_provides_child_care = (a_childcare[c, :emplprov] == 2) - end - model_child.cost_of_childcare = safe_inc( - model_child.cost_of_childcare, - a_childcare[c, :chamt] - ) - model_child.hours_of_childcare = safe_inc( - model_child.hours_of_childcare, - a_childcare[c, :chhr] - ) - end # child care loop - model_child.onerand = mybigrandstr() - # - # - # this is zero length - # a_oddjob = 
oddjob[((oddjob.sernum.==frs_person.sernum).&(oddjob.benunit.==frs_person.benunit).&(oddjob.person.==frs_person.person)), :] - # this isn't - a_benefits = benefits[((benefits.sernum.==frs_person.sernum).&(benefits.benunit.==frs_person.benunit).&(benefits.person.==frs_person.person)), :] - sb = size( a_benefits )[1] - # println( "sb = $sb") - # @assert sb in [0,1] - process_benefits!( model_child, a_benefits ) - - end # chno loop - return child_model[1:ccount,:] # send them all back ... -end - -function create_household( - year::Integer, - frs_household::DataFrame, - renter::DataFrame, - mortgage::DataFrame, - mortcont::DataFrame, - owner::DataFrame, - frsx :: DataFrame )::DataFrame - - num_households = size(frs_household)[1] - hh_model = initialise_household(num_households) - hhno = 0 - for hn in 1:num_households - if hn % 1000 == 0 - println("on year $year, hid $hn") - end - hh = frs_household[hn, :] - frx = frsx[(frsx.sernum.==hh.sernum ), :] - - sernum = hh.sernum - hhno += 1 - dd = split(hh.intdate, "/") - hh_model[hhno, :interview_year] = parse(Int64, dd[3]) - interview_month = parse(Int8, dd[1]) - hh_model[hhno, :interview_month] = interview_month - hh_model[hhno, :quarter] = div(interview_month - 1, 3) + 1 - - hh_model[hhno, :hid] = sernum - hh_model[hhno, :uhid] = get_pid( FRSSource, year, sernum, 0 ) # unique hhid needed for mostly.ai generator - - hh_model[hhno, :data_year] = year - hh_model[hhno, :tenure] = Tenure_Type( max(-1,hh.tentyp2)) - hh_model[hhno, :dwelling] = DwellingType(max(-1,hh.typeacc)) - hh_model[hhno, :region] = Standard_Region(max(-1,hh.gvtregn)) - hh_model[hhno, :ct_band] = CT_Band(max(-1,hh.ctband)) - hh_model[hhno, :weight] = hh.gross4 - # hh_model[hhno, :tenure] = hh.tentyp2 > 0 ? Tenure_Type(hh.tentyp2) : - # Missing_Tenure_Type - # hh_model[hhno, :region] = hh.gvtregn > 0 ? Standard_Region(hh.gvtregn) : - # Missing_Standard_Region - # hh_model[hhno, :ct_band] = hh.ctband > 0 ? 
CT_Band(hh.ctband) : Missing_CT_Band - # - # council_tax::Real - # FIXME this is rounded to £ - if hh_model[hhno, :region] == 299999999 # Scotland # FIXME this is whole £s only - # also 16 missings in 2015 - investigate - hh_model[hhno, :water_and_sewerage] = safe_assign(hh.cwatamtd) - elseif hh_model[hhno, :region] == 399999999 # Nireland - hh_model[hhno, :water_and_sewerage] = 0.0 # FIXME NIreland in rates???? - else # - hh_model[hhno, :water_and_sewerage] = safe_assign(hh.watsewrt) - end - # FIXME this needs renamed: actually capital component - hh_model[hhno, :mortgage_payment] = mortage_capital_payments( frx ) - mit = safe_assign( hh.mortint ) - hh_model[hhno, :mortgage_interest] = max( 0.0, mit ) # > 0 ? mit : missing - - # TODO - # years_outstanding_on_mortgage::Integer - # mortgage_outstanding::Real - # year_house_bought::Integer - # FIXME rounded to £1 - hh_model[hhno, :gross_rent] = max(0.0, hh.hhrent) # rentg Gross rent including Housing Benefit or rent Net amount of last rent payment - - rents = renter[(renter.sernum.==sernum), :] - nrents = size(rents)[1] - hh_model[hhno, :rent_includes_water_and_sewerage] = false - for r in 1:nrents - if (rents[r, :wsinc] in [1, 2, 3]) - hh_model[hhno, :rent_includes_water_and_sewerage] = true - end - end - ohc = 0.0 - ohc = safe_inc(ohc, hh.chrgamt1) - ohc = safe_inc(ohc, hh.chrgamt2) - ohc = safe_inc(ohc, hh.chrgamt3) - ohc = safe_inc(ohc, hh.chrgamt4) - ohc = safe_inc(ohc, hh.chrgamt5) - ohc = safe_inc(ohc, hh.chrgamt6) - ohc = safe_inc(ohc, hh.chrgamt7) - ohc = safe_inc(ohc, hh.chrgamt8) - ohc = safe_inc(ohc, hh.chrgamt9) - hh_model[hhno, :other_housing_charges] = ohc - hh_model[hhno, :bedrooms] = hh.bedroom6 - hh_model[hhno, :onerand] = mybigrandstr() - hh_model[hhno, :original_gross_income] = hh.hhinc - # TODO - # gross_housing_costs::Real - # total_wealth::Real - # house_value::Real - # people::People_Dict - end - hh_model[1:hhno, :] -end - -""" -Override loadfrs for all the weird missings in frx1920 etc. 
-""" -function loadfrsx(which::AbstractString, year )::DataFrame - filename = "$(L_FRS_DIR)/$(year)/tab/$(which).tab" - df = CSV.File(filename, delim = '\t', missingstring=[""," ","-1"]) |> DataFrame # - lcnames = Symbol.(lowercase.(string.(names(df)))) - rename!(df, lcnames) - df.data_year .= year - df -end - -function create_data(;start_year::Int, end_year::Int) - for year in start_year:end_year - print("on year $year ") - y = year - 2000 - ystr = "$(y)$(y+1)" - # we only want this massive thing for a couple of - # benefit variables. - frsx = loadfrsx( "frs$ystr", year ) - accounts = loadfrs("accounts", year) - adult = loadfrs("adult", year) - # probably *is* sorted by this. - sort!( adult, [:sernum, :benunit, :person ]) - assets = loadfrs("assets", year) - benefits = loadfrs("benefits", year) - benunit = loadfrs("benunit", year) - care = loadfrs("care", year) - child = loadfrs("child", year) - chldcare = loadfrs("chldcare", year) - endowmnt = loadfrs("endowmnt", year) - extchild = loadfrs("extchild", year) - govpay = loadfrs("govpay", year) - househol = loadfrs("househol", year) - job = loadfrs("job", year) - maint = loadfrs("maint", year) - mortcont = loadfrs("mortcont", year) - mortgage = loadfrs("mortgage", year) - oddjob = loadfrs("oddjob", year) - owner = loadfrs("owner", year) - penprov = loadfrs("penprov", year) - pension = loadfrs("pension", year) - rentcont = loadfrs("rentcont", year) - renter = loadfrs("renter", year) - # - # 2021 renames ... 
these are all the same variables - # - renameif!( adult, ["nssec20"=> "nssec", "soc2020"=>"soc2010"]) # 2021 change; this seems to be the same variable - - model_children = create_children( - year, - child, - chldcare, - benefits ) - - model_people = create_adults( - year, - adult, - accounts, - benunit, - extchild, - maint, - penprov, - # admin, - care, - mortcont, - pension, - govpay, - mortgage, - assets, - chldcare, - househol, - oddjob, - benefits, - endowmnt, - job, - frsx ) - model_households = create_household( - year, - househol, - renter, - mortgage, - mortcont, - owner, - frsx ) - println( "on year $year") - println( "hhlds") - append = year > start_year - CSV.write("$(MODEL_DATA_DIR)/actual_data/model_households-$(start_year)-$(end_year)-w-enums-2.tab", model_households, delim = "\t", append=append) - CSV.write("$(MODEL_DATA_DIR)/actual_data/model_people-$(start_year)-$(end_year)-w-enums-2.tab", model_people, delim = "\t", append=append) - CSV.write("$(MODEL_DATA_DIR)/actual_data/model_people-$(start_year)-$(end_year)-w-enums-2.tab", model_children, delim = "\t", append=true) - - end -end \ No newline at end of file diff --git a/src/notused/MatchingLibs.jl b/src/notused/MatchingLibs.jl deleted file mode 100644 index 7b469c5c..00000000 --- a/src/notused/MatchingLibs.jl +++ /dev/null @@ -1,2967 +0,0 @@ - -module MatchingLibs - -# -# A script to match records from 2019/19 to 2020/21 lcf to 2020 FRS -# strategy is to match to a bunch of characteristics, take the top 20 of those, and then -# match between those 20 on household income. -# TODO -# - make this into a module and a bit more general-purpose; -# - write up, so why not just Engel curves? 
-# -using ScottishTaxBenefitModel -using .Definitions, - .ModelHousehold, - .FRSHouseholdGetter, - # FIXME cross dependency .ExampleHouseholdGetter, - .Uprating, - .RunSettings - -using CSV, - DataFrames, - Measures, - StatsBase, - ArgCheck, - PrettyTables - -export make_lcf_subset, - map_example, - load, - map_all_lcf_frs, - frs_lcf_match_row - -struct LCFLocation - case :: Int - datayear :: Int - score :: Float64 - income :: Float64 - incdiff :: Float64 -end - -s = instances( Socio_Economic_Group ) - -""" -Load 2020/21 FRS and add some matching fields -""" -function loadfrs()::Tuple - frsrows,frscols,frshh = load( "/mnt/data/frs/2021/tab/househol.tab",2021) - farows,facols,frsad = load( "/mnt/data/frs/2021/tab/adult.tab", 2021) - frs_hh_pp = innerjoin( frshh, frsad, on=[:sernum,:datayear], makeunique=true ) - add_some_frs_fields!( frshh, frs_hh_pp ) - return frshh,frspers,frs_hh_pp -end -# fcrows,fccols,frsch = load( "/mnt/data/frs/2021/tab/child.tab", 2021 ) - -""" -Scottish Version on Pooled data -""" -function load_scottish_frss( startyear::Int, endyear :: Int )::NamedTuple - frshh = DataFrame() - frs_hh_pp = DataFrame() - frspers = DataFrame() - for year in startyear:endyear - lhh = loadfrs( "househol", year ) - lhh = lhh[ lhh.gvtregn.== 299999999, :] # SCOTLAND - lhh.datayear .= year - lad = loadfrs( "adult", year ) - lad.datayear .= year - l_hh_pp = innerjoin( lhh, lad, on=[:sernum,:datayear], makeunique=true ) - add_some_frs_fields!( lhh, l_hh_pp ) - frshh = vcat( frshh, lhh; cols=:union ) - frspers = vcat( frspers, lad; cols=:union ) - frs_hh_pp = vcat( frs_hh_pp, l_hh_pp, cols=:union ) - end - (; frshh, frspers, frs_hh_pp ) -end - -""" -North_East = 1 -North_West = 2 -Yorks_and_the_Humber = 3 -East_Midlands = 4 -West_Midlands = 5 -East_of_England = 6 -London = 7 -South_East = 8 -South_West = 9 -Scotland = 11 -Wales = 10 -Northern_Ireland = 12 -""" -function frs_regionmap( gvtregn :: Union{Int,Missing}, default=9999 ) :: Vector{Int} - out = fill( default, 
3 ) - # gvtregn = parse(Int, gvtregn ) - if ismissing( gvtregn ) - ; - elseif gvtregn == 112000007 # london - out[1] = 7 - out[2] = 1 - elseif gvtregn in 112000001:112000009 # rEngland - out[1] = gvtregn - 112000000 - out[2] = 2 - elseif gvtregn == 299999999 # scotland - out[1] = 11 - out[2] = 3 - elseif gvtregn == 399999999 # - out[1] = 10 - out[2] = 4 - elseif gvtregn == 499999999 # nire - out[1] = 12 - out[2] = 5 - else - @assert false "unmatched gvtregn $gvtregn"; - end - return out -end - -function model_regionmap( reg :: Standard_Region ) :: Vector{Int} - return frs_regionmap( Int( reg ), 9998 ) -end - -""" -Score for one of our 3-level matches 1 for exact 0.5 for partial 1, 0.1 for partial 2 -""" -function score( a3 :: Vector{Int}, b3 :: Vector{Int})::Float64 - return if a3[1] == b3[1] - 1.0 - elseif a3[2] == b3[2] - 0.5 - elseif a3[3] == b3[3] - 0.1 - else - 0.0 - end -end - -""" -Score for comparison between 2 ints: 1 for exact, 0.5 for within 2 steps, 0.1 for within 5. FIXME look at this again. 
-""" -function score( a :: Int, b :: Int ) :: Float64 - return if a == b - 1.0 - elseif abs( a - b ) < 2 - 0.5 - elseif abs( a - b ) < 5 - 0.1 - else - 0.0 - end -end - -function load( path::String, datayear :: Int )::Tuple - d = CSV.File( path ) |> DataFrame - ns = lowercase.(names( d )) - rename!( d, ns ) - d.datayear .= datayear - rows,cols = size(d) - return rows,cols,d -end - -export TOPCODE, within, load, uprate_incomes!, checkdiffs - -const NUM_SAMPLES = 20 - -function checkdiffs( title::String, col1::Vector, col2::Vector ) - n = size(col1)[1] - @assert n == size(col2)[1] - out = [] - for i in 1:n - d = col1[i] - col2[i] - if abs(d) > 0.00001 - push!( out, (i, d) ) - end - end - if size(out)[1] !== 0 - println("differences at positions $out") - end -end - -function searchbaddies(lcf::DataFrame, rows, amount::Real, op=≈) - nms = names(lcf) - nc = size(lcf)[2] - for i in 1:nc - for r in rows - if(typeof(lcf[r,i]) == Float64) && op(lcf[r,i], amount ) - println("row $r varname = $(n[i])") - end - end - end -end - -function person_map( n::Int, default::Int )::Vector{Int} - @argcheck n >= 0 - out = fill( default, 3 ) - out[1] = n - out[2] = if n in 0:2 - n - else - 3 - end - out -end - - -""" -Small, easier to use, subset of lfs expenditure codes kinda sorta matching the tax system we're modelling. 
-""" -function make_lcf_subset( lcf :: DataFrame ) :: DataFrame - out = DataFrame( - case = lcf.case, - datayear = lcf.datayear, - month = lcf.a055, - year= lcf.year, - a121 = lcf.a121, - gorx = lcf.gorx, - a065p = lcf.a065p, - a062 = lcf.a062, - - any_wages = lcf.any_wages, - any_pension_income = lcf.any_pension_income, - any_selfemp = lcf.any_selfemp, - hrp_unemployed = lcf.hrp_unemployed, - num_children = lcf.num_children, - hrp_non_white = lcf.hrp_non_white, - num_people = lcf.num_people, - income = lcf.income, - any_disabled = lcf.any_disabled, - has_female_adult = lcf.has_female_adult ) - - #= top level COICOP - 01 Food and Non-Alcoholic Beverages - 02 Alcoholic Beverages, Tobacco and Narcotics - 03 Clothing and Footwear - 04 Housing, Water, Electricity, Gas and Other Fuels - 05 Furnishings, Household Equipment and Routine Maintenance of the House - 06 Health - 07 Transport - 08 Communication - 09 Recreation - 10 (A) Education - 11 (B) Restaurant and Hotels - 12 (C) Miscellaneous Goods and Services - 20 (K) Non-Consumption Expenditure - =# - - # 01) food - - out.sweets_and_icecream = lcf.c11831t + lcf.c11841t + lcf.c11851t - out.other_food_and_beverages = lcf.p601t - out.sweets_and_icecream - out.hot_and_eat_out_food = ## CHECK is this counting children's sweets twice? 
- lcf.cb1111t + - lcf.cb1112t + - lcf.cb1113t + - lcf.cb1114t + - lcf.cb1115t + - lcf.cb1116t + - lcf.cb1117c + - lcf.cb1118c + - lcf.cb1119c + - lcf.cb111ac + - lcf.cb111bc + - lcf.cb1121t + - lcf.cb1122t + - lcf.cb1123t + - lcf.cb1124t + - lcf.cb1125t + - lcf.cb1126t + - lcf.cb1127t + - lcf.cb1128t + - lcf.cb112bt + - lcf.cb1213t - - # 02 Alcoholic Beverages, Tobacco and Narcotics - - out.spirits = lcf.cb111ct + lcf.c21111t - out.wine = lcf.cb111dt + lcf.c21211t - out.fortified_wine = lcf.cb111et + lcf.c21212t - out.cider = lcf.cb111ft + lcf.c21213t - out.alcopops = lcf.cb111gt + lcf.c21214t - out.champagne = lcf.cb111ht + lcf.c21221t - out.beer = lcf.cb111it + lcf.cb111jt + lcf.c21311t # fixme rounds of drinks are beer! - - out.cigarettes = lcf.c22111t - out.cigars = lcf.c22121t - out.other_tobacco = lcf.c22131t # ?? Assume Vapes? - - # 03 Clothing and Footwear - - out.childrens_clothing_and_footwear = lcf.c31231t + lcf.c31232t + lcf.c31233t + lcf.c31234t + lcf.c31313t + lcf.c32131t - out.helmets_etc = lcf.c31315t - out.other_clothing_and_footwear = lcf.p603t - out.helmets_etc - out.childrens_clothing_and_footwear - - # 04 Housing, Water, Electricity, Gas and Other Fuels - out.domestic_fuel_electric =(lcf.b175 - lcf.b178) + lcf.b227 + lcf.c45114t - out.domestic_fuel_gas = (lcf.b170 - lcf.b173) + lcf.b226 + lcf.b018 + lcf.c45112t + lcf.c45214t + lcf.c45222t - out.domestic_fuel_coal = lcf.c45411t - out.domestic_fuel_other = lcf.b017 + lcf.c45312t + lcf.c45412t + lcf.c45511t - out.other_housing = lcf.p604t - out.domestic_fuel_electric - out.domestic_fuel_gas - out.domestic_fuel_coal - out.domestic_fuel_other - - # 05 Furnishings, Household Equipment and Routine Maintenance of the House - - out.furnishings_etc = lcf.p605t - - out.medical_services = lcf.c62112t + lcf.c62113t + lcf.c62114t + lcf.c62211t + lcf.c62311t + lcf.c62321t + lcf.c63111t + lcf.c62331t + lcf.c62322t + lcf.c62212t + lcf.c62111t# exempt - out.prescriptions = lcf.c61111t # zero - 
out.other_medicinces = lcf.c61112t # vatable - out.spectacles_etc = lcf.c61311t + lcf.c61312t # vatable but see: https://www.chapman-opticians.co.uk/vat_on_spectacles - out.other_health = lcf.c61211t + lcf.c61313t # but condoms smoking medicines (?? tampons ) - checkdiffs( "health", out.medical_services + out.prescriptions + out.other_medicinces + out.spectacles_etc + out.other_health, lcf.p606t ) - - # :c61111t,:c61112t,:c61211t,:c61311t,:c61312t,:c61313t,:c62111t,:c62112t,:c62113t,:c62114t,:c62211t,:c62212t,:c62311t,:c62321t,:c62322t,:c62331t,:c63111t - - # lcf[399,[:p606t, :c61111t,:c61112t,:c61211t,:c61311t,:c61312t,:c61313t,:c62111t,:c62112t,:c62113t,:c62114t,:c62211t,:c62212t,:c62311t,:c62321t,:c62322t,:c62331t,:c63111t]] - - # 07 Transport !!1 DURABLES - # ?? how are outright purchases handled? - out.bus_boat_and_train = lcf.b216 + lcf.b217 + lcf.b218 + lcf.b219 + lcf.c73212t + lcf.c73411t + lcf.c73512t + lcf.c73513t + lcf.p546c # zero FIXME I don't see why p546c - children's transport - is needed but we don't add up otherwise. - out.air_travel = lcf.b487 + lcf.b488 - out.petrol = lcf.c72211t - out.diesel = lcf.c72212t - out.other_motor_oils = lcf.c72213t - out.other_transport = lcf.p607t - (out.bus_boat_and_train + out.air_travel + out.petrol + out.diesel + out.other_motor_oils) - - # 08 Communication - out.communication = lcf.p608t # Standard - - # 09 Recreation - out.books = lcf.c95111t - out.newspapers = lcf.c95211t - out.magazines = lcf.c95212t - out.gambling = lcf.c94314t # - winnings? C9431Dt - out.museums_etc = lcf.c94221t * 0.5 # FIXME includes theme parks - out.postage = lcf.c81111t + lcf.cc6212t - out.other_recreation = lcf.p609t - (out.books + out.newspapers + out.magazines + out.gambling + out.museums_etc + out.postage) - # FIXME deaf ebooks .. 
- # 10 (A) Education - out.education = lcf.p610t # exempt - # 11 (B) Restaurant and Hotels - out.hotels_and_restaurants = lcf.p611t - out.hot_and_eat_out_food - (lcf.cb111ct+lcf.cb111dt+lcf.cb111et+lcf.cb111ft+lcf.cb111gt+lcf.cb111ht+lcf.cb111it + lcf.cb111jt) # h&r less the food,drink,alcohol - # 12 (C) Miscellaneous Goods and Services - - out.insurance = lcf.b110 + - lcf.b168 + - lcf.cc5213t + - lcf.cc5311c + - lcf.cc5411c + - lcf.cc5412t + - lcf.cc5413t + - lcf.cc6211c + - lcf.cc6212t + - lcf.cc6214t + - lcf.cc7111t + - lcf.cc7112t + - lcf.cc7113t + - lcf.cc7115t + - lcf.cc7116t # exempt - - out.other_financial = - lcf.b1802 + - lcf.b188 + - lcf.b229 + - lcf.b238 + - lcf.b273 + - lcf.b280 + - lcf.b281 + - lcf.b282 + - lcf.b283 # exempt - - out.prams_and_baby_chairs = - lcf.cc3222t + - lcf.cc3223t # zero - - out.care_services = lcf.cc4121t + lcf.cc4111t + lcf.cc4112t - - out.trade_union_subs = lcf.cc1317t # fixme rename - - out.nappies = lcf.cc1317t * 0.5 # nappies zero rated, other baby goods standard rated FIXME wild guess - - out.funerals = lcf.cc7114t # exempt https://www.gov.uk/guidance/burial-cremation-and-commemoration-of-the-dead-notice-70132 - - out.womens_sanitary = lcf.cc1312t * 0.5 # FIXME wild guess - - out.other_misc_goods = lcf.p612t - ( - out.womens_sanitary + - out.insurance + - out.other_financial + - out.prams_and_baby_chairs + - out.care_services + - out.nappies + - out.funerals + - out.trade_union_subs) # rest standard rated - - out.non_consumption = lcf.p620tp - - out.total_expenditure = lcf.p630tp - - checkdiffs( "total spending", - out.sweets_and_icecream + - out.other_food_and_beverages + - out.hot_and_eat_out_food + - out.spirits + - out.wine + - out.fortified_wine + - out.cider + - out.alcopops + - out.champagne + - out.beer + - out.cigarettes + - out.cigars + - out.other_tobacco + - out.childrens_clothing_and_footwear + - out.helmets_etc + - out.other_clothing_and_footwear + - out.domestic_fuel_electric + - out.domestic_fuel_gas + - 
out.domestic_fuel_coal + - out.domestic_fuel_other+ - out.other_housing + - out.furnishings_etc + - out.medical_services + - out.prescriptions + - out.other_medicinces + - out.spectacles_etc + - out.other_health + - out.bus_boat_and_train + - out.air_travel + - out.petrol + - out.diesel + - out.other_motor_oils + - out.other_transport + - out.communication + - out.books + - out.newspapers + - out.magazines + - out.museums_etc + - out.postage + - out.other_recreation + - out.education + - out.hotels_and_restaurants + - out.insurance + - out.other_financial + - out.prams_and_baby_chairs + - out.care_services + - out.nappies + - out.funerals + - out.womens_sanitary + - out.other_misc_goods + - out.trade_union_subs + - out.gambling + - out.non_consumption, - lcf.p630tp ) - - out.repayments = - lcf.b237 + lcf.b238 + lcf.ck5316t + lcf.cc6211c - - return out - - #= 06 - Health - see https://www.gov.uk/guidance/health-professionals-pharmaceutical-products-and-vat-notice-70157 - in summary: - * services EXEPMT, if on the big list - * contraception, smoking zero related - * medicines ZERO if from a listed person - * other stuff from pharmacy VATABLE - * specs VATABLE - * opticians services EXEMPT - * - - Dataset | year | tables | name | pos | var_fmt | measurement_level | label | data_type - ---------+------+-----------------+---------+-----+---------+-------------------+-------------------------------------------------------------------------------------------------+----------- - lcf | 2020 | dvhh | C61111t | 960 | numeric | scale | NHS prescription charges and payments - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C61112t | 961 | numeric | scale | Medicines and medical goods (not NHS) - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C61211t | 962 | numeric | scale | Other medical products (eg plasters, condoms, tubigrip, etc.) 
- children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C61311t | 963 | numeric | scale | Purchase of spectacles, lenses, prescription glasses - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C61312t | 964 | numeric | scale | Accessories repairs to spectacles lenses - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C61313t | 965 | numeric | scale | Non-optical appliances and equipment (eg wheelchairs, etc.) - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C62111t | 966 | numeric | scale | NHS medical services - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C62112t | 967 | numeric | scale | Private medical services - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C62113t | 968 | numeric | scale | NHS optical services - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C62114t | 969 | numeric | scale | Private optical services - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C62211t | 970 | numeric | scale | NHS dental services - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C62212t | 971 | numeric | scale | Private dental services - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C62311t | 972 | numeric | scale | Services of medical analysis laboratorie - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C62321t | 973 | numeric | scale | Services of NHS medical auxiliaries - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C62322t | 974 | numeric | scale | Services of private medical auxiliaries - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C62331t | 975 | numeric | scale | Non-hospital ambulance services etc. 
- children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C63111t | 976 | numeric | scale | Hospital services - children, aged between 7 and 15 | 1 - - dataset | year | tables | name | pos | var_fmt | measurement_level | label | data_type - ---------+------+--------+---------+-----+---------+-------------------+-------------------------------------------------------------------------------------------------+----------- - lcf | 2020 | dvhh | B216 | 163 | numeric | scale | Bus Tube and/or rail season ticket | 1 - lcf | 2020 | dvhh | B217 | 164 | numeric | scale | Season ticket-bus/coach-total net amount | 1 - lcf | 2020 | dvhh | B218 | 165 | numeric | scale | Season ticket-rail/tube-total net amount | 1 - lcf | 2020 | dvhh | B219 | 166 | numeric | scale | Water travel season ticket | 1 - lcf | 2020 | dvhh | B244 | 173 | numeric | scale | Vehicle - cost of new car/van outright | 1 - lcf | 2020 | dvhh | B245 | 175 | numeric | scale | Vehicle - cost of second-hand car/van outright | 1 - lcf | 2020 | dvhh | B247 | 177 | numeric | scale | Vehicle - cost of motorcycle outright | 1 - lcf | 2020 | dvhh | B248 | 178 | numeric | scale | Car leasing on | 1 - lcf | 2020 | dvhh | B249 | 179 | numeric | scale | Car or van - servicing : amount paid | 1 - lcf | 2020 | dvhh | B250 | 180 | numeric | scale | Car or van - other works, repairs: amount paid | 1 - lcf | 2020 | dvhh | B252 | 181 | numeric | scale | Motor cycle - services, repairs: amount paid | 1 - lcf | 2020 | dvhh | B487 | 229 | numeric | scale | Domestic flight expenditure | 1 - lcf | 2020 | dvhh | B488 | 230 | numeric | scale | International flight expenditure | 1 - lcf | 2020 | dvhh | C71111c | 977 | numeric | scale | Outright purchase of new car/van - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C71112t | 1 | numeric | scale | Loan / HP purchase of new car/van - children and adults | 1 - lcf | 2020 | dvhh | C71121c | 978 | numeric | scale | Outright purchase of second-hand car/van - children, aged 
between 7 and 15 | 1 - lcf | 2020 | dvhh | C71122t | 1 | numeric | scale | Loan / HP purchase of second-hand car/van - children and adults | 1 - lcf | 2020 | dvhh | C71211c | 979 | numeric | scale | Outright purchase of new or second-hand motorcycle - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C71212t | 1 | numeric | scale | Loan / HP purchase of new or second-hand motorcycle - children and adults - children and adults | 1 - lcf | 2020 | dvhh | C71311t | 1 | numeric | scale | Purchase of bicycle - children and adults | 1 - lcf | 2020 | dvhh | C71411t | 1 | numeric | scale | Animal drawn vehicles - children and adults | 1 - lcf | 2020 | dvhh | C72111t | 1 | numeric | scale | Car van accessories and fittings - children and adults | 1 - lcf | 2020 | dvhh | C72112t | 1 | numeric | scale | Car van spare parts - children and adults | 1 - lcf | 2020 | dvhh | C72113t | 1 | numeric | scale | Motor cycle accessories and spare parts - children and adults | 1 - lcf | 2020 | dvhh | C72114t | 1 | numeric | scale | Anti-freeze, battery water, cleaning materials - children and adults | 1 - lcf | 2020 | dvhh | C72115t | 1 | numeric | scale | Bicycle accessories, repairs and other costs - children and adults | 1 - lcf | 2020 | dvhh | C72211t | 1 | numeric | scale | Petrol - children and adults | 1 - lcf | 2020 | dvhh | C72212t | 1 | numeric | scale | Diesel oil - children and adults | 1 - lcf | 2020 | dvhh | C72213t | 1 | numeric | scale | Other motor oils - children and adults | 1 - lcf | 2020 | dvhh | C72311c | 990 | numeric | scale | Car or van repairs and servicing - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C72312c | 991 | numeric | scale | Motor cycle repairs, service - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | C72313t | 1 | numeric | scale | Motoring organisation subscription (eg AA and RAC) - children and adults | 1 - lcf | 2020 | dvhh | C72314t | 1 | numeric | scale | Car washing and breakdown services - children and adults | 1 
- lcf | 2020 | dvhh | C72411t | 1 | numeric | scale | Parking fees, tolls, and permits (excluding motoring fines) - children and adults | 1 - lcf | 2020 | dvhh | C72412t | 1 | numeric | scale | Garage rent,MOT,etc.    - children and adults | 1 - lcf | 2020 | dvhh | C72413t | 1 | numeric | scale | Driving lessons - children and adults | 1 - lcf | 2020 | dvhh | C72414t | 1 | numeric | scale | Hire of self-drive cars, vans, bicycles - children and adults | 1 - lcf | 2020 | dvhh | C73112t | 1 | numeric | scale | Railway and tube fares other than season tickets - children and adults | 1 - lcf | 2020 | dvhh | C73212t | 1 | numeric | scale | Bus and coach fares other than season tickets - children and adults | 1 - lcf | 2020 | dvhh | C73213t | 1 | numeric | scale | Taxis and hired cars with drivers - children and adults | 1 - lcf | 2020 | dvhh | C73214t | 1 | numeric | scale | Other personal travel - children and adults | 1 - lcf | 2020 | dvhh | C73411t | 1 | numeric | scale | Water travel - children and adults | 1 - lcf | 2020 | dvhh | C73512t | 1 | numeric | scale | Combined fares other than season tickets - children and adults | 1 - lcf | 2020 | dvhh | C73513t | 1 | numeric | scale | School travel - children and adults | 1 - lcf | 2020 | dvhh | C73611t | 1 | numeric | scale | Delivery charges and other transport services - children and adults | 1 - Pos. = 817 Variable = C95111 Variable label = Books - adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C95111 - -Pos. = 818 Variable = C95211 Variable label = Newspapers - adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C95211 - -Pos. = 819 Variable = C95212 Variable label = Magazines and periodicals - adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C95212 - -BUT NOT: -Pos. 
= 820 Variable = C95311 Variable label = Cards, calendars, posters and other printed matter - adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C95311 - -Pos. = 821 Variable = C95411 Variable label = Stationery, diaries, address books, art materials - adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C95411 - -see: -https://www.gov.uk/guidance/zero-rating-books-and-printed-matter-for-vat-notice-70110 - - lcf | 2020 | dvhh | CB1111t | 586 | numeric | scale | Catered food non-alcoholic drink eaten / drunk on premises - children and adults | 1 - lcf | 2020 | dvhh | CB1112t | 587 | numeric | scale | Confectionery eaten off premises - children and adults | 1 - lcf | 2020 | dvhh | CB1113t | 588 | numeric | scale | Ice cream eaten off premises - children and adults | 1 - lcf | 2020 | dvhh | CB1114t | 589 | numeric | scale | Soft drinks eaten off premises - children and adults | 1 - lcf | 2020 | dvhh | CB1115t | 590 | numeric | scale | Hot food eaten off premises - children and adults | 1 - lcf | 2020 | dvhh | CB1116t | 591 | numeric | scale | Cold food eaten off premises - children and adults | 1 - lcf | 2020 | dvhh | CB1117c | 492 | numeric | scale | Confectionery (child) - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | CB1118c | 493 | numeric | scale | Ice cream (child) - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | CB1119c | 494 | numeric | scale | Soft drinks (child) - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | CB111Ac | 495 | numeric | scale | Hot food (child) | 1 - lcf | 2020 | dvhh | CB111Bc | 496 | numeric | scale | Cold food (child) | 1 - lcf | 2020 | dvhh | CB1121t | 605 | numeric | scale | Food non-alcoholic drinks eaten drunk on premises - children and adults | 1 - lcf | 2020 | dvhh | CB1122t | 606 | numeric | scale | Confectionery - children and adults | 1 - lcf | 2020 | dvhh | CB1123t | 607 | numeric | scale | Ice 
cream - children and adults | 1 - lcf | 2020 | dvhh | CB1124t | 608 | numeric | scale | Soft drinks - children and adults | 1 - lcf | 2020 | dvhh | CB1125t | 609 | numeric | scale | Hot food - children and adults | 1 - lcf | 2020 | dvhh | CB1126t | 610 | numeric | scale | Cold food - children and adults | 1 - lcf | 2020 | dvhh | CB1127t | 611 | numeric | scale | Hot take away meal eaten at home - children and adults | 1 - lcf | 2020 | dvhh | CB1128t | 612 | numeric | scale | Cold take away meal eaten at home - children and adults | 1 - lcf | 2020 | dvhh | CB112Bt | 613 | numeric | scale | Contract catering (food) | 1 - lcf | 2020 | dvhh | CB1213t | 614 | numeric | scale | Meals bought and eaten at workplace - children and adults | 1 - -Pos. = 576 Variable = C21111t Variable label = Spirits and liqueurs (brought home) - children and adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C21111t - -Pos. = 577 Variable = C21211t Variable label = Wine from grape or other fruit (brought home) - children and adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C21211t - -Pos. = 578 Variable = C21212t Variable label = Fortified wine (brought home) - children and adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C21212t - -Pos. = 579 Variable = C21213t Variable label = Ciders and Perry (brought home) - children and adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C21213t - -Pos. = 580 Variable = C21214t Variable label = Alcopops (brought home) - children and adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C21214t - -Pos. 
= 581 Variable = C21221t Variable label = Champagne and sparkling wines (brought home) - children and adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C21221t - -Pos. = 582 Variable = C21311t Variable label = Beer and lager (brought home) - children and adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C21311t - -Pos. = 583 Variable = C22111t Variable label = Cigarettes - children and adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C22111t - -Pos. = 584 Variable = C22121t Variable label = Cigars - children and adults -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C22121t - -Pos. = 585 Variable = C22131t Variable label = Other tobacco - children and adults -This variable is    numeric, the SPSS measurement level is SCALE - -lcf | 2020 | dvhh | CB111Ct | 597 | numeric | scale | Spirits and liqueurs (away from home) | 1 - lcf | 2020 | dvhh | CB111Dt | 598 | numeric | scale | Wine from grape or other fruit (away from home) | 1 - lcf | 2020 | dvhh | CB111Et | 599 | numeric | scale | Fortified wines (away from home) | 1 - lcf | 2020 | dvhh | CB111Ft | 600 | numeric | scale | Ciders and Perry (away from home) | 1 - lcf | 2020 | dvhh | CB111Gt | 601 | numeric | scale | Alcopops (away from home) | 1 - lcf | 2020 | dvhh | CB111Ht | 602 | numeric | scale | Champagne and sparkling wines (away from home) | 1 - lcf | 2020 | dvhh | CB111It | 603 | numeric | scale | Beer and lager (away from home) | 1 - lcf | 2020 | dvhh | CB111Jt | 604 | numeric | scale | Round of drinks (away from home) | 1 - -SWEETS - Pos. = 467 Variable = C11831c Variable label = Chocolate - children, aged between 7 and 15 - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C11831c - - Pos. 
= 468 Variable = C11841c Variable label = Confectionery products - children, aged between 7 and 15 - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C11841c - - Pos. = 469 Variable = C11851c Variable label = Edible ices and ice cream - children, aged between 7 and 15 - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C11851c - - Pos. = 470 Variable = C11861c Variable label = Other sugar products - children, aged between 7 and 15 - dataset | year | tables | name | pos | var_fmt | measurement_level | label | data_type - ---------+------+--------+---------+-----+---------+-------------------+--------------------------------------------------------------------------------------+----------- - lcf | 2020 | dvhh | C11111t | 509 | numeric | scale | Rice - children and adults | 1 - lcf | 2020 | dvhh | C11121t | 510 | numeric | scale | Bread - children and adults | 1 - lcf | 2020 | dvhh | C11122t | 511 | numeric | scale | Buns, crispbread and biscuits - children and adults | 1 - lcf | 2020 | dvhh | C11131t | 512 | numeric | scale | Pasta products - children and adults | 1 - lcf | 2020 | dvhh | C11141t | 513 | numeric | scale | Cakes and puddings - children and adults | 1 - lcf | 2020 | dvhh | C11142t | 514 | numeric | scale | Pastry (savoury) - children and adults | 1 - lcf | 2020 | dvhh | C11151t | 515 | numeric | scale | Other breads and cereals - children and adults | 1 - lcf | 2020 | dvhh | C11211t | 516 | numeric | scale | Beef (fresh, chilled or frozen) - children and adults | 1 - lcf | 2020 | dvhh | C11221t | 517 | numeric | scale | Pork (fresh, chilled or frozen) - children and adults | 1 - lcf | 2020 | dvhh | C11231t | 518 | numeric | scale | Lamb (fresh, chilled or frozen) - children and adults | 1 - lcf | 2020 | dvhh | C11241t | 519 | numeric | scale | Poultry (fresh, chilled or frozen) - children and adults | 1 - lcf | 2020 | dvhh | C11251t | 520 | numeric | scale | 
Sausages - children and adults | 1 - lcf | 2020 | dvhh | C11252t | 521 | numeric | scale | Bacon and ham - children and adults | 1 - lcf | 2020 | dvhh | C11253t | 522 | numeric | scale | Offal, pâté etc. - children and adults | 1 - lcf | 2020 | dvhh | C11261t | 523 | numeric | scale | Other preserved or processed meat and meat preparations - children and adults | 1 - lcf | 2020 | dvhh | C11271t | 524 | numeric | scale | Other fresh, chilled or frozen edible meat - children and adults | 1 - lcf | 2020 | dvhh | C11311t | 525 | numeric | scale | Fish (fresh, chilled or frozen) - children and adults | 1 - lcf | 2020 | dvhh | C11321t | 526 | numeric | scale | Seafood (fresh, chilled or frozen) - children and adults | 1 - lcf | 2020 | dvhh | C11331t | 527 | numeric | scale | Dried, smoked or salted fish and seafood - children and adults | 1 - lcf | 2020 | dvhh | C11341t | 528 | numeric | scale | Other preserved or processed fish and seafood and preparations - children and adults | 1 - lcf | 2020 | dvhh | C11411t | 529 | numeric | scale | Whole milk - children and adults | 1 - lcf | 2020 | dvhh | C11421t | 530 | numeric | scale | Low fat milk - children and adults | 1 - lcf | 2020 | dvhh | C11431t | 531 | numeric | scale | Preserved milk - children and adults | 1 - lcf | 2020 | dvhh | C11441t | 532 | numeric | scale | Yoghurt - children and adults | 1 - lcf | 2020 | dvhh | C11451t | 533 | numeric | scale | Cheese and curd - children and adults | 1 - lcf | 2020 | dvhh | C11461t | 534 | numeric | scale | Other milk products - children and adults | 1 - lcf | 2020 | dvhh | C11471t | 535 | numeric | scale | Eggs - children and adults | 1 - lcf | 2020 | dvhh | C11511t | 536 | numeric | scale | Butter - children and adults | 1 - lcf | 2020 | dvhh | C11521t | 537 | numeric | scale | Margarine and other vegetable fats - children and adults | 1 - lcf | 2020 | dvhh | C11522t | 538 | numeric | scale | Peanut butter - children and adults | 1 - lcf | 2020 | dvhh | C11531t | 539 | 
numeric | scale | Olive oil - children and adults | 1 - lcf | 2020 | dvhh | C11541t | 540 | numeric | scale | Edible oils - children and adults | 1 - lcf | 2020 | dvhh | C11551t | 541 | numeric | scale | Other edible animal fats - children and adults | 1 - lcf | 2020 | dvhh | C11611t | 542 | numeric | scale | Citrus fruits (fresh) - children and adults | 1 - lcf | 2020 | dvhh | C11621t | 543 | numeric | scale | Bananas (fresh) - children and adults | 1 - lcf | 2020 | dvhh | C11631t | 544 | numeric | scale | Apples (fresh) - children and adults | 1 - lcf | 2020 | dvhh | C11641t | 545 | numeric | scale | Pears (fresh) - children and adults | 1 - lcf | 2020 | dvhh | C11651t | 546 | numeric | scale | Stone fruits (fresh) - children and adults | 1 - lcf | 2020 | dvhh | C11661t | 547 | numeric | scale | Berries (fresh) - children and adults | 1 - lcf | 2020 | dvhh | C11671t | 548 | numeric | scale | Other fresh, chilled or frozen fruits - children and adults | 1 - lcf | 2020 | dvhh | C11681t | 549 | numeric | scale | Dried fruit and nuts - children and adults | 1 - lcf | 2020 | dvhh | C11691t | 550 | numeric | scale | Preserved fruit and fruit-based products - children and adults | 1 - lcf | 2020 | dvhh | C11711t | 551 | numeric | scale | Leaf and stem vegetables (fresh or chilled) - children and adults | 1 - lcf | 2020 | dvhh | C11721t | 552 | numeric | scale | Cabbages (fresh or chilled) - children and adults | 1 - lcf | 2020 | dvhh | C11731t | 553 | numeric | scale | Vegetables grown for their fruit (fresh, chilled or frozen) - children and adults | 1 - lcf | 2020 | dvhh | C11741t | 554 | numeric | scale | Root crops, non-starchy bulbs and mushrooms (fresh or frozen) - children and adults | 1 - lcf | 2020 | dvhh | C11751t | 555 | numeric | scale | Dried vegetables - children and adults | 1 - lcf | 2020 | dvhh | C11761t | 556 | numeric | scale | Other preserved or processed vegetables - children and adults | 1 - lcf | 2020 | dvhh | C11771t | 557 | numeric | scale | 
Potatoes - children and adults | 1 - lcf | 2020 | dvhh | C11781t | 558 | numeric | scale | Other tubers and products of tuber vegetables - children and adults | 1 - lcf | 2020 | dvhh | C11811t | 559 | numeric | scale | Sugar - children and adults | 1 - lcf | 2020 | dvhh | C11821t | 560 | numeric | scale | Jams, marmalades - children and adults | 1 - lcf | 2020 | dvhh | C11831t | 561 | numeric | scale | Chocolate - children and adults | 1 - lcf | 2020 | dvhh | C11841t | 562 | numeric | scale | Confectionery products - children and adults | 1 - lcf | 2020 | dvhh | C11851t | 563 | numeric | scale | Edible ices and ice cream - children and adults | 1 - lcf | 2020 | dvhh | C11861t | 564 | numeric | scale | Other sugar products - children and adults | 1 - lcf | 2020 | dvhh | C11911t | 565 | numeric | scale | Sauces, condiments - children and adults | 1 - lcf | 2020 | dvhh | C11921t | 566 | numeric | scale | Salt, spices and culinary herbs - children and adults | 1 - lcf | 2020 | dvhh | C11931t | 567 | numeric | scale | Baker's yeast, dessert preparations, soups - children and adults | 1 - lcf | 2020 | dvhh | C11941t | 568 | numeric | scale | Other food products - children and adults | 1 - lcf | 2020 | dvhh | C12111t | 569 | numeric | scale | Coffee - children and adults | 1 - lcf | 2020 | dvhh | C12121t | 570 | numeric | scale | Tea - children and adults | 1 - lcf | 2020 | dvhh | C12131t | 571 | numeric | scale | Cocoa and powdered chocolate - children and adults | 1 - lcf | 2020 | dvhh | C12211t | 572 | numeric | scale | Mineral or spring waters - children and adults | 1 - lcf | 2020 | dvhh | C12221t | 573 | numeric | scale | Soft drinks - children and adults | 1 - lcf | 2020 | dvhh | C12231t | 574 | numeric | scale | Fruit juices - children and adults | 1 - lcf | 2020 | dvhh | C12241t | 575 | numeric | scale | Vegetable juices - children and adults | 1 - : - - VATABLE FOOD - from resteraunts and hotels - - Pos. 
= 610 Variable = CB1126t Variable label = Cold food - children and adults - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for CB1126t - - Pos. = 611 Variable = CB1127t Variable label = Hot take away meal eaten at home - children and adults - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for CB1127t - - Pos. = 612 Variable = CB1128t Variable label = Cold take away meal eaten at home - children and adults - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for CB1128t - - Pos. = 613 Variable = CB112Bt Variable label = Contract catering (food) - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for CB112Bt - - Pos. = 614 Variable = CB1213t Variable label = Meals bought and eaten at workplace - children and adults - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for CB1213t - - Pos. = 615 Variable = CB1311t Variable label = Catered food - eaten on premises - children and adults - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for CB1311t - - Pos. = 389 Variable = CB1111 Variable label = Catered food non-alcoholic drink eaten / drunk on premises - adults - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for CB1111 - - Pos. = 390 Variable = CB1112 Variable label = Confectionery eaten off premises - adults - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for CB1112 - - Pos. = 391 Variable = CB1113 Variable label = Ice cream eaten off premises - adults - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for CB1113 - - Pos. 
= 392 Variable = CB1114 Variable label = Soft drinks drunk off premises - adults - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for CB1114 - - Pos. = 393 Variable = CB1115 Variable label = Hot food eaten off premises - adults - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for CB1115 - - Pos. = 394 Variable = CB1116 Variable label = Cold food eaten off premises - adults - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for CB1116 - - # 02) alcohol and tobacco subsets - lcf | 2020 | dvhh | CB111Ct | 597 | numeric | scale | Spirits and liqueurs (away from home) | 1 - lcf | 2020 | dvhh | CB111Dt | 598 | numeric | scale | Wine from grape or other fruit (away from home) | 1 - lcf | 2020 | dvhh | CB111Et | 599 | numeric | scale | Fortified wines (away from home) | 1 - lcf | 2020 | dvhh | CB111Ft | 600 | numeric | scale | Ciders and Perry (away from home) | 1 - lcf | 2020 | dvhh | CB111Gt | 601 | numeric | scale | Alcopops (away from home) | 1 - lcf | 2020 | dvhh | CB111Ht | 602 | numeric | scale | Champagne and sparkling wines (away from home) | 1 - lcf | 2020 | dvhh | CB111It | 603 | numeric | scale | Beer and lager (away from home) | 1 - lcf | 2020 | dvhh | CB111Jt | 604 | numeric | scale | Round of drinks (away from home) | 1 - -Children’s Clothes and footwear -C31231 Variable label = Boys' outer garments (5-15) - adults -C31232 Variable label = Girls' outer garments (5-15) - adults -C31233 Variable label = Infants' outer garments (Under 5) - adults -C31234 Variable label = Children's under garments (Under 16) - adults -C31313 Variable label = Children's accessories - adults -C32131 Variable label = Footwear for children (5-15) and infants - adults - -DOMESTIC Fuel - -(B175 - B178) + B222 + (B170 - B173) + B221 + B018 + B017 + C41211t + C43111t + C43112t + C43212c + C44112u + C44211t + C45112t + C45114t + C45212t + C45214t + C45222t + 
C45312t + C45411t + C45412t + C45511t - -dataset | year | tables | name | pos | var_fmt | measurement_level | label | data_type ----------+------+--------+---------+-----+---------+-------------------+-------------------------------------------------------------------------------------------------+----------- -x lcf | 2020 | dvhh | B017 | 62 | numeric | scale | Oil for central heating - last quarter | 1 -x lcf | 2020 | dvhh | B018 | 63 | numeric | scale | Bottled gas for central heating | 1 -x lcf | 2020 | dvhh | B170 | 116 | numeric | scale | Gas amount paid in last account | 1 -x lcf | 2020 | dvhh | B173 | 133 | numeric | scale | Rebate for separate Gas amount | 1 -x lcf | 2020 | dvhh | B175 | 134 | numeric | scale | Electricity amount paid in last account | 1 -x lcf | 2020 | dvhh | B178 | 135 | numeric | scale | Rebate for separate Electricity amount | 1 -x lcf | 2020 | dvhh | C45112t | 1 | numeric | scale | Second dwelling: electricity account pay - children and adults | 1 -x lcf | 2020 | dvhh | C45114t | 1 | numeric | scale | Electricity slot meter payment - children and adults | 1 -x lcf | 2020 | dvhh | C45214t | 1 | numeric | scale | Gas slot meter payment - children and adults | 1 -x lcf | 2020 | dvhh | C45222t | 1 | numeric | scale | Bottled gas - other - children and adults | 1 -x lcf | 2020 | dvhh | C45312t | 1 | numeric | scale | Paraffin - children and adults | 1 -x lcf | 2020 | dvhh | C45411t | 1 | numeric | scale | Coal and coke - children and adults | 1 -x lcf | 2020 | dvhh | C45412t | 1 | numeric | scale | Wood and peat - children and adults | 1 -lcf | 2020 | dvhh | C45511t | 1 | numeric | scale | Hot water, steam and ice - children and adults | 1 - - # 05 Furnishings, Household Equipment and Routine Maintenance of the House - - :furnishings P605 - - # 06 Health - - :health - - :hospital # Care or medical treatment provided by a qualifying institution like a hospital, hospice or nursing home Exempt VAT Notice 701/31 - :prescriptions # Dispensing of 
prescriptions by a registered pharmacist 0% VAT Notice 701/57 - :doctors # Health services provided by registered doctors, dentists, opticians, pharmacists and other health professionals Exempt VAT Notice 701/57 - :incontinence # Incontinence products 0% VAT Notice 701/7 - :maternity_pads # Maternity pads 0% VAT Notice 701/18 - :sanitary_products # Sanitary protection products 0% VAT Notice 701/18 - :low_vision_aids # Low vision aids 0% Equipment for blind or partially sighted people - :disability_aids - - -dataset | year | tables | name | pos | var_fmt | measurement_level | label | data_type ----------+------+-----------------+---------+-----+---------+-------------------+-------------------------------------------------------------------------------------------------+----------- -lcf | 2020 | dvhh | C61111c | 960 | numeric | scale | NHS prescription charges and payments - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C61112c | 961 | numeric | scale | Medicines and medical goods (not NHS) - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C61211c | 962 | numeric | scale | Other medical products (eg plasters, condoms, tubigrip, etc.) - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C61311c | 963 | numeric | scale | Purchase of spectacles, lenses, prescription glasses - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C61312c | 964 | numeric | scale | Accessories repairs to spectacles lenses - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C61313c | 965 | numeric | scale | Non-optical appliances and equipment (eg wheelchairs, etc.) 
- children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C62111c | 966 | numeric | scale | NHS medical services - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C62112c | 967 | numeric | scale | Private medical services - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C62113c | 968 | numeric | scale | NHS optical services - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C62114c | 969 | numeric | scale | Private optical services - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C62211c | 970 | numeric | scale | NHS dental services - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C62212c | 971 | numeric | scale | Private dental services - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C62311c | 972 | numeric | scale | Services of medical analysis laboratorie - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C62321c | 973 | numeric | scale | Services of NHS medical auxiliaries - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C62322c | 974 | numeric | scale | Services of private medical auxiliaries - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C62331c | 975 | numeric | scale | Non-hospital ambulance services etc. 
- children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C63111c | 976 | numeric | scale | Hospital services - children, aged between 7 and 15 | 1 - -DOMESTIC Fuel - -(B175 - B178) + B222 + (B170 - B173) + B221 + B018 + B017 + C41211t + C43111t + C43112t + C43212c + C44112u + C44211t + C45112t + C45114t + C45212t + C45214t + C45222t + C45312t + C45411t + C45412t + C45511t - -dataset | year | tables | name | pos | var_fmt | measurement_level | label | data_type ----------+------+--------+---------+-----+---------+-------------------+-------------------------------------------------------------------------------------------------+----------- -lcf | 2020 | dvhh | B017 | 62 | numeric | scale | Oil for central heating - last quarter | 1 -lcf | 2020 | dvhh | B018 | 63 | numeric | scale | Bottled gas for central heating | 1 -lcf | 2020 | dvhh | B170 | 116 | numeric | scale | Gas amount paid in last account | 1 -lcf | 2020 | dvhh | B173 | 133 | numeric | scale | Rebate for separate Gas amount | 1 -lcf | 2020 | dvhh | B175 | 134 | numeric | scale | Electricity amount paid in last account | 1 -lcf | 2020 | dvhh | B178 | 135 | numeric | scale | Rebate for separate Electricity amount | 1 -lcf | 2020 | dvhh | C41211t | 1 | numeric | scale | Second dwelling - rent - children and adults | 1 -lcf | 2020 | dvhh | C43111t | 1 | numeric | scale | Paint, wallpaper, timber - children and adults | 1 -lcf | 2020 | dvhh | C43112t | 1 | numeric | scale | Equipment hire, small materials - children and adults | 1 -lcf | 2020 | dvhh | C43212c | 906 | numeric | scale | Other services for the maintenance and repair of the dwelling - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C44211t | 1 | numeric | scale | Refuse collection, including skip hire - children and adults | 1 -lcf | 2020 | dvhh | C45112t | 1 | numeric | scale | Second dwelling: electricity account pay - children and adults | 1 -lcf | 2020 | dvhh | C45114t | 1 | numeric | scale | Electricity slot meter payment - 
children and adults | 1 -lcf | 2020 | dvhh | C45214t | 1 | numeric | scale | Gas slot meter payment - children and adults | 1 -lcf | 2020 | dvhh | C45222t | 1 | numeric | scale | Bottled gas - other - children and adults | 1 -lcf | 2020 | dvhh | C45312t | 1 | numeric | scale | Paraffin - children and adults | 1 -lcf | 2020 | dvhh | C45411t | 1 | numeric | scale | Coal and coke - children and adults | 1 -lcf | 2020 | dvhh | C45412t | 1 | numeric | scale | Wood and peat - children and adults | 1 -lcf | 2020 | dvhh | C45511t | 1 | numeric | scale | Hot water, steam and ice - children and adults | 1 - - - # 07 Transport - - :other_transport - :bus_boat_and_train_tickets - :air_travel - :petrol - :diesel - :other_motor_oils - - # 08 Communication - - - - # 09 Recreation - - :other_recreation - :books - :newspapers - :periodicals - - # 10 (A) Education - - :education - - # 11 (B) Restaurant and Hotels - note takeaway food is already covered above - - lcf | 2020 | dvhh | B260 | 183 | numeric | scale | School meals - total amount paid last week | 1 - lcf | 2020 | dvhh | B482 | 224 | numeric | scale | Holiday hotel within United Kingdom | 1 - lcf | 2020 | dvhh | B483 | 225 | numeric | scale | Holiday hotel outside United Kingdom | 1 - lcf | 2020 | dvhh | B484 | 226 | numeric | scale | Holiday self-cathering within United Kingdom | 1 - lcf | 2020 | dvhh | B485 | 227 | numeric | scale | Holiday self-cathering outside United Kingdom | 1 - lcf | 2020 | dvhh | CB1111t | 586 | numeric | scale | Catered food non-alcoholic drink eaten / drunk on premises - children and adults | 1 - lcf | 2020 | dvhh | CB1112t | 587 | numeric | scale | Confectionery eaten off premises - children and adults | 1 - lcf | 2020 | dvhh | CB1113t | 588 | numeric | scale | Ice cream eaten off premises - children and adults | 1 - lcf | 2020 | dvhh | CB1114t | 589 | numeric | scale | Soft drinks eaten off premises - children and adults | 1 - lcf | 2020 | dvhh | CB1115t | 590 | numeric | scale | Hot food eaten 
off premises - children and adults | 1 - lcf | 2020 | dvhh | CB1116t | 591 | numeric | scale | Cold food eaten off premises - children and adults | 1 - lcf | 2020 | dvhh | CB1117c | 492 | numeric | scale | Confectionery (child) - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | CB1118c | 493 | numeric | scale | Ice cream (child) - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | CB1119c | 494 | numeric | scale | Soft drinks (child) - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | CB111Ac | 495 | numeric | scale | Hot food (child) | 1 - lcf | 2020 | dvhh | CB111Bc | 496 | numeric | scale | Cold food (child) | 1 - lcf | 2020 | dvhh | CB1121t | 605 | numeric | scale | Food non-alcoholic drinks eaten drunk on premises - children and adults | 1 - lcf | 2020 | dvhh | CB1122t | 606 | numeric | scale | Confectionery - children and adults | 1 - lcf | 2020 | dvhh | CB1123t | 607 | numeric | scale | Ice cream - children and adults | 1 - lcf | 2020 | dvhh | CB1124t | 608 | numeric | scale | Soft drinks - children and adults | 1 - lcf | 2020 | dvhh | CB1125t | 609 | numeric | scale | Hot food - children and adults | 1 - lcf | 2020 | dvhh | CB1126t | 610 | numeric | scale | Cold food - children and adults | 1 - lcf | 2020 | dvhh | CB1127t | 611 | numeric | scale | Hot take away meal eaten at home - children and adults | 1 - lcf | 2020 | dvhh | CB1128t | 612 | numeric | scale | Cold take away meal eaten at home - children and adults | 1 - lcf | 2020 | dvhh | CB112Bt | 613 | numeric | scale | Contract catering (food) | 1 - lcf | 2020 | dvhh | CB1213t | 614 | numeric | scale | Meals bought and eaten at workplace - children and adults | 1 - dataset | year | tables | name | pos | var_fmt | measurement_level | label | data_type - ---------+------+--------+---------+-----+---------+-------------------+--------------------------------------------------------------------------------------+----------- - lcf | 2020 | dvhh | B110 | 90 | numeric | scale | 
Structure insurance - last payment | 1 - lcf | 2020 | dvhh | B168 | 115 | numeric | scale | Content insurance amount of last premium | 1 - lcf | 2020 | dvhh | B1802 | 137 | numeric | scale | Bank and Building societies charges - net amount last 3 months | 1 - lcf | 2020 | dvhh | B188 | 142 | numeric | scale | Vehicle insurance - amount paid last year | 1 - lcf | 2020 | dvhh | B229 | 168 | numeric | scale | Medical insurance - total amount premium | 1 - lcf | 2020 | dvhh | B238 | 172 | numeric | scale | Annual standing charge for credit cards | 1 - lcf | 2020 | dvhh | B273 | 201 | numeric | scale | Furniture removal and or storage | 1 - lcf | 2020 | dvhh | B280 | 202 | numeric | scale | Property transaction - purchase and sale | 1 - lcf | 2020 | dvhh | B281 | 203 | numeric | scale | Property transaction - sale only | 1 - lcf | 2020 | dvhh | B282 | 204 | numeric | scale | Property transaction - purchase only | 1 - lcf | 2020 | dvhh | B283 | 205 | numeric | scale | Property transaction - other payments | 1 - lcf | 2020 | dvhh | CC1111t | 1 | numeric | scale | Hairdressing salons and personal grooming - children and adults | 1 - lcf | 2020 | dvhh | CC1211t | 1 | numeric | scale | Electrical appliances for personal care - children and adults | 1 - lcf | 2020 | dvhh | CC1311t | 1 | numeric | scale | Toilet paper - children and adults | 1 - lcf | 2020 | dvhh | CC1312t | 1 | numeric | scale | Toiletries (disposables - tampons, lip balm, toothpaste, etc.) - children and adults | 1 - lcf | 2020 | dvhh | CC1313t | 1 | numeric | scale | Bar of soap, liquid soap, shower gel, etc. - children and adults | 1 - lcf | 2020 | dvhh | CC1314t | 1 | numeric | scale | Toilet requisites (durables - razors, hairbrushes, etc.) 
- children and adults | 1 - lcf | 2020 | dvhh | CC1315t | 1 | numeric | scale | Hair products - children and adults | 1 - lcf | 2020 | dvhh | CC1316t | 1 | numeric | scale | Cosmetics and related accessories - children and adults | 1 - lcf | 2020 | dvhh | CC3111t | 1 | numeric | scale | Jewellery, clocks and watches - children and adults | 1 - lcf | 2020 | dvhh | CC3112t | 1 | numeric | scale | Repairs to personal goods - children and adults | 1 - lcf | 2020 | dvhh | CC3211t | 1 | numeric | scale | Leather and travel goods (excluding baby items) - children and adults | 1 - lcf | 2020 | dvhh | CC3221t | 1 | numeric | scale | Other personal effects n.e.c. - children and adults | 1 - lcf | 2020 | dvhh | CC1317t | 1 | numeric | scale | Baby toiletries and accessories (disposable) - children and adults | 1 - lcf | 2020 | dvhh | CC3222t | 1 | numeric | scale | Baby equipment (excluding prams and pushchairs) - children and adults | 1 - lcf | 2020 | dvhh | CC3223t | 1 | numeric | scale | Prams, pram accessories and pushchairs - children and adults | 1 - lcf | 2020 | dvhh | CC3224t | 1 | numeric | scale | Sunglasses (non-prescription) - children and adults | 1 - lcf | 2020 | dvhh | CC4111t | 1 | numeric | scale | Residential homes - children and adults | 1 - lcf | 2020 | dvhh | CC4112t | 1 | numeric | scale | Home help - children and adults | 1 - lcf | 2020 | dvhh | CC4121t | 1 | numeric | scale | Nursery, creche, playschools - children and adults | 1 - lcf | 2020 | dvhh | CC4122t | 1 | numeric | scale | Child care payments - children and adults | 1 - lcf | 2020 | dvhh | CC5213t | 1 | numeric | scale | Insurance for household appliances - children and adults | 1 - lcf | 2020 | dvhh | CC5311c | 1 | numeric | scale | Private medical insurance - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | CC5411c | 1 | numeric | scale | Vehicle insurance - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | CC5412t | 1 | numeric | scale | Boat insurance (not home) - 
children and adults | 1 - lcf | 2020 | dvhh | CC5413t | 1 | numeric | scale | Non-package holiday, other travel insurance - children and adults | 1 - lcf | 2020 | dvhh | CC6211c | 1 | numeric | scale | Bank service charges - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | CC6212t | 1 | numeric | scale | Bank and Post Office counter charges - children and adults | 1 - lcf | 2020 | dvhh | CC6214t | 1 | numeric | scale | Commission travellers cheques and currency - children and adults | 1 - lcf | 2020 | dvhh | CC7111t | 1 | numeric | scale | Legal fees paid to banks - children and adults | 1 - lcf | 2020 | dvhh | CC7112t | 1 | numeric | scale | Legal fees paid to solicitors - children and adults | 1 - lcf | 2020 | dvhh | CC7113t | 1 | numeric | scale | Other payments for services eg photocopy - children and adults | 1 - lcf | 2020 | dvhh | CC7114t | 1 | numeric | scale | Funeral expenses - children and adults | 1 - lcf | 2020 | dvhh | CC7115t | 1 | numeric | scale | Other professional fees including court fines - children and adults | 1 - lcf | 2020 | dvhh | CC7116t | 1 | numeric | scale | TU and professional organisations - children and adults | 1 - - # 20 (K) Non-Consumption Expenditure - - :non_consumption_expenditure - - dataset | year | tables | name | pos | var_fmt | measurement_level | label | data_type - ---------+------+--------+---------+-----+---------+-------------------+------------------------------------------------------------------------------------------+----------- - lcf | 2020 | dvhh | B030 | 68 | numeric | scale | Domestic rates - last net payment | 1 - lcf | 2020 | dvhh | B038p | 69 | numeric | scale | Council tax - last payment weekly amount | 1 - lcf | 2020 | dvhh | B130 | 92 | numeric | scale | Mortgage interest only - last payment | 1 - lcf | 2020 | dvhh | B150 | 94 | numeric | scale | Mortgage interest / principle - interest paid | 1 - lcf | 2020 | dvhh | B179 | 136 | numeric | scale | Vehicle road tax - amount refunded last | 1 - 
lcf | 2020 | dvhh | B187 | 141 | numeric | scale | Vehicle road tax - amount paid last year | 1 - lcf | 2020 | dvhh | B1961 | 150 | numeric | scale | Life insurance premium - amount premium | 1 - lcf | 2020 | dvhh | B199 | 152 | numeric | scale | Insurance for household and electrical a | 1 - lcf | 2020 | dvhh | B2011 | 154 | numeric | scale | Mortgage endowment policy amount premium | 1 - lcf | 2020 | dvhh | B205 | 157 | numeric | scale | Friendly socs - deductions from main pay | 1 - lcf | 2020 | dvhh | B206 | 158 | numeric | scale | Other insurance - total amount premium | 1 - lcf | 2020 | dvhh | B2081 | 160 | numeric | scale | Mortgage protection amount premium | 1 - lcf | 2020 | dvhh | B228 | 167 | numeric | scale | Personal pension | 1 - lcf | 2020 | dvhh | B237 | 171 | numeric | scale | Credit card interest payments | 1 - lcf | 2020 | dvhh | B265 | 188 | numeric | scale | Maintenance allowance expenditure | 1 - lcf | 2020 | dvhh | B334h | 1 | numeric | scale | Money sent abroad - household | 1 - lcf | 2020 | dvhh | CC5111c | 1 | numeric | scale | Life, death, non-house endowment - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | CC5312c | 1 | numeric | scale | Accident, sickness, redundancy, animal insurance, etc. - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | CC5511c | 1 | numeric | scale | Other insurance - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | CK1313t | 1 | numeric | scale | Central heating installation (DIY) - children and adults | 1 - lcf | 2020 | dvhh | CK1314t | 1 | numeric | scale | Double Glazing, Kitchen Units, Sheds etc. 
- children and adults | 1 - lcf | 2020 | dvhh | CK1315t | 1 | numeric | scale | Purchase of materials for Capital Improvements - children and adults | 1 - lcf | 2020 | dvhh | CK1316t | 1 | numeric | scale | Bathroom fittings - children and adults | 1 - lcf | 2020 | dvhh | CK2111t | 1 | numeric | scale | Food stamps, other food related expenditure - children and adults | 1 - lcf | 2020 | dvhh | CK3111t | 1 | numeric | scale | Stamp duty, licences and fines (excluding motoring fines) - children and adults | 1 - lcf | 2020 | dvhh | CK3112t | 1 | numeric | scale | Motoring Fines - children and adults | 1 - lcf | 2020 | dvhh | CK4111t | 1 | numeric | scale | Money spent abroad - children and adults | 1 - lcf | 2020 | dvhh | CK4112t | 1 | numeric | scale | Duty free goods bought in UK - children and adults | 1 - lcf | 2020 | dvhh | CK5111t | 1 | numeric | scale | Savings, investments (excluding AVCs) - children and adults | 1 - lcf | 2020 | dvhh | CK5113t | 1 | numeric | scale | Additional Voluntary Contributions - children and adults | 1 - lcf | 2020 | dvhh | CK5212t | 1 | numeric | scale | Money given to members for specific purposes: pocket money - children and adults | 1 - lcf | 2020 | dvhh | CK5213t | 1 | numeric | scale | Money given to members for specific purposes: school dinner - children and adults | 1 - lcf | 2020 | dvhh | CK5214t | 1 | numeric | scale | Money given to members for specific purposes: school travel - children and adults | 1 - lcf | 2020 | dvhh | CK5215t | 1 | numeric | scale | Money given to children for specific purposes - children and adults | 1 - lcf | 2020 | dvhh | CK5216t | 1 | numeric | scale | Cash gifts to children - children and adults | 1 - lcf | 2020 | dvhh | CK5221t | 1 | numeric | scale | Money given to those outside the household - children and adults | 1 - lcf | 2020 | dvhh | CK5222t | 1 | numeric | scale | Present - not specified - children and adults | 1 - lcf | 2020 | dvhh | CK5223t | 1 | numeric | scale | Charitable donations 
and subscriptions - children and adults | 1 - lcf | 2020 | dvhh | CK5224c | 1 | numeric | scale | Money sent abroad - children, aged between 7 and 15 | 1 - lcf | 2020 | dvhh | CK5315c | 1 | numeric | scale | Club instalment payment - children, aged between 7 and 15 | 1 - - -lcf | 2020 | dvhh | C31315 | 630 | numeric | scale | Protective head gear (crash helmets) - adults | 1 -lcf | 2020 | dvhh | C31315c | 896 | numeric | scale | Protective head gear (crash helmets) - children, aged between 7 and 15 | 1 -lcf | 2020 | dvhh | C31315t | 1 | numeric | scale | Protective head gear (crash helmets) - children and adults | 1 - - :other_transport - :bus_boat_and_train_tickets - :air_travel - :petrol - :diesel - :other_motor_oils - - Pos. = 721 Variable = C72211 Variable label = Petrol - adults - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C72211 - - Pos. = 722 Variable = C72212 Variable label = Diesel oil - adults - This variable is    numeric, the SPSS measurement level is SCALE - Value label information for C72212 - - Pos. 
= 723 Variable = C72213 Variable label = Other motor oils - adults - This variable is    numeric, the SPSS measurement level is SCALE -=# - -end - -function uprate_incomes!( frshh :: DataFrame, lcfhh :: DataFrame ) - for r in eachrow( frshh ) - dd = split(r.intdate, "/") - y = parse(Int, dd[3]) - m = parse(Int, dd[1]) - q = div( m - 1, 3) + 1 - r.income = Uprating.uprate( r.income, y, q, Uprating.upr_nominal_gdp ) - println( "r.yearcode $(r.yearcode); r.mnthcode $(r.mnthcode); y=$y q=$q income=$(r.income) orig = $(r.income)") - end - for r in eachrow( lcfhh ) - # - # This is e.g January REIS and I don't know what REIS means - # - if r.a055 > 20 - r.a055 -= 20 - end - q = ((r.a055-1) ÷ 3) + 1 # 1,2,3=q1 and so on - # lcf year seems to be actual interview year - y = r.year - r.income = Uprating.uprate( r.income, y, q, Uprating.upr_nominal_gdp ) - end -end - -function uprate_was!( was :: DataFrame ) - -end - -const TOPCODE = 2420.03 - -function within(x;min=min,max=max) - return if x < min min elseif x > max max else x end -end - - -function frs_tenuremap( tentyp2 :: Union{Int,Missing}, default=9999 ) :: Vector{Int} - out = fill( default, 3 ) - if ismissing( tentyp2 ) - - elseif tentyp2 == 1 - out[1] = 1 - out[2] = 1 - elseif tentyp2 == 2 - out[1] = 2 - out[2] = 1 - elseif tentyp2 == 3 - out[1] = 3 - out[2] = 1 - elseif tentyp2 == 4 - out[1] = 4 - out[2] = 1 - elseif tentyp2 == 5 - out[1] = 5 - out[2] = 2 - elseif tentyp2 == 6 - out[1] = 6 - out[2] = 2 - elseif tentyp2 in [7,8] - out[1] = 7 - out[2] = 3 - else - @assert false "unmatched tentyp2 $tentyp2"; - end - return out -end - -function model_tenuremap( t :: Tenure_Type, default=9998 ) :: Vector{Int} - return frs_tenuremap( Int( t ), default ) -end - -#= -lcf | 2020 | dvhh | A121 | 0 | Not Recorded | Not_Recorded -lcf | 2020 | dvhh | A121 | 1 | Local authority rented unfurn | Local_authority_rented_unfurn -lcf | 2020 | dvhh | A121 | 2 | Housing association | Housing_association -lcf | 2020 | dvhh | A121 | 3 | 
Other rented unfurnished | Other_rented_unfurnished -lcf | 2020 | dvhh | A121 | 4 | Rented furnished | Rented_furnished -lcf | 2020 | dvhh | A121 | 5 | Owned with mortgage | Owned_with_mortgage -lcf | 2020 | dvhh | A121 | 6 | Owned by rental purchase | Owned_by_rental_purchase -lcf | 2020 | dvhh | A121 | 7 | Owned outright | Owned_outright -lcf | 2020 | dvhh | A121 | 8 | Rent free | Rent_free -=# -function lcf_tenuremap( a121 :: Union{Int,Missing}, default=9997 ) :: Vector{Int} - out = fill( default, 3 ) - if ismissing( a121 ) - ; - elseif a121 == 1 - out[1] = 1 - out[2] = 1 - elseif a121 == 2 - out[1] = 2 - out[2] = 1 - elseif a121 == 3 - out[1] = 3 - out[2] = 1 - elseif a121 == 4 - out[1] = 3 - out[2] = 1 - elseif a121 in [5,6] - out[1] = 5 - out[2] = 2 - elseif a121 == 7 - out[1] = 6 - out[2] = 2 - elseif a121 == 8 - out[1] = 7 - out[2] = 3 - else - @assert false "unmatched tentyp2 $tentyp2"; - end - return out -end - -#= WAS ten1r7 - Value = 1.0 Label = Own it outright - Value = 2.0 Label = Buying with mortgage - Value = 3.0 Label = Part rent part mortgage - Value = 4.0 Label = Rent it - Value = 5.0 Label = Rent-free - Value = 6.0 Label = Squatting - Value = -9.0 Label = Not asked / applicable - Value = -8.0 Label = Don't know/Refusal - - llord7 - -Value = 1.0 Label = Local authority / council / Scottish Homes - Value = 2.0 Label = Housing association / charitable trust / local housing company - Value = 3.0 Label = Employer (organisation) of household member - Value = 4.0 Label = Another organisation - Value = 5.0 Label = Relative / friend of household member - Value = 6.0 Label = Employer (individual) of household member - Value = 7.0 Label = Another individual private landlord - Value = -9.0 Label = Not asked / applicable - Value = -8.0 Label = Don't know/ Refusal - -FRS - - Missing_Tenure_Type = -1 - Council_Rented = 1 - Housing_Association = 2 - Private_Rented_Unfurnished = 3 - Private_Rented_Furnished = 4 - Mortgaged_Or_Shared = 5 - Owned_outright = 6 - 
Rent_free = 7 - Squats = 8 - -=# - -""" -Map to FRS i.e - Missing_Tenure_Type = -1 - Council_Rented = 1 - Housing_Association = 2 - Private_Rented_Unfurnished = 3 - Private_Rented_Furnished = 4 - Mortgaged_Or_Shared = 5 - Owned_outright = 6 - Rent_free/Squat = 7 -""" -function was_tenuremap_one( wasf :: DataFrame ) :: Vector{Int} - nrows,ncols = size( wasf ) - out = fill(0,nrows) - row = 0 - for was in eachrow( wasf ) - row += 1 - # ten1r7_i since 2 "-8s" so use imputed version - @assert was.ten1r7_i in 1:6 "was.ten1r7 out of range $(was.ten1r7)" - frsten = if was.ten1r7_i == 1 # o-outright - Owned_outright - elseif was.ten1r7_i in 2:3 - Mortgaged_Or_Shared - elseif was.ten1r7_i == 4 # rented - if was.llordr7 == 1 - Council_Rented - elseif was.llordr7 == 2 - Housing_Association - elseif was.llordr7 in 3:7 - if was.furnr7 in 1:2 # furnished, inc part - Private_Rented_Furnished - elseif was.furnr7 == 3 - Private_Rented_Unfurnished - else - @assert false "was.furnr7 out-of-range $(was.furnr7)" - end - else - @assert false "was.llord7 out of range $(was.llord7)" - end - elseif was.ten1r7_i == 5 - Rent_free - elseif was.ten1r7_i == 6 - Squats - end - out[row] = min( Int( frsten ), 7 ) # compress squat/rentfree - @assert out[row] in 1:7 - end # each row - out -end - -function was_tenuremap( was :: DataFrame ) :: Vector{Int} - out = was_tenuremap_ine( was ) - return lcf_tenuremap( out, 9997 ) -end - -#= -frs | 2020 | househol | GVTREGN | 112000001 | North East | North_East -frs | 2020 | househol | GVTREGN | 112000002 | North West | North_West -frs | 2020 | househol | GVTREGN | 112000003 | Yorks and the Humber | Yorks_and_the_Humber -frs | 2020 | househol | GVTREGN | 112000004 | East Midlands | East_Midlands -frs | 2020 | househol | GVTREGN | 112000005 | West Midlands | West_Midlands -frs | 2020 | househol | GVTREGN | 112000006 | East of England | East_of_England -frs | 2020 | househol | GVTREGN | 112000007 | London | London -frs | 2020 | househol | GVTREGN | 112000008 | 
South East | South_East -frs | 2020 | househol | GVTREGN | 112000009 | South West | South_West -frs | 2020 | househol | GVTREGN | 299999999 | Scotland | Scotland -frs | 2020 | househol | GVTREGN | 399999999 | Wales | Wales -frs | 2020 | househol | GVTREGN | 499999999 | Northern Ireland | Northern_Ireland - -2nd level is London=1,REngland=2,Scotland=3,Wales=4,NI=5 - -WAS -Value = 1.0 Label = Employee - Value = 2.0 Label = Self-employed - Value = 3.0 Label = Unemployed - Value = 4.0 Label = Student - Value = 5.0 Label = Looking after family home - Value = 6.0 Label = Sick or disabled - Value = 7.0 Label = Retired - Value = 8.0 Label = Other - Value = -9.0 Label = Not asked / applicable - Value = -8.0 Label = Don't know/ Refusal -WAS - -Value = 96.0 Label = Never worked and long-term unemployed - Value = 1.0 Label = Managerial and professional occupations - Value = 2.0 Label = Intermediate occupations - Value = 3.0 Label = Routine and manual occupations - Value = 97.0 Label = Not classified - Value = -8.0 Label = Don't know/ Refusal - Value = -9.0 Label = Not asked / applicable - -=# - -""" -lcf | 2020 | dvhh | Gorx | 1 | North East | North_East -lcf | 2020 | dvhh | Gorx | 2 | North West and Merseyside | North_West_and_Merseyside -lcf | 2020 | dvhh | Gorx | 3 | Yorkshire and the Humber | Yorkshire_and_the_Humber -lcf | 2020 | dvhh | Gorx | 4 | East Midlands | East_Midlands -lcf | 2020 | dvhh | Gorx | 5 | West Midlands | West_Midlands -lcf | 2020 | dvhh | Gorx | 6 | Eastern | Eastern -lcf | 2020 | dvhh | Gorx | 7 | London | London -lcf | 2020 | dvhh | Gorx | 8 | South East | South_East -lcf | 2020 | dvhh | Gorx | 9 | South West | South_West -lcf | 2020 | dvhh | Gorx | 10 | Wales | Wales -lcf | 2020 | dvhh | Gorx | 11 | Scotland | Scotland -lcf | 2020 | dvhh | Gorx | 12 | Northern Ireland | Northern_Ireland - -load 2 levels of region from LCF into a 3 vector - 1= actual/ 2=London/rEngland/Scot/Wales/Ni - -""" -function lcf_regionmap( gorx :: Union{Int,Missing} ) :: 
Vector{Int} - out = fill( 9998, 3 ) - if ismissing( gorx ) - ; - elseif gorx == 7 # london - out[1] = gorx - out[2] = 1 - elseif gorx in 1:9 - out[1] = gorx - out[2] = 2 - elseif gorx == 10 # wales - out[1] = 10 - out[2] = 4 - elseif gorx == 11 # scotland - out[1] = 11 - out[2] = 3 - elseif gorx == 12 - out[1] = 12 - out[2] = 5 - else - @assert false "unmatched gorx $gorx"; - end - return out -end - -""" -Convoluted household type map. See the note `lcf_frs_composition_mapping.md`. -""" -function composition_map( comp :: Int, mappings; default::Int ) :: Vector{Int} - out = fill( default, 3 ) - n = length(mappings) - for i in 1:n - if comp in mappings[i] - out[1] = i - break - end - end - @assert out[1] in 1:10 "unmatched comp $comp" - out[2] = - if out[1] in [1,2] # single m/f people - 1 - elseif out[1] in [3,4,7,8,9,10] # any with children - 2 - else # no children - 3 - end - return out -end - -function lcf_composition_map( a062 :: Int ) :: Vector{Int} - mappings = (lcf1=[1],lcf2=[2],lcf3=[3,4],lcf4=[5,6],lcf5=[7,8],lcf6=[18,23,26,28],lcf7=[9,10],lcf8=[11,12],lcf9=[13,14,15,16,17],lcf10=[19,24,20,21,22,25,27,29,30]) - return composition_map( a062, mappings, default=9998 ) -end - -function frs_composition_map( hhcomps :: Int ) :: Vector{Int} - mappings=(frs1=[1,3],frs2=[2,4],frs3=[9],frs4=[10],frs5=[5,6,7],frs6=[8],frs7=[12],frs8=[13],frs9=[14],frs10=[11,15,16,17]) - return composition_map( hhcomps, mappings, default=9999 ) -end - -## Move to Intermediate -function model_composition_map( hh :: Household ) :: Vector{Int} - num_male_pens = 0 - num_female_pens = 0 - num_male_npens = 0 - num_female_npens = 0 - num_children = 0 - for (k,p) in hh.people - if p.is_standard_child - num_children += 1 - elseif p.sex == Male - if p.age >= 66 - num_male_pens += 1 - else - num_male_npens += 1 - end - else - if p.age >= 65 - num_female_pens += 1 - else - num_female_npens += 1 - end - end - end - c = -1 - num_adults = num_male_npens + num_male_pens + num_female_npens + 
num_female_pens - num_pens = num_male_pens + num_female_pens - if num_adults == 1 - if num_children == 0 - c = if num_male_pens == 1 - 1 - elseif num_female_pens == 1 - 2 - elseif num_male_npens == 1 - 3 - elseif num_female_npens == 1 - 4 - end - else - c = if num_children == 1 - 9 - elseif num_children == 2 - 10 - elseif num_children >= 3 - 11 - end - end - elseif num_adults == 2 - if num_children == 0 - c = if num_pens == 0 - 7 - elseif num_pens == 1 - 6 - elseif num_pens == 2 - 5 - end - else - c = if num_children == 1 - 12 - elseif num_children == 2 - 13 - elseif num_children >= 3 - 14 - end - end - elseif num_adults >= 3 - c = if num_children == 0 - 8 - elseif num_children == 1 - 15 - elseif num_children == 2 - 16 - elseif num_children >= 3 - 17 - end - end - @assert c in 1:17 - return frs_composition_map( c ) -end - - -# sort(vcat(frsc...)) - -#= -lcf | 2020 | dvhh | A116 | 0 | Not Recorded | Not_Recorded -lcf | 2020 | dvhh | A116 | 1 | Whole house,bungalow-detached | Whole_house_bungalow_detached -lcf | 2020 | dvhh | A116 | 2 | Whole hse,bungalow-semi-dtchd | Whole_hse_bungalow_semi_dtchd -lcf | 2020 | dvhh | A116 | 3 | Whole house,bungalow-terraced | Whole_house_bungalow_terraced -lcf | 2020 | dvhh | A116 | 4 | Purpose-built flat maisonette | Purpose_built_flat_maisonette -lcf | 2020 | dvhh | A116 | 5 | Part of house converted flat | Part_of_house_converted_flat -lcf | 2020 | dvhh | A116 | 6 | Others | Others -=# -""" -Map accomodation. Unused in the end. -""" -function lcf_accmap( a116 :: Any, default=9998) :: Vector{Int} - @argcheck a116 in 1:6 - out = fill( default, 3 ) - # missing in 2020 f*** - if typeof(a116) <: AbstractString - return out - # a116 = tryparse( Int, a116 ) - end - - out[1] = a116 - if a116 in 1:3 - out[2] = 1 - elseif a116 in 4:5 - out[2] = 2 - elseif a116 == 6 - out[2] = 3 - else - @assert false "unmatched a116 $a116" - end - out -end - -#= -Pos. 
= 58 Variable = accomr7 Variable label = Type of accommodation -This variable is    numeric, the SPSS measurement level is NOMINAL - Value label information for accomr7 - Value = 1.0 Label = House / bungalow - Value = 2.0 Label = Flat / maisonette - Value = 3.0 Label = Room / rooms - Value = 4.0 Label = Other - Value = -9.0 Label = Not asked / applicable - Value = -8.0 Label = Don't know/ Refusal - -Pos. = 59 Variable = hsetyper7 Variable label = Type of house / bungalow -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for hsetyper7 - Value = 1.0 Label = Detached - Value = 2.0 Label = Semi-detached - Value = 3.0 Label = Terraced (including end of terrace) - Value = -9.0 Label = Not asked / applicable - Value = -8.0 Label = Don't know/ Refusal - -Pos. = 60 Variable = flttypr7 Variable label = Type of flat / maisonette -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for flttypr7 - Value = 1.0 Label = Purpose-built block - Value = 2.0 Label = Converted house / some other kind of building - Value = -9.0 Label = Not asked / applicable - Value = -8.0 Label = Don't know/ Refusal - -Pos. 
= 61 Variable = accothr7 Variable label = Other types of accommodation -This variable is    numeric, the SPSS measurement level is SCALE - Value label information for accothr7 - Value = 1.0 Label = Caravan, mobile home or houseboat - Value = 2.0 Label = Other - Value = -9.0 Label = Not asked / applicable - Value = -8.0 Label = Don't know/ Refusal - -=# - -function was_accommap_one( wasf :: DataFrame ) :: Vector{Int} - nrows,ncols = size( wasf ) - out = fill(0,nrows) - row = 0 - for was in eachrow( wasf ) - row += 1 - out[row] = if was.accomr7 == 1 # house - if was.hsetyper7 in 1:3 - was.hsetyper7 - else - @assert false "unmapped was.hsetyper7 $(was.hsetyper7)" - end - elseif was.accomr7 == 2 # flat - if was.flttypr7 == 1 - 4 - elseif was.flttypr7 == 2 - 5 - else - @assert false "unmapped was.flttypr7 $(was.flttypr7)" - end - elseif was.accomr7 == 3 # room/rooms ? how could this be true of a household? - 6 - elseif was.accomr7 == 4 - 6 - else - @assert false "unmapped was.accomr7 $(was.accomr7)" - end - @assert out[row] in 1:6 "out is $out" - end - out -end - - -""" -output: -lcf | 2020 | dvhh | A116 | 1 | Whole house,bungalow-detached | Whole_house_bungalow_detached -lcf | 2020 | dvhh | A116 | 2 | Whole hse,bungalow-semi-dtchd | Whole_hse_bungalow_semi_dtchd -lcf | 2020 | dvhh | A116 | 3 | Whole house,bungalow-terraced | Whole_house_bungalow_terraced -lcf | 2020 | dvhh | A116 | 4 | Purpose-built flat maisonette | Purpose_built_flat_maisonette -lcf | 2020 | dvhh | A116 | 5 | Part of house converted flat | Part_of_house_converted_flat -lcf | 2020 | dvhh | A116 | 6 | Others | Others - - - -""" -function was_accommap( was :: DataFrame ) :: Vector{Int} - out = was_accommap_one( was ) - return lcf_accmap.( out, 9997 ) -end - -""" - dwell_na = -1 - detatched = 1 - semi_detached = 2 - terraced = 3 - flat_or_maisonette = 4 - converted_flat = 5 - caravan = 6 - other_dwelling = 7 -""" -function model_accommap( dwelling :: DwellingType ):: Vector{Int} - out = Int( dwelling ) - 
if out == -1 - println( "-1 dwelling ") - out = rand(1:6) - end - out = min( 6, out ) # caravan=>other - return lcf_accmap( out, 9998 ) -end - -#= -frs | 2020 | househol | TYPEACC | 1 | Whole house/bungalow, detached | Whole_house_or_bungalow_detached -frs | 2020 | househol | TYPEACC | 2 | Whole house/bungalow, semi-detached | Whole_house_or_bungalow_semi_detached -frs | 2020 | househol | TYPEACC | 3 | Whole house/bungalow, terraced | Whole_house_or_bungalow_terraced -frs | 2020 | househol | TYPEACC | 4 | Purpose-built flat or maisonette | Purpose_built_flat_or_maisonette -frs | 2020 | househol | TYPEACC | 5 | Converted house/building | Converted_house_or_building -frs | 2020 | househol | TYPEACC | 6 | Caravan/Mobile home or Houseboat | Caravan_or_Mobile_home_or_Houseboat -frs | 2020 | househol | TYPEACC | 7 | Other | Other -=# - -""" -Map housing type. Not used because the f**ing this is deleted in 19/20 public lcf. -""" -function frs_accmap( typeacc :: Union{Int,Missing}) :: Vector{Int} - out = fill( 9999, 3 ) - out[1] = min(6,typeacc) - if typeacc in 1:3 - out[2] = 1 - elseif typeacc in 4:5 - out[2] = 2 - elseif typeacc in 6:7 - out[2] = 3 - else - @assert false "unmatched typeacc $typeacc" - end - out -end - -""" -Infuriatingly, this can't be used as rooms is deleted in 19/20 lcf -""" -function rooms( rooms :: Union{Missing,Int,AbstractString}, def::Int ) :: Vector{Int} - # !!! Another missing in lcf 2020 for NO FUCKING REASON - out = fill(def,3) - if (typeof(rooms) <: AbstractString) || rooms < 0 - return out - # a116 = tryparse( Int, a116 ) - end - - rooms = min( 6, rooms ) - if (ismissing(rooms) || (rooms == 0 )) - return [0,0, 1] - end - out = fill(0,3) - out[1] = rooms - out[2] = min( rooms, 3) - out[3] = rooms == 1 ? 
1 : 2 - return out -end - -#= -frs | 2020 | househol | HHAGEGR4 | 1 | Age 16 to 19 | Age_16_to_19 -frs | 2020 | househol | HHAGEGR4 | 2 | Age 20 to 24 | Age_20_to_24 -frs | 2020 | househol | HHAGEGR4 | 3 | Age 25 to 29 | Age_25_to_29 -frs | 2020 | househol | HHAGEGR4 | 4 | Age 30 to 34 | Age_30_to_34 -frs | 2020 | househol | HHAGEGR4 | 5 | Age 35 to 39 | Age_35_to_39 -frs | 2020 | househol | HHAGEGR4 | 6 | Age 40 to 44 | Age_40_to_44 -frs | 2020 | househol | HHAGEGR4 | 7 | Age 45 to 49 | Age_45_to_49 -frs | 2020 | househol | HHAGEGR4 | 8 | Age 50 to 54 | Age_50_to_54 -frs | 2020 | househol | HHAGEGR4 | 9 | Age 55 to 59 | Age_55_to_59 -frs | 2020 | househol | HHAGEGR4 | 10 | Age 60 to 64 | Age_60_to_64 -frs | 2020 | househol | HHAGEGR4 | 11 | Age 65 to 69 | Age_65_to_69 -frs | 2020 | househol | HHAGEGR4 | 12 | Age 70 to 74 | Age_70_to_74 -frs | 2020 | househol | HHAGEGR4 | 13 | Age 75 or over | Age_75_or_over -=# - -""" -frs age group for hrp - 1st is exact, 2nd u40,40+ -""" -function frs_age_hrp( hhagegr4 :: Int ) :: Vector{Int} - out = fill( 9998, 3 ) - out[1] = hhagegr4 - if hhagegr4 <= 5 - out[2] = 1 - elseif hhagegr4 <= 13 - out[2] = 2 - else - @assert false "mapping hhagegr4 $hhagegr4" - end - out -end - -#= - Value = 3.0 Label = 15 but under 20 yrs - Value = 4.0 Label = 20 but under 25 yrs - Value = 5.0 Label = 25 but under 30 yrs - Value = 6.0 Label = 30 but under 35 yrs - Value = 7.0 Label = 35 but under 40 yrs - Value = 8.0 Label = 40 but under 45 yrs - Value = 9.0 Label = 45 but under 50 yrs - Value = 10.0 Label = 50 but under 55 yrs - Value = 11.0 Label = 55 but under 60 yrs - Value = 12.0 Label = 60 but under 65 yrs - Value = 13.0 Label = 65 but under 70 yrs - Value = 14.0 Label = 70 but under 75 yrs - Value = 15.0 Label = 75 but under 80 yrs - Value = 16.0 Label = 80 and over -=# - -""" -Triple for the age group for the lcf hrp - 1st is groups above to 75, 2nd is 16-39, 40+ 3rd no match. -See coding frame above. 
-""" -function lcf_age_hrp( a065p :: Int ) :: Vector{Int} - out = fill( 9998, 3 ) - a065p -= 2 - a065p = min( 13, a065p ) # 75+ - out[1] = a065p - if a065p <= 5 - out[2] = 1 - elseif a065p <= 13 - out[2] = 2 - else - @assert false "mapping a065p $a065p" - end - out -end - - -function was_frs_age_map( hhagegr4 :: Int, default=9998 ) :: Vector{Int} - out = fill( default, 3 ) - out[1] = hhagegr4 - if hhagegr4 in 1:3 # u35 - out[2] = 1 - elseif hhagegr4 in 4:5 # 35-64 - out[2] = 2 - else - out[2] = 3 - end - out -end - -function was_model_age_grp( age :: Int ) :: Vector{Int} - out = if age < 16 # can't happen? - 1 - elseif age < 25 - 2 - elseif age < 35 - 3 - elseif age < 45 - 4 - elseif age < 55 - 5 - elseif age < 65 - 6 - elseif age < 75 - 7 - elseif age >= 75 - 8 - end - return was_frs_age_map( out, 9998 ) -end - -#= - -hh gross income - -lcf | 2020 | dvhh | P389p | 1 | numeric | scale | Normal weekly disposable hhld income - top-coded | 1 - -p344p -lcf | 2020 | dvhh | p344p | 1 | numeric | scale | Gross normal weekly household income - top-coded | 1 -lcf | 2020 | dvhh | P352p | 1 | numeric | scale | Gross current income of household - top-coded -frs | 2020 | househol | HHINC | 249 | numeric | scale | HH - Total Household income | 1 - -julia> summarystats( lcfhh.p344p ) -Summary Stats: -Length: 5400 -Missing Count: 0 -Mean: 872.313711 -Minimum: 0.000000 -1st Quartile: 432.048923 -Median: 744.151615 -3rd Quartile: 1172.362500 -Maximum: 2420.030000 ## TOPCODED - - -julia> summarystats( frshh.hhinc ) -Summary Stats: -Length: 16364 -Missing Count: 0 -Mean: 855.592520 -Minimum: -7024.000000 -1st Quartile: 380.000000 -Median: 636.000000 -3rd Quartile: 1070.000000 -Maximum: 30084.000000 - -=# - -""" -Absolute difference in income, scaled by max difference (TOPCODE,since the possible range is zero to the top-coding) -""" -function compare_income( hhinc :: Real, p344p :: Real ) :: Real - # top & bottom code hhinc to match the lcf p344 - # hhinc = max( 0, hhinc ) - # hhinc = 
min( TOPCODE, hhinc ) - 1-abs( hhinc - p344p )/TOPCODE # topcode is also the range -end - -""" -Produce a comparison between on frs and one lcf row on tenure, region, wages, etc. -""" -function frs_lcf_match_row( frs :: DataFrameRow, lcf :: DataFrameRow ) :: Tuple - t = 0.0 - t += score( lcf_tenuremap( lcf.a121 ), frs_tenuremap( frs.tentyp2 )) - t += score( lcf_regionmap( lcf.gorx ), frs_regionmap( frs.gvtregn, 9997 )) - # !!! both next missing in 2020 LCF FUCKKK - # t += score( lcf_accmap( lcf.a116 ), frs_accmap( frs.typeacc )) - # t += score( rooms( lcf.a111p, 998 ), rooms( frs.bedroom6, 999 )) - t += score( lcf_age_hrp( lcf.a065p ), frs_age_hrp( frs.hhagegr4 )) - t += score( lcf_composition_map( lcf.a062 ), frs_composition_map( frs.hhcomps )) - t += lcf.any_wages == frs.any_wages ? 1 : 0 - t += lcf.any_pension_income == frs.any_pension_income ? 1 : 0 - t += lcf.any_selfemp == frs.any_selfemp ? 1 : 0 - t += lcf.hrp_unemployed == frs.hrp_unemployed ? 1 : 0 - t += lcf.hrp_non_white == frs.hrp_non_white ? 1 : 0 - t += lcf.datayear == frs.datayear ? 0.5 : 0 # - a little on same year FIXME use date range - # t += lcf.any_disabled == frs.any_disabled ? 1 : 0 -- not possible in LCF?? - t += lcf.has_female_adult == frs.has_female_adult ? 1 : 0 - t += score( lcf.num_children, frs.num_children ) - t += score( lcf.num_people, frs.num_people ) - # fixme should we include this at all? - incdiff = compare_income( lcf.income, frs.income ) - t += 10.0*incdiff - return t,incdiff -end - -function do_hh_sums( hh :: Household ) :: Tuple - any_wages = false - any_selfemp = false - any_pension_income = false - has_female_adult = false - income = 0.0 - for (pid,pers) in hh.people - if get(pers.income,wages,0) > 0 - any_wages = true - end - if get(pers.income,self_employment_income,0) > 0 - any_selfemp = true - end - if (get(pers.income,private_pensions,0) > 0) || pers.age >= 66 - any_pension_income = true - end - if (! 
pers.is_standard_child) && (pers.sex == Female ) - has_female_adult = true - end - # income += sum( pers.income, start=wages, stop=alimony_and_child_support_received ) # FIXME - end - income = hh.original_gross_income - return any_wages, any_selfemp, any_pension_income, has_female_adult, income -end - -function example_lcf_match( hh :: Household, lcf :: DataFrameRow ) :: Tuple - hrp = get_head( hh ) - t = 0.0 - t += score( lcf_tenuremap( lcf.a121 ), model_tenuremap( hh.tenure )) - t += score( lcf_regionmap( lcf.gorx ), model_regionmap( hh.region )) - # !!! both next missing in 2020 LCF FUCKKK - # t += score( lcf_accmap( lcf.a116 ), frs_accmap( frs.typeacc )) - # t += score( rooms( lcf.a111p, 998 ), rooms( frs.bedroom6, 999 )) - t += score( lcf_age_hrp( lcf.a065p ), frs_age_hrp(model_age_grp( hrp.age ))) - t += score( lcf_composition_map( lcf.a062 ), model_composition_map( hh )) - any_wages, any_selfemp, any_pension_income, has_female_adult, income = do_hh_sums( hh ) - t += lcf.any_wages == any_wages ? 1 : 0 - t += lcf.any_pension_income == any_pension_income ? 1 : 0 - t += lcf.any_selfemp == any_selfemp ? 1 : 0 - t += lcf.hrp_unemployed == hrp.employment_status == Unemployed ? 1 : 0 - t += lcf.hrp_non_white == hrp.ethnic_group !== White ? 1 : 0 - # t += lcf.datayear == frs.datayear ? 0.5 : 0 # - a little on same year FIXME use date range - # t += lcf.any_disabled == frs.any_disabled ? 1 : 0 -- not possible in LCF?? - t += Int(lcf.has_female_adult) == Int(has_female_adult) ? 1 : 0 - t += score( lcf.num_children, num_children(hh) ) - t += score( lcf.num_people, num_people(hh) ) - # fixme should we include this at all? - incdiff = compare_income( lcf.income, income ) - t += 10.0*incdiff - return t,incdiff -end - -islessscore( l1::LCFLocation, l2::LCFLocation ) = l1.score < l2.score -islessincdiff( l1::LCFLocation, l2::LCFLocation ) = l1.incdiff < l2.incdiff - -""" -Match one row in the FRS (recip) with all possible lcf matches (donor). 
Intended to be general -but isn't really any more. FIXME: pass in a saving function so we're not tied to case/datayear. -""" -function match_recip_row( recip, donor :: DataFrame, matcher :: Function, incomesym=:income ) :: Vector{LCFLocation} - drows, dcols = size(donor) - i = 0 - similar = Vector{LCFLocation}( undef, drows ) - for lr in eachrow(donor) - i += 1 - score, incdiff = matcher( recip, lr ) - similar[i] = LCFLocation( lr.case, lr.datayear, score, lr[incomesym], incdiff ) - end - # sort by characteristics - similar = sort( similar; lt=islessscore, rev=true )[1:NUM_SAMPLES] - # .. then the nearest income amongst those - similar = sort( similar; lt=islessincdiff, rev=true )[1:NUM_SAMPLES] - return similar -end - - - -""" -Create a dataframe for storing all the matches. -This has the FRS record and then 20 lcf records, with case,year,income and matching score for each. -""" -function makeoutdf( n :: Int, prefix :: AbstractString ) :: DataFrame - d = DataFrame( - frs_sernum = zeros(Int, n), - frs_datayear = zeros(Int, n), - frs_income = zeros(n)) - for i in 1:NUM_SAMPLES - case_sym = Symbol( "$(prefix)_case_$i") - datayear_sym = Symbol( "$(prefix)_datayear_$i") - score_sym = Symbol( "$(prefix)_score_$i") - income_sym = Symbol( "$(prefix)_income_$i") - d[!,case_sym] .= 0 - d[!,datayear_sym] .= 0 - d[!,score_sym] .= 0.0 - d[!,income_sym] .= 0.0 - end - return d -end - -""" -Map the entire datasets. 
-""" -function map_all_lcf_frs( recip :: DataFrame, donor :: DataFrame, matcher :: Function )::DataFrame - p = 0 - nrows = size(recip)[1] - df = makeoutdf( nrows, "lcf" ) - for fr in eachrow(recip); - p += 1 - println(p) - df[ hno, :frs_sernum] = fr.sernum - df[ hno, :frs_datayear] = fr.datayear - df[ hno, :frs_income] = fr.income - matches = match_recip_row( fr, donor, matcher ) - for i in 1:NUM_SAMPLES - lcf_case_sym = Symbol( "lcf_case_$i") - lcf_datayear_sym = Symbol( "lcf_datayear_$i") - lcf_score_sym = Symbol( "lcf_score_$i") - lcf_income_sym = Symbol( "lcf_income_$i") - df[ hno, lcf_case_sym] = matches[i].case - df[ hno, lcf_datayear_sym] = matches[i].datayear - df[ hno, lcf_score_sym] = matches[i].score - df[ hno, lcf_income_sym] = matches[i].income - end - if p > 10000000 - break - end - end - return df -end - -function map_example( example :: Household, donor :: DataFrame, matcher::Function )::LCFLocation - matches = map_recip_row( example, donor, matcher ) - return matches[1] -end - -""" -print out our lcf and frs records -""" -function comparefrslcf( frshh::DataFrame, lcfhh:: DataFrame, frs_sernums, frs_datayear::Int, lcf_case::Int, lcf_datayear::Int ) - lcf1 = lcfhh[(lcfhh.case .== lcf_case).&(lcfhh.datayear .== lcf_datayear), - [:any_wages,:any_pension_income,:any_selfemp,:hrp_unemployed, - :hrp_non_white,:has_female_adult,:num_children,:num_people, - :a121,:gorx,:a065p,:a062,:income]] - println(lcf1) - println( "lcf tenure",lcf_tenuremap( lcf1.a121[1] )) - println( "lcf region", lcf_regionmap( lcf1.gorx[1] )) - println( "lcf age_hrp", lcf_age_hrp( lcf1.a065p[1] )) - println( "lcf composition", lcf_composition_map( lcf1.a062[1] )) - for i in frs_sernums - println( "sernum $i") - frs1 = frshh[(frshh.sernum .== i).&(frshh.datayear.==frs_datayear), - [:any_wages,:any_pension_income,:any_selfemp,:hrp_unemployed,:hrp_non_white,:has_female_adult, - :num_children,:num_people,:tentyp2,:gvtregn,:hhagegr4,:hhcomps,:income]] - println(frs1) - println( "frs 
tenure", frs_tenuremap( frs1.tentyp2[1])) - println( "frs region", frs_regionmap( frs1.gvtregn[1] )) - println( "frs age hrp", lcf_age_hrp( frs1.hhagegr4[1] )) - println( "frs composition", frs_composition_map( frs1.hhcomps[1] )) - println( "income $(frs1.income)") - end -end -#= to run this, so - -lcfhh,lcfpers,lcf_hh_pp = load3lcfs() -frshh,frspers,frs_hh_pp = loadfrs() -uprate_incomes!( frshh, lcfhh ) # all in constant prices -alldf = map_all_lcf_frs(frshh, lcfhh, frs_lcf_match_row ) -CSV.write( "frs_lcf_matches_2020_vx.csv", alldf ) -CSV.write( "data/lcf_edited.csv", lcfhh ) - -# test stuff -lcfhrows = size(lcfhh)[1] -lcfhh.is_selected = fill( false, lcfhrows ) -for i in eachrow(alldf) - lcfhh[(lcfhh.datayear.==i.lcf_datayear_1).&(lcfhh.case.==i.lcf_case_1),:is_selected] .= true -end - -sellcfhh = lcfhh[lcfhh.is_selected,:] - -=# - -""" -Load 2018/9 - 2020/1 LCFs and add some matching fields. -""" -function load3lcfs()::Tuple - lcfhrows,lcfhcols,lcfhh18 = load( "/mnt/data/lcf/1819/tab/2018_dvhh_ukanon.tab", 2018 ) - lcfhrows,lcfhcols,lcfhh19 = load( "/mnt/data/lcf/1920/tab/lcfs_2019_dvhh_ukanon.tab", 2019 ) - lcfhrows,lcfhcols,lcfhh20 = load( "/mnt/data/lcf/2021/tab/lcfs_2020_dvhh_ukanon.tab", 2020 ) - lcfhh = vcat( lcfhh18, lcfhh19, lcfhh20, cols=:union ) - lcfhrows = size(lcfhh)[1] - - lcfprows,lcpfcols,lcfpers18 = load( "/mnt/data/lcf/1819/tab/2018_dvper_ukanon201819.tab", 2018 ) - lcfprows,lcpfcols,lcfpers19 = load( "/mnt/data/lcf/1920/tab/lcfs_2019_dvper_ukanon201920.tab", 2019 ) - lcfprows,lcpfcols,lcfpers20 = load( "/mnt/data/lcf/2021/tab/lcfs_2020_dvper_ukanon202021.tab",2020) - lcfpers = vcat( lcfpers18, lcfpers19, lcfpers20, cols=:union ) - lcf_hh_pp = innerjoin( lcfhh, lcfpers, on=[:case,:datayear], makeunique=true ) - lcfhh.any_wages .= lcfhh.p356p .> 0 - lcfhh.any_pension_income .= lcfhh.p364p .> 0 - lcfhh.any_selfemp .= lcfhh.p320p .!= 0 - lcfhh.hrp_unemployed .= lcfhh.p304 .== 1 - lcfhh.num_children = lcfhh.a040 + lcfhh.a041 + lcfhh.a042 - # LCF 
case ids of non white HRPs - convoluted; see: - # https://stackoverflow.com/questions/51046247/broadcast-version-of-in-function-or-in-operator - lcf_nonwhitepids = lcf_hh_pp[(lcf_hh_pp.a012p .∈ (["10","2","3","4"],)).&(lcf_hh_pp.a003 .== 1),:case] - lcfhh.hrp_non_white .= 0 - lcfhh[lcfhh.case .∈ (lcf_nonwhitepids,),:hrp_non_white] .= 1 - lcfhh.num_people = lcfhh.a049 - lcfhh.income = lcfhh.p344p - # not possible in lcf??? - lcfhh.any_disabled .= 0 - lcf_femalepids = lcf_hh_pp[(lcf_hh_pp.a004 .== 2),:case] - lcfhh.has_female_adult .= 0 - lcfhh[lcfhh.case .∈ (lcf_femalepids,),:has_female_adult] .= 1 - lcfhh.is_selected = fill( false, lcfhrows ) - lcfhh,lcfpers,lcf_hh_pp -end - -#= -Value = 1.0 Label = 0 to 15 -Value = 2.0 Label = 16 to 24 -Value = 3.0 Label = 25 to 34 -Value = 4.0 Label = 35 to 44 -Value = 5.0 Label = 45 to 54 -Value = 6.0 Label = 55 to 64 -Value = 7.0 Label = 65 to 74 -Value = 8.0 Label = 75 and over -Value = -9.0 Label = Not Routed -Value = -8.0 Label = Don t know -=# - -#= -frs | 2020 | househol | HHAGEGR4 | 1 | Age 16 to 19 | Age_16_to_19 -frs | 2020 | househol | HHAGEGR4 | 2 | Age 20 to 24 | Age_20_to_24 -frs | 2020 | househol | HHAGEGR4 | 3 | Age 25 to 29 | Age_25_to_29 -frs | 2020 | househol | HHAGEGR4 | 4 | Age 30 to 34 | Age_30_to_34 -frs | 2020 | househol | HHAGEGR4 | 5 | Age 35 to 39 | Age_35_to_39 -frs | 2020 | househol | HHAGEGR4 | 6 | Age 40 to 44 | Age_40_to_44 -frs | 2020 | househol | HHAGEGR4 | 7 | Age 45 to 49 | Age_45_to_49 -frs | 2020 | househol | HHAGEGR4 | 8 | Age 50 to 54 | Age_50_to_54 -frs | 2020 | househol | HHAGEGR4 | 9 | Age 55 to 59 | Age_55_to_59 -frs | 2020 | househol | HHAGEGR4 | 10 | Age 60 to 64 | Age_60_to_64 -frs | 2020 | househol | HHAGEGR4 | 11 | Age 65 to 69 | Age_65_to_69 -frs | 2020 | househol | HHAGEGR4 | 12 | Age 70 to 74 | Age_70_to_74 -frs | 2020 | househol | HHAGEGR4 | 13 | Age 75 or over | Age_75_or_over -=# - - -function model_age_grp( age :: Int ) - return if age < 16 - 1 - elseif age < 25 - 2 - elseif 
age < 35 - 3 - elseif age < 45 - 4 - elseif age < 55 - 5 - elseif age < 65 - 6 - elseif age < 75 - 7 - else - 8 - end -end - - -function xxmodel_age_grp( age :: Int ) - return if age < 20 - 1 - elseif age < 25 - 2 - elseif age < 30 - 3 - elseif age < 35 - 4 - elseif age < 40 - 5 - elseif age < 45 - 6 - elseif age < 50 - 7 - elseif age < 55 - 8 - elseif age < 60 - 9 - elseif age < 65 - 10 - elseif age < 70 - 11 - elseif age < 75 - 12 - elseif age >= 75 - 13 - end -end - - -#= - - Missing_Socio_Economic_Group = -1 - Employers_in_large_organisations = 1 - Higher_managerial_occupations = 2 - Higher_professional_occupations_New_self_employed = 3 - Lower_prof_and_higher_technical_Traditional_employee = 4 - Lower_managerial_occupations = 5 - Higher_supervisory_occupations = 6 - Intermediate_clerical_and_administrative = 7 - Employers_in_small_organisations_non_professional = 8 - Own_account_workers_non_professional = 9 - Lower_supervisory_occupations = 10 - Lower_technical_craft = 11 - Semi_routine_sales = 12 - Routine_sales_and_service = 13 - Never_worked = 14 - Full_time_student = 15 - Not_classified_or_inadequately_stated = 16 - Not_classifiable_for_other_reasons = 17 -end - -1 1.1 => 1 -2 1.2 => 2,3 -3 2.0 => 4 -4 3.0 => 5,6,7 -5 4.0 => 8,9 -6 5.0 => 10 -7 6.0 => 11,12, -8 7.0 => 13, -9 8.0 => 14,15 -10 97,-8,-9 => 16,17,-1 - -1.1 => 1 -1.2 => 2 -2 => 3 -3 => 4 -4 => 5 -5 => 6 -6 => 7 -7 => 8 -8 => 9 -9 => 10 - -nssec8r7 - Value = -9.0 Label = Not asked / applicable - Value = -8.0 Label = Don't know/ Refusal - 1 Value = 1.1 Label = Large employers and higher managerial occupations - 2 Value = 1.2 Label = Higher professional occupations - 3 Value = 2.0 Label = Lower managerial and professional occupations - 4 Value = 3.0 Label = Intermediate occupations - 5 Value = 4.0 Label = Small employers and own account workers - 6 Value = 5.0 Label = Lower supervisory and technical occupations - 7 Value = 6.0 Label = Semi-routine occupations - 8 Value = 7.0 Label = Routine 
occupations - 9 Value = 8.0 Label = Never worked and long-term unemployed - 10 Value = 97.0 Label = Not classified - -=# - -function map_socio( socio :: Int, default=9998 ) :: Vector{Int} - out = fill( default, 3 ) - out[1] = socio - out[2] = if socio in 1:5 - 1 - elseif socio !== 10 - 2 - else - 3 - end - out[3] = socio == 10 ? 2 : 1 - out -end - -""" - Value = -9.0 Label = Not asked / applicable - Value = -8.0 Label = Don't know/ Refusal - Value = 1.1 Label = Large employers and higher managerial occupations - Value = 1.2 Label = Higher professional occupations - Value = 2.0 Label = Lower managerial and professional occupations - Value = 3.0 Label = Intermediate occupations - Value = 4.0 Label = Small employers and own account workers - Value = 5.0 Label = Lower supervisory and technical occupations - Value = 6.0 Label = Semi-routine occupations - Value = 7.0 Label = Routine occupations - Value = 8.0 Label = Never worked and long-term unemployed - Value = 97.0 Label = Not classified -""" -function was_map_socio_one( socio :: Real ) :: Int - d = Dict([ - 1.1 => 1, # Employers_in_large_organisations is way out WAS vs FRS so amalgamate - 1.2 => 1, - 2 => 2, - 3 => 3, - 4 => 4, - 5 => 5, - 6 => 6, - 7 => 7, - 8 => 8, - 9 => 9, - 97=> 9, - -8=> 9, - -9=> 9]) - return d[socio] -end - -function was_map_socio( socio :: Real ) :: Vector{Int} - out = was_map_socio_one( socio ) - return map_socio( out, 9997 ) -end - -function model_map_socio( soc ) :: Vector{Int} - socio = Int( soc ) - out = if socio in [1,2,3] # Employers_in_large_organisations = 0.099% FRS 5.7% WAS so amalgamate - 1 - elseif socio in [4] - 2 - elseif socio in [5,6,7] - 3 - elseif socio in [8,9] - 4 - elseif socio in [10] - 5 - elseif socio in [11,12] - 6 - elseif socio in [13] - 7 - elseif socio in [14,15] - 8 - elseif socio in [16,17,-1] - 9 - else - @assert false "socio out of range $socio" - end - return map_socio( out) -end - -#= -1 1.1 => 1 -2 1.2 => 2,3 -3 2.0 => 4 -4 3.0 => 5,6,7 -5 4.0 => 8,9 -6 
5.0 => 10 -7 6.0 => 11,12, -8 7.0 => 13, -9 8.0 => 14,15 -10 97,-8,-9 => 16,17,-1 -=# - -""" -Value = 1.0 Label = North East - Value = 2.0 Label = North West - Value = 4.0 Label = Yorkshire and The Humber - Value = 5.0 Label = East Midlands - Value = 6.0 Label = West Midlands - Value = 7.0 Label = East of England - Value = 8.0 Label = London - Value = 9.0 Label = South East - Value = 10.0 Label = South West - Value = 11.0 Label = Wales - Value = 12.0 Label = Scotland -""" -function was_regionmap_one( wasreg :: Int ) :: Standard_Region - d = Dict( [ - 1 => North_East, # = 112000001 - 2 => North_West, # = 112000002 - 4 => Yorks_and_the_Humber, # = 112000003 - 5 => East_Midlands, # = 112000004 - 6 => West_Midlands, # = 112000005 - 7 => East_of_England, # = 112000006 - 8 => London, # = 112000007 - 9 => South_East, # = 112000008 - 10 => South_West, # = 112000009 - 12 => Scotland, # = 299999999 - 11 => Wales ] ) # = 399999999 - return d[ wasreg ] -end - -""" -Just for fuckery WAS and LCF these numbers subtly different - was ommits 4 -""" -function was_regionmap( wasreg :: Int ) :: Vector{Int} - out = was_regionmap_one(wasreg) - return frs_regionmap( out, 9997 ) -end -#= -Value = 96.0 Label = Never worked and long-term unemployed - Value = 1.0 Label = Managerial and professional occupations - Value = 2.0 Label = Intermediate occupations - Value = 3.0 Label = Routine and manual occupations - Value = 97.0 Label = Not classified - Value = -8.0 Label = Don't know/ Refusal - Value = -9.0 Label = Not asked / applicable -=# - -#= -wasp = CSV.File( "/mnt/data/was/UKDA-7215-tab/tab/was_round_7_person_eul_june_2022.tab") |> DataFrame -wash = CSV.File( "/mnt/data/was/UKDA-7215-tab/tab/was_round_7_hhold_eul_march_2022.tab") |> DataFrame -rename!(wasp,lowercase.(names(wasp))) -rename!(wash,lowercase.(names(wash))) -washj = innerjoin( wasp, wash; on=:caser7,makeunique=true) -washj[washj.p_flag4r7 .∈ (1,3),:] # hrp only -washj[(washj.p_flag4r7 .== "1") .| (washj.p_flag4r7 .== "3"),:] - 
-mpers = CSV.File( "data/model_people_scotland-2015-2021.tab")|>DataFrame - - Missing_Marital_Status = -1 - Married_or_Civil_Partnership = 1 - Cohabiting = 2 - Single = 3 - Widowed = 4 - Separated = 5 - Divorced_or_Civil_Partnership_dissolved = 6 - -hrpdvmrdfr7 Variable label = Marital status of HRP or partner -This variable is    numeric, the SPSS measurement level is NOMINAL - Value label information for hrpdvmrdfr7 - - - - hrpempstat2r7 Variable label = Employment status of HRP or partner - This variable is    numeric, the SPSS measureme nt level is SCALE - Value label information for hrpempstat2r7 - Value = 1.0 Label = Employee - Value = 2.0 Label = Self-employed - Value = 3.0 Label = Unemployed - Value = 4.0 Label = Student - Value = 5.0 Label = Looking after family home - Value = 6.0 Label = Sick or disabled - Value = 7.0 Label = Retired - Value = 8.0 Label = Other - Value = -9.0 Label = Not asked / applicable - Value = -8.0 Label = Don't know/ Refusal - - - Missing_Marital_Status = -1 - Married_or_Civil_Partnership = 1 - Cohabiting = 2 - Single = 3 - Widowed = 4 - Separated = 5 - Divorced_or_Civil_Partnership_dissolved = 6 -=# - -function map_marital( ms :: Int, default=9998 ) :: Vector{Int} - out = fill( default, 3 ) - out[1] = ms - out[2] = ms in [1,2] ? 
1 : 2 - return out -end - -""" -Value = -9.0 Label = Not asked / applicable - Value = -8.0 Label = Don't know/ Refusal - Value = 1.0 Label = Married - Value = 2.0 Label = Cohabiting - Value = 3.0 Label = Single - Value = 4.0 Label = Widowed - Value = 5.0 Label = Divorced - Value = 6.0 Label = Separated - Value = 7.0 Label = Same-sex couple - Value = 8.0 Label = Civil Partner - Value = 9.0 Label = Former / separated Civil Partner -""" -function was_map_marital_one( mar :: Int ) :: Marital_Status - out :: Marital_Status = if mar in [1,7,8] - Married_or_Civil_Partnership - elseif mar in 2 - Cohabiting - elseif mar in 3 - Single - elseif mar in 4 - Widowed - elseif mar in [6,9] - Separated - elseif mar in [5] - Divorced_or_Civil_Partnership_dissolved - elseif mar in [-9,-8] - Missing_Marital_Status - else - @assert false "unmapped mar $mar" - end - return out -end - - -""" -Missing_Marital_Status = -1 - Married_or_Civil_Partnership = 1 - Cohabiting = 2 - Single = 3 - Widowed = 4 - Separated = 5 - Divorced_or_Civil_Partnership_dissolved = 6 -""" -function model_map_marital( mar :: Marital_Status ):: Vector{Int} - im = Int( mar ) - @assert im in 1:6 "im missing $mar = $im" - return map_marital(im) -end - - -function map_empstat( ie :: Int, default=9998 ):: Vector{Int} - out = fill( default, 3 ) - out[1] = ie - out[2] = ie in 1:2 ? 
1 : 2 - return out -end - -function was_map_empstat( ie :: Int ) :: Vector{Int} - return map_empstat( ie, 9997 ) -end - -""" - Missing_ILO_Employment = -1 - Full_time_Employee = 1 - Part_time_Employee = 2 - Full_time_Self_Employed = 3 - Part_time_Self_Employed = 4 - Unemployed = 5 - Retired = 6 - Student = 7 - Looking_after_family_or_home = 8 - Permanently_sick_or_disabled = 9 - Temporarily_sick_or_injured = 10 - Other_Inactive = 11 -""" -function model_map_empstat( ie :: ILO_Employment ) :: Vector{Int} # - out = if ie in [Full_time_Employee,Part_time_Employee] - 1 - elseif ie in [Full_time_Self_Employed,Part_time_Self_Employed ] - 2 - elseif ie == Unemployed - 3 - elseif ie == Retired - 7 - elseif ie == Student - 4 - elseif ie == Looking_after_family_or_home - 5 - elseif ie in [Permanently_sick_or_disabled,Temporarily_sick_or_injured] - 6 - elseif ie in [Other_Inactive,Missing_ILO_Employment] - 8 - else - @assert false "unmapped empstat $empstat = $ie" - end - return map_empstat( Int(out) ) -end - -""" -Create a WAS subset with marrstat, tenure, etc. mapped to same categories as FRS -""" -function create_was_subset(; outfilename="was_wave_7_subset.tab" ) - wasp = CSV.File( "/mnt/data/was/UKDA-7215-tab/tab/was_round_7_person_eul_june_2022.tab"; missingstring=["", " "]) |> DataFrame - wash = CSV.File( "/mnt/data/was/UKDA-7215-tab/tab/was_round_7_hhold_eul_march_2022.tab"; missingstring=["", " "]) |> DataFrame - rename!(wasp,lowercase.(names(wasp))) - rename!(wash,lowercase.(names(wash))) - wasj = innerjoin( wasp, wash; on=:caser7,makeunique=true) - wasj.p_flag4r7 = coalesce.(wasj.p_flag4r7, -1) - was = wasj[((wasj.p_flag4r7 .== 1) .| (wasj.p_flag4r7 .== 3)),:] - # @assert size( was )[1] == size( wash )[1] " sizes don't match $(size( was )) $(size( wash ))" # selected 1 per hh, missed no hhs - # this breaks! (17532, 5534) (17534, 852) - 2 missing, but that's OK?? 
- wpy=365.25/7 - - subwas = DataFrame() - subwas.case = was.caser7 - subwas.year = was.yearr7 - subwas.datayear .= 7 # wave 7 - subwas.month = was.monthr7 - subwas.q = div.(subwas.month .- 1, 3 ) .+ 1 - subwas.bedrooms = was.hbedrmr7 - subwas.region = Int.(was_regionmap_one.(was.gorr7)) - subwas.age_head = was.hrpdvage8r7 - subwas.weekly_gross_income = was.dvtotgirr7./wpy - subwas.tenure = was_tenuremap_one( was ) - subwas.accom = was_accommap_one( was ) - - subwas.household_type = was.hholdtyper7 - subwas.occupation = was.hrpnssec3r7 - subwas.total_wealth = was.totwlthr7 - subwas.num_children = was.numchildr7 - subwas.num_adults = was.dvhsizer7 - subwas.num_children - subwas.sex_head = was.hrpsexr7 - subwas.empstat_head = was.hrpempstat2r7 - subwas.socio_economic_head = was_map_socio_one.( was.nssec8r7 ) # hrpnssec3r7 - subwas.marital_status_head = Int.(was_map_marital_one.(was.hrpdvmrdfr7)) - - subwas.any_wages = was.dvgiempr7_aggr .> 0 - subwas.any_selfemp = was.dvgiser7_aggr .> 0 - subwas.any_pension_income = was.dvpinpvalr7_aggr .> 0 - subwas.has_degree = was.hrpedlevelr7 .== 1 - - subwas.net_housing = was.hpropwr7 - subwas.net_physical = was.hphyswr7 - subwas.total_pensions = was.totpenr7_aggr - subwas.net_financial = was.hfinwntr7_sum - subwas.total_value_of_other_property = was.othpropvalr7_sum - subwas.total_financial_liabilities = was.hfinlr7_excslc_aggr # Hhold value of financial liabilities - subwas.total_household_wealth = was.totwlthr7 - for row in eachrow( subwas ) - row.weekly_gross_income = Uprating.uprate( - row.weekly_gross_income, - row.year, - row.q, - Uprating.upr_nominal_gdp ) - end - CSV.write( "data/$(outfilename)", subwas; delim='\t') - return subwas -end -# HFINWNTR7_exSLC_Sum - -""" - -North_East = 1 -North_West = 2 -Yorks_and_the_Humber = 3 -East_Midlands = 4 -West_Midlands = 5 -East_of_England = 6 -London = 7 -South_East = 8 -South_West = 9 -Wales = 10 -Scotland = 11 -Northern_Ireland = 12 - -Heavily weight Scotland, then n england, 
then midland/wales, 0 London/SE -NOTE 2.0 1.0 0.5 0.1 -""" -function region_score_scotland( - a3 :: Vector{Int}, b3 :: Vector{Int}, weights = [2.0,1.0,0.5,0.1,0])::Float64 - @argcheck a3[1] == 11 - return if a3[1] == b3[1] # scotland - weights[1] - else - if b3[1] in [1,2,3 ] # neast, nwest, yorks - weights[2] - elseif b3[1] in [ 4, 5, 10, 12] # e/w midlands, wales - weights[3] - elseif b3[1] in [8] #South_West - weights[4] - else # london, seast - weights[5] - end - end -end - -""" -We're JUST going to use the model dataset here -""" -function model_was_match( - hh :: Household, - was :: DataFrameRow ) :: Tuple - t = 0.0 - incdiff = 0.0 - hrp = get_head( hh ) - t += score( was_model_age_grp( hrp.age ), was_frs_age_map(was.age_head, 9997 )) # ok - t += region_score_scotland( - model_regionmap( hh.region ), - frs_regionmap( was.region, 9997 ), - [1.5,0.8,0.3,0.2,0.1]) - t += score( model_accommap( hh.dwelling ), lcf_accmap( was.accom, 9997 )) - t += score( model_tenuremap( hh.tenure ), frs_tenuremap( was.tenure, 9997 )) - t += score( model_map_socio( hrp.socio_economic_grouping ), - map_socio( was.socio_economic_head, 9997 )) - t += score( model_map_empstat( hrp.employment_status ), map_empstat( was.empstat_head, 9997 )) - t += Int(hrp.sex) == was.sex_head ? 1 : 0 - t += score( model_map_marital(hrp.marital_status ), map_marital( was.marital_status_head, 9997 )) - # t += score( hh.data_year, was.year ) - any_wages, any_selfemp, any_pension_income, has_female_adult, income = do_hh_sums( hh ) - - # hh_composition - t += any_wages == was.any_wages ? 1 : 0 - t += any_selfemp == was.any_selfemp ? 1 : 0 - t += any_pension_income == was.any_pension_income ? 1 : 0 - - t += highqual_degree_equiv(hrp.highest_qualification) == was.has_degree ? 
1 : 0 - - t += score( person_map(num_children( hh ),9999), person_map(was.num_children,9997 )) - t += score( person_map(num_adults( hh ),9999), person_map(was.num_adults,9997)) - incdiff = compare_income( income, was.weekly_gross_income ) - return t, incdiff -end - -""" -Match one row in the FRS (recip) with all possible lcf matches (donor). Intended to be general -but isn't really any more. FIXME: pass in a saving function so we're not tied to case/datayear. -""" -#= -function match_recip_row( recip, donor :: DataFrame, matcher :: Function ) :: Vector{LCFLocation} - drows, dcols = size(donor) - i = 0 - similar = Vector{LCFLocation}( undef, drows ) - for lr in eachrow(donor) - i += 1 - score, incdiff = matcher( recip, lr ) - similar[i] = LCFLocation( lr.case, lr.datayear, score, lr.income, incdiff ) - end - # sort by characteristics - similar = sort( similar; lt=islessscore, rev=true )[1:NUM_SAMPLES] - # .. then the nearest income amongst those - similar = sort( similar; lt=islessincdiff, rev=true )[1:NUM_SAMPLES] - return similar -end - -=# - - -""" -""" - -const WAS_TARGET_VARS = Dict( - ["age"=>3, - "region"=>3, - "accom"=>3, - "tenure"=>3, - "socio"=>3, - "empstat"=>3, - "marital"=>3, - "year"=>1, - "wages"=>1, - "selfemp"=>1, - "pensions"=>1, - "degree"=>1, - "children"=>3, - "adults"=>3, - "sex"=>1, - "year"=>1]) - -function pct(v) - round.( 100.0 .* v ./ sum(v), sigdigits=2 ) -end - -function compareone( frs :: DataFrame, was :: DataFrame, name :: String, n :: Int ) :: Array - out=[] - for i in 1:n - df = DataFrame( key=zeros(Int,200), frs=zeros(200), was=zeros(200), diff=zeros(200)) - key = Symbol( "$(name)_$(i)") - wd = sort( countmap( was[!,key])) - wf = sort( countmap( frs[!,key])) - if keys(wd) != keys(wf) - println( "key mismatch! 
$key wd = $(wd) wf = $(wf)") - end - wk = Int.(keys( wd )) - wv = pct(values( wd )) - fk = Int.(keys( wf )) - fv = pct(values( wf )) - mx = max( maximum(wk), maximum(fk)) - mn = min( minimum(wk), minimum(fk)) - incr = 1 - mn - len = mx - mn + 1 - j = 0 - for i in wk - j += 1 - df[i+incr,:key] = i - df[i+incr,:was] = wv[j] - end - j = 0 - for i in fk - j += 1 - df[i+incr,:key] = i - df[i+incr,:frs] = fv[j] - end - df[:,:diff] = df[:,:was] - df[:,:frs] - println( "$key") - push!( out, pretty_table( String, df[1:len,:];backend = Val(:markdown))) - end - out -end - -function create_was_frs_matching_dataset( settings :: Settings ) :: Tuple - - function addtodf( df::DataFrame, label, n, row::Int, data::Vector) - @assert size(data)[1] == n "data=$(size(data)[1]) n = $n" - for i in 1:n - k = Symbol( "$(label)_$(i)") - df[row,k] = data[i] - end - end - - settings.num_households, settings.num_people, nhh2 = - FRSHouseholdGetter.initialise( settings; reset=false ) - was_dataset = CSV.File(joinpath(data_dir( settings ),settings.wealth_dataset))|>DataFrame - nwas = size( was_dataset )[1] - wasset = DataFrame() - frsset = DataFrame() - for v in WAS_TARGET_VARS - k = v[1] - n = v[2] - for i in 1:n - key = Symbol( "$(k)_$(i)") - wasset[!,key] = zeros( Int, nwas ) - frsset[!,key] = zeros( Int, settings.num_households ) - end - end - println( names(wasset)) - hno = 0 - for was in eachrow( was_dataset ) - hno += 1 - addtodf( - wasset, - "age", - WAS_TARGET_VARS["age"], - hno, - was_frs_age_map(was.age_head, 9997 )) - addtodf( - wasset, - "region", - WAS_TARGET_VARS["region"], - hno, - frs_regionmap( was.region, 9997 )) - addtodf( - wasset, - "accom", - WAS_TARGET_VARS["accom"], - hno, - lcf_accmap( was.accom, 9997 )) - addtodf( - wasset, - "tenure", - WAS_TARGET_VARS["tenure"], - hno, - frs_tenuremap( was.tenure, 9997 )) - addtodf( - wasset, - "socio", - WAS_TARGET_VARS["socio"], - hno, - map_socio( was.socio_economic_head, 9997 )) - addtodf( - wasset, - "empstat", - 
WAS_TARGET_VARS["empstat"], - hno, - map_empstat( was.empstat_head, 9997 )) - addtodf( - wasset, - "sex", - WAS_TARGET_VARS["sex"], - hno, - [was.sex_head] ) - addtodf( - wasset, - "marital", - WAS_TARGET_VARS["marital"], - hno, - map_marital( was.marital_status_head, 9997 ) ) - addtodf( - wasset, - "year", - WAS_TARGET_VARS["year"], - hno, - [was.year] ) - addtodf( - wasset, - "wages", - WAS_TARGET_VARS["wages"], - hno, - [was.any_wages] ) - addtodf( - wasset, - "selfemp", - WAS_TARGET_VARS["selfemp"], - hno, - [was.any_selfemp] ) - addtodf( - wasset, - "pensions", - WAS_TARGET_VARS["pensions"], - hno, - [was.any_pension_income] ) - addtodf( - wasset, - "degree", - WAS_TARGET_VARS["degree"], - hno, - [was.has_degree] ) - addtodf( - wasset, - "children", - WAS_TARGET_VARS["degree"], - hno, - [was.num_children] ) - addtodf( - wasset, - "children", - WAS_TARGET_VARS["children"], - hno, - person_map(was.num_children, 9997)) - addtodf( - wasset, - "adults", - WAS_TARGET_VARS["adults"], - hno, - person_map(was.num_adults, 9997)) - end - for hno in 1:settings.num_households - hh = FRSHouseholdGetter.get_household(hno) - any_wages, any_selfemp, any_pension_income, has_female_adult, income = do_hh_sums( hh ) - hrp = get_head( hh ) - addtodf( - frsset, - "age", - WAS_TARGET_VARS["age"], - hno, - was_model_age_grp( hrp.age )) - addtodf( - frsset, - "region", - WAS_TARGET_VARS["region"], - hno, - model_regionmap( hh.region )) - addtodf( - frsset, - "accom", - WAS_TARGET_VARS["accom"], - hno, - model_accommap( hh.dwelling )) - addtodf( - frsset, - "tenure", - WAS_TARGET_VARS["tenure"], - hno, - model_tenuremap( hh.tenure )) - addtodf( - frsset, - "socio", - WAS_TARGET_VARS["socio"], - hno, - model_map_socio( hrp.socio_economic_grouping )) - addtodf( - frsset, - "empstat", - WAS_TARGET_VARS["empstat"], - hno, - model_map_empstat( hrp.employment_status )) - addtodf( - frsset, - "sex", - WAS_TARGET_VARS["sex"], - hno, - [Int(hrp.sex)] ) - addtodf( - frsset, - "marital", - 
WAS_TARGET_VARS["marital"], - hno, - model_map_marital(hrp.marital_status ) ) - addtodf( - frsset, - "year", - WAS_TARGET_VARS["year"], - hno, - [hh.interview_year] ) - addtodf( - frsset, - "wages", - WAS_TARGET_VARS["wages"], - hno, - [any_wages] ) - addtodf( - frsset, - "selfemp", - WAS_TARGET_VARS["selfemp"], - hno, - [any_selfemp] ) - addtodf( - frsset, - "pensions", - WAS_TARGET_VARS["pensions"], - hno, - [any_pension_income] ) - addtodf( - frsset, - "degree", - WAS_TARGET_VARS["degree"], - hno, - [highqual_degree_equiv(hrp.highest_qualification)] ) - addtodf( - frsset, - "children", - WAS_TARGET_VARS["children"], - hno, - person_map( num_children(hh), 9999)) - addtodf( - frsset, - "adults", - WAS_TARGET_VARS["adults"], - hno, - person_map( num_adults( hh ), 9999)) - - end - return frsset,wasset -end # create_was_frs_matching_dataset - -function checkall( filename = "was_matchchecks.md" ) - settings = Settings() - frsset, wasset = create_was_frs_matching_dataset( settings ) - outf = open( joinpath( "tmp", filename), "w") - for (k,i) in WAS_TARGET_VARS - tabs = compareone( frsset, wasset, k, i ) - println( outf, "## $k") - for t in tabs - println( outf, t ) - println( outf ) - end - end - close( outf ) -end - -""" -Map the entire datasets. 
-""" -function map_all_was( - settings :: Settings, - donor :: DataFrame, - matcher :: Function ) :: DataFrame - p = 0 - settings.num_households, - settings.num_people = - FRSHouseholdGetter.initialise( settings; reset=false ) - - df = makeoutdf( settings.num_households, "was" ) - for hno in 1:settings.num_households - hh = FRSHouseholdGetter.get_household( hno ) - println( "on hh $hno") - df[ hno, :frs_sernum] = hh.hid - df[ hno, :frs_datayear] = hh.data_year - df[ hno, :frs_income] = hh.original_gross_income - matches = match_recip_row( hh, donor, matcher, :weekly_gross_income ) - for i in 1:NUM_SAMPLES - was_case_sym = Symbol( "was_case_$i") - was_datayear_sym = Symbol( "was_datayear_$i") - was_score_sym = Symbol( "was_score_$i") - was_income_sym = Symbol( "was_income_$i") - df[ hno, was_case_sym] = matches[i].case - df[ hno, was_datayear_sym] = matches[i].datayear - df[ hno, was_score_sym] = matches[i].score - df[ hno, was_income_sym] = matches[i].income - end - if p > 10000000 - break - end - end - return df -end - -function create_frs_was_matches( data_source :: DataSource = FRSSource ) - settings = Settings() - settings.data_source = data_source - was_dataset = CSV.File(joinpath(data_dir( settings ),settings.wealth_dataset)*".tab")|>DataFrame - map_all_was( settings, was_dataset, model_was_match ) -end - -end # module - diff --git a/src/notused/frs_hbai_creation_libs.jl b/src/notused/frs_hbai_creation_libs.jl deleted file mode 100644 index 9bb5f305..00000000 --- a/src/notused/frs_hbai_creation_libs.jl +++ /dev/null @@ -1,1101 +0,0 @@ - -const MONTHS = Dict( - "JAN" => 1, - "FEB" => 2, - "MAR" => 3, - "APR" => 4, - "MAY" => 5, - "JUN" => 6, - "JUL" => 7, - "AUG" => 8, - "SEP" => 9, - "OCT" => 10, - "NOV" => 11, - "DEC" => 12 ) - - -# FIXME paths in Definitions.jl broken -const L_HBAI_DIR="/mnt/data/hbai/" -const L_FRS_DIR="/mnt/data/frs/" - - -""" -hacky routine to add uhid - unique hhid needed for mostly.ai generator -""" -function add_uhids( settings :: 
Settings ) - for i in 1:2 - datafs, data_source = if i == 1 - main_datasets( settings ), - settings.data_source - else - example_datasets( settings ), - ExampleSource - end - hh = CSV.File( datafs.hhlds ) |> DataFrame - pers = CSV.File( datafs.people ) |> DataFrame - hh.uhid = get_pid.( data_source, hh.data_year, hh.hid, 0 ) # - pers.uhid = get_pid.( data_source, pers.data_year, pers.hid, 0 ) # - CSV.write( datafs.hhlds, hh; delim='\t' ) - CSV.write( datafs.people, pers; delim='\t' ) - end -end - -""" -Make a subset of main model data. -TODO add in WAS,LCF,SHS mapping stuff -sz - 10 for 1/10 and so on -- another approach: https://discourse.julialang.org/t/how-to-sample-a-data-frame/32791/5 -""" -function make_sample( settings :: Settings; sz :: Int ) :: Tuple - datafs = main_datasets( settings ) - hh = CSV.File( datafs.hhlds ) |> DataFrame - pers = CSV.File( datafs.people ) |> DataFrame - uhid = copy(hh.uhid) - n = length(uhid) - suhids = sample( uhid, sz; replace=false, ordered=true ) - hhsample = hh[ in.(hh.uhid, ( suhids, )),: ] - perssample = pers[ in.(pers.uhid, ( suhids, )),: ] - return hhsample, perssample -end - -function loadfrs(which::AbstractString, year::Integer)::DataFrame - filename = "$(L_FRS_DIR)/$(year)/tab/$(which).tab" - df = loadtoframe(filename) - df.data_year .= year - return df -end - - -function is_in_hbai( - hbai_res :: DataFrame, - sernum::Integer, - benunit :: Integer, - person :: Integer ) :: Bool - - ad_hbai = hbai_res[((hbai_res.sernum.==sernum ).& - ((hbai_res.personhd.==person).|(hbai_res.personsp.==person)) .& - (hbai_res.benunit.==benunit)), :] - return size( ad_hbai )[1]>0 -end - -function is_in_hbai( - hbai_res :: DataFrame, - sernum::Integer ) :: Bool - - ad_hbai = hbai_res[(hbai_res.sernum.==sernum ), :] - return size( ad_hbai )[1]>0 -end - - -""" -hacky hack to hack PIP, etc into l/h -""" -function map12( v :: Union{Missing,Real}, amt :: Real ) :: Integer - r = -1 - if (! ismissing(v)) && v > 0 - r = v <= amt ? 
1 : 2 - end - @assert r in [-1,1,2] - return r -end - - -""" -hacky hack to hack AA, etc into l/m/h, sometimes without the m -""" -function map123( v :: Union{Missing,Real}, amts :: Vector ) :: Integer - n = size(amts)[1] - @assert n in [1,2] - # println(n) - r = -1 - if (! ismissing(v)) && v > 0 - if n == 1 - r = v <= amts[1] ? 1 : 3 - else - # println("r=$r") - if v <= amts[1] - r = 1 - elseif v <= amts[2] - r = 2 - else - r = 3 - end - end - end - @assert r in [-1,1,2,3] - return r -end - -# -# @returns ns for the JSType enum -1=no 1=cont, 2=income 3=mixed -# !!! FIXME DUP -function make_jsa_type( frs_res::DataFrame, sernum :: Integer, benunit :: Integer, head :: Bool )::Tuple - ad_frs = frs_res[((frs_res.sernum.==sernum ).& - (frs_res.benunit.==benunit)), [:jsatyphd,:jsatypsp,:esatyphd,:esatypsp]] - @assert size( ad_frs )[1] .== 1 - af = ad_frs[1,:] - jsa = head ? af.jsatyphd : af.jsatypsp - # fixme refactor - # 2021 has mostly single blank - if typeof(jsa) <: AbstractString - jsa = if jsa == " " - -1 - else - parse(Int,jsa) - end - end - jtype = -1 - if jsa == -1 - jtype = -1 - elseif jsa in [1,3] - jtype = 1 - elseif jsa in [2,4] - jtype = 2 - elseif jsa in [5,6] - jtype = 3 - else - @assert false "JSA: value |$jsa| not mapped" - end - etype = -1 - esa = head ? 
af.esatyphd : af.esatypsp - # 2021 has mostly single blank - if typeof(esa) <: AbstractString - esa = if esa == " " - -1 - else - parse(Int,esa) - end - end - - if esa == -1 - etype = -1 - elseif esa in [1,3] - etype = 1 - elseif esa in [2,4] - etype = 2 - elseif esa in [5,6] - etype = 3 - else - @assert false "ESA: value |$esa| not mapped" - end - return( jtype, etype ) - - - - # see benefits PDF file - # 1 = Contributory - # 2 = Income Based - # 3 = Contributory (Imputed) - # 4 = Income Based (Imputed) - # 5 = Both contributory and income based - # 6 = Both contributory and income based (Imputed) - -end - -# -# @returns ns for the JSType enum -1=no 1=cont, 2=income 3=mixed -# -function make_jsa_type( frs_res::DataFrame, sernum :: Integer, benunit :: Integer, head :: Bool )::Tuple - ad_frs = frs_res[((frs_res.sernum.==sernum ).& - (frs_res.benunit.==benunit)), [:jsatyphd,:jsatypsp,:esatyphd,:esatypsp]] - @assert size( ad_frs )[1] .== 1 - af = ad_frs[1,:] - jsa = head ? af.jsatyphd : af.jsatypsp - # fixme refactor - # 2021 has mostly single blank - # FIXME DON'T NEED THIS - if typeof(jsa) <: AbstractString - jsa = if (jsa == " ") - -1 - else - parse(Int,jsa) - end - end - - jtype = -1 - if ismissing(jsa) || (jsa == -1) - jsa == -1 - # jtype = -1 - elseif jsa in [1,3] - jtype = 1 - elseif jsa in [2,4] - jtype = 2 - elseif jsa in [5,6] - jtype = 3 - else - @assert false "JSA: value |$jsa| not mapped" - end - etype = -1 - esa = head ? 
af.esatyphd : af.esatypsp - # 2021 has mostly single blank - if typeof(esa) <: AbstractString - esa = if esa == " " - -1 - else - parse(Int,esa) - end - end - - if ismissing(esa) || (esa == -1) - etype = -1 - elseif esa in [1,3] - etype = 1 - elseif esa in [2,4] - etype = 2 - elseif esa in [5,6] - etype = 3 - else - @assert false "ESA: value |$esa| not mapped" - end - return( JSAType(jtype), JSAType(etype) ) - - - - # see benefits PDF file - # 1 = Contributory - # 2 = Income Based - # 3 = Contributory (Imputed) - # 4 = Income Based (Imputed) - # 5 = Both contributory and income based - # 6 = Both contributory and income based (Imputed) - - end - - function initialise_person(n::Integer)::DataFrame - pers = DataFrame( - data_year = fill( 0, n ), # Vector{Union{Int64,Missing}}(missing, n), - hid = fill( BigInt(0), n ), #Vector{Union{BigInt,Missing}}(missing, n), - uhid = fill( BigInt(0), n ), # Vector{Union{BigInt,Missing}}(missing, n), # unique combination of hid&data_year, needed for ai generation - pid = fill( BigInt(0), n ), # Vector{Union{BigInt,Missing}}(missing, n), - pno = fill( 0, n ), # Vector{Union{Integer,Missing}}(missing, n), - is_hrp = fill( false, n ), # Vector{Union{Integer,Missing}}(missing, n), - is_bu_head = fill( false, n ), # = Vector{Union{Integer,Missing}}(missing, n), - - from_child_record = fill( false, n ), # Vector{Union{Integer,Missing}}(missing, n), - default_benefit_unit = fill( 0, n ), # = Vector{Union{Integer,Missing}}(missing, n), - age = fill( 0, n ), # Vector{Union{Integer,Missing}}(missing, n), - sex = fill( Missing_Sex, n ), # Vector{Union{Integer,Missing}}(missing, n), - ethnic_group = fill( Missing_Ethnic_Group, n ), # Vector{Union{Integer,Missing}}(missing, n), - marital_status = fill( Missing_Marital_Status, n ), # Vector{Union{Integer,Missing}}(missing, n), - highest_qualification = fill( Missing_Highest_Qualification, n ), # Vector{Union{Integer,Missing}}(missing, n), - sic = fill( Missing_SIC_2007, n ), # 
Missing_Vector{Union{Integer,Missing}}(missing, n), - occupational_classification = fill( Missing_Standard_Occupational_Classification, n ), # Vector{Union{Integer,Missing}}(missing, n), - public_or_private = fill( Missing_Employment_Sector, n ), # Vector{Union{Integer,Missing}}(missing, n), - principal_employment_type = fill( Missing_Employment_Type, n ), # Vector{Union{Integer,Missing}}(missing, n), - socio_economic_grouping = fill( Missing_Socio_Economic_Group, n ), # Vector{Union{Integer,Missing}}(missing, n), - age_completed_full_time_education = fill(0,n), # Vector{Union{Integer,Missing}}(missing, n), - years_in_full_time_work = fill(0,n), # Vector{Union{Integer,Missing}}(missing, n), - employment_status = fill( Missing_ILO_Employment, n ), #Vector{Union{Integer,Missing}}(missing, n), - usual_hours_worked = fill(0.0, n ), # Vector{Union{Real,Missing}}(missing, n), - actual_hours_worked = fill(0.0, n), # Vector{Union{Real,Missing}}(missing, n), - age_started_first_job = fill(0, n), # Vector{Union{Real,Missing}}(missing, n), - # for widow's benefits - type_of_bereavement_allowance = fill( missing_bereave, n ), # Vector{Union{Real,Missing}}(missing, n), - had_children_when_bereaved = fill( false, n ), #Vector{Union{Real,Missing}}(missing, n), - - pay_includes_ssp = fill( false, n ), # Vector{Union{Integer,Missing}}(missing, n), - pay_includes_smp = fill( false, n ), # Vector{Union{Integer,Missing}}(missing, n), - pay_includes_spp = fill( false, n ), # Vector{Union{Integer,Missing}}(missing, n), - pay_includes_sap = fill( false, n ), # Vector{Union{Integer,Missing}}(missing, n), - pay_includes_mileage = fill( false, n ), # Vector{Union{Integer,Missing}}(missing, n), - pay_includes_motoring_expenses = fill( false, n ), # Vector{Union{Integer,Missing}}(missing, n), - - income_wages = zeros(n), - income_self_employment_income = zeros(n), - income_self_employment_expenses = zeros(n), - income_self_employment_losses = zeros(n), - income_odd_jobs = zeros(n), - 
income_private_pensions = zeros(n), - income_national_savings = zeros(n), - income_bank_interest = zeros(n), - income_stocks_shares = zeros(n), - income_individual_savings_account = zeros(n), - # income_dividends = zeros(n), - income_property = zeros(n), - income_royalties = zeros(n), - income_bonds_and_gilts = zeros(n), - income_other_investment_income = zeros(n), - income_other_income = zeros(n), - income_alimony_and_child_support_received = zeros(n), - income_health_insurance = zeros(n), - income_alimony_and_child_support_paid = zeros(n), - income_care_insurance = zeros(n), - income_trade_unions_etc = zeros(n), - income_friendly_societies = zeros(n), - income_work_expenses = zeros(n), - income_avcs = zeros(n), - income_other_deductions = zeros(n), - income_loan_repayments = zeros(n), - income_student_loan_repayments = zeros(n), - income_pension_contributions_employer = zeros(n), - income_pension_contributions_employee = zeros(n), - income_education_allowances = zeros(n), - income_foster_care_payments = zeros(n), - income_student_grants = zeros(n), - income_student_loans = zeros(n), - income_income_tax = zeros(n), - income_national_insurance = zeros(n), - income_local_taxes = zeros(n), - income_free_school_meals = zeros(n), - income_dlaself_care = zeros(n), - income_dlamobility = zeros(n), - income_child_benefit = zeros(n), - income_pension_credit = zeros(n), - income_state_pension = zeros(n), - income_bereavement_allowance_or_widowed_parents_allowance_or_bereavement = zeros(n), - income_armed_forces_compensation_scheme = zeros(n), - income_war_widows_or_widowers_pension = zeros(n), - income_severe_disability_allowance = zeros(n), - income_attendance_allowance = zeros(n), - income_carers_allowance = zeros(n), - income_jobseekers_allowance = zeros(n), - income_industrial_injury_disablement_benefit = zeros(n), - income_employment_and_support_allowance = zeros(n), - income_incapacity_benefit = zeros(n), - income_income_support = zeros(n), - 
income_maternity_allowance = zeros(n), - income_maternity_grant_from_social_fund = zeros(n), - income_funeral_grant_from_social_fund = zeros(n), - income_any_other_ni_or_state_benefit = zeros(n), - income_trade_union_sick_or_strike_pay = zeros(n), - income_friendly_society_benefits = zeros(n), - income_private_sickness_scheme_benefits = zeros(n), - income_accident_insurance_scheme_benefits = zeros(n), - income_hospital_savings_scheme_benefits = zeros(n), - income_government_training_allowances = zeros(n), - income_guardians_allowance = zeros(n), - income_widows_payment = zeros(n), - income_unemployment_or_redundancy_insurance = zeros(n), - income_winter_fuel_payments = zeros(n), - income_child_winter_heating_assistance_payment = zeros(n), - income_dwp_third_party_payments_is_or_pc = zeros(n), - income_dwp_third_party_payments_jsa_or_esa = zeros(n), - income_social_fund_loan_repayment_from_is_or_pc = zeros(n), - income_social_fund_loan_repayment_from_jsa_or_esa = zeros(n), - income_extended_hb = zeros(n), - income_permanent_health_insurance = zeros(n), - income_any_other_sickness_insurance = zeros(n), - income_critical_illness_cover = zeros(n), - income_working_tax_credit = zeros(n), - income_child_tax_credit = zeros(n), - income_working_tax_credit_lump_sum = zeros(n), - income_child_tax_credit_lump_sum = zeros(n), - income_housing_benefit = zeros(n), - income_universal_credit = zeros(n), - income_personal_independence_payment_daily_living = zeros(n), - income_personal_independence_payment_mobility = zeros(n), - income_a_loan_from_the_dwp_and_dfc = zeros(n), - income_a_loan_or_grant_from_local_authority = zeros(n), - income_social_fund_loan_uc = zeros(n), - income_other_benefits = zeros(n), - income_scottish_child_payment = zeros(n), - income_job_start_payment = zeros(n), - income_troubles_permanent_disablement = zeros(n), - income_child_disability_payment_care = zeros(n), - income_child_disability_payment_mobility = zeros(n), - income_pupil_development_grant = 
zeros(n), - # FIXME next 4 shouldn't be needed - wages_frs = zeros(n), # Vector{Union{Real,Missing}}(missing, n), - self_emp_frs = zeros(n), # Vector{Union{Real,Missing}}(missing, n), - wages_hbai = zeros(n), # Vector{Union{Real,Missing}}(missing, n), - self_emp_hbai = zeros(n), # Vector{Union{Real,Missing}}(missing, n), - - jsa_type = fill( no_jsa, n ), # Vector{Union{Integer,Missing}}(missing, n), - esa_type = fill( no_jsa, n ), # Vector{Union{Integer,Missing}}(missing, n), - dlaself_care_type = fill( missing_lmh, n ), # Vector{Union{Integer,Missing}}(missing, n), - dlamobility_type = fill( missing_lmh, n ), # Vector{Union{Integer,Missing}}(missing, n), - attendance_allowance_type = fill( missing_lmh, n ), # = Vector{Union{Integer,Missing}}(missing, n), - # FIXME names consistent - personal_independence_payment_daily_living_type = fill( no_pip, n ), # Vector{Union{Integer,Missing}}(missing, n), - personal_independence_payment_mobility_type = fill( no_pip, n ), # = Vector{Union{Integer,Missing}}(missing, n), - - over_20_k_saving = fill(false,n), - asset_current_account = zeros(n), - asset_nsb_ordinary_account = zeros(n), - asset_nsb_investment_account = zeros(n), - asset_not_used = zeros(n), - asset_savings_investments_etc = zeros(n), - asset_government_gilt_edged_stock = zeros(n), - asset_unit_or_investment_trusts = zeros(n), - asset_stocks_shares_bonds_etc = zeros(n), - asset_pep = zeros(n), - asset_national_savings_capital_bonds = zeros(n), - asset_index_linked_national_savings_certificates = zeros(n), - asset_fixed_interest_national_savings_certificates = zeros(n), - asset_pensioners_guaranteed_bonds = zeros(n), - asset_saye = zeros(n), - asset_premium_bonds = zeros(n), - asset_national_savings_income_bonds = zeros(n), - asset_national_savings_deposit_bonds = zeros(n), - asset_first_option_bonds = zeros(n), - asset_yearly_plan = zeros(n), - asset_isa = zeros(n), - asset_fixd_rate_svngs_bonds_or_grntd_incm_bonds_or_grntd_growth_bonds = zeros(n), - asset_geb = 
zeros(n), - asset_basic_account = zeros(n), - asset_credit_unions = zeros(n), - asset_endowment_policy_not_linked = zeros(n), - asset_informal_assets = zeros(n), - asset_post_office_card_account= zeros(n), - asset_friendly_society_investment = zeros(n), - - contracted_out_of_serps = fill( false, n ), - registered_blind = fill( false, n ), - registered_partially_sighted = fill( false, n ), - registered_deaf = fill( false, n ), - disability_vision = fill( false, n ), - disability_hearing = fill( false, n ), - disability_mobility = fill( false, n ), - disability_dexterity = fill( false, n ), - disability_learning = fill( false, n ), - disability_memory = fill( false, n ), - disability_mental_health = fill( false, n ), - disability_stamina = fill( false, n ), - disability_socially = fill( false, n ), - disability_other_difficulty = fill( false, n ), - health_status = fill( Missing_Health_Status, n ), - - has_long_standing_illness = fill( false, n ), # = Vector{Union{Integer,Missing}}(missing, n), - adls_are_reduced = fill( Missing_ADLS_Inhibited, n ), #Vector{Union{Integer,Missing}}(missing, n), - how_long_adls_reduced = fill( Missing_Illness_Length, n ), #Vector{Union{Integer,Missing}}(missing, n), - - is_informal_carer = fill( false, n ), # Vector{Union{Integer,Missing}}(missing, n), - receives_informal_care_from_non_householder = fill( false, n ), - hours_of_care_received = zeros( n ), # Vector{Union{Real,Missing}}(missing, n), - hours_of_care_given = zeros(n), #Vector{Union{Real,Missing}}(missing, n), - hours_of_childcare = zeros(n), # Vector{Union{Real,Missing}}(missing, n), - cost_of_childcare = zeros(n), # Vector{Union{Real,Missing}}(missing, n), - childcare_type = fill( Missing_Child_Care_Type, n ), #Vector{Union{Integer,Missing}}(missing, n), - employer_provides_child_care = fill( false, n ), # Vector{Union{Integer,Missing}}(missing, n), - - - work_expenses = zeros(n), - travel_to_work = zeros(n), - debt_repayments = zeros(n), - wealth_and_assets = zeros(n), - 
totsav= zeros(Int,n), - - company_car_fuel_type = fill( Missing_Fuel_Type, n ), # Vector{Union{Integer,Missing}}(missing, n), - company_car_value = zeros(n), # Vector{Union{Real,Missing}}(missing, n), - company_car_contribution = zeros(n), # Vector{Union{Real,Missing}}(missing, n), - fuel_supplied = zeros(n), # Vector{Union{Real,Missing}}(missing, n), - - relationship_to_hoh = fill( Missing_Relationship, n ), # - relationship_1 = fill( Missing_Relationship, n ), # - relationship_2 = fill( Missing_Relationship, n ), # - relationship_3 = fill( Missing_Relationship, n ), # - relationship_4 = fill( Missing_Relationship, n ), # - relationship_5 = fill( Missing_Relationship, n ), # - relationship_6 = fill( Missing_Relationship, n ), # - relationship_7 = fill( Missing_Relationship, n ), # - relationship_8 = fill( Missing_Relationship, n ), # - relationship_9 = fill( Missing_Relationship, n ), # - relationship_10 = fill( Missing_Relationship, n ), # - relationship_11 = fill( Missing_Relationship, n ), # - relationship_12 = fill( Missing_Relationship, n ), # - relationship_13 = fill( Missing_Relationship, n ), # - relationship_14 = fill( Missing_Relationship, n ), # - relationship_15 = fill( Missing_Relationship, n ), # - onerand = fill( "", n ) # Vector{String}(undef,n) - ) -end - -const HH_TYPE_HINTS = [ - :region => Standard_Region, - :ct_band => CT_Band, - :tenure => Tenure_Type -] - - - - -function initialise_household(n::Integer)::DataFrame - # .. 
example check - # FIXME change all VectorUnion to fill(0,n) - # select value,count(value),label from dictionaries.enums where dataset='frs' and tables='househol' and variable_name='hhcomps' group by value,label; - return DataFrame( - data_year = fill(0,n), # Vector{Union{Integer,Missing}}(missing, n), - interview_year = fill(0,n), # = Vector{Union{Integer,Missing}}(missing, n), - interview_month = fill(0,n), # = Vector{Union{Integer,Missing}}(missing, n), - quarter= fill(0,n), # = Vector{Union{Integer,Missing}}(missing, n), - hid = fill(BigInt(0),n), # = Vector{Union{BigInt,Missing}}(missing, n), - uhid = fill(BigInt(0),n), # Vector{Union{BigInt,Missing}}(missing, n), # unique combination of hid&data_year, needed for ai generation - tenure = fill(Missing_Tenure_Type,n), # Vector{Union{Integer,Missing}}(missing, n), - region = fill( Missing_Standard_Region, n ), # Vector{Union{Integer,Missing}}(missing, n), - ct_band = fill( Missing_CT_Band, n ), # Vector{Union{Integer,Missing}}(missing, n), - dwelling = fill( dwell_na, n ), # Vector{Union{Integer,Missing}}(missing, n), - council_tax = zeros(n), # Vector{Union{Real,Missing}}(missing, n), - water_and_sewerage = zeros(n), # = Vector{Union{Real,Missing}}(missing, n), - mortgage_payment = zeros(n), # = Vector{Union{Real,Missing}}(missing, n), - mortgage_interest = zeros(n), # = Vector{Union{Real,Missing}}(missing, n), - years_outstanding_on_mortgage = fill(0,n), # Vector{Union{Integer,Missing}}(missing, n), - mortgage_outstanding = zeros(n), # = Vector{Union{Real,Missing}}(missing, n), - year_house_bought = fill(0,n), # = Vector{Union{Integer,Missing}}(missing, n), - gross_rent = zeros(n), # = Vector{Union{Real,Missing}}(missing, n), - rent_includes_water_and_sewerage = fill( false, n ), #Vector{Union{Integer,Missing}}(missing, n), - other_housing_charges = zeros(n), # Vector{Union{Real,Missing}}(missing, n), - gross_housing_costs = zeros(n), # = Vector{Union{Real,Missing}}(missing, n), - original_gross_income = 
zeros(n), # = Vector{Union{Real,Missing}}(missing, n), - total_wealth = zeros(n), # = Vector{Union{Real,Missing}}(missing, n), - house_value = zeros(n), # = Vector{Union{Real,Missing}}(missing, n), - weight = zeros(n), # = Vector{Union{Real,Missing}}(missing, n), - council = fill( "", n ), - nhs_board = fill( "", n ), - bedrooms = fill( 0, n ), - # these should map to the corresponding WAS hh categories - net_physical_wealth = zeros(n), # todo change all the rest to zeros(n) - net_financial_wealth = zeros(n), - net_housing_wealth = zeros(n), - net_pension_wealth = zeros(n), - onerand = fill("", n ) # Vector{String}(undef,n) - ) -end - -# -# the way this seems to work: if deduc1 in job record -# is > 0, the employee contrib here is set to -1 -# -function process_penprovs(a_pens::DataFrame)::Tuple - npens = size(a_pens)[1] - penconts_employer = 0.0 - penconts_employee = 0.0 - for p in 1:npens - pen = a_pens[p,:] - pc = safe_inc(0.0, pen.penamt) - if pen.penamtpd == 95 - pc /= 52.0 - end - if pen.pencon in [1,4,5] # ish ... 
- penconts_employee += pc - elseif pen.pencon == 2 # employer - penconts_employer += pc - elseif pen.pencon == 3 # oth employer and employee - penconts_employer += pc/2 - penconts_employee += pc/2 - end - end - # FIXME something about SERPS - (penconts_employee,penconts_employer) -end - -function process_pensions(a_pens::DataFrame)::NamedTuple - npens = size(a_pens)[1] - private_pension = 0.0 - tax = 0.0 - for p in 1:npens - private_pension = safe_inc(private_pension, a_pens[p, :penpay]) - private_pension = safe_inc(private_pension, a_pens[p, :ptamt]) # tax - private_pension = safe_inc(private_pension, a_pens[p, :penpd2]) # other deduction - tax = safe_inc(tax, a_pens[p, :ptamt]) - end - return (pension = private_pension, tax = tax) -end - -const NS_RATE = 0.01/WEEKS_PER_YEAR - -# | 2016 | accounts | nsamt | 1 | Jan-50 | Jan_50 | 0 -# | 2016 | accounts | nsamt | 2 | 51 - 100 | v_51_100 | 0 -# | 2016 | accounts | nsamt | 3 | 101 - 250 | v_101_250 | 0 -# | 2016 | accounts | nsamt | 4 | 251 - 500 | v_251_500 | 0 -# | 2016 | accounts | nsamt | 5 | 501 - 1000 | v_501_1000 | 0 -# | 2016 | accounts | nsamt | 6 | 1001 - 2000 | v_1001_2000 | 0 -# | 2016 | accounts | nsamt | 7 | 2001 - 3000 | v_2001_3000 | 0 -# | 2016 | accounts | nsamt | 8 | 3001 - 5000 | v_3001_5000 | 0 -# | 2016 | accounts | nsamt | 9 | 5001 - 10,000 | v_5001_10_000 | 0 -# | 2016 | accounts | nsamt | 10 | 10,001 - 20,000 | v_10_001_20_000 | 0 -# | 2016 | accounts | nsamt | 11 | 20,001 - 30,000 | v_20_001_30_000 | 0 -# | 2016 | accounts | nsamt | 12 | 30,001 or over | v_30_001_or_over | 0 -const NSAMT_ENUM_MIDPOINTS = [ - 25.0, - 75.0, - 175.0, - 375.0, - 750.0, - 1_500.0, - 2_500.0, - 4_000.0, - 7_500.0, - 15_000.0, - 25_000.0, - 40_000.0 -] - -""" -infer amounts from holdings (nsamt) assuming 1% pa interest rate -see: https://www.nsandi.com/historical-interest-rates for rates -why FRS records like this I have no idea -""" -function infer_national_savings_income( nsamt :: Integer )::Real - @assert ! 
(nsamt in [1:12]) "nsr out of range for enum: $nsamt" - NSAMT_ENUM_MIDPOINTS[ nsamt ]*NS_RATE -end - -function map_investment_income!(model_adult::DataFrameRow, accounts::DataFrame) - naccts = size(accounts)[1] - - model_adult.income_national_savings = 0.0 - model_adult.income_bank_interest = 0.0 - model_adult.income_stocks_shares = 0.0 - model_adult.income_individual_savings_account = 0.0 - model_adult.income_property = 0.0 - model_adult.income_royalties = 0.0 - model_adult.income_bonds_and_gilts = 0.0 - model_adult.income_other_investment_income = 0.0 - - - for i in 1:naccts - v = max(0.0, accounts[i, :accint]) # FIXME national savings stuff appears to be coded -1 for missing - if accounts[i, :invtax] == 1 - # FIXME is this right for dividends anymore? - v /= 0.8 - end - # FIXME building society - check with other models - # FIXME go over assignment to broad types against income - # tax book - atype = Account_Type(accounts[i, :account]) - nsamt = accounts[i, :nsamt] - # - # for national savings, amount held is recorded - # for the rest acctoint = interest pw from account - if nsamt > 0 - model_adult.income_national_savings += - infer_national_savings_income( nsamt ) # FIXME appears to be all zero! - elseif atype in [ - Current_account, - Basic_Account, - NSB_Investment_account, - NSB_Direct_Saver - - ] - model_adult.income_bank_interest += v - elseif atype in [ - National_Savings_capital_bonds, - Index_Linked_National_Savings_Certificates, - Fixed_Interest_National_Savings_Certificates, - National_Savings_income_bonds, - National_Savings_deposit_bonds - ] - ## this should never happen given, but does.. - # the weird way the FRS records National Savings as stocks - # nsamt should always be set for these records & handled above. 
- # @assert false - # println( "atype = $atype but nsamt is $nsamt" ) - elseif atype in [ - Stocks_Shares_Bonds_etc, - Member_of_Share_Club] - model_adult.income_stocks_shares += v - elseif atype in [ISA] - model_adult.income_individual_savings_account += v - elseif atype in [ - SAYE, - Savings_investments_etc, - Unit_or_Investment_Trusts, - Endowment_Policy_Not_Linked, - Profit_sharing, - Credit_Unions, - Yearly_Plan, - Premium_bonds, - Company_Share_Option_Plans, - Post_Office_Card_Account, - Pensioners_Guaranteed_Bonds, - Informal_Assets, - Friendly_Society_Investment - ] - model_adult.income_other_investment_income += v - elseif atype in [ - Guaranteed_Equity_Bond, - Fixed_Rate_Savings_or_Guaranteed_Income_or_Guaranteed_Growth_Bonds, - First_Option_bonds, - Government_Gilt_Edged_Stock] - model_adult.income_bonds_and_gilts += v - else - @assert false "failed to map $atype" - end - end # accounts loop -end # map_investment_income - -function map_alimony(frs_person::DataFrameRow, a_maint::DataFrame)::Tuple - nmaints = size(a_maint)[1] - alimony_paid = 0.0 # note: not including children - alimony_recieved = 0.0 # note: not including children - if frs_person.alimny == 1 # receives alimony - if frs_person.alius == 2 # not usual - alimony_recieved = safe_inc(0.0, frs_person.aluamt) - else - alimony_recieved = safe_inc(0.0, frs_person.aliamt) - end - end - for c in 1:nmaints - alimony_paid = safe_inc(alimony_paid, a_maint[c, :mramt]) - end - alimony_recieved, alimony_paid -end - -function map_car_value( cv :: Integer ) :: Real - v = 0.0 - @assert cv <= 10 "cv out-of-range = $cv" - if cv < 0 - v = 0.0 - elseif cv == 1 - v = 5_000.0 - elseif cv == 2 - v = 11_500.0 - elseif cv == 3 - v = 14_500.0 - elseif cv == 4 - v = 17_500.0 - elseif cv == 5 - v = 20_500.0 - elseif cv == 6 - v = 23_500.0 - elseif cv == 7 - v = 27_500.0 - elseif cv == 8 - v = 35_000.0 - elseif cv == 9 - v = 45_000.0 - elseif cv == 10 - v = 20_000 # Don't_know = 10 - end - v -end - -""" -process the 
"r01..r014 and relhrp codes. Note we're adding 'this person' (=0) rather than missing as in the raw data" -""" -function process_relationships!( model_person :: DataFrameRow, frs_person :: DataFrameRow ) - relhh = safe_assign( frs_person.relhrp ) - if (relhh == -1 ) - relhh = 0 # map 'this person'; note hrp/head no longer needs to be 1 - end - model_person.relationship_to_hoh = Relationship( relhh )# - for i in 1:14 - rel = i < 10 ? "r0" : "r" - relfrs = Symbol( "$(rel)$i" ) # :r10 or :r02 and so on - relmod = Symbol( "relationship_$(i)") # :relationship_10 or :relationship_2 - relp = safe_assign(frs_person[relfrs]) - if (frs_person.person == i) & (relp == -1) # again "this person = 0; makes mapping code (and just reading output) easier - relp = 0 - end - model_person[relmod] = Relationship(relp) - end -end - -function process_job_rec!(model_adult::DataFrameRow, a_job::DataFrame) - njobs = size(a_job)[1] - - earnings = 0.0 - actual_hours = 0.0 - usual_hours = 0.0 - health_insurance = 0.0 - alimony_and_child_support_paid = 0.0 - # care_insurance = 0.0 - trade_unions_etc = 0.0 - friendly_societies = 0.0 - work_expenses = 0.0 - pension_contributions_employee = 0.0 - avcs = 0.0 - other_deductions = 0.0 - student_loan_repayments = 0.0 - loan_repayments = 0.0 - self_employment_income = 0.0 - self_employment_expenses = 0.0 - self_employment_losses = 0.0 - tax = 0.0 - principal_employment_type = -1 - public_or_private = -1 - - company_car_fuel_type = 0 - company_car_value = 0.0 - company_car_contribution = 0.0 - fuel_supplied = 0.0 - - for j in 1:njobs - jb = a_job[j,:] # 1 row - if j == 1 # take 1st record job for all of these - principal_employment_type = safe_assign(jb.etype) - public_or_private = safe_assign(jb.jobsect) - end - usual_hours = safe_inc(usual_hours, jb.dvushr) - actual_hours = safe_inc(actual_hours, jb.jobhours) - - # alimony_and_child_support_paid = safe_inc( alimony_and_child_support_paid , a_job[j,udeduc0X]) - # care_insurance = safe_inc( 
care_insurance , jb.othded0X - # note these are *Usual* deductions - # "1... contribution *by you* to a Pension or superannuation scheme?" - # I *think* these contributions - pension_contributions_employee = safe_inc(pension_contributions_employee, jb.udeduc1) - avcs = safe_inc(avcs, jb.udeduc2) - trade_unions_etc = safe_inc(trade_unions_etc, jb.udeduc3) - friendly_societies = safe_inc(friendly_societies, jb.udeduc4) - other_deductions = safe_inc(other_deductions, jb.udeduc5) - loan_repayments = safe_inc(loan_repayments, jb.udeduc6) - health_insurance = safe_inc(health_insurance, jb.udeduc7) - other_deductions = safe_inc(other_deductions, jb.udeduc8) - student_loan_repayments = safe_inc(student_loan_repayments, jb.udeduc9) - work_expenses = safe_inc(work_expenses, jb.umotamt)# CARS FIXME add to this - - if jb.inclpay1 == 1 - model_adult.pay_includes_ssp = true - end - if jb.inclpay2 == 1 - model_adult.pay_includes_smp = true - end - # it refund .. 3 - if jb.inclpay4 == 1 - model_adult.pay_includes_mileage = true - end - if jb.inclpay5 == 1 - model_adult.pay_includes_motoring_expenses = true - end - if jb.inclpay6 == 1 - model_adult.pay_includes_spp = true - end - if jb.inclpay7 == 1 - model_adult.pay_includes_sap = true - end - - # self employment - if jb.prbefore > 0.0 - self_employment_income += jb.prbefore - elseif jb.profit1 > 0.0 - if jb.profit2 == -1 - # println( "jb.profit2 is |$(jb.profit2)| should be 1,2 pid=$(model_adult.pid)") - jb.profit2 = 1# jb.profit2 catch 1 weird -1 profit2 pid=120191636601 just treat as profit not loss - end - @assert jb.profit2 in [1, 2] - if jb.profit2 == 1 - self_employment_income += jb.profit1 - else - self_employment_losses += jb.profit1 - end - elseif jb.seincamt > 0.0 - self_employment_income += jb.seincamt - end - # setax = safe_inc(0.0, jb.setaxamt) - # tax += setax / 52.0 - - # earnings - addBonus = false - if jb.ugross > 0.0 # take usual when last not usual - earnings += jb.ugross - addBonus = true - elseif jb.grwage > 
0.0 # then take last - earnings += jb.grwage - addBonus = true - elseif jb.ugrspay > 0.0 # then take total pay, but don't add bonuses - earnings += jb.ugrspay - end - if addBonus - for i in 1:6 - bon = Symbol(string("bonamt", i)) - tax = Symbol(string("bontax", i)) - if a_job[j, bon] > 0.0 - bon = a_job[j, bon] - if a_job[j, tax] == 2 - bon /= (1 - 0.22) # fixme hack basic rate - end - earnings += bon / 52.0 # fixwme weeks per year - end - end # bonuses loop - end # add bonuses - # cars - # assign once - if company_car_fuel_type < 0 - company_car_fuel_type = jb.fueltyp - end - mv = map_car_value(jb.carval) - # println( mv ) - company_car_value = safe_inc(company_car_value, mv ) - company_car_contribution = safe_inc(company_car_contribution, jb.caramt) - fuel_supplied = safe_inc(fuel_supplied, jb.fuelamt) - - end # jobs loop - - model_adult.usual_hours_worked = usual_hours - model_adult.actual_hours_worked = actual_hours - model_adult.income_wages = earnings - model_adult.principal_employment_type = Employment_Type(principal_employment_type) - model_adult.public_or_private = Employment_Sector(public_or_private) - ## FIXME look at this mapping again: pcodes - model_adult.income_health_insurance = health_insurance - # model_adult.income_# care_insurance = # care_insurance - model_adult.income_trade_unions_etc = trade_unions_etc - model_adult.income_friendly_societies = friendly_societies - model_adult.income_work_expenses = work_expenses - model_adult.income_pension_contributions_employee = pension_contributions_employee - model_adult.income_avcs = avcs - model_adult.income_other_deductions = other_deductions - model_adult.income_student_loan_repayments = student_loan_repayments # fixme maybe "slrepamt" or "slreppd" - model_adult.income_loan_repayments = loan_repayments # fixme maybe "slrepamt" or "slreppd" - - model_adult.income_self_employment_income = self_employment_income - model_adult.income_self_employment_expenses = self_employment_expenses - 
model_adult.income_self_employment_losses = self_employment_losses - - model_adult.company_car_fuel_type = Fuel_Type(company_car_fuel_type) - model_adult.company_car_value = company_car_value - model_adult.company_car_contribution = company_car_contribution - model_adult.fuel_supplied = fuel_supplied -end - -""" -Convoluted - take the benefit enum, make ... -FIXME: some represent one-off payments (winter fuel..) so maybe weeklyise, but all that -really matters is whether they are present -""" -function process_benefits!( model_adult::DataFrameRow, a_benefits::DataFrame) - nbens = size(a_benefits)[1] - for i in instances(Incomes_Type) - if i >= dlaself_care && i <= personal_independence_payment_mobility - ikey = make_sym_for_frame("income", i) - model_adult[ikey] = 0.0 - end - end - for b in 1:nbens - bno = a_benefits[b, :benefit] - if !(bno in [46, 47]) # 2015 receipt in last 6 months of tax credits - btype = Benefit_Type(bno) - # println( "bno=$bno BenefitType=$btype") - if btype <= Personal_Independence_Payment_Mobility - ikey = make_sym_for_frame("income", btype) - # println( "ikey=$ikey") - model_adult[ikey] = safe_inc(model_adult[ikey], a_benefits[b, :benamt]) - end - end - end -end - -""" -Convoluted - take the benefit enum, make ... 
-""" -function process_assets!(model_adult::DataFrameRow, an_asset::DataFrame) - nassets = size(an_asset)[1] - for i in instances(Asset_Type) - if (i > Missing_Asset_Type) - ikey = make_sym_for_asset(i) - model_adult[ikey] = 0.0 - end - end - for a in 1:nassets - ano = an_asset[a, :assetype] - atype = Asset_Type(ano) - ikey = make_sym_for_asset(atype) - v = an_asset[a, :howmuch] - if an_asset[a, :howmuche] > 0 - v = an_asset[a, :howmuche] - end - model_adult[ikey] = safe_inc(model_adult[ikey], v) - end -end - -function infer_hours_of_care(hourtot::Integer)::Real - hrs = Dict( - 0 => 0.0, - 1 => 2.0, - 2 => 7.0, - 3 => 14.0, - 4 => 27.5, - 5 => 42.5, - 6 => 75.0, - 7 => 100.0, - 8 => 10.0, - 9 => 27.5, - 10 => 50.0 - ) - h = 0.0 - if hourtot in keys(hrs) - h = hrs[hourtot] - end - h -end - -""" -remap child care type from pre-2017 version to 2017+ -""" -function map_child_care( year :: Integer, care ) :: Integer - if ismissing( care ) || care < -1 - care = -1 - end - if year >= 2017 - return care - end - if care > 0 # remap to2015/16 care to 2017+ - m = Dict( - 1=>1, - 2=>2, - 3=>3, - 4=>5, - 5=>4, - 6=>5, - 7=>4, - 8=>7, - 9=>8, - 10=>9, - 11=>10, - 12=>10, - 13=>11, - 14=>12, - 15=>13, - 16=>14, - 17=>15, - 18=>16, - 19=>17, - 20=>18 - ) - care = m[care] - end - care -end - - -function xparse(s::AbstractString)::Real - parse(Float64,s) -end - -function xparse(s::Missing)::Number - 0 -end - -function xparse(s::Number)::Number - s -end - -""" -Weekly equivalent of annual capital repayment on a mortgage. FIXME: Note the misnamed slot I'm putting this in pro. tem. -The mortgage record has been murdered in FRS 2021/2 -but the fields we need are in the monster record, so get from -that. Note early versions have 3 records and later frsxs have 2. -""" -function mortage_capital_payments( frsx :: AbstractDataFrame )::Real - #= - if size(frsx)[1] == 0 - return 0.0 - end - =# - @argcheck size(frsx)[1] in 1:10 # count of BUs - nmortgages = frsx.data_year[1] < 2020 ? 
3 : 2 - cappay = 0.0 - for fx in eachrow(frsx) - for mortno in 1:nmortgages - mortends = fx[Symbol("mortend$mortno")] - if ! ismissing( mortends ) - mortend = xparse( mortends ) - rmort = xparse(fx[Symbol("rmort$mortno")]) - rmamt = xparse(fx[Symbol("rmamt$mortno")]) - borramt = xparse(fx[Symbol("borramt$mortno")]) - cap = if rmort == 1 - rmamt - else - borramt - end - repay = cap/mortend - # println( "mortend=$mortend rmort=$rmort ramt=$rmamt borramt=$borramt => $cap => repay=$repay") - cappay += repay - end - end - end - cappay/WEEKS_PER_YEAR; -end - -