Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adjust 2020 Incomes to TAZ Controls #350

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
4 changes: 4 additions & 0 deletions baus.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ def get_simulation_summary_models():

simulation_summary_models = [

"adjust_initial_summary_year_incomes",
"interim_zone_output",
"new_buildings_summary",

Expand Down Expand Up @@ -351,6 +352,9 @@ def get_simulation_summary_models():
"maz_growth_summary",
]

if not run_setup['adjust_initial_summary_year_incomes']:
simulation_summary_models.remove("adjust_initial_summary_year_incomes")

return simulation_summary_models


Expand Down
5 changes: 5 additions & 0 deletions baus/datasources.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,11 @@ def base_year_summary_taz(mapping):
return df


@orca.table(cache=True)
def initial_summary_year_taz_controls():
    """Load the TAZ-level land use controls used for the initial summary year.

    The CSV is read from beneath the directory registered as the
    ``inputs_dir`` injectable and returned as a DataFrame; orca caches the
    result after the first call.
    """
    inputs_dir = orca.get_injectable("inputs_dir")
    controls_path = os.path.join(
        inputs_dir,
        "basis_inputs/parcels_buildings_agents/TAZ1454_2020_Land_Use.csv",
    )
    taz_controls = pd.read_csv(controls_path)
    return taz_controls


# non-residential rent data
@orca.table(cache=True)
def costar(store, parcels):
Expand Down
73 changes: 73 additions & 0 deletions baus/summaries/core_summaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,79 @@
import pathlib
import orca
import pandas as pd
import numpy as np


@orca.step()
def adjust_initial_summary_year_incomes(households, initial_summary_year_taz_controls, year, initial_summary_year):
    """Re-draw household income quartiles and incomes to match TAZ controls.

    Does nothing unless ``year`` equals ``initial_summary_year``. Otherwise:

    1. For every row of the TAZ controls table, households whose ``zone_id``
       equals the row's ``ZONE`` are randomly re-assigned to income quartiles
       so that each quartile's share approximates ``HHINCQ<n> / TOTHH``.
       Targets are rounded down, so a few households per TAZ may keep their
       existing ``base_income_quartile``.
    2. For every household with a recognised quartile (1-4), ``income`` is
       re-drawn from a normal distribution parameterised per quartile.
       Households whose quartile is missing or unrecognised keep their
       existing income.

    The modified frame is registered back with orca as the ``households``
    table.

    Parameters
    ----------
    households : orca table wrapper
        Must provide ``zone_id``, ``base_income_quartile`` and ``income``.
    initial_summary_year_taz_controls : orca table wrapper
        Must provide ``ZONE``, ``TOTHH`` and ``HHINCQ1`` .. ``HHINCQ4``.
    year : int
        Current simulation year.
    initial_summary_year : int
        Year in which the adjustment should be applied.
    """
    if year != initial_summary_year:
        return

    households = households.to_frame()
    taz_controls = initial_summary_year_taz_controls.to_frame()

    # first, update the household's household income categorical variable
    # iterate over rows directly rather than feeding index labels to .iloc,
    # which is only correct when the controls table has a default RangeIndex
    for _, tazdata in taz_controls.iterrows():
        # select all households in that taz (ids are the frame's index labels)
        hhs_to_update = households.index[households.zone_id == tazdata['ZONE']]
        hh_count = len(hhs_to_update)
        tot_hh = tazdata['TOTHH']

        # nothing to assign for empty TAZs; a zero/NaN TOTHH would make the
        # quartile shares undefined
        if hh_count == 0 or not tot_hh > 0:
            continue

        for inc_quartile in [1, 2, 3, 4]:
            # use the taz controls to calculate the proportion of households in an income quartile
            prop = tazdata['HHINCQ' + str(inc_quartile)] / tot_hh
            if not prop > 0:
                continue
            # use the total number of HHs in the TAZ to calculate the number of HHs
            # that should be in the income group; int() works for both numpy and
            # Python floats, and the cap keeps np.random.choice from failing when
            # the control shares sum to more than 1
            hh_target = min(int(hh_count * prop), len(hhs_to_update))
            if hh_target == 0:
                continue
            # randomly select households to assign to the income group using the target number
            hhs_for_inc_quartile = np.random.choice(hhs_to_update, hh_target, replace=False)
            # update households in the taz with their new income group; the ids were
            # drawn from the index, so assign by index label
            households.loc[hhs_for_inc_quartile, 'base_income_quartile'] = inc_quartile
            # remove the updated households from the set of households in the taz to be updated
            hhs_to_update = hhs_to_update[~hhs_to_update.isin(hhs_for_inc_quartile)]

    # second, update the continuous variable
    # data from PUMS 2010 1-year data, with HINCP inflated to 1999 dollars and binned
    income_array = {
        'sd': {'HHINCQ1': 8365.99,
               'HHINCQ2': 8691.1,
               'HHINCQ3': 11496.26,
               'HHINCQ4': 81914.8},
        'avg': {'HHINCQ1': 15544.0,
                'HHINCQ2': 44090.0,
                'HHINCQ3': 78017.0,
                'HHINCQ4': 171912.0}
    }

    # turn to dataframe (rows: HHINCQ1..HHINCQ4, columns: sd, avg)
    income_deets = pd.DataFrame.from_dict(income_array)

    # Loop through the four income groups, and for each draw incomes for the
    # households in that group so that they match the distribution within
    # that bin based on 2010 PUMS data.
    # TODO: consider updating just the records that were re-classified
    updated_income = households['income'].astype(float)
    for nme, dta in households.groupby('base_income_quartile'):
        # the group key is the numeric quartile, while income_deets is
        # labelled 'HHINCQ<n>', so build the matching label
        quartile_label = 'HHINCQ' + str(int(nme))
        if quartile_label not in income_deets.index:
            # unrecognised quartile value: leave these incomes untouched
            continue
        updated_income.loc[dta.index] = np.random.normal(
            loc=income_deets.loc[quartile_label, 'avg'],
            scale=income_deets.loc[quartile_label, 'sd'],
            size=len(dta),
        )

    # assign series to households df, to the income variable
    households['income'] = updated_income

    # save the final table of households with updated incomes
    orca.add_table("households", households)


@orca.step()
Expand Down
5 changes: 5 additions & 0 deletions baus/summaries/geographic_summaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ def geographic_summary(parcels, households, jobs, buildings, year, superdistrict
summary_table['mfdu'] = buildings_df[(buildings_df.building_type == 'HM') | (buildings_df.building_type == 'MR')].\
groupby(geography).residential_units.sum()

# add jurisdiction median household income (stored in the 'juris_ave_income' column)
if geography == 'juris':
summary_table['juris_ave_income'] = households_df.groupby(geography).income.quantile(.5)


# employees by sector
summary_table['totemp'] = jobs_df.groupby(geography).size()
for empsix in ['AGREMPN', 'MWTEMPN', 'RETEMPN', 'FPSEMPN', 'HEREMPN', 'OTHEMPN']:
Expand Down