def _assign_income_quartiles(households, taz_controls):
    """Re-label ``base_income_quartile`` so each TAZ follows its control shares.

    For every row of ``taz_controls`` the share of households in quartile
    ``q`` is ``HHINCQ<q> / TOTHH``.  That share, applied to the number of
    households currently located in the zone (``zone_id == ZONE``), gives the
    number of households that are randomly drawn (without replacement) and
    labelled with quartile ``q``.  Targets are truncated to integers, so a few
    households per zone may keep their previous label.

    Parameters
    ----------
    households : pandas.DataFrame
        Must contain ``zone_id``; ``base_income_quartile`` is written in place.
        Household ids are taken from the frame's index.
    taz_controls : pandas.DataFrame
        One row per TAZ with columns ``ZONE``, ``TOTHH`` and
        ``HHINCQ1`` .. ``HHINCQ4``.

    Returns
    -------
    None
        ``households`` is modified in place.
    """
    # Group household ids by zone once instead of re-filtering the whole
    # households table for every TAZ.
    hh_ids_by_zone = households.groupby('zone_id').groups

    for _, tazdata in taz_controls.iterrows():
        hhs_in_taz = hh_ids_by_zone.get(tazdata['ZONE'])
        if hhs_in_taz is None or len(hhs_in_taz) == 0:
            continue

        total_hh = tazdata['TOTHH']
        # Written as "not > 0" so that NaN totals are skipped as well; a zero
        # total would otherwise turn the shares into inf/NaN.
        if not total_hh > 0:
            continue

        hhs_to_update = hhs_in_taz
        for inc_quartile in [1, 2, 3, 4]:
            # use the taz controls to calculate the proportion of households
            # in this income quartile
            prop = tazdata['HHINCQ' + str(inc_quartile)] / total_hh
            if not prop > 0:
                continue

            # Number of households that should end up in this quartile.  It is
            # capped at the households still unassigned because sampling
            # without replacement raises ValueError when asked for more items
            # than are available (possible when the shares sum to more than 1).
            hh_target = min(int(len(hhs_in_taz) * prop), len(hhs_to_update))
            if hh_target == 0:
                continue

            # randomly select households to assign to the income group
            hhs_for_inc_quartile = np.random.choice(
                hhs_to_update, hh_target, replace=False)

            # The ids were drawn from the index, so select through the index.
            households.loc[hhs_for_inc_quartile, 'base_income_quartile'] = \
                inc_quartile

            # remove the updated households from the pool for this zone
            hhs_to_update = hhs_to_update[
                ~hhs_to_update.isin(hhs_for_inc_quartile)]


def _draw_incomes_by_quartile(households):
    """Draw a continuous income for every household from its quartile's normal.

    Parameters
    ----------
    households : pandas.DataFrame
        Must contain ``base_income_quartile`` with values 1..4.  An existing
        ``income`` column, if present, supplies the value kept for households
        that have no quartile.

    Returns
    -------
    pandas.Series
        Float incomes indexed like ``households``.

    Raises
    ------
    KeyError
        If a quartile value has no entry in the distribution table.
    """
    # data from PUMS 2010 1-year data, with HINCP inflated to 1999 dollars
    # and binned
    income_array = {
        'sd': {'HHINCQ1': 8365.99,
               'HHINCQ2': 8691.1,
               'HHINCQ3': 11496.26,
               'HHINCQ4': 81914.8},
        'avg': {'HHINCQ1': 15544.0,
                'HHINCQ2': 44090.0,
                'HHINCQ3': 78017.0,
                'HHINCQ4': 171912.0}
    }

    # rows are 'HHINCQ1'..'HHINCQ4', columns are 'sd' and 'avg'
    income_deets = pd.DataFrame.from_dict(income_array)

    # Start from the current incomes so that households without a quartile
    # keep their value instead of being overwritten with NaN.
    if 'income' in households.columns:
        updated_income = households['income'].astype(float)
    else:
        updated_income = pd.Series(np.nan, index=households.index)

    # Loop through the income groups, and for each generate incomes matching
    # the distribution within that bin based on 2010 PUMS data.
    # NOTE(review): a normal draw is unbounded, so individual values can fall
    # outside the quartile's bin (or below zero) -- confirm this is acceptable.
    for quartile, members in households.groupby('base_income_quartile'):
        # The distribution table is keyed by 'HHINCQ<n>' while the group key
        # is the numeric quartile, so build the matching label.
        params = income_deets.loc['HHINCQ' + str(int(quartile))]
        updated_income.loc[members.index] = np.random.normal(
            loc=params['avg'],
            scale=params['sd'],
            size=len(members)
        )

    updated_income.name = 'income'
    return updated_income


@orca.step()
def adjust_initial_summary_year_incomes(households, initial_summary_year_taz_controls, year, initial_summary_year):
    """Align household incomes with the TAZ controls in the initial summary year.

    Does nothing unless ``year == initial_summary_year``.  Otherwise it
    (1) re-labels ``base_income_quartile`` per TAZ from the control shares,
    (2) re-draws the continuous ``income`` for each quartile, and
    (3) re-registers the modified frame as the orca ``households`` table.

    Parameters
    ----------
    households : orca table wrapper
        Households table; read through ``to_frame()``.
    initial_summary_year_taz_controls : orca table wrapper
        TAZ-level controls; read through ``to_frame()``.
    year : int
        Current simulation year.
    initial_summary_year : int
        Year in which the adjustment is applied.
    """
    if year != initial_summary_year:
        return

    households_df = households.to_frame()
    taz_controls = initial_summary_year_taz_controls.to_frame()

    # first, update the households' categorical income variable
    _assign_income_quartiles(households_df, taz_controls)

    # second, update the continuous variable
    # TODO: consider updating just the records that were re-classified
    households_df['income'] = _draw_incomes_by_quartile(households_df)

    # save the final table of households with updated incomes
    # NOTE(review): to_frame() was called without a column list, so the frame
    # registered here holds whatever columns that call returned -- confirm
    # that replacing the table this way is intended.
    orca.add_table("households", households_df)