diff --git a/demos/census_api.ipynb b/demos/census_api.ipynb index e01f297..df722fd 100644 --- a/demos/census_api.ipynb +++ b/demos/census_api.ipynb @@ -5432,9 +5432,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "py27", "language": "python", - "name": "python2" + "name": "py27" }, "language_info": { "codemirror_mode": { diff --git a/demos/synthesize.py b/demos/synthesize.py index f70a589..9d31e99 100644 --- a/demos/synthesize.py +++ b/demos/synthesize.py @@ -24,9 +24,9 @@ households, people, fit_quality = synthesize_all(starter, indexes=indexes) for geo, qual in fit_quality.items(): - print 'Geography: {} {} {} {}'.format( - geo.state, geo.county, geo.tract, geo.block_group) + print ('Geography: {} {} {} {}'.format( + geo.state, geo.county, geo.tract, geo.block_group)) # print ' household chisq: {}'.format(qual.household_chisq) # print ' household p: {}'.format(qual.household_p) - print ' people chisq: {}'.format(qual.people_chisq) - print ' people p: {}'.format(qual.people_p) + print (' people chisq: {}'.format(qual.people_chisq)) + print (' people p: {}'.format(qual.people_p)) diff --git a/synthpop/census_helpers.py b/synthpop/census_helpers.py index 1f82a19..341e936 100644 --- a/synthpop/census_helpers.py +++ b/synthpop/census_helpers.py @@ -48,16 +48,16 @@ def _scale_and_merge(self, df1, tot1, df2, tot2, columns_to_scale, df[col] = df[col].fillna(0).astype('int') return df - def block_group_query(self, census_columns, state, county, tract=None, - year=2016, id=None): + def block_group_query(self, census_columns, state, county, year, + tract=None,id=None): if id is None: id = "*" return self._query(census_columns, state, county, forstr="block group:%s" % id, tract=tract, year=year) - def tract_query(self, census_columns, state, county, tract=None, - year=2016): + def tract_query(self, census_columns, state, county, year, + tract=None): if tract is None: tract = "*" return self._query(census_columns, state, county, @@ -65,7 
+65,7 @@ def tract_query(self, census_columns, state, county, tract=None, year=year) def _query(self, census_columns, state, county, forstr, - tract=None, year=2016): + year, tract=None): c = self.c state, county = self.try_fips_lookup(state, county) @@ -87,7 +87,7 @@ def chunks(l, n): for census_column_batch in chunks(census_columns, 45): census_column_batch = list(census_column_batch) - d = c.acs.get(['NAME'] + census_column_batch, + d = c.acs5.get(['NAME'] + census_column_batch, geo={'for': forstr, 'in': in_str}, year=year) df = pd.DataFrame(d) @@ -106,7 +106,7 @@ def chunks(l, n): def block_group_and_tract_query(self, block_group_columns, tract_columns, state, county, merge_columns, block_group_size_attr, - tract_size_attr, tract=None, year=2016): + tract_size_attr, year, tract=None): df2 = self.tract_query(tract_columns, state, county, tract=tract, year=year) df1 = self.block_group_query(block_group_columns, state, county, diff --git a/synthpop/recipes/starter2.py b/synthpop/recipes/starter2.py index b59baf0..024800f 100644 --- a/synthpop/recipes/starter2.py +++ b/synthpop/recipes/starter2.py @@ -23,27 +23,31 @@ class Starter: FIPS code for the county tract : string, optional FIPS code for a specific track or None for all tracts in the county + acsyear : integer, optional + Final year in the 5-year estimates ACS dataset. 
+ Default: 2016, which corresponds to 2012-2016 ACS dataset Returns ------- household_marginals : DataFrame - Marginals per block group for the household data (from ACS) + Marginals per block group for the household data (from ACS 5-year estimates) person_marginals : DataFrame - Marginals per block group for the person data (from ACS) + Marginals per block group for the person data (from ACS 5-year estimates) household_jointdist : DataFrame - joint distributions for the households (from PUMS), one joint + joint distributions for the households (from PUMS 2010-2000), one joint distribution for each PUMA (one row per PUMA) person_jointdist : DataFrame - joint distributions for the persons (from PUMS), one joint + joint distributions for the persons (from PUMS 2010-2000), one joint distribution for each PUMA (one row per PUMA) tract_to_puma_map : dictionary keys are tract ids and pumas are puma ids """ - def __init__(self, key, state, county, tract=None): + def __init__(self, key, state, county, tract=None, acsyear= 2016): self.c = c = Census(key) self.state = state self.county = county self.tract = tract + self.acsyear = acsyear structure_size_columns = ['B25032_0%02dE' % i for i in range(1, 24)] age_of_head_columns = ['B25007_0%02dE' % i for i in range(1, 22)] @@ -68,7 +72,7 @@ def __init__(self, key, state, county, tract=None): merge_columns=['tract', 'county', 'state'], block_group_size_attr="B11005_001E", tract_size_attr="B08201_001E", - tract=tract) + tract=tract, year=acsyear) self.h_acs = h_acs self.h_acs_cat = cat.categorize(h_acs, { @@ -133,7 +137,7 @@ def __init__(self, key, state, county, tract=None): female_age_columns = ['B01001_0%02dE' % i for i in range(27, 50)] all_columns = population + sex + race + male_age_columns + \ female_age_columns + hh_population + hispanic - p_acs = c.block_group_query(all_columns, state, county, tract=tract) + p_acs = c.block_group_query(all_columns, state, county, tract=tract, year=acsyear) self.p_acs = p_acs 
self.p_acs_cat = cat.categorize(p_acs, { ("person_age", "19 and under"):