ACS year parameter made user-specified

UDST · Apr 27, 2018 · 6431ae1 · 6431ae1
1 parent 7e8f470
commit 6431ae1
Show file tree

Hide file tree

Showing 4 changed files with 24 additions and 20 deletions.
diff --git a/demos/census_api.ipynb b/demos/census_api.ipynb
@@ -5432,9 +5432,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 2",
+   "display_name": "py27",
    "language": "python",
-   "name": "python2"
+   "name": "py27"
   },
   "language_info": {
    "codemirror_mode": {

diff --git a/demos/synthesize.py b/demos/synthesize.py
@@ -24,9 +24,9 @@
 households, people, fit_quality = synthesize_all(starter, indexes=indexes)
 
 for geo, qual in fit_quality.items():
-    print 'Geography: {} {} {} {}'.format(
-        geo.state, geo.county, geo.tract, geo.block_group)
+    print ('Geography: {} {} {} {}'.format(
+        geo.state, geo.county, geo.tract, geo.block_group))
     # print '    household chisq: {}'.format(qual.household_chisq)
     # print '    household p:     {}'.format(qual.household_p)
-    print '    people chisq:    {}'.format(qual.people_chisq)
-    print '    people p:        {}'.format(qual.people_p)
+    print ('    people chisq:    {}'.format(qual.people_chisq))
+    print ('    people p:        {}'.format(qual.people_p))
diff --git a/synthpop/census_helpers.py b/synthpop/census_helpers.py
@@ -48,24 +48,24 @@ def _scale_and_merge(self, df1, tot1, df2, tot2, columns_to_scale,
             df[col] = df[col].fillna(0).astype('int')
         return df
 
-    def block_group_query(self, census_columns, state, county, tract=None,
-                          year=2016, id=None):
+    def block_group_query(self, census_columns, state, county, year, 
+                        tract=None,id=None):
         if id is None:
             id = "*"
         return self._query(census_columns, state, county,
                            forstr="block group:%s" % id,
                            tract=tract, year=year)
 
-    def tract_query(self, census_columns, state, county, tract=None,
-                    year=2016):
+    def tract_query(self, census_columns, state, county, year,
+                    tract=None):
         if tract is None:
             tract = "*"
         return self._query(census_columns, state, county,
                            forstr="tract:%s" % tract,
                            year=year)
 
     def _query(self, census_columns, state, county, forstr,
-               tract=None, year=2016):
+               year, tract=None):
         c = self.c
 
         state, county = self.try_fips_lookup(state, county)
@@ -87,7 +87,7 @@ def chunks(l, n):
 
         for census_column_batch in chunks(census_columns, 45):
             census_column_batch = list(census_column_batch)
-            d = c.acs.get(['NAME'] + census_column_batch,
+            d = c.acs5.get(['NAME'] + census_column_batch,
                           geo={'for': forstr,
                                'in': in_str}, year=year)
             df = pd.DataFrame(d)
@@ -106,7 +106,7 @@ def chunks(l, n):
     def block_group_and_tract_query(self, block_group_columns,
                                     tract_columns, state, county,
                                     merge_columns, block_group_size_attr,
-                                    tract_size_attr, tract=None, year=2016):
+                                    tract_size_attr, year, tract=None):
         df2 = self.tract_query(tract_columns, state, county, tract=tract,
                                year=year)
         df1 = self.block_group_query(block_group_columns, state, county,

diff --git a/synthpop/recipes/starter2.py b/synthpop/recipes/starter2.py
@@ -23,27 +23,31 @@ class Starter:
         FIPS code for the county
     tract : string, optional
         FIPS code for a specific tract or None for all tracts in the county
+    acsyear : integer, optional
+        Final year in the 5-year estimates ACS dataset.
+        Default: 2016, which corresponds to the 2012-2016 ACS dataset
 
     Returns
     -------
     household_marginals : DataFrame
-        Marginals per block group for the household data (from ACS)
+        Marginals per block group for the household data (from ACS 5-year estimates)
     person_marginals : DataFrame
-        Marginals per block group for the person data (from ACS)
+        Marginals per block group for the person data (from ACS 5-year estimates)
     household_jointdist : DataFrame
-        joint distributions for the households (from PUMS), one joint
+        joint distributions for the households (from PUMS 2010-2000), one joint
         distribution for each PUMA (one row per PUMA)
     person_jointdist : DataFrame
-        joint distributions for the persons (from PUMS), one joint
+        joint distributions for the persons (from PUMS 2010-2000), one joint
         distribution for each PUMA (one row per PUMA)
     tract_to_puma_map : dictionary
         keys are tract ids and values are puma ids
     """
-    def __init__(self, key, state, county, tract=None):
+    def __init__(self, key, state, county, tract=None, acsyear= 2016):
         self.c = c = Census(key)
         self.state = state
         self.county = county
         self.tract = tract
+        self.acsyear = acsyear
 
         structure_size_columns = ['B25032_0%02dE' % i for i in range(1, 24)]
         age_of_head_columns = ['B25007_0%02dE' % i for i in range(1, 22)]
@@ -68,7 +72,7 @@ def __init__(self, key, state, county, tract=None):
             merge_columns=['tract', 'county', 'state'],
             block_group_size_attr="B11005_001E",
             tract_size_attr="B08201_001E",
-            tract=tract)
+            tract=tract, year=acsyear)
         self.h_acs = h_acs
 
         self.h_acs_cat = cat.categorize(h_acs, {
@@ -133,7 +137,7 @@ def __init__(self, key, state, county, tract=None):
         female_age_columns = ['B01001_0%02dE' % i for i in range(27, 50)]
         all_columns = population + sex + race + male_age_columns + \
             female_age_columns + hh_population + hispanic
-        p_acs = c.block_group_query(all_columns, state, county, tract=tract)
+        p_acs = c.block_group_query(all_columns, state, county, tract=tract, year=acsyear)
         self.p_acs = p_acs
         self.p_acs_cat = cat.categorize(p_acs, {
             ("person_age", "19 and under"):