Skip to content

Commit

Permalink
Update the PUMS slicing script to use fixed-length (zero-padded) state and PUMA IDs
Browse files Browse the repository at this point in the history
  • Loading branch information
janowicz committed Jul 16, 2015
1 parent bcfe7d0 commit cbb0491
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 340 deletions.
13 changes: 9 additions & 4 deletions scripts/dl_and_slice_pums.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,6 @@
with zipfile.ZipFile(filepath, "r") as z:
z.extractall(loader.get_path('pums'))

pums_file = 'ss13pusb.csv'
pums = pd.read_csv(os.path.join(loader.get_path('pums'), pums_file))

for pums_file in ['ss13husa.csv', 'ss13husb.csv',
'ss13husc.csv', 'ss13husd.csv',
'ss13pusa.csv', 'ss13pusb.csv',
Expand All @@ -47,14 +44,21 @@
pums = pd.read_csv(os.path.join(loader.get_path('pums'), pums_file))

for state_id in np.unique(pums['ST']):
' Processing pums for state %s' % state_id
print ' Processing pums for state %s' % state_id
pum_state = pums[pums['ST'] == state_id]
state_id = '{:>02}'.format(state_id)
if pums_file[4] == 'h':
pums_state_filename = 'puma_h_%s.csv' % (state_id)
elif pums_file[4] == 'p':
pums_state_filename = 'puma_p_%s.csv' % (state_id)
pum_state.to_csv(os.path.join(loader.get_path('pums'), pums_state_filename), index = False)

print ' Slicing up pums files by 2000 pumas'
for puma00 in np.unique(pum_state['PUMA00']):
if puma00 != -9:
print puma00
df = pum_state[pum_state['PUMA00'] == puma00]
puma00 = '{:>05}'.format(puma00)
if pums_file[4] == 'h':
output_filename = 'puma00_h_%s_%s.csv' % (state_id, puma00)
elif pums_file[4] == 'p':
Expand All @@ -66,6 +70,7 @@
if puma10 != -9:
print puma10
df = pum_state[pum_state['PUMA10'] == puma10]
puma10 = '{:>05}'.format(puma10)
if pums_file[4] == 'h':
output_filename = 'puma10_h_%s_%s.csv' % (state_id, puma10)
elif pums_file[4] == 'p':
Expand Down
2 changes: 1 addition & 1 deletion scripts/synth_example.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from synthpop.recipes.starter3 import Starter
from synthpop.recipes.starter2 import Starter
from synthpop.synthesizer import synthesize_all, enable_logging
import os

Expand Down
8 changes: 4 additions & 4 deletions synthpop/recipes/starter.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,9 @@ def get_household_joint_dist_for_geography(self, ind):
puma10, puma00 = c.tract_to_puma(ind.state, ind.county, ind.tract)
# this is cached so won't download more than once
if type(puma00) == str:
h_pums = self.c.download_household_pums(int(ind.state), int(puma10), int(puma00))
h_pums = self.c.download_household_pums(ind.state, puma10, puma00)
elif np.isnan(puma00): # only puma10 available
h_pums = self.c.download_household_pums(int(ind.state), int(puma10), None)
h_pums = self.c.download_household_pums(ind.state, puma10, None)

def cars_cat(r):
if r.VEH == 0:
Expand Down Expand Up @@ -184,9 +184,9 @@ def get_person_joint_dist_for_geography(self, ind):
puma10, puma00 = c.tract_to_puma(ind.state, ind.county, ind.tract)
# this is cached so won't download more than once
if type(puma00) == str:
p_pums = self.c.download_population_pums(int(ind.state), int(puma10), int(puma00))
p_pums = self.c.download_population_pums(ind.state, puma10, puma00)
elif np.isnan(puma00): # only puma10 available
p_pums = self.c.download_population_pums(int(ind.state), int(puma10), None)
p_pums = self.c.download_population_pums(ind.state, puma10, None)

def age_cat(r):
if r.AGEP <= 19:
Expand Down
12 changes: 6 additions & 6 deletions synthpop/recipes/starter2.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,11 @@ def get_household_joint_dist_for_geography(self, ind):
puma10, puma00 = c.tract_to_puma(ind.state, ind.county, ind.tract)
# this is cached so won't download more than once
if type(puma00) == str:
h_pums = self.c.download_household_pums(int(ind.state), int(puma10), int(puma00))
p_pums = self.c.download_population_pums(int(ind.state), int(puma10), int(puma00))
h_pums = self.c.download_household_pums(ind.state, puma10, puma00)
p_pums = self.c.download_population_pums(ind.state, puma10, puma00)
elif np.isnan(puma00): # only puma10 available
h_pums = self.c.download_household_pums(int(ind.state), int(puma10), None)
p_pums = self.c.download_population_pums(int(ind.state), int(puma10), None)
h_pums = self.c.download_household_pums(ind.state, puma10, None)
p_pums = self.c.download_population_pums(ind.state, puma10, None)

h_pums = h_pums.set_index('serialno')

Expand Down Expand Up @@ -294,9 +294,9 @@ def get_person_joint_dist_for_geography(self, ind):
puma10, puma00 = c.tract_to_puma(ind.state, ind.county, ind.tract)
# this is cached so won't download more than once
if type(puma00) == str:
p_pums = self.c.download_population_pums(int(ind.state), int(puma10), int(puma00))
p_pums = self.c.download_population_pums(ind.state, puma10, puma00)
elif np.isnan(puma00): # only puma10 available
p_pums = self.c.download_population_pums(int(ind.state), int(puma10), None)
p_pums = self.c.download_population_pums(ind.state, puma10, None)

def age_cat(r):
if r.AGEP <= 19:
Expand Down
Loading

0 comments on commit cbb0491

Please sign in to comment.