-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathepa_aqs_county_data_maker.py
88 lines (58 loc) · 2.46 KB
/
epa_aqs_county_data_maker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# This script creates a daily mean pollution score for each pollutant in each county
# Importing required modules
import pandas as pd
# Per-user settings: username is spliced into the data directory under C:/Users/
username = ''
filepath = 'C:/Users/' + username + '/Documents/Data/ultrapollution/'

# Load the raw per-pollutant tables; files are read in the same order as the
# names on the left-hand side, so each name is bound to its matching file
pm, pm10, co, no2, pb, o3, so2 = (
    pd.read_csv(filepath + csv_name)
    for csv_name in (
        'epa_aqs_data_pm.csv',
        'epa_aqs_data_pm10.csv',
        'epa_aqs_data_co.csv',
        'epa_aqs_data_no2.csv',
        'epa_aqs_data_pb.csv',
        'epa_aqs_data_ozone.csv',
        'epa_aqs_data_so2.csv',
    )
)
# A FIPS-creating function
def F(s, c):
    """Build a FIPS string from a state code and a county code.

    The state code is left-padded with zeros to two characters and the county
    code to three characters, then the two are concatenated (e.g. 6 and 37
    become '06037').

    Parameters:
        s: state code (int, str, or integral float).
        c: county code (int, str, or integral float).

    Returns:
        The concatenated, zero-padded code as a str.
    """
    padded = []
    for code, width in ((s, 2), (c, 3)):
        # A numeric column that contains missing values is read by pandas as
        # float, so 6 arrives as 6.0; convert integral floats back to int so
        # the code does not come out as '6.0'.
        if isinstance(code, float) and code.is_integer():
            code = int(code)
        padded.append(str(code).zfill(width))
    return ''.join(padded)
# Creating the county-pollutant level data sets
def _county_daily_means(p, pollutant_label):
    """Collapse one pollutant table to a single mean Value per county per date.

    Parameters:
        p: DataFrame with State, County, Date and Value columns.
        pollutant_label: text placed at the start of every progress message.

    Returns:
        DataFrame with columns Date, State, County, FIPS, Value and one row
        per (State, County, Date) combination found in p. Rows are ordered by
        first appearance of each state, then of each county within the state,
        then of each date within the county.
    """
    records = []

    # sort=False keeps groups in order of first appearance. groupby skips rows
    # whose grouping key is missing, so no output row is produced for a
    # missing State, County or Date.
    state_groups = p.groupby('State', sort=False)
    n_states = state_groups.ngroups

    for state_pos, (s, state_rows) in enumerate(state_groups, start=1):
        county_groups = state_rows.groupby('County', sort=False)
        n_counties = county_groups.ngroups

        for county_pos, (c, county_rows) in enumerate(county_groups, start=1):
            # The state total is taken from the data rather than assumed to be 50
            print('{} :: State {} of {} :: County {} of {}.......'.format(
                pollutant_label, state_pos, n_states, county_pos, n_counties))  # Visualize progress

            # The FIPS code depends only on the county, so compute it once
            fips_code = F(s, c)

            # One mean per date for this county, in a single pass over its rows
            daily = county_rows.groupby('Date', sort=False)['Value'].mean()
            records.extend((d, s, c, fips_code, v) for d, v in daily.items())

    return pd.DataFrame(records, columns=['Date', 'State', 'County', 'FIPS', 'Value'])


sets = [pm, pm10, co, no2, pb, o3, so2]
names = ['pm', 'pm10', 'co', 'no2', 'pb', 'ozone', 'so2']

# Write one <name>_data.csv per pollutant into the data directory
for i, (p, name) in enumerate(zip(sets, names), start=1):
    label = 'Pollutant {} of {}'.format(i, len(sets))
    df = _county_daily_means(p, label)
    df.to_csv(filepath + name + '_data.csv', index=False)