-
Notifications
You must be signed in to change notification settings - Fork 0
/
APIMS-web.py
90 lines (74 loc) · 2.37 KB
/
APIMS-web.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import pandas as pd
import numpy as np
from urllib import request
from pathlib import Path
import re
# CSV file that accumulates every reading fetched so far.
filename = "apims-2018-now.csv"

# Resume state:
#   df2  - the table loaded from ``filename`` (indexed by its "Time" column),
#          or an empty frame when nothing usable is on disk.
#   date - the last timestamp already stored, or 2018-01-01 00:00 when
#          starting from scratch.
try:
    # ``infer_datetime_format`` is intentionally not passed: it is deprecated
    # since pandas 2.0 (format inference is the default there), and a rejected
    # keyword would otherwise be mistaken for "no saved data".
    df2 = pd.read_csv(filename, parse_dates=["Time"])
    df2 = df2.set_index("Time")
    date = df2.index[-1]  # IndexError on an empty table -> start from scratch
except Exception:
    # Best effort: a missing, empty or unreadable file means a fresh start.
    # ``Exception`` (rather than a bare ``except``) keeps Ctrl-C and
    # SystemExit working.
    df2 = pd.DataFrame()
    date = pd.to_datetime('2018/01/01 0000', format='%Y/%m/%d %H%M')
def returnFile(url=False):
    """Return the ``.json`` file name derived from the module-level ``date``.

    Args:
        url: When truthy, the date parts are joined with ``/``
            (``YYYY/MM/DD/HHMM.json``); otherwise with ``-``
            (``YYYY-MM-DD-HHMM.json``).
    """
    pattern = '%Y/%m/%d/%H%M' if url else '%Y-%m-%d-%H%M'
    return date.strftime(format=pattern) + '.json'
def returnURL():
    """Return the download URL for the file matching the module-level ``date``."""
    base = 'http://apims.doe.gov.my/data/public_v2/CAQM/hours24/'
    return base + returnFile(True)
def getJSON():
    """Return the local cache path of the JSON file for the current ``date``.

    The file is downloaded from ``returnURL()`` only when it is not already
    present under ``cache/``.

    Returns:
        The cache path as a string (``cache/YYYY-MM-DD-HHMM.json``).

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises when the download fails
        (for example ``urllib.error.HTTPError``).
    """
    fpath = 'cache/' + returnFile()
    cache_file = Path(fpath)
    if not cache_file.is_file():
        # Without the directory urlretrieve cannot open the target file, and
        # the caller would skip every step with no usable explanation.
        cache_file.parent.mkdir(parents=True, exist_ok=True)
        url = returnURL()
        print("Requesting " + url)
        request.urlretrieve(url, fpath)
    return fpath
def clean(x):
    """Return the digits contained in ``x`` as an int, or NaN if there are none.

    ``x`` is converted with ``str()`` and every non-digit character is
    removed before parsing, so ``'45**'`` becomes ``45``. Note that this also
    removes signs and decimal points (``'12.0'`` becomes ``120``).

    Args:
        x: Any value; only its string form is inspected.

    Returns:
        An ``int`` built from the remaining digits, or ``numpy.nan`` when no
        digit is left.
    """
    # Raw string: '\D' in a plain literal is an invalid escape sequence.
    digits = re.sub(r'\D', '', str(x))
    try:
        return int(digits)
    except ValueError:
        # Empty string: the value contained no digits at all.
        return np.nan
# Fetch one JSON file per 8-hour step, starting at ``date``, and merge each
# table into ``df2`` until ``date`` reaches the cutoff (current UTC time plus
# 8 hours).
#
# The cutoff is built from an explicit UTC clock: ``pd.to_datetime('now')``
# returned UTC in pandas 1.x but local time from pandas 2.0 on, which made the
# original cutoff depend on the host's time zone.
# NOTE(review): the +8 h presumably converts UTC to Malaysian local time
# (UTC+8) - confirm.
while date < pd.Timestamp.now(tz='UTC').tz_localize(None) + pd.DateOffset(hours=8):
    print('Processing ' + returnFile())
    try:
        payload = pd.read_json(getJSON())
    except Exception:
        # Best effort: a failed download or an unparsable file skips this
        # step. ``Exception`` (not a bare ``except``) lets Ctrl-C and
        # SystemExit stop the script.
        print('Error occured')
        date += pd.DateOffset(hours=8)
        continue

    # The table is stored under one of two keys.
    try:
        table = payload['24hour_api']
    except KeyError:
        table = payload['24hour_api_apims']
    df = table.apply(pd.Series)

    # The first row holds the column headers; the rest are data rows.
    header = df.iloc[0]
    df = df[1:]
    df.columns = header
    df = df.set_index('Location')
    df = df.drop(columns='State')

    # Reduce every cell to its digits (NaN when it has none), then turn the
    # table so that rows are time labels and columns are locations.
    df = df.apply(lambda column: column.map(clean))
    df = df.transpose()
    df = df.astype(np.float16)

    # Prefix each time label with a calendar day: labels before the first
    # '12:00AM' get the day before ``date``; that label and all later ones
    # get the day of ``date``.
    today = date.strftime(format='%Y/%m/%d')
    yesterday = (date - pd.DateOffset(days=1)).strftime(format='%Y/%m/%d')
    labels = []
    reached_midnight = False
    for label in df.index:
        if label == '12:00AM':
            reached_midnight = True
        day = today if reached_midnight else yesterday
        labels.append(day + ' ' + label)
    df.index = pd.to_datetime(labels)
    df.index.name = "Time"

    # Values already present in df2 win; df only fills the gaps.
    df2 = df2.combine_first(df)
    date += pd.DateOffset(hours=8)
    # Disabled one-off adjustment, kept for reference:
    #if date == pd.to_datetime('2019/10/24 2300'):
    #    date -= pd.DateOffset(hours=1)
# Write the accumulated table to ``filename``.
#
# Zero readings are blanked and whole-number floats are written without a
# trailing ".0". Both are done on the data itself rather than by editing the
# written text: text replacement strips ".0" anywhere in the file, misses
# zeros in the last column, and leaves a zero behind in runs of four or more.
df2.mask(df2 == 0).to_csv(
    filename,
    date_format='%Y-%m-%d %H:%M',
    float_format='%.0f',
)
print("Saved! Operation complete.")