Skip to content

Commit cc6dc9d

Browse files
committed
Notebook for creating datasets
1 parent db5c214 commit cc6dc9d

File tree

1 file changed

+238
-0
lines changed

1 file changed

+238
-0
lines changed

notebooks/Demo/Create_datasets.ipynb

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# General Imports\n"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": null,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"import os\n",
17+
"import inspect\n",
18+
"import sys\n",
19+
"import pandas as pd\n",
20+
"import charts\n",
21+
"from opengrid_dev import config\n",
22+
"c = config.Config()\n",
23+
"\n",
24+
"from opengrid_dev.library import misc, houseprint\n",
25+
"\n",
26+
"import matplotlib.pyplot as plt\n",
27+
"%matplotlib inline\n",
28+
"plt.rcParams['figure.figsize'] = 16,8"
29+
]
30+
},
31+
{
32+
"cell_type": "code",
33+
"execution_count": null,
34+
"metadata": {},
35+
"outputs": [],
36+
"source": [
37+
"c.opengrid_libdir"
38+
]
39+
},
40+
{
41+
"cell_type": "markdown",
42+
"metadata": {},
43+
"source": [
44+
"## Houseprint"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": null,
50+
"metadata": {},
51+
"outputs": [],
52+
"source": [
53+
"hp = houseprint.Houseprint()\n",
54+
"hp.init_tmpo()\n",
55+
"hp._tmpos.debug = False"
56+
]
57+
},
58+
{
59+
"cell_type": "code",
60+
"execution_count": null,
61+
"metadata": {},
62+
"outputs": [],
63+
"source": [
64+
"hp.sync_tmpos()"
65+
]
66+
},
67+
{
68+
"cell_type": "markdown",
69+
"metadata": {},
70+
"source": [
71+
"## Create dataframes with minute data for a single year, by sensortype\n",
72+
"\n",
73+
"Only run if needed. Hourly frames can be created by loading these minute pickles. "
74+
]
75+
},
76+
{
77+
"cell_type": "code",
78+
"execution_count": null,
79+
"metadata": {},
80+
"outputs": [],
81+
"source": [
82+
"for sensortype in ['gas', \n",
83+
" 'water',\n",
84+
" 'electricity'\n",
85+
" ]:\n",
86+
" print('Processing {}'.format(sensortype))\n",
87+
" for y in ['2016']:\n",
88+
" print('year {}'.format(y))\n",
89+
" head = pd.Timestamp('{}0101'.format(y), tz='Europe/Brussels')\n",
90+
" tail = pd.Timestamp('{}0101 02:00:00'.format(int(y)+1), tz='Europe/Brussels')\n",
91+
" df = hp.get_data(sensortype=sensortype, head=head, tail=tail, diff=True, resample='min')\n",
92+
" df.rename(columns=lambda x: x[:4], inplace=True)\n",
93+
" df = df.tz_convert('Europe/Brussels')\n",
94+
" path = os.path.join(c.get('data', 'folder'), '{}_{}_min.pkl'.format(sensortype, y))\n",
95+
" df.to_pickle(path, compression='gzip')\n",
96+
" \n",
97+
" # Create a dataset with minute values for the 3 sensors for gas\n",
98+
" if sensortype == 'gas':\n",
99+
" df = df[['313b', 'd5a7', 'ba14']]\n",
100+
" dflim = df.loc[pd.Timestamp('2016-12-05 00:00:00', tz='Europe/Brussels'):pd.Timestamp('2016-12-19 00:00:00', tz='Europe/Brussels')]\n",
101+
" path = os.path.join(c.get('data', 'folder'), '{}_dec2016_min.pkl'.format(sensortype))\n",
102+
" dflim.to_pickle(path, compression='gzip')\n",
103+
" "
104+
]
105+
},
106+
{
107+
"cell_type": "code",
108+
"execution_count": null,
109+
"metadata": {},
110+
"outputs": [],
111+
"source": [
112+
"# Minute values for water for march 2015\n",
113+
"head = pd.Timestamp('20150301', tz='Europe/Brussels')\n",
114+
"tail = pd.Timestamp('20150401', tz='Europe/Brussels')\n",
115+
"df = hp.get_data(sensortype='water', head=head, tail=tail, diff=True, resample='min')\n",
116+
"df.rename(columns=lambda x: x[:4], inplace=True)\n",
117+
"df = df.tz_convert('Europe/Brussels')\n",
118+
"path = os.path.join(c.get('data', 'folder'), 'water_march2015_min.pkl')\n",
119+
"df.to_pickle(path, compression='gzip')"
120+
]
121+
},
122+
{
123+
"cell_type": "code",
124+
"execution_count": null,
125+
"metadata": {
126+
"scrolled": true
127+
},
128+
"outputs": [],
129+
"source": [
130+
"## Create dataframes with hourly data\n",
131+
"for sensortype in ['water', 'gas', 'electricity']:\n",
132+
" print('Processing {}'.format(sensortype))\n",
133+
" for y in ['2016']:\n",
134+
" print('year {}'.format(y))\n",
135+
" path_min = os.path.join(c.get('data', 'folder'), '{}_{}_min.pkl'.format(sensortype, y))\n",
136+
" df = pd.read_pickle(path_min, compression='gzip')\n",
137+
" # hourly: mean values\n",
138+
" df_hour = df.resample(rule='H').mean()\n",
139+
" # remove uncomplete sensors and sensors we don't want in the test dataset\n",
140+
" for sensor in ['565d']:\n",
141+
" try:\n",
142+
" df_hour = df_hour.drop(labels=[sensor], axis=1)\n",
143+
" except:\n",
144+
" pass \n",
145+
" df_hour = df_hour.loc[head:pd.Timestamp('{}0101'.format(int(y)+1), tz='Europe/Brussels')]\n",
146+
" df_hour = df_hour.dropna(axis=1, how='any')\n",
147+
" \n",
148+
" try:\n",
149+
" df_hour.plot()\n",
150+
" except:\n",
151+
" print(\"No full hourly data for {}\".format(y))\n",
152+
" \n",
153+
" path_hour = os.path.join(c.get('data', 'folder'), '{}_{}_hour.pkl'.format(sensortype, y))\n",
154+
" df_hour.to_pickle(path_hour, compression='gzip')"
155+
]
156+
},
157+
{
158+
"cell_type": "markdown",
159+
"metadata": {},
160+
"source": [
161+
"## Weather data "
162+
]
163+
},
164+
{
165+
"cell_type": "code",
166+
"execution_count": null,
167+
"metadata": {},
168+
"outputs": [],
169+
"source": [
170+
"from opengrid_dev.library import forecastwrapper\n",
171+
"start = pd.Timestamp('20151225', tz='Europe/Brussels')\n",
172+
"end = pd.Timestamp('20170101', tz='Europe/Brussels')\n",
173+
"\n",
174+
"\n",
175+
"Weather_Ukkel = forecastwrapper.Weather(location='Ukkel', start=start, end=end)"
176+
]
177+
},
178+
{
179+
"cell_type": "code",
180+
"execution_count": null,
181+
"metadata": {},
182+
"outputs": [],
183+
"source": [
184+
"columns = ['GlobalHorizontalIrradiance', 'humidity', 'temperature', 'windSpeed']\n",
185+
"df = Weather_Ukkel.hours()[columns]\n",
186+
"df.info()"
187+
]
188+
},
189+
{
190+
"cell_type": "code",
191+
"execution_count": null,
192+
"metadata": {},
193+
"outputs": [],
194+
"source": [
195+
"df = df.applymap(float).fillna(value=0)\n",
196+
"df.info()"
197+
]
198+
},
199+
{
200+
"cell_type": "code",
201+
"execution_count": null,
202+
"metadata": {},
203+
"outputs": [],
204+
"source": [
205+
"path = os.path.join(c.get('data', 'folder'), 'weather_2016_hour.pkl')\n",
206+
"df.to_pickle(path, compression='gzip')"
207+
]
208+
},
209+
{
210+
"cell_type": "code",
211+
"execution_count": null,
212+
"metadata": {},
213+
"outputs": [],
214+
"source": []
215+
}
216+
],
217+
"metadata": {
218+
"kernelspec": {
219+
"display_name": "Python 3",
220+
"language": "python",
221+
"name": "python3"
222+
},
223+
"language_info": {
224+
"codemirror_mode": {
225+
"name": "ipython",
226+
"version": 3
227+
},
228+
"file_extension": ".py",
229+
"mimetype": "text/x-python",
230+
"name": "python",
231+
"nbconvert_exporter": "python",
232+
"pygments_lexer": "ipython3",
233+
"version": "3.5.2"
234+
}
235+
},
236+
"nbformat": 4,
237+
"nbformat_minor": 1
238+
}

0 commit comments

Comments
 (0)