Skip to content

Commit 18a5765

Browse files
author
Stefan Heid
committed
#51 first experiments
1 parent f849855 commit 18a5765

File tree

1 file changed

+103
-0
lines changed

1 file changed

+103
-0
lines changed
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
from datetime import datetime
2+
from os import makedirs
3+
from typing import List
4+
5+
import gym
6+
import numpy as np
7+
from stable_baselines3 import PPO
8+
from stable_baselines3.common.monitor import Monitor
9+
10+
from openmodelica_microgrid_gym.env import PlotTmpl
11+
from openmodelica_microgrid_gym.net import Network
12+
from openmodelica_microgrid_gym.util import nested_map
13+
14+
# Seed NumPy's global random number generator.
np.random.seed(0)

# Name of the per-run output directory, derived from the start time of the script.
# The time of day is written with explicit '%H-%M-%S' fields instead of the locale-dependent '%X':
# '%X' usually renders as 'HH:MM:SS' and ':' is not a legal character in Windows path names.
# The trailing blank of the former format string is dropped too, because it became part of the
# directory name.
timestamp = datetime.now().strftime('%Y.%b.%d %H-%M-%S')
makedirs(timestamp)

# Simulation definitions
net = Network.load('../../net/net_single-inv-curr.yaml')
max_episode_steps = 300  # number of simulation steps per episode
num_episodes = 1  # number of simulation episodes (not read anywhere in the visible part of this script)
iLimit = 30  # inverter current limit / A
iNominal = 20  # nominal inverter current / A
mu = 2  # factor for the barrier function (only appears in the commented-out barrier term of the reward)
26+
27+
28+
class Reward:
    """
    Reward provider for the inverter current-control experiment.

    The positions of the inductor currents and of the current references inside the observation
    vector are resolved on first use and cached, so :meth:`rew_fun` can be passed to the
    environment as its reward callback.
    """

    def __init__(self):
        # cached index lists [measurements, references]; filled by the first call of set_idx
        self._idx = None

    def set_idx(self, obs):
        """
        Look up and cache the positions of the columns needed by the reward (done only once).

        :param obs: sequence of column names offering ``index()`` (e.g. a list of str)
        """
        if self._idx is not None:
            return

        def position(name):
            return obs.index(name)

        # NOTE(review): the plot template passed to gym.make in this file selects the inductor
        # currents with the prefix 'lc.' while 'lc1.' is used here - confirm which one matches the net.
        measured_cols = [f'lc1.inductor{k}.i' for k in '123']
        reference_cols = [f'inverter1.i_ref.{k}' for k in '012']
        self._idx = nested_map(position, [measured_cols, reference_cols])

    def rew_fun(self, cols: List[str], data: np.ndarray) -> float:
        """
        Reward callback of the environment: negative control error of the inductor currents.

        For each of the three selected currents the absolute deviation between reference and
        measurement is divided by ``iLimit`` and its square root is taken. These terms are summed
        along axis 0, the sum is clipped to [0, 1e5] and returned with negative sign.

        The logarithmic barrier penalty for exceeding the nominal current is disabled in this
        version, so ``mu`` and ``iNominal`` have no influence on the result.

        :param cols: list of variable names of the data
        :param data: observation data from the environment, indexed with the cached column positions
        :return: Error as negative reward
        """
        self.set_idx(cols)
        measured_idx, reference_idx = self._idx

        currents = data[measured_idx]  # the three 'inductor{1,2,3}.i' entries
        references = data[reference_idx]  # the three 'i_ref.{0,1,2}' entries, used as they are

        # Root of the normalized absolute error instead of its square: according to the original
        # author the normalized error is often close to zero, where the root yields more
        # significant gradients than an MSE-like metric.
        normalized_deviation = np.abs(references - currents) / iLimit
        error = np.sum(normalized_deviation ** 0.5, axis=0)

        # Disabled barrier penalty for violating the current constraint (kept for reference):
        #   -np.sum(mu * np.log(1 - np.maximum(np.abs(currents) - iNominal, 0) / (iLimit - iNominal)), axis=0)
        #   * max_episode_steps

        return -np.clip(error.squeeze(), 0, 1e5)
64+
65+
66+
def xylables(fig):
    """
    Plot callback: label the axes of the inductor current plot, add a grid and save it as PDF.

    :param fig: figure object providing ``gca()`` and ``savefig()`` (presumably a matplotlib Figure -
        confirm against the PlotTmpl callback contract); it is written to
        ``<timestamp>/Inductor_currents.pdf``
    """
    ax = fig.gca()
    ax.set_xlabel(r'$t\,/\,\mathrm{s}$')
    # Raw string like the x label: '\,' and '\m' are invalid escape sequences in a normal string
    # literal and trigger a warning on current Python versions. The resulting text is unchanged.
    ax.set_ylabel(r'$i_{\mathrm{abc}}\,/\,\mathrm{A}$')
    ax.grid(which='both')
    fig.savefig(f'{timestamp}/Inductor_currents.pdf')
72+
73+
74+
# Create the simulation environment with the custom reward and one plot of currents vs. references.
env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1',
               reward_fun=Reward().rew_fun,
               viz_cols=[
                   # NOTE(review): the inductor currents are selected with the prefix 'lc.' here, whereas
                   # Reward.set_idx looks up 'lc1.inductor{k}.i' - confirm which name the net provides.
                   PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']],
                            callback=xylables,
                            color=[['b', 'r', 'g'], ['b', 'r', 'g']],
                            style=[[None], ['--']]
                            ),
               ],
               viz_mode='episode',
               max_episode_steps=max_episode_steps,
               net=net,
               model_path='../../omg_grid/grid.network_singleInverter.fmu')

# From here on the environment exists, so it is closed in any case - also when training or the
# evaluation run raises or is interrupted (e.g. Ctrl-C during the long learn() call).
try:
    # Store the textual description of the environment next to the results of this run.
    with open(f'{timestamp}/env.txt', 'w') as f:
        print(str(env), file=f)
    env = Monitor(env)

    # Train a PPO agent and save it into the run directory.
    model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=f'{timestamp}/')
    model.learn(total_timesteps=1000000)
    model.save(f'{timestamp}/model')

    # Run the trained policy deterministically for at most 1000 steps or until the episode is done.
    obs = env.reset()
    for _ in range(1000):
        env.render()
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        if done:
            break
finally:
    env.close()

0 commit comments

Comments
 (0)