
Commit bf9f622

Daniel Weber committed:
- added DDPG example
1 parent 170e0f2 commit bf9f622

2 files changed (+130, −1 lines)


experiments/issue51_new/stable_baselines.py

Lines changed: 1 addition & 1 deletion

@@ -108,5 +108,5 @@ def _on_step(self) -> bool:
 model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=f'{timestamp}/')
 checkpoint_on_event = CheckpointCallback(save_freq=100000, save_path=f'{timestamp}/checkpoints/')
 record_env = RecordEnvCallback()
-plot_callback = EveryNTimesteps(n_steps=20000, callback=record_env)
+plot_callback = EveryNTimesteps(n_steps=2000, callback=record_env)
 model.learn(total_timesteps=5000000, callback=[checkpoint_on_event, plot_callback])
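For context: stable-baselines3's EveryNTimesteps wrapper triggers its child callback once every n_steps environment steps, so this change makes the plotting/recording callback fire every 2,000 steps instead of every 20,000. A minimal sketch of the pattern (the PrintCallback below is an illustrative stand-in, not part of this commit; RecordEnvCallback in this repo plays the same role):

from stable_baselines3.common.callbacks import BaseCallback, EveryNTimesteps

class PrintCallback(BaseCallback):
    # illustrative child callback that just reports progress
    def _on_step(self) -> bool:
        print(f'{self.num_timesteps} timesteps elapsed')
        return True

# fires PrintCallback every 2000 environment steps during model.learn(...)
plot_callback = EveryNTimesteps(n_steps=2000, callback=PrintCallback())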
Lines changed: 129 additions & 0 deletions

@@ -0,0 +1,129 @@
from datetime import datetime
from os import makedirs
from typing import List

import gym
import numpy as np
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, EveryNTimesteps
from stable_baselines3.common.monitor import Monitor

from openmodelica_microgrid_gym.env import PlotTmpl
from openmodelica_microgrid_gym.net import Network
from openmodelica_microgrid_gym.util import nested_map

np.random.seed(0)

timestamp = datetime.now().strftime(f'%Y.%b.%d %X ')
makedirs(timestamp)

# Simulation definitions
net = Network.load('../../net/net_single-inv-curr.yaml')
max_episode_steps = 300  # number of simulation steps per episode
num_episodes = 1  # number of simulation episodes (i.e. SafeOpt iterations)
iLimit = 30  # inverter current limit / A
iNominal = 20  # nominal inverter current / A
mu = 2  # factor for barrier function (see below)


class Reward:
    def __init__(self):
        self._idx = None

    def set_idx(self, obs):
        if self._idx is None:
            self._idx = nested_map(
                lambda n: obs.index(n),
                [[f'lc1.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012']])

    def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float:
        """
        Defines the reward function for the environment. Uses the observations and setpoints to evaluate the quality
        of the used parameters.
        Takes the current measurements and setpoints to calculate the mean-root-error (MRE) control error and adds a
        logarithmic barrier penalty in case the current limit is violated. The barrier function is adjustable via the
        parameter mu.

        :param cols: list of variable names of the data
        :param data: observation data from the environment (ControlVariables, e.g. currents and voltages)
        :param risk: unused here
        :return: error as negative reward
        """
        self.set_idx(cols)
        idx = self._idx

        Iabc_master = data[idx[0]]  # 3 phase currents at LC inductors
        ISPabc_master = data[idx[1]]  # 3 phase current setpoints (inverter1.i_ref)

        # control error = mean-root-error (MRE) of reference minus measurement
        # (due to normalization the control error is often around zero -> compared to the MSE metric, the MRE provides
        # better, i.e. more significant, gradients)
        # plus barrier penalty for violating the current constraint
        error = np.sum((np.abs((ISPabc_master - Iabc_master)) / iLimit) ** 0.5, axis=0) \
                + -np.sum(mu * np.log(1 - np.maximum(np.abs(Iabc_master) - iNominal, 0) / (iLimit - iNominal)), axis=0)
        # error /= max_episode_steps

        return -np.clip(error.squeeze(), 0, 1e5)


def xylables(fig):
    ax = fig.gca()
    ax.set_xlabel(r'$t\,/\,\mathrm{s}$')
    ax.set_ylabel(r'$i_{\mathrm{abc}}\,/\,\mathrm{A}$')
    ax.grid(which='both')
    fig.savefig(f'{timestamp}/Inductor_currents.pdf')


env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1',
               reward_fun=Reward().rew_fun,
               viz_cols=[
                   PlotTmpl([[f'lc1.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']],
                            callback=xylables,
                            color=[['b', 'r', 'g'], ['b', 'r', 'g']],
                            style=[[None], ['--']]
                            ),
               ],
               viz_mode='episode',
               max_episode_steps=max_episode_steps,
               net=net,
               model_path='../../omg_grid/grid.network_singleInverter.fmu',
               is_normalized=True)

with open(f'{timestamp}/env.txt', 'w') as f:
    print(str(env), file=f)
env = Monitor(env)


class RecordEnvCallback(BaseCallback):
    def _on_step(self) -> bool:
        # roll out one deterministic episode with the current policy and render it
        obs = env.reset()
        for _ in range(max_episode_steps):
            env.render()
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)
            if done:
                break
        env.close()
        env.reset()
        return True


n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{timestamp}/')
checkpoint_on_event = CheckpointCallback(save_freq=100000, save_path=f'{timestamp}/checkpoints/')
record_env = RecordEnvCallback()
plot_callback = EveryNTimesteps(n_steps=50000, callback=record_env)
model.learn(total_timesteps=500000, callback=[checkpoint_on_event, plot_callback])

model.save('ddpg_CC')

del model  # remove to demonstrate saving and loading

model = DDPG.load("ddpg_CC")

# obs = env.reset()
# while True:
#     action, _states = model.predict(obs)
#     obs, rewards, dones, info = env.step(action)
#     env.render()
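For reference, the per-step reward implemented in Reward.rew_fun above can be written out as follows, with $i_k$ the measured inductor currents, $i^*_k$ the current setpoints, $i_\mathrm{lim}$ = iLimit, $i_\mathrm{nom}$ = iNominal and $\mu$ the barrier weight:

$$ r = -\operatorname{clip}\!\left( \sum_{k \in \{a,b,c\}} \sqrt{\frac{|i^*_k - i_k|}{i_\mathrm{lim}}} \;-\; \mu \sum_{k \in \{a,b,c\}} \ln\!\left(1 - \frac{\max(|i_k| - i_\mathrm{nom},\, 0)}{i_\mathrm{lim} - i_\mathrm{nom}}\right),\; 0,\; 10^{5} \right) $$

The first sum is the mean-root-error term from the docstring; the second is the logarithmic barrier, which is zero while $|i_k| \le i_\mathrm{nom}$ and grows without bound as $|i_k|$ approaches $i_\mathrm{lim}$.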

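One detail worth noting: the NormalActionNoise object is constructed in the script but never handed to the agent, so DDPG is trained here without exploration noise. If exploration noise is wanted, stable-baselines3's DDPG accepts it via the action_noise argument; a minimal variant of the constructor call above would be:

model = DDPG('MlpPolicy', env, action_noise=action_noise, verbose=1, tensorboard_log=f'{timestamp}/')

The imported OrnsteinUhlenbeckActionNoise could be substituted for NormalActionNoise in the same way.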