Skip to content

Commit

Permalink
#51 extended stable baselines experiment
Browse files Browse the repository at this point in the history
  • Loading branch information
Stefan Heid committed Dec 18, 2020
1 parent 18a5765 commit 5a543ba
Showing 1 changed file with 24 additions and 15 deletions.
39 changes: 24 additions & 15 deletions experiments/issue51_new/stable_baselines.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, EveryNTimesteps
from stable_baselines3.common.monitor import Monitor

from openmodelica_microgrid_gym.env import PlotTmpl
Expand Down Expand Up @@ -35,7 +36,7 @@ def set_idx(self, obs):
lambda n: obs.index(n),
[[f'lc1.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012']])

def rew_fun(self, cols: List[str], data: np.ndarray) -> float:
def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float:
"""
Defines the reward function for the environment. Uses the observations and setpoints to evaluate the quality of the
used parameters.
Expand All @@ -57,8 +58,8 @@ def rew_fun(self, cols: List[str], data: np.ndarray) -> float:
# better, i.e. more significant, gradients)
# plus barrier penalty for violating the current constraint
error = np.sum((np.abs((ISPabc_master - Iabc_master)) / iLimit) ** 0.5, axis=0) \
# + -np.sum(mu * np.log(1 - np.maximum(np.abs(Iabc_master) - iNominal, 0) / (iLimit - iNominal)), axis=0) \
# * max_episode_steps
# + -np.sum(mu * np.log(1 - np.maximum(np.abs(Iabc_master) - iNominal, 0) / (iLimit - iNominal)), axis=0)
error /= max_episode_steps

return -np.clip(error.squeeze(), 0, 1e5)

Expand All @@ -74,7 +75,7 @@ def xylables(fig):
env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1',
reward_fun=Reward().rew_fun,
viz_cols=[
PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']],
PlotTmpl([[f'lc1.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']],
callback=xylables,
color=[['b', 'r', 'g'], ['b', 'r', 'g']],
style=[[None], ['--']]
Expand All @@ -89,15 +90,23 @@ def xylables(fig):
print(str(env), file=f)
env = Monitor(env)


class RecordEnvCallback(BaseCallback):
    """Visualization callback: rolls out one episode with the current policy.

    Meant to be wrapped in an ``EveryNTimesteps`` trigger so that, at a fixed
    interval during training, a full rendered episode is played with the
    deterministic policy, after which the environment is closed and reset.

    NOTE(review): relies on the module-level ``env``, ``model`` and
    ``max_episode_steps`` globals defined in this script.
    """

    def _on_step(self) -> bool:
        # Play a single deterministic episode, rendering every step.
        observation = env.reset()
        steps_taken = 0
        while steps_taken < max_episode_steps:
            env.render()
            chosen_action, _ = model.predict(observation, deterministic=True)
            observation, _reward, episode_done, _info = env.step(chosen_action)
            if episode_done:
                break
            steps_taken += 1
        env.close()
        env.reset()
        # Returning True tells stable-baselines to continue training.
        return True


model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=f'{timestamp}/')
model.learn(total_timesteps=1000000)
model.save(f'{timestamp}/model')

obs = env.reset()
for _ in range(1000):
env.render()
action, _states = model.predict(obs, deterministic=True)
obs, reward, done, info = env.step(action)
if done:
break
env.close()
checkpoint_on_event = CheckpointCallback(save_freq=100000, save_path=f'{timestamp}/checkpoints/')
record_env = RecordEnvCallback()
plot_callback = EveryNTimesteps(n_steps=20000, callback=record_env)
model.learn(total_timesteps=5000000, callback=[checkpoint_on_event, plot_callback])

0 comments on commit 5a543ba

Please sign in to comment.