-
Notifications
You must be signed in to change notification settings - Fork 0
/
policy_predator.py
58 lines (53 loc) · 2.18 KB
/
policy_predator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
import numpy as np
import os
from stable_baselines3 import PPO
from ppenv import *
class Policy:
    """Predator policy backed by a fresh Keras network or a saved PPO model.

    With ``load=False`` an untrained Keras ``Sequential`` network is built
    (input of size 25, dense layers of 25 and 10 ReLU units, 5-way softmax
    output) and compiled with an Adam optimizer.  With ``load=True`` a
    Stable-Baselines3 ``PPO`` model is read from
    ``Training/Saved Models/PP_PPO_1_2M`` instead.
    """

    def __init__(self, load=False, pos=(0, 0)):
        """Build or load the underlying model.

        Args:
            load: When true, load the saved PPO model rather than creating
                a new Keras network.
            pos: Value stored on the instance as ``self.pos``.
        """
        self.pos = pos
        if not load:
            # Learning rate handed to the Adam optimizer below.
            self.alpha = 0.01
            self.policy = keras.models.Sequential([
                keras.layers.Dense(25, activation="relu", input_shape=[25]),
                keras.layers.Dense(10, activation="relu"),
                keras.layers.Dense(5, activation="softmax"),
            ])
            self.policy.compile(optimizer=Adam(learning_rate=self.alpha))
        else:
            # No optimizer is created on this path; keep the attribute defined
            # so every instance exposes the same set of attributes.
            self.alpha = None
            path_best_model = os.path.join('Training', 'Saved Models', 'PP_PPO_1_2M')
            # PPO.load builds the model from the saved file, so there is no
            # need to construct a throwaway PPO (and environment) first.
            self.policy = PPO.load(path_best_model)

    def __call__(self, state):
        """Return the result of calling the underlying model on ``state``."""
        # NOTE(review): when load=True, self.policy is a Stable-Baselines3 PPO
        # object; confirm it is callable, or that callers use its predict API
        # instead of calling this wrapper.
        return self.policy(state)
def get_obs(array, pos):
    """Return the 5x5 wrap-around window of ``array`` centred on ``pos``.

    Every cell value ``v`` is reported as ``(v // 2) * 2``, i.e. rounded
    down to the nearest even number.

    Args:
        array: 2-D grid indexable as ``array[row][col]``.
        pos: ``(row, col)`` of the window centre.

    Returns:
        A NumPy array with 5 rows and 5 columns.
    """
    i, j = pos
    n_rows = len(array)
    # Wrap columns by the row length (not the row count) so that non-square
    # grids are indexed correctly.
    n_cols = len(array[0])
    offsets = range(-2, 3)
    window = [
        [(array[(i + di) % n_rows][(j + dj) % n_cols] // 2) * 2 for dj in offsets]
        for di in offsets
    ]
    return np.array(window)
def apply_action(array, pos, action, model=None):
    """Move the cell at ``pos`` one step on a wrap-around grid by swapping.

    The content of ``array[pos]`` is exchanged with the neighbouring cell
    selected by ``action``; indices wrap around at the grid edges.

    Args:
        array: 2-D grid indexable as ``array[row][col]``; modified in place.
        pos: ``(row, col)`` of the cell to move.
        action: ``0`` moves to column + 1, ``1`` to column - 1, ``2`` to
            row - 1, ``3`` to row + 1.  Any other value leaves the grid
            unchanged.
        model: Optional object whose ``pos`` attribute is set to the new
            position after a move.

    Returns:
        The same ``array`` object that was passed in.
    """
    i, j = pos
    # Compare with == rather than using a dict lookup so that unhashable
    # action values (e.g. NumPy arrays) keep working.
    if action == 0:
        di, dj = 0, 1
    elif action == 1:
        di, dj = 0, -1
    elif action == 2:
        di, dj = -1, 0
    elif action == 3:
        di, dj = 1, 0
    else:
        return array

    # Wrap rows by the row count and columns by the row length so that
    # non-square grids are handled correctly.
    ni = (i + di) % len(array)
    nj = (j + dj) % len(array[0])
    array[i][j], array[ni][nj] = array[ni][nj], array[i][j]
    # model is optional; only track the new position when one was supplied.
    if model is not None:
        model.pos = (ni, nj)
    return array