asm_env, asm_fns added: started refactoring (#10)
* asm_env, asm_fns added: refactoring

* added saved_agents to .gitignore

* played with hyperpars, added more sb3_zoo hyperpars, fixed bug in asm_fns.observe_2o

* changes from Chris email chain: harvest_vul, survey_vul

* no resetting p_big, sdr, rho in initialize_population

* harvest_vul and survey_vul now different

* added missing guard against division by zero

* isVecObs added to handle vectorized-env observations inside sb3's evaluate_policy

---------

Co-authored-by: Felipe Montealegre-Mora <[email protected]>
felimomo and Felipe Montealegre-Mora authored Mar 21, 2024
1 parent bc0a45d commit bb69f3b
Showing 15 changed files with 612 additions and 31 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -158,3 +158,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

saved_agents/
63 changes: 38 additions & 25 deletions hyperpars/rppo-asm2o.yml
@@ -2,38 +2,38 @@

# algo overall
algo: "RPPO"
total_timesteps: 10000000
total_timesteps: 20000000

additional_imports: ["torch"]

# env overall
env_id: "Asm2o-v0"
config: {}
n_envs: 32
n_envs: 4

# io
repo: "cboettig/rl-ecology"
save_path: "../saved_agents"

# # MINIMAL CONFIG
id: "minimal"
algo_config:
policy: 'MlpLstmPolicy'
tensorboard_log: "~/logs"
# id: "minimal"
# algo_config:
# policy: 'MlpLstmPolicy'
# tensorboard_log: "../../logs"

# # SLOW LEARN
# id: "slow"
# algo_config:
# policy: 'MlpLstmPolicy'
# tensorboard_log: "~/logs"
# tensorboard_log: "../../logs"
# learning_rate: 0.0001
# # default learning rate = 0.0003

# # EXTRA SLOW LEARN
# id: "extra-slow"
# algo_config:
# policy: 'MlpLstmPolicy'
# tensorboard_log: "~/logs"
# tensorboard_log: "../../logs"
# learning_rate: 0.00003


@@ -46,7 +46,7 @@ algo_config:
# algo_config:
# # normalize: True # not clear what this one actually does -- from the source code it seems to 'activate' VecNormalize, but more care & examination needed
# policy: 'MlpLstmPolicy'
# tensorboard_log: "~/logs"
# tensorboard_log: "../../logs"
# n_steps: 256
# batch_size: 256
# gae_lambda: 0.95
@@ -68,7 +68,7 @@ algo_config:
# id: "cheetah"
# algo_config:
# policy: 'MlpLstmPolicy'
# tensorboard_log: "~/logs"
# tensorboard_log: "../../logs"
# batch_size: 64
# n_steps: 512
# gamma: 0.98
@@ -88,26 +88,26 @@ algo_config:



# # INVERTED PENDULUM
# id: "inv_pend"
# algo_config:
# tensorboard_log: "~/logs"
# policy: 'MlpLstmPolicy'
# n_steps: 2048
# batch_size: 64
# gae_lambda: 0.95
# gamma: 0.99
# n_epochs: 10
# ent_coef: 0.0
# learning_rate: 2.5e-4
# clip_range: 0.2
# INVERTED PENDULUM
id: "inv_pend"
algo_config:
tensorboard_log: "../../logs"
policy: 'MlpLstmPolicy'
n_steps: 2048
batch_size: 64
gae_lambda: 0.95
gamma: 0.99
n_epochs: 10
ent_coef: 0.0
learning_rate: 2.5e-4
clip_range: 0.2


# # MOUNTAIN CAR NO VEL

# id: "mount_car"
# algo_config:
# tensorboard_log: "~/logs"
# tensorboard_log: "../../logs"
# policy: 'MlpLstmPolicy'
# batch_size: 256
# n_steps: 1024
@@ -124,4 +124,17 @@ algo_config:
# policy_kwargs: "dict(log_std_init=0.0, ortho_init=False,
# lstm_hidden_size=32,
# enable_critic_lstm=True,
# net_arch=dict(pi=[64], vf=[64]))"
# net_arch=dict(pi=[64], vf=[64]))"

# SPACE INVADERS V4
# id: "space_invaders"
# algo_config:
# tensorboard_log: "../../logs"
# policy: 'MlpLstmPolicy'
# batch_size: 512
# # clip_range: 0.1
# ent_coef: 0.012
# frame_stack: 4
# learning_rate: 2.5e-4
# policy_kwargs: dict(enable_critic_lstm=False, lstm_hidden_size=128, )
# vf_coef: 0.5
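
For orientation, a hedged sketch of how a config file like the one above could be consumed. The repository's actual training entry point is not part of this diff, and "RPPO" is assumed here to map to sb3-contrib's RecurrentPPO:

import yaml
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.env_util import make_vec_env
import rl4fisheries  # side effect: registers "Asm2o-v0" (see __init__.py below)

# load the hyperparameter file shown above
with open("hyperpars/rppo-asm2o.yml") as f:
    cfg = yaml.safe_load(f)

# build a vectorized env and the recurrent-policy agent described by the config
vec_env = make_vec_env(cfg["env_id"], n_envs=cfg["n_envs"], env_kwargs=cfg["config"])
model = RecurrentPPO(env=vec_env, **cfg["algo_config"])
model.learn(total_timesteps=cfg["total_timesteps"])
model.save(f"{cfg['save_path']}/rppo-{cfg['id']}")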
2 changes: 1 addition & 1 deletion hyperpars/tqc-asm2o-v0-1.yml
@@ -3,7 +3,7 @@
algo: "TQC"
env_id: "Asm2o-v0"
n_envs: 6
tensorboard: "/~/logs"
tensorboard: "~/logs"
total_timesteps: 12000000
config: {"learning_rate": 0.0001,
"learning_starts": 1000,
106 changes: 106 additions & 0 deletions scripts/fixed_policy_opt.py
@@ -0,0 +1,106 @@
#!/opt/venv/bin/python
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("-p", "--policy", choices = ["msy", "esc", "cr"], help="Policy to be tuned", type=str)
parser.add_argument("-v", "--verbose", help="Verbosity of tuning method", type=bool)
parser.add_argument("-o", "--opt-algo", choices=["gp", "gbrt"], help="Optimization algo used")
args = parser.parse_args()

from huggingface_hub import hf_hub_download, HfApi, login
import numpy as np
from skopt.space import Real
from skopt.utils import use_named_args
from skopt import dump  # used at the end of this script to save the optimization results
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

from rl4fisheries import AsmEnv

# hf login
# api = HfApi()
# login()

# optimization algo
if args.opt_algo == "gp":
from skopt import gp_minimize
opt_algo = gp_minimize
elif args.opt_algo == "gbrt":
from skopt import gbrt_minimize
opt_algo = gbrt_minimize

# policy
if args.policy == "msy":
from rl4fisheries import Msy
policy_cls = Msy
elif args.policy == "esc":
from rl4fisheries import ConstEsc
policy_cls = ConstEsc
elif args.policy == "cr":
from rl4fisheries import CautionaryRule
policy_cls = CautionaryRule


# optimizing space
msy_space = [Real(0.002, 0.25, name='mortality')]
esc_space = [Real(0.02, 0.15, name='escapement')]
cr_space = [
Real(0.00001, 1, name='radius'),
Real(0.00001, np.pi/4.00001, name='theta'),
Real(0, 0.2, name='y2')
]
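# Note on cr_space: the CautionaryRule policy is searched in polar coordinates.
# In cr_fn below, x1 = radius * sin(theta) and x2 = radius * cos(theta); keeping
# theta within [0, pi/4] guarantees sin(theta) <= cos(theta), so x1 <= x2 holds
# by construction (the assert in cr_fn is a sanity check of this).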
space = {'msy':msy_space, 'esc':esc_space, 'cr':cr_space}[args.policy]

# optimizing function
from stable_baselines3.common.monitor import Monitor

@use_named_args(space)
def msy_fn(**params):
agent = Msy(AsmEnv(), mortality=params['mortality'])
env = AsmEnv()
mean, sd = evaluate_policy(agent, Monitor(env), n_eval_episodes=100)
return -mean

@use_named_args(space)
def esc_fn(**params):
agent = ConstEsc(AsmEnv(), escapement=params['escapement'])
env = AsmEnv()
mean, sd = evaluate_policy(agent, Monitor(env), n_eval_episodes=100)
return -mean

@use_named_args(space)
def cr_fn(**params):
theta = params["theta"]
radius = params["radius"]
x1 = np.sin(theta) * radius
x2 = np.cos(theta) * radius

assert x1 <= x2, f"CautionaryRule error: expected x1 <= x2, got x1={x1:.4f}, x2={x2:.4f}"

agent = CautionaryRule(AsmEnv(), x1 = x1, x2 = x2, y2 = params["y2"])
env = AsmEnv()
mean, sd = evaluate_policy(agent, Monitor(env), n_eval_episodes=100)
return -mean

opt_fn = {'msy':msy_fn, 'esc':esc_fn, 'cr':cr_fn}[args.policy]


# optimize
results = opt_algo(opt_fn, space, n_calls=300, verbose=args.verbose, n_jobs=-1)
print(
f"{args.policy}-{args.opt_algo} results: "
f"opt args = {[eval(f'{r:.4f}') for r in results.x]}, "
f"rew={results.fun:.4f}"
)

# save
path = "../saved_agents/"
fname = f"{args.policy}_{args.opt_algo}.pkl"
dump(results, path+fname)

api = HfApi()  # assumes a cached Hugging Face token; otherwise run login() above
api.upload_file(
path_or_fileobj=path+fname,
path_in_repo="sb3/rl4fisheries/"+fname,
repo_id="boettiger-lab/rl4eco",
repo_type="model",
)


15 changes: 15 additions & 0 deletions scripts/tune_fixed_policies.sh
@@ -0,0 +1,15 @@
#!/bin/bash

# move to script directory for normalized relative paths.
scriptdir="$(dirname "$0")"
cd "$scriptdir"

# gp
python fixed_policy_opt.py -p msy -v True -o gp &
python fixed_policy_opt.py -p esc -v True -o gp &
python fixed_policy_opt.py -p cr -v True -o gp &

# gbrt
python fixed_policy_opt.py -p msy -v True -o gbrt &
python fixed_policy_opt.py -p esc -v True -o gbrt &
python fixed_policy_opt.py -p cr -v True -o gbrt &
4 changes: 4 additions & 0 deletions src/rl4fisheries/__init__.py
@@ -2,6 +2,7 @@
from rl4fisheries.envs.asm import Asm
from rl4fisheries.envs.asm_2o import Asm2o
from rl4fisheries.envs.asm_esc import AsmEsc
from rl4fisheries.envs.asm_env import AsmEnv

from rl4fisheries.agents.cautionary_rule import CautionaryRule
from rl4fisheries.agents.const_esc import ConstEsc
@@ -15,3 +16,6 @@
register(id="AsmEsc-v0", entry_point="rl4fisheries.envs.asm_esc:AsmEsc")
# action is harvest, but observes both total count and mean biomass
register(id="Asm2o-v0", entry_point="rl4fisheries.envs.asm_2o:Asm2o")
# action is harvest, but observes both total count and mean biomass
register(id="AsmEnv", entry_point="rl4fisheries.envs.asm_env:AsmEnv")

6 changes: 5 additions & 1 deletion src/rl4fisheries/agents/cautionary_rule.py
@@ -6,18 +6,22 @@
from tqdm import tqdm
from .unit_interface import unitInterface

from rl4fisheries.agents.common import isVecObs

class CautionaryRule:
def __init__(self, x1=0, x2=1, y2=1, obs_bounds=1, **kwargs):
def __init__(self, env, x1=0, x2=1, y2=1, obs_bounds=1, **kwargs):
self.ui = unitInterface(bounds=obs_bounds)
self.x1 = x1
self.x2 = x2
self.y2 = y2
self.policy_type = "CautionaryRule_piecewise_linear"
self.env = env

assert x1 <= x2, "CautionaryRule error: x1 <= x2"

def predict(self, observation, **kwargs):
if isVecObs(observation, self.env):
observation = observation[0]
pop = self.ui.to_natural_units(observation)
raw_prediction = np.clip( self.predict_raw(pop), 0, 1)
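# rescale the harvest fraction from [0, 1] to the action range, assumed here to be [-1, 1]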
return np.float32([2 * raw_prediction - 1]), {}
10 changes: 10 additions & 0 deletions src/rl4fisheries/agents/common.py
@@ -0,0 +1,10 @@
import numpy as np

def isVecObs(obs, env):
    """Heuristically decide whether obs is a batched observation from a vectorized env."""
    shp = env.observation_space.shape
    if (
        (shp != np.shape(obs)) and
        (np.shape(obs[0]) == shp)  # quick n dirty, possibly prone to bugs tho
    ):
        return True
    return False
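
A minimal usage sketch of the helper (illustrative; assumes AsmEnv follows the gymnasium reset API of returning (obs, info)):

import numpy as np
from rl4fisheries import AsmEnv
from rl4fisheries.agents.common import isVecObs

env = AsmEnv()
single_obs, _ = env.reset()           # shape == env.observation_space.shape
batched_obs = np.stack([single_obs])  # leading batch axis, as a VecEnv would return
print(isVecObs(single_obs, env))      # False
print(isVecObs(batched_obs, env))     # True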
9 changes: 7 additions & 2 deletions src/rl4fisheries/agents/const_esc.py
@@ -4,19 +4,24 @@
import polars as pl
from tqdm import tqdm

from rl4fisheries.agents.common import isVecObs

class ConstEsc:
def __init__(self, escapement=0, bounds = 1, **kwargs):
def __init__(self, env, escapement=0, bounds = 1, **kwargs):
from .unit_interface import unitInterface
self.ui = unitInterface(bounds=bounds)
self.escapement = escapement
self.bounds = bounds
self.policy_type = "constant_escapement"
self.env = env


def predict(self, observation, **kwargs):
if isVecObs(observation, self.env):
observation = observation[0]
pop = self.ui.to_natural_units(observation)
raw_prediction = self.predict_raw(pop)
return 2 * raw_prediction - 1, {}
return np.float32([2 * raw_prediction - 1]), {}

def predict_raw(self, pop):
population = pop[0]
6 changes: 5 additions & 1 deletion src/rl4fisheries/agents/msy.py
@@ -4,14 +4,18 @@
import polars as pl
from tqdm import tqdm

from rl4fisheries.agents.common import isVecObs

class Msy:
def __init__(self, mortality: float =0, threshold: float =0, env = None, **kwargs):
def __init__(self, env, mortality: float =0, threshold: float =0, **kwargs):
self.mortality = mortality
self.threshold = threshold
self.policy_type = "msy_and_threshold"
self.env = env

def predict(self, observation, **kwargs):
if isVecObs(observation, self.env):
observation = observation[0]
pop = self.state_to_pop(observation)
raw_prediction = np.clip(self.predict_raw(pop), 0, 1)
return np.float32([2 * raw_prediction - 1]), {}
