First results #11

Merged: 64 commits, Jun 7, 2024

Commits
52e5e08
mini changes fixed policy scripts
Mar 21, 2024
1bdbeab
refactored hf login
Mar 21, 2024
c4f47d2
typo, expanding opt space
Mar 21, 2024
49bd134
added new hyperpar files
Mar 21, 2024
34187eb
small fixes
Mar 21, 2024
11be3cb
exploring the population dynamics, especially with a higher survival …
Apr 3, 2024
2ed9f7e
updated train script/sb3 train util
Apr 4, 2024
aa38bac
added parallel rl train script
Apr 4, 2024
daf878e
updated rppo yaml
Apr 4, 2024
0509809
updated yaml name
Apr 4, 2024
235b1be
updated relative paths
Apr 4, 2024
7bc68e0
added installation to train bash script
Apr 4, 2024
29b7e8b
wacky behavior in evaluate_policy: now use in-house eval_pol for opti…
Apr 5, 2024
9c42b98
found r_dev bug on reset, fixed it! Plus, playing around with paramet…
Apr 8, 2024
81aefba
added SystemDynamics
Apr 9, 2024
8103a8a
initialize_population a bit leaner
Apr 11, 2024
ff44f4b
added custom harv vul, observe_total, updated notebooks with more com…
Apr 24, 2024
ba789ec
cautionary rule now has two possible observations
Apr 24, 2024
b2c8629
added to pop-dyn tests, CR and Esc agents now admit biomass or mean_w…
Apr 24, 2024
e4554aa
notebooks, deleted legacy debug, attributes, varnames for CR and esc
Apr 25, 2024
e85cf9a
Added get_r_devs_v2 option
May 2, 2024
5008935
added ray simulator
May 2, 2024
8dfc897
now fixed_policy_opt script admits config files for env
May 2, 2024
ed79158
notebooks
May 2, 2024
42b2728
hyperpars
May 2, 2024
40e2cf2
added config file input to fixed_policy_opt, added optional id input
May 2, 2024
d993bf6
hyperpars
May 3, 2024
aeb91fd
hyperparams
May 3, 2024
904ef39
mntCar
May 4, 2024
659fdc4
added a ray train util
May 6, 2024
2aa8bd8
allow custom harvest and vulnerability curves
May 6, 2024
233381b
added asm CR-like
May 7, 2024
1942489
good hyperpars for ppo?
May 7, 2024
bed678d
added cr-like env, added trophy harvesting, messed with hyperpars
May 8, 2024
0b363e5
hyperpars
May 23, 2024
a4fe73d
now n_trophy_ages is an input parameter
May 23, 2024
da45ca0
training script/util handles hf properly now
May 23, 2024
1f24258
simulator not remote now
May 23, 2024
5a0d925
notebooks
May 23, 2024
29ef7a8
plot reproduction notebook
May 23, 2024
3d3fd3b
fixed policy by cases notebook
May 23, 2024
0bfbd3c
hyperpars
May 23, 2024
f1ef728
notebook
May 23, 2024
b3bb3ba
added AsmEnvEsc for escapement actions
May 23, 2024
40ff31c
hyperpars
May 23, 2024
33b0204
notebooks
May 23, 2024
f9b6d1c
AsmEnvEsc bugs
May 23, 2024
ed97060
notebook, AsmEnvEsc.get_mortality method
May 24, 2024
3b11b12
results notebook update
May 24, 2024
5ec60e8
added constant action agent
May 29, 2024
d016962
hyperpars
May 29, 2024
912b973
require scikit-learn specific version for skopt to work currently
May 29, 2024
9c8455d
fixed policy scripts update
May 29, 2024
cdf3200
mwt obs in AsmEnv
May 29, 2024
926c30a
small denominator safety in AsmEnvEsc
May 29, 2024
af28fb1
input files galore
May 29, 2024
f5fa42f
notebooks
May 29, 2024
814b164
longer tuning, do not store obj fun (unserializable)
May 30, 2024
1df123d
New train scripts
May 31, 2024
d471608
hyperpars for results
May 31, 2024
3a54646
New sb3 train util with checkpoint saving
May 31, 2024
9a6b287
added results/figures notebooks
Jun 6, 2024
b293f8d
updated results notebooks
Jun 6, 2024
664965f
updated tests
Jun 6, 2024
7 changes: 7 additions & 0 deletions hyperpars/for_results/fixed_policy_UM1.yml
@@ -0,0 +1,7 @@
config:
  upow: 1
  harvest_fn_name: "default"
  n_eval_episodes: 250
  n_calls: 70
  id: "UM1"
  repo_id: "boettiger-lab/rl4eco"
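These fixed-policy configs drive an optimization over a policy's free parameters: `n_calls` objective evaluations, each averaging reward over `n_eval_episodes` episodes (the commit log notes the `fixed_policy_opt` script relies on skopt). A minimal stdlib sketch of that loop, with the toy quadratic objective, the parameter range, and both function names as assumptions standing in for the real AsmEnv rollouts:

```python
import random
from statistics import mean

def evaluate_policy(theta, n_eval_episodes, seed=0):
    """Stand-in objective: mean episode reward for policy parameter theta.
    The true environment rollout is replaced by a noisy quadratic here."""
    rng = random.Random(seed)
    rewards = [-(theta - 0.3) ** 2 + rng.gauss(0, 0.01)
               for _ in range(n_eval_episodes)]
    return mean(rewards)

def fixed_policy_opt(n_calls, n_eval_episodes):
    """Random-search stand-in for the skopt-based optimizer:
    n_calls evaluations, keep the best parameter seen."""
    best_theta, best_val = None, float("-inf")
    rng = random.Random(42)
    for i in range(n_calls):
        theta = rng.uniform(0.0, 1.0)  # assumed parameter range
        val = evaluate_policy(theta, n_eval_episodes, seed=i)
        if val > best_val:
            best_theta, best_val = theta, val
    return best_theta, best_val

theta, val = fixed_policy_opt(n_calls=70, n_eval_episodes=250)
```

With 70 calls and 250-episode averaging, the episode noise is largely averaged out and the best parameter lands near the true optimum; a Gaussian-process optimizer like skopt's would need far fewer calls than random search on harder objectives.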
7 changes: 7 additions & 0 deletions hyperpars/for_results/fixed_policy_UM2.yml
@@ -0,0 +1,7 @@
config:
  upow: 0.6
  harvest_fn_name: "default"
  n_eval_episodes: 250
  n_calls: 70
  id: "UM2"
  repo_id: "boettiger-lab/rl4eco"
8 changes: 8 additions & 0 deletions hyperpars/for_results/fixed_policy_UM3.yml
@@ -0,0 +1,8 @@
config:
  upow: 1
  harvest_fn_name: "trophy"
  n_trophy_ages: 10
  n_eval_episodes: 250
  n_calls: 70
  id: "UM3"
  repo_id: "boettiger-lab/rl4eco"
41 changes: 41 additions & 0 deletions hyperpars/for_results/ppo_biomass_UM1.yml
@@ -0,0 +1,41 @@
# algo
algo: "PPO"
total_timesteps: 6000000
algo_config:
  tensorboard_log: "../../../logs"
  #
  policy: 'MlpPolicy'
  # learning_rate: 0.00015
  policy_kwargs: "dict(net_arch=[64, 32, 16])"
  #
  # batch_size: 512
  # gamma: 0.9999
  # learning_rate: !!float 7.77e-05
  # ent_coef: 0.00429
  # clip_range: 0.1
  # gae_lambda: 0.9
  # max_grad_norm: 5
  # vf_coef: 0.19
  # policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])"
  # policy_kwargs: "dict(net_arch=[256, 128])"
  use_sde: True
  # clip_range: 0.1

# env
env_id: "AsmEnv"
config:
  observation_fn_id: 'observe_1o'
  n_observs: 1
  #
  harvest_fn_name: "default"
  upow: 1
  n_envs: 12

# io
repo: "cboettig/rl-ecology"
save_path: "../saved_agents/results/"

# misc
id: "biomass-UM1-64-32-16"
# id: "short-test"
additional_imports: ["torch"]
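A note on the stringified `policy_kwargs`: stable-baselines3 expects an actual dict, so whatever loads these YAMLs has to evaluate the string (with `torch` importable, per `additional_imports`, for entries that reference it). A hedged sketch of that step; the helper name and the restricted-eval approach are assumptions, not the repo's actual code:

```python
def parse_algo_config(algo_config: dict) -> dict:
    """Return a copy of algo_config with stringified entries evaluated.

    Hypothetical helper: the repo's train util may handle this differently.
    """
    parsed = dict(algo_config)
    pk = parsed.get("policy_kwargs")
    if isinstance(pk, str):
        # The YAML stores a Python expression string; evaluate it in a
        # restricted namespace instead of a bare eval().
        parsed["policy_kwargs"] = eval(pk, {"__builtins__": {}, "dict": dict})
    return parsed

algo_config = {
    "tensorboard_log": "../../../logs",
    "policy": "MlpPolicy",
    "policy_kwargs": "dict(net_arch=[64, 32, 16])",
    "use_sde": True,
}
kwargs = parse_algo_config(algo_config)
# kwargs["policy_kwargs"] is now {"net_arch": [64, 32, 16]}, ready to be
# splatted into the PPO constructor alongside the vectorized env.
```

Storing `policy_kwargs` as a string keeps the YAML plain-text portable; the cost is that the loader must evaluate it, which is why the net-arch variants in these files can be swapped by editing one quoted line.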
41 changes: 41 additions & 0 deletions hyperpars/for_results/ppo_biomass_UM2.yml
@@ -0,0 +1,41 @@
# algo
algo: "PPO"
total_timesteps: 6000000
algo_config:
  tensorboard_log: "../../../logs"
  #
  policy: 'MlpPolicy'
  # learning_rate: 0.00015
  policy_kwargs: "dict(net_arch=[64, 32, 16])"
  #
  # batch_size: 512
  # gamma: 0.9999
  # learning_rate: !!float 7.77e-05
  # ent_coef: 0.00429
  # clip_range: 0.1
  # gae_lambda: 0.9
  # max_grad_norm: 5
  # vf_coef: 0.19
  # policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])"
  # policy_kwargs: "dict(net_arch=[256, 128])"
  use_sde: True
  # clip_range: 0.1

# env
env_id: "AsmEnv"
config:
  observation_fn_id: 'observe_1o'
  n_observs: 1
  #
  harvest_fn_name: "default"
  upow: 0.6
  n_envs: 12

# io
repo: "cboettig/rl-ecology"
save_path: "../saved_agents/results/"

# misc
id: "biomass-UM2-64-32-16"
# id: "short-test"
additional_imports: ["torch"]
42 changes: 42 additions & 0 deletions hyperpars/for_results/ppo_biomass_UM3.yml
@@ -0,0 +1,42 @@
# algo
algo: "PPO"
total_timesteps: 6000000
algo_config:
  tensorboard_log: "../../../logs"
  #
  policy: 'MlpPolicy'
  # learning_rate: 0.00015
  policy_kwargs: "dict(net_arch=[64, 32, 16])"
  #
  # batch_size: 512
  # gamma: 0.9999
  # learning_rate: !!float 7.77e-05
  # ent_coef: 0.00429
  # clip_range: 0.1
  # gae_lambda: 0.9
  # max_grad_norm: 5
  # vf_coef: 0.19
  # policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])"
  # policy_kwargs: "dict(net_arch=[256, 128])"
  use_sde: True
  # clip_range: 0.1

# env
env_id: "AsmEnv"
config:
  observation_fn_id: 'observe_1o'
  n_observs: 1
  #
  harvest_fn_name: "trophy"
  n_trophy_ages: 10
  upow: 1
  n_envs: 12

# io
repo: "cboettig/rl-ecology"
save_path: "../saved_agents/results/"

# misc
id: "biomass-UM3-64-32-16"
# id: "short-test"
additional_imports: ["torch"]
41 changes: 41 additions & 0 deletions hyperpars/for_results/ppo_both_UM1.yml
@@ -0,0 +1,41 @@
# algo
algo: "PPO"
total_timesteps: 6000000
algo_config:
  tensorboard_log: "../../../logs"
  #
  policy: 'MlpPolicy'
  # learning_rate: 0.00015
  policy_kwargs: "dict(net_arch=[64, 32, 16])"
  #
  # batch_size: 512
  # gamma: 0.9999
  # learning_rate: !!float 7.77e-05
  # ent_coef: 0.00429
  # clip_range: 0.1
  # gae_lambda: 0.9
  # max_grad_norm: 5
  # vf_coef: 0.19
  # policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])"
  # policy_kwargs: "dict(net_arch=[256, 128])"
  use_sde: True
  # clip_range: 0.1

# env
env_id: "AsmEnv"
config:
  observation_fn_id: 'observe_2o'
  n_observs: 2
  #
  harvest_fn_name: "default"
  upow: 1
  n_envs: 12

# io
repo: "cboettig/rl-ecology"
save_path: "../saved_agents/results/"

# misc
id: "2obs-UM1-64-32-16"
# id: "short-test"
additional_imports: ["torch"]
41 changes: 41 additions & 0 deletions hyperpars/for_results/ppo_both_UM2.yml
@@ -0,0 +1,41 @@
# algo
algo: "PPO"
total_timesteps: 6000000
algo_config:
  tensorboard_log: "../../../logs"
  #
  policy: 'MlpPolicy'
  # learning_rate: 0.00015
  policy_kwargs: "dict(net_arch=[64, 32, 16])"
  #
  # batch_size: 512
  # gamma: 0.9999
  # learning_rate: !!float 7.77e-05
  # ent_coef: 0.00429
  # clip_range: 0.1
  # gae_lambda: 0.9
  # max_grad_norm: 5
  # vf_coef: 0.19
  # policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])"
  # policy_kwargs: "dict(net_arch=[256, 128])"
  use_sde: True
  # clip_range: 0.1

# env
env_id: "AsmEnv"
config:
  observation_fn_id: 'observe_2o'
  n_observs: 2
  #
  harvest_fn_name: "default"
  upow: 0.6
  n_envs: 12

# io
repo: "cboettig/rl-ecology"
save_path: "../saved_agents/results/"

# misc
id: "2obs-UM2-64-32-16"
# id: "short-test"
additional_imports: ["torch"]
42 changes: 42 additions & 0 deletions hyperpars/for_results/ppo_both_UM3.yml
@@ -0,0 +1,42 @@
# algo
algo: "PPO"
total_timesteps: 6000000
algo_config:
  tensorboard_log: "../../../logs"
  #
  policy: 'MlpPolicy'
  # learning_rate: 0.00015
  policy_kwargs: "dict(net_arch=[64, 32, 16])"
  #
  # batch_size: 512
  # gamma: 0.9999
  # learning_rate: !!float 7.77e-05
  # ent_coef: 0.00429
  # clip_range: 0.1
  # gae_lambda: 0.9
  # max_grad_norm: 5
  # vf_coef: 0.19
  # policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])"
  # policy_kwargs: "dict(net_arch=[256, 128])"
  use_sde: True
  # clip_range: 0.1

# env
env_id: "AsmEnv"
config:
  observation_fn_id: 'observe_2o'
  n_observs: 2
  #
  harvest_fn_name: "trophy"
  n_trophy_ages: 10
  upow: 1
  n_envs: 12

# io
repo: "cboettig/rl-ecology"
save_path: "../saved_agents/results/"

# misc
id: "2obs-UM3-64-32-16"
# id: "short-test"
additional_imports: ["torch"]
41 changes: 41 additions & 0 deletions hyperpars/for_results/ppo_mwt_UM1.yml
@@ -0,0 +1,41 @@
# algo
algo: "PPO"
total_timesteps: 6000000
algo_config:
  tensorboard_log: "../../../logs"
  #
  policy: 'MlpPolicy'
  # learning_rate: 0.00015
  policy_kwargs: "dict(net_arch=[64, 32, 16])"
  #
  # batch_size: 512
  # gamma: 0.9999
  # learning_rate: !!float 7.77e-05
  # ent_coef: 0.00429
  # clip_range: 0.1
  # gae_lambda: 0.9
  # max_grad_norm: 5
  # vf_coef: 0.19
  # policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])"
  # policy_kwargs: "dict(net_arch=[256, 128])"
  use_sde: True
  # clip_range: 0.1

# env
env_id: "AsmEnv"
config:
  observation_fn_id: 'observe_mwt'
  n_observs: 1
  #
  harvest_fn_name: "default"
  upow: 1
  n_envs: 12

# io
repo: "cboettig/rl-ecology"
save_path: "../saved_agents/results/"

# misc
id: "mwt-UM1-64-32-16"
# id: "short-test"
additional_imports: ["torch"]
41 changes: 41 additions & 0 deletions hyperpars/for_results/ppo_mwt_UM2.yml
@@ -0,0 +1,41 @@
# algo
algo: "PPO"
total_timesteps: 6000000
algo_config:
  tensorboard_log: "../../../logs"
  #
  policy: 'MlpPolicy'
  # learning_rate: 0.00015
  policy_kwargs: "dict(net_arch=[64, 32, 16])"
  #
  # batch_size: 512
  # gamma: 0.9999
  # learning_rate: !!float 7.77e-05
  # ent_coef: 0.00429
  # clip_range: 0.1
  # gae_lambda: 0.9
  # max_grad_norm: 5
  # vf_coef: 0.19
  # policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])"
  # policy_kwargs: "dict(net_arch=[256, 128])"
  use_sde: True
  # clip_range: 0.1

# env
env_id: "AsmEnv"
config:
  observation_fn_id: 'observe_mwt'
  n_observs: 1
  #
  harvest_fn_name: "default"
  upow: 0.6
  n_envs: 12

# io
repo: "cboettig/rl-ecology"
save_path: "../saved_agents/results/"

# misc
id: "mwt-UM2-64-32-16"
# id: "short-test"
additional_imports: ["torch"]