First results (#14)

* hyperpars * added full observe fn, and enforce-min harvest fn * longer training, checkpoints for long training * notebooks * added lognormal noise, made it default * generator syntax for rand numbers, lognormal params * added new mean-corrected r-dev function (and made it default) * typo * clarifying comment * no exponential term * need kwargs to make mean-corrected fn compatible with old version * r_devs now lognormal + big school anomalies * sdr as arg * train.py: checkpoint locations * hf directory typo * notebooks * erased old models of r-devs * episodes notebook updated to new r-devs * clipping syntax * get_r_devs back compatibility * added nicer policy plots * readme * added CR agent test * CR test debug * test debug * test debug * test debug * test debug --------- Co-authored-by: Felipe Montealegre-Mora <[email protected]>
boettiger-lab · Jul 8, 2024 · c877881 · c877881
1 parent b37b77a
commit c877881
Show file tree

Hide file tree

Showing 12 changed files with 2,604 additions and 878 deletions.
diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@ Models:
 - `asm_esc.py`: provides `AsmEscEnv()`, which inherits from `AsmEnv` and differs from it in one respect: actions in `AsmEscEnv()` represent escapement levels rather than fishing intensities.
 - `asm_cr_like.py`: provides `AsmCRLike()`. In this environment, mean weight is observed and the action is to set parameters `(x1, x2, y2)` for a biomass-based harvest control rule of the type `CautionaryRule` (specified below).
 
-Strategies evaluated with MSE: 
+Strategies evaluated with Bayesian Optimization: 
 
 - `agents.cautionary_rule.CautionaryRule`: piece-wise linear harvest-control rule specified by three parameters `(x1, x2, y2)`. Example plot (TBD).
 - `agents.msy.Msy`: constant mortality harvest control rule. Specified by one parameter `mortality`.

diff --git a/hyperpars/ppo-asm.yml b/hyperpars/ppo-asm.yml
@@ -1,40 +1,42 @@
 # algo 
 algo: "PPO"
-total_timesteps: 4000000
+total_timesteps: 40000000
 algo_config:
     tensorboard_log: "../../../logs"
     #
     policy: 'MlpPolicy'
-    batch_size: 512
-    gamma: 0.9999
-    learning_rate: !!float 7.77e-05
-    ent_coef: 0.00429
-    clip_range: 0.1
-    gae_lambda: 0.9
-    max_grad_norm: 5
-    vf_coef: 0.19
+    # learning_rate: 0.00015
+    policy_kwargs: "dict(net_arch=[256, 128, 128, 32])"
+    #
+    # batch_size: 512
+    # gamma: 0.9999
+    # learning_rate: !!float 7.77e-05
+    # ent_coef: 0.00429
+    # clip_range: 0.1
+    # gae_lambda: 0.9
+    # max_grad_norm: 5
+    # vf_coef: 0.19
     # policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])"
-    policy_kwargs: "dict(net_arch=[300, 200])"
+    # policy_kwargs: "dict(net_arch=[256, 128])"
     use_sde: True
-    clip_range: 0.1
+    # clip_range: 0.1
 
 # env
 env_id: "AsmEnv"
 config: 
-    observation_fn_id: 'observe_1o'
-    n_observs: 1
-    harvest_fn_name: "trophy"
-    n_trophy_ages: 10
-    # upow: 0.6
-    # use_custom_harv_vul: True
-    # use_custom_surv_vul: True
+    observation_fn_id: 'observe_2o'
+    n_observs: 2
+    #
+    harvest_fn_name: "default"
+    # n_trophy_ages: 10
+    upow: 1
 n_envs: 12
 
 # io
 repo: "cboettig/rl-ecology"
-save_path: "../saved_agents/results/"
+save_path: "../saved_agents/long_tests/"
 
 # misc
-id: "results-trophy-nage-10-1obs-hyperpars-larger-net"
+id: "UM1-256-128-128-32-long"
 # id: "short-test"
 additional_imports: ["torch"]
diff --git a/notebooks/for_results/0_tests.ipynb b/notebooks/for_results/0_tests.ipynb
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 1,
    "id": "86470282-d84b-49ff-a7e9-5647580b3d65",
    "metadata": {},
    "outputs": [],
@@ -26,7 +26,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 10,
    "id": "2c293e58-45b2-41e1-881b-a85d9230aba0",
    "metadata": {},
    "outputs": [
@@ -47,45 +47,62 @@
        "    (policy_net): Sequential(\n",
        "      (0): Linear(in_features=2, out_features=64, bias=True)\n",
        "      (1): Tanh()\n",
-       "      (2): Linear(in_features=64, out_features=64, bias=True)\n",
+       "      (2): Linear(in_features=64, out_features=32, bias=True)\n",
        "      (3): Tanh()\n",
+       "      (4): Linear(in_features=32, out_features=16, bias=True)\n",
+       "      (5): Tanh()\n",
        "    )\n",
        "    (value_net): Sequential(\n",
        "      (0): Linear(in_features=2, out_features=64, bias=True)\n",
        "      (1): Tanh()\n",
-       "      (2): Linear(in_features=64, out_features=64, bias=True)\n",
+       "      (2): Linear(in_features=64, out_features=32, bias=True)\n",
        "      (3): Tanh()\n",
+       "      (4): Linear(in_features=32, out_features=16, bias=True)\n",
+       "      (5): Tanh()\n",
        "    )\n",
        "  )\n",
-       "  (action_net): Linear(in_features=64, out_features=1, bias=True)\n",
-       "  (value_net): Linear(in_features=64, out_features=1, bias=True)\n",
+       "  (action_net): Linear(in_features=16, out_features=1, bias=True)\n",
+       "  (value_net): Linear(in_features=16, out_features=1, bias=True)\n",
        ")"
       ]
      },
-     "execution_count": 13,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "with open('../../hyperpars/for_results/ppo_both_UM2.yml') as stream:\n",
     "    options = yaml.safe_load(stream)\n",
-    "\n",
-    "PPO(env=AsmEnv(config=options['config']),**options['algo_config']).policy"
+    "options['algo_config']['policy_kwargs'] = eval(options['algo_config']['policy_kwargs'])\n",
+    "PPO(env=AsmEnv(config=options['config']), **options['algo_config']).policy"
    ]
   },
   {
-   "cell_type": "markdown",
-   "id": "79d44b0b-25bf-4fe7-8ad1-c9e2ba181e54",
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "4e1695dd-212e-489c-9a0b-4459600e02b4",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'net_arch': [64, 32, 16]}"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "They all seem to be of the form $\\{in\\}\\rightarrow 64\\rightarrow 64 \\rightarrow\\{out\\}$ by default."
+    "options['algo_config']['policy_kwargs']"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "8fc31253-c12b-4777-abcb-2344477475ef",
+   "id": "6d79b258-1c1e-4b60-a276-97ddd2659634",
    "metadata": {},
    "outputs": [],
    "source": []

diff --git a/notebooks/for_results/1_fp_skopt.ipynb b/notebooks/for_results/1_fp_skopt.ipynb